mirror of https://github.com/sockspls/badfish synced 2025-04-30 00:33:09 +00:00

Remove code unneeded for playing, refactor, update to latest master dev

This commit is contained in:
Dariusz Orzechowski 2020-07-20 05:45:24 +02:00
parent 961047ed6e
commit 4cceeb7380
90 changed files with 2007 additions and 12428 deletions

View file

@@ -5,6 +5,10 @@ Marco Costalba (mcostalba)
Joona Kiiski (zamar)
Gary Linscott (glinscott)
Yu Nasu (ynasu87) # The original inventor of NNUE
Motohiro Isozaki (yaneurao) # The author of the training data generator and the trainer
Hisayori Noda (nodchip) # Ported NNUE to Stockfish.
Aditya (absimaldata)
Adrian Petrescu (apetresc)
Ajith Chandy Jose (ajithcj)

View file

@@ -1,52 +0,0 @@
# `pgn_to_plain`
This script converts PGN files into a text file that the `learn convert_bin` command can consume. You need to install [python-chess](https://pypi.org/project/python-chess/) to use this script.
pip install python-chess
# Example of Qhapaq's finetuning using `pgn_to_plain`
## Download data
You can download data from [here](http://rebel13.nl/index.html)
## Convert pgn files
**Important: the converted text is very large (approx. 200 bytes / position)**
python pgn_to_plain.py --pgn "pgn/*.pgn" --start_ply 1 --output converted_pgn.txt
The `--pgn` option supports wildcards. With PGN files at Elo >= 3300 you will get a 1.7 GB text file.
## Convert into training data
### Example build command
make nnue-learn ARCH=x86-64
See `src/Makefile` for details.
### Convert
./stockfish
learn convert_bin converted_pgn.txt output_file_name pgn_bin.bin
learn shuffle pgn_bin.bin
You also need to prepare validation data for training, as follows:
python pgn_to_plain.py --pgn "pgn/ccrl-40-15-3400.pgn" --start_ply 1 --output ccrl-40-15-3400.txt
./stockfish
learn convert_bin ccrl-40-15-3400.txt ccrl-40-15-3400_plain.bin
### Learn
./stockfish
setoption name Threads value 8
learn shuffled_sfen.bin newbob_decay 0.5 validation_set_file_name ccrl-40-15-3400_plain.bin nn_batch_size 50000 batchsize 1000000 eval_save_interval 8000000 eta 0.05 lambda 0.0 eval_limit 3000 mirror_percentage 0 use_draw_in_training 1

View file

@@ -1,68 +0,0 @@
import chess.pgn
import argparse
import glob
from typing import List

# todo: do this in C++ tools using pgn-extract
# https://www.cs.kent.ac.uk/people/staff/djb/pgn-extract/help.html#-w

def parse_result(result_str: str, board: chess.Board) -> int:
    if result_str == "1/2-1/2":
        return 0
    if result_str == "0-1":
        if board.turn == chess.WHITE:
            return -1
        else:
            return 1
    elif result_str == "1-0":
        if board.turn == chess.WHITE:
            return 1
        else:
            return -1
    else:
        print("illegal result", result_str)
        raise ValueError

def game_sanity_check(game: chess.pgn.Game) -> bool:
    if game.headers["Result"] not in ["1/2-1/2", "0-1", "1-0"]:
        print("invalid result", game.headers["Result"])
        return False
    return True

def parse_game(game: chess.pgn.Game, writer, start_ply: int = 1) -> None:
    board: chess.Board = game.board()
    if not game_sanity_check(game):
        return
    result: str = game.headers["Result"]
    for ply, move in enumerate(game.mainline_moves()):
        if ply >= start_ply:
            writer.write("fen " + board.fen() + "\n")
            writer.write("move " + str(move) + "\n")
            writer.write("score 0\n")
            writer.write("ply " + str(ply) + "\n")
            writer.write("result " + str(parse_result(result, board)) + "\n")
            writer.write("e\n")
        board.push(move)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--pgn", type=str, required=True)
    parser.add_argument("--start_ply", type=int, default=1)
    parser.add_argument("--output", type=str, default="plain.txt")
    args = parser.parse_args()
    pgn_files: List[str] = glob.glob(args.pgn)
    with open(args.output, 'w') as f:
        for pgn_file in pgn_files:
            print("parse", pgn_file)
            with open(pgn_file) as pgn_loader:
                while True:
                    game = chess.pgn.read_game(pgn_loader)
                    if game is None:
                        break
                    parse_game(game, f, args.start_ply)

if __name__ == "__main__":
    main()
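For reference, here is a minimal, self-contained C++ sketch (not part of this commit; the record text is made up) that parses one record of the "plain" format emitted above, i.e. key/value lines terminated by a line containing only "e":

#include <iostream>
#include <sstream>
#include <string>

int main() {
    const std::string record =
        "fen rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1\n"
        "move e2e4\nscore 0\nply 0\nresult 1\ne\n";
    std::istringstream in(record);
    std::string line;
    while (std::getline(in, line) && line != "e") {
        const auto space = line.find(' ');
        std::cout << "key=" << line.substr(0, space)
                  << " value=" << line.substr(space + 1) << '\n';
    }
    return 0;
}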

View file

@@ -39,21 +39,7 @@ PGOBENCH = ./$(EXE) bench
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
eval/evaluate_mir_inv_tools.cpp \
eval/nnue/evaluate_nnue.cpp \
eval/nnue/evaluate_nnue_learner.cpp \
eval/nnue/features/half_kp.cpp \
eval/nnue/features/half_relative_kp.cpp \
eval/nnue/features/k.cpp \
eval/nnue/features/p.cpp \
eval/nnue/features/castling_right.cpp \
eval/nnue/features/enpassant.cpp \
eval/nnue/nnue_test_command.cpp \
extra/sfen_packer.cpp \
learn/gensfen2019.cpp \
learn/learner.cpp \
learn/learning_tools.cpp \
learn/multi_think.cpp
nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp
OBJS = $(SRCS:.cpp=.o)
@@ -211,7 +197,7 @@ endif
### ==========================================================================
### 3.1 Selecting compiler (default = gcc)
CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) $(NNUECXXFLAGS)
CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS)
DEPENDFLAGS += -std=c++17
LDFLAGS += $(EXTRALDFLAGS)
@@ -564,12 +550,12 @@ clean: objclean profileclean
# clean binaries and objects
objclean:
@rm -f $(EXE) *.o ./syzygy/*.o ./learn/*.o ./extra/*.o ./eval/*.o ./eval/nnue/*.o ./eval/nnue/features/*.o
@rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o
# clean auxiliary profiling files
profileclean:
@rm -rf profdir
@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda ./eval/nnue/*.gcda ./eval/nnue/features/*.gcda
@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda
@rm -f stockfish.profdata *.profraw
default:
@@ -663,19 +649,6 @@ icc-profile-use:
EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \
all
nnue: config-sanity
$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_NNUE -DENABLE_TEST_CMD -fopenmp' LDFLAGS='$(LDFLAGS) -fopenmp' build
profile-nnue: export NNUECXXFLAGS = -DEVAL_NNUE -DENABLE_TEST_CMD
profile-nnue: config-sanity
$(MAKE) profile-build
nnue-gen-sfen-from-original-eval: config-sanity
$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DUSE_EVAL_HASH -DENABLE_TEST_CMD -fopenmp' LDFLAGS='$(LDFLAGS) -fopenmp' build
nnue-learn: config-sanity
$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DEVAL_NNUE -DUSE_EVAL_HASH -DENABLE_TEST_CMD -DUSE_BLAS -I/mingw64/include/OpenBLAS -fopenmp' LDFLAGS='$(LDFLAGS) -lopenblas -fopenmp' build
.depend:
-@$(CXX) $(DEPENDFLAGS) -MM $(OBJS:.o=.cpp) > $@ 2> /dev/null

View file

@@ -589,8 +589,8 @@ ScaleFactor Endgame<KPsK>::operator()(const Position& pos) const {
Bitboard strongPawns = pos.pieces(strongSide, PAWN);
// If all pawns are ahead of the king on a single rook file, it's a draw.
if (!((strongPawns & ~FileABB) || (strongPawns & ~FileHBB)) &&
!(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
if ( !(strongPawns & ~(FileABB | FileHBB))
&& !(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
return SCALE_FACTOR_DRAW;
return SCALE_FACTOR_NONE;

View file

@@ -1,82 +0,0 @@
#ifndef _EVALUATE_COMMON_H_
#define _EVALUATE_COMMON_H_
// Common definitions shared by the modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT).
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
#include <functional>
// KK file name
#define KK_BIN "KK_synthesized.bin"
// KKP file name
#define KKP_BIN "KKP_synthesized.bin"
// KPP file name
#define KPP_BIN "KPP_synthesized.bin"
namespace Eval
{
#if defined(USE_EVAL_HASH)
// prefetch function
void prefetch_evalhash(const Key key);
#endif
// An operator that applies the function f to each parameter of the evaluation function.
// Used for parameter analysis etc.
// type indicates the survey target.
// type = -1 :KK,KKP,KPP all
// type = 0: KK only
// type = 1: KKP only
// type = 2: KPP only
void foreach_eval_param(std::function<void(int32_t, int32_t)>f, int type = -1);
// --------------------------
// for learning
// --------------------------
#if defined(EVAL_LEARN)
// Initialize the gradient array during learning
// Pass the learning rates as arguments; if 0.0, a default value is used.
// As the epoch in update_weights() advances, the learning rate changes gradually from eta1 to eta2 until eta_epoch;
// after eta2_epoch it changes gradually from eta2 to eta3.
void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3);
// Add the gradient difference value to the gradient array for all features that appear in the current phase.
// freeze[0]: Flag that kk does not learn
// freeze[1]: Flag that kkp does not learn
// freeze[2]: Flag that kpp does not learn
// freeze[3]: Flag that kppp does not learn
void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array<bool, 4>& freeze);
// Do SGD or AdaGrad or something based on the current gradient.
// epoch: Generation counter (starting from 0)
// freeze[0]: Flag that kk does not learn
// freeze[1]: Flag that kkp does not learn
// freeze[2]: Flag that kpp does not learn
// freeze[3]: Flag that kppp does not learn
void update_weights(uint64_t epoch, const std::array<bool, 4>& freeze);
// Save the evaluation function parameters to a file.
// You can specify the extension added to the end of the file.
void save_eval(std::string suffix);
// Get the current eta.
double get_eta();
// --learning related commands
// A function that normalizes KK. Note that the result is not completely equivalent to the original evaluation function.
// The idea: by bringing the kkp and kpp values as close to zero as possible, the weights of features
// that never appeared during learning (and therefore stayed at zero) remain reasonable.
void regularize_kk();
#endif
}
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
#endif // _EVALUATE_COMMON_H_
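As an aside, here is a self-contained C++ sketch of one plausible reading of the eta schedule described above; the linear interpolation and the exact epoch boundaries are assumptions, not this repository's code:

#include <cstdint>
#include <iostream>

// eta1 -> eta2 until eta_epoch, then eta2 -> eta3 until eta2_epoch (assumed linear).
double eta_at(std::uint64_t epoch, double eta1, std::uint64_t eta_epoch,
              double eta2, std::uint64_t eta2_epoch, double eta3) {
    if (epoch < eta_epoch)
        return eta1 + (eta2 - eta1) * epoch / eta_epoch;
    if (epoch < eta2_epoch)
        return eta2 + (eta3 - eta2) * (epoch - eta_epoch) / (eta2_epoch - eta_epoch);
    return eta3;
}

int main() {
    for (std::uint64_t e = 0; e <= 300; e += 100)
        std::cout << "epoch " << e << ": eta "
                  << eta_at(e, 0.5, 100, 0.1, 200, 0.05) << '\n';
}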

View file

@@ -1,190 +0,0 @@
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
#include "evaluate_mir_inv_tools.h"
namespace Eval
{
// --- tables
// Value when a certain BonaPiece is seen from the other side
// BONA_PIECE_NOT_INIT is -1, so this must be a signed type.
// Even if KPPT is expanded, BonaPiece will not exceed 2^15 for the time being, so int16_t suffices.
int16_t inv_piece_[Eval::fe_end];
// Returns the one at the position where a BonaPiece on the board is mirrored.
int16_t mir_piece_[Eval::fe_end];
// --- methods
// Returns the value when a certain BonaPiece is seen from the other side
Eval::BonaPiece inv_piece(Eval::BonaPiece p) { return (Eval::BonaPiece)inv_piece_[p]; }
// Returns the one at the position where a BonaPiece on the board is mirrored.
Eval::BonaPiece mir_piece(Eval::BonaPiece p) { return (Eval::BonaPiece)mir_piece_[p]; }
std::function<void()> mir_piece_init_function;
void init_mir_inv_tables()
{
// Initialize the mirror and inverse tables.
// Initialization is limited to once.
static bool first = true;
if (!first) return;
first = false;
// exchange f and e
int t[] = {
f_pawn , e_pawn ,
f_knight , e_knight ,
f_bishop , e_bishop ,
f_rook , e_rook ,
f_queen , e_queen ,
};
// Insert uninitialized value.
for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p)
{
inv_piece_[p] = BONA_PIECE_NOT_INIT;
// mirror does not work for hand pieces. Just return the original value.
mir_piece_[p] = (p < f_pawn) ? p : BONA_PIECE_NOT_INIT;
}
for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p)
{
for (int i = 0; i < int(sizeof(t) / sizeof(t[0])); i += 2) // iterate over the (f_, e_) pairs in t
{
if (t[i] <= p && p < t[i + 1])
{
Square sq = (Square)(p - t[i]);
// found!!
BonaPiece q = (p < fe_hand_end) ? BonaPiece(sq + t[i + 1]) : (BonaPiece)(Inv(sq) + t[i + 1]);
inv_piece_[p] = q;
inv_piece_[q] = p;
/*
It's a bit tricky. Consider p >= fe_hand_end and let n be an integer
(i in the code above is always even):
a) when t[2n + 0] <= p < t[2n + 1], p is an "f_" (friend) piece;
b) when t[2n + 1] <= p < t[2n + 2], p is an "e_" (enemy) piece.
So for p in range a), q = Inv(p - t[2n + 0]) + t[2n + 1] is the corresponding
enemy piece on the board rotated 180 degrees, and inv_piece[] is therefore
initialized by swapping p and q.
*/
// There is no mirror for hand pieces.
if (p < fe_hand_end)
continue;
BonaPiece r1 = (BonaPiece)(Mir(sq) + t[i]);
mir_piece_[p] = r1;
mir_piece_[r1] = p;
BonaPiece p2 = (BonaPiece)(sq + t[i + 1]);
BonaPiece r2 = (BonaPiece)(Mir(sq) + t[i + 1]);
mir_piece_[p2] = r2;
mir_piece_[r2] = p2;
break;
}
}
}
if (mir_piece_init_function)
mir_piece_init_function();
for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p)
{
// If an entry is still uninitialized here, the table initialization code above is incorrect.
assert(mir_piece_[p] != BONA_PIECE_NOT_INIT && mir_piece_[p] < fe_end);
assert(inv_piece_[p] != BONA_PIECE_NOT_INIT && inv_piece_[p] < fe_end);
// mir and inv return to their original coordinates after being applied twice.
assert(mir_piece_[mir_piece_[p]] == p);
assert(inv_piece_[inv_piece_[p]] == p);
// mir->inv->mir->inv must be the original location.
assert(p == inv_piece(mir_piece(inv_piece(mir_piece(p)))));
// inv->mir->inv->mir must be the original location.
assert(p == mir_piece(inv_piece(mir_piece(inv_piece(p)))));
}
#if 0
// Pre-verification that the evaluation function can safely be mirrored.
// Writing a value triggers an assertion, so anything that cannot be
// mirrored should be caught by the assert.
// Apery's WCSC26 evaluation function contains garbage at kpp p1 == 0 and p1 == 20
// (an enemy pawn on the back rank), which trips the assert unless skipped.
std::unordered_set<BonaPiece> s;
vector<int> a = {
f_hand_pawn - 1,e_hand_pawn - 1,
f_hand_lance - 1, e_hand_lance - 1,
f_hand_knight - 1, e_hand_knight - 1,
f_hand_silver - 1, e_hand_silver - 1,
f_hand_gold - 1, e_hand_gold - 1,
f_hand_bishop - 1, e_hand_bishop - 1,
f_hand_rook - 1, e_hand_rook - 1,
};
for (auto b : a)
s.insert((BonaPiece)b);
// Exclude pawns, lances, and knights on ranks where they can never appear (Apery also contains garbage here)
for (Rank r = RANK_1; r <= RANK_2; ++r)
for (File f = FILE_1; f <= FILE_9; ++f)
{
if (r == RANK_1)
{
// pawn on the first rank
BonaPiece b1 = BonaPiece(f_pawn + (f | r));
s.insert(b1);
s.insert(inv_piece[b1]);
// lance on the first rank
BonaPiece b2 = BonaPiece(f_lance + (f | r));
s.insert(b2);
s.insert(inv_piece[b2]);
}
// knight on the first and second ranks
BonaPiece b = BonaPiece(f_knight + (f | r));
s.insert(b);
s.insert(inv_piece[b]);
}
cout << "\nchecking kpp_write()..";
for (auto sq : SQ)
{
cout << sq << ' ';
for (BonaPiece p1 = BONA_PIECE_ZERO; p1 < fe_end; ++p1)
for (BonaPiece p2 = BONA_PIECE_ZERO; p2 < fe_end; ++p2)
if (!s.count(p1) && !s.count(p2))
kpp_write(sq, p1, p2, kpp[sq][p1][p2]);
}
cout << "\nchecking kkp_write()..";
for (auto sq1 : SQ)
{
cout << sq1 << ' ';
for (auto sq2 : SQ)
for (BonaPiece p1 = BONA_PIECE_ZERO; p1 < fe_end; ++p1)
if (!s.count(p1))
kkp_write(sq1, sq2, p1, kkp[sq1][sq2][p1]);
}
cout << "..done!" << endl;
#endif
}
}
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)

View file

@@ -1,47 +0,0 @@
#ifndef _EVALUATE_MIR_INV_TOOLS_
#define _EVALUATE_MIR_INV_TOOLS_
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
// Tools to obtain the mirror (horizontal flip) and inverse (180° rotation on the board) of a BonaPiece.
#include "../types.h"
#include "../evaluate.h"
#include <functional>
namespace Eval
{
// -------------------------------------------------
// tables
// -------------------------------------------------
// --- Provide Mirror and Inverse to BonaPiece.
// These arrays are initialized by calling init() or init_mir_inv_tables().
// If you only need these tables (not the rest of the evaluation function),
// call init_mir_inv_tables().
// These arrays are referenced from the KK/KKP/KPP classes below.
// Returns the value when a certain BonaPiece is seen from the other side
extern Eval::BonaPiece inv_piece(Eval::BonaPiece p);
// Returns the one at the position where a BonaPiece on the board is mirrored.
extern Eval::BonaPiece mir_piece(Eval::BonaPiece p);
// Callback invoked while initializing mir_piece/inv_piece.
// Used when extending fe_end on the user side.
// mir_piece_ and inv_piece_ are exposed because they are needed for this initialization.
// By the time mir_piece_init_function is called, these tables are guaranteed
// to have been initialized up to fe_old_end.
extern std::function<void()> mir_piece_init_function;
extern int16_t mir_piece_[Eval::fe_end];
extern int16_t inv_piece_[Eval::fe_end];
// The table above will be initialized when you call this function explicitly or call init().
extern void init_mir_inv_tables();
}
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
#endif
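For intuition, a self-contained sketch of the two involutions these tables encode, on plain 0..63 chess squares; the exact Mir/Inv conventions here are assumptions matching the comments above:

#include <cassert>

int Mir(int sq) { return sq ^ 7; }    // flip the file (A<->H) within the rank
int Inv(int sq) { return 63 - sq; }   // rotate the board 180 degrees

int main() {
    for (int sq = 0; sq < 64; ++sq) {
        assert(Mir(Mir(sq)) == sq);   // both operations are involutions
        assert(Inv(Inv(sq)) == sq);
        // mir->inv->mir->inv returns to the original square, as the
        // asserts in init_mir_inv_tables() check for BonaPieces.
        assert(Inv(Mir(Inv(Mir(sq)))) == sq);
    }
    return 0;
}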

View file

@@ -1,42 +0,0 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef HALFKP_CR_EP_256X2_32_32_H
#define HALFKP_CR_EP_256X2_32_32_H
#include "../features/feature_set.h"
#include "../features/half_kp.h"
#include "../features/castling_right.h"
#include "../features/enpassant.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval {
namespace NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
Features::EnPassant>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
// define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
using Network = Layers::OutputLayer;
} // namespace NNUE
} // namespace Eval
#endif // HALFKP_CR_EP_256X2_32_32_H

View file

@@ -1,42 +0,0 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef K_P_CR_EP_256X2_32_32_H
#define K_P_CR_EP_256X2_32_32_H
#include "../features/feature_set.h"
#include "../features/k.h"
#include "../features/p.h"
#include "../features/castling_right.h"
#include "../features/enpassant.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval {
namespace NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<Features::K, Features::P,
Features::CastlingRight, Features::EnPassant>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
// define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
using Network = Layers::OutputLayer;
} // namespace NNUE
} // namespace Eval
#endif // K_P_CR_EP_256X2_32_32_H

View file

@@ -1,41 +0,0 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef K_P_CR_256X2_32_32_H
#define K_P_CR_256X2_32_32_H
#include "../features/feature_set.h"
#include "../features/k.h"
#include "../features/p.h"
#include "../features/castling_right.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval {
namespace NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<Features::K, Features::P,
Features::CastlingRight>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
// define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
using Network = Layers::OutputLayer;
} // namespace NNUE
} // namespace Eval
#endif // K_P_CR_256X2_32_32_H

View file

@@ -1,38 +0,0 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef K_P_256X2_32_32_H
#define K_P_256X2_32_32_H
#include "../features/feature_set.h"
#include "../features/k.h"
#include "../features/p.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval {
namespace NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<Features::K, Features::P>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
// define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
using Network = Layers::OutputLayer;
} // namespace NNUE
} // namespace Eval
#endif // K_P_256X2_32_32_H
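All four of these architecture headers share the same 256x2-32-32 shape and differ only in RawFeatures. Here is a minimal, self-contained sketch of that layer composition (toy float weights; the engine's real implementation is quantized and vectorized):

#include <algorithm>
#include <array>
#include <cstddef>

// Dense layer with a single shared toy weight, purely for illustration.
template <std::size_t In, std::size_t Out>
std::array<float, Out> affine(const std::array<float, In>& x,
                              float weight, float bias) {
    std::array<float, Out> y{};
    for (std::size_t o = 0; o < Out; ++o) {
        y[o] = bias;
        for (std::size_t i = 0; i < In; ++i)
            y[o] += weight * x[i];
    }
    return y;
}

template <std::size_t N>
std::array<float, N> clipped_relu(std::array<float, N> x) {
    for (float& v : x) v = std::clamp(v, 0.0f, 1.0f);
    return x;
}

int main() {
    std::array<float, 512> input{};   // 256 transformed features per perspective
    input.fill(0.01f);
    auto h1 = clipped_relu(affine<512, 32>(input, 0.001f, 0.0f));
    auto h2 = clipped_relu(affine<32, 32>(h1, 0.01f, 0.0f));
    auto out = affine<32, 1>(h2, 0.1f, 0.0f);   // single linear output
    return out[0] >= 0.0f ? 0 : 1;
}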

View file

@@ -1,326 +0,0 @@
// Code for calculating NNUE evaluation function
#if defined(EVAL_NNUE)
#include <fstream>
#include <iostream>
#include "../../evaluate.h"
#include "../../position.h"
#include "../../misc.h"
#include "../../uci.h"
#include "evaluate_nnue.h"
namespace Eval {
namespace NNUE {
// Input feature converter
AlignedPtr<FeatureTransformer> feature_transformer;
// Evaluation function
AlignedPtr<Network> network;
// Evaluation function file name
std::string fileName = "nn.bin";
// Saved evaluation function file name
std::string savedfileName = "nn.bin";
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString() {
return "Features=" + FeatureTransformer::GetStructureString() +
",Network=" + Network::GetStructureString();
}
namespace {
namespace Detail {
// Initialize the evaluation function parameters
template <typename T>
void Initialize(AlignedPtr<T>& pointer) {
pointer.reset(reinterpret_cast<T*>(aligned_malloc(sizeof(T), alignof(T))));
std::memset(pointer.get(), 0, sizeof(T));
}
// read evaluation function parameters
template <typename T>
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
std::uint32_t header;
stream.read(reinterpret_cast<char*>(&header), sizeof(header));
if (!stream || header != T::GetHashValue()) return false;
return pointer->ReadParameters(stream);
}
// write evaluation function parameters
template <typename T>
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
constexpr std::uint32_t header = T::GetHashValue();
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
return pointer->WriteParameters(stream);
}
} // namespace Detail
// Initialize the evaluation function parameters
void Initialize() {
Detail::Initialize(feature_transformer);
Detail::Initialize(network);
}
} // namespace
// read the header
bool ReadHeader(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture) {
std::uint32_t version, size;
stream.read(reinterpret_cast<char*>(&version), sizeof(version));
stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
stream.read(reinterpret_cast<char*>(&size), sizeof(size));
if (!stream || version != kVersion) return false;
architecture->resize(size);
stream.read(&(*architecture)[0], size);
return !stream.fail();
}
// write the header
bool WriteHeader(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture) {
stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
stream.write(architecture.data(), size);
return !stream.fail();
}
// read evaluation function parameters
bool ReadParameters(std::istream& stream) {
std::uint32_t hash_value;
std::string architecture;
if (!ReadHeader(stream, &hash_value, &architecture)) return false;
if (hash_value != kHashValue) return false;
if (!Detail::ReadParameters(stream, feature_transformer)) return false;
if (!Detail::ReadParameters(stream, network)) return false;
return stream && stream.peek() == std::ios::traits_type::eof();
}
// write evaluation function parameters
bool WriteParameters(std::ostream& stream) {
if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
if (!Detail::WriteParameters(stream, feature_transformer)) return false;
if (!Detail::WriteParameters(stream, network)) return false;
return !stream.fail();
}
// Update the accumulator incrementally when possible
static void UpdateAccumulatorIfPossible(const Position& pos) {
feature_transformer->UpdateAccumulatorIfPossible(pos);
}
// Calculate the evaluation value
static Value ComputeScore(const Position& pos, bool refresh = false) {
auto& accumulator = pos.state()->accumulator;
if (!refresh && accumulator.computed_score) {
return accumulator.score;
}
alignas(kCacheLineSize) TransformedFeatureType
transformed_features[FeatureTransformer::kBufferSize];
feature_transformer->Transform(pos, transformed_features, refresh);
alignas(kCacheLineSize) char buffer[Network::kBufferSize];
const auto output = network->Propagate(transformed_features, buffer);
// If a value above VALUE_MAX_EVAL were returned, aspiration search would fail
// high and the search would never terminate, so the score must be kept within
// VALUE_MAX_EVAL.
// In games with a fixed time per move this hardly matters: the search is
// aborted on time and the previous iteration's best move is played, and the
// nearly decided positions that produce such extreme values barely affect the
// outcome anyway.
// With a fixed search depth, however (e.g. when generating teacher data), the
// search would not return, wasting that thread's computation time, and a
// fixed-depth game would time out.
auto score = static_cast<Value>(output[0] / FV_SCALE);
// 1) Clipping too aggressively may affect learning...
// 2) accumulator.score is not referenced during incremental updates, so it is safe to overwrite.
score = Math::clamp(score, -VALUE_MAX_EVAL, VALUE_MAX_EVAL);
accumulator.score = score;
accumulator.computed_score = true;
return accumulator.score;
}
} // namespace NNUE
#if defined(USE_EVAL_HASH)
// Class used to store evaluation values in HashTable
struct alignas(16) ScoreKeyValue {
#if defined(USE_SSE2)
ScoreKeyValue() = default;
ScoreKeyValue(const ScoreKeyValue& other) {
static_assert(sizeof(ScoreKeyValue) == sizeof(__m128i),
"sizeof(ScoreKeyValue) should be equal to sizeof(__m128i)");
_mm_store_si128(&as_m128i, other.as_m128i);
}
ScoreKeyValue& operator=(const ScoreKeyValue& other) {
_mm_store_si128(&as_m128i, other.as_m128i);
return *this;
}
#endif
// Eval hash entries must behave atomically; encode()/decode() provide the manipulation for that.
void encode() {
#if defined(USE_SSE2)
// With SSE2 the ScoreKeyValue is copied as a single 128-bit value, so if the key matches, the data matches.
#else
key ^= score;
#endif
}
// decode() is the reverse conversion of encode(), but since it is xor, the reverse conversion is the same.
void decode() { encode(); }
union {
struct {
std::uint64_t key;
std::uint64_t score;
};
#if defined(USE_SSE2)
__m128i as_m128i;
#endif
};
};
// Simple HashTable implementation.
// Size is a power of 2.
template <typename T, size_t Size>
struct HashTable {
HashTable() { clear(); }
T* operator [] (const Key k) { return entries_ + (static_cast<size_t>(k) & (Size - 1)); }
void clear() { memset(entries_, 0, sizeof(T)*Size); }
// Check that Size is a power of 2
static_assert((Size & (Size - 1)) == 0, "");
private:
T entries_[Size];
};
// HashTable that caches computed evaluations (like ehash)
#if !defined(USE_LARGE_EVAL_HASH)
// 134MB (setting for non-AVX2 builds)
struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x800000> {};
#else
// With prefetch a larger table would be preferable...
// ...but it changes little and wastes memory, so the smaller size above is the sensible default.
// 1GB (setting for AVX2 builds)
struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x4000000> {};
#endif
EvaluateHashTable g_evalTable;
// Prepare a function to prefetch.
void prefetch_evalhash(const Key key) {
constexpr auto mask = ~((uint64_t)0x1f);
prefetch((void*)((uint64_t)g_evalTable[key] & mask));
}
#endif
// read the evaluation function file
// The bench command (among others) saves and restores the Options, which can change EvalDir;
// this function may therefore be called twice to signal that the evaluation function must be reloaded.
void load_eval() {
// Must be done!
NNUE::Initialize();
if (Options["SkipLoadingEval"])
{
std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
return;
}
const std::string file_name = Options["EvalFile"];
NNUE::fileName = file_name;
std::ifstream stream(file_name, std::ios::binary);
const bool result = NNUE::ReadParameters(stream);
if (!result)
// A read error must not pass silently.
std::cout << "Error! " << NNUE::fileName << " not found or wrong format" << std::endl;
else
std::cout << "info string NNUE " << NNUE::fileName << " found & loaded" << std::endl;
}
// Initialization
void init() {
}
// Evaluation function. Performs a full calculation instead of an incremental one.
// Called only once, from Position::set(); incremental calculation is used afterwards.
// Note that the returned value is from the side to move's point of view
// (in this respect the design differs from the other evaluation functions).
// For that reason, no effort is made to optimize this function.
Value compute_eval(const Position& pos) {
return NNUE::ComputeScore(pos, true);
}
// Evaluation function
Value evaluate(const Position& pos) {
const auto& accumulator = pos.state()->accumulator;
if (accumulator.computed_score) {
return accumulator.score;
}
#if defined(USE_GLOBAL_OPTIONS)
// If GlobalOptions says not to use the eval hash,
// skip the eval hash lookup.
if (!GlobalOptions.use_eval_hash) {
ASSERT_LV5(pos.state()->materialValue == Eval::material(pos));
return NNUE::ComputeScore(pos);
}
#endif
#if defined(USE_EVAL_HASH)
// May be in the evaluate hash table.
const Key key = pos.key();
ScoreKeyValue entry = *g_evalTable[key];
entry.decode();
if (entry.key == key) {
// cache hit!
return Value(entry.score);
}
#endif
Value score = NNUE::ComputeScore(pos);
#if defined(USE_EVAL_HASH)
// The value was computed from scratch, so store it in the evaluate hash table.
entry.key = key;
entry.score = score;
entry.encode();
*g_evalTable[key] = entry;
#endif
return score;
}
// Update the accumulator incrementally when possible
void evaluate_with_no_return(const Position& pos) {
NNUE::UpdateAccumulatorIfPossible(pos);
}
// display the breakdown of the evaluation value of the current position
void print_eval_stat(Position& /*pos*/) {
std::cout << "--- EVAL STAT: not implemented" << std::endl;
}
} // namespace Eval
#endif // defined(EVAL_NNUE)
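A self-contained sketch of the non-SSE2 encode()/decode() trick above: XOR-ing the key with the score means a torn (half-written) entry will almost surely fail the later key comparison, and applying the same XOR twice restores the original:

#include <cassert>
#include <cstdint>

struct Entry { std::uint64_t key, score; };

int main() {
    Entry e{0x123456789ABCDEF0u, 42u};
    e.key ^= e.score;   // encode()
    e.key ^= e.score;   // decode() is the same operation
    assert(e.key == 0x123456789ABCDEF0u && e.score == 42u);
    return 0;
}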

View file

@@ -1,67 +0,0 @@
// header used in NNUE evaluation function
#ifndef _EVALUATE_NNUE_H_
#define _EVALUATE_NNUE_H_
#if defined(EVAL_NNUE)
#include "nnue_feature_transformer.h"
#include "nnue_architecture.h"
#include <memory>
namespace Eval {
namespace NNUE {
// hash value of evaluation function structure
constexpr std::uint32_t kHashValue =
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
// Deleter for automating release of memory area
template <typename T>
struct AlignedDeleter {
void operator()(T* ptr) const {
ptr->~T();
aligned_free(ptr);
}
};
template <typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
// Input feature converter
extern AlignedPtr<FeatureTransformer> feature_transformer;
// Evaluation function
extern AlignedPtr<Network> network;
// Evaluation function file name
extern std::string fileName;
// Saved evaluation function file name
extern std::string savedfileName;
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString();
// read the header
bool ReadHeader(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture);
// write the header
bool WriteHeader(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture);
// read evaluation function parameters
bool ReadParameters(std::istream& stream);
// write evaluation function parameters
bool WriteParameters(std::ostream& stream);
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
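A self-contained sketch of the AlignedPtr pattern declared above, substituting std::aligned_alloc/std::free for the engine's aligned_malloc/aligned_free (an assumption made for portability):

#include <cstdlib>
#include <memory>
#include <new>

template <typename T>
struct AlignedDeleter {
    void operator()(T* ptr) const {
        ptr->~T();          // destroy in place
        std::free(ptr);     // release the aligned storage
    }
};
template <typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;

struct alignas(64) Big { float data[16]; };

int main() {
    void* raw = std::aligned_alloc(alignof(Big), sizeof(Big));
    AlignedPtr<Big> p(new (raw) Big{});   // placement-new into aligned storage
    return p->data[0] == 0.0f ? 0 : 1;
}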

View file

@@ -1,231 +0,0 @@
// Code for learning NNUE evaluation function
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include <random>
#include <fstream>
#include "../../learn/learn.h"
#include "../../learn/learning_tools.h"
#include "../../position.h"
#include "../../uci.h"
#include "../../misc.h"
#include "../../thread_win32_osx.h"
#include "../evaluate_common.h"
#include "evaluate_nnue.h"
#include "evaluate_nnue_learner.h"
#include "trainer/features/factorizer_feature_set.h"
#include "trainer/features/factorizer_half_kp.h"
#include "trainer/trainer_feature_transformer.h"
#include "trainer/trainer_input_slice.h"
#include "trainer/trainer_affine_transform.h"
#include "trainer/trainer_clipped_relu.h"
#include "trainer/trainer_sum.h"
namespace Eval {
namespace NNUE {
namespace {
// learning data
std::vector<Example> examples;
// Mutex for exclusive control of examples
std::mutex examples_mutex;
// number of samples in mini-batch
uint64_t batch_size;
// random number generator
std::mt19937 rng;
// learner
std::shared_ptr<Trainer<Network>> trainer;
// Learning rate scale
double global_learning_rate_scale;
// Get the learning rate scale
double GetGlobalLearningRateScale() {
return global_learning_rate_scale;
}
// Tell the learner options such as hyperparameters
void SendMessages(std::vector<Message> messages) {
for (auto& message : messages) {
trainer->SendMessage(&message);
assert(message.num_receivers > 0);
}
}
} // namespace
// Initialize learning
void InitializeTraining(double eta1, uint64_t eta1_epoch,
double eta2, uint64_t eta2_epoch, double eta3) {
std::cout << "Initializing NN training for "
<< GetArchitectureString() << std::endl;
assert(feature_transformer);
assert(network);
trainer = Trainer<Network>::Create(network.get(), feature_transformer.get());
if (Options["SkipLoadingEval"]) {
trainer->Initialize(rng);
}
global_learning_rate_scale = 1.0;
EvalLearningTools::Weight::init_eta(eta1, eta2, eta3, eta1_epoch, eta2_epoch);
}
// set the number of samples in the mini-batch
void SetBatchSize(uint64_t size) {
assert(size > 0);
batch_size = size;
}
// set the learning rate scale
void SetGlobalLearningRateScale(double scale) {
global_learning_rate_scale = scale;
}
// Set options such as hyperparameters
void SetOptions(const std::string& options) {
std::vector<Message> messages;
for (const auto& option : Split(options, ',')) {
const auto fields = Split(option, '=');
assert(fields.size() == 1 || fields.size() == 2);
if (fields.size() == 1) {
messages.emplace_back(fields[0]);
} else {
messages.emplace_back(fields[0], fields[1]);
}
}
SendMessages(std::move(messages));
}
// Reread the evaluation function parameters for learning from the file
void RestoreParameters(const std::string& dir_name) {
const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
std::ifstream stream(file_name, std::ios::binary);
bool result = ReadParameters(stream);
assert(result);
SendMessages({{"reset"}});
}
// Add 1 sample of learning data
void AddExample(Position& pos, Color rootColor,
const Learner::PackedSfenValue& psv, double weight) {
Example example;
if (rootColor == pos.side_to_move()) {
example.sign = 1;
} else {
example.sign = -1;
}
example.psv = psv;
example.weight = weight;
Features::IndexList active_indices[2];
for (const auto trigger : kRefreshTriggers) {
RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
}
if (pos.side_to_move() != WHITE) {
active_indices[0].swap(active_indices[1]);
}
for (const auto color : Colors) {
std::vector<TrainingFeature> training_features;
for (const auto base_index : active_indices[color]) {
static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
(1 << TrainingFeature::kIndexBits), "");
Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
base_index, &training_features);
}
std::sort(training_features.begin(), training_features.end());
auto& unique_features = example.training_features[color];
for (const auto& feature : training_features) {
if (!unique_features.empty() &&
feature.GetIndex() == unique_features.back().GetIndex()) {
unique_features.back() += feature;
} else {
unique_features.push_back(feature);
}
}
}
std::lock_guard<std::mutex> lock(examples_mutex);
examples.push_back(std::move(example));
}
// update the evaluation function parameters
void UpdateParameters(uint64_t epoch) {
assert(batch_size > 0);
EvalLearningTools::Weight::calc_eta(epoch);
const auto learning_rate = static_cast<LearnFloatType>(
get_eta() / batch_size);
std::lock_guard<std::mutex> lock(examples_mutex);
std::shuffle(examples.begin(), examples.end(), rng);
while (examples.size() >= batch_size) {
std::vector<Example> batch(examples.end() - batch_size, examples.end());
examples.resize(examples.size() - batch_size);
const auto network_output = trainer->Propagate(batch);
std::vector<LearnFloatType> gradients(batch.size());
for (std::size_t b = 0; b < batch.size(); ++b) {
const auto shallow = static_cast<Value>(Round<std::int32_t>(
batch[b].sign * network_output[b] * kPonanzaConstant));
const auto& psv = batch[b].psv;
const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
}
trainer->Backpropagate(gradients.data(), learning_rate);
}
SendMessages({{"quantize_parameters"}});
}
// Check if there are any problems with learning
void CheckHealth() {
SendMessages({{"check_health"}});
}
} // namespace NNUE
// save evaluation function parameters to a file
void save_eval(std::string dir_name) {
auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
// mkdir() fails if the folder already exists, but that is fine;
// it only needs to be created when it is missing.
// The folders up to EvalSaveDir are assumed to exist already.
Dependency::mkdir(eval_dir);
if (Options["SkipLoadingEval"] && NNUE::trainer) {
NNUE::SendMessages({{"clear_unobserved_feature_weights"}});
}
const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
std::ofstream stream(file_name, std::ios::binary);
const bool result = NNUE::WriteParameters(stream);
assert(result);
std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
}
// get the current eta
double get_eta() {
return NNUE::GetGlobalLearningRateScale() * EvalLearningTools::Weight::eta;
}
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
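A self-contained sketch of the mini-batch consumption pattern used by UpdateParameters() above: shuffle the pool, then split whole batches off the end until fewer than batch_size examples remain:

#include <algorithm>
#include <iostream>
#include <random>
#include <vector>

int main() {
    std::vector<int> examples(10);
    for (int i = 0; i < 10; ++i) examples[i] = i;
    std::mt19937 rng(42);
    const std::size_t batch_size = 4;
    std::shuffle(examples.begin(), examples.end(), rng);
    while (examples.size() >= batch_size) {
        std::vector<int> batch(examples.end() - batch_size, examples.end());
        examples.resize(examples.size() - batch_size);
        std::cout << "batch of " << batch.size() << '\n';
    }
    // Leftovers stay in the pool for the next call.
    std::cout << examples.size() << " examples left over\n";
    return 0;
}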

View file

@@ -1,46 +0,0 @@
// Interface used for learning NNUE evaluation function
#ifndef _EVALUATE_NNUE_LEARNER_H_
#define _EVALUATE_NNUE_LEARNER_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../learn/learn.h"
namespace Eval {
namespace NNUE {
// Initialize learning
void InitializeTraining(double eta1, uint64_t eta1_epoch,
double eta2, uint64_t eta2_epoch, double eta3);
// set the number of samples in the mini-batch
void SetBatchSize(uint64_t size);
// set the learning rate scale
void SetGlobalLearningRateScale(double scale);
// Set options such as hyperparameters
void SetOptions(const std::string& options);
// Reread the evaluation function parameters for learning from the file
void RestoreParameters(const std::string& dir_name);
// Add 1 sample of learning data
void AddExample(Position& pos, Color rootColor,
const Learner::PackedSfenValue& psv, double weight);
// update the evaluation function parameters
void UpdateParameters(uint64_t epoch);
// Check if there are any problems with learning
void CheckHealth();
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
#endif

View file

@@ -1,73 +0,0 @@
// Definition of the input feature CastlingRight of the NNUE evaluation function
#if defined(EVAL_NNUE)
#include "castling_right.h"
#include "index_list.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Get a list of indices with a value of 1 among the features
void CastlingRight::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
int castling_rights = pos.state()->castlingRights;
int relative_castling_rights;
if (perspective == WHITE) {
relative_castling_rights = castling_rights;
}
else {
// Invert the perspective.
relative_castling_rights = ((castling_rights & 3) << 2)
| ((castling_rights >> 2) & 3);
}
for (int i = 0; i < kDimensions; ++i) {
if (relative_castling_rights & (1 << i)) {
active->push_back(i);
}
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void CastlingRight::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
int previous_castling_rights = pos.state()->previous->castlingRights;
int current_castling_rights = pos.state()->castlingRights;
int relative_previous_castling_rights;
int relative_current_castling_rights;
if (perspective == WHITE) {
relative_previous_castling_rights = previous_castling_rights;
relative_current_castling_rights = current_castling_rights;
}
else {
// Invert the perspective.
relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
| ((previous_castling_rights >> 2) & 3);
relative_current_castling_rights = ((current_castling_rights & 3) << 2)
| ((current_castling_rights >> 2) & 3);
}
for (int i = 0; i < kDimensions; ++i) {
if ((relative_previous_castling_rights & (1 << i)) &&
(relative_current_castling_rights & (1 << i)) == 0) {
removed->push_back(i);
}
}
}
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)

View file

@@ -1,48 +0,0 @@
// Definition of the input feature CastlingRight of the NNUE evaluation function
#ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_
#define _NNUE_FEATURES_CASTLING_RIGHT_H_
#if defined(EVAL_NNUE)
#include "../../../evaluate.h"
#include "features_common.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Feature CastlingRight: the castling rights of both sides
class CastlingRight {
public:
// feature quantity name
static constexpr const char* kName = "CastlingRight";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x913968AAu;
// number of feature dimensions
static constexpr IndexType kDimensions = 4;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 4;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
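A self-contained sketch of the perspective flip above, assuming bits 0-1 hold white's castling rights and bits 2-3 black's:

#include <cassert>

int relative_rights(int castling_rights, bool white_perspective) {
    if (white_perspective)
        return castling_rights;
    // Swap the two 2-bit halves so "friend" rights come first.
    return ((castling_rights & 3) << 2) | ((castling_rights >> 2) & 3);
}

int main() {
    const int all = 0xF;                  // both sides may castle both ways
    assert(relative_rights(all, false) == all);
    const int white_only = 0x3;
    assert(relative_rights(white_only, false) == 0xC);  // seen as "enemy" rights
    // Each set bit i (0..3) contributes one active feature index i.
    return 0;
}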

View file

@@ -1,47 +0,0 @@
// Definition of the input feature EnPassant of the NNUE evaluation function
#if defined(EVAL_NNUE)
#include "enpassant.h"
#include "index_list.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Get a list of indices with a value of 1 among the features
void EnPassant::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
auto epSquare = pos.state()->epSquare;
if (epSquare == SQ_NONE) {
return;
}
if (perspective == BLACK) {
epSquare = Inv(epSquare);
}
auto file = file_of(epSquare);
active->push_back(file);
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void EnPassant::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
// Not implemented.
assert(false);
}
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)

View file

@@ -1,48 +0,0 @@
// Definition of the input feature EnPassant of the NNUE evaluation function
#ifndef _NNUE_FEATURES_ENPASSANT_H_
#define _NNUE_FEATURES_ENPASSANT_H_
#if defined(EVAL_NNUE)
#include "../../../evaluate.h"
#include "features_common.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Feature EnPassant: the file of the en passant square
class EnPassant {
public:
// feature quantity name
static constexpr const char* kName = "EnPassant";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x02924F91u;
// number of feature dimensions
static constexpr IndexType kDimensions = 8;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 1;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
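A self-contained sketch of this 8-dimensional feature: the single active index is the file of the en passant square, with the square rotated 180 degrees for the black perspective (the Inv convention here is an assumption):

#include <cassert>

int ep_feature(int ep_square, bool black_perspective) {
    if (black_perspective)
        ep_square = 63 - ep_square;   // Inv: rotate the board 180 degrees
    return ep_square & 7;             // file_of
}

int main() {
    const int e3 = 2 * 8 + 4;         // rank 3, file e
    assert(ep_feature(e3, false) == 4);
    assert(ep_feature(e3, true) == 3);  // e-file maps to d-file after rotation
    return 0;
}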

View file

@@ -1,249 +0,0 @@
// A class template that represents the input feature set of the NNUE evaluation function
#ifndef _NNUE_FEATURE_SET_H_
#define _NNUE_FEATURE_SET_H_
#if defined(EVAL_NNUE)
#include "features_common.h"
#include <array>
namespace Eval {
namespace NNUE {
namespace Features {
// A class template that represents a list of values
template <typename T, T... Values>
struct CompileTimeList;
template <typename T, T First, T... Remaining>
struct CompileTimeList<T, First, Remaining...> {
static constexpr bool Contains(T value) {
return value == First || CompileTimeList<T, Remaining...>::Contains(value);
}
static constexpr std::array<T, sizeof...(Remaining) + 1>
kValues = {{First, Remaining...}};
};
template <typename T, T First, T... Remaining>
constexpr std::array<T, sizeof...(Remaining) + 1>
CompileTimeList<T, First, Remaining...>::kValues;
template <typename T>
struct CompileTimeList<T> {
static constexpr bool Contains(T /*value*/) {
return false;
}
static constexpr std::array<T, 0> kValues = {{}};
};
// Class template that adds to the beginning of the list
template <typename T, typename ListType, T Value>
struct AppendToList;
template <typename T, T... Values, T AnotherValue>
struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
using Result = CompileTimeList<T, AnotherValue, Values...>;
};
// Class template for adding to a sorted, unique list
template <typename T, typename ListType, T Value>
struct InsertToSet;
template <typename T, T First, T... Remaining, T AnotherValue>
struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
using Result = std::conditional_t<
CompileTimeList<T, First, Remaining...>::Contains(AnotherValue),
CompileTimeList<T, First, Remaining...>,
std::conditional_t<(AnotherValue < First),
CompileTimeList<T, AnotherValue, First, Remaining...>,
typename AppendToList<T, typename InsertToSet<
T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
First>::Result>>;
};
template <typename T, T Value>
struct InsertToSet<T, CompileTimeList<T>, Value> {
using Result = CompileTimeList<T, Value>;
};
// Base class of feature set
template <typename Derived>
class FeatureSetBase {
public:
// Get a list of indices with a value of 1 among the features
template <typename IndexListType>
static void AppendActiveIndices(
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
for (const auto perspective : Colors) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &active[perspective]);
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
template <typename PositionType, typename IndexListType>
static void AppendChangedIndices(
const PositionType& pos, TriggerEvent trigger,
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
const auto& dp = pos.state()->dirtyPiece;
if (dp.dirty_num == 0) return;
for (const auto perspective : Colors) {
reset[perspective] = false;
switch (trigger) {
case TriggerEvent::kNone:
break;
case TriggerEvent::kFriendKingMoved:
reset[perspective] =
dp.pieceNo[0] == PIECE_NUMBER_KING + perspective;
break;
case TriggerEvent::kEnemyKingMoved:
reset[perspective] =
dp.pieceNo[0] == PIECE_NUMBER_KING + ~perspective;
break;
case TriggerEvent::kAnyKingMoved:
reset[perspective] = dp.pieceNo[0] >= PIECE_NUMBER_KING;
break;
case TriggerEvent::kAnyPieceMoved:
reset[perspective] = true;
break;
default:
assert(false);
break;
}
if (reset[perspective]) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &added[perspective]);
} else {
Derived::CollectChangedIndices(
pos, trigger, perspective,
&removed[perspective], &added[perspective]);
}
}
}
};
// Class template that represents the feature set
// internal processing runs in reverse order of the template arguments so that the run-time computation stays linear
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
public FeatureSetBase<
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
private:
using Head = FirstFeatureType;
using Tail = FeatureSet<RemainingFeatureTypes...>;
public:
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
// number of feature dimensions
static constexpr IndexType kDimensions =
Head::kDimensions + Tail::kDimensions;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions =
Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
// List of timings to perform all calculations instead of difference calculation
using SortedTriggerSet = typename InsertToSet<TriggerEvent,
typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
// Get the feature quantity name
static std::string GetName() {
return std::string(Head::kName) + "+" + Tail::GetName();
}
private:
// Get a list of indices with a value of 1 among the features
template <typename IndexListType>
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexListType* const active) {
Tail::CollectActiveIndices(pos, trigger, perspective, active);
if (Head::kRefreshTrigger == trigger) {
const auto start = active->size();
Head::AppendActiveIndices(pos, perspective, active);
for (auto i = start; i < active->size(); ++i) {
(*active)[i] += Tail::kDimensions;
}
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
template <typename IndexListType>
static void CollectChangedIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexListType* const removed, IndexListType* const added) {
Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
if (Head::kRefreshTrigger == trigger) {
const auto start_removed = removed->size();
const auto start_added = added->size();
Head::AppendChangedIndices(pos, perspective, removed, added);
for (auto i = start_removed; i < removed->size(); ++i) {
(*removed)[i] += Tail::kDimensions;
}
for (auto i = start_added; i < added->size(); ++i) {
(*added)[i] += Tail::kDimensions;
}
}
}
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
// Class template that represents the feature set
// Specialization with one template argument
template <typename FeatureType>
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
public:
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
// number of feature dimensions
static constexpr IndexType kDimensions = FeatureType::kDimensions;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions =
FeatureType::kMaxActiveDimensions;
// List of timings to perform all calculations instead of difference calculation
using SortedTriggerSet =
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
// Get the feature quantity name
static std::string GetName() {
return FeatureType::kName;
}
private:
// Get a list of indices with a value of 1 among the features
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const active) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendActiveIndices(pos, perspective, active);
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void CollectChangedIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const removed, IndexList* const added) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendChangedIndices(pos, perspective, removed, added);
}
}
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
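For illustration, a self-contained sketch of FeatureSet's order-sensitive hash combination above (head XOR rotate-left-by-one of tail); the two constants are the hashes from the feature headers in this commit:

#include <cassert>
#include <cstdint>

std::uint32_t combine(std::uint32_t head, std::uint32_t tail) {
    return head ^ (tail << 1) ^ (tail >> 31);   // head ^ rotl(tail, 1)
}

int main() {
    const std::uint32_t halfkp = 0x5D69D5B9u;
    const std::uint32_t castling = 0x913968AAu;
    assert(combine(halfkp, castling) != combine(castling, halfkp));  // order-sensitive
    return 0;
}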

View file

@@ -1,47 +0,0 @@
//Common header of input features of NNUE evaluation function
#ifndef _NNUE_FEATURES_COMMON_H_
#define _NNUE_FEATURES_COMMON_H_
#if defined(EVAL_NNUE)
#include "../../../evaluate.h"
#include "../nnue_common.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Index list type
class IndexList;
// Class template that represents the feature set
template <typename... FeatureTypes>
class FeatureSet;
// Type of timing to perform all calculations instead of difference calculation
enum class TriggerEvent {
kNone, // calculate incrementally whenever possible
kFriendKingMoved, // recalculate everything when the friendly king moves
kEnemyKingMoved, // recalculate everything when the enemy king moves
kAnyKingMoved, // recalculate everything when either king moves
kAnyPieceMoved, // always recalculate everything
};
// side to move or the opposing side
enum class Side {
kFriend, // side to move
kEnemy, // opponent
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif

View file

@@ -1,84 +0,0 @@
//Definition of input features HalfKP of NNUE evaluation function
#if defined(EVAL_NNUE)
#include "half_kp.h"
#include "index_list.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Find the feature index from the king square and BonaPiece
template <Side AssociatedKing>
inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, BonaPiece p) {
return static_cast<IndexType>(fe_end) * static_cast<IndexType>(sq_k) + p;
}
// Get the piece information
template <Side AssociatedKing>
inline void HalfKP<AssociatedKing>::GetPieces(
const Position& pos, Color perspective,
BonaPiece** pieces, Square* sq_target_k) {
*pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
const PieceNumber target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
}
// Get a list of indices with a value of 1 among the features
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
BonaPiece* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
if (pieces[i] != Eval::BONA_PIECE_ZERO) {
active->push_back(MakeIndex(sq_target_k, pieces[i]));
}
}
}
// Get lists of feature indices whose values changed from the previous position
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
BonaPiece* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
const auto old_p = static_cast<BonaPiece>(
dp.changed_piece[i].old_piece.from[perspective]);
if (old_p != Eval::BONA_PIECE_ZERO) {
removed->push_back(MakeIndex(sq_target_k, old_p));
}
const auto new_p = static_cast<BonaPiece>(
dp.changed_piece[i].new_piece.from[perspective]);
if (new_p != Eval::BONA_PIECE_ZERO) {
added->push_back(MakeIndex(sq_target_k, new_p));
}
}
}
template class HalfKP<Side::kFriend>;
template class HalfKP<Side::kEnemy>;
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)

View file

@ -1,62 +0,0 @@
// Definition of the input feature HalfKP of the NNUE evaluation function
#ifndef _NNUE_FEATURES_HALF_KP_H_
#define _NNUE_FEATURES_HALF_KP_H_
#if defined(EVAL_NNUE)
#include "../../../evaluate.h"
#include "features_common.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Feature HalfKP: combination of the friendly or enemy king's square and the square of each non-king piece
template <Side AssociatedKing>
class HalfKP {
public:
// Feature name
static constexpr const char* kName =
(AssociatedKing == Side::kFriend) ? "HalfKP(Friend)" : "HalfKP(Enemy)";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
// number of feature dimensions
static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(fe_end);
// Maximum number of feature indices that can be active (value 1) at the same time
static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
// When to recompute everything instead of updating incrementally
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
// Get a list of active feature indices (those with a value of 1)
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get lists of feature indices whose values changed from the previous position
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// Compute the feature index from the king square and a BonaPiece
static IndexType MakeIndex(Square sq_k, BonaPiece p);
private:
// Get the piece information
static void GetPieces(const Position& pos, Color perspective,
BonaPiece** pieces, Square* sq_target_k);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
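For reference, HalfKP's MakeIndex above lays the feature space out as one block of fe_end entries per king square. A minimal worked sketch, assuming the chess port's fe_end of 641 BonaPiece values (64 squares x 10 non-king piece types, plus BONA_PIECE_ZERO), which gives the well-known 64 x 641 = 41024 dimensions:

    #include <cstdint>
    #include <iostream>

    constexpr std::uint32_t kFeEnd = 641; // BonaPiece count (assumed, see above)

    // Mirrors HalfKP::MakeIndex: one block of kFeEnd features per king square.
    std::uint32_t make_index(std::uint32_t king_sq, std::uint32_t bona_piece) {
        return kFeEnd * king_sq + bona_piece;
    }

    int main() {
        // King on square 4, piece with BonaPiece value 100:
        std::cout << make_index(4, 100) << '\n'; // 641 * 4 + 100 = 2664
    }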

View file

@ -1,97 +0,0 @@
// Definition of the input feature HalfRelativeKP of the NNUE evaluation function
#if defined(EVAL_NNUE)
#include "half_relative_kp.h"
#include "index_list.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Compute the feature index from the king square and a BonaPiece
template <Side AssociatedKing>
inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
Square sq_k, BonaPiece p) {
constexpr IndexType W = kBoardWidth;
constexpr IndexType H = kBoardHeight;
const IndexType piece_index = (p - fe_hand_end) / SQUARE_NB;
const Square sq_p = static_cast<Square>((p - fe_hand_end) % SQUARE_NB);
const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
return H * W * piece_index + H * relative_file + relative_rank;
}
// Get the piece information
template <Side AssociatedKing>
inline void HalfRelativeKP<AssociatedKing>::GetPieces(
const Position& pos, Color perspective,
BonaPiece** pieces, Square* sq_target_k) {
*pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
const PieceNumber target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
}
// Get a list of active feature indices (those with a value of 1)
template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
BonaPiece* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
if (pieces[i] >= fe_hand_end) {
if (pieces[i] != Eval::BONA_PIECE_ZERO) {
active->push_back(MakeIndex(sq_target_k, pieces[i]));
}
}
}
}
// Get lists of feature indices whose values changed from the previous position
template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
BonaPiece* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
const auto old_p = static_cast<BonaPiece>(
dp.changed_piece[i].old_piece.from[perspective]);
if (old_p >= fe_hand_end) {
if (old_p != Eval::BONA_PIECE_ZERO) {
removed->push_back(MakeIndex(sq_target_k, old_p));
}
}
const auto new_p = static_cast<BonaPiece>(
dp.changed_piece[i].new_piece.from[perspective]);
if (new_p >= fe_hand_end) {
if (new_p != Eval::BONA_PIECE_ZERO) {
added->push_back(MakeIndex(sq_target_k, new_p));
}
}
}
}
template class HalfRelativeKP<Side::kFriend>;
template class HalfRelativeKP<Side::kEnemy>;
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)

View file

@ -1,68 +0,0 @@
// Definition of the input feature HalfRelativeKP of the NNUE evaluation function
#ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_
#define _NNUE_FEATURES_HALF_RELATIVE_KP_H_
#if defined(EVAL_NNUE)
#include "../../../evaluate.h"
#include "features_common.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Feature HalfRelativeKP: position of each non-king piece relative to the friendly or enemy king
template <Side AssociatedKing>
class HalfRelativeKP {
public:
// Feature name
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
"HalfRelativeKP(Friend)" : "HalfRelativeKP(Enemy)";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
0xF9180919u ^ (AssociatedKing == Side::kFriend);
// Number of piece kinds, excluding kings
static constexpr IndexType kNumPieceKinds = (fe_end - fe_hand_end) / SQUARE_NB;
// Width of the virtual board with the king at the center
static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
// Height of the virtual board with the king at the center
static constexpr IndexType kBoardHeight = RANK_NB * 2 - 1;
// number of feature dimensions
static constexpr IndexType kDimensions =
kNumPieceKinds * kBoardHeight * kBoardWidth;
// Maximum number of feature indices that can be active (value 1) at the same time
static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
// When to recompute everything instead of updating incrementally
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
// Get a list of active feature indices (those with a value of 1)
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get lists of feature indices whose values changed from the previous position
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// Compute the feature index from the king square and a BonaPiece
static IndexType MakeIndex(Square sq_k, BonaPiece p);
private:
// Get the piece information
static void GetPieces(const Position& pos, Color perspective,
BonaPiece** pieces, Square* sq_target_k);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
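The relative indexing above centers a virtual 15x15 board on the king (for an 8x8 board, FILE_NB = RANK_NB = 8), so each relative file and rank lands in 0..14. A small worked sketch with illustrative names:

    #include <iostream>

    constexpr int kW = 15, kH = 15; // virtual board, king at the center

    // Mirrors HalfRelativeKP::MakeIndex for board-square pieces.
    int make_relative_index(int piece_index, int file_p, int rank_p,
                            int file_k, int rank_k) {
        const int rel_file = file_p - file_k + kW / 2; // 0..14
        const int rel_rank = rank_p - rank_k + kH / 2; // 0..14
        return kH * kW * piece_index + kH * rel_file + rel_rank;
    }

    int main() {
        // King on e4 (file 4, rank 3), piece on d6 (file 3, rank 5), piece kind 2:
        std::cout << make_relative_index(2, 3, 5, 4, 3) << '\n'; // 225*2 + 15*6 + 9 = 549
    }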

View file

@ -1,55 +0,0 @@
// Definition of the index list of input features
#ifndef _NNUE_FEATURES_INDEX_LIST_H_
#define _NNUE_FEATURES_INDEX_LIST_H_
#if defined(EVAL_NNUE)
#include "../../../position.h"
#include "../nnue_architecture.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Class template used for feature index list
template <typename T, std::size_t MaxSize>
class ValueList {
public:
std::size_t size() const { return size_; }
void resize(std::size_t size) { size_ = size; }
void push_back(const T& value) { values_[size_++] = value; }
T& operator[](std::size_t index) { return values_[index]; }
T* begin() { return values_; }
T* end() { return values_ + size_; }
const T& operator[](std::size_t index) const { return values_[index]; }
const T* begin() const { return values_; }
const T* end() const { return values_ + size_; }
void swap(ValueList& other) {
const std::size_t max_size = std::max(size_, other.size_);
for (std::size_t i = 0; i < max_size; ++i) {
std::swap(values_[i], other.values_[i]);
}
std::swap(size_, other.size_);
}
private:
T values_[MaxSize];
std::size_t size_ = 0;
};
// Type of the feature index list
class IndexList
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
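ValueList above is a fixed-capacity, stack-allocated vector: push_back never allocates, which keeps index collection cheap inside the search. A trimmed-down usage sketch:

    #include <cstddef>
    #include <iostream>

    template <typename T, std::size_t MaxSize>
    class ValueList {
    public:
        std::size_t size() const { return size_; }
        void push_back(const T& value) { values_[size_++] = value; }
        const T* begin() const { return values_; }
        const T* end() const { return values_ + size_; }
    private:
        T values_[MaxSize]; // fixed storage, no heap allocation
        std::size_t size_ = 0;
    };

    int main() {
        ValueList<unsigned, 30> indices; // e.g. at most 30 active features
        indices.push_back(2664);
        indices.push_back(549);
        for (unsigned idx : indices) std::cout << idx << ' '; // 2664 549
    }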

View file

@ -1,49 +0,0 @@
// Definition of the input feature K of the NNUE evaluation function
#if defined(EVAL_NNUE)
#include "k.h"
#include "index_list.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Get a list of active feature indices (those with a value of 1)
void K::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
const BonaPiece* pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
assert(pieces[PIECE_NUMBER_BKING] != BONA_PIECE_ZERO);
assert(pieces[PIECE_NUMBER_WKING] != BONA_PIECE_ZERO);
for (PieceNumber i = PIECE_NUMBER_KING; i < PIECE_NUMBER_NB; ++i) {
active->push_back(pieces[i] - fe_end);
}
}
// Get lists of feature indices whose values changed from the previous position
void K::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
const auto& dp = pos.state()->dirtyPiece;
if (dp.pieceNo[0] >= PIECE_NUMBER_KING) {
removed->push_back(
dp.changed_piece[0].old_piece.from[perspective] - fe_end);
added->push_back(
dp.changed_piece[0].new_piece.from[perspective] - fe_end);
}
}
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)

View file

@ -1,48 +0,0 @@
// Definition of the input feature K of the NNUE evaluation function
#ifndef _NNUE_FEATURES_K_H_
#define _NNUE_FEATURES_K_H_
#if defined(EVAL_NNUE)
#include "../../../evaluate.h"
#include "features_common.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Feature K: square of each king
class K {
public:
// Feature name
static constexpr const char* kName = "K";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0xD3CEE169u;
// number of feature dimensions
static constexpr IndexType kDimensions = SQUARE_NB * 2;
// Maximum number of feature indices that can be active (value 1) at the same time
static constexpr IndexType kMaxActiveDimensions = 2;
// When to recompute everything instead of updating incrementally
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of active feature indices (those with a value of 1)
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get lists of feature indices whose values changed from the previous position
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif

View file

@ -1,52 +0,0 @@
// Definition of the input feature P of the NNUE evaluation function
#if defined(EVAL_NNUE)
#include "p.h"
#include "index_list.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Get a list of active feature indices (those with a value of 1)
void P::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
const BonaPiece* pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
if (pieces[i] != Eval::BONA_PIECE_ZERO) {
active->push_back(pieces[i]);
}
}
}
// Get lists of feature indices whose values changed from the previous position
void P::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
if (dp.changed_piece[i].old_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
removed->push_back(dp.changed_piece[i].old_piece.from[perspective]);
}
if (dp.changed_piece[i].new_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
added->push_back(dp.changed_piece[i].new_piece.from[perspective]);
}
}
}
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)

View file

@ -1,48 +0,0 @@
// Definition of the input feature P of the NNUE evaluation function
#ifndef _NNUE_FEATURES_P_H_
#define _NNUE_FEATURES_P_H_
#if defined(EVAL_NNUE)
#include "../../../evaluate.h"
#include "features_common.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Feature P: BonaPiece of each non-king piece
class P {
public:
// Feature name
static constexpr const char* kName = "P";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
// number of feature dimensions
static constexpr IndexType kDimensions = fe_end;
// Maximum number of feature indices that can be active (value 1) at the same time
static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
// When to recompute everything instead of updating incrementally
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of active feature indices (those with a value of 1)
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get lists of feature indices whose values changed from the previous position
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif

View file

@ -1,178 +0,0 @@
// Definition of the layer AffineTransform of the NNUE evaluation function
#ifndef _NNUE_LAYERS_AFFINE_TRANSFORM_H_
#define _NNUE_LAYERS_AFFINE_TRANSFORM_H_
#if defined(EVAL_NNUE)
#include "../nnue_common.h"
namespace Eval {
namespace NNUE {
namespace Layers {
// affine transformation layer
template <typename PreviousLayer, IndexType OutputDimensions>
class AffineTransform {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = std::int32_t;
static_assert(std::is_same<InputType, std::uint8_t>::value, "");
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = OutputDimensions;
static constexpr IndexType kPaddedInputDimensions =
CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xCC03DAE4u;
hash_value += kOutputDimensions;
hash_value ^= PreviousLayer::GetHashValue() >> 1;
hash_value ^= PreviousLayer::GetHashValue() << 31;
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "AffineTransform[" +
std::to_string(kOutputDimensions) + "<-" +
std::to_string(kInputDimensions) + "](" +
PreviousLayer::GetStructureString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
if (!previous_layer_.ReadParameters(stream)) return false;
stream.read(reinterpret_cast<char*>(biases_),
kOutputDimensions * sizeof(BiasType));
stream.read(reinterpret_cast<char*>(weights_),
kOutputDimensions * kPaddedInputDimensions *
sizeof(WeightType));
return !stream.fail();
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
if (!previous_layer_.WriteParameters(stream)) return false;
stream.write(reinterpret_cast<const char*>(biases_),
kOutputDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kOutputDimensions * kPaddedInputDimensions *
sizeof(WeightType));
return !stream.fail();
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
const auto input = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m256i kOnes = _mm256_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m256i*>(input);
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m128i kOnes = _mm_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m128i*>(input);
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
#endif
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType offset = i * kPaddedInputDimensions;
#if defined(USE_AVX2)
__m256i sum = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, biases_[i]);
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i product = _mm256_maddubs_epi16(
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(): binaries
// compiled with g++ in MSYS2 crash here because the output memory is not
// aligned even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&input_vector[j]), _mm256_load_si256(&row[j]));
product = _mm256_madd_epi16(product, kOnes);
sum = _mm256_add_epi32(sum, product);
}
sum = _mm256_hadd_epi32(sum, sum);
sum = _mm256_hadd_epi32(sum, sum);
const __m128i lo = _mm256_extracti128_si256(sum, 0);
const __m128i hi = _mm256_extracti128_si256(sum, 1);
output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
#elif defined(USE_SSSE3)
__m128i sum = _mm_cvtsi32_si128(biases_[i]);
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i product = _mm_maddubs_epi16(
_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
product = _mm_madd_epi16(product, kOnes);
sum = _mm_add_epi32(sum, product);
}
sum = _mm_hadd_epi32(sum, sum);
sum = _mm_hadd_epi32(sum, sum);
output[i] = _mm_cvtsi128_si32(sum);
#elif defined(IS_ARM)
int32x4_t sum = {biases_[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
sum = vpadalq_s16(sum, product);
}
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
#else
OutputType sum = biases_[i];
for (IndexType j = 0; j < kInputDimensions; ++j) {
sum += weights_[offset + j] * input[j];
}
output[i] = sum;
#endif
}
return output;
}
private:
// parameter type
using BiasType = OutputType;
using WeightType = std::int8_t;
// Make the trainer class a friend
friend class Trainer<AffineTransform>;
// the layer immediately before this layer
PreviousLayer previous_layer_;
// parameter
alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
alignas(kCacheLineSize)
WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
};
} // namespace Layers
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
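The SIMD paths above all compute the same thing as the scalar fallback: an int8-weight, uint8-input affine transform accumulated in int32. A self-contained scalar sketch with made-up dimensions:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // output[i] = biases[i] + sum_j weights[i][j] * input[j]
    std::vector<std::int32_t> affine(const std::vector<std::uint8_t>& input,
                                     const std::vector<std::int8_t>& weights, // row-major
                                     const std::vector<std::int32_t>& biases) {
        const std::size_t out_dims = biases.size(), in_dims = input.size();
        std::vector<std::int32_t> output(out_dims);
        for (std::size_t i = 0; i < out_dims; ++i) {
            std::int32_t sum = biases[i];
            for (std::size_t j = 0; j < in_dims; ++j)
                sum += weights[i * in_dims + j] * input[j];
            output[i] = sum;
        }
        return output;
    }

    int main() {
        auto out = affine({3, 1}, {2, -1, 0, 5}, {10, -4});
        std::cout << out[0] << ' ' << out[1] << '\n'; // 15 1
    }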

View file

@ -1,177 +0,0 @@
// Definition of the layer ClippedReLU of the NNUE evaluation function
#ifndef _NNUE_LAYERS_CLIPPED_RELU_H_
#define _NNUE_LAYERS_CLIPPED_RELU_H_
#if defined(EVAL_NNUE)
#include "../nnue_common.h"
namespace Eval {
namespace NNUE {
namespace Layers {
// Clipped ReLU
template <typename PreviousLayer>
class ClippedReLU {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = std::uint8_t;
static_assert(std::is_same<InputType, std::int32_t>::value, "");
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0x538D24C7u;
hash_value += PreviousLayer::GetHashValue();
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "ClippedReLU[" +
std::to_string(kOutputDimensions) + "](" +
PreviousLayer::GetStructureString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
return previous_layer_.ReadParameters(stream);
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
return previous_layer_.WriteParameters(stream);
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
const auto input = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
const __m256i kZero = _mm256_setzero_si256();
const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
const auto in = reinterpret_cast<const __m256i*>(input);
const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(): binaries
// compiled with g++ in MSYS2 crash here because the output memory is not
// aligned even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 0]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 1])), kWeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 2]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 3])), kWeightScaleBits);
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
const __m128i kZero = _mm_setzero_si128();
#ifndef USE_SSE41
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
const auto in = reinterpret_cast<const __m128i*>(input);
const auto out = reinterpret_cast<__m128i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 0]),
_mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 2]),
_mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
_mm_store_si128(&out[i],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
const auto in = reinterpret_cast<const int32x4_t*>(input);
const auto out = reinterpret_cast<int8x8_t*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
int16x8_t shifted;
const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
}
constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
#else
constexpr IndexType kStart = 0;
#endif
for (IndexType i = kStart; i < kInputDimensions; ++i) {
output[i] = static_cast<OutputType>(
std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
}
return output;
}
private:
// Make the trainer class a friend
friend class Trainer<ClippedReLU>;
// the layer immediately before this layer
PreviousLayer previous_layer_;
};
} // namespace Layers
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
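All the vectorized branches above reduce to the scalar loop at the end: shift out the weight scale (kWeightScaleBits = 6, from nnue_common.h), then clamp to [0, 127] so the result fits the uint8 output type. A minimal sketch:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    constexpr int kWeightScaleBits = 6;

    std::uint8_t clipped_relu(std::int32_t x) {
        return static_cast<std::uint8_t>(std::max(0, std::min(127, x >> kWeightScaleBits)));
    }

    int main() {
        std::cout << int(clipped_relu(-500))    << ' '   // 0   (negatives clamp to 0)
                  << int(clipped_relu(640))     << ' '   // 10  (640 >> 6)
                  << int(clipped_relu(1 << 20)) << '\n'; // 127 (saturates)
    }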

View file

@ -1,163 +0,0 @@
// Definition of the layer Sum of the NNUE evaluation function
#ifndef _NNUE_LAYERS_SUM_H_
#define _NNUE_LAYERS_SUM_H_
#if defined(EVAL_NNUE)
#include "../nnue_common.h"
namespace Eval {
namespace NNUE {
namespace Layers {
// Layer that sums the output of multiple layers
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
class Sum : public Sum<RemainingPreviousLayers...> {
private:
using Head = FirstPreviousLayer;
using Tail = Sum<RemainingPreviousLayers...>;
public:
// Input/output type
using InputType = typename Head::OutputType;
using OutputType = InputType;
static_assert(std::is_same<InputType, typename Tail::InputType>::value, "");
// number of input/output dimensions
static constexpr IndexType kInputDimensions = Head::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
static_assert(kInputDimensions == Tail::kInputDimensions, "");
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
std::max(Head::kBufferSize + kSelfBufferSize, Tail::kBufferSize);
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xBCE400B4u;
hash_value ^= Head::GetHashValue() >> 1;
hash_value ^= Head::GetHashValue() << 31;
hash_value ^= Tail::GetHashValue() >> 2;
hash_value ^= Tail::GetHashValue() << 30;
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "Sum[" +
std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
if (!Tail::ReadParameters(stream)) return false;
return previous_layer_.ReadParameters(stream);
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
if (!Tail::WriteParameters(stream)) return false;
return previous_layer_.WriteParameters(stream);
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
Tail::Propagate(transformed_features, buffer);
const auto head_output = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output[i] += head_output[i];
}
return output;
}
protected:
// A string that represents the list of layers to be summed
static std::string GetSummandsString() {
return Head::GetStructureString() + "," + Tail::GetSummandsString();
}
// Make the trainer class a friend
friend class Trainer<Sum>;
// the layer immediately before this layer
FirstPreviousLayer previous_layer_;
};
// Layer that sums the output of multiple layers (specialization for a single template argument)
template <typename PreviousLayer>
class Sum<PreviousLayer> {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = InputType;
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize = PreviousLayer::kBufferSize;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xBCE400B4u;
hash_value ^= PreviousLayer::GetHashValue() >> 1;
hash_value ^= PreviousLayer::GetHashValue() << 31;
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "Sum[" +
std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
return previous_layer_.ReadParameters(stream);
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
return previous_layer_.WriteParameters(stream);
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
return previous_layer_.Propagate(transformed_features, buffer);
}
protected:
// A string that represents the list of layers to be summed
static std::string GetSummandsString() {
return PreviousLayer::GetStructureString();
}
// Make the trainer class a friend
friend class Trainer<Sum>;
// the layer immediately before this layer
PreviousLayer previous_layer_;
};
} // namespace Layers
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif

View file

@ -1,30 +0,0 @@
// Class for the incremental (difference) calculation of the NNUE evaluation function
#ifndef _NNUE_ACCUMULATOR_H_
#define _NNUE_ACCUMULATOR_H_
#if defined(EVAL_NNUE)
#include "nnue_architecture.h"
namespace Eval {
namespace NNUE {
// Class that holds the result of the affine transformation of the input features,
// together with the evaluation value that is the final output
struct alignas(32) Accumulator {
std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
Value score = VALUE_ZERO;
bool computed_accumulation = false;
bool computed_score = false;
};
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif

View file

@ -1,33 +0,0 @@
// Input features and network structure used in the NNUE evaluation function
#ifndef _NNUE_ARCHITECTURE_H_
#define _NNUE_ARCHITECTURE_H_
#if defined(EVAL_NNUE)
// include a header that defines the input features and network structure
//#include "architectures/k-p_256x2-32-32.h"
//#include "architectures/k-p-cr_256x2-32-32.h"
//#include "architectures/k-p-cr-ep_256x2-32-32.h"
#include "architectures/halfkp_256x2-32-32.h"
//#include "architectures/halfkp-cr-ep_256x2-32-32.h"
//#include "architectures/halfkp_384x2-32-32.h"
namespace Eval {
namespace NNUE {
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
static_assert(Network::kOutputDimensions == 1, "");
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
// List of triggers for performing a full recalculation instead of an incremental update
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif

View file

@ -1,64 +0,0 @@
// Constants used in NNUE evaluation function
#ifndef _NNUE_COMMON_H_
#define _NNUE_COMMON_H_
#if defined(EVAL_NNUE)
#if defined(USE_AVX2)
#include <immintrin.h>
#elif defined(USE_SSE41)
#include <smmintrin.h>
#elif defined(USE_SSSE3)
#include <tmmintrin.h>
#elif defined(USE_SSE2)
#include <emmintrin.h>
#endif
namespace Eval {
namespace NNUE {
// A constant that represents the version of the evaluation function file
constexpr std::uint32_t kVersion = 0x7AF32F16u;
// Constants used in evaluation value calculation
constexpr int FV_SCALE = 16;
constexpr int kWeightScaleBits = 6;
// Size of cache line (in bytes)
constexpr std::size_t kCacheLineSize = 64;
// SIMD width (in bytes)
#if defined(USE_AVX2)
constexpr std::size_t kSimdWidth = 32;
#elif defined(USE_SSE2)
constexpr std::size_t kSimdWidth = 16;
#elif defined(IS_ARM)
constexpr std::size_t kSimdWidth = 16;
#endif
constexpr std::size_t kMaxSimdWidth = 32;
// Type of input feature after conversion
using TransformedFeatureType = std::uint8_t;
// index type
using IndexType = std::uint32_t;
// Forward declaration of the trainer class template
template <typename Layer>
class Trainer;
// Round n up to the smallest multiple of base that is >= n
template <typename IntType>
constexpr IntType CeilToMultiple(IntType n, IntType base) {
return (n + base - 1) / base * base;
}
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
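CeilToMultiple above is the padding helper used to round dimensions up to the SIMD width; for example, it is how kPaddedInputDimensions is derived in the affine layer. A compile-time check of its behavior:

    #include <iostream>

    template <typename IntType>
    constexpr IntType CeilToMultiple(IntType n, IntType base) {
        return (n + base - 1) / base * base; // smallest multiple of base that is >= n
    }

    static_assert(CeilToMultiple(100, 32) == 128, "rounds up");
    static_assert(CeilToMultiple(128, 32) == 128, "exact multiples unchanged");

    int main() { std::cout << CeilToMultiple(41024, 32) << '\n'; } // 41024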

View file

@ -1,357 +0,0 @@
// A class that converts the input features of the NNUE evaluation function
#ifndef _NNUE_FEATURE_TRANSFORMER_H_
#define _NNUE_FEATURE_TRANSFORMER_H_
#if defined(EVAL_NNUE)
#include "nnue_common.h"
#include "nnue_architecture.h"
#include "features/index_list.h"
#include <cstring> // std::memset()
namespace Eval {
namespace NNUE {
// Input feature converter
class FeatureTransformer {
private:
// number of output dimensions for one side
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
public:
// output type
using OutputType = TransformedFeatureType;
// number of input/output dimensions
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
// size of forward propagation buffer
static constexpr std::size_t kBufferSize =
kOutputDimensions * sizeof(OutputType);
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
return RawFeatures::kHashValue ^ kOutputDimensions;
}
// a string representing the structure
static std::string GetStructureString() {
return RawFeatures::GetName() + "[" +
std::to_string(kInputDimensions) + "->" +
std::to_string(kHalfDimensions) + "x2]";
}
// read parameters
bool ReadParameters(std::istream& stream) {
stream.read(reinterpret_cast<char*>(biases_),
kHalfDimensions * sizeof(BiasType));
stream.read(reinterpret_cast<char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(biases_),
kHalfDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
// Perform the incremental update if possible
bool UpdateAccumulatorIfPossible(const Position& pos) const {
const auto now = pos.state();
if (now->accumulator.computed_accumulation) {
return true;
}
const auto prev = now->previous;
if (prev && prev->accumulator.computed_accumulation) {
UpdateAccumulator(pos);
return true;
}
return false;
}
// convert input features
void Transform(const Position& pos, OutputType* output, bool refresh) const {
if (refresh || !UpdateAccumulatorIfPossible(pos)) {
RefreshAccumulator(pos);
}
const auto& accumulation = pos.state()->accumulator.accumulation;
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256();
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
const __m128i kZero = _mm_setzero_si128();
#ifndef USE_SSE41
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
#endif
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
for (IndexType p = 0; p < 2; ++p) {
const IndexType offset = kHalfDimensions * p;
#if defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 =
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(): binaries
// compiled with g++ in MSYS2 crash here because the output memory is not
// aligned even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m256i sum1 =
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
}
#elif defined(USE_SSSE3)
auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
_mm_store_si128(&out[j],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
}
#elif defined(IS_ARM)
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][0])[j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][i])[j]);
}
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
}
output[offset + j] = static_cast<OutputType>(
std::max<int>(0, std::min<int>(127, sum)));
}
#endif
}
}
private:
// Compute the accumulator from scratch, without incremental updates
void RefreshAccumulator(const Position& pos) const {
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList active_indices[2];
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices);
for (const auto perspective : Colors) {
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
#else
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
#endif
}
#elif defined(USE_SSE2)
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
}
#elif defined(IS_ARM)
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
#endif
}
}
}
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
}
// Update the accumulator incrementally from the previous position
void UpdateAccumulator(const Position& pos) const {
const auto prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2];
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
for (const auto perspective : Colors) {
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
#endif
if (reset[perspective]) {
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
} else { // Incremental update: subtract features whose value changed from 1 to 0
std::memcpy(accumulator.accumulation[perspective][i],
prev_accumulator.accumulation[perspective][i],
kHalfDimensions * sizeof(BiasType));
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
}
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
}
#elif defined(IS_ARM)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vsubq_s16(accumulation[j], column[j]);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] -=
weights_[offset + j];
}
#endif
}
}
{ // Incremental update: add features whose value changed from 0 to 1
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
}
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
}
#elif defined(IS_ARM)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] +=
weights_[offset + j];
}
#endif
}
}
}
}
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
}
// parameter type
using BiasType = std::int16_t;
using WeightType = std::int16_t;
// Make the trainer class a friend
friend class Trainer<FeatureTransformer>;
// parameter
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
alignas(kCacheLineSize)
WeightType weights_[kHalfDimensions * kInputDimensions];
};
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
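The core idea of RefreshAccumulator/UpdateAccumulator above, stripped of SIMD, perspectives, and refresh triggers: the accumulator is the bias vector plus the sum of one weight column per active feature, so a move only needs to subtract the removed columns and add the new ones. A minimal sketch:

    #include <cstdint>
    #include <vector>

    using Vec = std::vector<std::int16_t>;

    // Full rebuild: bias + sum of weight columns of all active features.
    void refresh(Vec& acc, const Vec& biases, const Vec& weights,
                 std::size_t half_dims, const std::vector<std::uint32_t>& active) {
        acc = biases;
        for (auto idx : active)
            for (std::size_t j = 0; j < half_dims; ++j)
                acc[j] += weights[half_dims * idx + j];
    }

    // Incremental update: subtract removed columns, add added columns.
    void update(Vec& acc, const Vec& weights, std::size_t half_dims,
                const std::vector<std::uint32_t>& removed,
                const std::vector<std::uint32_t>& added) {
        for (auto idx : removed)
            for (std::size_t j = 0; j < half_dims; ++j)
                acc[j] -= weights[half_dims * idx + j];
        for (auto idx : added)
            for (std::size_t j = 0; j < half_dims; ++j)
                acc[j] += weights[half_dims * idx + j];
    }

    int main() {
        const std::size_t H = 4;                  // toy half dimension
        Vec biases{1, 1, 1, 1}, weights(H * 3), acc;
        for (std::size_t i = 0; i < weights.size(); ++i) weights[i] = std::int16_t(i);
        refresh(acc, biases, weights, H, {0, 2});
        update(acc, weights, H, {0}, {1});        // now equals refresh with {1, 2}
    }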

View file

@ -1,201 +0,0 @@
// USI extended command for NNUE evaluation function
#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
#include "../../thread.h"
#include "../../uci.h"
#include "evaluate_nnue.h"
#include "nnue_test_command.h"
#include <set>
#include <fstream>
#define ASSERT(X) { if (!(X)) { std::cout << "\nError : ASSERT(" << #X << "), " << __FILE__ << "(" << __LINE__ << "): " << __func__ << std::endl; \
std::this_thread::sleep_for(std::chrono::microseconds(3000)); *(int*)1 =0;} }
namespace Eval {
namespace NNUE {
namespace {
// Test RawFeatures, mainly the incremental (difference) calculation
void TestFeatures(Position& pos) {
const std::uint64_t num_games = 1000;
StateInfo si;
pos.set(StartFEN, false, &si, Threads.main());
const int MAX_PLY = 256; // test up to 256 plies
StateInfo state[MAX_PLY]; // StateInfo for up to the maximum number of plies
int ply; // number of plies from the initial position
PRNG prng(20171128);
std::uint64_t num_moves = 0;
std::vector<std::uint64_t> num_updates(kRefreshTriggers.size() + 1);
std::vector<std::uint64_t> num_resets(kRefreshTriggers.size());
constexpr IndexType kUnknown = -1;
std::vector<IndexType> trigger_map(RawFeatures::kDimensions, kUnknown);
auto make_index_sets = [&](const Position& pos) {
std::vector<std::vector<std::set<IndexType>>> index_sets(
kRefreshTriggers.size(), std::vector<std::set<IndexType>>(2));
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList active_indices[2];
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices);
for (const auto perspective : Colors) {
for (const auto index : active_indices[perspective]) {
ASSERT(index < RawFeatures::kDimensions);
ASSERT(index_sets[i][perspective].count(index) == 0);
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
index_sets[i][perspective].insert(index);
trigger_map[index] = i;
}
}
}
return index_sets;
};
auto update_index_sets = [&](const Position& pos, auto* index_sets) {
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2];
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
for (const auto perspective : Colors) {
if (reset[perspective]) {
(*index_sets)[i][perspective].clear();
++num_resets[i];
} else {
for (const auto index : removed_indices[perspective]) {
ASSERT(index < RawFeatures::kDimensions);
ASSERT((*index_sets)[i][perspective].count(index) == 1);
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
(*index_sets)[i][perspective].erase(index);
++num_updates.back();
++num_updates[i];
trigger_map[index] = i;
}
}
for (const auto index : added_indices[perspective]) {
ASSERT(index < RawFeatures::kDimensions);
ASSERT((*index_sets)[i][perspective].count(index) == 0);
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
(*index_sets)[i][perspective].insert(index);
++num_updates.back();
++num_updates[i];
trigger_map[index] = i;
}
}
}
};
std::cout << "feature set: " << RawFeatures::GetName()
<< "[" << RawFeatures::kDimensions << "]" << std::endl;
std::cout << "start testing with random games";
for (std::uint64_t i = 0; i < num_games; ++i) {
auto index_sets = make_index_sets(pos);
for (ply = 0; ply < MAX_PLY; ++ply) {
MoveList<LEGAL> mg(pos); // Generate all legal moves
// No legal moves means the game is over (checkmate or stalemate)
if (mg.size() == 0)
break;
// Pick one of the generated moves at random and advance the position with it
Move m = mg.begin()[prng.rand(mg.size())];
pos.do_move(m, state[ply]);
++num_moves;
update_index_sets(pos, &index_sets);
ASSERT(index_sets == make_index_sets(pos));
}
pos.set(StartFEN, false, &si, Threads.main());
// Print '.' every 100 games to show progress
if ((i % 100) == 0)
std::cout << "." << std::flush;
}
std::cout << "passed." << std::endl;
std::cout << num_games << " games, " << num_moves << " moves, "
<< num_updates.back() << " updates, "
<< (1.0 * num_updates.back() / num_moves)
<< " updates per move" << std::endl;
std::size_t num_observed_indices = 0;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
const auto count = std::count(trigger_map.begin(), trigger_map.end(), i);
num_observed_indices += count;
std::cout << "TriggerEvent(" << static_cast<int>(kRefreshTriggers[i])
<< "): " << count << " features ("
<< (100.0 * count / RawFeatures::kDimensions) << "%), "
<< num_updates[i] << " updates ("
<< (1.0 * num_updates[i] / num_moves) << " per move), "
<< num_resets[i] << " resets ("
<< (100.0 * num_resets[i] / num_moves) << "%)"
<< std::endl;
}
std::cout << "observed " << num_observed_indices << " ("
<< (100.0 * num_observed_indices / RawFeatures::kDimensions)
<< "% of " << RawFeatures::kDimensions
<< ") features" << std::endl;
}
// Output a string that represents the structure of the evaluation function
void PrintInfo(std::istream& stream) {
std::cout << "network architecture: " << GetArchitectureString() << std::endl;
while (true) {
std::string file_name;
stream >> file_name;
if (file_name.empty()) break;
std::uint32_t hash_value;
std::string architecture;
const bool success = [&]() {
std::ifstream file_stream(file_name, std::ios::binary);
if (!file_stream) return false;
if (!ReadHeader(file_stream, &hash_value, &architecture)) return false;
return true;
}();
std::cout << file_name << ": ";
if (success) {
if (hash_value == kHashValue) {
std::cout << "matches with this binary";
if (architecture != GetArchitectureString()) {
std::cout << ", but architecture string differs: " << architecture;
}
std::cout << std::endl;
} else {
std::cout << architecture << std::endl;
}
} else {
std::cout << "failed to read header" << std::endl;
}
}
}
} // namespace
// USI extended command for NNUE evaluation function
void TestCommand(Position& pos, std::istream& stream) {
std::string sub_command;
stream >> sub_command;
if (sub_command == "test_features") {
TestFeatures(pos);
} else if (sub_command == "info") {
PrintInfo(stream);
} else {
std::cout << "usage:" << std::endl;
std::cout << " test nnue test_features" << std::endl;
std::cout << " test nnue info [path/to/" << fileName << "...]" << std::endl;
}
}
} // namespace NNUE
} // namespace Eval
#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)

View file

@ -1,21 +0,0 @@
// USI extended command interface for NNUE evaluation function
#ifndef _NNUE_TEST_COMMAND_H_
#define _NNUE_TEST_COMMAND_H_
#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
namespace Eval {
namespace NNUE {
// USI extended command for NNUE evaluation function
void TestCommand(Position& pos, std::istream& stream);
} // namespace NNUE
} // namespace Eval
#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
#endif

View file

@ -1,110 +0,0 @@
// Feature conversion class template for training the NNUE evaluation function
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_
#define _NNUE_TRAINER_FEATURES_FACTORIZER_H_
#if defined(EVAL_NNUE)
#include "../../nnue_common.h"
#include "../trainer.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Class template that converts input features into training features
// By default the training features are identical to the original input features;
// specialize this template as necessary
template <typename FeatureType>
class Factorizer {
public:
// Get the dimensionality of the training features
static constexpr IndexType GetDimensions() {
return FeatureType::kDimensions;
}
// Get the training feature indices and the learning rate scale
static void AppendTrainingFeatures(
IndexType base_index, std::vector<TrainingFeature>* training_features) {
assert(base_index < FeatureType::kDimensions);
training_features->emplace_back(base_index);
}
};
// Training feature information
struct FeatureProperties {
bool active;
IndexType dimensions;
};
// Add the original input features to the training features
template <typename FeatureType>
IndexType AppendBaseFeature(
FeatureProperties properties, IndexType base_index,
std::vector<TrainingFeature>* training_features) {
assert(properties.dimensions == FeatureType::kDimensions);
assert(base_index < FeatureType::kDimensions);
training_features->emplace_back(base_index);
return properties.dimensions;
}
// If the feature is active, inherit training features from another feature type
template <typename FeatureType>
IndexType InheritFeaturesIfRequired(
IndexType index_offset, FeatureProperties properties, IndexType base_index,
std::vector<TrainingFeature>* training_features) {
if (!properties.active) {
return 0;
}
assert(properties.dimensions == Factorizer<FeatureType>::GetDimensions());
assert(base_index < FeatureType::kDimensions);
const auto start = training_features->size();
Factorizer<FeatureType>::AppendTrainingFeatures(
base_index, training_features);
for (auto i = start; i < training_features->size(); ++i) {
auto& feature = (*training_features)[i];
assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
feature.ShiftIndex(index_offset);
}
return properties.dimensions;
}
// Return the index offset without adding any training features
// Call this instead of InheritFeaturesIfRequired() when there are no corresponding features
IndexType SkipFeatures(FeatureProperties properties) {
if (!properties.active) {
return 0;
}
return properties.dimensions;
}
// Get the dimensionality of the training features
template <std::size_t N>
constexpr IndexType GetActiveDimensions(
const FeatureProperties (&properties)[N]) {
static_assert(N > 0, "");
IndexType dimensions = properties[0].dimensions;
for (std::size_t i = 1; i < N; ++i) {
if (properties[i].active) {
dimensions += properties[i].dimensions;
}
}
return dimensions;
}
// get the number of elements in the array
template <typename T, std::size_t N>
constexpr std::size_t GetArrayLength(const T (&/*array*/)[N]) {
return N;
}
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
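To illustrate the factorization idea above: during training, one active base feature (say a HalfKP index) can also activate coarser factor features (such as the plain P feature for the same piece), whose learned weights are folded back into the base weights afterwards. A hypothetical sketch using the HalfKP layout assumed earlier (fe_end = 641); the offsets are illustrative, not the exact ones used by the deleted trainer:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    constexpr std::uint32_t kFeEnd = 641;         // BonaPiece count (assumption)
    constexpr std::uint32_t kBase  = 64 * kFeEnd; // size of the base HalfKP block

    // Map one active HalfKP index to its training features: itself plus a
    // P factor (piece only, king square ignored) appended after the base block.
    std::vector<std::uint32_t> training_features(std::uint32_t halfkp_index) {
        return { halfkp_index, kBase + halfkp_index % kFeEnd };
    }

    int main() {
        for (auto f : training_features(2664))
            std::cout << f << ' ';                // 2664 41124
    }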

View file

@ -1,104 +0,0 @@
// Specialization of the NNUE feature conversion class template for FeatureSet
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
#define _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
#if defined(EVAL_NNUE)
#include "../../features/feature_set.h"
#include "factorizer.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Class template that converts input features into learning features
// Specialization for FeatureSet
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
class Factorizer<FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
private:
using Head = Factorizer<FeatureSet<FirstFeatureType>>;
using Tail = Factorizer<FeatureSet<RemainingFeatureTypes...>>;
public:
// number of dimensions of original input features
static constexpr IndexType kBaseDimensions =
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>::kDimensions;
// Get the dimensionality of the learning feature
static constexpr IndexType GetDimensions() {
return Head::GetDimensions() + Tail::GetDimensions();
}
// Get index of learning feature and scale of learning rate
static void AppendTrainingFeatures(
IndexType base_index, std::vector<TrainingFeature>* training_features,
IndexType base_dimensions = kBaseDimensions) {
assert(base_index < kBaseDimensions);
constexpr auto boundary = FeatureSet<RemainingFeatureTypes...>::kDimensions;
if (base_index < boundary) {
Tail::AppendTrainingFeatures(
base_index, training_features, base_dimensions);
} else {
const auto start = training_features->size();
Head::AppendTrainingFeatures(
base_index - boundary, training_features, base_dimensions);
for (auto i = start; i < training_features->size(); ++i) {
auto& feature = (*training_features)[i];
const auto index = feature.GetIndex();
assert(index < Head::GetDimensions() ||
(index >= base_dimensions &&
index < base_dimensions +
Head::GetDimensions() - Head::kBaseDimensions));
if (index < Head::kBaseDimensions) {
feature.ShiftIndex(Tail::kBaseDimensions);
} else {
feature.ShiftIndex(Tail::GetDimensions() - Tail::kBaseDimensions);
}
}
}
}
};
// Class template that converts input features into learning features
// Specialization when FeatureSet has one template argument
template <typename FeatureType>
class Factorizer<FeatureSet<FeatureType>> {
public:
// number of dimensions of original input features
static constexpr IndexType kBaseDimensions = FeatureType::kDimensions;
// Get the dimensionality of the learning feature
static constexpr IndexType GetDimensions() {
return Factorizer<FeatureType>::GetDimensions();
}
// Get index of learning feature and scale of learning rate
static void AppendTrainingFeatures(
IndexType base_index, std::vector<TrainingFeature>* training_features,
IndexType base_dimensions = kBaseDimensions) {
assert(base_index < kBaseDimensions);
const auto start = training_features->size();
Factorizer<FeatureType>::AppendTrainingFeatures(
base_index, training_features);
for (auto i = start; i < training_features->size(); ++i) {
auto& feature = (*training_features)[i];
assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
if (feature.GetIndex() >= kBaseDimensions) {
feature.ShiftIndex(base_dimensions - kBaseDimensions);
}
}
}
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif

View file

@@ -1,103 +0,0 @@
// Specialization of NNUE evaluation function feature conversion class template for HalfKP
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
#define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
#if defined(EVAL_NNUE)
#include "../../features/half_kp.h"
#include "../../features/p.h"
#include "../../features/half_relative_kp.h"
#include "factorizer.h"
namespace Eval {
namespace NNUE {
namespace Features {
// Class template that converts input features into learning features
// Specialization for HalfKP
template <Side AssociatedKing>
class Factorizer<HalfKP<AssociatedKing>> {
private:
using FeatureType = HalfKP<AssociatedKing>;
// Maximum number of feature indices that can be active (non-zero) at the same time
static constexpr IndexType kMaxActiveDimensions =
FeatureType::kMaxActiveDimensions;
// Type of learning feature
enum TrainingFeatureType {
kFeaturesHalfKP,
kFeaturesHalfK,
kFeaturesP,
kFeaturesHalfRelativeKP,
kNumTrainingFeatureTypes,
};
// Learning feature information
static constexpr FeatureProperties kProperties[] = {
// kFeaturesHalfKP
{true, FeatureType::kDimensions},
// kFeaturesHalfK
{true, SQUARE_NB},
// kFeaturesP
{true, Factorizer<P>::GetDimensions()},
// kFeaturesHalfRelativeKP
{true, Factorizer<HalfRelativeKP<AssociatedKing>>::GetDimensions()},
};
static_assert(GetArrayLength(kProperties) == kNumTrainingFeatureTypes, "");
public:
// Get the dimensionality of the learning feature
static constexpr IndexType GetDimensions() {
return GetActiveDimensions(kProperties);
}
// Get index of learning feature and scale of learning rate
static void AppendTrainingFeatures(
IndexType base_index, std::vector<TrainingFeature>* training_features) {
// kFeaturesHalfKP
IndexType index_offset = AppendBaseFeature<FeatureType>(
kProperties[kFeaturesHalfKP], base_index, training_features);
const auto sq_k = static_cast<Square>(base_index / fe_end);
const auto p = static_cast<BonaPiece>(base_index % fe_end);
// kFeaturesHalfK
{
const auto& properties = kProperties[kFeaturesHalfK];
if (properties.active) {
training_features->emplace_back(index_offset + sq_k);
index_offset += properties.dimensions;
}
}
// kFeaturesP
index_offset += InheritFeaturesIfRequired<P>(
index_offset, kProperties[kFeaturesP], p, training_features);
// kFeaturesHalfRelativeKP
if (p >= fe_hand_end) {
index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
index_offset, kProperties[kFeaturesHalfRelativeKP],
HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
training_features);
} else {
index_offset += SkipFeatures(kProperties[kFeaturesHalfRelativeKP]);
}
assert(index_offset == GetDimensions());
}
};
template <Side AssociatedKing>
constexpr FeatureProperties Factorizer<HalfKP<AssociatedKing>>::kProperties[];
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
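
The sq_k/p split above is plain integer arithmetic on the packed HalfKP index. A hedged standalone sketch, assuming fe_end = 641 and SQUARE_NB = 64 (the values implied by the BonaPiece enum later in this commit):

    #include <cstdint>
    #include <iostream>

    int main() {
        constexpr std::uint32_t SQUARE_NB = 64;
        constexpr std::uint32_t fe_end = 641;  // 1 + 10 piece blocks * 64 squares

        // A HalfKP base index packs (king square, BonaPiece) as sq_k * fe_end + p.
        std::uint32_t sq_k = 12, p = 345;
        std::uint32_t base_index = sq_k * fe_end + p;

        // The factorizer recovers both parts to derive the HalfK and P features.
        std::cout << "king square: " << base_index / fe_end       // 12
                  << ", piece: " << base_index % fe_end           // 345
                  << ", base dimensions: " << SQUARE_NB * fe_end  // 41024
                  << "\n";
        return 0;
    }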

View file

@@ -1,125 +0,0 @@
// Common header of class template for learning NNUE evaluation function
#ifndef _NNUE_TRAINER_H_
#define _NNUE_TRAINER_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../nnue_common.h"
#include "../features/index_list.h"
#include <sstream>
#if defined(USE_BLAS)
static_assert(std::is_same<LearnFloatType, float>::value, "");
#include <cblas.h>
#endif
namespace Eval {
namespace NNUE {
// Ponanza constant used in the relation between evaluation value and winning percentage
constexpr double kPonanzaConstant = 600.0;
// Class that represents one index of learning feature
class TrainingFeature {
using StorageType = std::uint32_t;
static_assert(std::is_unsigned<StorageType>::value, "");
public:
static constexpr std::uint32_t kIndexBits = 24;
static_assert(kIndexBits < std::numeric_limits<StorageType>::digits, "");
static constexpr std::uint32_t kCountBits =
std::numeric_limits<StorageType>::digits - kIndexBits;
explicit TrainingFeature(IndexType index) :
index_and_count_((index << kCountBits) | 1) {
assert(index < (1 << kIndexBits));
}
TrainingFeature& operator+=(const TrainingFeature& other) {
assert(other.GetIndex() == GetIndex());
assert(other.GetCount() + GetCount() < (1 << kCountBits));
index_and_count_ += other.GetCount();
return *this;
}
IndexType GetIndex() const {
return static_cast<IndexType>(index_and_count_ >> kCountBits);
}
void ShiftIndex(IndexType offset) {
assert(GetIndex() + offset < (1 << kIndexBits));
index_and_count_ += offset << kCountBits;
}
IndexType GetCount() const {
return static_cast<IndexType>(index_and_count_ & ((1 << kCountBits) - 1));
}
bool operator<(const TrainingFeature& other) const {
return index_and_count_ < other.index_and_count_;
}
private:
StorageType index_and_count_;
};
// Structure that represents one sample of training data
struct Example {
std::vector<TrainingFeature> training_features[2];
Learner::PackedSfenValue psv;
int sign;
double weight;
};
// Message used for setting hyperparameters
struct Message {
Message(const std::string& name, const std::string& value = ""):
name(name), value(value), num_peekers(0), num_receivers(0) {}
const std::string name;
const std::string value;
std::uint32_t num_peekers;
std::uint32_t num_receivers;
};
// determine whether to accept the message
bool ReceiveMessage(const std::string& name, Message* message) {
const auto subscript = "[" + std::to_string(message->num_peekers) + "]";
if (message->name.substr(0, name.size() + 1) == name + "[") {
++message->num_peekers;
}
if (message->name == name || message->name == name + subscript) {
++message->num_receivers;
return true;
}
return false;
}
// split the string
std::vector<std::string> Split(const std::string& input, char delimiter) {
std::istringstream stream(input);
std::string field;
std::vector<std::string> fields;
while (std::getline(stream, field, delimiter)) {
fields.push_back(field);
}
return fields;
}
// round a floating point number to an integer
template <typename IntType>
IntType Round(double value) {
return static_cast<IntType>(std::floor(value + 0.5));
}
// make_shared with alignment
template <typename T, typename... ArgumentTypes>
std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
const auto ptr = new(aligned_malloc(sizeof(T), alignof(T)))
T(std::forward<ArgumentTypes>(arguments)...);
return std::shared_ptr<T>(ptr, AlignedDeleter<T>());
}
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
#endif
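
The TrainingFeature class above packs a 24-bit feature index and an 8-bit occurrence count into a single 32-bit word. A minimal sketch of the same arithmetic, independent of the NNUE headers:

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    int main() {
        constexpr std::uint32_t kIndexBits = 24;
        constexpr std::uint32_t kCountBits = 32 - kIndexBits;  // 8 bits remain for the count

        std::uint32_t index = 123456;
        assert(index < (1u << kIndexBits));
        std::uint32_t packed = (index << kCountBits) | 1;  // count starts at 1, as in the constructor

        packed += 1;  // operator+= on an equal-index feature just adds its count

        std::cout << "index " << (packed >> kCountBits)                 // 123456
                  << ", count " << (packed & ((1u << kCountBits) - 1))  // 2
                  << "\n";
        return 0;
    }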

View file

@@ -1,301 +0,0 @@
// Specialization of NNUE evaluation function learning class template for AffineTransform
#ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_
#define _NNUE_TRAINER_AFFINE_TRANSFORM_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../../learn/learn.h"
#include "../layers/affine_transform.h"
#include "trainer.h"
#include <random>
namespace Eval {
namespace NNUE {
// Learning: Affine transformation layer
template <typename PreviousLayer, IndexType OutputDimensions>
class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
private:
// Type of layer to learn
using LayerType = Layers::AffineTransform<PreviousLayer, OutputDimensions>;
public:
// factory function
static std::shared_ptr<Trainer> Create(
LayerType* target_layer, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(
new Trainer(target_layer, feature_transformer));
}
// Set options such as hyperparameters
void SendMessage(Message* message) {
previous_layer_trainer_->SendMessage(message);
if (ReceiveMessage("momentum", message)) {
momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
}
if (ReceiveMessage("learning_rate_scale", message)) {
learning_rate_scale_ =
static_cast<LearnFloatType>(std::stod(message->value));
}
if (ReceiveMessage("reset", message)) {
DequantizeParameters();
}
if (ReceiveMessage("quantize_parameters", message)) {
QuantizeParameters();
}
}
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
previous_layer_trainer_->Initialize(rng);
if (kIsOutputLayer) {
// Initialize output layer with 0
std::fill(std::begin(biases_), std::end(biases_),
static_cast<LearnFloatType>(0.0));
std::fill(std::begin(weights_), std::end(weights_),
static_cast<LearnFloatType>(0.0));
} else {
// Assuming that each input unit has mean 0.5 and equal variance,
// initialize the weights so that each output unit also has mean 0.5 and the same variance as the input
const double kSigma = 1.0 / std::sqrt(kInputDimensions);
auto distribution = std::normal_distribution<double>(0.0, kSigma);
for (IndexType i = 0; i < kOutputDimensions; ++i) {
double sum = 0.0;
for (IndexType j = 0; j < kInputDimensions; ++j) {
const auto weight = static_cast<LearnFloatType>(distribution(rng));
weights_[kInputDimensions * i + j] = weight;
sum += weight;
}
biases_[i] = static_cast<LearnFloatType>(0.5 - 0.5 * sum);
}
}
QuantizeParameters();
}
// forward propagation
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kInputDimensions * batch.size());
}
batch_size_ = static_cast<IndexType>(batch.size());
batch_input_ = previous_layer_trainer_->Propagate(batch);
#if defined(USE_BLAS)
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
cblas_scopy(kOutputDimensions, biases_, 1, &output_[batch_offset], 1);
}
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
kOutputDimensions, batch_size_, kInputDimensions, 1.0,
weights_, kInputDimensions,
batch_input_, kInputDimensions,
1.0, &output_[0], kOutputDimensions);
#else
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_batch_offset = kInputDimensions * b;
const IndexType output_batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
double sum = biases_[i];
for (IndexType j = 0; j < kInputDimensions; ++j) {
const IndexType index = kInputDimensions * i + j;
sum += weights_[index] * batch_input_[input_batch_offset + j];
}
output_[output_batch_offset + i] = static_cast<LearnFloatType>(sum);
}
}
#endif
return output_.data();
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
const LearnFloatType local_learning_rate =
learning_rate * learning_rate_scale_;
#if defined(USE_BLAS)
// backpropagate
cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
kInputDimensions, batch_size_, kOutputDimensions, 1.0,
weights_, kInputDimensions,
gradients, kOutputDimensions,
0.0, &gradients_[0], kInputDimensions);
// update
cblas_sscal(kOutputDimensions, momentum_, biases_diff_, 1);
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
cblas_saxpy(kOutputDimensions, 1.0,
&gradients[batch_offset], 1, biases_diff_, 1);
}
cblas_saxpy(kOutputDimensions, -local_learning_rate,
biases_diff_, 1, biases_, 1);
cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
kOutputDimensions, kInputDimensions, batch_size_, 1.0,
gradients, kOutputDimensions,
batch_input_, kInputDimensions,
momentum_, weights_diff_, kInputDimensions);
cblas_saxpy(kOutputDimensions * kInputDimensions, -local_learning_rate,
weights_diff_, 1, weights_, 1);
#else
// backpropagate
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_batch_offset = kInputDimensions * b;
const IndexType output_batch_offset = kOutputDimensions * b;
for (IndexType j = 0; j < kInputDimensions; ++j) {
double sum = 0.0;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = kInputDimensions * i + j;
sum += weights_[index] * gradients[output_batch_offset + i];
}
gradients_[input_batch_offset + j] = static_cast<LearnFloatType>(sum);
}
}
// update
for (IndexType i = 0; i < kOutputDimensions; ++i) {
biases_diff_[i] *= momentum_;
}
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
weights_diff_[i] *= momentum_;
}
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_batch_offset = kInputDimensions * b;
const IndexType output_batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
biases_diff_[i] += gradients[output_batch_offset + i];
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
for (IndexType j = 0; j < kInputDimensions; ++j) {
const IndexType index = kInputDimensions * i + j;
weights_diff_[index] += gradients[output_batch_offset + i] *
batch_input_[input_batch_offset + j];
}
}
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
biases_[i] -= local_learning_rate * biases_diff_[i];
}
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
weights_[i] -= local_learning_rate * weights_diff_[i];
}
#endif
previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
}
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
batch_size_(0),
batch_input_(nullptr),
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
&target_layer->previous_layer_, feature_transformer)),
target_layer_(target_layer),
biases_(),
weights_(),
biases_diff_(),
weights_diff_(),
momentum_(0.0),
learning_rate_scale_(1.0) {
DequantizeParameters();
}
// Saturate the weights and quantize the parameters to integers
void QuantizeParameters() {
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
weights_[i] = std::max(-kMaxWeightMagnitude,
std::min(+kMaxWeightMagnitude, weights_[i]));
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
target_layer_->biases_[i] =
Round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const auto offset = kInputDimensions * i;
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
for (IndexType j = 0; j < kInputDimensions; ++j) {
target_layer_->weights_[padded_offset + j] =
Round<typename LayerType::WeightType>(
weights_[offset + j] * kWeightScale);
}
}
}
// Read the quantized integer parameters back as floats
void DequantizeParameters() {
for (IndexType i = 0; i < kOutputDimensions; ++i) {
biases_[i] = static_cast<LearnFloatType>(
target_layer_->biases_[i] / kBiasScale);
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const auto offset = kInputDimensions * i;
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
for (IndexType j = 0; j < kInputDimensions; ++j) {
weights_[offset + j] = static_cast<LearnFloatType>(
target_layer_->weights_[padded_offset + j] / kWeightScale);
}
}
std::fill(std::begin(biases_diff_), std::end(biases_diff_),
static_cast<LearnFloatType>(0.0));
std::fill(std::begin(weights_diff_), std::end(weights_diff_),
static_cast<LearnFloatType>(0.0));
}
// number of input/output dimensions
static constexpr IndexType kInputDimensions = LayerType::kInputDimensions;
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// The layer is treated as the output layer if its output dimensionality is 1
static constexpr bool kIsOutputLayer = kOutputDimensions == 1;
// Coefficients used for quantization
static constexpr LearnFloatType kActivationScale =
std::numeric_limits<std::int8_t>::max();
static constexpr LearnFloatType kBiasScale = kIsOutputLayer ?
(kPonanzaConstant * FV_SCALE) :
((1 << kWeightScaleBits) * kActivationScale);
static constexpr LearnFloatType kWeightScale = kBiasScale / kActivationScale;
// Upper limit on the absolute value of a weight, used to prevent overflow when quantizing to integers
static constexpr LearnFloatType kMaxWeightMagnitude =
std::numeric_limits<typename LayerType::WeightType>::max() / kWeightScale;
// number of samples in mini-batch
IndexType batch_size_;
// Input mini batch
const LearnFloatType* batch_input_;
// Trainer of the previous layer
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
// layer to learn
LayerType* const target_layer_;
// parameter
LearnFloatType biases_[kOutputDimensions];
LearnFloatType weights_[kOutputDimensions * kInputDimensions];
// Buffer used for updating parameters
LearnFloatType biases_diff_[kOutputDimensions];
LearnFloatType weights_diff_[kOutputDimensions * kInputDimensions];
// Forward propagation buffer
std::vector<LearnFloatType> output_;
// buffer for back propagation
std::vector<LearnFloatType> gradients_;
// hyper parameter
LearnFloatType momentum_;
LearnFloatType learning_rate_scale_;
};
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
#endif
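
QuantizeParameters()/DequantizeParameters() above round-trip between the float weights used during training and the integer weights used at play time. A simplified single-weight sketch; kWeightScale = 64 is an assumption here (what (1 << kWeightScaleBits) works out to for hidden layers if kWeightScaleBits = 6):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>

    int main() {
        const float kWeightScale = 64.0f;                         // assumed hidden-layer scale
        const float kMaxWeightMagnitude = 127.0f / kWeightScale;  // int8 max / scale

        float w = 0.07f;  // float weight coming out of training
        w = std::max(-kMaxWeightMagnitude, std::min(+kMaxWeightMagnitude, w));  // saturate
        auto q = static_cast<std::int8_t>(std::floor(w * kWeightScale + 0.5));  // Round<>() above
        float back = q / kWeightScale;  // what DequantizeParameters() reads back

        std::cout << w << " -> " << int(q) << " -> " << back << "\n";  // 0.07 -> 4 -> 0.0625
        return 0;
    }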

View file

@@ -1,142 +0,0 @@
// Specialization of NNUE evaluation function learning class template for ClippedReLU
#ifndef _NNUE_TRAINER_CLIPPED_RELU_H_
#define _NNUE_TRAINER_CLIPPED_RELU_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../../learn/learn.h"
#include "../layers/clipped_relu.h"
#include "trainer.h"
namespace Eval {
namespace NNUE {
// Learning: Clipped ReLU layer
template <typename PreviousLayer>
class Trainer<Layers::ClippedReLU<PreviousLayer>> {
private:
// Type of layer to learn
using LayerType = Layers::ClippedReLU<PreviousLayer>;
public:
// factory function
static std::shared_ptr<Trainer> Create(
LayerType* target_layer, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(
new Trainer(target_layer, feature_transformer));
}
// Set options such as hyperparameters
void SendMessage(Message* message) {
previous_layer_trainer_->SendMessage(message);
if (ReceiveMessage("check_health", message)) {
CheckHealth();
}
}
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
previous_layer_trainer_->Initialize(rng);
}
// forward propagation
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kInputDimensions * batch.size());
}
const auto input = previous_layer_trainer_->Propagate(batch);
batch_size_ = static_cast<IndexType>(batch.size());
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = batch_offset + i;
output_[index] = std::max(+kZero, std::min(+kOne, input[index]));
min_activations_[i] = std::min(min_activations_[i], output_[index]);
max_activations_[i] = std::max(max_activations_[i], output_[index]);
}
}
return output_.data();
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = batch_offset + i;
gradients_[index] = gradients[index] *
(output_[index] > kZero) * (output_[index] < kOne);
}
}
previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
}
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
batch_size_(0),
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
&target_layer->previous_layer_, feature_transformer)),
target_layer_(target_layer) {
std::fill(std::begin(min_activations_), std::end(min_activations_),
std::numeric_limits<LearnFloatType>::max());
std::fill(std::begin(max_activations_), std::end(max_activations_),
std::numeric_limits<LearnFloatType>::lowest());
}
// Check if there are any problems with learning
void CheckHealth() {
const auto largest_min_activation = *std::max_element(
std::begin(min_activations_), std::end(min_activations_));
const auto smallest_max_activation = *std::min_element(
std::begin(max_activations_), std::end(max_activations_));
std::cout << "INFO: largest min activation = " << largest_min_activation
<< ", smallest max activation = " << smallest_max_activation
<< std::endl;
std::fill(std::begin(min_activations_), std::end(min_activations_),
std::numeric_limits<LearnFloatType>::max());
std::fill(std::begin(max_activations_), std::end(max_activations_),
std::numeric_limits<LearnFloatType>::lowest());
}
// number of input/output dimensions
static constexpr IndexType kInputDimensions = LayerType::kOutputDimensions;
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// LearnFloatType constant
static constexpr LearnFloatType kZero = static_cast<LearnFloatType>(0.0);
static constexpr LearnFloatType kOne = static_cast<LearnFloatType>(1.0);
// number of samples in mini-batch
IndexType batch_size_;
// Trainer of the previous layer
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
// layer to learn
LayerType* const target_layer_;
// Forward propagation buffer
std::vector<LearnFloatType> output_;
// buffer for back propagation
std::vector<LearnFloatType> gradients_;
// Health check statistics
LearnFloatType min_activations_[kOutputDimensions];
LearnFloatType max_activations_[kOutputDimensions];
};
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
#endif
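
The forward clamp and backward mask above reduce to a simple per-element rule: activations are clipped to [0, 1], and gradients pass through only where the output stayed strictly inside that range. A minimal sketch:

    #include <algorithm>
    #include <iostream>

    int main() {
        const float inputs[] = {-0.3f, 0.2f, 0.9f, 1.7f};
        for (float x : inputs) {
            float y = std::max(0.0f, std::min(1.0f, x));  // forward: clipped ReLU
            // backward: the gradient is zeroed wherever the clamp was active
            float mask = (y > 0.0f && y < 1.0f) ? 1.0f : 0.0f;
            std::cout << x << " -> " << y << " (gradient mask " << mask << ")\n";
        }
        return 0;
    }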

View file

@@ -1,377 +0,0 @@
// Specialization of the NNUE evaluation function learning class template for FeatureTransformer
#ifndef _NNUE_TRAINER_FEATURE_TRANSFORMER_H_
#define _NNUE_TRAINER_FEATURE_TRANSFORMER_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../../learn/learn.h"
#include "../nnue_feature_transformer.h"
#include "trainer.h"
#include "features/factorizer_feature_set.h"
#include <array>
#include <bitset>
#include <numeric>
#include <random>
#include <set>
#if defined(_OPENMP)
#include <omp.h>
#endif
namespace Eval {
namespace NNUE {
// Learning: Input feature converter
template <>
class Trainer<FeatureTransformer> {
private:
// Type of layer to learn
using LayerType = FeatureTransformer;
public:
template <typename T>
friend struct AlignedDeleter;
template <typename T, typename... ArgumentTypes>
friend std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments);
// factory function
static std::shared_ptr<Trainer> Create(LayerType* target_layer) {
return MakeAlignedSharedPtr<Trainer>(target_layer);
}
// Set options such as hyperparameters
void SendMessage(Message* message) {
if (ReceiveMessage("momentum", message)) {
momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
}
if (ReceiveMessage("learning_rate_scale", message)) {
learning_rate_scale_ =
static_cast<LearnFloatType>(std::stod(message->value));
}
if (ReceiveMessage("reset", message)) {
DequantizeParameters();
}
if (ReceiveMessage("quantize_parameters", message)) {
QuantizeParameters();
}
if (ReceiveMessage("clear_unobserved_feature_weights", message)) {
ClearUnobservedFeatureWeights();
}
if (ReceiveMessage("check_health", message)) {
CheckHealth();
}
}
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
std::fill(std::begin(weights_), std::end(weights_), +kZero);
const double kSigma = 0.1 / std::sqrt(RawFeatures::kMaxActiveDimensions);
auto distribution = std::normal_distribution<double>(0.0, kSigma);
for (IndexType i = 0; i < kHalfDimensions * RawFeatures::kDimensions; ++i) {
const auto weight = static_cast<LearnFloatType>(distribution(rng));
weights_[i] = weight;
}
for (IndexType i = 0; i < kHalfDimensions; ++i) {
biases_[i] = static_cast<LearnFloatType>(0.5);
}
QuantizeParameters();
}
// forward propagation
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kOutputDimensions * batch.size());
}
batch_ = &batch;
// affine transform
#pragma omp parallel for
for (IndexType b = 0; b < batch.size(); ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType c = 0; c < 2; ++c) {
const IndexType output_offset = batch_offset + kHalfDimensions * c;
#if defined(USE_BLAS)
cblas_scopy(kHalfDimensions, biases_, 1, &output_[output_offset], 1);
for (const auto& feature : batch[b].training_features[c]) {
const IndexType weights_offset = kHalfDimensions * feature.GetIndex();
cblas_saxpy(kHalfDimensions, (float)feature.GetCount(),
&weights_[weights_offset], 1, &output_[output_offset], 1);
}
#else
for (IndexType i = 0; i < kHalfDimensions; ++i) {
output_[output_offset + i] = biases_[i];
}
for (const auto& feature : batch[b].training_features[c]) {
const IndexType weights_offset = kHalfDimensions * feature.GetIndex();
for (IndexType i = 0; i < kHalfDimensions; ++i) {
output_[output_offset + i] +=
feature.GetCount() * weights_[weights_offset + i];
}
}
#endif
}
}
// clipped ReLU
for (IndexType b = 0; b < batch.size(); ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = batch_offset + i;
min_pre_activation_ = std::min(min_pre_activation_, output_[index]);
max_pre_activation_ = std::max(max_pre_activation_, output_[index]);
output_[index] = std::max(+kZero, std::min(+kOne, output_[index]));
const IndexType t = i % kHalfDimensions;
min_activations_[t] = std::min(min_activations_[t], output_[index]);
max_activations_[t] = std::max(max_activations_[t], output_[index]);
}
}
return output_.data();
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
const LearnFloatType local_learning_rate =
learning_rate * learning_rate_scale_;
for (IndexType b = 0; b < batch_->size(); ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = batch_offset + i;
gradients_[index] = gradients[index] *
((output_[index] > kZero) * (output_[index] < kOne));
}
}
// Since the weight matrix updates only the columns corresponding to features that appeared in the input,
// momentum is not used; instead the learning rate is rescaled to compensate
const LearnFloatType effective_learning_rate =
static_cast<LearnFloatType>(local_learning_rate / (1.0 - momentum_));
#if defined(USE_BLAS)
cblas_sscal(kHalfDimensions, momentum_, biases_diff_, 1);
for (IndexType b = 0; b < batch_->size(); ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType c = 0; c < 2; ++c) {
const IndexType output_offset = batch_offset + kHalfDimensions * c;
cblas_saxpy(kHalfDimensions, 1.0,
&gradients_[output_offset], 1, biases_diff_, 1);
}
}
cblas_saxpy(kHalfDimensions, -local_learning_rate,
biases_diff_, 1, biases_, 1);
#pragma omp parallel
{
#if defined(_OPENMP)
const IndexType num_threads = omp_get_num_threads();
const IndexType thread_index = omp_get_thread_num();
#endif
for (IndexType b = 0; b < batch_->size(); ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType c = 0; c < 2; ++c) {
const IndexType output_offset = batch_offset + kHalfDimensions * c;
for (const auto& feature : (*batch_)[b].training_features[c]) {
#if defined(_OPENMP)
if (feature.GetIndex() % num_threads != thread_index) continue;
#endif
const IndexType weights_offset =
kHalfDimensions * feature.GetIndex();
const auto scale = static_cast<LearnFloatType>(
effective_learning_rate / feature.GetCount());
cblas_saxpy(kHalfDimensions, -scale,
&gradients_[output_offset], 1,
&weights_[weights_offset], 1);
}
}
}
}
#else
for (IndexType i = 0; i < kHalfDimensions; ++i) {
biases_diff_[i] *= momentum_;
}
for (IndexType b = 0; b < batch_->size(); ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType c = 0; c < 2; ++c) {
const IndexType output_offset = batch_offset + kHalfDimensions * c;
for (IndexType i = 0; i < kHalfDimensions; ++i) {
biases_diff_[i] += gradients_[output_offset + i];
}
}
}
for (IndexType i = 0; i < kHalfDimensions; ++i) {
biases_[i] -= local_learning_rate * biases_diff_[i];
}
for (IndexType b = 0; b < batch_->size(); ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType c = 0; c < 2; ++c) {
const IndexType output_offset = batch_offset + kHalfDimensions * c;
for (const auto& feature : (*batch_)[b].training_features[c]) {
const IndexType weights_offset = kHalfDimensions * feature.GetIndex();
const auto scale = static_cast<LearnFloatType>(
effective_learning_rate / feature.GetCount());
for (IndexType i = 0; i < kHalfDimensions; ++i) {
weights_[weights_offset + i] -=
scale * gradients_[output_offset + i];
}
}
}
}
#endif
for (IndexType b = 0; b < batch_->size(); ++b) {
for (IndexType c = 0; c < 2; ++c) {
for (const auto& feature : (*batch_)[b].training_features[c]) {
observed_features.set(feature.GetIndex());
}
}
}
}
private:
// constructor
Trainer(LayerType* target_layer) :
batch_(nullptr),
target_layer_(target_layer),
biases_(),
weights_(),
biases_diff_(),
momentum_(0.0),
learning_rate_scale_(1.0) {
min_pre_activation_ = std::numeric_limits<LearnFloatType>::max();
max_pre_activation_ = std::numeric_limits<LearnFloatType>::lowest();
std::fill(std::begin(min_activations_), std::end(min_activations_),
std::numeric_limits<LearnFloatType>::max());
std::fill(std::begin(max_activations_), std::end(max_activations_),
std::numeric_limits<LearnFloatType>::lowest());
DequantizeParameters();
}
// Saturate the weights and quantize the parameters to integers
void QuantizeParameters() {
for (IndexType i = 0; i < kHalfDimensions; ++i) {
target_layer_->biases_[i] =
Round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
}
std::vector<TrainingFeature> training_features;
#pragma omp parallel for private(training_features)
for (IndexType j = 0; j < RawFeatures::kDimensions; ++j) {
training_features.clear();
Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
j, &training_features);
for (IndexType i = 0; i < kHalfDimensions; ++i) {
double sum = 0.0;
for (const auto& feature : training_features) {
sum += weights_[kHalfDimensions * feature.GetIndex() + i];
}
target_layer_->weights_[kHalfDimensions * j + i] =
Round<typename LayerType::WeightType>(sum * kWeightScale);
}
}
}
// Read the quantized integer parameters back as floats
void DequantizeParameters() {
for (IndexType i = 0; i < kHalfDimensions; ++i) {
biases_[i] = static_cast<LearnFloatType>(
target_layer_->biases_[i] / kBiasScale);
}
std::fill(std::begin(weights_), std::end(weights_), +kZero);
for (IndexType i = 0; i < kHalfDimensions * RawFeatures::kDimensions; ++i) {
weights_[i] = static_cast<LearnFloatType>(
target_layer_->weights_[i] / kWeightScale);
}
std::fill(std::begin(biases_diff_), std::end(biases_diff_), +kZero);
}
// Zero the weights of features that never appeared in the training data
void ClearUnobservedFeatureWeights() {
for (IndexType i = 0; i < kInputDimensions; ++i) {
if (!observed_features.test(i)) {
std::fill(std::begin(weights_) + kHalfDimensions * i,
std::begin(weights_) + kHalfDimensions * (i + 1), +kZero);
}
}
QuantizeParameters();
}
// Check if there are any problems with learning
void CheckHealth() {
std::cout << "INFO: observed " << observed_features.count()
<< " (out of " << kInputDimensions << ") features" << std::endl;
constexpr LearnFloatType kPreActivationLimit =
std::numeric_limits<typename LayerType::WeightType>::max() /
kWeightScale;
std::cout << "INFO: (min, max) of pre-activations = "
<< min_pre_activation_ << ", "
<< max_pre_activation_ << " (limit = "
<< kPreActivationLimit << ")" << std::endl;
const auto largest_min_activation = *std::max_element(
std::begin(min_activations_), std::end(min_activations_));
const auto smallest_max_activation = *std::min_element(
std::begin(max_activations_), std::end(max_activations_));
std::cout << "INFO: largest min activation = " << largest_min_activation
<< ", smallest max activation = " << smallest_max_activation
<< std::endl;
std::fill(std::begin(min_activations_), std::end(min_activations_),
std::numeric_limits<LearnFloatType>::max());
std::fill(std::begin(max_activations_), std::end(max_activations_),
std::numeric_limits<LearnFloatType>::lowest());
}
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
Features::Factorizer<RawFeatures>::GetDimensions();
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
static constexpr IndexType kHalfDimensions = LayerType::kHalfDimensions;
// Coefficients used for quantization
static constexpr LearnFloatType kActivationScale =
std::numeric_limits<std::int8_t>::max();
static constexpr LearnFloatType kBiasScale = kActivationScale;
static constexpr LearnFloatType kWeightScale = kActivationScale;
// LearnFloatType constant
static constexpr LearnFloatType kZero = static_cast<LearnFloatType>(0.0);
static constexpr LearnFloatType kOne = static_cast<LearnFloatType>(1.0);
// mini batch
const std::vector<Example>* batch_;
// layer to learn
LayerType* const target_layer_;
// parameter
alignas(kCacheLineSize) LearnFloatType biases_[kHalfDimensions];
alignas(kCacheLineSize)
LearnFloatType weights_[kHalfDimensions * kInputDimensions];
// Buffer used for updating parameters
LearnFloatType biases_diff_[kHalfDimensions];
std::vector<LearnFloatType> gradients_;
// Forward propagation buffer
std::vector<LearnFloatType> output_;
// Features that appeared in the training data
std::bitset<kInputDimensions> observed_features;
// hyper parameter
LearnFloatType momentum_;
LearnFloatType learning_rate_scale_;
// Health check statistics
LearnFloatType min_pre_activation_;
LearnFloatType max_pre_activation_;
LearnFloatType min_activations_[kHalfDimensions];
LearnFloatType max_activations_[kHalfDimensions];
};
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
#endif
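
The effective_learning_rate = local_learning_rate / (1 - momentum) correction above can be checked numerically: under a constant gradient, momentum SGD converges to steps of lr * g / (1 - m), so a momentum-free update needs that same factor to keep pace. A small sketch:

    #include <iostream>

    int main() {
        const double lr = 0.1, momentum = 0.9, gradient = 1.0;

        double diff = 0.0;  // momentum accumulator, like biases_diff_
        for (int step = 0; step < 200; ++step)
            diff = momentum * diff + gradient;  // geometric series -> gradient / (1 - momentum)

        std::cout << "asymptotic momentum step: " << lr * diff << "\n";                        // ~1.0
        std::cout << "corrected plain step:     " << lr / (1.0 - momentum) * gradient << "\n"; // 1.0
        return 0;
    }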

View file

@@ -1,251 +0,0 @@
// Specialization of NNUE evaluation function learning class template for InputSlice
#ifndef _NNUE_TRAINER_INPUT_SLICE_H_
#define _NNUE_TRAINER_INPUT_SLICE_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../../learn/learn.h"
#include "../layers/input_slice.h"
#include "trainer.h"
namespace Eval {
namespace NNUE {
// Learning: Input layer
class SharedInputTrainer {
public:
// factory function
static std::shared_ptr<SharedInputTrainer> Create(
FeatureTransformer* feature_transformer) {
static std::shared_ptr<SharedInputTrainer> instance;
if (!instance) {
instance.reset(new SharedInputTrainer(feature_transformer));
}
++instance->num_referrers_;
return instance;
}
// Set options such as hyperparameters
void SendMessage(Message* message) {
if (num_calls_ == 0) {
current_operation_ = Operation::kSendMessage;
feature_transformer_trainer_->SendMessage(message);
}
assert(current_operation_ == Operation::kSendMessage);
if (++num_calls_ == num_referrers_) {
num_calls_ = 0;
current_operation_ = Operation::kNone;
}
}
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
if (num_calls_ == 0) {
current_operation_ = Operation::kInitialize;
feature_transformer_trainer_->Initialize(rng);
}
assert(current_operation_ == Operation::kInitialize);
if (++num_calls_ == num_referrers_) {
num_calls_ = 0;
current_operation_ = Operation::kNone;
}
}
// forward propagation
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (gradients_.size() < kInputDimensions * batch.size()) {
gradients_.resize(kInputDimensions * batch.size());
}
batch_size_ = static_cast<IndexType>(batch.size());
if (num_calls_ == 0) {
current_operation_ = Operation::kPropagate;
output_ = feature_transformer_trainer_->Propagate(batch);
}
assert(current_operation_ == Operation::kPropagate);
if (++num_calls_ == num_referrers_) {
num_calls_ = 0;
current_operation_ = Operation::kNone;
}
return output_;
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
if (num_referrers_ == 1) {
feature_transformer_trainer_->Backpropagate(gradients, learning_rate);
return;
}
if (num_calls_ == 0) {
current_operation_ = Operation::kBackPropagate;
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kInputDimensions * b;
for (IndexType i = 0; i < kInputDimensions; ++i) {
gradients_[batch_offset + i] = static_cast<LearnFloatType>(0.0);
}
}
}
assert(current_operation_ == Operation::kBackPropagate);
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kInputDimensions * b;
for (IndexType i = 0; i < kInputDimensions; ++i) {
gradients_[batch_offset + i] += gradients[batch_offset + i];
}
}
if (++num_calls_ == num_referrers_) {
feature_transformer_trainer_->Backpropagate(
gradients_.data(), learning_rate);
num_calls_ = 0;
current_operation_ = Operation::kNone;
}
}
private:
// constructor
SharedInputTrainer(FeatureTransformer* feature_transformer) :
batch_size_(0),
num_referrers_(0),
num_calls_(0),
current_operation_(Operation::kNone),
feature_transformer_trainer_(Trainer<FeatureTransformer>::Create(
feature_transformer)),
output_(nullptr) {
}
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
FeatureTransformer::kOutputDimensions;
// type of processing
enum class Operation {
kNone,
kSendMessage,
kInitialize,
kPropagate,
kBackPropagate,
};
// number of samples in mini-batch
IndexType batch_size_;
// number of layers sharing this layer as input
std::uint32_t num_referrers_;
// Number of times the current process has been called
std::uint32_t num_calls_;
// current processing type
Operation current_operation_;
// Trainer of input feature converter
const std::shared_ptr<Trainer<FeatureTransformer>>
feature_transformer_trainer_;
// pointer to output shared for forward propagation
const LearnFloatType* output_;
// buffer for back propagation
std::vector<LearnFloatType> gradients_;
};
// Learning: Input layer
template <IndexType OutputDimensions, IndexType Offset>
class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
private:
// Type of layer to learn
using LayerType = Layers::InputSlice<OutputDimensions, Offset>;
public:
// factory function
static std::shared_ptr<Trainer> Create(
LayerType* /*target_layer*/, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(new Trainer(feature_transformer));
}
// Set options such as hyperparameters
void SendMessage(Message* message) {
shared_input_trainer_->SendMessage(message);
}
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
shared_input_trainer_->Initialize(rng);
}
// forward propagation
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kInputDimensions * batch.size());
}
batch_size_ = static_cast<IndexType>(batch.size());
const auto input = shared_input_trainer_->Propagate(batch);
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_offset = kInputDimensions * b;
const IndexType output_offset = kOutputDimensions * b;
#if defined(USE_BLAS)
cblas_scopy(kOutputDimensions, &input[input_offset + Offset], 1,
&output_[output_offset], 1);
#else
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output_[output_offset + i] = input[input_offset + Offset + i];
}
#endif
}
return output_.data();
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_offset = kInputDimensions * b;
const IndexType output_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kInputDimensions; ++i) {
if (i < Offset || i >= Offset + kOutputDimensions) {
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
} else {
gradients_[input_offset + i] = gradients[output_offset + i - Offset];
}
}
}
shared_input_trainer_->Backpropagate(gradients_.data(), learning_rate);
}
private:
// constructor
Trainer(FeatureTransformer* feature_transformer):
batch_size_(0),
shared_input_trainer_(SharedInputTrainer::Create(feature_transformer)) {
}
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
FeatureTransformer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = OutputDimensions;
static_assert(Offset + kOutputDimensions <= kInputDimensions, "");
// number of samples in mini-batch
IndexType batch_size_;
// Trainer of shared input layer
const std::shared_ptr<SharedInputTrainer> shared_input_trainer_;
// Forward propagation buffer
std::vector<LearnFloatType> output_;
// buffer for back propagation
std::vector<LearnFloatType> gradients_;
};
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
#endif
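
SharedInputTrainer above lets several InputSlice layers share one FeatureTransformer: the first caller in each phase does the real work, and backpropagation accumulates gradients until the last referrer arrives. A stripped-down sketch of that call-counting pattern (a hypothetical toy class, not the original API):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    struct SharedBackward {
        unsigned num_referrers = 3, num_calls = 0;
        std::vector<double> sum = {0.0, 0.0};

        void backpropagate(const std::vector<double>& grads) {
            for (std::size_t i = 0; i < sum.size(); ++i)
                sum[i] += grads[i];               // accumulate each referrer's gradient
            if (++num_calls == num_referrers) {   // the last caller triggers the real update
                std::cout << "flush: " << sum[0] << ", " << sum[1] << "\n";
                num_calls = 0;
                sum.assign(sum.size(), 0.0);
            }
        }
    };

    int main() {
        SharedBackward shared;
        shared.backpropagate({1.0, 2.0});
        shared.backpropagate({0.5, 0.5});
        shared.backpropagate({0.5, 0.5});  // prints "flush: 2, 3"
        return 0;
    }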

View file

@@ -1,190 +0,0 @@
// Specialization of NNUE evaluation function learning class template for Sum
#ifndef _NNUE_TRAINER_SUM_H_
#define _NNUE_TRAINER_SUM_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../../learn/learn.h"
#include "../layers/sum.h"
#include "trainer.h"
namespace Eval {
namespace NNUE {
// Learning: A layer that sums the outputs of multiple layers
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
class Trainer<Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>> :
Trainer<Layers::Sum<RemainingPreviousLayers...>> {
private:
// Type of layer to learn
using LayerType = Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>;
using Tail = Trainer<Layers::Sum<RemainingPreviousLayers...>>;
public:
// factory function
static std::shared_ptr<Trainer> Create(
LayerType* target_layer, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(
new Trainer(target_layer, feature_transformer));
}
// Set options such as hyperparameters
void SendMessage(Message* message) {
// The results of the other member functions do not depend on processing order,
// so Tail is handled first there to simplify the implementation; SendMessage,
// however, processes Head first so the subscript correspondence is easier to follow
previous_layer_trainer_->SendMessage(message);
Tail::SendMessage(message);
}
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
Tail::Initialize(rng);
previous_layer_trainer_->Initialize(rng);
}
// forward propagation
/*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
batch_size_ = static_cast<IndexType>(batch.size());
auto output = Tail::Propagate(batch);
const auto head_output = previous_layer_trainer_->Propagate(batch);
#if defined(USE_BLAS)
cblas_saxpy(kOutputDimensions * batch_size_, 1.0,
head_output, 1, output, 1);
#else
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output[batch_offset + i] += head_output[batch_offset + i];
}
}
#endif
return output;
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
Tail::Backpropagate(gradients, learning_rate);
previous_layer_trainer_->Backpropagate(gradients, learning_rate);
}
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer):
Tail(target_layer, feature_transformer),
batch_size_(0),
previous_layer_trainer_(Trainer<FirstPreviousLayer>::Create(
&target_layer->previous_layer_, feature_transformer)),
target_layer_(target_layer) {
}
// number of input/output dimensions
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// make subclass friend
template <typename SumLayer>
friend class Trainer;
// number of samples in mini-batch
IndexType batch_size_;
// Trainer of the previous layer
const std::shared_ptr<Trainer<FirstPreviousLayer>> previous_layer_trainer_;
// layer to learn
LayerType* const target_layer_;
};
// Learning: Layer that takes the sum of the outputs of multiple layers (when there is one template argument)
template <typename PreviousLayer>
class Trainer<Layers::Sum<PreviousLayer>> {
private:
// Type of layer to learn
using LayerType = Layers::Sum<PreviousLayer>;
public:
// factory function
static std::shared_ptr<Trainer> Create(
LayerType* target_layer, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(
new Trainer(target_layer, feature_transformer));
}
// Set options such as hyperparameters
void SendMessage(Message* message) {
previous_layer_trainer_->SendMessage(message);
}
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
previous_layer_trainer_->Initialize(rng);
}
// forward propagation
/*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
}
batch_size_ = static_cast<IndexType>(batch.size());
const auto output = previous_layer_trainer_->Propagate(batch);
#if defined(USE_BLAS)
cblas_scopy(kOutputDimensions * batch_size_, output, 1, &output_[0], 1);
#else
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output_[batch_offset + i] = output[batch_offset + i];
}
}
#endif
return output_.data();
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
previous_layer_trainer_->Backpropagate(gradients, learning_rate);
}
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
batch_size_(0),
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
&target_layer->previous_layer_, feature_transformer)),
target_layer_(target_layer) {
}
// number of input/output dimensions
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// make subclass friend
template <typename SumLayer>
friend class Trainer;
// number of samples in mini-batch
IndexType batch_size_;
// Trainer of the previous layer
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
// layer to learn
LayerType* const target_layer_;
// Forward propagation buffer
std::vector<LearnFloatType> output_;
};
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
#endif
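
The Sum trainer above adds the head's output into the tail's buffer during forward propagation and hands the same gradients to every branch on the way back (d(sum)/d(branch) = 1). A tiny sketch of that invariant:

    #include <iostream>

    int main() {
        // forward: output = tail + head, computed in place on the tail's buffer
        double tail[] = {0.25, 0.50}, head[] = {0.10, 0.20};
        for (int i = 0; i < 2; ++i)
            tail[i] += head[i];  // cblas_saxpy(..., 1.0, head, 1, tail, 1) in the BLAS path
        std::cout << tail[0] << ", " << tail[1] << "\n";  // 0.35, 0.7

        // backward: each branch receives the incoming gradients unchanged
        return 0;
    }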

View file

@@ -22,7 +22,6 @@
#include <cassert>
#include <cstring> // For std::memset
#include <iomanip>
#include <set>
#include <sstream>
#include "bitboard.h"
@@ -30,7 +29,6 @@
#include "material.h"
#include "pawns.h"
#include "thread.h"
#include "eval/nnue/evaluate_nnue.h"
namespace Trace {
@@ -76,7 +74,8 @@ using namespace Trace;
namespace {
// Threshold for lazy and space evaluation
constexpr Value LazyThreshold = Value(1400);
constexpr Value LazyThreshold1 = Value(1400);
constexpr Value LazyThreshold2 = Value(1300);
constexpr Value SpaceThreshold = Value(12222);
// KingAttackWeights[PieceType] contains king attack weights by piece type
@@ -788,7 +787,7 @@ namespace {
&& pos.non_pawn_material(BLACK) == RookValueMg
&& pos.count<PAWN>(strongSide) - pos.count<PAWN>(~strongSide) <= 1
&& bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN))
&& (attackedBy[~strongSide][KING] & pos.pieces(~strongSide, PAWN)))
&& (attacks_bb<KING>(pos.square<KING>(~strongSide)) & pos.pieces(~strongSide, PAWN)))
sf = 36;
else if (pos.count<QUEEN>() == 1)
sf = 37 + 3 * (pos.count<QUEEN>(WHITE) == 1 ? pos.count<BISHOP>(BLACK) + pos.count<KNIGHT>(BLACK)
@@ -839,9 +838,12 @@ namespace {
score += pe->pawn_score(WHITE) - pe->pawn_score(BLACK);
// Early exit if score is high
Value v = (mg_value(score) + eg_value(score)) / 2;
if (abs(v) > LazyThreshold + pos.non_pawn_material() / 64)
return pos.side_to_move() == WHITE ? v : -v;
auto lazy_skip = [&](Value lazyThreshold) {
return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64;
};
if (lazy_skip(LazyThreshold1))
goto make_v;
// Main evaluation begins here
initialize<WHITE>();
@@ -858,12 +860,17 @@ namespace {
// More complex interactions that require fully populated attack bitboards
score += king< WHITE>() - king< BLACK>()
+ threats<WHITE>() - threats<BLACK>()
+ passed< WHITE>() - passed< BLACK>()
+ passed< WHITE>() - passed< BLACK>();
if (lazy_skip(LazyThreshold2))
goto make_v;
score += threats<WHITE>() - threats<BLACK>()
+ space< WHITE>() - space< BLACK>();
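
The two-stage lazy exit introduced above can be modeled in isolation: evaluation bails out when the running score is already far outside a threshold that scales with the remaining material, and a second, tighter check runs before the most expensive terms. A hedged toy model with made-up numbers:

    #include <cstdlib>
    #include <iostream>

    int main() {
        const int LazyThreshold1 = 1400, LazyThreshold2 = 1300;
        int mg = 1700, eg = 1500;    // midgame/endgame parts of the running score
        int nonPawnMaterial = 6400;  // both sides' non-pawn material, in internal units

        auto lazy_skip = [&](int lazyThreshold) {
            // same shape as the lambda above: averaged score vs. material-scaled threshold
            return std::abs(mg + eg) / 2 > lazyThreshold + nonPawnMaterial / 64;
        };

        std::cout << "skip after cheap terms:    " << lazy_skip(LazyThreshold1) << "\n";  // 1600 > 1500 -> 1
        std::cout << "skip before space/threats: " << lazy_skip(LazyThreshold2) << "\n";  // 1
        return 0;
    }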
make_v:
// Derive single value from mg and eg parts of score
v = winnable(score);
Value v = winnable(score);
// In case of tracing add all remaining individual evaluation terms
if (T)
@@ -892,12 +899,12 @@ namespace {
/// evaluate() is the evaluator for the outer world. It returns a static
/// evaluation of the position from the point of view of the side to move.
#if !defined(EVAL_NNUE)
Value Eval::evaluate(const Position& pos) {
return Evaluation<NO_TRACE>(pos).value();
if (pos.use_nnue())
return NNUE::evaluate(pos);
else
return Evaluation<NO_TRACE>(pos).value();
}
#endif // defined(EVAL_NNUE)
/// trace() is like evaluate(), but instead of returning a value, it returns
/// a string (suitable for outputting to stdout) that contains the detailed
@@ -941,138 +948,3 @@ std::string Eval::trace(const Position& pos) {
return ss.str();
}
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
namespace Eval {
ExtBonaPiece kpp_board_index[PIECE_NB] = {
{ BONA_PIECE_ZERO, BONA_PIECE_ZERO },
{ f_pawn, e_pawn },
{ f_knight, e_knight },
{ f_bishop, e_bishop },
{ f_rook, e_rook },
{ f_queen, e_queen },
{ f_king, e_king },
{ BONA_PIECE_ZERO, BONA_PIECE_ZERO },
// As seen from the opposite side: f and e are swapped.
{ BONA_PIECE_ZERO, BONA_PIECE_ZERO },
{ e_pawn, f_pawn },
{ e_knight, f_knight },
{ e_bishop, f_bishop },
{ e_rook, f_rook },
{ e_queen, f_queen },
{ e_king, f_king },
{ BONA_PIECE_ZERO, BONA_PIECE_ZERO }, // unused slot (shogi's gold; no such piece in chess)
};
// Check whether the internally held pieceListFw[] is a correct BonaPiece.
// Note: for debugging; slow.
bool EvalList::is_valid(const Position& pos)
{
std::set<PieceNumber> piece_numbers;
for (Square sq = SQ_A1; sq != SQUARE_NB; ++sq) {
auto piece_number = piece_no_of_board(sq);
if (piece_number == PIECE_NUMBER_NB) {
continue;
}
assert(!piece_numbers.count(piece_number));
piece_numbers.insert(piece_number);
}
for (int i = 0; i < length(); ++i)
{
BonaPiece fw = pieceListFw[i];
// Go to the Position class to see if this fw really exists.
if (fw == Eval::BONA_PIECE_ZERO) {
continue;
}
// Out of range
if (!(0 <= fw && fw < fe_end))
return false;
// Since it is a piece on the board, check that it actually exists there.
for (Piece pc = NO_PIECE; pc < PIECE_NB; ++pc)
{
auto pt = type_of(pc);
if (pt == NO_PIECE_TYPE || pt == 7) // non-existing piece
continue;
// BonaPiece start number of piece pc
auto s = BonaPiece(kpp_board_index[pc].fw);
if (s <= fw && fw < s + SQUARE_NB)
{
// Since it was found, check if this piece is at sq.
Square sq = (Square)(fw - s);
Piece pc2 = pos.piece_on(sq);
if (pc2 != pc)
return false;
goto Found;
}
}
// A piece that should not exist was found.
return false;
Found:;
}
// Validate piece_no_list_board
for (auto sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq) {
Piece expected_piece = pos.piece_on(sq);
PieceNumber piece_number = piece_no_list_board[sq];
if (piece_number == PIECE_NUMBER_NB) {
assert(expected_piece == NO_PIECE);
if (expected_piece != NO_PIECE) {
return false;
}
continue;
}
BonaPiece bona_piece_white = pieceListFw[piece_number];
Piece actual_piece;
for (actual_piece = NO_PIECE; actual_piece < PIECE_NB; ++actual_piece) {
if (kpp_board_index[actual_piece].fw == BONA_PIECE_ZERO) {
continue;
}
if (kpp_board_index[actual_piece].fw <= bona_piece_white
&& bona_piece_white < kpp_board_index[actual_piece].fw + SQUARE_NB) {
break;
}
}
assert(actual_piece != PIECE_NB);
if (actual_piece == PIECE_NB) {
return false;
}
assert(actual_piece == expected_piece);
if (actual_piece != expected_piece) {
return false;
}
Square actual_square = static_cast<Square>(
bona_piece_white - kpp_board_index[actual_piece].fw);
assert(sq == actual_square);
if (sq != actual_square) {
return false;
}
}
return true;
}
}
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
#if !defined(EVAL_NNUE)
namespace Eval {
void evaluate_with_no_return(const Position& pos) {}
void update_weights(uint64_t epoch, const std::array<bool, 4> & freeze) {}
void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3) {}
void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array<bool, 4> & freeze) {}
void save_eval(std::string suffix) {}
double get_eta() { return 0.0; }
}
#endif // defined(EVAL_NNUE)

View file

@@ -30,193 +30,17 @@ class Position;
namespace Eval {
std::string trace(const Position& pos);
Value evaluate(const Position& pos);
void evaluate_with_no_return(const Position& pos);
namespace NNUE {
Value evaluate(const Position& pos);
Value compute_eval(const Position& pos);
void update_eval(const Position& pos);
void load_eval(const std::string& evalFile);
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
// Read the evaluation function file.
// Called once in response to the "isready" command; it is not meant to be called twice.
// (However, if "isready" is sent again after EvalDir, the evaluation function folder, has been changed, the file is read again.)
void load_eval();
} // namespace NNUE
static uint64_t calc_check_sum() {return 0;}
static void print_softname(uint64_t check_sum) {}
// --- enum corresponding to the P of the KPP constant (king and any two pieces) used in the evaluation function
// (BonaPiece is meant to be freely definable when experimenting with evaluation functions, so it is not fixed here.)
// A type that represents P (Piece) when calling KKP/KPP in Bonanza.
// When computing ΣKPP, a unique number is needed for each square × piece type (like the pawn on square 39).
enum BonaPiece : int32_t
{
// f = friend (≈ the first player). e = enemy (≈ the second player).
// Value when uninitialized
BONA_PIECE_NOT_INIT = -1,
// Invalid piece. In handicap positions and the like, unneeded pieces are moved here.
BONA_PIECE_ZERO = 0,
fe_hand_end = BONA_PIECE_ZERO + 1,
// Unlike Bonanza, the numbers for pawns and lances on board squares where they cannot legally stand are not packed out.
// Reason 1) During learning, a lance can appear on the first rank in relative PP, and it is hard to display that correctly under the inverse transformation.
// Reason 2) With a vertical Bitboard, converting from Square is difficult.
// --- Pieces on the board
f_pawn = fe_hand_end,
e_pawn = f_pawn + SQUARE_NB,
f_knight = e_pawn + SQUARE_NB,
e_knight = f_knight + SQUARE_NB,
f_bishop = e_knight + SQUARE_NB,
e_bishop = f_bishop + SQUARE_NB,
f_rook = e_bishop + SQUARE_NB,
e_rook = f_rook + SQUARE_NB,
f_queen = e_rook + SQUARE_NB,
e_queen = f_queen + SQUARE_NB,
fe_end = e_queen + SQUARE_NB,
f_king = fe_end,
e_king = f_king + SQUARE_NB,
fe_end2 = e_king + SQUARE_NB, // Last number, including the kings.
};
#define ENABLE_INCR_OPERATORS_ON(T) \
inline T& operator++(T& d) { return d = T(int(d) + 1); } \
inline T& operator--(T& d) { return d = T(int(d) - 1); }
ENABLE_INCR_OPERATORS_ON(BonaPiece)
#undef ENABLE_INCR_OPERATORS_ON
// The number of a BonaPiece as seen from the other side (e.g. a pawn on square 39 for us is a pawn on square 71 for the opponent).
// The pair of the two views is called the ExtBonaPiece type.
union ExtBonaPiece
{
struct {
BonaPiece fw; // from white
BonaPiece fb; // from black
};
BonaPiece from[2];
ExtBonaPiece() {}
ExtBonaPiece(BonaPiece fw_, BonaPiece fb_) : fw(fw_), fb(fb_) {}
};
// Information about where the piece has moved from where to by this move.
// Assume the piece is an ExtBonaPiece expression.
struct ChangedBonaPiece
{
ExtBonaPiece old_piece;
ExtBonaPiece new_piece;
};
// An array for finding the BonaPiece in the KPP table that corresponds to piece pc on the board.
// Example)
// BonaPiece fb = kpp_board_index[pc].fb + sq; // the BonaPiece for pc on sq, as seen from black
// BonaPiece fw = kpp_board_index[pc].fw + sq; // the BonaPiece for pc on sq, as seen from white
extern ExtBonaPiece kpp_board_index[PIECE_NB];
// List of pieces used in the evaluation function. A structure holding which piece (PieceNumber) is where (BonaPiece)
struct EvalList
{
// Piece lists used by the evaluation function (FV38 layout)
BonaPiece* piece_list_fw() const { return const_cast<BonaPiece*>(pieceListFw); }
BonaPiece* piece_list_fb() const { return const_cast<BonaPiece*>(pieceListFb); }
// Convert the specified piece_no piece to ExtBonaPiece type and return it.
ExtBonaPiece bona_piece(PieceNumber piece_no) const
{
ExtBonaPiece bp;
bp.fw = pieceListFw[piece_no];
bp.fb = pieceListFb[piece_no];
return bp;
}
// Place piece pc, with piece number piece_no, on square sq.
void put_piece(PieceNumber piece_no, Square sq, Piece pc) {
set_piece_on_board(piece_no, BonaPiece(kpp_board_index[pc].fw + sq), BonaPiece(kpp_board_index[pc].fb + Inv(sq)), sq);
}
// Returns the PieceNumber of the piece on a given board square.
PieceNumber piece_no_of_board(Square sq) const { return piece_no_list_board[sq]; }
// Initialize the pieceList.
// Unused entries are set to BONA_PIECE_ZERO in case captured pieces need handling;
// a normal evaluation function then also works with pieces missing.
// piece_no_list is initialized with PIECE_NUMBER_NB to make debugging easier.
void clear()
{
for (auto& p: pieceListFw)
p = BONA_PIECE_ZERO;
for (auto& p: pieceListFb)
p = BONA_PIECE_ZERO;
for (auto& v :piece_no_list_board)
v = PIECE_NUMBER_NB;
}
// Check that the internally held pieceListFw[] contains valid BonaPiece values.
// Note: for debugging only; slow.
bool is_valid(const Position& pos);
// Record that the piece piece_no on square sq has the BonaPiece values fw and fb.
inline void set_piece_on_board(PieceNumber piece_no, BonaPiece fw, BonaPiece fb, Square sq)
{
assert(is_ok(piece_no));
pieceListFw[piece_no] = fw;
pieceListFb[piece_no] = fb;
piece_no_list_board[sq] = piece_no;
}
// Piece list: records where each piece number is located (as a BonaPiece). Used by FV38 etc.
// Length of the piece list.
// Fixed at 38 in the original (shogi) code.
public:
int length() const { return PIECE_NUMBER_KING; }
// Must be a multiple of 4 so that VPGATHERDD can be used.
// In addition, KPPT-type evaluation functions assume that elements 39 and 40 are zero;
// note that those elements may be accessed.
static const int MAX_LENGTH = 32;
// An array holding the piece number (PieceNumber) of the piece on each board square.
// One extra entry (+1) is kept for a king moving to SQUARE_NB;
// kings are never actually moved there, so that entry should never be used.
PieceNumber piece_no_list_board[SQUARE_NB_PLUS1];
private:
BonaPiece pieceListFw[MAX_LENGTH];
BonaPiece pieceListFb[MAX_LENGTH];
};
// For managing the incremental evaluation calculation:
// a structure recording which pieces changed relative to the previous position.
// At most 2 pieces move.
struct DirtyPiece
{
// What changed for the piece with this piece number
Eval::ChangedBonaPiece changed_piece[2];
// The piece numbers of the dirty pieces
PieceNumber pieceNo[2];
// The number of dirty pieces.
// It can be 0 for a null move.
// At most 2: the moving piece plus a captured piece.
int dirty_num;
};
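// For illustration (a sketch, not engine code; the real values are filled in by
// the move-making code): a quiet move records one dirty piece, a capture two,
// and a null move none.
// DirtyPiece dp;
// dp.dirty_num = 2; // capture: the moving piece plus the captured piece
// dp.pieceNo[0] = mover_no; // hypothetical PieceNumber of the moving piece
// dp.pieceNo[1] = captured_no; // hypothetical PieceNumber of the captured piece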
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
}
} // namespace Eval
#endif // #ifndef EVALUATE_H_INCLUDED

View file

@ -1,447 +0,0 @@
#if defined (EVAL_LEARN)
#include "../misc.h"
#include "../position.h"
#include <sstream>
#include <fstream>
#include <cstring> // std::memset()
using namespace std;
// -----------------------------------
// stage compression/decompression
// -----------------------------------
// Class that handles bitstream
// useful when doing aspect encoding
struct BitStream
{
// Set the memory to store the data in advance.
// Assume that memory is cleared to 0.
void set_data(uint8_t* data_) { data = data_; reset(); }
// Get the pointer passed in set_data().
uint8_t* get_data() const { return data; }
// Get the cursor.
int get_cursor() const { return bit_cursor; }
// reset the cursor
void reset() { bit_cursor = 0; }
// Write 1bit to the stream.
// If b is non-zero, write out 1. If 0, write 0.
void write_one_bit(int b)
{
if (b)
data[bit_cursor / 8] |= 1 << (bit_cursor & 7);
++bit_cursor;
}
// Get 1 bit from the stream.
int read_one_bit()
{
int b = (data[bit_cursor / 8] >> (bit_cursor & 7)) & 1;
++bit_cursor;
return b;
}
// write n bits of data
// Data shall be written out from the lower order of d.
void write_n_bit(int d, int n)
{
for (int i = 0; i < n; ++i)
write_one_bit(d & (1 << i));
}
// read n bits of data
// Reverse conversion of write_n_bit().
int read_n_bit(int n)
{
int result = 0;
for (int i = 0; i < n; ++i)
result |= read_one_bit() ? (1 << i) : 0;
return result;
}
private:
// Next bit position to read/write.
int bit_cursor;
// data entity
uint8_t* data;
};
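// A minimal round-trip sketch of the stream above (illustrative only; the
// buffer must be zero-cleared, as set_data() assumes):
// uint8_t buf[32] = {};
// BitStream bs;
// bs.set_data(buf);
// bs.write_n_bit(0b101, 3); // written low-order bit first
// bs.reset();
// assert(bs.read_n_bit(3) == 0b101);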
// Huffman coding
// * simplified from the original encoding to make conversion easier.
//
// The original shogi scheme, kept for reference:
// 1 board square (other than NO_PIECE) = 2 to 6 bits (+ 1-bit promotion flag + 1-bit side)
// 1 piece in hand = 1 to 5 bits (+ 1-bit promotion flag + 1-bit side)
//
//          on board       in hand
// empty    xxxxx0 + 0     (none)
// pawn     xxxx01 + 2     xxxx0 + 2
// lance    xx0011 + 2     xx001 + 2
// knight   xx1011 + 2     xx101 + 2
// silver   xx0111 + 2     xx011 + 2
// gold     x01111 + 1     x0111 + 1 // gold cannot promote, so it has no promotion flag.
// bishop   011111 + 2     01111 + 2
// rook     111111 + 2     11111 + 2
//
// Assuming all pieces are on the board:
// empty    81 - 40 pieces = 41 squares = 41 bits
// pawn     4 bits * 18 = 72 bits
// lance    6 bits * 4  = 24 bits
// knight   6 bits * 4  = 24 bits
// silver   6 bits * 4  = 24 bits
// gold     6 bits * 4  = 24 bits
// bishop   8 bits * 2  = 16 bits
// rook     8 bits * 2  = 16 bits
// -------
// 241 bits + 1 bit (side to move) + 7 bits * 2 (positions of the two kings) = 256 bits
//
// When a piece on the board is captured into hand, its square becomes empty and
// costs 1 bit, while a piece in hand costs 1 bit less than the same piece on the
// board, so the total bit count does not change. Any position can therefore be
// expressed within this fixed number of bits.
// A piece in hand needs no promotion flag, but including it keeps the total bit
// count fixed, so it is included as well.
// Huffman Encoding
//
// Empty xxxxxxx0
// Pawn xxxxx001 + 1 bit (Side to move)
// Knight xxxxx011 + 1 bit (Side to move)
// Bishop xxxxx101 + 1 bit (Side to move)
// Rook xxxxx111 + 1 bit (Side to move)
struct HuffmanedPiece
{
int code; // the bit pattern
int bits; // the number of bits in the pattern
};
HuffmanedPiece huffman_table[] =
{
{0b0000,1}, // NO_PIECE
{0b0001,4}, // PAWN
{0b0011,4}, // KNIGHT
{0b0101,4}, // BISHOP
{0b0111,4}, // ROOK
{0b1001,4}, // QUEEN
};
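// Worked example of the table above: the packer writes the code bits low-order
// first, then one side-to-move bit for non-empty squares. A white knight is
// therefore written as 1,1,0,0 (0b0011 over 4 bits) followed by 0 (white),
// 5 bits in total; an empty square is the single bit 0.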
// Class for compressing/decompressing sfen.
// An sfen can be packed into 256 bits (32 bytes) by the Huffman coding described above.
//
// Internal format = 1-bit turn + 7-bit king position *2 + piece on board (Huffman coding) + hand piece (Huffman coding)
// Side to move (White = 0, Black = 1) (1bit)
// White King Position (6 bits)
// Black King Position (6 bits)
// Huffman Encoding of the board
// Castling availability (1 bit x 4)
// En passant square (1 or 1 + 6 bits)
// Rule 50 (6 bits)
// Game play (8 bits)
//
// TODO(someone): Rename SFEN to FEN.
//
struct SfenPacker
{
// Pack sfen and store in data[32].
void pack(const Position& pos)
{
// cout << pos;
memset(data, 0, 32 /* 256bit */);
stream.set_data(data);
// Side to move.
stream.write_one_bit((int)(pos.side_to_move()));
// White king and black king positions, 6 bits each.
for(auto c: Colors)
stream.write_n_bit(pos.king_square(c), 6);
// Write the pieces on the board other than the kings.
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
Piece pc = pos.piece_on(make_square(f, r));
if (type_of(pc) == KING)
continue;
write_board_piece_to_stream(pc);
}
}
// TODO(someone): Support chess960.
stream.write_one_bit(pos.can_castle(WHITE_OO));
stream.write_one_bit(pos.can_castle(WHITE_OOO));
stream.write_one_bit(pos.can_castle(BLACK_OO));
stream.write_one_bit(pos.can_castle(BLACK_OOO));
if (pos.ep_square() == SQ_NONE) {
stream.write_one_bit(0);
}
else {
stream.write_one_bit(1);
stream.write_n_bit(static_cast<int>(pos.ep_square()), 6);
}
stream.write_n_bit(pos.state()->rule50, 6);
stream.write_n_bit(1 + (pos.game_ply()-(pos.side_to_move() == BLACK)) / 2, 8);
assert(stream.get_cursor() <= 256);
}
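// Bit-budget check for the initial position, as implied by the layout above:
// 1 (side to move) + 2 * 6 (kings) + 30 non-king pieces * 5 bits (4-bit code
// + 1 color bit) + 32 empty squares * 1 bit + 4 (castling) + 1 (no en passant)
// + 6 (rule50) + 8 (move count) = 214 bits <= 256.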
// sfen packed by pack() (256bit = 32bytes)
// Or sfen to decode with unpack()
uint8_t *data; // uint8_t[32];
//private:
// These should be private, but Position::set_from_packed_sfen(uint8_t data[32]) needs them, so they stay public.
BitStream stream;
// Output the board pieces to stream.
void write_board_piece_to_stream(Piece pc)
{
// piece type
PieceType pr = type_of(pc);
auto c = huffman_table[pr];
stream.write_n_bit(c.code, c.bits);
if (pc == NO_PIECE)
return;
// side-to-move flag
stream.write_one_bit(color_of(pc));
}
// Read one board piece from stream
Piece read_board_piece_from_stream()
{
PieceType pr = NO_PIECE_TYPE;
int code = 0, bits = 0;
while (true)
{
code |= stream.read_one_bit() << bits;
++bits;
assert(bits <= 6);
for (pr = NO_PIECE_TYPE; pr < KING; ++pr)
if (huffman_table[pr].code == code
&& huffman_table[pr].bits == bits)
goto Found;
}
Found:;
if (pr == NO_PIECE_TYPE)
return NO_PIECE;
// side-to-move flag
Color c = (Color)stream.read_one_bit();
return make_piece(c, pr);
}
};
// -----------------------------------
// Add to Position class
// -----------------------------------
// Add a function that unpacks directly, for speed. It is fairly involved:
// it is written by combining SfenPacker::unpack() and Position::set().
// Returns non-zero if the given position is invalid.
int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thread* th, bool mirror)
{
SfenPacker packer;
auto& stream = packer.stream;
stream.set_data((uint8_t*)&sfen);
std::memset(this, 0, sizeof(Position));
std::memset(si, 0, sizeof(StateInfo));
std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
st = si;
// Active color
sideToMove = (Color)stream.read_one_bit();
// Clear evalList. (Already zeroed by the memset above, but clear it explicitly.)
evalList.clear();
// While updating the PieceList we must record which piece goes where;
// this counter tracks how many piece numbers have been used.
PieceNumber next_piece_number = PIECE_NUMBER_ZERO;
pieceList[W_KING][0] = SQUARE_NB;
pieceList[B_KING][0] = SQUARE_NB;
// First, the king positions.
if (mirror)
{
for (auto c : Colors)
board[Mir((Square)stream.read_n_bit(6))] = make_piece(c, KING);
}
else
{
for (auto c : Colors)
board[stream.read_n_bit(6)] = make_piece(c, KING);
}
// Piece placement
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
auto sq = make_square(f, r);
if (mirror) {
sq = Mir(sq);
}
// the kings have already been placed
Piece pc;
if (type_of(board[sq]) != KING)
{
assert(board[sq] == NO_PIECE);
pc = packer.read_board_piece_from_stream();
}
else
{
pc = board[sq];
board[sq] = NO_PIECE; // clear it first; otherwise put_piece() would trip an assert.
}
// There may be no pieces, so skip in that case.
if (pc == NO_PIECE)
continue;
put_piece(Piece(pc), sq);
// update evalList
PieceNumber piece_no =
(pc == B_KING) ? PIECE_NUMBER_BKING : // black king
(pc == W_KING) ? PIECE_NUMBER_WKING : // white king
next_piece_number++; // otherwise
evalList.put_piece(piece_no, sq, pc); // place piece pc on square sq
//cout << sq << ' ' << board[sq] << ' ' << stream.get_cursor() << endl;
if (stream.get_cursor() > 256)
return 1;
//assert(stream.get_cursor() <= 256);
}
}
// Castling availability.
// TODO(someone): Support chess960.
st->castlingRights = 0;
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_H1); piece_on(rsq) != W_ROOK; --rsq) {}
set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_A1); piece_on(rsq) != W_ROOK; ++rsq) {}
set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_H1); piece_on(rsq) != B_ROOK; --rsq) {}
set_castling_right(BLACK, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_A1); piece_on(rsq) != B_ROOK; ++rsq) {}
set_castling_right(BLACK, rsq);
}
// En passant square. Ignore if no pawn capture is possible
if (stream.read_one_bit()) {
Square ep_square = static_cast<Square>(stream.read_n_bit(6));
if (mirror) {
ep_square = Mir(ep_square);
}
st->epSquare = ep_square;
if (!(attackers_to(st->epSquare) & pieces(sideToMove, PAWN))
|| !(pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))))
st->epSquare = SQ_NONE;
}
else {
st->epSquare = SQ_NONE;
}
// Halfmove clock
st->rule50 = stream.read_n_bit(6);
// Fullmove number
gamePly = stream.read_n_bit(8);
// Convert from fullmove starting from 1 to gamePly starting from 0,
// handle also common incorrect FEN with fullmove = 0.
gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK);
assert(stream.get_cursor() <= 256);
chess960 = false;
thisThread = th;
set_state(st);
//std::cout << *this << std::endl;
assert(pos_is_ok());
#if defined(EVAL_NNUE)
assert(evalList.is_valid(*this));
#endif // defined(EVAL_NNUE)
return 0;
}
// Given the board, the pieces in hand, and the side to move, return the sfen.
//std::string Position::sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly_)
//{
// // If the conversion depends only on these, copy them into an internal structure and call sfen();
// // that should convert correctly...
// Position pos;
//
// memcpy(pos.board, board, sizeof(Piece) * 81);
// memcpy(pos.hand, hands, sizeof(Hand) * 2);
// pos.sideToMove = turn;
// pos.gamePly = gamePly_;
//
// return pos.sfen();
//
// // The implementation above is clean but slow.
// // It is a bottleneck when learning from large numbers of game records, hence a function that unpacks directly.
//}
// Get the packed sfen, writing it into the buffer passed as the argument.
void Position::sfen_pack(PackedSfen& sfen)
{
SfenPacker sp;
sp.data = (uint8_t*)&sfen;
sp.pack(*this);
}
//// Unpack the packed sfen. Returns an sfen string.
//std::string Position::sfen_unpack(const PackedSfen& sfen)
//{
// SfenPacker sp;
// sp.data = (uint8_t*)&sfen;
// return sp.unpack();
//}
#endif // defined(EVAL_LEARN)

View file

@ -1 +0,0 @@
// just a place holder

View file

@ -1,133 +0,0 @@
#ifndef __HALF_FLOAT_H__
#define __HALF_FLOAT_H__
// Half Float Library by yaneurao
// (16-bit float)
// Floating-point arithmetic using a 16-bit type.
// Assumes the float type generated by the compiler is in IEEE 754 format.
#include "../types.h"
namespace HalfFloat
{
// IEEE 754 float 32 format is :
// sign(1bit) + exponent(8bits) + fraction(23bits) = 32bits
//
// Our float16 format is :
// sign(1bit) + exponent(5bits) + fraction(10bits) = 16bits
union float32_converter
{
int32_t n;
float f;
};
// 16-bit float
struct float16
{
// --- constructors
float16() {}
float16(int16_t n) { from_float((float)n); }
float16(int32_t n) { from_float((float)n); }
float16(float n) { from_float(n); }
float16(double n) { from_float((float)n); }
// build from a float
void from_float(float f) { *this = to_float16(f); }
// --- implicit converters
operator int32_t() const { return (int32_t)to_float(*this); }
operator float() const { return to_float(*this); }
operator double() const { return double(to_float(*this)); }
// --- operators
float16 operator += (float16 rhs) { from_float(to_float(*this) + to_float(rhs)); return *this; }
float16 operator -= (float16 rhs) { from_float(to_float(*this) - to_float(rhs)); return *this; }
float16 operator *= (float16 rhs) { from_float(to_float(*this) * to_float(rhs)); return *this; }
float16 operator /= (float16 rhs) { from_float(to_float(*this) / to_float(rhs)); return *this; }
float16 operator + (float16 rhs) const { return float16(*this) += rhs; }
float16 operator - (float16 rhs) const { return float16(*this) -= rhs; }
float16 operator * (float16 rhs) const { return float16(*this) *= rhs; }
float16 operator / (float16 rhs) const { return float16(*this) /= rhs; }
float16 operator - () const { return float16(-to_float(*this)); }
bool operator == (float16 rhs) const { return this->v_ == rhs.v_; }
bool operator != (float16 rhs) const { return !(*this == rhs); }
static void UnitTest() { unit_test(); }
private:
// --- entity
uint16_t v_;
// --- conversion between float and float16
static float16 to_float16(float f)
{
float32_converter c;
c.f = f;
u32 n = c.n;
// The sign bit is MSB in common.
uint16_t sign_bit = (n >> 16) & 0x8000;
// The exponent of IEEE 754's float 32 is biased +127 , so we change this bias into +15 and limited to 5-bit.
uint16_t exponent = (((n >> 23) - 127 + 15) & 0x1f) << 10;
// The fraction is limited to 10-bit.
uint16_t fraction = (n >> (23-10)) & 0x3ff;
float16 f_;
f_.v_ = sign_bit | exponent | fraction;
return f_;
}
static float to_float(float16 v)
{
u32 sign_bit = (v.v_ & 0x8000) << 16;
u32 exponent = ((((v.v_ >> 10) & 0x1f) - 15 + 127) & 0xff) << 23;
u32 fraction = (v.v_ & 0x3ff) << (23 - 10);
float32_converter c;
c.n = sign_bit | exponent | fraction;
return c.f;
}
// Not a real unit test, but it confirms the arithmetic works. The code may be cleaned up later.
static void unit_test()
{
float16 a, b, c, d;
a = 1;
std::cout << (float)a << std::endl;
b = -118.625;
std::cout << (float)b << std::endl;
c = 2.5;
std::cout << (float)c << std::endl;
d = a + c;
std::cout << (float)d << std::endl;
c *= 1.5;
std::cout << (float)c << std::endl;
b /= 3;
std::cout << (float)b << std::endl;
float f1 = 1.5;
a += f1;
std::cout << (float)a << std::endl;
a += f1 * (float)a;
std::cout << (float)a << std::endl;
}
};
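// Worked example of to_float16() above: float 1.0f has sign 0, biased exponent
// 127 and a zero fraction; rebiasing 127 -> 15 gives the 5-bit exponent 0b01111,
// so the float16 bit pattern is 0 01111 0000000000 = 0x3C00.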
}
#endif // __HALF_FLOAT_H__

View file

@ -1,237 +0,0 @@
#ifndef _LEARN_H_
#define _LEARN_H_
#if defined(EVAL_LEARN)
#include <vector>
// =====================
// Settings for learning
// =====================
// If you select one of the following, the details below are chosen automatically.
// If you select none of them, each detail must be set individually.
// Learning settings for the elmo method. This is the default.
// For a standard squeeze, specify "lambda 1" with the learn command.
#define LEARN_ELMO_METHOD
// ----------------------
// update formula
// ----------------------
// Ada Grad. Recommended because it is stable.
// #define ADA_GRAD_UPDATE
// SGD that looks only at the sign of the gradient. It needs less memory, but accuracy suffers.
// #define SGD_UPDATE
// ----------------------
// Settings for learning
// ----------------------
// mini-batch size.
// The gradient is accumulated over this many positions before each update.
// Smaller values mean more update_weights() calls and faster convergence, but a noisier gradient.
// Larger values mean fewer update_weights() calls and slower convergence, but a more accurate gradient.
// In most cases there is no need to change this value.
#define LEARN_MINI_BATCH_SIZE (1000 * 1000 * 1)
// The number of positions read from the file at one time; after reading this many, they are shuffled.
// A reasonably large value is better, but this number x 40 bytes x 3 of memory is consumed: 400MB x 3 for 10M positions.
// Must be a multiple of THREAD_BUFFER_SIZE(=10000).
#define LEARN_SFEN_READ_SIZE (1000 * 1000 * 10)
// Interval for saving the evaluation function during learning: save after every this many positions.
// Needless to say, the longer the saving interval, the shorter the learning time.
// The folder name is incremented on each save: 0/, 1/, 2/, ...
// By default, once every 1 billion positions.
#define LEARN_EVAL_SAVE_INTERVAL (1000000000ULL)
// ----------------------
// Select the objective function
// ----------------------
// The objective function is the sum of squares of the difference in winning percentage
// See learner.cpp for more information.
//#define LOSS_FUNCTION_IS_WINNING_PERCENTAGE
// Objective function is cross entropy
// See learner.cpp for more information.
// The so-called ordinary "rag-wringing" (zoukin-shibori) squeeze.
//#define LOSS_FUNCTION_IS_CROSS_ENTOROPY
// A version in which the objective function is cross entropy, but the win rate function is not passed
// #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE
// elmo (WCSC27) method
// #define LOSS_FUNCTION_IS_ELMO_METHOD
// Note: others may be added.
// ----------------------
// debug settings for learning
// ----------------------
// Output rmse during learning only once per this many evaluations.
// The rmse calculation runs on a single thread and takes time, so reducing the output helps.
#define LEARN_RMSE_OUTPUT_INTERVAL 1
// ----------------------
// learning from zero vector
// ----------------------
// Start learning the evaluation function parameters from the zero vector.
// Initialize to zero, generate games, learn, and repeat;
// this should eventually yield parameters that do not depend on professional games. (maybe)
// (very time consuming)
//#define RESET_TO_ZERO_VECTOR
// ----------------------
// Floating point for learning
// ----------------------
// Setting this to double improves calculation accuracy but doubles the memory used by the weight array.
// Currently, with float, the weight array is about 4.5 times the size of the evaluation function file (about 4.5GB with KPPT).
// Even with double there is almost no difference in convergence, so it is fixed to float.
// when using float
typedef float LearnFloatType;
// when using double
//typedef double LearnFloatType;
// when using float16
//#include "half_float.h"
//typedef HalfFloat::float16 LearnFloatType;
// ----------------------
// save memory
// ----------------------
// Use a triangular array for the Weight array (the KPP part) to save memory.
// With this, the weight array for learning is about 3 times the size of the evaluation function file.
#define USE_TRIANGLE_WEIGHT_ARRAY
// ----------------------
// dimension down
// ----------------------
// Dimension reduction for mirrors (left/right symmetry) and inverse (forward/backward symmetry).
// All on by default.
// Dimension reduction using mirror and inverse for KK. (Unclear effect)
// USE_KK_MIRROR_WRITE must be on when USE_KK_INVERSE_WRITE is on.
#define USE_KK_MIRROR_WRITE
#define USE_KK_INVERSE_WRITE
// Dimension reduction using Mirror and Inverse for KKP. (Inverse is not so effective)
// When USE_KKP_INVERSE_WRITE is turned on, USE_KKP_MIRROR_WRITE must also be turned on.
#define USE_KKP_MIRROR_WRITE
#define USE_KKP_INVERSE_WRITE
// Perform dimension reduction using a mirror for KPP. (Turning this off requires twice as many teacher positions.)
// KPP has no inverse. (Only the friendly king appears in it.)
#define USE_KPP_MIRROR_WRITE
// Perform dimension reduction using a mirror for KPPP. (Turning this off requires twice as many teacher positions.)
// KPPP has no inverse. (Only the friendly king appears in it.)
#define USE_KPPP_MIRROR_WRITE
// Reduce the dimension by KPP for learning the KKPP component.
// Learning is very slow.
// Do not use as it is not debugged.
//#define USE_KKPP_LOWER_DIM
// ======================
// Settings for creating teacher phases
// ======================
// ----------------------
// write out the draw
// ----------------------
// When a game ends in a draw, write it out as a teacher position.
// Whether doing this is actually better is debatable.
// #define LEARN_GENSFEN_USE_DRAW_RESULT
// ======================
// configure
// ======================
// ----------------------
// Learning with the method of elmo (WCSC27)
// ----------------------
#if defined( LEARN_ELMO_METHOD )
#define LOSS_FUNCTION_IS_ELMO_METHOD
#define ADA_GRAD_UPDATE
#endif
// ----------------------
// Definition of struct used in Learner
// ----------------------
#include "../position.h"
namespace Learner
{
// Structure combining a PackedSfen and its evaluation value.
// Writing different contents depending on compile options would make teacher data hard to share,
// so for now all of the following members are always written.
struct PackedSfenValue
{
// position
PackedSfen sfen;
// Evaluation value returned from Learner::search()
int16_t score;
// PV first move
// Used when finding the match rate with the teacher
uint16_t move;
// Ply count from the initial position.
uint16_t gamePly;
// 1 if the side to move ultimately wins the game, -1 if it loses,
// 0 if the game is drawn.
// Draws are produced by the teacher-generation command gensfen
// only when LEARN_GENSFEN_USE_DRAW_RESULT is enabled.
int8_t game_result;
// So that teacher data files can be exchanged across environments,
// the structure is padded to exactly 40 bytes everywhere.
uint8_t padding;
// 32 + 2 + 2 + 2 + 1 + 1 = 40bytes
};
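// An illustrative compile-time check of the 40-byte layout documented above
// (added here as a sketch; not in the original source):
static_assert(sizeof(PackedSfenValue) == 40, "PackedSfenValue must be exactly 40 bytes");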
// A pair of the principal variation and its evaluation value.
// Used by Learner::search() and Learner::qsearch().
typedef std::pair<Value, std::vector<Move> > ValueAndPV;
// Originally only YaneuraOu 2018 Otafuku had this stub.
// The stub is required when EVAL_LEARN is defined.
extern Learner::ValueAndPV search(Position& pos, int depth , size_t multiPV = 1 , uint64_t NodesLimit = 0);
extern Learner::ValueAndPV qsearch(Position& pos);
double calc_grad(Value shallow, const PackedSfenValue& psv);
}
#endif
#endif // ifndef _LEARN_H_

File diff suppressed because it is too large

View file

@ -1,256 +0,0 @@
#include "learning_tools.h"
#if defined (EVAL_LEARN)
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "../misc.h"
using namespace Eval;
namespace EvalLearningTools
{
// --- static variables
double Weight::eta;
double Weight::eta1;
double Weight::eta2;
double Weight::eta3;
uint64_t Weight::eta1_epoch;
uint64_t Weight::eta2_epoch;
std::vector<bool> min_index_flag;
// --- initialization for each individual table
void init_min_index_flag()
{
// Initialization of mir_piece and inv_piece must be completed.
assert(mir_piece(Eval::f_pawn) == Eval::e_pawn);
// Initialize the flag array for dimension reduction
// Not involved in KPPP.
KK g_kk;
g_kk.set(SQUARE_NB, Eval::fe_end, 0);
KKP g_kkp;
g_kkp.set(SQUARE_NB, Eval::fe_end, g_kk.max_index());
KPP g_kpp;
g_kpp.set(SQUARE_NB, Eval::fe_end, g_kkp.max_index());
uint64_t size = g_kpp.max_index();
min_index_flag.resize(size);
#pragma omp parallel
{
#if defined(_OPENMP)
// Under Windows with two CPUs, avoid having only 64 logical cores used:
// explicitly bind each thread to a processor group here.
int thread_index = omp_get_thread_num(); // get your thread number
WinProcGroup::bindThisThread(thread_index);
#endif
#pragma omp for schedule(dynamic,20000)
for (int64_t index_ = 0; index_ < (int64_t)size; ++index_)
{
// OpenMP seems to require a signed loop variable,
// which is awkward here.
uint64_t index = (uint64_t)index_;
if (g_kk.is_ok(index))
{
// Check that converting from the index and back restores the original index.
// This runs only once at startup, so an assert is sufficient.
assert(g_kk.fromIndex(index).toIndex() == index);
KK a[KK_LOWER_COUNT];
g_kk.fromIndex(index).toLowerDimensions(a);
// Make sure that the first element of dimension reduction is the same as the original index.
assert(a[0].toIndex() == index);
uint64_t min_index = UINT64_MAX;
for (auto& e : a)
min_index = std::min(min_index, e.toIndex());
min_index_flag[index] = (min_index == index);
}
else if (g_kkp.is_ok(index))
{
assert(g_kkp.fromIndex(index).toIndex() == index);
KKP x = g_kkp.fromIndex(index);
KKP a[KKP_LOWER_COUNT];
x.toLowerDimensions(a);
assert(a[0].toIndex() == index);
uint64_t min_index = UINT64_MAX;
for (auto& e : a)
min_index = std::min(min_index, e.toIndex());
min_index_flag[index] = (min_index == index);
}
else if (g_kpp.is_ok(index))
{
assert(g_kpp.fromIndex(index).toIndex() == index);
KPP x = g_kpp.fromIndex(index);
KPP a[KPP_LOWER_COUNT];
x.toLowerDimensions(a);
assert(a[0].toIndex() == index);
uint64_t min_index = UINT64_MAX;
for (auto& e : a)
min_index = std::min(min_index, e.toIndex());
min_index_flag[index] = (min_index == index);
}
else
{
assert(false);
}
}
}
}
void learning_tools_unit_test_kpp()
{
// test the KPP triangular array for bugs
// Check that every k-p0-p1 combination is handled by KPP and that the
// dimension reduction applied to it is correct.
KK g_kk;
g_kk.set(SQUARE_NB, Eval::fe_end, 0);
KKP g_kkp;
g_kkp.set(SQUARE_NB, Eval::fe_end, g_kk.max_index());
KPP g_kpp;
g_kpp.set(SQUARE_NB, Eval::fe_end, g_kkp.max_index());
std::vector<bool> f;
f.resize(g_kpp.max_index() - g_kpp.min_index());
for(auto k = SQUARE_ZERO ; k < SQUARE_NB ; ++k)
for(auto p0 = BonaPiece::BONA_PIECE_ZERO; p0 < fe_end ; ++p0)
for (auto p1 = BonaPiece::BONA_PIECE_ZERO; p1 < fe_end; ++p1)
{
KPP kpp_org = g_kpp.fromKPP(k,p0,p1);
KPP kpp0;
KPP kpp1 = g_kpp.fromKPP(Mir(k), mir_piece(p0), mir_piece(p1));
KPP kpp_array[2];
auto index = kpp_org.toIndex();
assert(g_kpp.is_ok(index));
kpp0 = g_kpp.fromIndex(index);
//if (kpp0 != kpp_org)
// std::cout << "index = " << index << "," << kpp_org << "," << kpp0 << std::endl;
kpp0.toLowerDimensions(kpp_array);
assert(kpp_array[0] == kpp0);
assert(kpp0 == kpp_org);
assert(kpp_array[1] == kpp1);
auto index2 = kpp1.toIndex();
f[index - g_kpp.min_index()] = f[index2-g_kpp.min_index()] = true;
}
// Check if there is no missing index.
for(size_t index = 0 ; index < f.size(); index++)
if (!f[index])
{
std::cout << index << g_kpp.fromIndex(index + g_kpp.min_index()) << std::endl;
}
}
void learning_tools_unit_test_kppp()
{
// Test for missing KPPP calculations
KPPP g_kppp;
g_kppp.set(15, Eval::fe_end,0);
uint64_t min_index = g_kppp.min_index();
uint64_t max_index = g_kppp.max_index();
// Confirm last element.
//KPPP x = KPPP::fromIndex(max_index-1);
//std::cout << x << std::endl;
for (uint64_t index = min_index; index < max_index; ++index)
{
KPPP x = g_kppp.fromIndex(index);
//std::cout << x << std::endl;
#if 0
if ((index % 10000000) == 0)
std::cout << "index = " << index << std::endl;
// index = 9360000000
// done.
if (x.toIndex() != index)
{
std::cout << "assertion failed , index = " << index << std::endl;
}
#endif
assert(x.toIndex() == index);
// ASSERT((&kppp_ksq_pcpcpc(x.king(), x.piece0(), x.piece1(), x.piece2()) - &kppp[0][0]) == (index - min_index));
}
}
void learning_tools_unit_test_kkpp()
{
KKPP g_kkpp;
g_kkpp.set(SQUARE_NB, 10000, 0);
uint64_t n = 0;
for (int k = 0; k < SQUARE_NB; ++k)
for (int i = 0; i < 10000; ++i) // As a test, assume a large fe_end and loop up to 10000.
for (int j = 0; j < i; ++j)
{
auto kkpp = g_kkpp.fromKKPP(k, (BonaPiece)i, (BonaPiece)j);
auto r = kkpp.toRawIndex();
assert(n++ == r);
auto kkpp2 = g_kkpp.fromIndex(r + g_kkpp.min_index());
assert(kkpp2.king() == k && kkpp2.piece0() == i && kkpp2.piece1() == j);
}
}
// Initialize this entire EvalLearningTools
void init()
{
// Initialization is required only once after startup, so a flag for that.
static bool first = true;
if (first)
{
std::cout << "EvalLearningTools init..";
// Make mir_piece() and inv_piece() available.
// min_index_flag is initialized after this, and that initialization
// depends on these tables, so this must be done first.
init_mir_inv_tables();
//learning_tools_unit_test_kpp();
//learning_tools_unit_test_kppp();
//learning_tools_unit_test_kkpp();
// The unit tests could also run last, but since init_min_index_flag() takes a
// long time, run them here when debugging.
init_min_index_flag();
std::cout << "done." << std::endl;
first = false;
}
}
}
#endif

File diff suppressed because it is too large

View file

@ -1,123 +0,0 @@
#include "../types.h"
#if defined(EVAL_LEARN)
#include "multi_think.h"
#include "../tt.h"
#include "../uci.h"
#include <thread>
void MultiThink::go_think()
{
// Keep a copy to restore the Options settings later.
auto oldOptions = Options;
// When an opening book is used, on-the-fly access is slow and the file access
// is not thread safe, so ensure here that the book is read completely into memory.
Options["BookOnTheFly"] = std::string("false");
// Read the evaluation function, etc.
// With the learn command, the evaluation function values may be adjusted after
// loading, so skip the memory-corruption check.
init_nnue(true);
// Call the derived class's init().
init();
// The loop upper limit is set with set_loop_max().
loop_count = 0;
done_count = 0;
// Create as many threads as Options["Threads"] and start thinking.
std::vector<std::thread> threads;
auto thread_num = (size_t)Options["Threads"];
// Allocate the worker-thread finished flags.
thread_finished.resize(thread_num);
// start worker thread
for (size_t i = 0; i < thread_num; ++i)
{
thread_finished[i] = 0;
threads.push_back(std::thread([i, this]
{
// make use of all processor threads.
WinProcGroup::bindThisThread(i);
// execute the overridden process
this->thread_worker(i);
// Set the end flag because the thread has ended
this->thread_finished[i] = 1;
}));
}
// wait for all threads to finish
// If we simply wrote
// for (auto& th : threads)
// th.join();
// the master would block here while the workers are still running, and
// callback_func() could not be called during that time, so saving would be
// impossible. Therefore we poll the finished flags ourselves.
// function to determine if all threads have finished
auto threads_done = [&]()
{
// returns false if any thread is still running
for (auto& f : thread_finished)
if (!f)
return false;
return true;
};
// Call back if the callback function is set.
auto do_a_callback = [&]()
{
if (callback_func)
callback_func();
};
for (uint64_t i = 0 ; ; )
{
// If all threads have finished, exit the loop.
if (threads_done())
break;
sleep(1000);
// callback_func() is called every callback_seconds.
if (++i == callback_seconds)
{
do_a_callback();
// Reset the counter after the callback returns, so that however long
// save() etc. take inside do_a_callback(), the next call happens a fixed
// interval after the previous one finished.
i = 0;
}
}
// Last save.
std::cout << std::endl << "finalize..";
// do_a_callback();
// → The caller is expected to do the final save, so it is probably not needed here.
// A thread may have set its finished flag while its exit code is still running,
// so wait for actual termination with join().
for (auto& th : threads)
th.join();
// Even when all threads have finished, a file-writing thread etc. may still
// have unfinished work, so only report that all threads have been joined.
std::cout << "all threads are joined." << std::endl;
// Restore the Options that were overwritten.
// Assign via the handler, since the on_change handler only runs on assignment.
for (auto& s : oldOptions)
Options[s.first] = std::string(s.second);
}
#endif // defined(EVAL_LEARN)

View file

@ -1,151 +0,0 @@
#ifndef _MULTI_THINK_
#define _MULTI_THINK_
#if defined(EVAL_LEARN)
#include <functional>
#include "../misc.h"
#include "../learn/learn.h"
#include "../thread_win32_osx.h"
#include <atomic>
// Helper class for when multiple threads each need to call Search::think()
// individually: learning from game records, self-play position generation, etc.
// Derive from this class to use it.
struct MultiThink
{
MultiThink() : prng(21120903)
{
loop_count = 0;
}
// Call this function from the master thread; each worker thread thinks, and
// control returns once the termination condition is satisfied.
// - It is safe for each thread to call Learner::search() and qsearch():
// the transposition table is separated per thread (and restored afterwards).
// - The opening book is not thread safe in on-the-fly mode, so that mode is
// temporarily turned off.
// [Requirements]
// 1) Override thread_worker()
// 2) Set the loop count with set_loop_max()
// 3) Set a function to be called back periodically, if needed
// (callback_func and callback_seconds; a minimal derived-class sketch follows this struct)
void go_think();
// Override this if the derived class needs its own initialization;
// it is called from go_think() once initialization is complete.
// This is a good time to read the opening book.
virtual void init() {}
// The thread worker launched on each thread by go_think().
// Override this.
virtual void thread_worker(size_t thread_id) = 0;
// Called back every callback_seconds seconds during go_think().
std::function<void()> callback_func;
uint64_t callback_seconds = 600;
// Set the number of times worker processes (calls Search::think()).
void set_loop_max(uint64_t loop_max_) { loop_max = loop_max_; }
// Get the value set by set_loop_max().
uint64_t get_loop_max() const { return loop_max; }
// [ASYNC] Fetch the loop counter value and then increment the counter.
// Returns UINT64_MAX if the counter has reached loop_max.
// When generating positions, call this function at the moment of generation;
// note that the number of generated positions and the counter value may not match.
uint64_t get_next_loop_count() {
std::unique_lock<std::mutex> lk(loop_mutex);
if (loop_count >= loop_max)
return UINT64_MAX;
return loop_count++;
}
// [ASYNC] Returns the number of items processed; each call returns the incremented counter.
uint64_t get_done_count() {
std::unique_lock<std::mutex> lk(loop_mutex);
return ++done_count;
}
// Mutex when worker thread accesses I/O
std::mutex io_mutex;
protected:
// Random number generator body
AsyncPRNG prng;
private:
// number of times worker processes (calls Search::think())
std::atomic<uint64_t> loop_max;
// number of times the worker has processed (calls Search::think())
std::atomic<uint64_t> loop_count;
// To return the number of times it has been processed.
std::atomic<uint64_t> done_count;
// Mutex guarding the counters above.
std::mutex loop_mutex;
// Thread finished flags.
// vector<bool> may not update correctly when written from multiple threads,
// so plain bytes are used instead.
typedef uint8_t Flag;
std::vector<Flag> thread_finished;
};
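// A minimal derived-class sketch following requirements 1)-3) above; the name
// and the do-nothing work unit are illustrative, not part of the original source.
struct CountThink : public MultiThink
{
// Claim loop counts until they are exhausted; each claim is one unit of work here.
void thread_worker(size_t /*thread_id*/) override
{
while (get_next_loop_count() != UINT64_MAX)
get_done_count();
}
};
// Usage: CountThink mt; mt.set_loop_max(1000); mt.go_think();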
// Mechanism for processing tasks during idle time.
// The master hands over tasks with push_task_async() whenever it likes;
// a slave calling on_idle() in its spare time pops one task at a time and runs
// it until the queue is empty.
// Convenient when writing a MultiThink thread worker in master-slave style.
struct TaskDispatcher
{
typedef std::function<void(size_t /* thread_id */)> Task;
// slave calls this function during idle.
void on_idle(size_t thread_id)
{
Task task;
while ((task = get_task_async()) != nullptr)
task(thread_id);
sleep(1);
}
// [ASYNC] Push a task.
void push_task_async(Task task)
{
std::unique_lock<std::mutex> lk(task_mutex);
tasks.push_back(task);
}
// Reserve array capacity for size tasks in advance.
void task_reserve(size_t size)
{
tasks.reserve(size);
}
protected:
// set of tasks
std::vector<Task> tasks;
// [ASYNC] Pop one task. Called from on_idle().
Task get_task_async()
{
std::unique_lock<std::mutex> lk(task_mutex);
if (tasks.size() == 0)
return nullptr;
Task task = *tasks.rbegin();
tasks.pop_back();
return task;
}
// a mutex for accessing tasks
std::mutex task_mutex;
};
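// Usage sketch (illustrative, not from the original source): the master queues
// work at any time and an idle slave drains the queue.
// TaskDispatcher dispatcher;
// dispatcher.push_task_async([](size_t thread_id) { /* do one piece of work */ });
// ...
// dispatcher.on_idle(my_thread_id); // called by a slave in its spare time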
#endif // defined(EVAL_LEARN)
#endif

View file

@ -42,7 +42,6 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
#endif
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <sstream>
@ -140,7 +139,7 @@ const string engine_info(bool to_uci) {
string month, day, year;
stringstream ss, date(__DATE__); // From compiler, format is "Sep 21 2008"
ss << "Stockfish+NNUE " << Version << setfill('0');
ss << "Stockfish NNUE " << Version << setfill('0');
if (Version.empty())
{
@ -151,7 +150,7 @@ const string engine_info(bool to_uci) {
ss << (Is64Bit ? " 64" : "")
<< (HasPext ? " BMI2" : (HasPopCnt ? " POPCNT" : ""))
<< (to_uci ? "\nid author ": " by ")
<< "T. Romstad, M. Costalba, J. Kiiski, G. Linscott, H. Noda, Y. Nasu, M. Isozaki";
<< "T. Romstad, M. Costalba, J. Kiiski, G. Linscott";
return ss.str();
}
@ -371,8 +370,8 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
{
if (mem)
sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl;
//else
//sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
else
sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
}
firstCall = false;
@ -527,163 +526,18 @@ void bindThisThread(size_t idx) {
} // namespace WinProcGroup
// Returns a string that represents the current time. (Used when learning evaluation functions)
std::string now_string()
{
// std::ctime() and localtime() produce "not secure" warnings under MSVC.
// They are fine by the C++ standard, but...
#if defined(_MSC_VER)
// C4996 : 'ctime' : This function or variable may be unsafe.Consider using ctime_s instead.
#pragma warning(disable : 4996)
#endif
auto now = std::chrono::system_clock::now();
auto tp = std::chrono::system_clock::to_time_t(now);
auto result = string(std::ctime(&tp));
// remove line endings if they are included at the end
while (*result.rbegin() == '\n' || (*result.rbegin() == '\r'))
result.pop_back();
return result;
}
void sleep(int ms)
{
std::this_thread::sleep_for(std::chrono::milliseconds(ms));
}
void* aligned_malloc(size_t size, size_t align)
{
void* p = _mm_malloc(size, align);
if (p == nullptr)
{
std::cout << "info string can't allocate memory. size = " << size << std::endl;
exit(1);
}
return p;
}
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func)
{
fstream fs(filename, ios::in | ios::binary);
if (fs.fail())
return 1;
fs.seekg(0, fstream::end);
uint64_t eofPos = (uint64_t)fs.tellg();
fs.clear(); // Otherwise the next seek may fail.
fs.seekg(0, fstream::beg);
uint64_t begPos = (uint64_t)fs.tellg();
uint64_t file_size = eofPos - begPos;
//std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
// Now that the file size is known, call callback_func to obtain a buffer
// and get its pointer.
void* ptr = callback_func(file_size);
// If the buffer cannot be allocated, or the file size differs from the
// expected size, the callback returns nullptr; in that case reading is
// aborted and an error is returned.
if (ptr == nullptr)
return 2;
// read in chunks
const uint64_t block_size = 1024 * 1024 * 1024; // number of bytes to read per call (1GB)
for (uint64_t pos = 0; pos < file_size; pos += block_size)
{
// size to read this time
uint64_t read_size = (pos + block_size < file_size) ? block_size : (file_size - pos);
fs.read((char*)ptr + pos, read_size);
// A read error occurred partway through the file.
if (fs.fail())
return 2;
//cout << ".";
}
fs.close();
return 0;
}
void aligned_free(void* ptr)
{
_mm_free(ptr);
}
int write_memory_to_file(std::string filename, void* ptr, uint64_t size)
{
fstream fs(filename, ios::out | ios::binary);
if (fs.fail())
return 1;
const uint64_t block_size = 1024 * 1024 * 1024; // number of bytes to write per call (1GB)
for (uint64_t pos = 0; pos < size; pos += block_size)
{
// Memory size to write this time
uint64_t write_size = (pos + block_size < size) ? block_size : (size - pos);
fs.write((char*)ptr + pos, write_size);
//cout << ".";
}
fs.close();
return 0;
}
// ----------------------------
// mkdir wrapper
// ----------------------------
// Create a folder, specified relative to the current folder. Returns 0 on success, non-zero on failure.
// Non-ASCII folder names are not supported.
// With gcc under msys2, folder creation fails with _wmkdir() for unknown reasons,
// so _mkdir() is used instead.
#if defined(_WIN32)
// for Windows
#if defined(_MSC_VER)
#include <codecvt> // for converting to wstring for _wmkdir
#include <locale> // required for wstring_convert
namespace Dependency {
int mkdir(std::string dir_name)
{
std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> cv;
return _wmkdir(cv.from_bytes(dir_name).c_str());
// ::CreateDirectory(cv.from_bytes(dir_name).c_str(),NULL);
}
}
#elif defined(__GNUC__)
#include <direct.h>
namespace Dependency {
int mkdir(std::string dir_name)
{
return _mkdir(dir_name.c_str());
}
}
#endif
#elif defined(__linux__)
// mkdir implementation for Linux (guarded by the compiler-defined __linux__).
#include "sys/stat.h"
namespace Dependency {
int mkdir(std::string dir_name)
{
return ::mkdir(dir_name.c_str(), 0777);
}
}
#else
// Detecting other environments would require further Makefile changes.
// Folder creation is only used for saving evaluation function files, so a no-op suffices here.
namespace Dependency {
int mkdir(std::string dir_name)
{
return 0;
}
}
#endif

View file

@ -21,20 +21,13 @@
#ifndef MISC_H_INCLUDED
#define MISC_H_INCLUDED
#include <algorithm>
#include <cassert>
#include <chrono>
#include <functional>
#include <mutex>
#include <ostream>
#include <string>
#include <vector>
#ifndef _MSC_VER
#include <mm_malloc.h>
#endif
#include "types.h"
#include "thread_win32_osx.h"
const std::string engine_info(bool to_uci = false);
const std::string compiler_info();
@ -115,20 +108,8 @@ public:
/// Output values only have 1/8th of their bits set on average.
template<typename T> T sparse_rand()
{ return T(rand64() & rand64() & rand64()); }
// Returns a random number in [0, n). (Not perfectly uniform, but good enough in practice.)
uint64_t rand(uint64_t n) { return rand<uint64_t>() % n; }
// Return the random seed used internally.
uint64_t get_seed() const { return s; }
};
// Display a random seed. (For debugging)
inline std::ostream& operator<<(std::ostream& os, PRNG& prng)
{
os << "PRNG::seed = " << std::hex << prng.get_seed() << std::dec;
return os;
}
inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
#if defined(__GNUC__) && defined(IS_64BIT)
__extension__ typedef unsigned __int128 uint128;
@ -152,155 +133,8 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
namespace WinProcGroup {
void bindThisThread(size_t idx);
}
// sleep for the specified number of milliseconds.
extern void sleep(int ms);
// Returns a string that represents the current time. (Used for log output when learning evaluation function)
std::string now_string();
// wrapper for aborting partway through
static void my_exit()
{
sleep(3000); // wait so the error message is flushed before exiting.
exit(EXIT_FAILURE);
}
// When compiled with gcc/clang under msys2, Windows Subsystem for Linux, etc.,
// C++ std::ifstream cannot read or write more than 2GB in a single call, so
// these functions act as wrappers that process the file in chunks.
//
// read_file_to_memory() calls back callback_func with the file size once the
// file has been opened; allocate a buffer there and return its pointer, and the
// file is read into it. These functions return non-zero on error, e.g. when the
// file cannot be found.
//
// If the buffer cannot be allocated in the callback, or the file size differs
// from the expected size, return nullptr from the callback; read_file_to_memory()
// then aborts the read and returns an error.
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func);
int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
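// Illustrative use of read_file_to_memory() (not from the original source):
// allocate the buffer inside the callback and let the helper fill it.
// std::vector<uint8_t> buf;
// int rc = read_file_to_memory("nn.bin", [&](uint64_t size) -> void* {
// buf.resize(size); // return nullptr here instead to abort the read
// return buf.data();
// });
// if (rc != 0) { /* file not found or read error */ }
// write_memory_to_file() is the symmetric write path.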
// --------------------
// async version of PRNG
// --------------------
// async version of PRNG
struct AsyncPRNG
{
AsyncPRNG(uint64_t seed) : prng(seed) { assert(seed); }
// [ASYNC] Extract one random number.
template<typename T> T rand() {
std::unique_lock<std::mutex> lk(mutex);
return prng.rand<T>();
}
// [ASYNC] Returns a random number in [0, n). (Not perfectly uniform, but good enough in practice.)
uint64_t rand(uint64_t n) {
std::unique_lock<std::mutex> lk(mutex);
return prng.rand(n);
}
// Return the random seed used internally.
uint64_t get_seed() const { return prng.get_seed(); }
protected:
std::mutex mutex;
PRNG prng;
};
// Display a random seed. (For debugging)
inline std::ostream& operator<<(std::ostream& os, AsyncPRNG& prng)
{
os << "AsyncPRNG::seed = " << std::hex << prng.get_seed() << std::dec;
return os;
}
// --------------------
// Math
// --------------------
// Mathematical functions used for progress calculation and learning
namespace Math {
// Sigmoid function
// = 1.0 / (1.0 + std::exp(-x))
double sigmoid(double x);
// Differentiation of sigmoid function
// = sigmoid(x) * (1.0-sigmoid(x))
double dsigmoid(double x);
// Clamp v to the range [lo, hi].
// * In Stockfish this function lives in bitboard.h.
template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
return v < lo ? lo : v > hi ? hi : v;
}
}
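// The definitions implied by the comments above, as a sketch (the actual
// definitions live in the corresponding source file):
// double Math::sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }
// double Math::dsigmoid(double x) { double s = sigmoid(x); return s * (1.0 - s); }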
// --------------------
// Path
// --------------------
// Something like the Path class in C#: file-name manipulation.
// Method names match the C# ones.
struct Path
{
// Combine a folder name and a file name and return the result.
// If the folder name is non-empty and does not end in '/' or '\\', a '/' is inserted.
static std::string Combine(const std::string& folder, const std::string& filename)
{
if (folder.length() >= 1 && *folder.rbegin() != '/' && *folder.rbegin() != '\\')
return folder + "/" + filename;
return folder + filename;
}
// Get the file name part (excluding the folder name) from the full path expression.
static std::string GetFileName(const std::string& path)
{
// Either '\\' or '/' may be used as the separator.
auto path_index1 = path.find_last_of("\\") + 1;
auto path_index2 = path.find_last_of("/") + 1;
auto path_index = std::max(path_index1, path_index2);
return path.substr(path_index);
}
};
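// Examples (illustrative):
// Path::Combine("eval", "nn.bin") -> "eval/nn.bin"
// Path::Combine("eval/", "nn.bin") -> "eval/nn.bin"
// Path::GetFileName("eval/nn.bin") -> "nn.bin"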
extern void* aligned_malloc(size_t size, size_t align);
static void aligned_free(void* ptr) { _mm_free(ptr); }
// alignas is ignored by operator new and when STL containers allocate memory,
// so this custom allocator is used to enforce alignment there.
template <typename T>
class AlignedAllocator {
public:
using value_type = T;
AlignedAllocator() {}
AlignedAllocator(const AlignedAllocator&) {}
AlignedAllocator(AlignedAllocator&&) {}
template <typename U> AlignedAllocator(const AlignedAllocator<U>&) {}
T* allocate(std::size_t n) { return (T*)aligned_malloc(n * sizeof(T), alignof(T)); }
void deallocate(T* p, std::size_t n) { aligned_free(p); }
};
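// Typical use (illustrative): an STL container whose element type carries an
// alignment requirement keeps that alignment through this allocator, e.g.
// std::vector<SomeAlignedType, AlignedAllocator<SomeAlignedType>> v;
// where SomeAlignedType is a hypothetical alignas-qualified NNUE type.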
// --------------------
// Dependency Wrapper
// --------------------
namespace Dependency
{
// On Linux, calling getline() on a text file with '\r\n' line endings leaves a
// trailing '\r', so this wrapper removes it.
// When calling getline() on an fstream, simply write getline() instead of
// std::getline() to use this function.
extern bool getline(std::ifstream& fs, std::string& s);
// Create a folder.
// dir_name is relative to the current folder; non-ASCII names are not supported.
// Returns 0 on success, non-zero on failure.
extern int mkdir(std::string dir_name);
}
extern void aligned_free(void* ptr);
#endif // #ifndef MISC_H_INCLUDED

View file

@ -68,9 +68,6 @@ struct MoveList {
return std::find(begin(), end(), move) != end();
}
// returns the i-th element
const ExtMove at(size_t i) const { assert(0 <= i && i < size()); return begin()[i]; }
private:
ExtMove moveList[MAX_MOVES], *last;
};

View file

@ -1,7 +1,7 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef HALFKP_256X2_32_32_H
#define HALFKP_256X2_32_32_H
#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
#include "../features/feature_set.h"
#include "../features/half_kp.h"
@ -10,9 +10,7 @@
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval {
namespace NNUE {
namespace Eval::NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
@ -33,7 +31,6 @@ using OutputLayer = AffineTransform<HiddenLayer2, 1>;
using Network = Layers::OutputLayer;
} // namespace NNUE
} // namespace Eval::NNUE
} // namespace Eval
#endif // HALFKP_256X2_32_32_H
#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED

View file

@ -1,7 +1,7 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef HALFKP_384X2_32_32_H
#define HALFKP_384X2_32_32_H
#ifndef NNUE_HALFKP_384X2_32_32_H_INCLUDED
#define NNUE_HALFKP_384X2_32_32_H_INCLUDED
#include "../features/feature_set.h"
#include "../features/half_kp.h"
@ -10,9 +10,7 @@
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval {
namespace NNUE {
namespace Eval::NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
@ -33,7 +31,6 @@ using OutputLayer = AffineTransform<HiddenLayer2, 1>;
using Network = Layers::OutputLayer;
} // namespace NNUE
} // namespace Eval::NNUE
} // namespace Eval
#endif // HALFKP_384X2_32_32_H
#endif // #ifndef NNUE_HALFKP_384X2_32_32_H_INCLUDED

src/nnue/evaluate_nnue.cpp Normal file
View file

@ -0,0 +1,168 @@
// Code for calculating NNUE evaluation function
#include <fstream>
#include <iostream>
#include <set>
#include "../evaluate.h"
#include "../position.h"
#include "../misc.h"
#include "../uci.h"
#include "evaluate_nnue.h"
ExtPieceSquare kpp_board_index[PIECE_NB] = {
// convention: W - us, B - them
// viewed from other side, W and B are reversed
{ PS_NONE, PS_NONE },
{ PS_W_PAWN, PS_B_PAWN },
{ PS_W_KNIGHT, PS_B_KNIGHT },
{ PS_W_BISHOP, PS_B_BISHOP },
{ PS_W_ROOK, PS_B_ROOK },
{ PS_W_QUEEN, PS_B_QUEEN },
{ PS_W_KING, PS_B_KING },
{ PS_NONE, PS_NONE },
{ PS_NONE, PS_NONE },
{ PS_B_PAWN, PS_W_PAWN },
{ PS_B_KNIGHT, PS_W_KNIGHT },
{ PS_B_BISHOP, PS_W_BISHOP },
{ PS_B_ROOK, PS_W_ROOK },
{ PS_B_QUEEN, PS_W_QUEEN },
{ PS_B_KING, PS_W_KING },
{ PS_NONE, PS_NONE }
};
namespace Eval::NNUE {
// Input feature converter
AlignedPtr<FeatureTransformer> feature_transformer;
// Evaluation function
AlignedPtr<Network> network;
// Evaluation function file name
std::string fileName = "nn.bin";
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString() {
return "Features=" + FeatureTransformer::GetStructureString() +
",Network=" + Network::GetStructureString();
}
namespace Detail {
// Initialize the evaluation function parameters
template <typename T>
void Initialize(AlignedPtr<T>& pointer) {
pointer.reset(reinterpret_cast<T*>(aligned_malloc(sizeof(T), alignof(T))));
std::memset(pointer.get(), 0, sizeof(T));
}
// read evaluation function parameters
template <typename T>
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
std::uint32_t header;
stream.read(reinterpret_cast<char*>(&header), sizeof(header));
if (!stream || header != T::GetHashValue()) return false;
return pointer->ReadParameters(stream);
}
} // namespace Detail
// Initialize the evaluation function parameters
void Initialize() {
Detail::Initialize(feature_transformer);
Detail::Initialize(network);
}
// read the header
bool ReadHeader(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture) {
std::uint32_t version, size;
stream.read(reinterpret_cast<char*>(&version), sizeof(version));
stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
stream.read(reinterpret_cast<char*>(&size), sizeof(size));
if (!stream || version != kVersion) return false;
architecture->resize(size);
stream.read(&(*architecture)[0], size);
return !stream.fail();
}
// read evaluation function parameters
bool ReadParameters(std::istream& stream) {
std::uint32_t hash_value;
std::string architecture;
if (!ReadHeader(stream, &hash_value, &architecture)) return false;
if (hash_value != kHashValue) return false;
if (!Detail::ReadParameters(stream, feature_transformer)) return false;
if (!Detail::ReadParameters(stream, network)) return false;
return stream && stream.peek() == std::ios::traits_type::eof();
}
// Update the accumulator incrementally if possible
static void UpdateAccumulatorIfPossible(const Position& pos) {
feature_transformer->UpdateAccumulatorIfPossible(pos);
}
// Calculate the evaluation value
static Value ComputeScore(const Position& pos, bool refresh) {
auto& accumulator = pos.state()->accumulator;
if (!refresh && accumulator.computed_score) {
return accumulator.score;
}
alignas(kCacheLineSize) TransformedFeatureType
transformed_features[FeatureTransformer::kBufferSize];
feature_transformer->Transform(pos, transformed_features, refresh);
alignas(kCacheLineSize) char buffer[Network::kBufferSize];
const auto output = network->Propagate(transformed_features, buffer);
auto score = static_cast<Value>(output[0] / FV_SCALE);
accumulator.score = score;
accumulator.computed_score = true;
return accumulator.score;
}
// Load the evaluation function file
// Commands such as bench save and restore the Options, which can change EvalFile,
// so this function may be called a second time to reload the evaluation function.
void load_eval(const std::string& evalFile) {
Initialize();
fileName = evalFile;
std::ifstream stream(evalFile, std::ios::binary);
const bool result = ReadParameters(stream);
if (!result)
std::cout << "Error! " << fileName << " not found or wrong format" << std::endl;
else
std::cout << "info string NNUE " << fileName << " found & loaded" << std::endl;
}
// Evaluation function. Perform differential calculation.
Value evaluate(const Position& pos) {
return ComputeScore(pos, false);
}
// Evaluation function. Perform full calculation.
Value compute_eval(const Position& pos) {
return ComputeScore(pos, true);
}
// Update the accumulator incrementally if possible
void update_eval(const Position& pos) {
UpdateAccumulatorIfPossible(pos);
}
} // namespace Eval::NNUE
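For reference, the file layout consumed by ReadHeader() and ReadParameters() above is: version (uint32), hash (uint32), architecture-string length (uint32), the architecture string itself, then one hash-prefixed parameter block each for the feature transformer and the network. A minimal standalone sketch that dumps just the header; the main() wrapper and error handling are illustrative, not part of this commit:

// Sketch: print the NNUE file header in the order ReadHeader() reads it.
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>

int main(int argc, char* argv[]) {
  if (argc < 2) { std::cerr << "usage: inspect <nn.bin>\n"; return 1; }
  std::ifstream stream(argv[1], std::ios::binary);
  std::uint32_t version = 0, hash = 0, size = 0;
  stream.read(reinterpret_cast<char*>(&version), sizeof(version));
  stream.read(reinterpret_cast<char*>(&hash), sizeof(hash));
  stream.read(reinterpret_cast<char*>(&size), sizeof(size));
  if (!stream) { std::cerr << "truncated header\n"; return 1; }
  std::string architecture(size, '\0');
  stream.read(&architecture[0], size);
  std::cout << std::hex << "version 0x" << version << " hash 0x" << hash
            << std::dec << "\narch " << architecture << '\n';
  // A real loader additionally checks version == kVersion and
  // hash == kHashValue before reading the parameter blocks.
  return 0;
}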

50
src/nnue/evaluate_nnue.h Normal file
View file

@@ -0,0 +1,50 @@
// header used in NNUE evaluation function
#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
#define NNUE_EVALUATE_NNUE_H_INCLUDED
#include "nnue_feature_transformer.h"
#include "nnue_architecture.h"
#include <memory>
namespace Eval::NNUE {
// hash value of evaluation function structure
constexpr std::uint32_t kHashValue =
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
// Deleter for automating release of memory area
template <typename T>
struct AlignedDeleter {
void operator()(T* ptr) const {
ptr->~T();
aligned_free(ptr);
}
};
template <typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
// Input feature converter
extern AlignedPtr<FeatureTransformer> feature_transformer;
// Evaluation function
extern AlignedPtr<Network> network;
// Evaluation function file name
extern std::string fileName;
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString();
// read the header
bool ReadHeader(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture);
// read evaluation function parameters
bool ReadParameters(std::istream& stream);
} // namespace Eval::NNUE
#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED

View file

@@ -0,0 +1,251 @@
// A class template that represents the input feature set of the NNUE evaluation function
#ifndef NNUE_FEATURE_SET_H_INCLUDED
#define NNUE_FEATURE_SET_H_INCLUDED
#include "features_common.h"
#include <array>
namespace Eval::NNUE::Features {
// A class template that represents a list of values
template <typename T, T... Values>
struct CompileTimeList;
template <typename T, T First, T... Remaining>
struct CompileTimeList<T, First, Remaining...> {
static constexpr bool Contains(T value) {
return value == First || CompileTimeList<T, Remaining...>::Contains(value);
}
static constexpr std::array<T, sizeof...(Remaining) + 1>
kValues = {{First, Remaining...}};
};
template <typename T, T First, T... Remaining>
constexpr std::array<T, sizeof...(Remaining) + 1>
CompileTimeList<T, First, Remaining...>::kValues;
template <typename T>
struct CompileTimeList<T> {
static constexpr bool Contains(T /*value*/) {
return false;
}
static constexpr std::array<T, 0> kValues = {{}};
};
// Class template that adds to the beginning of the list
template <typename T, typename ListType, T Value>
struct AppendToList;
template <typename T, T... Values, T AnotherValue>
struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
using Result = CompileTimeList<T, AnotherValue, Values...>;
};
// Class template for adding to a sorted, unique list
template <typename T, typename ListType, T Value>
struct InsertToSet;
template <typename T, T First, T... Remaining, T AnotherValue>
struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
using Result = std::conditional_t<
CompileTimeList<T, First, Remaining...>::Contains(AnotherValue),
CompileTimeList<T, First, Remaining...>,
std::conditional_t<(AnotherValue < First),
CompileTimeList<T, AnotherValue, First, Remaining...>,
typename AppendToList<T, typename InsertToSet<
T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
First>::Result>>;
};
template <typename T, T Value>
struct InsertToSet<T, CompileTimeList<T>, Value> {
using Result = CompileTimeList<T, Value>;
};
// Base class of feature set
template <typename Derived>
class FeatureSetBase {
public:
// Get a list of indices with a value of 1 among the features
template <typename IndexListType>
static void AppendActiveIndices(
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
for (Color perspective : { WHITE, BLACK }) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &active[perspective]);
}
}
// Get a list of feature indices whose values have changed from the previous position
template <typename PositionType, typename IndexListType>
static void AppendChangedIndices(
const PositionType& pos, TriggerEvent trigger,
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
const auto& dp = pos.state()->dirtyPiece;
if (dp.dirty_num == 0) return;
for (Color perspective : { WHITE, BLACK }) {
reset[perspective] = false;
switch (trigger) {
case TriggerEvent::kNone:
break;
case TriggerEvent::kFriendKingMoved:
reset[perspective] =
dp.pieceId[0] == PIECE_ID_KING + perspective;
break;
case TriggerEvent::kEnemyKingMoved:
reset[perspective] =
dp.pieceId[0] == PIECE_ID_KING + ~perspective;
break;
case TriggerEvent::kAnyKingMoved:
reset[perspective] = dp.pieceId[0] >= PIECE_ID_KING;
break;
case TriggerEvent::kAnyPieceMoved:
reset[perspective] = true;
break;
default:
assert(false);
break;
}
if (reset[perspective]) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &added[perspective]);
} else {
Derived::CollectChangedIndices(
pos, trigger, perspective,
&removed[perspective], &added[perspective]);
}
}
}
};
// Class template that represents the feature set
// Internal processing runs in reverse order of the template arguments so that the runtime cost stays linear in the number of features
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
public FeatureSetBase<
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
private:
using Head = FirstFeatureType;
using Tail = FeatureSet<RemainingFeatureTypes...>;
public:
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
// number of feature dimensions
static constexpr IndexType kDimensions =
Head::kDimensions + Tail::kDimensions;
// Maximum number of feature indices that can be active (equal to 1) at the same time
static constexpr IndexType kMaxActiveDimensions =
Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
// List of triggers that force a full recalculation instead of an incremental update
using SortedTriggerSet = typename InsertToSet<TriggerEvent,
typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
// Get the name of the feature set
static std::string GetName() {
return std::string(Head::kName) + "+" + Tail::GetName();
}
private:
// Get a list of indices with a value of 1 among the features
template <typename IndexListType>
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexListType* const active) {
Tail::CollectActiveIndices(pos, trigger, perspective, active);
if (Head::kRefreshTrigger == trigger) {
const auto start = active->size();
Head::AppendActiveIndices(pos, perspective, active);
for (auto i = start; i < active->size(); ++i) {
(*active)[i] += Tail::kDimensions;
}
}
}
// Get a list of feature indices whose values have changed from the previous position
template <typename IndexListType>
static void CollectChangedIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexListType* const removed, IndexListType* const added) {
Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
if (Head::kRefreshTrigger == trigger) {
const auto start_removed = removed->size();
const auto start_added = added->size();
Head::AppendChangedIndices(pos, perspective, removed, added);
for (auto i = start_removed; i < removed->size(); ++i) {
(*removed)[i] += Tail::kDimensions;
}
for (auto i = start_added; i < added->size(); ++i) {
(*added)[i] += Tail::kDimensions;
}
}
}
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
// Class template that represents the feature set
// Specialization with one template argument
template <typename FeatureType>
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
public:
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
// number of feature dimensions
static constexpr IndexType kDimensions = FeatureType::kDimensions;
// Maximum number of feature indices that can be active (equal to 1) at the same time
static constexpr IndexType kMaxActiveDimensions =
FeatureType::kMaxActiveDimensions;
// List of triggers that force a full recalculation instead of an incremental update
using SortedTriggerSet =
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
// Get the name of the feature set
static std::string GetName() {
return FeatureType::kName;
}
private:
// Get a list of indices with a value of 1 among the features
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const active) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendActiveIndices(pos, perspective, active);
}
}
// Get a list of feature indices whose values have changed from the previous position
static void CollectChangedIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const removed, IndexList* const added) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendChangedIndices(pos, perspective, removed, added);
}
}
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
} // namespace Eval::NNUE::Features
#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
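The CompileTimeList / InsertToSet machinery above builds the sorted, duplicate-free list of refresh triggers entirely at compile time. A minimal self-contained analogue of the Contains() recursion, verifiable with static_assert; this is an illustration of the technique, not engine code:

// Membership test over a compile-time list of non-type values.
enum class Trigger { kNone, kFriendKingMoved, kEnemyKingMoved };

template <Trigger... Values>
struct TriggerList;

template <Trigger First, Trigger... Remaining>
struct TriggerList<First, Remaining...> {
  static constexpr bool Contains(Trigger value) {
    return value == First || TriggerList<Remaining...>::Contains(value);
  }
};

template <>
struct TriggerList<> {
  static constexpr bool Contains(Trigger) { return false; }
};

using Triggers = TriggerList<Trigger::kFriendKingMoved, Trigger::kEnemyKingMoved>;
static_assert(Triggers::Contains(Trigger::kEnemyKingMoved), "member is found");
static_assert(!Triggers::Contains(Trigger::kNone), "non-member is rejected");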

View file

@@ -0,0 +1,37 @@
// Common header of input features of NNUE evaluation function
#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
#define NNUE_FEATURES_COMMON_H_INCLUDED
#include "../../evaluate.h"
#include "../nnue_common.h"
namespace Eval::NNUE::Features {
// Index list type
class IndexList;
// Class template that represents the feature set
template <typename... FeatureTypes>
class FeatureSet;
// Events that trigger a full recalculation instead of an incremental update
enum class TriggerEvent {
kNone, // Calculate the difference whenever possible
kFriendKingMoved, // calculate all when own king moves
kEnemyKingMoved, // do all calculations when enemy king moves
kAnyKingMoved, // do all calculations if either king moves
kAnyPieceMoved, // always do all calculations
};
// The side to move or the opposing side
enum class Side {
kFriend, // side to move
kEnemy, // opponent
};
} // namespace Eval::NNUE::Features
#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED

View file

@@ -0,0 +1,75 @@
// Definition of input features HalfKP of NNUE evaluation function
#include "half_kp.h"
#include "index_list.h"
namespace Eval::NNUE::Features {
// Compute the feature index from the king square and PieceSquare
template <Side AssociatedKing>
inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) {
return static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k) + p;
}
// Get the piece information
template <Side AssociatedKing>
inline void HalfKP<AssociatedKing>::GetPieces(
const Position& pos, Color perspective,
PieceSquare** pieces, Square* sq_target_k) {
*pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
const PieceId target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceId>(PIECE_ID_KING + perspective) :
static_cast<PieceId>(PIECE_ID_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - PS_W_KING) % SQUARE_NB);
}
// Get a list of indices with a value of 1 among the features
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
PieceSquare* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) {
if (pieces[i] != PS_NONE) {
active->push_back(MakeIndex(sq_target_k, pieces[i]));
}
}
}
// Get a list of feature indices whose values have changed from the previous position
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
PieceSquare* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceId[i] >= PIECE_ID_KING) continue;
const auto old_p = static_cast<PieceSquare>(
dp.old_piece[i].from[perspective]);
if (old_p != PS_NONE) {
removed->push_back(MakeIndex(sq_target_k, old_p));
}
const auto new_p = static_cast<PieceSquare>(
dp.new_piece[i].from[perspective]);
if (new_p != PS_NONE) {
added->push_back(MakeIndex(sq_target_k, new_p));
}
}
}
template class HalfKP<Side::kFriend>;
template class HalfKP<Side::kEnemy>;
} // namespace Eval::NNUE::Features
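MakeIndex() above linearizes a (king square, piece square) pair as PS_END * sq_k + p, so each of the 64 king squares owns a contiguous block of PS_END feature indices. A worked check of that layout; the concrete value PS_END = 641 (hence 64 * 641 = 41024 total HalfKP dimensions) is quoted as background on this configuration, since the enum itself is defined elsewhere:

// Sketch of the HalfKP index arithmetic; kPsEnd mirrors PS_END = 641.
#include <cstdint>

constexpr std::uint32_t kPsEnd = 641;

constexpr std::uint32_t MakeIndex(std::uint32_t king_sq, std::uint32_t piece_sq) {
  return kPsEnd * king_sq + piece_sq;
}

static_assert(MakeIndex(0, 0) == 0, "block for the first king square starts at 0");
static_assert(MakeIndex(1, 0) == 641, "each king square owns a PS_END-sized block");
static_assert(64 * kPsEnd == 41024, "total HalfKP feature dimensions");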

View file

@@ -0,0 +1,52 @@
// Definition of input features HalfKP of NNUE evaluation function
#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
#define NNUE_FEATURES_HALF_KP_H_INCLUDED
#include "../../evaluate.h"
#include "features_common.h"
namespace Eval::NNUE::Features {
// Feature HalfKP: Combination of the position of own king or enemy king
// and the position of pieces other than kings
template <Side AssociatedKing>
class HalfKP {
public:
// Feature name
static constexpr const char* kName =
(AssociatedKing == Side::kFriend) ? "HalfKP(Friend)" : "HalfKP(Enemy)";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
// number of feature dimensions
static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
// Maximum number of feature indices that can be active (equal to 1) at the same time
static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING;
// Trigger for a full recalculation instead of an incremental update
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get a list of feature indices whose values have changed from the previous position
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// Compute the feature index from the king square and PieceSquare
static IndexType MakeIndex(Square sq_k, PieceSquare p);
private:
// Get the piece information
static void GetPieces(const Position& pos, Color perspective,
PieceSquare** pieces, Square* sq_target_k);
};
} // namespace Eval::NNUE::Features
#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED

View file

@@ -0,0 +1,46 @@
// Definition of index list of input features
#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
#include "../../position.h"
#include "../nnue_architecture.h"
namespace Eval::NNUE::Features {
// Class template used for feature index list
template <typename T, std::size_t MaxSize>
class ValueList {
public:
std::size_t size() const { return size_; }
void resize(std::size_t size) { size_ = size; }
void push_back(const T& value) { values_[size_++] = value; }
T& operator[](std::size_t index) { return values_[index]; }
T* begin() { return values_; }
T* end() { return values_ + size_; }
const T& operator[](std::size_t index) const { return values_[index]; }
const T* begin() const { return values_; }
const T* end() const { return values_ + size_; }
void swap(ValueList& other) {
const std::size_t max_size = std::max(size_, other.size_);
for (std::size_t i = 0; i < max_size; ++i) {
std::swap(values_[i], other.values_[i]);
}
std::swap(size_, other.size_);
}
private:
T values_[MaxSize];
std::size_t size_ = 0;
};
// Type of the feature index list
class IndexList
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
};
} // namespace Eval::NNUE::Features
#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
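ValueList behaves like a fixed-capacity std::vector that lives entirely on the stack, so collecting feature indices never allocates. A usage sketch with a trimmed copy of the class; the capacity 30 mirrors HalfKP's kMaxActiveDimensions, since at most 30 non-king pieces can be on the board:

#include <cstddef>
#include <cstdint>
#include <iostream>

template <typename T, std::size_t MaxSize>
class ValueList {
 public:
  std::size_t size() const { return size_; }
  void push_back(const T& value) { values_[size_++] = value; }
  const T* begin() const { return values_; }
  const T* end() const { return values_ + size_; }
 private:
  T values_[MaxSize];
  std::size_t size_ = 0;
};

int main() {
  ValueList<std::uint32_t, 30> active;  // capacity fixed at compile time
  active.push_back(123);
  active.push_back(40999);
  for (std::uint32_t index : active)
    std::cout << index << '\n';  // prints the indices in insertion order
}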

View file

@@ -0,0 +1,167 @@
// Definition of layer AffineTransform of NNUE evaluation function
#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
#include <iostream>
#include "../nnue_common.h"
namespace Eval::NNUE::Layers {
// affine transformation layer
template <typename PreviousLayer, IndexType OutputDimensions>
class AffineTransform {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = std::int32_t;
static_assert(std::is_same<InputType, std::uint8_t>::value, "");
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = OutputDimensions;
static constexpr IndexType kPaddedInputDimensions =
CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xCC03DAE4u;
hash_value += kOutputDimensions;
hash_value ^= PreviousLayer::GetHashValue() >> 1;
hash_value ^= PreviousLayer::GetHashValue() << 31;
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "AffineTransform[" +
std::to_string(kOutputDimensions) + "<-" +
std::to_string(kInputDimensions) + "](" +
PreviousLayer::GetStructureString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
if (!previous_layer_.ReadParameters(stream)) return false;
stream.read(reinterpret_cast<char*>(biases_),
kOutputDimensions * sizeof(BiasType));
stream.read(reinterpret_cast<char*>(weights_),
kOutputDimensions * kPaddedInputDimensions *
sizeof(WeightType));
return !stream.fail();
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
const auto input = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m256i kOnes = _mm256_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m256i*>(input);
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m128i kOnes = _mm_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m128i*>(input);
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
#endif
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType offset = i * kPaddedInputDimensions;
#if defined(USE_AVX2)
__m256i sum = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, biases_[i]);
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i product = _mm256_maddubs_epi16(
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(): binaries
// compiled with g++ in MSYS2 otherwise crash here, because the output memory
// is not aligned even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&input_vector[j]), _mm256_load_si256(&row[j]));
product = _mm256_madd_epi16(product, kOnes);
sum = _mm256_add_epi32(sum, product);
}
sum = _mm256_hadd_epi32(sum, sum);
sum = _mm256_hadd_epi32(sum, sum);
const __m128i lo = _mm256_extracti128_si256(sum, 0);
const __m128i hi = _mm256_extracti128_si256(sum, 1);
output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
#elif defined(USE_SSSE3)
__m128i sum = _mm_cvtsi32_si128(biases_[i]);
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i product = _mm_maddubs_epi16(
_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
product = _mm_madd_epi16(product, kOnes);
sum = _mm_add_epi32(sum, product);
}
sum = _mm_hadd_epi32(sum, sum);
sum = _mm_hadd_epi32(sum, sum);
output[i] = _mm_cvtsi128_si32(sum);
#elif defined(IS_ARM)
int32x4_t sum = {biases_[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
sum = vpadalq_s16(sum, product);
}
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
#else
OutputType sum = biases_[i];
for (IndexType j = 0; j < kInputDimensions; ++j) {
sum += weights_[offset + j] * input[j];
}
output[i] = sum;
#endif
}
return output;
}
private:
// parameter type
using BiasType = OutputType;
using WeightType = std::int8_t;
// Make the learning class a friend
friend class Trainer<AffineTransform>;
// the layer immediately before this layer
PreviousLayer previous_layer_;
// parameter
alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
alignas(kCacheLineSize)
WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
};
} // namespace Eval::NNUE::Layers
#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
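Each of the AVX2 / SSSE3 / NEON branches in Propagate() above computes the same quantity as the generic fallback: for every output, an int32 dot product of the uint8 input vector with one int8 weight row, plus a bias. A scalar reference of that contract over flat arrays, convenient for cross-checking the vector paths (a sketch, not the engine's interface; note that weight rows are strided by the padded input width):

#include <cstdint>

// output[i] = biases[i] + dot(weight row i, input)
void AffinePropagate(const std::uint8_t* input, std::int32_t* output,
                     const std::int32_t* biases, const std::int8_t* weights,
                     int input_dims, int padded_input_dims, int output_dims) {
  for (int i = 0; i < output_dims; ++i) {
    std::int32_t sum = biases[i];
    const std::int8_t* row = &weights[i * padded_input_dims];
    for (int j = 0; j < input_dims; ++j)
      sum += row[j] * input[j];
    output[i] = sum;
  }
}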

View file

@@ -0,0 +1,178 @@
// Definition of layer ClippedReLU of NNUE evaluation function
#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
#include "../nnue_common.h"
namespace Eval::NNUE::Layers {
// Clipped ReLU
template <typename PreviousLayer>
class ClippedReLU {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = std::uint8_t;
static_assert(std::is_same<InputType, std::int32_t>::value, "");
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0x538D24C7u;
hash_value += PreviousLayer::GetHashValue();
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "ClippedReLU[" +
std::to_string(kOutputDimensions) + "](" +
PreviousLayer::GetStructureString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
return previous_layer_.ReadParameters(stream);
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
const auto input = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
const __m256i kZero = _mm256_setzero_si256();
const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
const auto in = reinterpret_cast<const __m256i*>(input);
const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(): binaries
// compiled with g++ in MSYS2 otherwise crash here, because the output memory
// is not aligned even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 0]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 1])), kWeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 2]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 3])), kWeightScaleBits);
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
const __m128i kZero = _mm_setzero_si128();
#ifndef USE_SSE41
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
const auto in = reinterpret_cast<const __m128i*>(input);
const auto out = reinterpret_cast<__m128i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 0]),
_mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 2]),
_mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
_mm_store_si128(&out[i],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
const auto in = reinterpret_cast<const int32x4_t*>(input);
const auto out = reinterpret_cast<int8x8_t*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
int16x8_t shifted;
const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
}
constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
#else
constexpr IndexType kStart = 0;
#endif
for (IndexType i = kStart; i < kInputDimensions; ++i) {
output[i] = static_cast<OutputType>(
std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
}
return output;
}
private:
// Make the learning class a friend
friend class Trainer<ClippedReLU>;
// the layer immediately before this layer
PreviousLayer previous_layer_;
};
} // namespace Eval::NNUE::Layers
#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
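Whatever the SIMD width, the layer's contract is out = clamp(in >> kWeightScaleBits, 0, 127), exactly the scalar tail loop above, with kWeightScaleBits = 6 from nnue_common.h. A few worked values as compile-time checks (illustrative; needs C++14 or later for constexpr std::min/std::max):

#include <algorithm>
#include <cstdint>

constexpr int kWeightScaleBits = 6;

constexpr std::uint8_t ClippedReLU(std::int32_t x) {
  return static_cast<std::uint8_t>(
      std::max(0, std::min(127, x >> kWeightScaleBits)));
}

static_assert(ClippedReLU(12345) == 127, "large inputs saturate at 127");
static_assert(ClippedReLU(-64) == 0, "negative inputs clip to zero");
static_assert(ClippedReLU(640) == 10, "640 >> 6 == 10");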

View file

@@ -1,17 +1,11 @@
// NNUE evaluation function layer InputSlice definition
#ifndef _NNUE_LAYERS_INPUT_SLICE_H_
#define _NNUE_LAYERS_INPUT_SLICE_H_
#if defined(EVAL_NNUE)
#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
#include "../nnue_common.h"
namespace Eval {
namespace NNUE {
namespace Layers {
namespace Eval::NNUE::Layers {
// input layer
template <IndexType OutputDimensions, IndexType Offset = 0>
@@ -48,11 +42,6 @@ class InputSlice {
return true;
}
// write parameters
bool WriteParameters(std::ostream& /*stream*/) const {
return true;
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features,
@@ -65,10 +54,4 @@ class InputSlice {
} // namespace Layers
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED

141
src/nnue/layers/sum.h Normal file
View file

@@ -0,0 +1,141 @@
// Definition of layer Sum of NNUE evaluation function
#ifndef NNUE_LAYERS_SUM_H_INCLUDED
#define NNUE_LAYERS_SUM_H_INCLUDED
#include "../nnue_common.h"
namespace Eval::NNUE::Layers {
// Layer that sums the output of multiple layers
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
class Sum : public Sum<RemainingPreviousLayers...> {
private:
using Head = FirstPreviousLayer;
using Tail = Sum<RemainingPreviousLayers...>;
public:
// Input/output type
using InputType = typename Head::OutputType;
using OutputType = InputType;
static_assert(std::is_same<InputType, typename Tail::InputType>::value, "");
// number of input/output dimensions
static constexpr IndexType kInputDimensions = Head::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
static_assert(kInputDimensions == Tail::kInputDimensions, "");
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
std::max(Head::kBufferSize + kSelfBufferSize, Tail::kBufferSize);
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xBCE400B4u;
hash_value ^= Head::GetHashValue() >> 1;
hash_value ^= Head::GetHashValue() << 31;
hash_value ^= Tail::GetHashValue() >> 2;
hash_value ^= Tail::GetHashValue() << 30;
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "Sum[" +
std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
if (!Tail::ReadParameters(stream)) return false;
return previous_layer_.ReadParameters(stream);
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
Tail::Propagate(transformed_features, buffer);
const auto head_output = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output[i] += head_output[i];
}
return output;
}
protected:
// A string that represents the list of layers to be summed
static std::string GetSummandsString() {
return Head::GetStructureString() + "," + Tail::GetSummandsString();
}
// Make the learning class a friend
friend class Trainer<Sum>;
// the layer immediately before this layer
FirstPreviousLayer previous_layer_;
};
// Layer that sums the output of multiple layers (when there is one template argument)
template <typename PreviousLayer>
class Sum<PreviousLayer> {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = InputType;
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize = PreviousLayer::kBufferSize;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xBCE400B4u;
hash_value ^= PreviousLayer::GetHashValue() >> 1;
hash_value ^= PreviousLayer::GetHashValue() << 31;
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "Sum[" +
std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
return previous_layer_.ReadParameters(stream);
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
return previous_layer_.Propagate(transformed_features, buffer);
}
protected:
// A string that represents the list of layers to be summed
static std::string GetSummandsString() {
return PreviousLayer::GetStructureString();
}
// Make the learning class a friend
friend class Trainer<Sum>;
// the layer immediately before this layer
PreviousLayer previous_layer_;
};
} // namespace Eval::NNUE::Layers
#endif // #ifndef NNUE_LAYERS_SUM_H_INCLUDED
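Functionally, the variadic Sum layer is elementwise addition: the tail layers propagate into the shared buffer first, then the head layer's output is added index by index, so out[i] = l1[i] + l2[i] + ... A scalar sketch of that contract over plain arrays (illustrative; the real layer fuses this into the template recursion above):

#include <cstdint>

// output[i] = sum over all summand layers of their i-th output.
void SumPropagate(const std::int32_t* const* summands, int num_summands,
                  std::int32_t* output, int dims) {
  for (int i = 0; i < dims; ++i) {
    std::int32_t sum = 0;
    for (int s = 0; s < num_summands; ++s)
      sum += summands[s][i];
    output[i] = sum;
  }
}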

View file

@@ -0,0 +1,22 @@
// Class for difference calculation of NNUE evaluation function
#ifndef NNUE_ACCUMULATOR_H_INCLUDED
#define NNUE_ACCUMULATOR_H_INCLUDED
#include "nnue_architecture.h"
namespace Eval::NNUE {
// Class that holds the result of affine transformation of input features
// The evaluation value that is the final output is kept here as well
struct alignas(32) Accumulator {
std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
Value score = VALUE_ZERO;
bool computed_accumulation = false;
bool computed_score = false;
};
} // namespace Eval::NNUE
#endif // NNUE_ACCUMULATOR_H_INCLUDED

View file

@@ -0,0 +1,21 @@
// Input features and network structure used in NNUE evaluation function
#ifndef NNUE_ARCHITECTURE_H_INCLUDED
#define NNUE_ARCHITECTURE_H_INCLUDED
// include a header that defines the input features and network structure
#include "architectures/halfkp_256x2-32-32.h"
//#include "architectures/halfkp_384x2-32-32.h"
namespace Eval::NNUE {
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
static_assert(Network::kOutputDimensions == 1, "");
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
// List of triggers that force a full recalculation instead of an incremental update
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
} // namespace Eval::NNUE
#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED

62
src/nnue/nnue_common.h Normal file
View file

@@ -0,0 +1,62 @@
// Constants used in NNUE evaluation function
#ifndef NNUE_COMMON_H_INCLUDED
#define NNUE_COMMON_H_INCLUDED
#if defined(USE_AVX2)
#include <immintrin.h>
#elif defined(USE_SSE41)
#include <smmintrin.h>
#elif defined(USE_SSSE3)
#include <tmmintrin.h>
#elif defined(USE_SSE2)
#include <emmintrin.h>
#endif
namespace Eval::NNUE {
// A constant that represents the version of the evaluation function file
constexpr std::uint32_t kVersion = 0x7AF32F16u;
// Constant used in evaluation value calculation
constexpr int FV_SCALE = 16;
constexpr int kWeightScaleBits = 6;
// Size of cache line (in bytes)
constexpr std::size_t kCacheLineSize = 64;
// SIMD width (in bytes)
#if defined(USE_AVX2)
constexpr std::size_t kSimdWidth = 32;
#elif defined(USE_SSE2)
constexpr std::size_t kSimdWidth = 16;
#elif defined(IS_ARM)
constexpr std::size_t kSimdWidth = 16;
#endif
constexpr std::size_t kMaxSimdWidth = 32;
// Type of input feature after conversion
using TransformedFeatureType = std::uint8_t;
// index type
using IndexType = std::uint32_t;
// Forward declaration of learning class template
template <typename Layer>
class Trainer;
// Round n up to the nearest multiple of base
template <typename IntType>
constexpr IntType CeilToMultiple(IntType n, IntType base) {
return (n + base - 1) / base * base;
}
} // namespace Eval::NNUE
#endif // #ifndef NNUE_COMMON_H_INCLUDED
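CeilToMultiple() uses the usual integer identity (n + base - 1) / base * base, which rounds n up to the next multiple of base and leaves exact multiples unchanged; it is what pads the affine layer's input dimension up to the SIMD width. Worked checks on a self-contained copy, for illustration:

template <typename IntType>
constexpr IntType CeilToMultiple(IntType n, IntType base) {
  return (n + base - 1) / base * base;
}

static_assert(CeilToMultiple(50, 32) == 64, "50 rounds up to 64");
static_assert(CeilToMultiple(64, 32) == 64, "exact multiples are unchanged");
static_assert(CeilToMultiple(41024, 32) == 41024, "HalfKP input needs no padding");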

View file

@@ -0,0 +1,378 @@
// A class that converts the input features of the NNUE evaluation function
#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED
#include "nnue_common.h"
#include "nnue_architecture.h"
#include "features/index_list.h"
#include <cstring> // std::memset()
namespace Eval::NNUE {
// Input feature converter
class FeatureTransformer {
private:
// number of output dimensions for one side
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
public:
// output type
using OutputType = TransformedFeatureType;
// number of input/output dimensions
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
// size of forward propagation buffer
static constexpr std::size_t kBufferSize =
kOutputDimensions * sizeof(OutputType);
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
return RawFeatures::kHashValue ^ kOutputDimensions;
}
// a string representing the structure
static std::string GetStructureString() {
return RawFeatures::GetName() + "[" +
std::to_string(kInputDimensions) + "->" +
std::to_string(kHalfDimensions) + "x2]";
}
// read parameters
bool ReadParameters(std::istream& stream) {
stream.read(reinterpret_cast<char*>(biases_),
kHalfDimensions * sizeof(BiasType));
stream.read(reinterpret_cast<char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
// Update the accumulator incrementally if possible
bool UpdateAccumulatorIfPossible(const Position& pos) const {
const auto now = pos.state();
if (now->accumulator.computed_accumulation) {
return true;
}
const auto prev = now->previous;
if (prev && prev->accumulator.computed_accumulation) {
UpdateAccumulator(pos);
return true;
}
return false;
}
// convert input features
void Transform(const Position& pos, OutputType* output, bool refresh) const {
if (refresh || !UpdateAccumulatorIfPossible(pos)) {
RefreshAccumulator(pos);
}
const auto& accumulation = pos.state()->accumulator.accumulation;
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256();
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
const __m128i kZero = _mm_setzero_si128();
#ifndef USE_SSE41
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
#endif
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
for (IndexType p = 0; p < 2; ++p) {
const IndexType offset = kHalfDimensions * p;
#if defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 =
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(): binaries
// compiled with g++ in MSYS2 otherwise crash here, because the output memory
// is not aligned even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m256i sum1 =
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
}
#elif defined(USE_SSSE3)
auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
_mm_store_si128(&out[j],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
}
#elif defined(IS_ARM)
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][0])[j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][i])[j]);
}
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
}
output[offset + j] = static_cast<OutputType>(
std::max<int>(0, std::min<int>(127, sum)));
}
#endif
}
}
private:
// Compute the accumulator from scratch, without incremental updates
void RefreshAccumulator(const Position& pos) const {
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList active_indices[2];
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices);
for (Color perspective : { WHITE, BLACK }) {
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
#else
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
#endif
}
#elif defined(USE_SSE2)
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
}
#elif defined(IS_ARM)
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
#endif
}
}
}
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
}
// Compute the accumulator incrementally from the previous position's value
void UpdateAccumulator(const Position& pos) const {
const auto prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2];
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
for (Color perspective : { WHITE, BLACK }) {
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
#endif
if (reset[perspective]) {
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
} else { // Incremental update for features whose value changed from 1 to 0
std::memcpy(accumulator.accumulation[perspective][i],
prev_accumulator.accumulation[perspective][i],
kHalfDimensions * sizeof(BiasType));
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
}
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
}
#elif defined(IS_ARM)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vsubq_s16(accumulation[j], column[j]);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] -=
weights_[offset + j];
}
#endif
}
}
{ // Incremental update for features whose value changed from 0 to 1
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
}
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
}
#elif defined(IS_ARM)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] +=
weights_[offset + j];
}
#endif
}
}
}
}
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
}
// parameter type
using BiasType = std::int16_t;
using WeightType = std::int16_t;
// Make the learning class a friend
friend class Trainer<FeatureTransformer>;
// parameter
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
alignas(kCacheLineSize)
WeightType weights_[kHalfDimensions * kInputDimensions];
};
} // namespace Eval::NNUE
#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
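Stripped of the SIMD and the per-trigger indexing, the two private methods above reduce to: a refresh starts from the biases and adds the weight column of every active feature, while an update copies the previous accumulator, subtracts the columns of removed features and adds those of added features. A scalar sketch of both, as hypothetical free functions over flat arrays rather than the engine's actual interface:

#include <cstdint>

// Refresh: acc = biases + sum of weight columns of all active features.
void Refresh(std::int16_t* acc, const std::int16_t* biases,
             const std::int16_t* weights, int half_dims,
             const std::uint32_t* active, int num_active) {
  for (int j = 0; j < half_dims; ++j)
    acc[j] = biases[j];
  for (int k = 0; k < num_active; ++k) {
    const std::int16_t* column = &weights[active[k] * half_dims];
    for (int j = 0; j < half_dims; ++j)
      acc[j] += column[j];
  }
}

// Update: acc = prev - removed feature columns + added feature columns.
void Update(std::int16_t* acc, const std::int16_t* prev,
            const std::int16_t* weights, int half_dims,
            const std::uint32_t* removed, int num_removed,
            const std::uint32_t* added, int num_added) {
  for (int j = 0; j < half_dims; ++j)
    acc[j] = prev[j];
  for (int k = 0; k < num_removed; ++k) {
    const std::int16_t* column = &weights[removed[k] * half_dims];
    for (int j = 0; j < half_dims; ++j)
      acc[j] -= column[j];
  }
  for (int k = 0; k < num_added; ++k) {
    const std::int16_t* column = &weights[added[k] * half_dims];
    for (int j = 0; j < half_dims; ++j)
      acc[j] += column[j];
  }
}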

View file

@@ -80,7 +80,7 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) {
{
StateInfo st;
Position p;
p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread());
p.set(pos.fen(), pos.is_chess960(), pos.use_nnue(), &st, pos.this_thread());
Tablebases::ProbeState s1, s2;
Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1);
int dtz = Tablebases::probe_dtz(p, &s2);
@@ -154,7 +154,7 @@ void Position::init() {
/// This function is not very robust - make sure that input FENs are correct,
/// this is assumed to be the responsibility of the GUI.
Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Thread* th) {
Position& Position::set(const string& fenStr, bool isChess960, bool useNnue, StateInfo* si, Thread* th) {
/*
A FEN string defines a particular position using only the ASCII character set.
@@ -200,14 +200,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
st = si;
#if defined(EVAL_NNUE)
// Clear evalList. (It was already cleared by the memset to zero above...)
evalList.clear();
// When updating the PieceList we have to record which piece goes where;
// this is a counter of how many piece numbers have been handed out so far
PieceNumber next_piece_number = PIECE_NUMBER_ZERO;
#endif // defined(EVAL_NNUE)
// Each piece on board gets a unique ID used to track the piece later
PieceId piece_id, next_piece_id = PIECE_ID_ZERO;
ss >> std::noskipws;
@@ -225,13 +219,15 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
auto pc = Piece(idx);
put_piece(pc, sq);
#if defined(EVAL_NNUE)
PieceNumber piece_no =
(idx == W_KING) ? PIECE_NUMBER_WKING : // white king
(idx == B_KING) ? PIECE_NUMBER_BKING : // black king
next_piece_number++; // otherwise
evalList.put_piece(piece_no, sq, pc); // Place piece pc on square sq
#endif // defined(EVAL_NNUE)
if (useNnue)
{
// Kings get a fixed ID, other pieces get ID in order of placement
piece_id =
(idx == W_KING) ? PIECE_ID_WKING :
(idx == B_KING) ? PIECE_ID_BKING :
next_piece_id++;
evalList.put_piece(piece_id, sq, pc);
}
++sq;
}
@@ -299,13 +295,11 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK);
chess960 = isChess960;
nnue = useNnue;
thisThread = th;
set_state(st);
assert(pos_is_ok());
#if defined(EVAL_NNUE)
assert(evalList.is_valid(*this));
#endif // defined(EVAL_NNUE)
return *this;
}
@@ -410,7 +404,7 @@ Position& Position::set(const string& code, Color c, StateInfo* si) {
string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/"
+ sides[1] + char(8 - sides[1].length() + '0') + "/8 w - - 0 10";
return set(fenStr, false, si, nullptr);
return set(fenStr, false, use_nnue(), si, nullptr);
}
@@ -727,10 +721,13 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
++st->rule50;
++st->pliesFromNull;
#if defined(EVAL_NNUE)
// Used by NNUE
st->accumulator.computed_accumulation = false;
st->accumulator.computed_score = false;
#endif // defined(EVAL_NNUE)
PieceId dp0 = PIECE_ID_NONE;
PieceId dp1 = PIECE_ID_NONE;
auto& dp = st->dirtyPiece;
dp.dirty_num = 1;
Color us = sideToMove;
Color them = ~us;
@@ -739,20 +736,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
Piece pc = piece_on(from);
Piece captured = type_of(m) == ENPASSANT ? make_piece(them, PAWN) : piece_on(to);
#if defined(EVAL_NNUE)
PieceNumber piece_no0 = PIECE_NUMBER_NB;
PieceNumber piece_no1 = PIECE_NUMBER_NB;
#endif // defined(EVAL_NNUE)
assert(color_of(pc) == us);
assert(captured == NO_PIECE || color_of(captured) == (type_of(m) != CASTLING ? them : us));
assert(type_of(captured) != KING);
#if defined(EVAL_NNUE)
auto& dp = st->dirtyPiece;
dp.dirty_num = 1;
#endif // defined(EVAL_NNUE)
if (type_of(m) == CASTLING)
{
assert(pc == make_piece(us, KING));
@@ -782,37 +769,23 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
assert(relative_rank(us, to) == RANK_6);
assert(piece_on(to) == NO_PIECE);
assert(piece_on(capsq) == make_piece(them, PAWN));
#if defined(EVAL_NNUE)
piece_no1 = piece_no_of(capsq);
#endif // defined(EVAL_NNUE)
//board[capsq] = NO_PIECE; // Not done by remove_piece()
#if defined(EVAL_NNUE)
evalList.piece_no_list_board[capsq] = PIECE_NUMBER_NB;
#endif // defined(EVAL_NNUE)
}
else {
#if defined(EVAL_NNUE)
piece_no1 = piece_no_of(capsq);
#endif // defined(EVAL_NNUE)
}
st->pawnKey ^= Zobrist::psq[captured][capsq];
}
else {
else
st->nonPawnMaterial[them] -= PieceValue[MG][captured];
#if defined(EVAL_NNUE)
piece_no1 = piece_no_of(capsq);
#endif // defined(EVAL_NNUE)
}
if (use_nnue())
dp1 = piece_id_on(capsq);
// Update board and piece lists
remove_piece(capsq);
if (type_of(m) == ENPASSANT)
{
board[capsq] = NO_PIECE;
}
// Update material hash key and prefetch access to materialTable
k ^= Zobrist::psq[captured][capsq];
@@ -822,20 +795,17 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
// Reset rule 50 counter
st->rule50 = 0;
#if defined(EVAL_NNUE)
dp.dirty_num = 2; // 2 pieces moved
dp.pieceNo[1] = piece_no1;
dp.changed_piece[1].old_piece = evalList.bona_piece(piece_no1);
// Do not use Eval::EvalList::put_piece() because the piece is removed
// from the game, and the corresponding elements of the piece lists
// need to be Eval::BONA_PIECE_ZERO.
evalList.set_piece_on_board(piece_no1, Eval::BONA_PIECE_ZERO, Eval::BONA_PIECE_ZERO, capsq);
// Set piece_no_list_board[capsq] directly to PIECE_NUMBER_NB because it
// will not be overwritten with pc if the move type is en passant.
evalList.piece_no_list_board[capsq] = PIECE_NUMBER_NB;
dp.changed_piece[1].new_piece = evalList.bona_piece(piece_no1);
#endif // defined(EVAL_NNUE)
if (use_nnue())
{
dp.dirty_num = 2; // 2 pieces moved
dp.pieceId[1] = dp1;
dp.old_piece[1] = evalList.piece_with_id(dp1);
// Do not use EvalList::put_piece() because the piece is removed
// from the game, and the corresponding elements of the piece lists
// need to be PS_NONE.
evalList.put_piece(dp1, capsq, NO_PIECE);
dp.new_piece[1] = evalList.piece_with_id(dp1);
}
}
// Update hash key
@@ -858,19 +828,16 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
// Move the piece. The tricky Chess960 castling is handled earlier
if (type_of(m) != CASTLING) {
#if defined(EVAL_NNUE)
piece_no0 = piece_no_of(from);
#endif // defined(EVAL_NNUE)
move_piece(from, to);
#if defined(EVAL_NNUE)
dp.pieceNo[0] = piece_no0;
dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0);
evalList.piece_no_list_board[from] = PIECE_NUMBER_NB;
evalList.put_piece(piece_no0, to, pc);
dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0);
#endif // defined(EVAL_NNUE)
if (use_nnue())
{
dp0 = piece_id_on(from);
dp.pieceId[0] = dp0;
dp.old_piece[0] = evalList.piece_with_id(dp0);
evalList.put_piece(dp0, to, pc);
dp.new_piece[0] = evalList.piece_with_id(dp0);
}
}
// If the moving piece is a pawn do some special extra work
@@ -894,14 +861,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
remove_piece(to);
put_piece(promotion, to);
#if defined(EVAL_NNUE)
piece_no0 = piece_no_of(to);
//dp.pieceNo[0] = piece_no0;
//dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0);
assert(evalList.piece_no_list_board[from] == PIECE_NUMBER_NB);
evalList.put_piece(piece_no0, to, promotion);
dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0);
#endif // defined(EVAL_NNUE)
if (use_nnue())
{
dp0 = piece_id_on(to);
evalList.put_piece(dp0, to, promotion);
dp.new_piece[0] = evalList.piece_with_id(dp0);
}
// Update hash keys
k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to];
@ -953,12 +918,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
}
}
//std::cout << *this << std::endl;
assert(pos_is_ok());
#if defined(EVAL_NNUE)
assert(evalList.is_valid(*this));
#endif // defined(EVAL_NNUE)
}
@ -988,11 +948,6 @@ void Position::undo_move(Move m) {
remove_piece(to);
pc = make_piece(us, PAWN);
put_piece(pc, to);
#if defined(EVAL_NNUE)
PieceNumber piece_no0 = st->dirtyPiece.pieceNo[0];
evalList.put_piece(piece_no0, to, pc);
#endif // defined(EVAL_NNUE)
}
if (type_of(m) == CASTLING)
@ -1005,11 +960,11 @@ void Position::undo_move(Move m) {
move_piece(to, from); // Put the piece back at the source square
#if defined(EVAL_NNUE)
PieceNumber piece_no0 = st->dirtyPiece.pieceNo[0];
evalList.put_piece(piece_no0, from, pc);
evalList.piece_no_list_board[to] = PIECE_NUMBER_NB;
#endif // defined(EVAL_NNUE)
if (use_nnue())
{
PieceId dp0 = st->dirtyPiece.pieceId[0];
evalList.put_piece(dp0, from, pc);
}
if (st->capturedPiece)
{
@ -1028,12 +983,13 @@ void Position::undo_move(Move m) {
put_piece(st->capturedPiece, capsq); // Restore the captured piece
#if defined(EVAL_NNUE)
PieceNumber piece_no1 = st->dirtyPiece.pieceNo[1];
assert(evalList.bona_piece(piece_no1).fw == Eval::BONA_PIECE_ZERO);
assert(evalList.bona_piece(piece_no1).fb == Eval::BONA_PIECE_ZERO);
evalList.put_piece(piece_no1, capsq, st->capturedPiece);
#endif // defined(EVAL_NNUE)
if (use_nnue())
{
PieceId dp1 = st->dirtyPiece.pieceId[1];
assert(evalList.piece_with_id(dp1).fw == PS_NONE);
assert(evalList.piece_with_id(dp1).fb == PS_NONE);
evalList.put_piece(dp1, capsq, st->capturedPiece);
}
}
}
@ -1042,9 +998,6 @@ void Position::undo_move(Move m) {
--gamePly;
assert(pos_is_ok());
#if defined(EVAL_NNUE)
assert(evalList.is_valid(*this));
#endif // defined(EVAL_NNUE)
}
@ -1052,32 +1005,12 @@ void Position::undo_move(Move m) {
/// is a bit tricky in Chess960 where from/to squares can overlap.
template<bool Do>
void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto) {
#if defined(EVAL_NNUE)
auto& dp = st->dirtyPiece;
// Record the moved pieces in StateInfo for difference calculation.
dp.dirty_num = 2; // 2 pieces moved
PieceNumber piece_no0;
PieceNumber piece_no1;
if (Do) {
piece_no0 = piece_no_of(from);
piece_no1 = piece_no_of(to);
}
#endif // defined(EVAL_NNUE)
bool kingSide = to > from;
rfrom = to; // Castling is encoded as "king captures friendly rook"
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
#if defined(EVAL_NNUE)
if (!Do) {
piece_no0 = piece_no_of(to);
piece_no1 = piece_no_of(rto);
}
#endif // defined(EVAL_NNUE)
// Remove both pieces first since squares could overlap in Chess960
remove_piece(Do ? from : to);
remove_piece(Do ? rfrom : rto);
@ -1085,27 +1018,31 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
put_piece(make_piece(us, KING), Do ? to : from);
put_piece(make_piece(us, ROOK), Do ? rto : rfrom);
#if defined(EVAL_NNUE)
if (Do) {
dp.pieceNo[0] = piece_no0;
dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0);
evalList.piece_no_list_board[from] = PIECE_NUMBER_NB;
evalList.put_piece(piece_no0, to, make_piece(us, KING));
dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0);
if (use_nnue())
{
PieceId dp0, dp1;
auto& dp = st->dirtyPiece;
dp.dirty_num = 2; // 2 pieces moved
dp.pieceNo[1] = piece_no1;
dp.changed_piece[1].old_piece = evalList.bona_piece(piece_no1);
evalList.piece_no_list_board[rfrom] = PIECE_NUMBER_NB;
evalList.put_piece(piece_no1, rto, make_piece(us, ROOK));
dp.changed_piece[1].new_piece = evalList.bona_piece(piece_no1);
if (Do) {
dp0 = piece_id_on(from);
dp1 = piece_id_on(rfrom);
dp.pieceId[0] = dp0;
dp.old_piece[0] = evalList.piece_with_id(dp0);
evalList.put_piece(dp0, to, make_piece(us, KING));
dp.new_piece[0] = evalList.piece_with_id(dp0);
dp.pieceId[1] = dp1;
dp.old_piece[1] = evalList.piece_with_id(dp1);
evalList.put_piece(dp1, rto, make_piece(us, ROOK));
dp.new_piece[1] = evalList.piece_with_id(dp1);
}
else {
dp0 = piece_id_on(to);
dp1 = piece_id_on(rto);
evalList.put_piece(dp0, from, make_piece(us, KING));
evalList.put_piece(dp1, rfrom, make_piece(us, ROOK));
}
}
else {
evalList.piece_no_list_board[to] = PIECE_NUMBER_NB;
evalList.put_piece(piece_no0, from, make_piece(us, KING));
evalList.piece_no_list_board[rto] = PIECE_NUMBER_NB;
evalList.put_piece(piece_no1, rfrom, make_piece(us, ROOK));
}
#endif // defined(EVAL_NNUE)
}
@ -1130,9 +1067,8 @@ void Position::do_null_move(StateInfo& newSt) {
st->key ^= Zobrist::side;
prefetch(TT.first_entry(st->key));
#if defined(EVAL_NNUE)
st->accumulator.computed_score = false;
#endif
if (use_nnue())
st->accumulator.computed_score = false;
++st->rule50;
st->pliesFromNull = 0;
@ -1388,7 +1324,7 @@ void Position::flip() {
std::getline(ss, token); // Half and full moves
f += token;
set(f, is_chess960(), st, this_thread());
set(f, is_chess960(), use_nnue(), st, this_thread());
assert(pos_is_ok());
}
@ -1464,12 +1400,18 @@ bool Position::pos_is_ok() const {
return true;
}
#if defined(EVAL_NNUE)
PieceNumber Position::piece_no_of(Square sq) const
StateInfo* Position::state() const {
return st;
}
const EvalList* Position::eval_list() const {
return &evalList;
}
PieceId Position::piece_id_on(Square sq) const
{
assert(piece_on(sq) != NO_PIECE);
PieceNumber n = evalList.piece_no_of_board(sq);
assert(is_ok(n));
return n;
PieceId pid = evalList.piece_id_list[sq];
assert(is_ok(pid));
return pid;
}
#endif // defined(EVAL_NNUE)

View file

@ -23,16 +23,14 @@
#include <cassert>
#include <deque>
#include <iostream>
#include <memory> // For std::unique_ptr
#include <string>
#include "bitboard.h"
#include "evaluate.h"
#include "misc.h"
#include "types.h"
#include "eval/nnue/nnue_accumulator.h"
#include "nnue/nnue_accumulator.h"
/// StateInfo struct stores information needed to restore a Position object to
@ -60,12 +58,9 @@ struct StateInfo {
Bitboard checkSquares[PIECE_TYPE_NB];
int repetition;
#if defined(EVAL_NNUE)
// Used by NNUE
Eval::NNUE::Accumulator accumulator;
// For management of evaluation value difference calculation
Eval::DirtyPiece dirtyPiece;
#endif // defined(EVAL_NNUE)
DirtyPiece dirtyPiece;
};
@ -82,9 +77,6 @@ typedef std::unique_ptr<std::deque<StateInfo>> StateListPtr;
/// traversing the search tree.
class Thread;
// packed sfen
struct PackedSfen { uint8_t data[32]; };
class Position {
public:
static void init();
@ -94,7 +86,7 @@ public:
Position& operator=(const Position&) = delete;
// FEN string input/output
Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th);
Position& set(const std::string& fenStr, bool isChess960, bool useNnue, StateInfo* si, Thread* th);
Position& set(const std::string& code, Color c, StateInfo* si);
const std::string fen() const;
@ -165,6 +157,7 @@ public:
Color side_to_move() const;
int game_ply() const;
bool is_chess960() const;
bool use_nnue() const;
Thread* this_thread() const;
bool is_draw(int ply) const;
bool has_game_cycle(int ply) const;
@ -178,36 +171,9 @@ public:
bool pos_is_ok() const;
void flip();
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
// --- StateInfo
// Returns the StateInfo for the current position. For example,
// state()->capturedPiece holds the piece captured on the previous move.
StateInfo* state() const { return st; }
// The piece list (which piece number is where), as used by the evaluation function.
const Eval::EvalList* eval_list() const { return &evalList; }
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
#if defined(EVAL_LEARN)
// -- sfen packing helper
// Get the packed sfen, written to the buffer passed as the argument.
// gamePly is not included in the pack.
void sfen_pack(PackedSfen& sfen);
// Going through a sfen string is slow, so this sets a packed sfen directly.
// Equivalent to pos.set(sfen_unpack(data), si, th);.
// Returns non-zero if the given position is invalid.
// PackedSfen does not include gamePly, so it cannot be restored; if you want to set it, pass it as an argument.
int set_from_packed_sfen(const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false);
// Given the board, the pieces in hand, and the side to move, return the sfen.
//static std::string sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly);
// Returns the square of the king of color c.
Square king_square(Color c) const { return pieceList[make_piece(c, KING)][0]; }
#endif // EVAL_LEARN
// Used by NNUE
StateInfo* state() const;
const EvalList* eval_list() const;
private:
// Initialization helpers (used while setting up a position)
@ -222,10 +188,8 @@ private:
template<bool Do>
void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
#if defined(EVAL_NNUE)
// Returns the PieceNumber of the piece in the sq box on the board.
PieceNumber piece_no_of(Square sq) const;
#endif // defined(EVAL_NNUE)
// ID of a piece on a given square
PieceId piece_id_on(Square sq) const;
// Data members
Piece board[SQUARE_NB];
@ -243,11 +207,10 @@ private:
Thread* thisThread;
StateInfo* st;
bool chess960;
bool nnue;
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
// List of pieces used in the evaluation function
Eval::EvalList evalList;
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
// List of pieces used in NNUE evaluation function
EvalList evalList;
};
namespace PSQT {
@ -413,6 +376,10 @@ inline bool Position::is_chess960() const {
return chess960;
}
inline bool Position::use_nnue() const {
return nnue;
}
inline bool Position::capture_or_promotion(Move m) const {
assert(is_ok(m));
return type_of(m) != NORMAL ? type_of(m) != CASTLING : !empty(to_sq(m));

View file

@ -925,9 +925,12 @@ namespace {
if (value >= probcutBeta)
{
tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
BOUND_LOWER,
depth - 3, move, ss->staticEval);
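// Save only if this would not clobber an existing TT entry from a search of
// at least the same depth that holds a valid value.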
if ( !(ttHit
&& tte->depth() >= depth - 3
&& ttValue != VALUE_NONE))
tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
BOUND_LOWER,
depth - 3, move, ss->staticEval);
return value;
}
}
@ -985,7 +988,7 @@ moves_loop: // When in check, search starts from here
ss->moveCount = ++moveCount;
if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000 && !Limits.silent)
if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000)
sync_cout << "info depth " << depth
<< " currmove " << UCI::move(move, pos.is_chess960())
<< " currmovenumber " << moveCount + thisThread->pvIdx << sync_endl;
@ -1553,13 +1556,7 @@ moves_loop: // When in check, search starts from here
prefetch(TT.first_entry(pos.key_after(move)));
// Check for legality just before making the move
if (
#if defined(EVAL_LEARN)
// HACK: pos.piece_on(from_sq(m)) sometimes will be NO_PIECE during machine learning.
!pos.pseudo_legal(move) ||
#endif // EVAL_LEARN
!pos.legal(move)
)
if (!pos.legal(move))
{
moveCount--;
continue;
@ -1959,316 +1956,3 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
m.tbRank = 0;
}
}
// --- Expose the functions used for learning, such as fixed-depth search, to the outside
#if defined (EVAL_LEARN)
namespace Learner
{
// For learning, a stub that allows search()/qsearch() to be called from a single thread.
// It might have been better to have a Searcher with a per-thread transposition
// table, as Apery does.
// Initialization for learning.
// Called from Learner::search(),Learner::qsearch().
void init_for_search(Position& pos, Stack* ss)
{
// RootNode requires ss->ply == 0.
// The memset below clears everything to zero, so ss->ply == 0 holds.
std::memset(ss - 7, 0, 10 * sizeof(Stack));
// Search::Limits
// Careful: this member variable is global and affects other threads.
{
auto& limits = Search::Limits;
// Make the search equivalent to the "go infinite" command, so no time management is done.
limits.infinite = true;
// Suppress PV output, which would only get in the way here.
limits.silent = true;
// Do not set a node limit: it would be compared against the accumulated node counts of the individual threads.
limits.nodes = 0;
// The depth limit is likewise handled via the argument passed to Learner::search().
limits.depth = 0;
// Use a large value so the draw value is not returned merely because the game is near the move limit.
//limits.max_game_ply = 1 << 16;
// (Shogi) Without an entering-king rule, games tend to end in draws that are hard to settle.
//limits.enteringKingRule = EnteringKingRule::EKR_27_POINT;
}
// Set DrawValue
{
// This table is not per-thread, so another thread may overwrite it;
// there is no way around that. If that happens, the value should be 0 anyway.
//drawValueTable[REPETITION_DRAW][BLACK] = VALUE_ZERO;
//drawValueTable[REPETITION_DRAW][WHITE] = VALUE_ZERO;
}
// Set up this_thread.
{
auto th = pos.this_thread();
th->completedDepth = 0;
th->selDepth = 0;
th->rootDepth = 0;
// Zero initialization of the number of search nodes
th->nodes = 0;
// Clearing all history tables takes some time, and since search accuracy here is low anyway, the benefit is unclear, so it is skipped.
// th->clear();
int ct = int(Options["Contempt"]) * PawnValueEg / 100; // From centipawns
Color us = pos.side_to_move();
// In analysis mode, adjust contempt in accordance with user preference
if (Limits.infinite || Options["UCI_AnalyseMode"])
ct = Options["Analysis Contempt"] == "Off" ? 0
: Options["Analysis Contempt"] == "Both" ? ct
: Options["Analysis Contempt"] == "White" && us == BLACK ? -ct
: Options["Analysis Contempt"] == "Black" && us == WHITE ? -ct
: ct;
// Evaluation score is from the white point of view
th->contempt = (us == WHITE ? make_score(ct, ct / 2)
: -make_score(ct, ct / 2));
for (int i = 7; i > 0; i--)
(ss - i)->continuationHistory = &th->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel
// set rootMoves
auto& rootMoves = th->rootMoves;
rootMoves.clear();
for (auto m: MoveList<LEGAL>(pos))
rootMoves.push_back(Search::RootMove(m));
assert(!rootMoves.empty());
//#if defined(USE_GLOBAL_OPTIONS)
// The transposition-table generation should be managed per search thread,
// so bump this thread's generation because this is a new search:
//TT.new_search(th->thread_id());
// However, calling new_search here throws away the previous search results,
// so do not do it here; the caller should call TT.new_search(th->thread_id()) per game.
// Since we want to avoid repeatedly reaching the same final position, the
// transposition table is shared by all threads when generating training data.
//#endif
}
}
// A pair of evaluation value and PV. Returned by Learner::search() and Learner::qsearch().
typedef std::pair<Value, std::vector<Move> > ValueAndPV;
// Quiescence search.
//
// Precondition: the search thread has been set via pos.set_this_thread(Threads[thread_id]).
// When Threads.stop becomes true the search is interrupted, so the PV at that point
// is not reliable; after returning, do not use the result if Threads.stop == true.
// Likewise, the search returns immediately unless it is entered with Threads.stop == false.
//
// If the side to move has no legal moves, MOVE_RESIGN is returned in the PV array.
//
// Alpha and beta could once be passed as arguments to report the result of searching
// that window, but values pruned against the window get written to the transposition
// table, which is harmful during learning, so specifying a window is no longer allowed.
ValueAndPV qsearch(Position& pos)
{
Stack stack[MAX_PLY + 10], * ss = stack + 7;
Move pv[MAX_PLY + 1];
init_for_search(pos, ss);
ss->pv = pv; // ss->pv must point to a valid buffer, even a dummy one.
if (pos.is_draw(0)) {
// Return draw value if draw.
return { VALUE_DRAW, {} };
}
// No legal moves (mate or stalemate)?
if (MoveList<LEGAL>(pos).size() == 0)
{
// Return the mated value if checkmated.
return { mated_in(/*ss->ply*/ 0 + 1), {} };
}
auto bestValue = ::qsearch<PV>(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, 0);
// Returns the PV obtained.
std::vector<Move> pvs;
for (Move* p = &ss->pv[0]; is_ok(*p); ++p)
pvs.push_back(*p);
return ValueAndPV(bestValue, pvs);
}
// Normal search at the given integer depth.
// For example, to get the score of a 3-ply search, do something like
//   auto v = search(pos, 3);
// The evaluation value is then in v.first and the PV in v.second.
// With multi PV enabled, the PV of each line is available in pos.this_thread()->rootMoves[N].pv.
// Specify multi PV via the multiPV argument of this function (the value of Options["MultiPV"] is ignored).
//
// (Shogi) The declaration-win rule is not checked at the root because it is awkward
// to handle here; the caller must handle it.
//
// Precondition: the search thread has been set via pos.set_this_thread(Threads[thread_id]).
// When Threads.stop becomes true the search is interrupted, so the PV at that point
// is not reliable; after returning, do not use the result if Threads.stop == true.
// Likewise, the search returns immediately unless it is entered with Threads.stop == false.
ValueAndPV search(Position& pos, int depth_, size_t multiPV /* = 1 */, uint64_t nodesLimit /* = 0 */)
{
std::vector<Move> pvs;
Depth depth = depth_;
if (depth < 0)
return std::pair<Value, std::vector<Move>>(Eval::evaluate(pos), std::vector<Move>());
if (depth == 0)
return qsearch(pos);
Stack stack[MAX_PLY + 10], * ss = stack + 7;
Move pv[MAX_PLY + 1];
init_for_search(pos, ss);
ss->pv = pv; // ss->pv must point to a valid buffer, even a dummy one.
// Initialize the variables related to this_thread
auto th = pos.this_thread();
auto& rootDepth = th->rootDepth;
auto& pvIdx = th->pvIdx;
auto& pvLast = th->pvLast;
auto& rootMoves = th->rootMoves;
auto& completedDepth = th->completedDepth;
auto& selDepth = th->selDepth;
// Search the top N moves of this position as best-move candidates.
//size_t multiPV = Options["MultiPV"];
// Do not exceed the number of legal moves in this position.
multiPV = std::min(multiPV, rootMoves.size());
// Scale the node limit by multiPV; otherwise, at a fixed depth, each candidate move would get fewer nodes than in a single-PV search.
nodesLimit *= multiPV;
Value alpha = -VALUE_INFINITE;
Value beta = VALUE_INFINITE;
Value delta = -VALUE_INFINITE;
Value bestValue = -VALUE_INFINITE;
while ((rootDepth += 1) <= depth
// Exit this loop once the node limit, passed as an argument to this function, is exceeded.
&& !(nodesLimit /* limited nodes */ && th->nodes.load(std::memory_order_relaxed) >= nodesLimit)
)
{
for (RootMove& rm : rootMoves)
rm.previousScore = rm.score;
size_t pvFirst = 0;
pvLast = 0;
// MultiPV loop. We perform a full root search for each PV line
for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx)
{
if (pvIdx == pvLast)
{
pvFirst = pvLast;
for (pvLast++; pvLast < rootMoves.size(); pvLast++)
if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank)
break;
}
// Reset selDepth, which is reported via "info" output for each depth and PV line.
selDepth = 0;
// Switch to aspiration search for depth 5 and above.
if (rootDepth >= 5)
{
delta = Value(20);
Value p = rootMoves[pvIdx].previousScore;
alpha = std::max(p - delta, -VALUE_INFINITE);
beta = std::min(p + delta, VALUE_INFINITE);
}
// aspiration search
int failedHighCnt = 0;
while (true)
{
Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt);
bestValue = ::search<PV>(pos, ss, alpha, beta, adjustedDepth, false);
stable_sort(rootMoves.begin() + pvIdx, rootMoves.end());
//my_stable_sort(pos.this_thread()->thread_id(),&rootMoves[0] + pvIdx, rootMoves.size() - pvIdx);
// Widen the aspiration window on a fail low/high and re-search;
// otherwise accept the value and break out of the loop.
if (bestValue <= alpha)
{
beta = (alpha + beta) / 2;
alpha = std::max(bestValue - delta, -VALUE_INFINITE);
failedHighCnt = 0;
//if (mainThread)
// mainThread->stopOnPonderhit = false;
}
else if (bestValue >= beta)
{
beta = std::min(bestValue + delta, VALUE_INFINITE);
++failedHighCnt;
}
else
break;
delta += delta / 4 + 5;
assert(-VALUE_INFINITE <= alpha && beta <= VALUE_INFINITE);
// runaway check
//assert(th->nodes.load(std::memory_order_relaxed) <= 1000000 );
}
stable_sort(rootMoves.begin(), rootMoves.begin() + pvIdx + 1);
//my_stable_sort(pos.this_thread()->thread_id() , &rootMoves[0] , pvIdx + 1);
} // multi PV
completedDepth = rootDepth;
}
// Filter the PV through is_ok(), since MOVE_NULL could appear in the middle
// (it should not, as this is a PV). MOVE_WIN is never inserted (for now).
for (Move move : rootMoves[0].pv)
{
if (!is_ok(move))
break;
pvs.push_back(move);
}
//sync_cout << rootDepth << sync_endl;
// Considering multiPV, the score of rootMoves[0] is returned as bestValue.
bestValue = rootMoves[0].score;
return ValueAndPV(bestValue, pvs);
}
}
#endif

View file

@ -88,7 +88,6 @@ struct LimitsType {
time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
movestogo = depth = mate = perft = infinite = 0;
nodes = 0;
silent = false;
}
bool use_time_management() const {
@ -99,9 +98,6 @@ struct LimitsType {
TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
int movestogo, depth, mate, perft, infinite;
int64_t nodes;
// Silent mode that suppresses output to the screen (used for in-process continuous self-play);
// the PV is not printed in this mode.
bool silent;
};
extern LimitsType Limits;

View file

@ -216,7 +216,7 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0;
th->rootDepth = th->completedDepth = 0;
th->rootMoves = rootMoves;
th->rootPos.set(pos.fen(), pos.is_chess960(), &setupStates->back(), th);
th->rootPos.set(pos.fen(), pos.is_chess960(), pos.use_nnue(), &setupStates->back(), th);
}
setupStates->back() = tmp;

View file

@ -116,9 +116,6 @@ void TranspositionTable::clear() {
/// TTEntry t2 if its replace value is greater than that of t2.
TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
#if defined(DISABLE_TT)
return found = false, first_entry(0);
#else
TTEntry* const tte = first_entry(key);
const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster
@ -143,7 +140,6 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
replace = &tte[i];
return found = false, replace;
#endif
}

View file

@ -40,7 +40,6 @@
#include <cassert>
#include <cctype>
#include <climits>
#include <cstdint>
#include <cstdlib>
#include <algorithm>
@ -132,8 +131,6 @@ enum Color {
WHITE, BLACK, COLOR_NB = 2
};
constexpr Color Colors[2] = { WHITE, BLACK };
enum CastlingRights {
NO_CASTLING,
WHITE_OO,
@ -190,10 +187,7 @@ enum Value : int {
QueenValueMg = 2538, QueenValueEg = 2682,
Tempo = 28,
MidgameLimit = 15258, EndgameLimit = 3915,
// Maximum value returned by the evaluation function (should be around 2^14)
VALUE_MAX_EVAL = 27000,
MidgameLimit = 15258, EndgameLimit = 3915
};
enum PieceType {
@ -209,6 +203,21 @@ enum Piece {
PIECE_NB = 16
};
// An ID used to track the pieces. Max. 32 pieces on board.
enum PieceId {
PIECE_ID_ZERO = 0,
PIECE_ID_KING = 30,
PIECE_ID_WKING = 30,
PIECE_ID_BKING = 31,
PIECE_ID_NONE = 32
};
inline PieceId operator++(PieceId& d, int) {
PieceId x = d;
d = PieceId(int(d) + 1);
return x;
}
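// Illustrative only (not part of this patch): the post-increment above lets
// PieceId drive plain loops over the 32 tracked piece slots, e.g.
//
//     for (PieceId pid = PIECE_ID_ZERO; pid < PIECE_ID_NONE; pid++)
//         visit(pid);   // 'visit' is a hypothetical callback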
constexpr Value PieceValue[PHASE_NB][PIECE_NB] = {
{ VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO,
VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO },
@ -238,8 +247,8 @@ enum Square : int {
SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8,
SQ_NONE,
SQUARE_ZERO = 0, SQUARE_NB = 64,
SQUARE_NB_PLUS1 = SQUARE_NB + 1, // A king that is absent is treated as having moved to SQUARE_NB, so arrays may need to be sized SQUARE_NB + 1; hence this constant.
SQUARE_ZERO = 0,
SQUARE_NB = 64
};
enum Direction : int {
@ -262,6 +271,101 @@ enum Rank : int {
RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB
};
// unique number for each piece type on each square
enum PieceSquare : uint32_t
{
PS_NONE = 0,
PS_W_PAWN = 1,
PS_B_PAWN = 1 * SQUARE_NB + 1,
PS_W_KNIGHT = 2 * SQUARE_NB + 1,
PS_B_KNIGHT = 3 * SQUARE_NB + 1,
PS_W_BISHOP = 4 * SQUARE_NB + 1,
PS_B_BISHOP = 5 * SQUARE_NB + 1,
PS_W_ROOK = 6 * SQUARE_NB + 1,
PS_B_ROOK = 7 * SQUARE_NB + 1,
PS_W_QUEEN = 8 * SQUARE_NB + 1,
PS_B_QUEEN = 9 * SQUARE_NB + 1,
PS_W_KING = 10 * SQUARE_NB + 1,
PS_END = PS_W_KING, // end of the indices for pieces other than kings (pawns included)
PS_B_KING = 11 * SQUARE_NB + 1,
PS_END2 = 12 * SQUARE_NB + 1
};
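// A sketch of the indexing scheme (illustrative, not part of this patch): a
// white knight on SQ_C3 is indexed as PS_W_KNIGHT + SQ_C3 from White's point
// of view; from Black's point of view the same piece uses the black constant
// and the square rotated 180 degrees (sq ^ 63, see EvalList below). Each piece
// type spans SQUARE_NB consecutive indices, which could be checked with:
//
//     static_assert(PS_B_PAWN == PS_W_PAWN + SQUARE_NB, "PieceSquare layout");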
struct ExtPieceSquare
{
PieceSquare from[COLOR_NB];
ExtPieceSquare() {}
ExtPieceSquare(PieceSquare fw, PieceSquare fb) : from{fw, fb} {}
};
// Array for finding the PieceSquare corresponding to the piece on the board
extern ExtPieceSquare kpp_board_index[PIECE_NB];
// Structure holding which tracked piece (PieceId) is where (PieceSquare)
class EvalList
{
// Return relative square when turning the board 180 degrees
constexpr Square rotate180(Square sq) {
return (Square)(sq ^ 63);
}
public:
// Max. number of pieces without kings is 30 but must be a multiple of 4 in AVX2
static const int MAX_LENGTH = 32;
// Array that holds the piece id for the pieces on the board
PieceId piece_id_list[SQUARE_NB];
// List of pieces, separate from White and Black POV
PieceSquare* piece_list_fw() const { return const_cast<PieceSquare*>(pieceListFw); }
PieceSquare* piece_list_fb() const { return const_cast<PieceSquare*>(pieceListFb); }
// Place the piece pc with piece_id on the square sq on the board
void put_piece(PieceId piece_id, Square sq, Piece pc)
{
assert(is_ok(piece_id));
if (pc != NO_PIECE)
{
pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq);
pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq));
piece_id_list[sq] = piece_id;
}
else
{
pieceListFw[piece_id] = PS_NONE;
pieceListFb[piece_id] = PS_NONE;
piece_id_list[sq] = piece_id;
}
}
// Return the piece with the given piece_id, converted to ExtPieceSquare
ExtPieceSquare piece_with_id(PieceId piece_id) const
{
ExtPieceSquare eps;
eps.from[WHITE] = pieceListFw[piece_id];
eps.from[BLACK] = pieceListFb[piece_id];
return eps;
}
private:
PieceSquare pieceListFw[MAX_LENGTH];
PieceSquare pieceListFb[MAX_LENGTH];
};
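// A minimal usage sketch (illustrative only; assumes kpp_board_index has been
// initialized elsewhere): register a white pawn on SQ_E4 under id 0, then read
// back its two perspective-dependent indices.
//
//     EvalList list;
//     list.put_piece(PIECE_ID_ZERO, SQ_E4, W_PAWN);
//     ExtPieceSquare eps = list.piece_with_id(PIECE_ID_ZERO);
//     assert(eps.from[WHITE] != PS_NONE && eps.from[BLACK] != PS_NONE);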
// For differential evaluation of pieces that changed since last turn
struct DirtyPiece
{
// Number of changed pieces
int dirty_num;
// The ids of changed pieces, max. 2 pieces can change in one move
PieceId pieceId[2];
// The old and new states of the changed pieces
ExtPieceSquare old_piece[2];
ExtPieceSquare new_piece[2];
};
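// Sketch of how do_move() fills this for a capture (cf. position.cpp above;
// variable names are illustrative): the mover goes in slot 0, the captured
// piece in slot 1, whose new state becomes PS_NONE since it leaves the board.
//
//     dp.dirty_num = 2;                                   // mover + captured
//     dp.pieceId[0] = moverId;
//     dp.old_piece[0] = evalList.piece_with_id(moverId);  // state before
//     evalList.put_piece(moverId, to, pc);
//     dp.new_piece[0] = evalList.piece_with_id(moverId);  // state after
//     dp.pieceId[1] = capturedId;
//     dp.old_piece[1] = evalList.piece_with_id(capturedId);
//     evalList.put_piece(capturedId, capsq, NO_PIECE);    // -> PS_NONE
//     dp.new_piece[1] = evalList.piece_with_id(capturedId);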
/// Score enum stores a middlegame and an endgame value in a single integer (enum).
/// The least significant 16 bits are used to store the middlegame value and the
@ -287,10 +391,10 @@ inline Value mg_value(Score s) {
}
#define ENABLE_BASE_OPERATORS_ON(T) \
constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \
constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \
constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \
constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \
constexpr T operator-(T d) { return T(-int(d)); } \
inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \
inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \
inline T& operator-=(T& d1, int d2) { return d1 = d1 - d2; }
#define ENABLE_INCR_OPERATORS_ON(T) \
@ -309,8 +413,10 @@ inline T& operator/=(T& d, int i) { return d = T(int(d) / i); }
ENABLE_FULL_OPERATORS_ON(Value)
ENABLE_FULL_OPERATORS_ON(Direction)
ENABLE_INCR_OPERATORS_ON(PieceType)
ENABLE_INCR_OPERATORS_ON(Piece)
ENABLE_INCR_OPERATORS_ON(PieceSquare)
ENABLE_INCR_OPERATORS_ON(PieceId)
ENABLE_INCR_OPERATORS_ON(PieceType)
ENABLE_INCR_OPERATORS_ON(Square)
ENABLE_INCR_OPERATORS_ON(File)
ENABLE_INCR_OPERATORS_ON(Rank)
@ -398,6 +504,10 @@ inline Color color_of(Piece pc) {
return Color(pc >> 3);
}
constexpr bool is_ok(PieceId pid) {
return pid < PIECE_ID_NONE;
}
constexpr bool is_ok(Square s) {
return s >= SQ_A1 && s <= SQ_H8;
}
@ -463,44 +573,6 @@ constexpr bool is_ok(Move m) {
return from_sq(m) != to_sq(m); // Catch MOVE_NULL and MOVE_NONE
}
// Return the square when turning the board 180 degrees
constexpr Square Inv(Square sq) { return (Square)((SQUARE_NB - 1) - sq); }
// Return squares when mirroring the board
constexpr Square Mir(Square sq) { return make_square(File(7 - (int)file_of(sq)), rank_of(sq)); }
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
// --------------------
// piece numbers
// --------------------
// A number used to manage the piece list (which piece is where) in the Position class.
enum PieceNumber : uint8_t
{
PIECE_NUMBER_PAWN = 0,
PIECE_NUMBER_KNIGHT = 16,
PIECE_NUMBER_BISHOP = 20,
PIECE_NUMBER_ROOK = 24,
PIECE_NUMBER_QUEEN = 28,
PIECE_NUMBER_KING = 30,
PIECE_NUMBER_WKING = 30,
PIECE_NUMBER_BKING = 31, // Use these if you need the piece numbers of the two kings
PIECE_NUMBER_ZERO = 0,
PIECE_NUMBER_NB = 32,
};
inline PieceNumber& operator++(PieceNumber& d) { return d = PieceNumber(int8_t(d) + 1); }
inline PieceNumber operator++(PieceNumber& d, int) {
PieceNumber x = d;
d = PieceNumber(int8_t(d) + 1);
return x;
}
inline PieceNumber& operator--(PieceNumber& d) { return d = PieceNumber(int8_t(d) - 1); }
// PieceNumber integrity check, for asserts.
constexpr bool is_ok(PieceNumber pn) { return pn < PIECE_NUMBER_NB; }
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
/// Based on a congruential pseudo random number generator
constexpr Key make_key(uint64_t seed) {
return seed * 6364136223846793005ULL + 1442695040888963407ULL;

View file

@ -34,55 +34,16 @@
#include "uci.h"
#include "syzygy/tbprobe.h"
#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD)
#include "eval/nnue/nnue_test_command.h"
#endif
using namespace std;
extern vector<string> setup_bench(const Position&, istream&);
// FEN string of the initial position, normal chess
const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
// Command to automatically generate a game record
#if defined (EVAL_LEARN)
namespace Learner
{
// Automatic generation of training positions
void gen_sfen(Position& pos, istringstream& is);
// Learning from the generated game record
void learn(Position& pos, istringstream& is);
#if defined(GENSFEN2019)
// Development command for automatic generation of training positions
void gen_sfen2019(Position& pos, istringstream& is);
#endif
// A pair of evaluation value and PV. Returned by Learner::search() and Learner::qsearch().
typedef std::pair<Value, std::vector<Move> > ValueAndPV;
ValueAndPV qsearch(Position& pos);
ValueAndPV search(Position& pos, int depth_, size_t multiPV = 1, uint64_t nodesLimit = 0);
}
#endif
#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD)
void test_cmd(Position& pos, istringstream& is)
{
// Initialize, since a search may be run.
init_nnue();
std::string param;
is >> param;
if (param == "nnue") Eval::NNUE::TestCommand(pos, is);
}
#endif
namespace {
// FEN string of the initial position, normal chess
const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
// position() is called when engine receives the "position" UCI command.
// The function sets up the position described in the given FEN string ("fen")
// or the starting position ("startpos") and then makes the moves given in the
@ -107,7 +68,7 @@ namespace {
return;
states = StateListPtr(new std::deque<StateInfo>(1)); // Drop old and create a new one
pos.set(fen, Options["UCI_Chess960"], &states->back(), Threads.main());
pos.set(fen, Options["UCI_Chess960"], Options["Use NNUE"], &states->back(), Threads.main());
// Parse move list (if any)
while (is >> token && (m = UCI::to_move(pos, token)) != MOVE_NONE)
@ -211,11 +172,9 @@ namespace {
else if (token == "position") position(pos, is, states);
else if (token == "ucinewgame")
{
#if defined(EVAL_NNUE)
init_nnue();
#endif
init_nnue(Options["EvalFile"]);
Search::clear();
elapsed = now(); // Search::clear() may take a while
elapsed = now(); // initialization may take some time
}
}
@ -251,86 +210,20 @@ namespace {
return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
}
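// The expression above is a logistic curve mapping the internal score x to a
// win probability in per mille: at x == a it yields 500 (an even game), it
// saturates toward 1000 as x grows beyond a, and b controls the slope. For
// instance, with a = 0 and b = 100, x = 100 gives about 731.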
// The checksum computed when loading the evaluation function is saved here so consistency can be checked later.
uint64_t eval_sum;
} // namespace
// Callable from outside because e.g. the bench command also wants to trigger it.
// Note that the position itself is not initialized here.
void init_nnue(bool skipCorruptCheck)
void UCI::init_nnue(const std::string& evalFile)
{
#if defined(EVAL_NNUE)
// After receiving "isready", modify so that a line feed is sent every 5 seconds until "readyok" is returned. (keep alive processing)
// From USI 2.0 specifications.
// -The time out time after "is ready" is about 30 seconds. Beyond this, if you want to initialize the evaluation function and secure the hash table,
// You should send some kind of message (breakable) from the thinking engine side.
// -Shogi GUI already does so, so MyShogi will follow along.
//-Also, the engine side of Yaneura King modifies it so that after "isready" is received, a line feed is sent every 5 seconds until "readyok" is returned.
// Perform processing that may take time, such as reading the evaluation function, at this timing.
// If you do a time-consuming process at startup, Shogi place will make a timeout judgment and retire the recognition as a thinking engine.
if (!UCI::load_eval_finished)
if (UCI::use_nnue && !UCI::load_eval_finished)
{
// Read evaluation function
Eval::load_eval();
// Calculate and save checksum (to check for subsequent memory corruption)
eval_sum = Eval::calc_check_sum();
// Display the software name
Eval::print_softname(eval_sum);
// Load evaluation function from a file
Eval::NNUE::load_eval(evalFile);
UCI::load_eval_finished = true;
}
else
{
// Verify the checksum on every call to detect memory corruption;
// slightly wasteful, but acceptable at roughly 0.1 seconds.
if (!skipCorruptCheck && eval_sum != Eval::calc_check_sum())
sync_cout << "Error! : EVAL memory is corrupted" << sync_endl;
}
#endif // defined(EVAL_NNUE)
}
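// Note: the guard above makes init_nnue() cheap to call repeatedly; the
// network is loaded only when "Use NNUE" is enabled and no network has been
// loaded yet, so the "isready", "ucinewgame" and option-change handlers
// elsewhere in this patch can all call it unconditionally.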
// --------------------
// Call qsearch(),search() directly for testing
// --------------------
#if defined(EVAL_LEARN)
void qsearch_cmd(Position& pos)
{
cout << "qsearch : ";
auto pv = Learner::qsearch(pos);
cout << "Value = " << pv.first << " , " << UCI::value(pv.first) << " , PV = ";
for (auto m : pv.second)
cout << UCI::move(m, false) << " ";
cout << endl;
}
void search_cmd(Position& pos, istringstream& is)
{
string token;
int depth = 1;
int multi_pv = (int)Options["MultiPV"];
while (is >> token)
{
if (token == "depth")
is >> depth;
if (token == "multipv")
is >> multi_pv;
}
cout << "search depth = " << depth << " , multi_pv = " << multi_pv << " : ";
auto pv = Learner::search(pos, depth, multi_pv);
cout << "Value = " << pv.first << " , " << UCI::value(pv.first) << " , PV = ";
for (auto m : pv.second)
cout << UCI::move(m, false) << " ";
cout << endl;
}
#endif
/// UCI::loop() waits for a command from stdin, parses it and calls the appropriate
/// function. Also intercepts EOF from stdin to ensure gracefully exiting if the
/// GUI dies unexpectedly. When called with some command line arguments, e.g. to
@ -343,7 +236,7 @@ void UCI::loop(int argc, char* argv[]) {
string token, cmd;
StateListPtr states(new std::deque<StateInfo>(1));
pos.set(StartFEN, false, &states->back(), Threads.main());
pos.set(StartFEN, false, pos.use_nnue(), &states->back(), Threads.main());
for (int i = 1; i < argc; ++i)
cmd += std::string(argv[i]) + " ";
@ -378,12 +271,14 @@ void UCI::loop(int argc, char* argv[]) {
else if (token == "position") position(pos, is, states);
else if (token == "ucinewgame")
{
#if defined(EVAL_NNUE)
init_nnue();
#endif
init_nnue(Options["EvalFile"]);
Search::clear();
}
else if (token == "isready") sync_cout << "readyok" << sync_endl;
else if (token == "isready")
{
init_nnue(Options["EvalFile"]);
sync_cout << "readyok" << sync_endl;
}
// Additional custom non-UCI commands, mainly for debugging.
// Do not use these commands during a search!
@ -392,28 +287,8 @@ void UCI::loop(int argc, char* argv[]) {
else if (token == "d") sync_cout << pos << sync_endl;
else if (token == "eval") sync_cout << Eval::trace(pos) << sync_endl;
else if (token == "compiler") sync_cout << compiler_info() << sync_endl;
#if defined (EVAL_LEARN)
else if (token == "gensfen") Learner::gen_sfen(pos, is);
else if (token == "learn") Learner::learn(pos, is);
#if defined (GENSFEN2019)
// Command to generate teacher phase under development
else if (token == "gensfen2019") Learner::gen_sfen2019(pos, is);
#endif
// Command to call qsearch(),search() directly for testing
else if (token == "qsearch") qsearch_cmd(pos);
else if (token == "search") search_cmd(pos, is);
#endif
#if defined(EVAL_NNUE)
else if (token == "eval_nnue") sync_cout << "eval_nnue = " << Eval::compute_eval(pos) << sync_endl;
#endif
#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD)
// test command
else if (token == "test") test_cmd(pos, is);
#endif
else if (token == "evalnn") sync_cout << "NNUE evaluation: "
<< Eval::NNUE::compute_eval(pos) << sync_endl;
else
sync_cout << "Unknown command: " << cmd << sync_endl;

View file

@ -76,19 +76,13 @@ std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
std::string wdl(Value v, int ply);
Move to_move(const Position& pos, std::string& str);
// Flag indicating that the evaluation function has been loaded. Reset to false when the eval file changes.
extern bool load_eval_finished; // = false;
void init_nnue(const std::string& evalFile);
extern bool load_eval_finished;
extern bool use_nnue;
} // namespace UCI
extern UCI::OptionsMap Options;
// Processing when USI "isready" command is called. At this time, the evaluation function is read.
// Used when you want to load the evaluation function when "isready" does not come in handler of benchmark command etc.
// If skipCorruptCheck == true, skip memory corruption check by check sum when reading the evaluation function a second time.
// * This function is inconvenient if it is not available in Stockfish, so add it.
void init_nnue(bool skipCorruptCheck = false);
extern const char* StartFEN;
#endif // #ifndef UCI_H_INCLUDED

View file

@ -42,8 +42,22 @@ void on_hash_size(const Option& o) { TT.resize(size_t(o)); }
void on_logger(const Option& o) { start_logger(o); }
void on_threads(const Option& o) { Threads.set(size_t(o)); }
void on_tb_path(const Option& o) { Tablebases::init(o); }
void on_eval_file(const Option& o) { load_eval_finished = false; init_nnue(); }
void on_use_nnue(const Option& o) {
use_nnue = o;
if (use_nnue)
std::cout << "info string NNUE eval used" << std::endl;
else
std::cout << "info string Standard eval used" << std::endl;
init_nnue(Options["EvalFile"]);
}
void on_eval_file(const Option& o) {
load_eval_finished = false;
init_nnue(o);
}
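// A typical GUI exchange exercising these handlers might look like this
// (illustrative):
//
//     setoption name Use NNUE value true     -> on_use_nnue() prints
//                                               "info string NNUE eval used"
//     setoption name EvalFile value nn.bin   -> on_eval_file() marks the
//                                               network for reloading
//     isready                                -> init_nnue() loads the file,
//                                               then the engine answers "readyok"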
/// Our case insensitive less() function as required by UCI protocol
bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const {
@ -80,27 +94,8 @@ void init(OptionsMap& o) {
o["SyzygyProbeDepth"] << Option(1, 1, 100);
o["Syzygy50MoveRule"] << Option(true);
o["SyzygyProbeLimit"] << Option(7, 0, 7);
// Evaluation file name. When it changes, the evaluation function must be reloaded at the next ucinewgame.
#if defined(__linux__)
o["EvalFile"] << Option("eval/nn.bin", on_eval_file);
#else
o["EvalFile"] << Option("eval\\nn.bin", on_eval_file);
#endif
// When converting a network, the "test eval convert" command must run before the
// new evaluation function exists; loading the evaluation function at ucinewgame
// would then abort before the command executes. This hidden option suppresses
// that loading so the "test eval convert" command can be run.
o["SkipLoadingEval"] << Option(false);
// Number of moves for which the opening book is used
o["BookMoves"] << Option(16, 0, 10000);
#if defined(EVAL_LEARN)
// Folder where the evaluation function is saved during learning ("evalsave" by
// default; it must exist in advance). Subfolders such as "0/", "1/", ... are
// created under it automatically and the evaluation function files are saved there.
o["EvalSaveDir"] << Option("evalsave");
#endif
o["Use NNUE"] << Option(true, on_use_nnue);
o["EvalFile"] << Option("nn.bin", on_eval_file);
}
@ -209,6 +204,6 @@ Option& Option::operator=(const string& v) {
return *this;
}
// Flag indicating that the evaluation function has been loaded. Reset to false when the eval file changes.
bool use_nnue = true;
bool load_eval_finished = false;
} // namespace UCI