diff --git a/.travis.yml b/.travis.yml
index e2ae61be..12596f1e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,5 @@
 language: cpp
-dist: xenial
+dist: bionic
 
 matrix:
   include:
@@ -7,7 +7,6 @@ matrix:
       compiler: gcc
       addons:
         apt:
-          sources: ['ubuntu-toolchain-r-test']
           packages: ['g++-8', 'g++-8-multilib', 'g++-multilib', 'valgrind', 'expect', 'curl']
       env:
         - COMPILER=g++-8
@@ -17,23 +16,23 @@ matrix:
       compiler: clang
       addons:
         apt:
-          sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-xenial-6.0']
-          packages: ['clang-6.0', 'llvm-6.0-dev', 'g++-multilib', 'valgrind', 'expect', 'curl']
+          packages: ['clang-10', 'llvm-10-dev', 'g++-multilib', 'valgrind', 'expect', 'curl']
       env:
-        - COMPILER=clang++-6.0
+        - COMPILER=clang++-10
         - COMP=clang
-        - LDFLAGS=-fuse-ld=lld
 
     - os: osx
+      osx_image: xcode12
       compiler: gcc
       env:
         - COMPILER=g++
         - COMP=gcc
 
     - os: osx
+      osx_image: xcode12
       compiler: clang
       env:
-        - COMPILER=clang++ V='Apple LLVM 9.4.1' # Apple LLVM version 9.1.0 (clang-902.0.39.2)
+        - COMPILER=clang++
         - COMP=clang
 
 branches:
@@ -44,30 +43,57 @@ before_script:
   - cd src
 
 script:
+  # Download net
+  - make net
+
   # Obtain bench reference from git log
   - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig
   - export benchref=$(cat git_sig)
   - echo "Reference bench:" $benchref
-  #
+
+  # Compiler version string
+  - $COMPILER -v
+
+  # test help target
+  - make help
+
   # Verify bench number against various builds
   - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
-  - make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh $benchref
-  - make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref
-  - make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref
+  - make clean && make -j2 ARCH=x86-64-modern optimize=no debug=yes build && ../tests/signature.sh $benchref
+  - export CXXFLAGS="-Werror"
+  - make clean && make -j2 ARCH=x86-64-modern build && ../tests/signature.sh $benchref
+  - make clean && make -j2 ARCH=x86-64-ssse3 build && ../tests/signature.sh $benchref
+  - make clean && make -j2 ARCH=x86-64-sse3-popcnt build && ../tests/signature.sh $benchref
+  - make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-64 build && ../tests/signature.sh $benchref; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse41-popcnt build && ../tests/signature.sh $benchref; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" && "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi
+
+  # compile only for some more advanced architectures (might not run in travis)
+  - make clean && make -j2 ARCH=x86-64-avx2 build
+  - make clean && make -j2 ARCH=x86-64-bmi2 build
+  # needs gcc 10 to compile
+  - if [[ "$COMPILER" != "g++-8" ]]; then make clean && make -j2 ARCH=x86-64-avx512 build; fi
 
   #
   # Check perft and reproducible search
+  - make clean && make -j2 ARCH=x86-64-modern build
   - ../tests/perft.sh
   - ../tests/reprosearch.sh
+
   #
   # Valgrind
   #
   - export CXXFLAGS="-O1 -fno-inline"
-  - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
+  - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
   - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi
+
   #
   # Sanitizer
   #
-  # Use g++-8 as a proxy for having sanitizers, might need revision as they become available for more recent versions of clang/gcc
-  - if [[ "$COMPILER" == "g++-8" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
-  - if [[ "$COMPILER" == "g++-8" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread    optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=thread    optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
diff --git a/AUTHORS b/AUTHORS
index f08d71d3..d8f4d30e 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,10 +1,17 @@
-# List of authors for Stockfish, as of March 30, 2020
+# List of authors for Stockfish, as of August 4, 2020
 
+# Founders of the Stockfish project and fishtest infrastructure
 Tord Romstad (romstad)
 Marco Costalba (mcostalba)
 Joona Kiiski (zamar)
 Gary Linscott (glinscott)
 
+# Authors and inventors of NNUE, training, NNUE port
+Yu Nasu (ynasu87)
+Motohiro Isozaki (yaneurao)
+Hisayori Noda (nodchip)
+
+# all other authors of the code in alphabetical order
 Aditya (absimaldata)
 Adrian Petrescu (apetresc)
 Ajith Chandy Jose (ajithcj)
@@ -36,6 +43,7 @@ Dariusz Orzechowski
 David Zar
 Daylen Yang (daylen)
 DiscanX
+Dominik Schlösser (domschl)
 double-beep
 Eduardo Cáceres (eduherminio)
 Eelco de Groot (KingDefender)
@@ -45,6 +53,7 @@ Ernesto Gatti
 Linmiao Xu (linrock)
 Fabian Beuke (madnight)
 Fabian Fichter (ianfab)
+Fanael Linithien (Fanael)
 fanon
 Fauzi Akram Dabat (FauziAkram)
 Felix Wittmann
@@ -71,6 +80,7 @@ Jean Gauthier (OuaisBla)
 Jean-Francois Romang (jromang)
 Jekaa
 Jerry Donald Watson (jerrydonaldwatson)
+jjoshua2
 Jonathan Calovski (Mysseno)
 Jonathan Dumale (SFisGOD)
 Joost VandeVondele (vondele)
@@ -115,7 +125,9 @@ Nick Pelling (nickpelling)
 Nicklas Persson (NicklasPersson)
 Niklas Fiekas (niklasf)
 Nikolay Kostov (NikolayIT)
-Nguyen Pham
+Nguyen Pham (nguyenpham)
+Norman Schmidt (FireFather)
+notruck
 Ondrej Mosnáček (WOnder93)
 Oskar Werkelin Ahlin
 Pablo Vazquez
@@ -135,14 +147,17 @@ Richard Lloyd
 Rodrigo Exterckötter Tjäder
 Ron Britvich (Britvich)
 Ronald de Man (syzygy1, syzygy)
+rqs
 Ryan Schmitt
 Ryan Takker
 Sami Kiminki (skiminki)
 Sebastian Buchwald (UniQP)
 Sergei Antonov (saproj)
 Sergei Ivanov (svivanov72)
+Sergio Vieri (sergiovieri)
 sf-x
 Shane Booth (shane31)
+Shawn Varghese (xXH4CKST3RXx)
 Stefan Geschwentner (locutus2)
 Stefano Cardanobile (Stefano80)
 Steinar Gunderson (sesse)
@@ -155,9 +170,11 @@ Tom Vijlbrief (tomtor)
 Tomasz Sobczyk (Sopel97)
 Torsten Franz (torfranz, tfranzer)
 Tracey Emery (basepr1me)
+tttak
 Unai Corzo (unaiic)
 Uri Blass (uriblass)
 Vince Negri (cuddlestmonkey)
+zz4032
 
 
 # Additionally, we acknowledge the authors and maintainers of fishtest,
diff --git a/Readme.md b/README.md
similarity index 79%
rename from Readme.md
rename to README.md
index 2988f3b3..54f61b52 100644
--- a/Readme.md
+++ b/README.md
@@ -4,7 +4,13 @@
 [![Build Status](https://ci.appveyor.com/api/projects/status/github/official-stockfish/Stockfish?branch=master&svg=true)](https://ci.appveyor.com/project/mcostalba/stockfish/branch/master)
 
 [Stockfish](https://stockfishchess.org) is a free, powerful UCI chess engine
-derived from Glaurung 2.1. It is not a complete chess program and requires a
+derived from Glaurung 2.1. It features two evaluation functions, the classical
+evaluation based on handcrafted terms, and the NNUE evaluation based on
+efficiently updateable neural networks. The classical evaluation runs efficiently
+on most 64-bit CPU architectures, while the NNUE evaluation benefits strongly from the
+vector intrinsics available on modern CPUs (avx2 or similar).
+
+Stockfish is not a complete chess program and requires a
 UCI-compatible GUI (e.g. XBoard with PolyGlot, Scid, Cute Chess, eboard, Arena,
 Sigma Chess, Shredder, Chess Partner or Fritz) in order to be used comfortably.
 Read the documentation for your GUI of choice for information about how to use
@@ -22,21 +28,20 @@ This distribution of Stockfish consists of the following files:
   * src, a subdirectory containing the full source code, including a Makefile
     that can be used to compile Stockfish on Unix-like systems.
 
+To use the NNUE evaluation an additional data file with neural network parameters
+needs to be downloaded. The filename for the default set can be found as the default
+value of the `EvalFile` UCI option, with the format
+`nn-[SHA256 first 12 digits].nnue` (e.g. nn-c157e0a5755b.nnue). This file can be downloaded from
+```
+https://tests.stockfishchess.org/api/nn/[filename]
+```
+replacing `[filename]` as needed.
 
-## UCI parameters
+
+## UCI options
 
 Currently, Stockfish has the following UCI options:
 
-  * #### Debug Log File
-    Write all communication to and from the engine into a text file.
-
-  * #### Contempt
-    A positive value for contempt favors middle game positions and avoids draws.
-
-  * #### Analysis Contempt
-    By default, contempt is set to prefer the side to move. Set this option to "White"
-    or "Black" to analyse with contempt for that side, or "Off" to disable contempt.
-
   * #### Threads
     The number of CPU threads used for searching a position. For best performance, set
     this equal to the number of CPU cores available.
@@ -44,9 +49,6 @@ Currently, Stockfish has the following UCI options:
   * #### Hash
     The size of the hash table in MB. It is recommended to set Hash after setting Threads.
 
-  * #### Clear Hash
-    Clear the hash table.
-
   * #### Ponder
     Let Stockfish ponder its next move while the opponent is thinking.
 
@@ -54,10 +56,32 @@ Currently, Stockfish has the following UCI options:
     Output the N best lines (principal variations, PVs) when searching.
     Leave at 1 for best performance.
 
-  * #### Skill Level
-    Lower the Skill Level in order to make Stockfish play weaker (see also UCI_LimitStrength).
-    Internally, MultiPV is enabled, and with a certain probability depending on the Skill Level a
-    weaker move will be played.
+  * #### Use NNUE
+    Toggle between the NNUE and classical evaluation functions. If set to "true",
+    the network parameters must be available to load from file (see also EvalFile).
+
+  * #### EvalFile
+    The name of the file of the NNUE evaluation parameters. Depending on the GUI the
+    filename should include the full path to the folder/directory that contains the file.
+
+  * #### Contempt
+    A positive value for contempt favors middle game positions and avoids draws,
+    effective for the classical evaluation only.
+
+  * #### Analysis Contempt
+    By default, contempt is set to prefer the side to move. Set this option to "White"
+    or "Black" to analyse with contempt for that side, or "Off" to disable contempt.
+
+  * #### UCI_AnalyseMode
+    An option handled by your GUI.
+
+  * #### UCI_Chess960
+    An option handled by your GUI. If true, Stockfish will play Chess960.
+
+  * #### UCI_ShowWDL
+    If enabled, show approximate WDL statistics as part of the engine output.
+    These WDL numbers model expected game outcomes for a given evaluation and
+    game ply for engine self-play at fishtest LTC conditions (60+0.6s per game).
 
   * #### UCI_LimitStrength
     Enable weaker play aiming for an Elo rating as set by UCI_Elo. This option overrides Skill Level.
@@ -66,26 +90,10 @@ Currently, Stockfish has the following UCI options:
     If enabled by UCI_LimitStrength, aim for an engine strength of the given Elo.
     This Elo rating has been calibrated at a time control of 60s+0.6s and anchored to CCRL 40/4.
 
-  * #### Move Overhead
-    Assume a time delay of x ms due to network and GUI overheads. This is useful to
-    avoid losses on time in those cases.
-
-  * #### Minimum Thinking Time
-    Search for at least x ms per move.
-
-  * #### Slow Mover
-    Lower values will make Stockfish take less time in games, higher values will
-    make it think longer.
-
-  * #### nodestime
-    Tells the engine to use nodes searched instead of wall time to account for
-    elapsed time. Useful for engine testing.
-
-  * #### UCI_Chess960
-    An option handled by your GUI. If true, Stockfish will play Chess960.
-
-  * #### UCI_AnalyseMode
-    An option handled by your GUI.
+  * #### Skill Level
+    Lower the Skill Level in order to make Stockfish play weaker (see also UCI_LimitStrength).
+    Internally, MultiPV is enabled, and with a certain probability depending on the Skill Level a
+    weaker move will be played.
 
   * #### SyzygyPath
     Path to the folders/directories storing the Syzygy tablebase files. Multiple
@@ -112,6 +120,47 @@ Currently, Stockfish has the following UCI options:
     Limit Syzygy tablebase probing to positions with at most this many pieces left
     (including kings and pawns).
 
+  * #### Move Overhead
+    Assume a time delay of x ms due to network and GUI overheads. This is useful to
+    avoid losses on time in those cases.
+
+  * #### Slow Mover
+    Lower values will make Stockfish take less time in games, higher values will
+    make it think longer.
+
+  * #### nodestime
+    Tells the engine to use nodes searched instead of wall time to account for
+    elapsed time. Useful for engine testing.
+
+  * #### Clear Hash
+    Clear the hash table.
+
+  * #### Debug Log File
+    Write all communication to and from the engine into a text file.
+
+## Classical and NNUE evaluation
+
+Both approaches assign a value to a position that is used in alpha-beta (PVS) search
+to find the best move. The classical evaluation computes this value as a function
+of various chess concepts, handcrafted by experts, tested and tuned using fishtest.
+The NNUE evaluation computes this value with a neural network based on basic
+inputs (e.g. piece positions only). The network is optimized and trained
+on the evaluations of millions of positions at moderate search depth.
+
+The NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward.
+It can be evaluated efficiently on CPUs, and exploits the fact that only parts
+of the neural network need to be updated after a typical chess move.
+[The nodchip repository](https://github.com/nodchip/Stockfish) provides additional
+tools to train and develop the NNUE networks.
+
+On CPUs supporting modern vector instructions (avx2 and similar), the NNUE evaluation
+results in stronger playing strength, even if the nodes per second computed by the engine
+is somewhat lower (roughly 60% of nps is typical).
+
+Note that the NNUE evaluation depends on the Stockfish binary and the network parameter
+file (see EvalFile). Not every parameter file is compatible with a given Stockfish binary.
+The default value of the EvalFile UCI option is the name of a network that is guaranteed
+to be compatible with that binary.
 
 ## What to expect from Syzygybases?
 
@@ -191,7 +240,6 @@ afterwards. Due to memory fragmentation, it may not always be
 possible to allocate large pages even when enabled. A reboot
 might alleviate this problem. To determine whether large pages
 are in use, see the engine log.
->>>>>>> master
 
 ## Compiling Stockfish yourself from the sources
 
diff --git a/appveyor.yml b/appveyor.yml
index 21f3bbe3..a3732a23 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -4,10 +4,9 @@ clone_depth: 50
 branches:
   only:
     - master
-    - appveyor
 
 # Operating system (build VM template)
-os: Visual Studio 2017
+os: Visual Studio 2019
 
 # Build platform, i.e. x86, x64, AnyCPU. This setting is optional.
 platform:
@@ -36,8 +35,11 @@ before_build:
       $src = $src.Replace("\", "/")
 
       # Build CMakeLists.txt
-      $t = 'cmake_minimum_required(VERSION 3.8)',
+      $t = 'cmake_minimum_required(VERSION 3.17)',
            'project(Stockfish)',
+           'set(CMAKE_CXX_STANDARD 17)',
+           'set(CMAKE_CXX_STANDARD_REQUIRED ON)',
+           'set (CMAKE_CXX_EXTENSIONS OFF)',
            'set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/src)',
            'set(source_files', $src, ')',
            'add_executable(stockfish ${source_files})'
@@ -51,13 +53,28 @@ before_build:
       $b = git log HEAD | sls "\b[Bb]ench[ :]+[0-9]{7}" | select -first 1
       $bench = $b -match '\D+(\d+)' | % { $matches[1] }
       Write-Host "Reference bench:" $bench
-      $g = "Visual Studio 15 2017"
-      If (${env:PLATFORM} -eq 'x64') { $g = $g + ' Win64' }
-      cmake -G "${g}" .
-      Write-Host "Generated files for: " $g
+      $g = "Visual Studio 16 2019"
+      If (${env:PLATFORM} -eq 'x64') { $a = "x64" }
+      If (${env:PLATFORM} -eq 'x86') { $a = "Win32" }
+      cmake -G "${g}" -A ${a} .
+      Write-Host "Generated files for: " $g $a
 
 build_script:
   - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
+  - ps: |
+      # Download the default NNUE net from fishtest; its file name is parsed out of src\ucioption.cpp
+      $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}\.nnue"
+      $dummy = $nnuenet -match "(?<nnuenet>nn-[a-z0-9]{12}\.nnue)"
+      $nnuenet = $Matches.nnuenet
+      Write-Host "Default net:" $nnuenet
+      $nnuedownloadurl = "https://tests.stockfishchess.org/api/nn/$nnuenet"
+      $nnuefilepath = "src\${env:CONFIGURATION}\$nnuenet"
+      if (Test-Path -Path $nnuefilepath) {
+            Write-Host "Already available."
+      } else {
+            Write-Host "Downloading $nnuedownloadurl to $nnuefilepath"
+            Invoke-WebRequest -Uri $nnuedownloadurl -OutFile $nnuefilepath
+      }
 
 before_test:
   - cd src/%CONFIGURATION%
diff --git a/src/Makefile b/src/Makefile
index f92b1f16..a8ef55e3 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -38,11 +38,12 @@ PGOBENCH = ./$(EXE) bench
 ### Source and object files
 SRCS = benchmark.cpp bitbase.cpp bitboard.cpp cluster.cpp endgame.cpp evaluate.cpp main.cpp \
 	material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
-	search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp
+	search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
+	nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp
 
 OBJS = $(notdir $(SRCS:.cpp=.o))
 
-VPATH = syzygy
+VPATH = syzygy:nnue:nnue/features
 
 ### Establish the operating system name
 KERNEL = $(shell uname -s)
@@ -66,8 +67,16 @@ endif
 # bits = 64/32        --- -DIS_64BIT       --- 64-/32-bit operating system
 # prefetch = yes/no   --- -DUSE_PREFETCH   --- Use prefetch asm-instruction
 # popcnt = yes/no     --- -DUSE_POPCNT     --- Use popcnt asm-instruction
-# sse = yes/no        --- -msse            --- Use Intel Streaming SIMD Extensions
 # pext = yes/no       --- -DUSE_PEXT       --- Use pext x86_64 asm-instruction
+# sse = yes/no        --- -msse            --- Use Intel Streaming SIMD Extensions
+# mmx = yes/no        --- -mmmx            --- Use Intel MMX instructions
+# sse2 = yes/no       --- -msse2           --- Use Intel Streaming SIMD Extensions 2
+# ssse3 = yes/no      --- -mssse3          --- Use Intel Supplemental Streaming SIMD Extensions 3
+# sse41 = yes/no      --- -msse4.1         --- Use Intel Streaming SIMD Extensions 4.1
+# avx2 = yes/no       --- -mavx2           --- Use Intel Advanced Vector Extensions 2
+# avx512 = yes/no     --- -mavx512bw       --- Use Intel Advanced Vector Extensions 512
+# vnni = yes/no       --- -mavx512vnni     --- Use Intel Vector Neural Network Instructions 512
+# neon = yes/no       --- -DUSE_NEON       --- Use ARM SIMD architecture
 # mpi = yes/no        --- -DUSE_MPI        --- Use Message Passing Interface
 #
 # Note that Makefile is space sensitive, so when adding new architectures
@@ -75,69 +84,176 @@ endif
 # at the end of the line for flag values.
 
 ### 2.1. General and architecture defaults
+
+ifeq ($(ARCH),)
+    empty_arch = yes
+endif
+
 optimize = yes
 debug = no
 sanitize = no
 bits = 64
 prefetch = no
 popcnt = no
-sse = no
 pext = no
+sse = no
+mmx = no
+sse2 = no
+ssse3 = no
+sse41 = no
+avx2 = no
+avx512 = no
+vnni = no
+neon = no
 mpi = no
+ARCH = x86-64-modern
+STRIP = strip
 
 ### 2.2 Architecture specific
+
+ifeq ($(findstring x86,$(ARCH)),x86)
+
+# x86-32/64
+
+ifeq ($(findstring x86-32,$(ARCH)),x86-32)
+	arch = i386
+	bits = 32
+	sse = yes
+	mmx = yes
+else
+	arch = x86_64
+	sse = yes
+	sse2 = yes
+endif
+
+ifeq ($(findstring -sse,$(ARCH)),-sse)
+	sse = yes
+endif
+
+ifeq ($(findstring -popcnt,$(ARCH)),-popcnt)
+	popcnt = yes
+endif
+
+ifeq ($(findstring -mmx,$(ARCH)),-mmx)
+	mmx = yes
+endif
+
+ifeq ($(findstring -sse2,$(ARCH)),-sse2)
+	sse = yes
+	sse2 = yes
+endif
+
+ifeq ($(findstring -ssse3,$(ARCH)),-ssse3)
+	sse = yes
+	sse2 = yes
+	ssse3 = yes
+endif
+
+ifeq ($(findstring -sse41,$(ARCH)),-sse41)
+	sse = yes
+	sse2 = yes
+	ssse3 = yes
+	sse41 = yes
+endif
+
+ifeq ($(findstring -modern,$(ARCH)),-modern)
+	popcnt = yes
+	sse = yes
+	sse2 = yes
+	ssse3 = yes
+	sse41 = yes
+endif
+
+ifeq ($(findstring -avx2,$(ARCH)),-avx2)
+	popcnt = yes
+	sse = yes
+	sse2 = yes
+	ssse3 = yes
+	sse41 = yes
+	avx2 = yes
+endif
+
+ifeq ($(findstring -bmi2,$(ARCH)),-bmi2)
+	popcnt = yes
+	sse = yes
+	sse2 = yes
+	ssse3 = yes
+	sse41 = yes
+	avx2 = yes
+	pext = yes
+endif
+
+ifeq ($(findstring -avx512,$(ARCH)),-avx512)
+	popcnt = yes
+	sse = yes
+	sse2 = yes
+	ssse3 = yes
+	sse41 = yes
+	avx2 = yes
+	pext = yes
+	avx512 = yes
+endif
+
+ifeq ($(findstring -vnni,$(ARCH)),-vnni)
+	popcnt = yes
+	sse = yes
+	sse2 = yes
+	ssse3 = yes
+	sse41 = yes
+	avx2 = yes
+	pext = yes
+	avx512 = yes
+	vnni = yes
+endif
+
+ifeq ($(sse),yes)
+	prefetch = yes
+endif
+
+# 64-bit pext is not available on x86-32
+ifeq ($(bits),32)
+	pext = no
+endif
+
+else
+
+# all other architectures
+
 ifeq ($(ARCH),general-32)
 	arch = any
 	bits = 32
 endif
 
-ifeq ($(ARCH),x86-32-old)
-	arch = i386
-	bits = 32
-endif
-
-ifeq ($(ARCH),x86-32)
-	arch = i386
-	bits = 32
-	prefetch = yes
-	sse = yes
-endif
-
 ifeq ($(ARCH),general-64)
 	arch = any
 endif
 
-ifeq ($(ARCH),x86-64)
-	arch = x86_64
-	prefetch = yes
-	sse = yes
-endif
-
-ifeq ($(ARCH),x86-64-modern)
-	arch = x86_64
-	prefetch = yes
-	popcnt = yes
-	sse = yes
-endif
-
-ifeq ($(ARCH),x86-64-bmi2)
-	arch = x86_64
-	prefetch = yes
-	popcnt = yes
-	sse = yes
-	pext = yes
-endif
-
 ifeq ($(ARCH),armv7)
 	arch = armv7
 	prefetch = yes
 	bits = 32
 endif
 
+ifeq ($(ARCH),armv7-neon)
+	arch = armv7
+	prefetch = yes
+	popcnt = yes
+	neon = yes
+	bits = 32
+endif
+
 ifeq ($(ARCH),armv8)
 	arch = armv8-a
 	prefetch = yes
 	popcnt = yes
+	neon = yes
+endif
+
+ifeq ($(ARCH),apple-silicon)
+	arch = arm64
+	prefetch = yes
+	popcnt = yes
+	neon = yes
 endif
 
 ifeq ($(ARCH),ppc-32)
@@ -151,13 +267,15 @@ ifeq ($(ARCH),ppc-64)
 	prefetch = yes
 endif
 
+endif
+
 ### ==========================================================================
 ### Section 3. Low-level Configuration
 ### ==========================================================================
 
 ### 3.1 Selecting compiler (default = gcc)
-CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++11 $(EXTRACXXFLAGS)
-DEPENDFLAGS += -std=c++11
+CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS)
+DEPENDFLAGS += -std=c++17
 LDFLAGS += $(EXTRALDFLAGS)
 
 ifeq ($(COMP),)
@@ -169,7 +287,7 @@ ifeq ($(COMP),gcc)
 	CXX=g++
 	CXXFLAGS += -pedantic -Wextra -Wshadow
 
-	ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8))
+	ifeq ($(arch),$(filter $(arch),armv7 armv8-a))
 		ifeq ($(OS),Android)
 			CXXFLAGS += -m$(bits)
 			LDFLAGS += -m$(bits)
@@ -179,9 +297,16 @@ ifeq ($(COMP),gcc)
 		LDFLAGS += -m$(bits)
 	endif
 
+	ifeq ($(arch),$(filter $(arch),armv7))
+		LDFLAGS += -latomic
+	endif
+
 	ifneq ($(KERNEL),Darwin)
 	   LDFLAGS += -Wl,--no-as-needed
 	endif
+
+	gccversion = $(shell $(CXX) --version)
+	gccisclang = $(findstring clang,$(gccversion))
 endif
 
 ifeq ($(COMP),mingw)
@@ -226,7 +351,7 @@ ifeq ($(COMP),clang)
 	endif
 	endif
 
-	ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8))
+	ifeq ($(arch),$(filter $(arch),armv7 armv8-a))
 		ifeq ($(OS),Android)
 			CXXFLAGS += -m$(bits)
 			LDFLAGS += -m$(bits)
@@ -251,8 +376,27 @@ endif
 endif
 
 ifeq ($(KERNEL),Darwin)
-	CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.9
-	LDFLAGS += -arch $(arch) -mmacosx-version-min=10.9
+	CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
+	LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
+endif
+
+# To cross-compile for Android, NDK version r21 or later is recommended.
+# In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils.
+# We do not yet know how to make PGO builds with the NDK.
+ifeq ($(COMP),ndk)
+	CXXFLAGS += -stdlib=libc++ -fPIE
+	ifeq ($(arch),armv7)
+		comp=armv7a-linux-androideabi16-clang
+		CXX=armv7a-linux-androideabi16-clang++
+		CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
+		STRIP=arm-linux-androideabi-strip
+	endif
+	ifeq ($(arch),armv8-a)
+		comp=aarch64-linux-android21-clang
+		CXX=aarch64-linux-android21-clang++
+		STRIP=aarch64-linux-android-strip
+	endif
+	LDFLAGS += -static-libstdc++ -pie -lm -latomic
 endif
 
 ### Travis CI script uses COMPILER to overwrite CXX
@@ -271,7 +415,9 @@ ifneq ($(comp),mingw)
 	ifneq ($(OS),Android)
 		# Haiku has pthreads in its libroot, so only link it in on other platforms
 		ifneq ($(KERNEL),Haiku)
-			LDFLAGS += -lpthread
+			ifneq ($(COMP),ndk)
+				LDFLAGS += -lpthread
+			endif
 		endif
 	endif
 endif
@@ -285,8 +431,8 @@ endif
 
 ### 3.2.2 Debugging with undefined behavior sanitizers
 ifneq ($(sanitize),no)
-        CXXFLAGS += -g3 -fsanitize=$(sanitize) -fuse-ld=gold
-        LDFLAGS += -fsanitize=$(sanitize) -fuse-ld=gold
+        CXXFLAGS += -g3 -fsanitize=$(sanitize)
+        LDFLAGS += -fsanitize=$(sanitize)
 endif
 
 ### 3.3 Optimization
@@ -316,7 +462,6 @@ endif
 ifeq ($(prefetch),yes)
 	ifeq ($(sse),yes)
 		CXXFLAGS += -msse
-		DEPENDFLAGS += -msse
 	endif
 else
 	CXXFLAGS += -DNO_PREFETCH
@@ -324,7 +469,7 @@ endif
 
 ### 3.6 popcnt
 ifeq ($(popcnt),yes)
-	ifeq ($(arch),$(filter $(arch),ppc64 armv8-a))
+	ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8-a arm64))
 		CXXFLAGS += -DUSE_POPCNT
 	else ifeq ($(comp),icc)
 		CXXFLAGS += -msse3 -DUSE_POPCNT
@@ -333,11 +478,70 @@ ifeq ($(popcnt),yes)
 	endif
 endif
 
+
+ifeq ($(avx2),yes)
+	CXXFLAGS += -DUSE_AVX2
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+		CXXFLAGS += -mavx2
+	endif
+endif
+
+ifeq ($(avx512),yes)
+	CXXFLAGS += -DUSE_AVX512
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+		CXXFLAGS += -mavx512f -mavx512bw
+	endif
+endif
+
+ifeq ($(vnni),yes)
+	CXXFLAGS += -DUSE_VNNI
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+		CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl
+	endif
+endif
+
+ifeq ($(sse41),yes)
+	CXXFLAGS += -DUSE_SSE41
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+		CXXFLAGS += -msse4.1
+	endif
+endif
+
+ifeq ($(ssse3),yes)
+	CXXFLAGS += -DUSE_SSSE3
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+		CXXFLAGS += -mssse3
+	endif
+endif
+
+ifeq ($(sse2),yes)
+	CXXFLAGS += -DUSE_SSE2
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+		CXXFLAGS += -msse2
+	endif
+endif
+
+ifeq ($(mmx),yes)
+	CXXFLAGS += -DUSE_MMX
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
+		CXXFLAGS += -mmmx
+	endif
+endif
+
+ifeq ($(neon),yes)
+	CXXFLAGS += -DUSE_NEON
+# -mfpu=neon is a 32-bit ARM option that AArch64 compilers reject, and the ndk
+# section sets it itself, so add it only for Linux armv7 builds without ndk.
+	ifeq ($(KERNEL)/$(arch)/$(filter ndk,$(COMP)),Linux/armv7/)
+		CXXFLAGS += -mfpu=neon
+	endif
+endif
+
 ### 3.7 pext
 ifeq ($(pext),yes)
 	CXXFLAGS += -DUSE_PEXT
 	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
-		CXXFLAGS += -msse4 -mbmi2
+		CXXFLAGS += -mbmi2
 	endif
 endif
 
@@ -346,18 +550,36 @@ endif
 ### needs access to the optimization flags.
 ifeq ($(optimize),yes)
 ifeq ($(debug), no)
-	ifeq ($(comp),$(filter $(comp),gcc clang))
+	ifeq ($(COMP),ndk)
+		CXXFLAGS += -flto=thin
+		LDFLAGS += $(CXXFLAGS)
+	else ifeq ($(comp),clang)
+		CXXFLAGS += -flto=thin
+		LDFLAGS += $(CXXFLAGS)
+
+# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
+# GCC on some systems.
+	else ifeq ($(comp),gcc)
+	ifeq ($(gccisclang),)
 		CXXFLAGS += -flto
+		LDFLAGS += $(CXXFLAGS) -flto=jobserver
+		ifneq ($(findstring MINGW,$(KERNEL)),)
+			LDFLAGS += -save-temps
+		else ifneq ($(findstring MSYS,$(KERNEL)),)
+			LDFLAGS += -save-temps
+		endif
+	else
+		CXXFLAGS += -flto=thin
 		LDFLAGS += $(CXXFLAGS)
 	endif
 
 # To use LTO and static linking on windows, the tool chain requires a recent gcc:
-# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are know to work, older might not.
+# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are known to work, older might not.
 # So, only enable it for a cross from Linux by default.
-	ifeq ($(comp),mingw)
+	else ifeq ($(comp),mingw)
 	ifeq ($(KERNEL),Linux)
 		CXXFLAGS += -flto
-		LDFLAGS += $(CXXFLAGS)
+		LDFLAGS += $(CXXFLAGS) -flto=jobserver
 	endif
 	endif
 endif
@@ -389,23 +611,34 @@ help:
 	@echo ""
 	@echo "Supported targets:"
 	@echo ""
+	@echo "help                    > Display architecture details"
 	@echo "build                   > Standard build"
-	@echo "profile-build           > PGO build"
+	@echo "net                     > Download the default nnue net"
+	@echo "profile-build           > Faster build (with profile-guided optimization)"
 	@echo "strip                   > Strip executable"
 	@echo "install                 > Install executable"
 	@echo "clean                   > Clean up"
 	@echo ""
 	@echo "Supported archs:"
 	@echo ""
-	@echo "x86-64-bmi2             > x86 64-bit with pext support (also enables SSE4)"
-	@echo "x86-64-modern           > x86 64-bit with popcnt support (also enables SSE3)"
-	@echo "x86-64                  > x86 64-bit generic"
-	@echo "x86-32                  > x86 32-bit (also enables SSE)"
-	@echo "x86-32-old              > x86 32-bit fall back for old hardware"
+	@echo "x86-64-vnni             > x86 64-bit with vnni support"
+	@echo "x86-64-avx512           > x86 64-bit with avx512 support"
+	@echo "x86-64-bmi2             > x86 64-bit with bmi2 support"
+	@echo "x86-64-avx2             > x86 64-bit with avx2 support"
+	@echo "x86-64-sse41-popcnt     > x86 64-bit with sse41 and popcnt support"
+	@echo "x86-64-modern           > common modern CPU, currently x86-64-sse41-popcnt"
+	@echo "x86-64-ssse3            > x86 64-bit with ssse3 support"
+	@echo "x86-64-sse3-popcnt      > x86 64-bit with sse3 and popcnt support"
+	@echo "x86-64                  > x86 64-bit generic (with sse2 support)"
+	@echo "x86-32-sse41-popcnt     > x86 32-bit with sse41 and popcnt support"
+	@echo "x86-32-sse2             > x86 32-bit with sse2 support"
+	@echo "x86-32                  > x86 32-bit generic (with mmx and sse support)"
 	@echo "ppc-64                  > PPC 64-bit"
 	@echo "ppc-32                  > PPC 32-bit"
 	@echo "armv7                   > ARMv7 32-bit"
-	@echo "armv8                   > ARMv8 64-bit"
+	@echo "armv7-neon              > ARMv7 32-bit with popcnt and neon"
+	@echo "armv8                   > ARMv8 64-bit with popcnt and neon"
+	@echo "apple-silicon           > Apple silicon ARM64"
 	@echo "general-64              > unspecified 64-bit"
 	@echo "general-32              > unspecified 32-bit"
 	@echo ""
@@ -415,27 +648,34 @@ help:
 	@echo "mingw                   > Gnu compiler with MinGW under Windows"
 	@echo "clang                   > LLVM Clang compiler"
 	@echo "icc                     > Intel compiler"
+	@echo "ndk                     > Google NDK to cross-compile for Android"
 	@echo ""
 	@echo "Simple examples. If you don't know what to do, you likely want to run: "
 	@echo ""
-	@echo "make build ARCH=x86-64    (This is for 64-bit systems)"
-	@echo "make build ARCH=x86-32    (This is for 32-bit systems)"
+	@echo "make -j build ARCH=x86-64  (A portable, slow compile for 64-bit systems)"
+	@echo "make -j build ARCH=x86-32  (A portable, slow compile for 32-bit systems)"
 	@echo ""
-	@echo "Advanced examples, for experienced users: "
+	@echo "Advanced examples, for experienced users looking for performance: "
 	@echo ""
-	@echo "make build ARCH=x86-64 COMP=clang"
-	@echo "make profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8"
+	@echo "make    help  ARCH=x86-64-bmi2"
+	@echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0"
+	@echo "make -j build ARCH=x86-64-ssse3 COMP=clang"
 	@echo ""
+ifneq ($(empty_arch), yes)
+	@echo "-------------------------------"
+	@echo "The selected architecture $(ARCH) will enable the following configuration: "
+	@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
+endif
 
 
-.PHONY: help build profile-build strip install clean objclean profileclean \
+.PHONY: help build profile-build strip install clean net objclean profileclean \
         config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
         clang-profile-use clang-profile-make
 
 build: config-sanity
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
 
-profile-build: config-sanity objclean profileclean
+profile-build: net config-sanity objclean profileclean
 	@echo ""
 	@echo "Step 1/4. Building instrumented executable ..."
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
@@ -451,7 +691,7 @@ profile-build: config-sanity objclean profileclean
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean
 
 strip:
-	strip $(EXE)
+	$(STRIP) $(EXE)
 
 install:
 	-mkdir -p -m 755 $(BINDIR)
@@ -462,14 +702,38 @@ install:
 clean: objclean profileclean
 	@rm -f .depend *~ core
 
+# net: fetch the default net named in ucioption.cpp unless it is already present, then check that its sha256 prefix matches the file name.
+net:
+	$(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
+	@echo "Default net: $(nnuenet)"
+	$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
+	$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skLf"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
+	@if test -f "$(nnuenet)"; then \
+            echo "Already available."; \
+         else \
+            if [ "x$(curl_or_wget)" = "x" ]; then \
+               echo "Automatic download failed: neither curl nor wget is installed. Install one of these tools or download the net manually"; exit 1; \
+            else \
+               echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet) || { rm -f $(nnuenet); echo "Download failed, partial $(nnuenet) removed. Retry or download the net manually"; exit 1; }; \
+            fi; \
+        fi;
+	$(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi))
+	@if [ "x$(shasum_command)" != "x" ]; then \
+	    if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
+                echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; \
+            fi \
+         else \
+            echo "shasum / sha256sum not found, skipping net validation"; \
+        fi
+
 # clean binaries and objects
 objclean:
-	@rm -f $(EXE) *.o ./syzygy/*.o
+	@rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o
 
 # clean auxiliary profiling files
 profileclean:
 	@rm -rf profdir
-	@rm -f bench.txt *.gcda *.gcno
+	@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s
 	@rm -f stockfish.profdata *.profraw
 
 default:
@@ -493,8 +757,16 @@ config-sanity:
 	@echo "os: '$(OS)'"
 	@echo "prefetch: '$(prefetch)'"
 	@echo "popcnt: '$(popcnt)'"
-	@echo "sse: '$(sse)'"
 	@echo "pext: '$(pext)'"
+	@echo "sse: '$(sse)'"
+	@echo "mmx: '$(mmx)'"
+	@echo "sse2: '$(sse2)'"
+	@echo "ssse3: '$(ssse3)'"
+	@echo "sse41: '$(sse41)'"
+	@echo "avx2: '$(avx2)'"
+	@echo "avx512: '$(avx512)'"
+	@echo "vnni: '$(vnni)'"
+	@echo "neon: '$(neon)'"
 	@echo "mpi: '$(mpi)'"
 	@echo ""
 	@echo "Flags:"
@@ -509,16 +781,25 @@ config-sanity:
 	@test "$(optimize)" = "yes" || test "$(optimize)" = "no"
 	@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
 	 test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
-	 test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a"
+	 test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64"
 	@test "$(bits)" = "32" || test "$(bits)" = "64"
 	@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
 	@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
-	@test "$(sse)" = "yes" || test "$(sse)" = "no"
 	@test "$(pext)" = "yes" || test "$(pext)" = "no"
-	@test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang"
+	@test "$(sse)" = "yes" || test "$(sse)" = "no"
+	@test "$(mmx)" = "yes" || test "$(mmx)" = "no"
+	@test "$(sse2)" = "yes" || test "$(sse2)" = "no"
+	@test "$(ssse3)" = "yes" || test "$(ssse3)" = "no"
+	@test "$(sse41)" = "yes" || test "$(sse41)" = "no"
+	@test "$(avx2)" = "yes" || test "$(avx2)" = "no"
+	@test "$(avx512)" = "yes" || test "$(avx512)" = "no"
+	@test "$(vnni)" = "yes" || test "$(vnni)" = "no"
+	@test "$(neon)" = "yes" || test "$(neon)" = "no"
+	@test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \
+	|| test "$(comp)" = "armv7a-linux-androideabi16-clang"  || test "$(comp)" = "aarch64-linux-android21-clang"
 
 $(EXE): $(OBJS)
-	$(CXX) -o $@ $(OBJS) $(LDFLAGS)
+	+$(CXX) -o $@ $(OBJS) $(LDFLAGS)
 
 clang-profile-make:
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
diff --git a/src/benchmark.cpp b/src/benchmark.cpp
index f338cdda..806e9840 100644
--- a/src/benchmark.cpp
+++ b/src/benchmark.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -88,7 +86,7 @@ const vector<string> Defaults = {
 
   // Chess 960
   "setoption name UCI_Chess960 value true",
-  "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w KQkq - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6",
+  "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6",
   "setoption name UCI_Chess960 value false"
 };
 
@@ -97,8 +95,9 @@ const vector<string> Defaults = {
 /// setup_bench() builds a list of UCI commands to be run by bench. There
 /// are five parameters: TT size in MB, number of search threads that
 /// should be used, the limit value spent for each position, a file name
-/// where to look for positions in FEN format and the type of the limit:
-/// depth, perft, nodes and movetime (in millisecs).
+/// where to look for positions in FEN format, the type of the limit
+/// (depth, perft, nodes or movetime in millisecs), and the evaluation
+/// type (mixed by default, classical or NNUE).
 ///
 /// bench -> search default positions up to depth 13
 /// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB)
@@ -117,6 +116,7 @@ vector<string> setup_bench(const Position& current, istream& is) {
   string limit     = (is >> token) ? token : "13";
   string fenFile   = (is >> token) ? token : "default";
   string limitType = (is >> token) ? token : "depth";
+  string evalType  = (is >> token) ? token : "mixed";
 
   go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit;
 
@@ -148,13 +148,20 @@ vector<string> setup_bench(const Position& current, istream& is) {
   list.emplace_back("setoption name Hash value " + ttSize);
   list.emplace_back("ucinewgame");
 
+  size_t posCounter = 0;
+
   for (const string& fen : fens)
       if (fen.find("setoption") != string::npos)
           list.emplace_back(fen);
       else
       {
+          if (evalType == "classical" || (evalType == "mixed" && posCounter % 2 == 0))
+              list.emplace_back("setoption name Use NNUE value false");
+          else if (evalType == "NNUE" || (evalType == "mixed" && posCounter % 2 != 0))
+              list.emplace_back("setoption name Use NNUE value true");
           list.emplace_back("position fen " + fen);
           list.emplace_back(go);
+          ++posCounter;
       }
 
   return list;
diff --git a/src/bitbase.cpp b/src/bitbase.cpp
index 7e27eb96..bbe8e9a7 100644
--- a/src/bitbase.cpp
+++ b/src/bitbase.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/bitboard.cpp b/src/bitboard.cpp
index 0bf7eef9..f531010c 100644
--- a/src/bitboard.cpp
+++ b/src/bitboard.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/bitboard.h b/src/bitboard.h
index 1c598108..a899d879 100644
--- a/src/bitboard.h
+++ b/src/bitboard.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -124,12 +122,13 @@ inline Bitboard  operator&(Square s, Bitboard b) { return b & s; }
 inline Bitboard  operator|(Square s, Bitboard b) { return b | s; }
 inline Bitboard  operator^(Square s, Bitboard b) { return b ^ s; }
 
-inline Bitboard  operator|(Square s, Square s2) { return square_bb(s) | s2; }
+inline Bitboard  operator|(Square s1, Square s2) { return square_bb(s1) | s2; }
 
 constexpr bool more_than_one(Bitboard b) {
   return b & (b - 1);
 }
 
+
 constexpr bool opposite_colors(Square s1, Square s2) {
   return (s1 + rank_of(s1) + s2 + rank_of(s2)) & 1;
 }
@@ -138,19 +137,19 @@ constexpr bool opposite_colors(Square s1, Square s2) {
 /// rank_bb() and file_bb() return a bitboard representing all the squares on
 /// the given file or rank.
 
-inline Bitboard rank_bb(Rank r) {
+constexpr Bitboard rank_bb(Rank r) {
   return Rank1BB << (8 * r);
 }
 
-inline Bitboard rank_bb(Square s) {
+constexpr Bitboard rank_bb(Square s) {
   return rank_bb(rank_of(s));
 }
 
-inline Bitboard file_bb(File f) {
+constexpr Bitboard file_bb(File f) {
   return FileABB << f;
 }
 
-inline Bitboard file_bb(Square s) {
+constexpr Bitboard file_bb(Square s) {
   return file_bb(file_of(s));
 }
 
@@ -195,16 +194,16 @@ constexpr Bitboard pawn_double_attacks_bb(Bitboard b) {
 
 
 /// adjacent_files_bb() returns a bitboard representing all the squares on the
-/// adjacent files of the given one.
+/// adjacent files of a given square.
 
-inline Bitboard adjacent_files_bb(Square s) {
+constexpr Bitboard adjacent_files_bb(Square s) {
   return shift<EAST>(file_bb(s)) | shift<WEST>(file_bb(s));
 }
 
 
-/// line_bb(Square, Square) returns a bitboard representing an entire line,
-/// from board edge to board edge, that intersects the given squares. If the
-/// given squares are not on a same file/rank/diagonal, returns 0. For instance,
+/// line_bb() returns a bitboard representing an entire line (from board edge
+/// to board edge) that intersects the two given squares. If the given squares
+/// are not on a same file/rank/diagonal, the function returns 0. For instance,
 /// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal.
 
 inline Bitboard line_bb(Square s1, Square s2) {
@@ -215,8 +214,8 @@ inline Bitboard line_bb(Square s1, Square s2) {
 
 
 /// between_bb() returns a bitboard representing squares that are linearly
-/// between the given squares (excluding the given squares). If the given
-/// squares are not on a same file/rank/diagonal, return 0. For instance,
+/// between the two given squares (excluding the given squares). If the given
+/// squares are not on a same file/rank/diagonal, we return 0. For instance,
 /// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5 and E6.
 
 inline Bitboard between_bb(Square s1, Square s2) {
@@ -229,7 +228,7 @@ inline Bitboard between_bb(Square s1, Square s2) {
 /// in front of the given one, from the point of view of the given color. For instance,
 /// forward_ranks_bb(BLACK, SQ_D3) will return the 16 squares on ranks 1 and 2.
 
-inline Bitboard forward_ranks_bb(Color c, Square s) {
+constexpr Bitboard forward_ranks_bb(Color c, Square s) {
   return c == WHITE ? ~Rank1BB << 8 * relative_rank(WHITE, s)
                     : ~Rank8BB >> 8 * relative_rank(BLACK, s);
 }
@@ -238,7 +237,7 @@ inline Bitboard forward_ranks_bb(Color c, Square s) {
 /// forward_file_bb() returns a bitboard representing all the squares along the
 /// line in front of the given one, from the point of view of the given color.
 
-inline Bitboard forward_file_bb(Color c, Square s) {
+constexpr Bitboard forward_file_bb(Color c, Square s) {
   return forward_ranks_bb(c, s) & file_bb(s);
 }
 
@@ -247,7 +246,7 @@ inline Bitboard forward_file_bb(Color c, Square s) {
 /// be attacked by a pawn of the given color when it moves along its file, starting
 /// from the given square.
 
-inline Bitboard pawn_attack_span(Color c, Square s) {
+constexpr Bitboard pawn_attack_span(Color c, Square s) {
   return forward_ranks_bb(c, s) & adjacent_files_bb(s);
 }
 
@@ -255,7 +254,7 @@ inline Bitboard pawn_attack_span(Color c, Square s) {
 /// passed_pawn_span() returns a bitboard which can be used to test if a pawn of
 /// the given color and on the given square is a passed pawn.
 
-inline Bitboard passed_pawn_span(Color c, Square s) {
+constexpr Bitboard passed_pawn_span(Color c, Square s) {
   return pawn_attack_span(c, s) | forward_file_bb(c, s);
 }
 
diff --git a/src/cluster.cpp b/src/cluster.cpp
index b56359e6..a4de8825 100644
--- a/src/cluster.cpp
+++ b/src/cluster.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2019 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/cluster.h b/src/cluster.h
index 0e074554..8e224d69 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2019 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/endgame.cpp b/src/endgame.cpp
index be0755a8..c8be2198 100644
--- a/src/endgame.cpp
+++ b/src/endgame.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -181,15 +179,15 @@ Value Endgame<KRKP>::operator()(const Position& pos) const {
   assert(verify_material(pos, strongSide, RookValueMg, 0));
   assert(verify_material(pos, weakSide, VALUE_ZERO, 1));
 
-  Square strongKing = relative_square(strongSide, pos.square<KING>(strongSide));
-  Square weakKing   = relative_square(strongSide, pos.square<KING>(weakSide));
-  Square strongRook = relative_square(strongSide, pos.square<ROOK>(strongSide));
-  Square weakPawn   = relative_square(strongSide, pos.square<PAWN>(weakSide));
-  Square queeningSquare = make_square(file_of(weakPawn), RANK_1);
+  Square strongKing = pos.square<KING>(strongSide);
+  Square weakKing   = pos.square<KING>(weakSide);
+  Square strongRook = pos.square<ROOK>(strongSide);
+  Square weakPawn   = pos.square<PAWN>(weakSide);
+  Square queeningSquare = make_square(file_of(weakPawn), relative_rank(weakSide, RANK_8));
   Value result;
 
   // If the stronger side's king is in front of the pawn, it's a win
-  if (forward_file_bb(WHITE, strongKing) & weakPawn)
+  if (forward_file_bb(strongSide, strongKing) & weakPawn)
       result = RookValueEg - distance(strongKing, weakPawn);
 
   // If the weaker side's king is too far from the pawn and the rook,
@@ -200,15 +198,15 @@ Value Endgame<KRKP>::operator()(const Position& pos) const {
 
   // If the pawn is far advanced and supported by the defending king,
   // the position is drawish
-  else if (   rank_of(weakKing) <= RANK_3
+  else if (   relative_rank(strongSide, weakKing) <= RANK_3
            && distance(weakKing, weakPawn) == 1
-           && rank_of(strongKing) >= RANK_4
+           && relative_rank(strongSide, strongKing) >= RANK_4
            && distance(strongKing, weakPawn) > 2 + (pos.side_to_move() == strongSide))
       result = Value(80) - 8 * distance(strongKing, weakPawn);
 
   else
-      result =  Value(200) - 8 * (  distance(strongKing, weakPawn + SOUTH)
-                                  - distance(weakKing, weakPawn + SOUTH)
+      result =  Value(200) - 8 * (  distance(strongKing, weakPawn + pawn_push(weakSide))
+                                  - distance(weakKing, weakPawn + pawn_push(weakSide))
                                   - distance(weakPawn, queeningSquare));
 
   return strongSide == pos.side_to_move() ? result : -result;
@@ -589,8 +587,8 @@ ScaleFactor Endgame<KPsK>::operator()(const Position& pos) const {
   Bitboard strongPawns = pos.pieces(strongSide, PAWN);
 
   // If all pawns are ahead of the king on a single rook file, it's a draw.
-  if (!((strongPawns & ~FileABB) || (strongPawns & ~FileHBB)) &&
-      !(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
+  if (   !(strongPawns & ~(FileABB | FileHBB))
+      && !(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
       return SCALE_FACTOR_DRAW;
 
   return SCALE_FACTOR_NONE;
diff --git a/src/endgame.h b/src/endgame.h
index fd1aba2d..1351d88a 100644
--- a/src/endgame.h
+++ b/src/endgame.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/evaluate.cpp b/src/evaluate.cpp
index 60ec9c72..c08144de 100644
--- a/src/evaluate.cpp
+++ b/src/evaluate.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -20,15 +18,58 @@
 
 #include <algorithm>
 #include <cassert>
+#include <cstdlib>
 #include <cstring>   // For std::memset
 #include <iomanip>
 #include <sstream>
+#include <iostream>
 
 #include "bitboard.h"
+#include "cluster.h"
 #include "evaluate.h"
 #include "material.h"
 #include "pawns.h"
 #include "thread.h"
+#include "uci.h"
+
+namespace Eval {
+
+  bool useNNUE;                         // value of the "Use NNUE" option as last read by init_NNUE()
+  std::string eval_file_loaded="None";  // name of the last net recorded as loaded; "None" until a load succeeds
+
+  void init_NNUE() {
+    // Re-read "Use NNUE" and, if it is on and EvalFile differs from the recorded net, try to load that file
+    useNNUE = Options["Use NNUE"];
+    std::string eval_file = std::string(Options["EvalFile"]);
+    if (useNNUE && eval_file_loaded != eval_file)
+        if (Eval::NNUE::load_eval_file(eval_file))
+            eval_file_loaded = eval_file;  // recorded only when load_eval_file() returned true
+  }
+
+  void verify_NNUE() {
+    // Exit with an explanatory message if NNUE is on but EvalFile is not the recorded net; otherwise report the evaluation in use
+    std::string eval_file = std::string(Options["EvalFile"]);
+    if (useNNUE && eval_file_loaded != eval_file)
+    {
+        UCI::OptionsMap defaults;
+        UCI::init(defaults);
+        // 'defaults' is a separately initialized option map, read below only to name the default net in the download hint
+        std::cerr << "NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully. "
+                  << "These network evaluation parameters must be available, and compatible with this version of the code. "
+                  << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. "
+                  << "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << std::endl;
+        std::exit(EXIT_FAILURE);
+    }
+
+    if (Cluster::is_root())  // only the root prints the notice below
+    {
+        if (useNNUE)
+            sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl;
+        else
+            sync_cout << "info string classical evaluation enabled." << sync_endl;
+    }
+  }
+}
 
 namespace Trace {
 
@@ -74,17 +115,20 @@ using namespace Trace;
 namespace {
 
   // Threshold for lazy and space evaluation
-  constexpr Value LazyThreshold  = Value(1400);
+  constexpr Value LazyThreshold1 =  Value(1400);
+  constexpr Value LazyThreshold2 =  Value(1300);
   constexpr Value SpaceThreshold = Value(12222);
+  constexpr Value NNUEThreshold1 =   Value(550);
+  constexpr Value NNUEThreshold2 =   Value(150);
 
   // KingAttackWeights[PieceType] contains king attack weights by piece type
   constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
 
-  // Penalties for enemy's safe checks
-  constexpr int QueenSafeCheck  = 772;
-  constexpr int RookSafeCheck   = 1084;
-  constexpr int BishopSafeCheck = 645;
-  constexpr int KnightSafeCheck = 792;
+  // SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type,
+  // higher if multiple safe checks are possible for that piece type.
+  constexpr int SafeCheck[][2] = {
+      {}, {}, {792, 1283}, {645, 967}, {1084, 1897}, {772, 1119}
+  };
 
 #define S(mg, eg) make_score(mg, eg)
 
@@ -106,6 +150,18 @@ namespace {
       S(110,182), S(114,182), S(114,192), S(116,219) }
   };
 
+  // KingProtector[knight/bishop] contains penalty for each distance unit to own king
+  constexpr Score KingProtector[] = { S(8, 9), S(6, 9) };
+
+  // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a
+  // pawn protected square on rank 4 to 6 which is also safe from a pawn attack.
+  constexpr Score Outpost[] = { S(56, 36), S(30, 23) };
+
+  // PassedRank[Rank] contains a bonus according to the rank of a passed pawn
+  constexpr Score PassedRank[RANK_NB] = {
+    S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260)
+  };
+
   // RookOnFile[semiopen/open] contains bonuses for each rook when there is
   // no (friendly) pawn on the rook file.
   constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) };
@@ -121,28 +177,19 @@ namespace {
     S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41)
   };
 
-  // PassedRank[Rank] contains a bonus according to the rank of a passed pawn
-  constexpr Score PassedRank[RANK_NB] = {
-    S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260)
-  };
-
   // Assorted bonuses and penalties
-  constexpr Score BishopKingProtector = S(  6,  9);
+  constexpr Score BadOutpost          = S( -7, 36);
   constexpr Score BishopOnKingRing    = S( 24,  0);
-  constexpr Score BishopOutpost       = S( 30, 23);
   constexpr Score BishopPawns         = S(  3,  7);
   constexpr Score BishopXRayPawns     = S(  4,  5);
   constexpr Score CorneredBishop      = S( 50, 50);
   constexpr Score FlankAttacks        = S(  8,  0);
   constexpr Score Hanging             = S( 69, 36);
-  constexpr Score KnightKingProtector = S(  8,  9);
   constexpr Score KnightOnQueen       = S( 16, 11);
-  constexpr Score KnightOutpost       = S( 56, 36);
   constexpr Score LongDiagonalBishop  = S( 45,  0);
   constexpr Score MinorBehindPawn     = S( 18,  3);
   constexpr Score PassedFile          = S( 11,  8);
   constexpr Score PawnlessFlank       = S( 17, 95);
-  constexpr Score QueenInfiltration   = S( -2, 14);
   constexpr Score ReachableOutpost    = S( 31, 22);
   constexpr Score RestrictedPiece     = S(  7,  7);
   constexpr Score RookOnKingRing      = S( 16,  0);
@@ -305,10 +352,19 @@ namespace {
 
         if (Pt == BISHOP || Pt == KNIGHT)
         {
-            // Bonus if piece is on an outpost square or can reach one
-            bb = OutpostRanks & attackedBy[Us][PAWN] & ~pe->pawn_attacks_span(Them);
-            if (bb & s)
-                score += (Pt == KNIGHT) ? KnightOutpost : BishopOutpost;
+            // Bonus if the piece is on an outpost square or can reach one.
+            // Reduced bonus for knights (BadOutpost) if there are few relevant targets.
+            bb = OutpostRanks & (attackedBy[Us][PAWN] | shift<Down>(pos.pieces(PAWN)))
+                              & ~pe->pawn_attacks_span(Them);
+            Bitboard targets = pos.pieces(Them) & ~pos.pieces(PAWN);
+
+            if (   Pt == KNIGHT
+                && bb & s & ~CenterFiles // on a side outpost
+                && !(b & targets)        // no relevant attacks
+                && (!more_than_one(targets & (s & QueenSide ? QueenSide : KingSide))))
+                score += BadOutpost;
+            else if (bb & s)
+                score += Outpost[Pt == BISHOP];
             else if (Pt == KNIGHT && bb & b & ~pos.pieces(Us))
                 score += ReachableOutpost;
 
@@ -317,8 +373,7 @@ namespace {
                 score += MinorBehindPawn;
 
             // Penalty if the piece is far from the king
-            score -= (Pt == KNIGHT ? KnightKingProtector
-                                   : BishopKingProtector) * distance(pos.square<KING>(Us), s);
+            score -= KingProtector[Pt == BISHOP] * distance(pos.square<KING>(Us), s);
 
             if (Pt == BISHOP)
             {
@@ -377,10 +432,6 @@ namespace {
             Bitboard queenPinners;
             if (pos.slider_blockers(pos.pieces(Them, ROOK, BISHOP), s, queenPinners))
                 score -= WeakQueen;
-
-            // Bonus for queen on weak square in enemy camp
-            if (relative_rank(Us, s) > RANK_4 && (~pe->pawn_attacks_span(Them) & s))
-                score += QueenInfiltration;
         }
     }
     if (T)
@@ -420,41 +471,33 @@ namespace {
     b2 = attacks_bb<BISHOP>(ksq, pos.pieces() ^ pos.pieces(Us, QUEEN));
 
     // Enemy rooks checks
-    rookChecks = b1 & safe & attackedBy[Them][ROOK];
+    rookChecks = b1 & attackedBy[Them][ROOK] & safe;
     if (rookChecks)
-        kingDanger += more_than_one(rookChecks) ? RookSafeCheck * 175/100
-                                                : RookSafeCheck;
+        kingDanger += SafeCheck[ROOK][more_than_one(rookChecks)];
     else
         unsafeChecks |= b1 & attackedBy[Them][ROOK];
 
-    // Enemy queen safe checks: we count them only if they are from squares from
-    // which we can't give a rook check, because rook checks are more valuable.
-    queenChecks =  (b1 | b2)
-                 & attackedBy[Them][QUEEN]
-                 & safe
-                 & ~attackedBy[Us][QUEEN]
-                 & ~rookChecks;
+    // Enemy queen safe checks: count them only if the checks are from squares from
+    // which the opponent cannot give a rook check, because rook checks are more valuable.
+    queenChecks =  (b1 | b2) & attackedBy[Them][QUEEN] & safe
+                 & ~(attackedBy[Us][QUEEN] | rookChecks);
     if (queenChecks)
-        kingDanger += more_than_one(queenChecks) ? QueenSafeCheck * 145/100
-                                                 : QueenSafeCheck;
+        kingDanger += SafeCheck[QUEEN][more_than_one(queenChecks)];
 
-    // Enemy bishops checks: we count them only if they are from squares from
-    // which we can't give a queen check, because queen checks are more valuable.
-    bishopChecks =  b2
-                  & attackedBy[Them][BISHOP]
-                  & safe
+    // Enemy bishops checks: count them only if they are from squares from which
+    // the opponent cannot give a queen check, because queen checks are more valuable.
+    bishopChecks =  b2 & attackedBy[Them][BISHOP] & safe
                   & ~queenChecks;
     if (bishopChecks)
-        kingDanger += more_than_one(bishopChecks) ? BishopSafeCheck * 3/2
-                                                  : BishopSafeCheck;
+        kingDanger += SafeCheck[BISHOP][more_than_one(bishopChecks)];
+
     else
         unsafeChecks |= b2 & attackedBy[Them][BISHOP];
 
     // Enemy knights checks
     knightChecks = attacks_bb<KNIGHT>(ksq) & attackedBy[Them][KNIGHT];
     if (knightChecks & safe)
-        kingDanger += more_than_one(knightChecks & safe) ? KnightSafeCheck * 162/100
-                                                         : KnightSafeCheck;
+        kingDanger += SafeCheck[KNIGHT][more_than_one(knightChecks & safe)];
     else
         unsafeChecks |= knightChecks;
 
@@ -464,7 +507,7 @@ namespace {
     b2 = b1 & attackedBy2[Them];
     b3 = attackedBy[Us][ALL_PIECES] & KingFlank[file_of(ksq)] & Camp;
 
-    int kingFlankAttack = popcount(b1) + popcount(b2);
+    int kingFlankAttack  = popcount(b1) + popcount(b2);
     int kingFlankDefense = popcount(b3);
 
     kingDanger +=        kingAttackersCount[Them] * kingAttackersWeight[Them]
@@ -575,17 +618,21 @@ namespace {
     // Bonus for threats on the next moves against enemy queen
     if (pos.count<QUEEN>(Them) == 1)
     {
+        bool queenImbalance = pos.count<QUEEN>() == 1;
+
         Square s = pos.square<QUEEN>(Them);
-        safe = mobilityArea[Us] & ~stronglyProtected;
+        safe =   mobilityArea[Us]
+              & ~pos.pieces(Us, PAWN)
+              & ~stronglyProtected;
 
         b = attackedBy[Us][KNIGHT] & attacks_bb<KNIGHT>(s);
 
-        score += KnightOnQueen * popcount(b & safe);
+        score += KnightOnQueen * popcount(b & safe) * (1 + queenImbalance);
 
         b =  (attackedBy[Us][BISHOP] & attacks_bb<BISHOP>(s, pos.pieces()))
            | (attackedBy[Us][ROOK  ] & attacks_bb<ROOK  >(s, pos.pieces()));
 
-        score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]);
+        score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]) * (1 + queenImbalance);
     }
 
     if (T)
@@ -725,9 +772,9 @@ namespace {
   }
 
 
-  // Evaluation::winnable() adjusts the mg and eg score components based on the
-  // known attacking/defending status of the players. A single value is derived
-  // by interpolation from the mg and eg values and returned.
+  // Evaluation::winnable() adjusts the midgame and endgame score components, based on
+  // the known attacking/defending status of the players. The final value is derived
+  // by interpolation from the midgame and endgame values.
 
   template<Tracing T>
   Value Evaluation<T>::winnable(Score score) const {
@@ -741,8 +788,8 @@ namespace {
     bool almostUnwinnable =   outflanking < 0
                            && !pawnsOnBothFlanks;
 
-    bool infiltration = rank_of(pos.square<KING>(WHITE)) > RANK_4
-                     || rank_of(pos.square<KING>(BLACK)) < RANK_5;
+    bool infiltration =   rank_of(pos.square<KING>(WHITE)) > RANK_4
+                       || rank_of(pos.square<KING>(BLACK)) < RANK_5;
 
     // Compute the initiative bonus for the attacking side
     int complexity =   9 * pe->passed_count()
@@ -767,11 +814,10 @@ namespace {
     eg += v;
 
     // Compute the scale factor for the winning side
-
     Color strongSide = eg > VALUE_DRAW ? WHITE : BLACK;
     int sf = me->scale_factor(pos, strongSide);
 
-    // If scale is not already specific, scale down the endgame via general heuristics
+    // If scale factor is not already specific, scale down via general heuristics
     if (sf == SCALE_FACTOR_NORMAL)
     {
         if (pos.opposite_bishops())
@@ -782,6 +828,15 @@ namespace {
             else
                 sf = 22 + 3 * pos.count<ALL_PIECES>(strongSide);
         }
+        else if (  pos.non_pawn_material(WHITE) == RookValueMg
+                && pos.non_pawn_material(BLACK) == RookValueMg
+                && pos.count<PAWN>(strongSide) - pos.count<PAWN>(~strongSide) <= 1
+                && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN))
+                && (attacks_bb<KING>(pos.square<KING>(~strongSide)) & pos.pieces(~strongSide, PAWN)))
+            sf = 36;
+        else if (pos.count<QUEEN>() == 1)
+            sf = 37 + 3 * (pos.count<QUEEN>(WHITE) == 1 ? pos.count<BISHOP>(BLACK) + pos.count<KNIGHT>(BLACK)
+                                                        : pos.count<BISHOP>(WHITE) + pos.count<KNIGHT>(WHITE));
         else
             sf = std::min(sf, 36 + 7 * pos.count<PAWN>(strongSide));
     }
@@ -828,9 +883,12 @@ namespace {
     score += pe->pawn_score(WHITE) - pe->pawn_score(BLACK);
 
     // Early exit if score is high
-    Value v = (mg_value(score) + eg_value(score)) / 2;
-    if (abs(v) > LazyThreshold + pos.non_pawn_material() / 64)
-       return pos.side_to_move() == WHITE ? v : -v;
+    auto lazy_skip = [&](Value lazyThreshold) {
+        return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64;
+    };
+
+    if (lazy_skip(LazyThreshold1))
+        goto make_v;
 
     // Main evaluation begins here
     initialize<WHITE>();
@@ -847,12 +905,17 @@ namespace {
 
     // More complex interactions that require fully populated attack bitboards
     score +=  king<   WHITE>() - king<   BLACK>()
-            + threats<WHITE>() - threats<BLACK>()
-            + passed< WHITE>() - passed< BLACK>()
+            + passed< WHITE>() - passed< BLACK>();
+
+    if (lazy_skip(LazyThreshold2))
+        goto make_v;
+
+    score +=  threats<WHITE>() - threats<BLACK>()
             + space<  WHITE>() - space<  BLACK>();
 
+make_v:
     // Derive single value from mg and eg parts of score
-    v = winnable(score);
+    Value v = winnable(score);
 
     // In case of tracing add all remaining individual evaluation terms
     if (T)
@@ -869,9 +932,6 @@ namespace {
     // Side to move point of view
     v = (pos.side_to_move() == WHITE ? v : -v) + Tempo;
 
-    // Damp down the evaluation linearly when shuffling
-    v = v * (100 - pos.rule50_count()) / 100;
-
     return v;
   }
 
@@ -882,47 +942,73 @@ namespace {
 /// evaluation of the position from the point of view of the side to move.
 
 Value Eval::evaluate(const Position& pos) {
-  return Evaluation<NO_TRACE>(pos).value();
-}
 
+  bool classical = !Eval::useNNUE
+                ||  abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
+  Value v = classical ? Evaluation<NO_TRACE>(pos).value()
+                      : NNUE::evaluate(pos) * 5 / 4 + Tempo;
+
+  if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
+      v = NNUE::evaluate(pos) * 5 / 4 + Tempo;
+
+  // Damp down the evaluation linearly when shuffling
+  v = v * (100 - pos.rule50_count()) / 100;
+
+  // Guarantee evaluation outside of TB range
+  v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
+
+  return v;
+}
 
 /// trace() is like evaluate(), but instead of returning a value, it returns
 /// a string (suitable for outputting to stdout) that contains the detailed
 /// descriptions and values of each evaluation term. Useful for debugging.
+/// Trace scores are from white's point of view
 
 std::string Eval::trace(const Position& pos) {
 
   if (pos.checkers())
-      return "Total evaluation: none (in check)";
-
-  std::memset(scores, 0, sizeof(scores));
-
-  pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
-
-  Value v = Evaluation<TRACE>(pos).value();
-
-  v = pos.side_to_move() == WHITE ? v : -v; // Trace scores are from white's point of view
+      return "Final evaluation: none (in check)";
 
   std::stringstream ss;
-  ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
-     << "     Term    |    White    |    Black    |    Total   \n"
-     << "             |   MG    EG  |   MG    EG  |   MG    EG \n"
-     << " ------------+-------------+-------------+------------\n"
-     << "    Material | " << Term(MATERIAL)
-     << "   Imbalance | " << Term(IMBALANCE)
-     << "       Pawns | " << Term(PAWN)
-     << "     Knights | " << Term(KNIGHT)
-     << "     Bishops | " << Term(BISHOP)
-     << "       Rooks | " << Term(ROOK)
-     << "      Queens | " << Term(QUEEN)
-     << "    Mobility | " << Term(MOBILITY)
-     << " King safety | " << Term(KING)
-     << "     Threats | " << Term(THREAT)
-     << "      Passed | " << Term(PASSED)
-     << "       Space | " << Term(SPACE)
-     << "    Winnable | " << Term(WINNABLE)
-     << " ------------+-------------+-------------+------------\n"
-     << "       Total | " << Term(TOTAL);
+  ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
+
+  Value v;
+
+  if (Eval::useNNUE)
+  {
+      v = NNUE::evaluate(pos);
+  }
+  else
+  {
+      std::memset(scores, 0, sizeof(scores));
+
+      pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
+
+      v = Evaluation<TRACE>(pos).value();
+
+      ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
+         << "     Term    |    White    |    Black    |    Total   \n"
+         << "             |   MG    EG  |   MG    EG  |   MG    EG \n"
+         << " ------------+-------------+-------------+------------\n"
+         << "    Material | " << Term(MATERIAL)
+         << "   Imbalance | " << Term(IMBALANCE)
+         << "       Pawns | " << Term(PAWN)
+         << "     Knights | " << Term(KNIGHT)
+         << "     Bishops | " << Term(BISHOP)
+         << "       Rooks | " << Term(ROOK)
+         << "      Queens | " << Term(QUEEN)
+         << "    Mobility | " << Term(MOBILITY)
+         << " King safety | " << Term(KING)
+         << "     Threats | " << Term(THREAT)
+         << "      Passed | " << Term(PASSED)
+         << "       Space | " << Term(SPACE)
+         << "    Winnable | " << Term(WINNABLE)
+         << " ------------+-------------+-------------+------------\n"
+         << "       Total | " << Term(TOTAL);
+  }
+
+  v = pos.side_to_move() == WHITE ? v : -v;
 
   ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n";
 
diff --git a/src/evaluate.h b/src/evaluate.h
index 7c8a2a6f..e808068d 100644
--- a/src/evaluate.h
+++ b/src/evaluate.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -29,9 +27,23 @@ class Position;
 
 namespace Eval {
 
-std::string trace(const Position& pos);
+  std::string trace(const Position& pos);
+  Value evaluate(const Position& pos);
 
-Value evaluate(const Position& pos);
-}
+  extern bool useNNUE;
+  extern std::string eval_file_loaded;
+  void init_NNUE();
+  void verify_NNUE();
+
+  namespace NNUE {
+
+    Value evaluate(const Position& pos);
+    Value compute_eval(const Position& pos);
+    void  update_eval(const Position& pos);
+    bool  load_eval_file(const std::string& evalFile);
+
+  } // namespace NNUE
+
+} // namespace Eval
 
 #endif // #ifndef EVALUATE_H_INCLUDED
diff --git a/src/main.cpp b/src/main.cpp
index 9b5dc7d9..fc2f4029 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -48,6 +46,7 @@ int main(int argc, char* argv[]) {
   Endgames::init();
   Threads.set(size_t(Options["Threads"]));
   Search::clear(); // After threads are up
+  Eval::init_NNUE();
 
   UCI::loop(argc, argv);
 
diff --git a/src/material.cpp b/src/material.cpp
index bb25d3ca..0ef9926f 100644
--- a/src/material.cpp
+++ b/src/material.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/material.h b/src/material.h
index 21647f23..80d01655 100644
--- a/src/material.h
+++ b/src/material.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/misc.cpp b/src/misc.cpp
index 2bc05c5b..459ea100 100644
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -46,12 +44,18 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
 #include <iostream>
 #include <sstream>
 #include <vector>
+#include <cstdlib>
 
 #if defined(__linux__) && !defined(__ANDROID__)
 #include <stdlib.h>
 #include <sys/mman.h>
 #endif
 
+#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
+#define POSIXALIGNEDALLOC
+#include <stdlib.h>
+#endif
+
 #include "misc.h"
 #include "thread.h"
 
@@ -147,10 +151,8 @@ const string engine_info(bool to_uci) {
       ss << setw(2) << day << setw(2) << (1 + months.find(month) / 4) << year.substr(2);
   }
 
-  ss << (Is64Bit ? " 64" : "")
-     << (HasPext ? " BMI2" : (HasPopCnt ? " POPCNT" : ""))
-     << (to_uci  ? "\nid author ": " by ")
-     << "T. Romstad, M. Costalba, J. Kiiski, G. Linscott";
+  ss << (to_uci  ? "\nid author ": " by ")
+     << "the Stockfish developers (see AUTHORS file)";
 
   return ss.str();
 }
@@ -215,7 +217,40 @@ const std::string compiler_info() {
      compiler += " on unknown system";
   #endif
 
-  compiler += "\n __VERSION__ macro expands to: ";
+  compiler += "\nCompilation settings include: ";
+  compiler += (Is64Bit ? " 64bit" : " 32bit");
+  #if defined(USE_VNNI)
+    compiler += " VNNI";
+  #endif
+  #if defined(USE_AVX512)
+    compiler += " AVX512";
+  #endif
+  compiler += (HasPext ? " BMI2" : "");
+  #if defined(USE_AVX2)
+    compiler += " AVX2";
+  #endif
+  #if defined(USE_SSE41)
+    compiler += " SSE41";
+  #endif
+  #if defined(USE_SSSE3)
+    compiler += " SSSE3";
+  #endif
+  #if defined(USE_SSE2)
+    compiler += " SSE2";
+  #endif
+  compiler += (HasPopCnt ? " POPCNT" : "");
+  #if defined(USE_MMX)
+    compiler += " MMX";
+  #endif
+  #if defined(USE_NEON)
+    compiler += " NEON";
+  #endif
+
+  #if !defined(NDEBUG)
+    compiler += " DEBUG";
+  #endif
+
+  compiler += "\n__VERSION__ macro expands to: ";
   #ifdef __VERSION__
      compiler += __VERSION__;
   #else
@@ -293,6 +328,32 @@ void prefetch(void* addr) {
 
 #endif
 
+/// Wrappers for systems where the C++17 implementation doesn't guarantee the availability of
+/// std::aligned_alloc. Memory allocated with std_aligned_alloc must be freed with std_aligned_free.
+/// Returns nullptr when posix_memalign reports an error; callers should check the result.
+
+void* std_aligned_alloc(size_t alignment, size_t size) {
+#if defined(POSIXALIGNEDALLOC)
+  void *pointer;
+  if(posix_memalign(&pointer, alignment, size) == 0) // 0 means success
+      return pointer;
+  return nullptr;
+#elif defined(_WIN32)
+  return _mm_malloc(size, alignment); // note the argument order here: (size, alignment)
+#else
+  return std::aligned_alloc(alignment, size);
+#endif
+}
+
+void std_aligned_free(void* ptr) { // releases memory obtained from std_aligned_alloc
+#if defined(POSIXALIGNEDALLOC)
+  free(ptr);      // pairs with posix_memalign
+#elif defined(_WIN32)
+  _mm_free(ptr);  // pairs with _mm_malloc
+#else
+  free(ptr);      // pairs with std::aligned_alloc
+#endif
+}
 
 /// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages.
 /// The returned pointer is the aligned one, while the mem argument is the one that needs
diff --git a/src/misc.h b/src/misc.h
index 373f1b77..eb4e05c0 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -33,6 +31,8 @@ const std::string engine_info(bool to_uci = false);
 const std::string compiler_info();
 void prefetch(void* addr);
 void start_logger(const std::string& fname);
+void* std_aligned_alloc(size_t alignment, size_t size);
+void std_aligned_free(void* ptr);
 void* aligned_ttmem_alloc(size_t size, void*& mem);
 void aligned_ttmem_free(void* mem); // nop if mem == nullptr
 
diff --git a/src/movegen.cpp b/src/movegen.cpp
index 17203a95..d74df4c3 100644
--- a/src/movegen.cpp
+++ b/src/movegen.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -29,22 +27,20 @@ namespace {
   ExtMove* make_promotions(ExtMove* moveList, Square to, Square ksq) {
 
     if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
+    {
         *moveList++ = make<PROMOTION>(to - D, to, QUEEN);
+        if (attacks_bb<KNIGHT>(to) & ksq)
+            *moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
+    }
 
     if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS)
     {
         *moveList++ = make<PROMOTION>(to - D, to, ROOK);
         *moveList++ = make<PROMOTION>(to - D, to, BISHOP);
-        *moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
+        if (!(attacks_bb<KNIGHT>(to) & ksq))
+            *moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
     }
 
-    // Knight promotion is the only promotion that can give a direct check
-    // that's not already included in the queen promotion.
-    if (Type == QUIET_CHECKS && (attacks_bb<KNIGHT>(to) & ksq))
-        *moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
-    else
-        (void)ksq; // Silence a warning under MSVC
-
     return moveList;
   }
 
@@ -263,8 +259,8 @@ namespace {
 } // namespace
 
 
-/// <CAPTURES>     Generates all pseudo-legal captures and queen promotions
-/// <QUIETS>       Generates all pseudo-legal non-captures and underpromotions
+/// <CAPTURES>     Generates all pseudo-legal captures plus queen and checking knight promotions
+/// <QUIETS>       Generates all pseudo-legal non-captures and underpromotions (except checking knight)
 /// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
 ///
 /// Returns a pointer to the end of the move list.
@@ -287,8 +283,8 @@ template ExtMove* generate<QUIETS>(const Position&, ExtMove*);
 template ExtMove* generate<NON_EVASIONS>(const Position&, ExtMove*);
 
 
-/// generate<QUIET_CHECKS> generates all pseudo-legal non-captures and knight
-/// underpromotions that give check. Returns a pointer to the end of the move list.
+/// generate<QUIET_CHECKS> generates all pseudo-legal non-captures giving check.
+/// Returns a pointer to the end of the move list.
 template<>
 ExtMove* generate<QUIET_CHECKS>(const Position& pos, ExtMove* moveList) {
 
diff --git a/src/movegen.h b/src/movegen.h
index c2e7c3f1..fb616d00 100644
--- a/src/movegen.h
+++ b/src/movegen.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/movepick.cpp b/src/movepick.cpp
index 5775f810..96a44449 100644
--- a/src/movepick.cpp
+++ b/src/movepick.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/movepick.h b/src/movepick.h
index aaff388f..f080935a 100644
--- a/src/movepick.h
+++ b/src/movepick.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/nnue/architectures/halfkp_256x2-32-32.h b/src/nnue/architectures/halfkp_256x2-32-32.h
new file mode 100644
index 00000000..9216bd41
--- /dev/null
+++ b/src/nnue/architectures/halfkp_256x2-32-32.h
@@ -0,0 +1,54 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
+#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
+
+#include "../features/feature_set.h"
+#include "../features/half_kp.h"
+
+#include "../layers/input_slice.h"
+#include "../layers/affine_transform.h"
+#include "../layers/clipped_relu.h"
+
+namespace Eval::NNUE {
+
+// Input features used in the evaluation function: a FeatureSet built from HalfKP<kFriend>
+using RawFeatures = Features::FeatureSet<
+    Features::HalfKP<Features::Side::kFriend>>;
+
+// Number of input feature dimensions after conversion (the input layer below takes twice this)
+constexpr IndexType kTransformedFeatureDimensions = 256;
+
+namespace Layers {
+
+// Define network structure; each alias wraps the previous layer, so OutputLayer names the whole stack
+using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
+using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
+using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
+using OutputLayer = AffineTransform<HiddenLayer2, 1>; // presumably one output value - TODO confirm
+
+}  // namespace Layers
+
+using Network = Layers::OutputLayer; // the network type is its outermost layer
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp
new file mode 100644
index 00000000..dfbb1ac2
--- /dev/null
+++ b/src/nnue/evaluate_nnue.cpp
@@ -0,0 +1,175 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Code for calculating NNUE evaluation function
+
+#include <fstream>
+#include <iostream>
+#include <set>
+
+#include "../evaluate.h"
+#include "../position.h"
+#include "../misc.h"
+#include "../uci.h"
+
+#include "evaluate_nnue.h"
+
+ExtPieceSquare kpp_board_index[PIECE_NB] = {
+ // One pair per piece code (PIECE_NB entries). Convention for the first value: W - us, B - them;
+ // the second value is the same piece viewed from the other side, where W and B are reversed
+    { PS_NONE,     PS_NONE     },
+    { PS_W_PAWN,   PS_B_PAWN   },
+    { PS_W_KNIGHT, PS_B_KNIGHT },
+    { PS_W_BISHOP, PS_B_BISHOP },
+    { PS_W_ROOK,   PS_B_ROOK   },
+    { PS_W_QUEEN,  PS_B_QUEEN  },
+    { PS_W_KING,   PS_B_KING   },
+    { PS_NONE,     PS_NONE     },
+    { PS_NONE,     PS_NONE     },
+    { PS_B_PAWN,   PS_W_PAWN   },
+    { PS_B_KNIGHT, PS_W_KNIGHT },
+    { PS_B_BISHOP, PS_W_BISHOP },
+    { PS_B_ROOK,   PS_W_ROOK   },
+    { PS_B_QUEEN,  PS_W_QUEEN  },
+    { PS_B_KING,   PS_W_KING   },
+    { PS_NONE,     PS_NONE     }
+};
+
+
+namespace Eval::NNUE {
+
+  // Input feature converter (allocated and zero-filled by Initialize())
+  AlignedPtr<FeatureTransformer> feature_transformer;
+
+  // Network that is propagated over the transformed features (allocated by Initialize())
+  AlignedPtr<Network> network;
+
+  // Name of the evaluation file most recently passed to load_eval_file()
+  std::string fileName;
+
+  namespace Detail {
+
+  // Allocate aligned storage for a T and zero-fill it; pointer.reset() releases any previous object
+  template <typename T>
+  void Initialize(AlignedPtr<T>& pointer) {
+    // NOTE(review): the allocation result is not checked before memset - confirm it cannot be null
+    pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
+    std::memset(pointer.get(), 0, sizeof(T));
+  }
+
+  // Read one component's parameters: a 32-bit little-endian hash header, then the component data
+  template <typename T>
+  bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
+    // Returns false if the stream failed or the header differs from T::GetHashValue()
+    std::uint32_t header;
+    header = read_little_endian<std::uint32_t>(stream);
+    if (!stream || header != T::GetHashValue()) return false;
+    return pointer->ReadParameters(stream);
+  }
+
+  }  // namespace Detail
+
+  // (Re)allocate zero-filled storage for both the feature transformer and the network
+  void Initialize() {
+
+    Detail::Initialize(feature_transformer);
+    Detail::Initialize(network);
+  }
+
+  // Read network header: version, hash value and a length-prefixed architecture string
+  bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
+  {
+    std::uint32_t version, size;
+    // Fills *hash_value and *architecture; returns false on read failure or version mismatch
+    version     = read_little_endian<std::uint32_t>(stream);
+    *hash_value = read_little_endian<std::uint32_t>(stream);
+    size        = read_little_endian<std::uint32_t>(stream);
+    if (!stream || version != kVersion) return false;
+    architecture->resize(size); // NOTE(review): size comes straight from the file and is not bounded
+    stream.read(&(*architecture)[0], size);
+    return !stream.fail();
+  }
+
+  // Read a complete network file: header, feature transformer parameters, then network parameters
+  bool ReadParameters(std::istream& stream) {
+    // Returns true only if every part reads correctly and the stream ends right afterwards
+    std::uint32_t hash_value;
+    std::string architecture;
+    if (!ReadHeader(stream, &hash_value, &architecture)) return false;
+    if (hash_value != kHashValue) return false; // structure hash in the file must match this build
+    if (!Detail::ReadParameters(stream, feature_transformer)) return false;
+    if (!Detail::ReadParameters(stream, network)) return false;
+    return stream && stream.peek() == std::ios::traits_type::eof();
+  }
+
+  // Thin wrapper: ask the feature transformer to proceed with the difference calculation if possible
+  static void UpdateAccumulatorIfPossible(const Position& pos) {
+
+    feature_transformer->UpdateAccumulatorIfPossible(pos);
+  }
+
+  // Calculate the evaluation value and cache it in the accumulator of the position's state
+  static Value ComputeScore(const Position& pos, bool refresh) {
+    // Reuse the cached score unless the caller asked for a refresh
+    auto& accumulator = pos.state()->accumulator;
+    if (!refresh && accumulator.computed_score) {
+      return accumulator.score;
+    }
+    // Transform the position into network inputs, then propagate them through the network
+    alignas(kCacheLineSize) TransformedFeatureType
+        transformed_features[FeatureTransformer::kBufferSize];
+    feature_transformer->Transform(pos, transformed_features, refresh);
+    alignas(kCacheLineSize) char buffer[Network::kBufferSize];
+    const auto output = network->Propagate(transformed_features, buffer);
+    // Only the first output element is used; it is divided by FV_SCALE to obtain a Value
+    auto score = static_cast<Value>(output[0] / FV_SCALE);
+    // Remember the result so later calls with refresh == false can return it directly
+    accumulator.score = score;
+    accumulator.computed_score = true;
+    return accumulator.score;
+  }
+
+  // Load the evaluation function file; returns true if it was read and validated successfully
+  bool load_eval_file(const std::string& evalFile) {
+    // Parameters are reset before reading, so previously loaded values do not survive a failed load
+    Initialize();
+    fileName = evalFile;
+
+    std::ifstream stream(evalFile, std::ios::binary);
+    // A stream that could not be opened makes ReadParameters return false
+    const bool result = ReadParameters(stream);
+
+    return result;
+  }
+
+  // Evaluation function. Perform differential calculation (refresh == false, cached score reused).
+  Value evaluate(const Position& pos) {
+    return ComputeScore(pos, false);
+  }
+
+  // Evaluation function. Perform full calculation (refresh == true, cached score ignored).
+  Value compute_eval(const Position& pos) {
+    return ComputeScore(pos, true);
+  }
+
+  // Externally visible entry point: proceed with the difference calculation if possible
+  void update_eval(const Position& pos) {
+    UpdateAccumulatorIfPossible(pos);
+  }
+
+} // namespace Eval::NNUE
diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h
new file mode 100644
index 00000000..5f0d1855
--- /dev/null
+++ b/src/nnue/evaluate_nnue.h
@@ -0,0 +1,48 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// header used in NNUE evaluation function
+
+#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
+#define NNUE_EVALUATE_NNUE_H_INCLUDED
+
+#include "nnue_feature_transformer.h"
+
+#include <memory>
+
+namespace Eval::NNUE {
+
+  // Hash value of evaluation function structure: XOR of the transformer and network hash values
+  constexpr std::uint32_t kHashValue =
+      FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
+
+  // Deleter for automating release of memory area: runs the destructor, then std_aligned_free
+  template <typename T>
+  struct AlignedDeleter {
+    void operator()(T* ptr) const {
+      ptr->~T();
+      std_aligned_free(ptr);
+    }
+  };
+  // Owning pointer whose storage is released through AlignedDeleter
+  template <typename T>
+  using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h
new file mode 100644
index 00000000..79ca83ae
--- /dev/null
+++ b/src/nnue/features/feature_set.h
@@ -0,0 +1,135 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// A class template that represents the input feature set of the NNUE evaluation function
+
+#ifndef NNUE_FEATURE_SET_H_INCLUDED
+#define NNUE_FEATURE_SET_H_INCLUDED
+
+#include "features_common.h"
+#include <array>
+
+namespace Eval::NNUE::Features {
+
+  // Compile-time list of values of type T (only non-empty lists are defined)
+  template <typename T, T... Values>
+  struct CompileTimeList;
+
+  template <typename T, T First, T... Remaining>
+  struct CompileTimeList<T, First, Remaining...> {
+    static constexpr bool Contains(T value) {  // fold: never recurses into the undefined empty list
+      return value == First || (... || (value == Remaining));
+    }
+    static constexpr std::array<T, sizeof...(Remaining) + 1>
+        kValues = {{First, Remaining...}};
+  };
+
+  // Base class of feature set; Derived supplies the static Collect*Indices hooks
+  template <typename Derived>
+  class FeatureSetBase {
+
+   public:
+    // Collect the active feature indices of both perspectives into active[WHITE] / active[BLACK]
+    template <typename IndexListType>
+    static void AppendActiveIndices(
+        const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
+
+      for (Color perspective : { WHITE, BLACK }) {
+        Derived::CollectActiveIndices(
+            pos, trigger, perspective, &active[perspective]);
+      }
+    }
+
+    // Collect the indices changed per pos.state()->dirtyPiece; reset[] is always written
+    template <typename PositionType, typename IndexListType>
+    static void AppendChangedIndices(
+        const PositionType& pos, TriggerEvent trigger,
+        IndexListType removed[2], IndexListType added[2], bool reset[2]) {
+      // Define the out-flags up front so the early return below never leaves them unset
+      reset[WHITE] = reset[BLACK] = false;
+      const auto& dp = pos.state()->dirtyPiece;
+      if (dp.dirty_num == 0) return;
+
+      for (Color perspective : { WHITE, BLACK }) {
+        switch (trigger) {
+          case TriggerEvent::kFriendKingMoved:  // reset when dirty piece 0 is this perspective's king
+            reset[perspective] =
+                dp.pieceId[0] == PIECE_ID_KING + perspective;
+            break;
+          default:
+            assert(false);
+            break;
+        }
+        if (reset[perspective]) {  // full refresh: added[] receives the complete active list
+          Derived::CollectActiveIndices(
+              pos, trigger, perspective, &added[perspective]);
+        } else {  // incremental: removed[] / added[] receive only the changed indices
+          Derived::CollectChangedIndices(
+              pos, trigger, perspective,
+              &removed[perspective], &added[perspective]);
+        }
+      }
+    }
+  };
+
+  // Feature set made of exactly one feature type; everything is forwarded to FeatureType
+  template <typename FeatureType>
+  class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
+
+   public:
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
+    // Number of feature dimensions
+    static constexpr IndexType kDimensions = FeatureType::kDimensions;
+    // Maximum number of simultaneously active features
+    static constexpr IndexType kMaxActiveDimensions =
+        FeatureType::kMaxActiveDimensions;
+    // Triggers that force a full calculation instead of a difference calculation
+    using SortedTriggerSet =
+        CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
+    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
+
+   private:
+    // Append the active indices of FeatureType for one perspective.
+    // Nothing is appended when trigger is not the feature's refresh trigger.
+    static void CollectActiveIndices(
+        const Position& pos, const TriggerEvent trigger, const Color perspective,
+        IndexList* const active) {
+      if (trigger != FeatureType::kRefreshTrigger) return;
+      FeatureType::AppendActiveIndices(pos, perspective, active);
+    }
+
+    // Append the changed indices of FeatureType for one perspective.
+    // Nothing is appended when trigger is not the feature's refresh trigger.
+    static void CollectChangedIndices(
+        const Position& pos, const TriggerEvent trigger, const Color perspective,
+        IndexList* const removed, IndexList* const added) {
+
+      if (trigger != FeatureType::kRefreshTrigger) return;
+      FeatureType::AppendChangedIndices(pos, perspective, removed, added);
+    }
+
+    // The base class calls the private Collect* hooks, so it must be a friend
+    friend class FeatureSetBase<FeatureSet>;
+    template <typename... FeatureTypes>
+    friend class FeatureSet;
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
diff --git a/src/nnue/features/features_common.h b/src/nnue/features/features_common.h
new file mode 100644
index 00000000..d00a35df
--- /dev/null
+++ b/src/nnue/features/features_common.h
@@ -0,0 +1,45 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Common header of input features of NNUE evaluation function
+
+#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
+#define NNUE_FEATURES_COMMON_H_INCLUDED
+
+#include "../../evaluate.h"
+#include "../nnue_common.h"
+
+namespace Eval::NNUE::Features {
+
+  class IndexList;  // forward declaration only; not defined in this header
+
+  template <typename... FeatureTypes>
+  class FeatureSet;  // primary template: declared here, not defined here
+
+  // Trigger to perform full calculations instead of difference only
+  enum class TriggerEvent {
+    kFriendKingMoved // calculate full evaluation when own king moves
+  };
+
+  enum class Side {  // single enumerator for now
+    kFriend // side to move
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp
new file mode 100644
index 00000000..628add6e
--- /dev/null
+++ b/src/nnue/features/half_kp.cpp
@@ -0,0 +1,92 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of input features HalfKP of NNUE evaluation function
+
+#include "half_kp.h"
+#include "index_list.h"
+
+namespace Eval::NNUE::Features {
+
+  // Feature index for PieceSquare p combined with the king on sq_k: PS_END * sq_k + p
+  template <Side AssociatedKing>
+  inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) {
+    return static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k) + p;  // one PS_END-sized range per king square
+  }
+
+  // Fetch the piece list for `perspective` and the square of the associated king
+  template <Side AssociatedKing>
+  inline void HalfKP<AssociatedKing>::GetPieces(
+      const Position& pos, Color perspective,
+      PieceSquare** pieces, Square* sq_target_k) {
+    // piece_list_fb() is used for the BLACK perspective, piece_list_fw() otherwise
+    const auto eval_list = pos.eval_list();
+    *pieces = (perspective == BLACK) ? eval_list->piece_list_fb()
+                                     : eval_list->piece_list_fw();
+    const Color king_color = (AssociatedKing == Side::kFriend) ? perspective : ~perspective;
+    const auto target = static_cast<PieceId>(PIECE_ID_KING + king_color);
+    // The king's entry is taken relative to PS_W_KING and reduced modulo SQUARE_NB
+    *sq_target_k = static_cast<Square>(((*pieces)[target] - PS_W_KING) % SQUARE_NB);
+  }
+
+  // Append one feature index per occupied non-king entry of the piece list
+  template <Side AssociatedKing>
+  void HalfKP<AssociatedKing>::AppendActiveIndices(
+      const Position& pos, Color perspective, IndexList* active) {
+
+    // Do nothing if array size is small to avoid compiler warning
+    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
+
+    PieceSquare* piece_list;
+    Square king_sq;
+    GetPieces(pos, perspective, &piece_list, &king_sq);
+    for (PieceId id = PIECE_ID_ZERO; id < PIECE_ID_KING; ++id) {
+      // Entries equal to PS_NONE are skipped
+      if (piece_list[id] == PS_NONE) continue;
+      active->push_back(MakeIndex(king_sq, piece_list[id]));
+    }
+  }
+
+  // Append the indices removed and added according to the state's dirtyPiece record
+  template <Side AssociatedKing>
+  void HalfKP<AssociatedKing>::AppendChangedIndices(
+      const Position& pos, Color perspective,
+      IndexList* removed, IndexList* added) {
+
+    PieceSquare* piece_list;
+    Square king_sq;
+    GetPieces(pos, perspective, &piece_list, &king_sq);
+    const auto& dirty = pos.state()->dirtyPiece;
+    for (int n = 0; n < dirty.dirty_num; ++n) {
+      // Piece ids from PIECE_ID_KING upwards contribute no index here
+      if (dirty.pieceId[n] >= PIECE_ID_KING) continue;
+
+      // The old placement goes to `removed` ...
+      const auto before = static_cast<PieceSquare>(
+          dirty.old_piece[n].from[perspective]);
+      if (before != PS_NONE) removed->push_back(MakeIndex(king_sq, before));
+      // ... and the new placement goes to `added`
+      const auto after = static_cast<PieceSquare>(
+          dirty.new_piece[n].from[perspective]);
+      if (after != PS_NONE) added->push_back(MakeIndex(king_sq, after));
+    }
+  }
+
+  template class HalfKP<Side::kFriend>;  // explicit instantiation: the member definitions live in this .cpp
+
+}  // namespace Eval::NNUE::Features
diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_kp.h
new file mode 100644
index 00000000..99842eea
--- /dev/null
+++ b/src/nnue/features/half_kp.h
@@ -0,0 +1,67 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of input features HalfKP of NNUE evaluation function
+
+#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
+#define NNUE_FEATURES_HALF_KP_H_INCLUDED
+
+#include "../../evaluate.h"
+#include "features_common.h"
+
+namespace Eval::NNUE::Features {
+
+  // Feature HalfKP: Combination of the position of own king
+  // and the position of pieces other than kings
+  template <Side AssociatedKing>
+  class HalfKP {
+
+   public:
+    // Feature name (a fixed string; it does not vary with AssociatedKing)
+    static constexpr const char* kName = "HalfKP(Friend)";
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t kHashValue =
+        0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);  // lowest bit is flipped for Side::kFriend
+    // Number of feature dimensions: one per (king square, PieceSquare value) pair
+    static constexpr IndexType kDimensions =
+        static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
+    // Maximum number of simultaneously active features
+    static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING;
+    // Trigger for full calculation instead of difference calculation
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
+
+    // Append the indices of all active features for `perspective` to `active`
+    static void AppendActiveIndices(const Position& pos, Color perspective,
+                                    IndexList* active);
+
+    // Append the indices of recently changed features to `removed` and `added`
+    static void AppendChangedIndices(const Position& pos, Color perspective,
+                                     IndexList* removed, IndexList* added);
+
+    // Index of a feature for a given king position and another piece on some square
+    static IndexType MakeIndex(Square sq_k, PieceSquare p);
+
+   private:
+    // Output the piece list for `perspective` and the square of the associated king
+    static void GetPieces(const Position& pos, Color perspective,
+                          PieceSquare** pieces, Square* sq_target_k);
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
diff --git a/src/nnue/features/index_list.h b/src/nnue/features/index_list.h
new file mode 100644
index 00000000..d9ad680a
--- /dev/null
+++ b/src/nnue/features/index_list.h
@@ -0,0 +1,64 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of index list of input features
+
+#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
+#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
+
+#include "../../position.h"
+#include "../nnue_architecture.h"
+
+namespace Eval::NNUE::Features {
+
+  // Fixed-capacity list used for feature indices: up to MaxSize values of T stored inline
+  template <typename T, std::size_t MaxSize>
+  class ValueList {
+
+   public:
+    std::size_t size() const { return size_; }
+    void resize(std::size_t size) { size_ = size; }  // only changes the count; elements are untouched
+    void push_back(const T& value) { values_[size_++] = value; }  // no bounds check against MaxSize
+    T& operator[](std::size_t index) { return values_[index]; }  // unchecked access
+    T* begin() { return values_; }
+    T* end() { return values_ + size_; }
+    const T& operator[](std::size_t index) const { return values_[index]; }
+    const T* begin() const { return values_; }
+    const T* end() const { return values_ + size_; }
+
+    void swap(ValueList& other) {
+      const std::size_t max_size = std::max(size_, other.size_);  // the longer list bounds the element swap
+      for (std::size_t i = 0; i < max_size; ++i) {
+        std::swap(values_[i], other.values_[i]);
+      }
+      std::swap(size_, other.size_);
+    }
+
+   private:
+    T values_[MaxSize];  // only the first size_ elements belong to the list
+    std::size_t size_ = 0;
+  };
+
+  // Type of feature index list: a ValueList of IndexType with capacity RawFeatures::kMaxActiveDimensions
+  class IndexList
+      : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h
new file mode 100644
index 00000000..7ac5a1c0
--- /dev/null
+++ b/src/nnue/layers/affine_transform.h
@@ -0,0 +1,260 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of layer AffineTransform of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
+#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
+
+#include <iostream>
+#include "../nnue_common.h"
+
+namespace Eval::NNUE::Layers {
+
+  // Affine transformation layer
+  template <typename PreviousLayer, IndexType OutputDimensions>
+  class AffineTransform {
+   public:
+    // Input/output type
+    using InputType = typename PreviousLayer::OutputType;
+    using OutputType = std::int32_t;
+    static_assert(std::is_same<InputType, std::uint8_t>::value, "");  // only 8-bit unsigned inputs are accepted
+
+    // Number of input/output dimensions
+    static constexpr IndexType kInputDimensions =
+        PreviousLayer::kOutputDimensions;
+    static constexpr IndexType kOutputDimensions = OutputDimensions;
+    static constexpr IndexType kPaddedInputDimensions =
+        CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);  // also the row stride of weights_
+
+    // Size of forward propagation buffer used in this layer
+    static constexpr std::size_t kSelfBufferSize =
+        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
+
+    // Size of the forward propagation buffer used from the input layer to this layer
+    static constexpr std::size_t kBufferSize =
+        PreviousLayer::kBufferSize + kSelfBufferSize;
+
+    // Hash value embedded in the evaluation file (mixes in the output count and the previous layer's hash)
+    static constexpr std::uint32_t GetHashValue() {
+      const auto prev = PreviousLayer::GetHashValue();
+      std::uint32_t result = 0xCC03DAE4u + kOutputDimensions;
+      result ^= prev >> 1;
+      result ^= prev << 31;
+      return result;
+    }
+
+    // Read network parameters
+    // Reads the previous layer's parameters first, then this layer's biases followed by its weights.
+    // Returns false if the previous layer fails or the stream has failed after reading.
+    bool ReadParameters(std::istream& stream) {
+      if (!previous_layer_.ReadParameters(stream)) return false;
+      for (auto& bias : biases_) bias = read_little_endian<BiasType>(stream);
+      for (auto& weight : weights_) weight = read_little_endian<WeightType>(stream);
+      return !stream.fail();
+    }
+
+    // Forward propagation: output[i] = biases_[i] + dot(weight row i, input), written to the start of buffer
+    const OutputType* Propagate(
+        const TransformedFeatureType* transformed_features, char* buffer) const {
+      const auto input = previous_layer_.Propagate(
+          transformed_features, buffer + kSelfBufferSize);  // previous layers get the space after this layer's slice
+      const auto output = reinterpret_cast<OutputType*>(buffer);  // this layer's outputs start at buffer
+
+  #if defined(USE_AVX512)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
+      const auto input_vector = reinterpret_cast<const __m512i*>(input);
+  #if !defined(USE_VNNI)
+      const __m512i kOnes = _mm512_set1_epi16(1);
+  #endif
+
+  #elif defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const __m256i kOnes = _mm256_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m256i*>(input);
+
+  #elif defined(USE_SSE2)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+  #ifndef USE_SSSE3
+      const __m128i kZeros = _mm_setzero_si128();
+  #else
+      const __m128i kOnes = _mm_set1_epi16(1);
+  #endif
+      const auto input_vector = reinterpret_cast<const __m128i*>(input);
+
+  #elif defined(USE_MMX)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const __m64 kZeros = _mm_setzero_si64();
+      const auto input_vector = reinterpret_cast<const __m64*>(input);
+
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
+  #endif
+
+      for (IndexType i = 0; i < kOutputDimensions; ++i) {  // one dot product per output
+        const IndexType offset = i * kPaddedInputDimensions;  // start of weight row i
+
+  #if defined(USE_AVX512)
+        __m512i sum = _mm512_setzero_si512();
+        const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+  #if defined(USE_VNNI)
+            sum = _mm512_dpbusd_epi32(sum, _mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
+  #else
+            __m512i product = _mm512_maddubs_epi16(_mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
+            product = _mm512_madd_epi16(product, kOnes);  // add adjacent int16 pairs into int32 lanes
+            sum = _mm512_add_epi32(sum, product);
+  #endif
+        }
+
+        // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
+        // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
+        // and we have to do one more 256bit chunk.
+        if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
+        {
+            const auto iv256  = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
+            const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
+  #if defined(USE_VNNI)
+            __m256i product256 = _mm256_dpbusd_epi32(
+                _mm512_castsi512_si256(sum), _mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
+            sum = _mm512_inserti32x8(sum, product256, 0);
+  #else
+            __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
+            sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256));
+  #endif
+        }
+        output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];  // horizontal sum of all lanes, plus bias
+
+  #elif defined(USE_AVX2)
+        __m256i sum = _mm256_setzero_si256();
+        const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
+          product = _mm256_madd_epi16(product, kOnes);
+          sum = _mm256_add_epi32(sum, product);
+        }
+        __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));  // fold the two 128-bit halves
+        sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
+        sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
+        output[i] = _mm_cvtsi128_si32(sum128) + biases_[i];
+
+  #elif defined(USE_SSSE3)
+        __m128i sum = _mm_setzero_si128();
+        const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
+        for (int j = 0; j < (int)kNumChunks - 1; j += 2) {
+          __m128i product0 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
+          product0 = _mm_madd_epi16(product0, kOnes);
+          sum = _mm_add_epi32(sum, product0);
+          __m128i product1 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j+1]), _mm_load_si128(&row[j+1]));
+          product1 = _mm_madd_epi16(product1, kOnes);
+          sum = _mm_add_epi32(sum, product1);
+        }
+        if (kNumChunks & 0x1) {  // odd chunk count: the last chunk is not covered by the unrolled loop
+          __m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[kNumChunks-1]), _mm_load_si128(&row[kNumChunks-1]));
+          product = _mm_madd_epi16(product, kOnes);
+          sum = _mm_add_epi32(sum, product);
+        }
+        sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
+        sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
+        output[i] = _mm_cvtsi128_si32(sum) + biases_[i];
+
+  #elif defined(USE_SSE2)
+        __m128i sum_lo = _mm_cvtsi32_si128(biases_[i]);  // the bias seeds lane 0 of the accumulator
+        __m128i sum_hi = kZeros;
+        const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m128i row_j = _mm_load_si128(&row[j]);
+          __m128i input_j = _mm_load_si128(&input_vector[j]);
+          __m128i row_signs = _mm_cmpgt_epi8(kZeros, row_j);  // all-ones bytes where the weight is negative
+          __m128i extended_row_lo = _mm_unpacklo_epi8(row_j, row_signs);
+          __m128i extended_row_hi = _mm_unpackhi_epi8(row_j, row_signs);
+          __m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros);  // inputs are unsigned: zero-extend
+          __m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros);
+          __m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo);
+          __m128i product_hi = _mm_madd_epi16(extended_row_hi, extended_input_hi);
+          sum_lo = _mm_add_epi32(sum_lo, product_lo);
+          sum_hi = _mm_add_epi32(sum_hi, product_hi);
+        }
+        __m128i sum = _mm_add_epi32(sum_lo, sum_hi);
+        __m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
+        sum = _mm_add_epi32(sum, sum_high_64);
+        __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
+        sum = _mm_add_epi32(sum, sum_second_32);
+        output[i] = _mm_cvtsi128_si32(sum);
+
+  #elif defined(USE_MMX)
+        __m64 sum_lo = _mm_cvtsi32_si64(biases_[i]);  // the bias seeds lane 0 of the accumulator
+        __m64 sum_hi = kZeros;
+        const auto row = reinterpret_cast<const __m64*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m64 row_j = row[j];
+          __m64 input_j = input_vector[j];
+          __m64 row_signs = _mm_cmpgt_pi8(kZeros, row_j);
+          __m64 extended_row_lo = _mm_unpacklo_pi8(row_j, row_signs);
+          __m64 extended_row_hi = _mm_unpackhi_pi8(row_j, row_signs);
+          __m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros);
+          __m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros);
+          __m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo);
+          __m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi);
+          sum_lo = _mm_add_pi32(sum_lo, product_lo);
+          sum_hi = _mm_add_pi32(sum_hi, product_hi);
+        }
+        __m64 sum = _mm_add_pi32(sum_lo, sum_hi);
+        sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
+        output[i] = _mm_cvtsi64_si32(sum);
+
+  #elif defined(USE_NEON)
+        int32x4_t sum = {biases_[i]};  // bias in lane 0, remaining lanes zero
+        const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
+          product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
+          sum = vpadalq_s16(sum, product);
+        }
+        output[i] = sum[0] + sum[1] + sum[2] + sum[3];
+
+  #else
+        OutputType sum = biases_[i];
+        for (IndexType j = 0; j < kInputDimensions; ++j) {  // portable path: only the unpadded inputs are read
+          sum += weights_[offset + j] * input[j];
+        }
+        output[i] = sum;
+  #endif
+
+      }
+  #if defined(USE_MMX)
+      _mm_empty();  // clear the MMX state once all MMX work is done
+  #endif
+      return output;
+    }
+
+   private:
+    using BiasType = OutputType;
+    using WeightType = std::int8_t;
+
+    PreviousLayer previous_layer_;  // the previous layer is stored by value inside this layer
+
+    alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
+    alignas(kCacheLineSize)
+        WeightType weights_[kOutputDimensions * kPaddedInputDimensions];  // row i starts at i * kPaddedInputDimensions
+  };
+
+}  // namespace Eval::NNUE::Layers
+
+#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h
new file mode 100644
index 00000000..44d8a7de
--- /dev/null
+++ b/src/nnue/layers/clipped_relu.h
@@ -0,0 +1,166 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of layer ClippedReLU of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
+#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
+
+#include "../nnue_common.h"
+
+namespace Eval::NNUE::Layers {
+
+  // Clipped ReLU
+  template <typename PreviousLayer>
+  class ClippedReLU {
+   public:
+    // Input/output type
+    using InputType = typename PreviousLayer::OutputType;
+    using OutputType = std::uint8_t;
+    static_assert(std::is_same<InputType, std::int32_t>::value, "");  // only 32-bit signed inputs are accepted
+
+    // Number of input/output dimensions
+    static constexpr IndexType kInputDimensions =
+        PreviousLayer::kOutputDimensions;
+    static constexpr IndexType kOutputDimensions = kInputDimensions;  // element-wise layer: same size in and out
+
+    // Size of forward propagation buffer used in this layer
+    static constexpr std::size_t kSelfBufferSize =
+        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
+
+    // Size of the forward propagation buffer used from the input layer to this layer
+    static constexpr std::size_t kBufferSize =
+        PreviousLayer::kBufferSize + kSelfBufferSize;
+
+    // Hash value embedded in the evaluation file:
+    // a constant for this layer type plus the hash of the previous layer
+    static constexpr std::uint32_t GetHashValue() {
+      constexpr std::uint32_t kLayerTag = 0x538D24C7u;
+      return kLayerTag + PreviousLayer::GetHashValue();
+    }
+
+    // Read network parameters (nothing is read for this layer itself; the call is forwarded)
+    bool ReadParameters(std::istream& stream) {
+      return previous_layer_.ReadParameters(stream);
+    }
+
+    // Forward propagation: output[i] = clamp(input[i] >> kWeightScaleBits, 0, 127), written to the start of buffer
+    const OutputType* Propagate(
+        const TransformedFeatureType* transformed_features, char* buffer) const {
+      const auto input = previous_layer_.Propagate(
+          transformed_features, buffer + kSelfBufferSize);  // previous layers get the space after this layer's slice
+      const auto output = reinterpret_cast<OutputType*>(buffer);
+
+  #if defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
+      const __m256i kZero = _mm256_setzero_si256();
+      const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);  // reorders the 32-bit groups after the per-lane packs
+      const auto in = reinterpret_cast<const __m256i*>(input);
+      const auto out = reinterpret_cast<__m256i*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
+            _mm256_loadA_si256(&in[i * 4 + 0]),
+            _mm256_loadA_si256(&in[i * 4 + 1])), kWeightScaleBits);
+        const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
+            _mm256_loadA_si256(&in[i * 4 + 2]),
+            _mm256_loadA_si256(&in[i * 4 + 3])), kWeightScaleBits);
+        _mm256_storeA_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
+            _mm256_packs_epi16(words0, words1), kZero), kOffsets));
+      }
+      constexpr IndexType kStart = kNumChunks * kSimdWidth;  // first element left for the scalar loop
+
+  #elif defined(USE_SSE2)
+      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
+
+  #ifdef USE_SSE41
+      const __m128i kZero = _mm_setzero_si128();
+  #else
+      const __m128i k0x80s = _mm_set1_epi8(-128);  // used below to clamp negatives to 0 without _mm_max_epi8
+  #endif
+
+      const auto in = reinterpret_cast<const __m128i*>(input);
+      const auto out = reinterpret_cast<__m128i*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
+            _mm_load_si128(&in[i * 4 + 0]),
+            _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
+        const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
+            _mm_load_si128(&in[i * 4 + 2]),
+            _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
+        const __m128i packedbytes = _mm_packs_epi16(words0, words1);
+        _mm_store_si128(&out[i],
+
+  #ifdef USE_SSE41
+          _mm_max_epi8(packedbytes, kZero)
+  #else
+          _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)  // saturating add then subtract of -128: max(x, 0)
+  #endif
+
+        );
+      }
+      constexpr IndexType kStart = kNumChunks * kSimdWidth;  // first element left for the scalar loop
+
+  #elif defined(USE_MMX)
+      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
+      const __m64 k0x80s = _mm_set1_pi8(-128);
+      const auto in = reinterpret_cast<const __m64*>(input);
+      const auto out = reinterpret_cast<__m64*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        const __m64 words0 = _mm_srai_pi16(
+            _mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]),
+            kWeightScaleBits);
+        const __m64 words1 = _mm_srai_pi16(
+            _mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]),
+            kWeightScaleBits);
+        const __m64 packedbytes = _mm_packs_pi16(words0, words1);
+        out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);  // saturating add then subtract of -128: max(x, 0)
+      }
+      _mm_empty();  // clear the MMX state once all MMX work is done
+      constexpr IndexType kStart = kNumChunks * kSimdWidth;
+
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
+      const int8x8_t kZero = {0};
+      const auto in = reinterpret_cast<const int32x4_t*>(input);
+      const auto out = reinterpret_cast<int8x8_t*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        int16x8_t shifted;
+        const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
+        pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
+        pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
+        out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
+      }
+      constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
+  #else
+      constexpr IndexType kStart = 0;  // no vector path: the scalar loop handles every element
+  #endif
+
+      for (IndexType i = kStart; i < kInputDimensions; ++i) {  // scalar loop for the elements from kStart on
+        output[i] = static_cast<OutputType>(
+            std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
+      }
+      return output;
+    }
+
+   private:
+    PreviousLayer previous_layer_;  // the previous layer is stored by value inside this layer
+  };
+
+}  // namespace Eval::NNUE::Layers
+
+#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
diff --git a/src/nnue/layers/input_slice.h b/src/nnue/layers/input_slice.h
new file mode 100644
index 00000000..afca14c8
--- /dev/null
+++ b/src/nnue/layers/input_slice.h
@@ -0,0 +1,68 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// NNUE evaluation function layer InputSlice definition
+
+#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
+#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
+
+#include "../nnue_common.h"
+
+namespace Eval::NNUE::Layers {
+
+// Input layer
+template <IndexType OutputDimensions, IndexType Offset = 0>
+class InputSlice {
+ public:
+  // The slice may only start at a multiple of the widest SIMD width
+  static_assert(Offset % kMaxSimdWidth == 0, "");
+
+  // Element type handed to the next layer
+  using OutputType = TransformedFeatureType;
+
+  // Number of elements exposed by this slice
+  static constexpr IndexType kOutputDimensions = OutputDimensions;
+
+  // Propagate() only returns a pointer into its input, so no buffer is needed
+  static constexpr std::size_t kBufferSize = 0;
+
+  // Hash value embedded in the evaluation file: a fixed seed mixed with the
+  // slice width and the (shifted) slice offset
+  static constexpr std::uint32_t GetHashValue() {
+    constexpr std::uint32_t kSeed = 0xEC42E90Du;
+    return kSeed ^ (kOutputDimensions ^ (Offset << 10));
+  }
+
+  // Read network parameters: nothing is read here, so this always succeeds
+  bool ReadParameters(std::istream& /*stream*/) {
+    return true;
+  }
+
+  // Forward propagation: returns the caller's feature array advanced by
+  // Offset elements; the buffer argument is not used
+  const OutputType* Propagate(
+      const TransformedFeatureType* transformed_features,
+      char* /*buffer*/) const {
+    return transformed_features + Offset;
+  }
+
+};
+
+}  // namespace Eval::NNUE::Layers
+
+#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h
new file mode 100644
index 00000000..69dfaad2
--- /dev/null
+++ b/src/nnue/nnue_accumulator.h
@@ -0,0 +1,39 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Class for difference calculation of NNUE evaluation function
+
+#ifndef NNUE_ACCUMULATOR_H_INCLUDED
+#define NNUE_ACCUMULATOR_H_INCLUDED
+
+#include "nnue_architecture.h"
+
+namespace Eval::NNUE {
+
+  // Class that holds the result of affine transformation of input features
+  struct alignas(kCacheLineSize) Accumulator {
+    std::int16_t  // indexed as [perspective][refresh trigger][dimension]
+        accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
+    Value score;  // NOTE(review): presumably the cached evaluation guarded by computed_score - confirm
+    bool computed_accumulation;  // set by the feature transformer once accumulation is filled in
+    bool computed_score;  // cleared whenever accumulation is recomputed
+  };
+
+}  // namespace Eval::NNUE
+
+#endif // NNUE_ACCUMULATOR_H_INCLUDED
diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h
new file mode 100644
index 00000000..91cdc4bd
--- /dev/null
+++ b/src/nnue/nnue_architecture.h
@@ -0,0 +1,38 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_ARCHITECTURE_H_INCLUDED
+#define NNUE_ARCHITECTURE_H_INCLUDED
+
+// Defines the network structure
+#include "architectures/halfkp_256x2-32-32.h"
+
+namespace Eval::NNUE {
+
+  static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");  // whole SIMD registers only
+  static_assert(Network::kOutputDimensions == 1, "");  // the network yields a single value
+  static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");  // ... of type int32
+
+  // Trigger for full calculation instead of difference calculation
+  constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h
new file mode 100644
index 00000000..4c93e3d1
--- /dev/null
+++ b/src/nnue/nnue_common.h
@@ -0,0 +1,127 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Constants used in NNUE evaluation function
+
+#ifndef NNUE_COMMON_H_INCLUDED
+#define NNUE_COMMON_H_INCLUDED
+
+#include <cstring>
+#include <iostream>
+
+#if defined(USE_AVX2)
+#include <immintrin.h>
+
+#elif defined(USE_SSE41)
+#include <smmintrin.h>
+
+#elif defined(USE_SSSE3)
+#include <tmmintrin.h>
+
+#elif defined(USE_SSE2)
+#include <emmintrin.h>
+
+#elif defined(USE_MMX)
+#include <mmintrin.h>
+
+#elif defined(USE_NEON)
+#include <arm_neon.h>
+#endif
+
+// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary
+//       compiled with older g++ crashes because the output memory is not aligned
+//       even though alignas is specified.
+#if defined(USE_AVX2)
+#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32)
+#define _mm256_loadA_si256  _mm256_loadu_si256
+#define _mm256_storeA_si256 _mm256_storeu_si256
+#else
+#define _mm256_loadA_si256  _mm256_load_si256
+#define _mm256_storeA_si256 _mm256_store_si256
+#endif
+#endif
+
+#if defined(USE_AVX512)
+#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32)
+#define _mm512_loadA_si512   _mm512_loadu_si512
+#define _mm512_storeA_si512  _mm512_storeu_si512
+#else
+#define _mm512_loadA_si512   _mm512_load_si512
+#define _mm512_storeA_si512  _mm512_store_si512
+#endif
+#endif
+
+namespace Eval::NNUE {
+
+  // Version of the evaluation file
+  constexpr std::uint32_t kVersion = 0x7AF32F16u;
+
+  // Constant used in evaluation value calculation
+  constexpr int FV_SCALE = 16;
+  constexpr int kWeightScaleBits = 6;  // layer sums are shifted right by this many bits before clipping
+
+  // Size of cache line (in bytes)
+  constexpr std::size_t kCacheLineSize = 64;
+
+  // SIMD width (in bytes); kSimdWidth stays undefined when none of the macros below is set
+  #if defined(USE_AVX2)
+  constexpr std::size_t kSimdWidth = 32;
+
+  #elif defined(USE_SSE2)
+  constexpr std::size_t kSimdWidth = 16;
+
+  #elif defined(USE_MMX)
+  constexpr std::size_t kSimdWidth = 8;
+
+  #elif defined(USE_NEON)
+  constexpr std::size_t kSimdWidth = 16;
+  #endif
+
+  constexpr std::size_t kMaxSimdWidth = 32;  // largest kSimdWidth above; used in alignment static_asserts
+
+  // Type of input feature after conversion
+  using TransformedFeatureType = std::uint8_t;
+  using IndexType = std::uint32_t;
+
+  // Smallest multiple of base that is >= n (for non-negative n and base > 0)
+  template <typename IntType>
+  constexpr IntType CeilToMultiple(IntType n, IntType base) {
+      return ((n + base - 1) / base) * base;
+  }
+
+  // read_little_endian() reads an integer (signed or unsigned, any size) from a
+  // stream in little-endian order. The value is rebuilt arithmetically from the
+  // bytes, so the result does not depend on the byte order of the host machine.
+  template <typename IntType>
+  inline IntType read_little_endian(std::istream& stream) {
+
+      IntType result;
+      std::uint8_t u[sizeof(IntType)] = {}; // zeroed: a short read must not leave indeterminate bytes
+      typename std::make_unsigned<IntType>::type v = 0; // NOTE(review): relies on <type_traits> being included indirectly
+
+      stream.read(reinterpret_cast<char*>(u), sizeof(IntType)); // on a short read the stream is left in a failed state
+      for (std::size_t i = 0; i < sizeof(IntType); ++i)
+          v = (v << 8) | u[sizeof(IntType) - i - 1]; // last byte in the file is the most significant
+
+      std::memcpy(&result, &v, sizeof(IntType)); // copy the bit pattern into the (possibly signed) result
+      return result;
+  }
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_COMMON_H_INCLUDED
diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h
new file mode 100644
index 00000000..43707610
--- /dev/null
+++ b/src/nnue/nnue_feature_transformer.h
@@ -0,0 +1,378 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// A class that converts the input features of the NNUE evaluation function
+
+#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
+#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED
+
+#include "nnue_common.h"
+#include "nnue_architecture.h"
+#include "features/index_list.h"
+
+#include <cstring> // std::memset()
+
+namespace Eval::NNUE {
+
+  // Input feature converter
+  class FeatureTransformer {
+
+   private:
+    // Number of output dimensions for one side
+    static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
+
+   public:
+    // Output type
+    using OutputType = TransformedFeatureType;
+
+    // Number of input/output dimensions
+    static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
+    static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
+
+    // Size of forward propagation buffer
+    static constexpr std::size_t kBufferSize =
+        kOutputDimensions * sizeof(OutputType);
+
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t GetHashValue() {
+      return kOutputDimensions ^ RawFeatures::kHashValue;  // output width mixed into the feature-set hash
+    }
+
+    // Read network parameters
+    bool ReadParameters(std::istream& stream) {
+      for (BiasType& bias : biases_)        // biases are stored first in the stream
+        bias = read_little_endian<BiasType>(stream);
+      for (WeightType& weight : weights_)   // followed by the complete weight array
+        weight = read_little_endian<WeightType>(stream);
+      return !stream.fail();                // false if the stream is in a failed state afterwards
+    }
+
+    // Proceed with the difference calculation if possible
+    bool UpdateAccumulatorIfPossible(const Position& pos) const {
+      // Nothing to do when the current state already has its accumulator
+      const auto current = pos.state();
+      if (current->accumulator.computed_accumulation)
+        return true;
+      // An incremental update needs a previous state with a computed accumulator
+      const auto parent = current->previous;
+      if (!parent || !parent->accumulator.computed_accumulation)
+        return false;
+      UpdateAccumulator(pos);
+      return true;
+    }
+
+    // Convert input features
+    void Transform(const Position& pos, OutputType* output, bool refresh) const {
+      if (refresh || !UpdateAccumulatorIfPossible(pos)) {  // forced, or no incremental update possible
+        RefreshAccumulator(pos);
+      }
+      const auto& accumulation = pos.state()->accumulator.accumulation;
+      // Per-ISA constants: number of output chunks and the operands used for clamping
+  #if defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+      constexpr int kControl = 0b11011000;  // restores element order after the per-128-bit-lane pack
+      const __m256i kZero = _mm256_setzero_si256();
+
+  #elif defined(USE_SSE2)
+      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+
+  #ifdef USE_SSE41
+      const __m128i kZero = _mm_setzero_si128();
+  #else
+      const __m128i k0x80s = _mm_set1_epi8(-128);  // no signed-byte max before SSE4.1: adds/subs of -128 emulates max(x, 0)
+  #endif
+
+  #elif defined(USE_MMX)
+      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+      const __m64 k0x80s = _mm_set1_pi8(-128);  // same saturating add/sub trick as the SSE2 path
+
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+      const int8x8_t kZero = {0};
+  #endif
+
+      const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};  // side to move is written first
+      for (IndexType p = 0; p < 2; ++p) {
+        const IndexType offset = kHalfDimensions * p;  // each perspective fills its own half of output
+
+  #if defined(USE_AVX2)
+        auto out = reinterpret_cast<__m256i*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m256i sum0 = _mm256_loadA_si256(
+              &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m256i sum1 = _mm256_loadA_si256(
+            &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
+          _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
+              _mm256_packs_epi16(sum0, sum1), kZero), kControl));  // saturating pack to int8, then clamp below at 0
+        }
+
+  #elif defined(USE_SSE2)
+        auto out = reinterpret_cast<__m128i*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 1]);
+      const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
+
+          _mm_store_si128(&out[j],
+
+  #ifdef USE_SSE41
+            _mm_max_epi8(packedbytes, kZero)
+  #else
+            _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
+  #endif
+
+          );
+        }
+
+  #elif defined(USE_MMX)
+        auto out = reinterpret_cast<__m64*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m64 sum0 = *(&reinterpret_cast<const __m64*>(
+              accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m64 sum1 = *(&reinterpret_cast<const __m64*>(
+              accumulation[perspectives[p]][0])[j * 2 + 1]);
+          const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
+          out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
+        }
+
+  #elif defined(USE_NEON)
+        const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          int16x8_t sum = reinterpret_cast<const int16x8_t*>(
+              accumulation[perspectives[p]][0])[j];
+          out[j] = vmax_s8(vqmovn_s16(sum), kZero);  // saturating narrow to int8, then clamp below at 0
+        }
+
+  #else
+        for (IndexType j = 0; j < kHalfDimensions; ++j) {
+          BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
+          output[offset + j] = static_cast<OutputType>(
+              std::max<int>(0, std::min<int>(127, sum)));  // scalar reference: clamp to [0, 127]
+        }
+  #endif
+
+      }
+  #if defined(USE_MMX)
+      _mm_empty();  // clear MMX state before returning
+  #endif
+    }
+
+   private:
+    // Calculate cumulative value without using difference calculation
+    void RefreshAccumulator(const Position& pos) const {
+      auto& accumulator = pos.state()->accumulator;
+      IndexType i = 0;  // only the first refresh trigger is processed
+      Features::IndexList active_indices[2];
+      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
+                                       active_indices);
+      for (Color perspective : { WHITE, BLACK }) {
+        std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                   kHalfDimensions * sizeof(BiasType));  // start from the biases ...
+        for (const auto index : active_indices[perspective]) {  // ... then add one weight column per active feature
+          const IndexType offset = kHalfDimensions * index;  // start of this feature's column in weights_
+  #if defined(USE_AVX512)
+          auto accumulation = reinterpret_cast<__m512i*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;  // NOTE(review): correct only if kSimdWidth == 32, i.e. USE_AVX2 is also defined - confirm
+          for (IndexType j = 0; j < kNumChunks; ++j)
+            _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
+
+  #elif defined(USE_AVX2)
+          auto accumulation = reinterpret_cast<__m256i*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);  // kSimdWidth / 2 = int16 lanes per register
+          for (IndexType j = 0; j < kNumChunks; ++j)
+            _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
+
+  #elif defined(USE_SSE2)
+          auto accumulation = reinterpret_cast<__m128i*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+          for (IndexType j = 0; j < kNumChunks; ++j)
+            accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+
+  #elif defined(USE_MMX)
+          auto accumulation = reinterpret_cast<__m64*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+          for (IndexType j = 0; j < kNumChunks; ++j) {
+            accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
+          }
+
+  #elif defined(USE_NEON)
+          auto accumulation = reinterpret_cast<int16x8_t*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+          for (IndexType j = 0; j < kNumChunks; ++j)
+            accumulation[j] = vaddq_s16(accumulation[j], column[j]);
+
+  #else
+          for (IndexType j = 0; j < kHalfDimensions; ++j)
+            accumulator.accumulation[perspective][i][j] += weights_[offset + j];  // scalar reference implementation
+  #endif
+
+        }
+      }
+  #if defined(USE_MMX)
+      _mm_empty();  // clear MMX state before returning
+  #endif
+
+      accumulator.computed_accumulation = true;  // sums are now valid for this position
+      accumulator.computed_score = false;  // any previously stored score is marked as not computed
+    }
+
+    // Calculate cumulative value using difference calculation
+    void UpdateAccumulator(const Position& pos) const {
+      const auto& prev_accumulator = pos.state()->previous->accumulator;  // by reference: no full copy per update
+      auto& accumulator = pos.state()->accumulator;
+      IndexType i = 0;  // only the first refresh trigger is processed
+      Features::IndexList removed_indices[2], added_indices[2];
+      bool reset[2];  // NOTE(review): presumably filled per perspective by AppendChangedIndices - confirm
+      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
+                                        removed_indices, added_indices, reset);
+      for (Color perspective : { WHITE, BLACK }) {
+        // View this perspective's accumulator row as SIMD registers
+  #if defined(USE_AVX2)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+        auto accumulation = reinterpret_cast<__m256i*>(
+            &accumulator.accumulation[perspective][i][0]);
+
+  #elif defined(USE_SSE2)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+        auto accumulation = reinterpret_cast<__m128i*>(
+            &accumulator.accumulation[perspective][i][0]);
+
+  #elif defined(USE_MMX)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+        auto accumulation = reinterpret_cast<__m64*>(
+            &accumulator.accumulation[perspective][i][0]);
+
+  #elif defined(USE_NEON)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+        auto accumulation = reinterpret_cast<int16x8_t*>(
+            &accumulator.accumulation[perspective][i][0]);
+  #endif
+
+        if (reset[perspective]) {  // restart from the biases; removed features are not subtracted
+          std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                      kHalfDimensions * sizeof(BiasType));
+        } else {  // otherwise start from the previous state's sums
+          std::memcpy(accumulator.accumulation[perspective][i],
+                      prev_accumulator.accumulation[perspective][i],
+                      kHalfDimensions * sizeof(BiasType));
+          // Difference calculation for the deactivated features
+          for (const auto index : removed_indices[perspective]) {
+            const IndexType offset = kHalfDimensions * index;
+
+  #if defined(USE_AVX2)
+            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {  // loadA/storeA: same alignment workaround as RefreshAccumulator
+              _mm256_storeA_si256(&accumulation[j], _mm256_sub_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
+            }
+
+  #elif defined(USE_SSE2)
+            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_MMX)
+            auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_NEON)
+            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = vsubq_s16(accumulation[j], column[j]);
+            }
+
+  #else
+            for (IndexType j = 0; j < kHalfDimensions; ++j) {
+              accumulator.accumulation[perspective][i][j] -=
+                  weights_[offset + j];
+            }
+  #endif
+
+          }
+        }
+        { // Difference calculation for the activated features
+          for (const auto index : added_indices[perspective]) {
+            const IndexType offset = kHalfDimensions * index;
+
+  #if defined(USE_AVX2)
+            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {  // loadA/storeA: same alignment workaround as RefreshAccumulator
+              _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
+            }
+
+  #elif defined(USE_SSE2)
+            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_MMX)
+            auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_NEON)
+            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = vaddq_s16(accumulation[j], column[j]);
+            }
+
+  #else
+            for (IndexType j = 0; j < kHalfDimensions; ++j) {
+              accumulator.accumulation[perspective][i][j] +=
+                  weights_[offset + j];
+            }
+  #endif
+
+          }
+        }
+      }
+  #if defined(USE_MMX)
+      _mm_empty();  // clear MMX state before returning
+  #endif
+
+      accumulator.computed_accumulation = true;  // sums are now valid for this position
+      accumulator.computed_score = false;  // any previously stored score is marked as not computed
+    }
+
+    using BiasType = std::int16_t;  // also the element type of the accumulator sums
+    using WeightType = std::int16_t;
+
+    alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];  // initial value of every accumulator row
+    alignas(kCacheLineSize)  // the column for feature f starts at weights_[kHalfDimensions * f]
+        WeightType weights_[kHalfDimensions * kInputDimensions];
+  };
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
diff --git a/src/pawns.cpp b/src/pawns.cpp
index d741b2ef..868d0c8e 100644
--- a/src/pawns.cpp
+++ b/src/pawns.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -32,16 +30,21 @@ namespace {
   #define S(mg, eg) make_score(mg, eg)
 
   // Pawn penalties
-  constexpr Score Backward      = S( 9, 24);
-  constexpr Score Doubled       = S(11, 56);
-  constexpr Score Isolated      = S( 5, 15);
-  constexpr Score WeakLever     = S( 0, 56);
-  constexpr Score WeakUnopposed = S(13, 27);
+  constexpr Score Backward      = S( 8, 27);
+  constexpr Score Doubled       = S(11, 55);
+  constexpr Score Isolated      = S( 5, 17);
+  constexpr Score WeakLever     = S( 2, 54);
+  constexpr Score WeakUnopposed = S(15, 25);
 
-  constexpr Score BlockedStorm[RANK_NB]  = {S( 0, 0), S( 0, 0), S( 76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2)};
+  // Bonus for blocked pawns at 5th or 6th rank
+  constexpr Score BlockedPawn[2] = { S(-13, -4), S(-4, 3) };
+
+  constexpr Score BlockedStorm[RANK_NB] = {
+    S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2)
+  };
 
   // Connected pawn bonus
-  constexpr int Connected[RANK_NB] = { 0, 7, 8, 12, 29, 48, 86 };
+  constexpr int Connected[RANK_NB] = { 0, 7, 8, 11, 24, 45, 85 };
 
   // Strength of pawn shelter for our king by [distance from edge][rank].
   // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
@@ -143,7 +146,7 @@ namespace {
         // Score this pawn
         if (support | phalanx)
         {
-            int v =  Connected[r] * (4 + 2 * bool(phalanx) - 2 * bool(opposed) - bool(blocked)) / 2
+            int v =  Connected[r] * (2 + bool(phalanx) - bool(opposed))
                    + 21 * popcount(support);
 
             score += make_score(v, v * (r - 2) / 4);
@@ -167,6 +170,9 @@ namespace {
         if (!support)
             score -=  Doubled * doubled
                     + WeakLever * more_than_one(lever);
+
+        if (blocked && r > RANK_4)
+            score += BlockedPawn[r-4];
     }
 
     return score;
diff --git a/src/pawns.h b/src/pawns.h
index e6098069..5499826e 100644
--- a/src/pawns.h
+++ b/src/pawns.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/position.cpp b/src/position.cpp
index 471ef01f..46e5d78b 100644
--- a/src/position.cpp
+++ b/src/position.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -119,15 +117,7 @@ void Position::init() {
       Zobrist::enpassant[f] = rng.rand<Key>();
 
   for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr)
-  {
-      Zobrist::castling[cr] = 0;
-      Bitboard b = cr;
-      while (b)
-      {
-          Key k = Zobrist::castling[1ULL << pop_lsb(&b)];
-          Zobrist::castling[cr] ^= k ? k : rng.rand<Key>();
-      }
-  }
+      Zobrist::castling[cr] = rng.rand<Key>();
 
   Zobrist::side = rng.rand<Key>();
   Zobrist::noPawns = rng.rand<Key>();
@@ -186,9 +176,9 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
 
    4) En passant target square (in algebraic notation). If there's no en passant
       target square, this is "-". If a pawn has just made a 2-square move, this
-      is the position "behind" the pawn. This is recorded only if there is a pawn
-      in position to make an en passant capture, and if there really is a pawn
-      that might have advanced two squares.
+      is the position "behind" the pawn. Following X-FEN standard, this is recorded only
+      if there is a pawn in position to make an en passant capture, and if there really
+      is a pawn that might have advanced two squares.
 
    5) Halfmove clock. This is the number of halfmoves since the last pawn advance
       or capture. This is used to determine if a draw can be claimed under the
@@ -208,6 +198,9 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
   std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
   st = si;
 
+  // Each piece on board gets a unique ID used to track the piece later
+  PieceId piece_id, next_piece_id = PIECE_ID_ZERO;
+
   ss >> std::noskipws;
 
   // 1. Piece placement
@@ -221,7 +214,19 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
 
       else if ((idx = PieceToChar.find(token)) != string::npos)
       {
-          put_piece(Piece(idx), sq);
+          auto pc = Piece(idx);
+          put_piece(pc, sq);
+
+          if (Eval::useNNUE)
+          {
+              // Kings get a fixed ID, other pieces get ID in order of placement
+              piece_id =
+                (idx == W_KING) ? PIECE_ID_WKING :
+                (idx == B_KING) ? PIECE_ID_BKING :
+                next_piece_id++;
+              evalList.put_piece(piece_id, sq, pc);
+          }
+
           ++sq;
       }
   }
@@ -259,17 +264,25 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
       set_castling_right(c, rsq);
   }
 
-  // 4. En passant square. Ignore if no pawn capture is possible
+  // 4. En passant square.
+  // Ignore if square is invalid or not on side to move relative rank 6.
+  bool enpassant = false;
+
   if (   ((ss >> col) && (col >= 'a' && col <= 'h'))
-      && ((ss >> row) && (row == '3' || row == '6')))
+      && ((ss >> row) && (row == (sideToMove == WHITE ? '6' : '3'))))
   {
       st->epSquare = make_square(File(col - 'a'), Rank(row - '1'));
 
-      if (   !(attackers_to(st->epSquare) & pieces(sideToMove, PAWN))
-          || !(pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))))
-          st->epSquare = SQ_NONE;
+      // En passant square will be considered only if
+      // a) side to move has a pawn threatening epSquare
+      // b) there is an enemy pawn in front of epSquare
+      // c) there is no piece on epSquare or behind epSquare
+      enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN)
+               && (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove)))
+               && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove))));
   }
-  else
+
+  if (!enpassant)
       st->epSquare = SQ_NONE;
 
   // 5-6. Halfmove clock and fullmove number
@@ -705,6 +718,14 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
   ++st->rule50;
   ++st->pliesFromNull;
 
+  // Used by NNUE
+  st->accumulator.computed_accumulation = false;
+  st->accumulator.computed_score = false;
+  PieceId dp0 = PIECE_ID_NONE;
+  PieceId dp1 = PIECE_ID_NONE;
+  auto& dp = st->dirtyPiece;
+  dp.dirty_num = 1;
+
   Color us = sideToMove;
   Color them = ~us;
   Square from = from_sq(m);
@@ -752,6 +773,16 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
       else
           st->nonPawnMaterial[them] -= PieceValue[MG][captured];
 
+      if (Eval::useNNUE)
+      {
+          dp.dirty_num = 2; // 2 pieces moved
+          dp1 = piece_id_on(capsq);
+          dp.pieceId[1] = dp1;
+          dp.old_piece[1] = evalList.piece_with_id(dp1);
+          evalList.put_piece(dp1, capsq, NO_PIECE);
+          dp.new_piece[1] = evalList.piece_with_id(dp1);
+      }
+
       // Update board and piece lists
       remove_piece(capsq);
 
@@ -780,14 +811,25 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
   // Update castling rights if needed
   if (st->castlingRights && (castlingRightsMask[from] | castlingRightsMask[to]))
   {
-      int cr = castlingRightsMask[from] | castlingRightsMask[to];
-      k ^= Zobrist::castling[st->castlingRights & cr];
-      st->castlingRights &= ~cr;
+      k ^= Zobrist::castling[st->castlingRights];
+      st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]);
+      k ^= Zobrist::castling[st->castlingRights];
   }
 
   // Move the piece. The tricky Chess960 castling is handled earlier
   if (type_of(m) != CASTLING)
+  {
+      if (Eval::useNNUE)
+      {
+          dp0 = piece_id_on(from);
+          dp.pieceId[0] = dp0;
+          dp.old_piece[0] = evalList.piece_with_id(dp0);
+          evalList.put_piece(dp0, to, pc);
+          dp.new_piece[0] = evalList.piece_with_id(dp0);
+      }
+
       move_piece(from, to);
+  }
 
   // If the moving piece is a pawn do some special extra work
   if (type_of(pc) == PAWN)
@@ -810,6 +852,13 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
           remove_piece(to);
           put_piece(promotion, to);
 
+          if (Eval::useNNUE)
+          {
+              dp0 = piece_id_on(to);
+              evalList.put_piece(dp0, to, promotion);
+              dp.new_piece[0] = evalList.piece_with_id(dp0);
+          }
+
           // Update hash keys
           k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to];
           st->pawnKey ^= Zobrist::psq[pc][to];
@@ -901,6 +950,12 @@ void Position::undo_move(Move m) {
   {
       move_piece(to, from); // Put the piece back at the source square
 
+      if (Eval::useNNUE)
+      {
+          PieceId dp0 = st->dirtyPiece.pieceId[0];
+          evalList.put_piece(dp0, from, pc);
+      }
+
       if (st->capturedPiece)
       {
           Square capsq = to;
@@ -917,6 +972,14 @@ void Position::undo_move(Move m) {
           }
 
           put_piece(st->capturedPiece, capsq); // Restore the captured piece
+
+          if (Eval::useNNUE)
+          {
+              PieceId dp1 = st->dirtyPiece.pieceId[1];
+              assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE);
+              assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE);
+              evalList.put_piece(dp1, capsq, st->capturedPiece);
+          }
       }
   }
 
@@ -938,6 +1001,34 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
   rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
   to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
 
+  if (Eval::useNNUE)
+  {
+      PieceId dp0, dp1;
+      auto& dp = st->dirtyPiece;
+      dp.dirty_num = 2; // 2 pieces moved
+
+      if (Do)
+      {
+          dp0 = piece_id_on(from);
+          dp1 = piece_id_on(rfrom);
+          dp.pieceId[0] = dp0;
+          dp.old_piece[0] = evalList.piece_with_id(dp0);
+          evalList.put_piece(dp0, to, make_piece(us, KING));
+          dp.new_piece[0] = evalList.piece_with_id(dp0);
+          dp.pieceId[1] = dp1;
+          dp.old_piece[1] = evalList.piece_with_id(dp1);
+          evalList.put_piece(dp1, rto, make_piece(us, ROOK));
+          dp.new_piece[1] = evalList.piece_with_id(dp1);
+      }
+      else
+      {
+          dp0 = piece_id_on(to);
+          dp1 = piece_id_on(rto);
+          evalList.put_piece(dp0, from, make_piece(us, KING));
+          evalList.put_piece(dp1, rfrom, make_piece(us, ROOK));
+      }
+  }
+
   // Remove both pieces first since squares could overlap in Chess960
   remove_piece(Do ? from : to);
   remove_piece(Do ? rfrom : rto);
@@ -955,7 +1046,14 @@ void Position::do_null_move(StateInfo& newSt) {
   assert(!checkers());
   assert(&newSt != st);
 
-  std::memcpy(&newSt, st, sizeof(StateInfo));
+  if (Eval::useNNUE)
+  {
+      std::memcpy(&newSt, st, sizeof(StateInfo));
+      st->accumulator.computed_score = false;
+  }
+  else
+      std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
+
   newSt.previous = st;
   st = &newSt;
 
diff --git a/src/position.h b/src/position.h
index 8cfa3920..a77050eb 100644
--- a/src/position.h
+++ b/src/position.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -27,8 +25,11 @@
 #include <string>
 
 #include "bitboard.h"
+#include "evaluate.h"
 #include "types.h"
 
+#include "nnue/nnue_accumulator.h"
+
 
 /// StateInfo struct stores information needed to restore a Position object to
 /// its previous state when we retract a move. Whenever a move is made on the
@@ -54,6 +55,10 @@ struct StateInfo {
   Bitboard   pinners[COLOR_NB];
   Bitboard   checkSquares[PIECE_TYPE_NB];
   int        repetition;
+
+  // Used by NNUE
+  Eval::NNUE::Accumulator accumulator;
+  DirtyPiece dirtyPiece;
 };
 
 
@@ -163,6 +168,10 @@ public:
   bool pos_is_ok() const;
   void flip();
 
+  // Used by NNUE
+  StateInfo* state() const;
+  const EvalList* eval_list() const;
+
 private:
   // Initialization helpers (used while setting up a position)
   void set_castling_right(Color c, Square rfrom);
@@ -176,6 +185,9 @@ private:
   template<bool Do>
   void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
 
+  // ID of a piece on a given square
+  PieceId piece_id_on(Square sq) const;
+
   // Data members
   Piece board[SQUARE_NB];
   Bitboard byTypeBB[PIECE_TYPE_NB];
@@ -192,6 +204,9 @@ private:
   Thread* thisThread;
   StateInfo* st;
   bool chess960;
+
+  // List of pieces used in NNUE evaluation function
+  EvalList evalList;
 };
 
 namespace PSQT {
@@ -426,4 +441,25 @@ inline void Position::do_move(Move m, StateInfo& newSt) {
   do_move(m, newSt, gives_check(m));
 }
 
+inline StateInfo* Position::state() const {
+
+  return st;
+}
+
+inline const EvalList* Position::eval_list() const {
+
+  return &evalList;
+}
+
+inline PieceId Position::piece_id_on(Square sq) const
+{
+
+  assert(piece_on(sq) != NO_PIECE);
+
+  PieceId pid = evalList.piece_id_list[sq];
+  assert(is_ok(pid));
+
+  return pid;
+}
+
 #endif // #ifndef POSITION_H_INCLUDED
diff --git a/src/psqt.cpp b/src/psqt.cpp
index c5da9785..eb36e75e 100644
--- a/src/psqt.cpp
+++ b/src/psqt.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -92,7 +90,7 @@ constexpr Score PBonus[RANK_NB][FILE_NB] =
    { S(  3,-10), S(  3, -6), S( 10, 10), S( 19,  0), S( 16, 14), S( 19,  7), S(  7, -5), S( -5,-19) },
    { S( -9,-10), S(-15,-10), S( 11,-10), S( 15,  4), S( 32,  4), S( 22,  3), S(  5, -6), S(-22, -4) },
    { S( -4,  6), S(-23, -2), S(  6, -8), S( 20, -4), S( 40,-13), S( 17,-12), S(  4,-10), S( -8, -9) },
-   { S( 13,  9), S(  0,  4), S(-13,  3), S(  1,-12), S( 11,-12), S( -2, -6), S(-13, 13), S(  5,  8) },
+   { S( 13, 10), S(  0,  5), S(-13,  4), S(  1, -5), S( 11, -5), S( -2, -5), S(-13, 14), S(  5,  9) },
    { S(  5, 28), S(-12, 20), S( -7, 21), S( 22, 28), S( -8, 30), S( -5,  7), S(-15,  6), S( -8, 13) },
    { S( -7,  0), S(  7,-11), S( -3, 12), S(-13, 21), S(  5, 25), S(-16, 19), S( 10,  4), S( -8,  7) }
   };
diff --git a/src/search.cpp b/src/search.cpp
index e036abcc..72bf924f 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -66,9 +64,9 @@ namespace {
   constexpr uint64_t TtHitAverageResolution = 1024;
 
   // Razor and futility margins
-  constexpr int RazorMargin = 527;
+  constexpr int RazorMargin = 510;
   Value futility_margin(Depth d, bool improving) {
-    return Value(227 * (d - improving));
+    return Value(223 * (d - improving));
   }
 
   // Reductions lookup table, initialized at startup
@@ -76,7 +74,7 @@ namespace {
 
   Depth reduction(bool i, Depth d, int mn) {
     int r = Reductions[d] * Reductions[mn];
-    return (r + 570) / 1024 + (!i && r > 1018);
+    return (r + 509) / 1024 + (!i && r > 894);
   }
 
   constexpr int futility_move_count(bool improving, Depth depth) {
@@ -85,7 +83,7 @@ namespace {
 
   // History and stats update bonus, based on depth
   int stat_bonus(Depth d) {
-    return d > 15 ? 27 : 17 * d * d + 133 * d - 134;
+    return d > 13 ? 29 : 17 * d * d + 134 * d - 134;
   }
 
   // Add a small random component to draw evaluations to avoid 3fold-blindness
@@ -195,7 +193,7 @@ namespace {
 void Search::init() {
 
   for (int i = 1; i < MAX_MOVES; ++i)
-      Reductions[i] = int((24.8 + std::log(Threads.size())) * std::log(i));
+      Reductions[i] = int((22.0 + std::log(Threads.size())) * std::log(i));
 }
 
 
@@ -230,6 +228,8 @@ void MainThread::search() {
   Time.init(Limits, us, rootPos.game_ply());
   TT.new_search();
 
+  Eval::verify_NNUE();
+
   if (rootMoves.empty())
   {
       rootMoves.emplace_back(MOVE_NONE);
@@ -270,10 +270,10 @@ void MainThread::search() {
 
   Thread* bestThread = this;
 
-  if (int(Options["MultiPV"]) == 1 &&
-      !Limits.depth &&
-      !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"])) &&
-      rootMoves[0].pv[0] != MOVE_NONE)
+  if (   int(Options["MultiPV"]) == 1
+      && !Limits.depth
+      && !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"]))
+      && rootMoves[0].pv[0] != MOVE_NONE)
       bestThread = Threads.get_best_thread();
 
   // Prepare PVLine and ponder move
@@ -433,12 +433,12 @@ void Thread::search() {
           if (rootDepth >= 4)
           {
               Value prev = rootMoves[pvIdx].previousScore;
-              delta = Value(19);
+              delta = Value(17);
               alpha = std::max(prev - delta,-VALUE_INFINITE);
               beta  = std::min(prev + delta, VALUE_INFINITE);
 
               // Adjust contempt based on root move's previousScore (dynamic contempt)
-              int dct = ct + (110 - ct / 2) * prev / (abs(prev) + 140);
+              int dct = ct + (105 - ct / 2) * prev / (abs(prev) + 149);
 
               contempt = (us == WHITE ?  make_score(dct, dct / 2)
                                       : -make_score(dct, dct / 2));
@@ -543,13 +543,13 @@ void Thread::search() {
           && !Threads.stop
           && !mainThread->stopOnPonderhit)
       {
-          double fallingEval = (296 + 6 * (mainThread->bestPreviousScore - bestValue)
-                                    + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 725.0;
+          double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue)
+                                    + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0;
           fallingEval = Utility::clamp(fallingEval, 0.5, 1.5);
 
           // If the bestMove is stable over several iterations, reduce time accordingly
-          timeReduction = lastBestMoveDepth + 10 < completedDepth ? 1.92 : 0.95;
-          double reduction = (1.47 + mainThread->previousTimeReduction) / (2.22 * timeReduction);
+          timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.92 : 0.95;
+          double reduction = (1.47 + mainThread->previousTimeReduction) / (2.32 * timeReduction);
 
           // Use part of the gained time from a previous stable move for the current move
           for (Thread* th : Threads)
@@ -574,7 +574,7 @@ void Thread::search() {
           }
           else if (   Threads.increaseDepth
                    && !mainThread->ponder
-                   && Time.elapsed() > totalTime * 0.56)
+                   && Time.elapsed() > totalTime * 0.58)
                    Threads.increaseDepth = false;
           else
                    Threads.increaseDepth = true;
@@ -633,7 +633,7 @@ namespace {
     Key posKey;
     Move ttMove, move, excludedMove, bestMove;
     Depth extension, newDepth;
-    Value bestValue, value, ttValue, eval, maxValue;
+    Value bestValue, value, ttValue, eval, maxValue, probCutBeta;
     bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture;
     bool captureOrPromotion, doFullDepthSearch, moveCountPruning,
          ttCapture, singularQuietLMR;
@@ -699,7 +699,7 @@ namespace {
     // search to overwrite a previous full search TT value, so we use a different
     // position key in case of an excluded move.
     excludedMove = ss->excludedMove;
-    posKey = pos.key() ^ (Key(excludedMove) << 48); // Isn't a very good hash
+    posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove);
     tte = TT.probe(posKey, ttHit);
     ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
     ttMove =  rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0]
@@ -707,7 +707,11 @@ namespace {
     ttPv = PvNode || (ttHit && tte->is_pv());
     formerPv = ttPv && !PvNode;
 
-    if (ttPv && depth > 12 && ss->ply - 1 < MAX_LPH && !priorCapture && is_ok((ss-1)->currentMove))
+    if (   ttPv
+        && depth > 12
+        && ss->ply - 1 < MAX_LPH
+        && !priorCapture
+        && is_ok((ss-1)->currentMove))
         thisThread->lowPlyHistory[ss->ply - 1][from_to((ss-1)->currentMove)] << stat_bonus(depth - 5);
 
     // thisThread->ttHitAverage can be used to approximate the running average of ttHit
@@ -828,11 +832,7 @@ namespace {
     else
     {
         if ((ss-1)->currentMove != MOVE_NULL)
-        {
-            int bonus = -(ss-1)->statScore / 512;
-
-            ss->staticEval = eval = evaluate(pos) + bonus;
-        }
+            ss->staticEval = eval = evaluate(pos);
         else
             ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo;
 
@@ -852,7 +852,7 @@ namespace {
 
     // Step 8. Futility pruning: child node (~50 Elo)
     if (   !PvNode
-        &&  depth < 6
+        &&  depth < 8
         &&  eval - futility_margin(depth, improving) >= beta
         &&  eval < VALUE_KNOWN_WIN) // Do not return unproven wins
         return eval;
@@ -860,10 +860,10 @@ namespace {
     // Step 9. Null move search with verification search (~40 Elo)
     if (   !PvNode
         && (ss-1)->currentMove != MOVE_NULL
-        && (ss-1)->statScore < 23824
+        && (ss-1)->statScore < 22977
         &&  eval >= beta
         &&  eval >= ss->staticEval
-        &&  ss->staticEval >= beta - 33 * depth - 33 * improving + 112 * ttPv + 311
+        &&  ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182
         && !excludedMove
         &&  pos.non_pawn_material(us)
         && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
@@ -871,7 +871,7 @@ namespace {
         assert(eval - beta >= 0);
 
         // Null move dynamic reduction based on depth and value
-        Depth R = (737 + 77 * depth) / 246 + std::min(int(eval - beta) / 192, 3);
+        Depth R = (817 + 71 * depth) / 213 + std::min(int(eval - beta) / 192, 3);
 
         ss->currentMove = MOVE_NULL;
         ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
@@ -907,23 +907,39 @@ namespace {
         }
     }
 
+    probCutBeta = beta + 176 - 49 * improving;
+
     // Step 10. ProbCut (~10 Elo)
     // If we have a good enough capture and a reduced search returns a value
     // much above beta, we can (almost) safely prune the previous move.
     if (   !PvNode
         &&  depth > 4
-        &&  abs(beta) < VALUE_TB_WIN_IN_MAX_PLY)
+        &&  abs(beta) < VALUE_TB_WIN_IN_MAX_PLY
+        // If the value from the transposition table is lower than probCutBeta, don't attempt
+        // probCut. Here and in further interactions with the transposition table, the cutoff
+        // depth is set to depth - 3: the probCut search has depth set to depth - 4, but we
+        // also make a move before it, so the effective depth is equal to depth - 3.
+        && !(   ttHit 
+             && tte->depth() >= depth - 3 
+             && ttValue != VALUE_NONE
+             && ttValue < probCutBeta))
     {
-        Value raisedBeta = beta + 176 - 49 * improving;
-        assert(raisedBeta < VALUE_INFINITE);
-        MovePicker mp(pos, ttMove, raisedBeta - ss->staticEval, &captureHistory);
+        // If ttMove is a capture or promotion and the transposition table value is good
+        // enough, produce a probCut cutoff without running the actual probCut search.
+        if (   ttHit
+            && tte->depth() >= depth - 3
+            && ttValue != VALUE_NONE
+            && ttValue >= probCutBeta
+            && ttMove
+            && pos.capture_or_promotion(ttMove))
+            return probCutBeta;
+
+        assert(probCutBeta < VALUE_INFINITE);
+        MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory);
         int probCutCount = 0;
 
         while (   (move = mp.next_move()) != MOVE_NONE
-               && probCutCount < 2 + 2 * cutNode
-               && !(   move == ttMove
-                    && tte->depth() >= depth - 4
-                    && ttValue < raisedBeta))
+               && probCutCount < 2 + 2 * cutNode)
             if (move != excludedMove && pos.legal(move))
             {
                 assert(pos.capture_or_promotion(move));
@@ -941,16 +957,25 @@ namespace {
                 pos.do_move(move, st);
 
                 // Perform a preliminary qsearch to verify that the move holds
-                value = -qsearch<NonPV>(pos, ss+1, -raisedBeta, -raisedBeta+1);
+                value = -qsearch<NonPV>(pos, ss+1, -probCutBeta, -probCutBeta+1);
 
                 // If the qsearch held, perform the regular search
-                if (value >= raisedBeta)
-                    value = -search<NonPV>(pos, ss+1, -raisedBeta, -raisedBeta+1, depth - 4, !cutNode);
+                if (value >= probCutBeta)
+                    value = -search<NonPV>(pos, ss+1, -probCutBeta, -probCutBeta+1, depth - 4, !cutNode);
 
                 pos.undo_move(move);
 
-                if (value >= raisedBeta)
+                if (value >= probCutBeta)
+                {
+                    // If the transposition table doesn't hold info of equal or greater depth, write the probCut data into it
+                    if ( !(ttHit
+                       && tte->depth() >= depth - 3
+                       && ttValue != VALUE_NONE))
+                        tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
+                            BOUND_LOWER,
+                            depth - 3, move, ss->staticEval);
                     return value;
+                }
             }
     }
 
@@ -1004,6 +1029,10 @@ moves_loop: // When in check, search starts from here
                                   thisThread->rootMoves.begin() + thisThread->pvLast, move))
           continue;
 
+      // Check for legality
+      if (!rootNode && !pos.legal(move))
+          continue;
+
       ss->moveCount = ++moveCount;
 
       if (rootNode && Cluster::is_root() && thisThread == Threads.main() && Time.elapsed() > 3000)
@@ -1042,17 +1071,17 @@ moves_loop: // When in check, search starts from here
                   continue;
 
               // Futility pruning: parent node (~5 Elo)
-              if (   lmrDepth < 6
+              if (   lmrDepth < 7
                   && !ss->inCheck
-                  && ss->staticEval + 284 + 188 * lmrDepth <= alpha
+                  && ss->staticEval + 283 + 170 * lmrDepth <= alpha
                   &&  (*contHist[0])[movedPiece][to_sq(move)]
                     + (*contHist[1])[movedPiece][to_sq(move)]
                     + (*contHist[3])[movedPiece][to_sq(move)]
-                    + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 28388)
+                    + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 27376)
                   continue;
 
               // Prune moves with negative SEE (~20 Elo)
-              if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 17)) * lmrDepth * lmrDepth)))
+              if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth)))
                   continue;
           }
           else
@@ -1069,12 +1098,12 @@ moves_loop: // When in check, search starts from here
                   && !(PvNode && abs(bestValue) < 2)
                   && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))]
                   && !ss->inCheck
-                  && ss->staticEval + 267 + 391 * lmrDepth
+                  && ss->staticEval + 169 + 244 * lmrDepth
                      + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha)
                   continue;
 
               // See based pruning
-              if (!pos.see_ge(move, Value(-202) * depth)) // (~25 Elo)
+              if (!pos.see_ge(move, Value(-221) * depth)) // (~25 Elo)
                   continue;
           }
       }
@@ -1085,16 +1114,15 @@ moves_loop: // When in check, search starts from here
       // search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
       // then that move is singular and should be extended. To verify this we do
       // a reduced search on all the other moves but the ttMove and if the
-      // result is lower than ttValue minus a margin then we will extend the ttMove.
-      if (    depth >= 6
+      // result is lower than ttValue minus a margin, then we will extend the ttMove.
+      if (    depth >= 7
           &&  move == ttMove
           && !rootNode
           && !excludedMove // Avoid recursive singular search
        /* &&  ttValue != VALUE_NONE Already implicit in the next condition */
           &&  abs(ttValue) < VALUE_KNOWN_WIN
           && (tte->bound() & BOUND_LOWER)
-          &&  tte->depth() >= depth - 3
-          &&  pos.legal(move))
+          &&  tte->depth() >= depth - 3)
       {
           Value singularBeta = ttValue - ((formerPv + 4) * depth) / 2;
           Depth singularDepth = (depth - 1 + 3 * formerPv) / 2;
@@ -1134,19 +1162,9 @@ moves_loop: // When in check, search starts from here
                && (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move)))
           extension = 1;
 
-      // Passed pawn extension
-      else if (   move == ss->killers[0]
-               && pos.advanced_pawn_push(move)
-               && pos.pawn_passed(us, to_sq(move)))
-          extension = 1;
-
-      // Last captures extension
-      else if (   PieceValue[EG][pos.captured_piece()] > PawnValueEg
-               && pos.non_pawn_material() <= 2 * RookValueMg)
-          extension = 1;
-
       // Castling extension
-      if (type_of(move) == CASTLING)
+      if (   type_of(move) == CASTLING
+          && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2)
           extension = 1;
 
       // Late irreversible move extension
@@ -1161,13 +1179,6 @@ moves_loop: // When in check, search starts from here
       // Speculative prefetch as early as possible
       prefetch(TT.first_entry(pos.key_after(move)));
 
-      // Check for legality just before making the move
-      if (!rootNode && !pos.legal(move))
-      {
-          ss->moveCount = --moveCount;
-          continue;
-      }
-
       // Update the current move (this must be done after singular extension search)
       ss->currentMove = move;
       ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck]
@@ -1181,18 +1192,25 @@ moves_loop: // When in check, search starts from here
       // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
       // re-searched at full depth.
       if (    depth >= 3
-          &&  moveCount > 1 + 2 * rootNode
+          &&  moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2)
           && (!rootNode || thisThread->best_move_count(move) == 0)
           && (  !captureOrPromotion
               || moveCountPruning
               || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
               || cutNode
-              || thisThread->ttHitAverage < 415 * TtHitAverageResolution * TtHitAverageWindow / 1024))
+              || thisThread->ttHitAverage < 427 * TtHitAverageResolution * TtHitAverageWindow / 1024))
       {
           Depth r = reduction(improving, depth, moveCount);
 
+          // Decrease reduction at non-check cut nodes for second move at low depths
+          if (   cutNode
+              && depth <= 10
+              && moveCount <= 2
+              && !ss->inCheck)
+              r--;
+
           // Decrease reduction if the ttHit running average is large
-          if (thisThread->ttHitAverage > 473 * TtHitAverageResolution * TtHitAverageWindow / 1024)
+          if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024)
               r--;
 
           // Reduction if other threads are searching this position
@@ -1235,17 +1253,17 @@ moves_loop: // When in check, search starts from here
                              + (*contHist[0])[movedPiece][to_sq(move)]
                              + (*contHist[1])[movedPiece][to_sq(move)]
                              + (*contHist[3])[movedPiece][to_sq(move)]
-                             - 4826;
+                             - 5287;
 
               // Decrease/increase reduction by comparing opponent's stat score (~10 Elo)
-              if (ss->statScore >= -100 && (ss-1)->statScore < -112)
+              if (ss->statScore >= -106 && (ss-1)->statScore < -104)
                   r--;
 
-              else if ((ss-1)->statScore >= -125 && ss->statScore < -138)
+              else if ((ss-1)->statScore >= -119 && ss->statScore < -140)
                   r++;
 
               // Decrease/increase reduction for moves with a good/bad history (~30 Elo)
-              r -= ss->statScore / 14615;
+              r -= ss->statScore / 14884;
           }
           else
           {
@@ -1255,7 +1273,7 @@ moves_loop: // When in check, search starts from here
 
             // Unless giving check, this capture is likely bad
             if (   !givesCheck
-                && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 211 * depth <= alpha)
+                && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
                 r++;
           }
 
@@ -1519,7 +1537,7 @@ moves_loop: // When in check, search starts from here
         if (PvNode && bestValue > alpha)
             alpha = bestValue;
 
-        futilityBase = bestValue + 141;
+        futilityBase = bestValue + 145;
     }
 
     const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
@@ -1528,8 +1546,8 @@ moves_loop: // When in check, search starts from here
 
     // Initialize a MovePicker object for the current position, and prepare
     // to search the moves. Because the depth is <= 0 here, only captures,
-    // queen promotions and checks (only if depth >= DEPTH_QS_CHECKS) will
-    // be generated.
+    // queen and checking knight promotions, and other checks (only if depth >= DEPTH_QS_CHECKS)
+    // will be generated.
     MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory,
                                       &thisThread->captureHistory,
                                       contHist,
@@ -1771,7 +1789,7 @@ moves_loop: // When in check, search starts from here
     }
 
     if (depth > 11 && ss->ply < MAX_LPH)
-        thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 6);
+        thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7);
   }
 
   // When playing with strength handicap, choose best move among a set of RootMoves
@@ -1881,6 +1899,9 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
          << " multipv "  << i + 1
          << " score "    << UCI::value(v);
 
+      if (Options["UCI_ShowWDL"])
+          ss << UCI::wdl(v, pos.game_ply());
+
       if (!tb && i == pvIdx)
           ss << (v >= beta ? " lowerbound" : v <= alpha ? " upperbound" : "");
 
diff --git a/src/search.h b/src/search.h
index 0d4a1f76..601ccf82 100644
--- a/src/search.h
+++ b/src/search.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp
index 6f8fbc22..eefb173c 100644
--- a/src/syzygy/tbprobe.cpp
+++ b/src/syzygy/tbprobe.cpp
@@ -1,7 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (c) 2013 Ronald de Man
-  Copyright (C) 2016-2020 Marco Costalba, Lucas Braesch
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h
index df3ca4fe..b998989b 100644
--- a/src/syzygy/tbprobe.h
+++ b/src/syzygy/tbprobe.h
@@ -1,7 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (c) 2013 Ronald de Man
-  Copyright (C) 2016-2020 Marco Costalba, Lucas Braesch
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/thread.cpp b/src/thread.cpp
index 98f26613..2c1242ee 100644
--- a/src/thread.cpp
+++ b/src/thread.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -209,21 +207,18 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
 
   // We use Position::set() to set root position across threads. But there are
   // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
-  // be deduced from a fen string, so set() clears them and to not lose the info
-  // we need to backup and later restore setupStates->back(). Note that setupStates
-  // is shared by threads but is accessed in read-only mode.
-  StateInfo tmp = setupStates->back();
-
+  // be deduced from a fen string, so set() clears them and they are restored by
+  // copying setupStates->back() right after the call. The rootState is per thread;
+  // earlier states are shared between threads since they are only read.
   for (Thread* th : *this)
   {
       th->nodes = th->tbHits = th->TTsaves = th->nmpMinPly = th->bestMoveChanges = 0;
       th->rootDepth = th->completedDepth = 0;
       th->rootMoves = rootMoves;
-      th->rootPos.set(pos.fen(), pos.is_chess960(), &setupStates->back(), th);
+      th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
+      th->rootState = setupStates->back();
   }
 
-  setupStates->back() = tmp;
-
   Cluster::signals_init();
 
   main()->start_searching();
diff --git a/src/thread.h b/src/thread.h
index 7792de73..6927c6c5 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -68,6 +66,7 @@ public:
   std::atomic<uint64_t> nodes, tbHits, TTsaves, bestMoveChanges;
 
   Position rootPos;
+  StateInfo rootState;
   Search::RootMoves rootMoves;
   Depth rootDepth, completedDepth;
   CounterMoveHistory counterMoves;
diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h
index 0ef5c981..c4b55a48 100644
--- a/src/thread_win32_osx.h
+++ b/src/thread_win32_osx.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/timeman.cpp b/src/timeman.cpp
index 546eadd2..df4ba9b2 100644
--- a/src/timeman.cpp
+++ b/src/timeman.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/timeman.h b/src/timeman.h
index f225b482..96eecce1 100644
--- a/src/timeman.h
+++ b/src/timeman.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/tt.cpp b/src/tt.cpp
index 34590903..d494c27d 100644
--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/tt.h b/src/tt.h
index 3895e503..13a87112 100644
--- a/src/tt.h
+++ b/src/tt.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/tune.cpp b/src/tune.cpp
index c1b1c76b..e94f67f8 100644
--- a/src/tune.cpp
+++ b/src/tune.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/tune.h b/src/tune.h
index 27c3f961..1489fa32 100644
--- a/src/tune.h
+++ b/src/tune.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2017 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2018 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/types.h b/src/types.h
index 0c512f5b..73da41e2 100644
--- a/src/types.h
+++ b/src/types.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -180,7 +178,7 @@ enum Value : int {
   VALUE_MATE_IN_MAX_PLY  =  VALUE_MATE - MAX_PLY,
   VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY,
 
-  PawnValueMg   = 124,   PawnValueEg   = 206,
+  PawnValueMg   = 126,   PawnValueEg   = 208,
   KnightValueMg = 781,   KnightValueEg = 854,
   BishopValueMg = 825,   BishopValueEg = 915,
   RookValueMg   = 1276,  RookValueEg   = 1380,
@@ -203,6 +201,22 @@ enum Piece {
   PIECE_NB = 16
 };
 
+// An ID used to track the pieces. Max. 32 pieces on board.
+enum PieceId {
+  PIECE_ID_ZERO   = 0,
+  PIECE_ID_KING   = 30,
+  PIECE_ID_WKING  = 30,
+  PIECE_ID_BKING  = 31,
+  PIECE_ID_NONE   = 32
+};
+
+inline PieceId operator++(PieceId& d, int) {
+  // Post-increment: advance d and hand back the value it held before
+  const PieceId previous = d;
+  d = static_cast<PieceId>(static_cast<int>(d) + 1);
+  return previous;
+}
+
 constexpr Value PieceValue[PHASE_NB][PIECE_NB] = {
   { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO,
     VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO },
@@ -232,7 +246,8 @@ enum Square : int {
   SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8,
   SQ_NONE,
 
-  SQUARE_NB = 64
+  SQUARE_ZERO = 0,
+  SQUARE_NB   = 64
 };
 
 enum Direction : int {
@@ -255,6 +270,94 @@ enum Rank : int {
   RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB
 };
 
+// unique number for each piece type on each square
+enum PieceSquare : uint32_t {
+  PS_NONE     =  0,
+  PS_W_PAWN   =  1,
+  PS_B_PAWN   =  1 * SQUARE_NB + 1,
+  PS_W_KNIGHT =  2 * SQUARE_NB + 1,
+  PS_B_KNIGHT =  3 * SQUARE_NB + 1,
+  PS_W_BISHOP =  4 * SQUARE_NB + 1,
+  PS_B_BISHOP =  5 * SQUARE_NB + 1,
+  PS_W_ROOK   =  6 * SQUARE_NB + 1,
+  PS_B_ROOK   =  7 * SQUARE_NB + 1,
+  PS_W_QUEEN  =  8 * SQUARE_NB + 1,
+  PS_B_QUEEN  =  9 * SQUARE_NB + 1,
+  PS_W_KING   = 10 * SQUARE_NB + 1,
+  PS_END      = PS_W_KING, // pieces without kings (pawns included)
+  PS_B_KING   = 11 * SQUARE_NB + 1,
+  PS_END2     = 12 * SQUARE_NB + 1
+};
+
+struct ExtPieceSquare {
+  PieceSquare from[COLOR_NB];
+};
+
+// Array for finding the PieceSquare corresponding to the piece on the board
+extern ExtPieceSquare kpp_board_index[PIECE_NB];
+
+constexpr bool is_ok(PieceId pid);
+constexpr Square rotate180(Square sq);
+
+// Structure holding which tracked piece (PieceId) is where (PieceSquare)
+class EvalList {
+
+public:
+  // At most 30 pieces besides the kings, but the length must be a multiple of 4 for AVX2
+  static const int MAX_LENGTH = 32;
+
+  // Maps each square of the board to the id of the piece placed on it
+  PieceId piece_id_list[SQUARE_NB];
+
+  // The piece lists, one from White's (fw) and one from Black's (fb) point of view
+  PieceSquare* piece_list_fw() const { return const_cast<PieceSquare*>(pieceListFw); }
+  PieceSquare* piece_list_fb() const { return const_cast<PieceSquare*>(pieceListFb); }
+
+  // Register piece pc, tracked as piece_id, on square sq. For NO_PIECE both
+  // list entries become PS_NONE; the square is tagged with piece_id either way.
+  void put_piece(PieceId piece_id, Square sq, Piece pc)
+  {
+      assert(is_ok(piece_id));
+
+      const bool occupied = pc != NO_PIECE;
+
+      // Black's entry uses the square as seen after turning the board by 180 degrees
+      pieceListFw[piece_id] = occupied ? PieceSquare(kpp_board_index[pc].from[WHITE] + sq)
+                                       : PS_NONE;
+      pieceListFb[piece_id] = occupied ? PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq))
+                                       : PS_NONE;
+      piece_id_list[sq] = piece_id;
+  }
+
+  // Return the White-view and Black-view entries of piece_id as one ExtPieceSquare
+  ExtPieceSquare piece_with_id(PieceId piece_id) const
+  {
+      ExtPieceSquare both;
+      both.from[WHITE] = pieceListFw[piece_id];
+      both.from[BLACK] = pieceListFb[piece_id];
+      return both;
+  }
+
+private:
+  // One entry per PieceId: pieceListFw holds White's view, pieceListFb holds
+  // Black's view
+  PieceSquare pieceListFw[MAX_LENGTH];
+  PieceSquare pieceListFb[MAX_LENGTH];
+};
+
+// For differential evaluation of pieces that changed since last turn
+struct DirtyPiece {
+
+  // Number of changed pieces
+  int dirty_num;
+
+  // The ids of the changed pieces; at most 2 pieces can change in one move
+  PieceId pieceId[2];
+
+  // State of each changed piece before (old_piece) and after (new_piece) the change
+  ExtPieceSquare old_piece[2];
+  ExtPieceSquare new_piece[2];
+};
 
 /// Score enum stores a middlegame and an endgame value in a single integer (enum).
 /// The least significant 16 bits are used to store the middlegame value and the
@@ -280,10 +383,10 @@ inline Value mg_value(Score s) {
 }
 
 #define ENABLE_BASE_OPERATORS_ON(T)                                \
-constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \
-constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \
+constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); }    \
+constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); }    \
 constexpr T operator-(T d) { return T(-int(d)); }                  \
-inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; }         \
+inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; }       \
 inline T& operator-=(T& d1, int d2) { return d1 = d1 - d2; }
 
 #define ENABLE_INCR_OPERATORS_ON(T)                                \
@@ -302,6 +405,9 @@ inline T& operator/=(T& d, int i) { return d = T(int(d) / i); }
 ENABLE_FULL_OPERATORS_ON(Value)
 ENABLE_FULL_OPERATORS_ON(Direction)
 
+ENABLE_INCR_OPERATORS_ON(Piece)
+ENABLE_INCR_OPERATORS_ON(PieceSquare)
+ENABLE_INCR_OPERATORS_ON(PieceId)
 ENABLE_INCR_OPERATORS_ON(PieceType)
 ENABLE_INCR_OPERATORS_ON(Square)
 ENABLE_INCR_OPERATORS_ON(File)
@@ -390,6 +496,10 @@ inline Color color_of(Piece pc) {
   return Color(pc >> 3);
 }
 
+constexpr bool is_ok(PieceId pid) {
+  return pid < PIECE_ID_NONE;
+}
+
 constexpr bool is_ok(Square s) {
   return s >= SQ_A1 && s <= SQ_H8;
 }
@@ -426,6 +536,11 @@ constexpr Square to_sq(Move m) {
   return Square(m & 0x3F);
 }
 
+// Return the square that sq maps to when the board is turned by 180 degrees
+constexpr Square rotate180(Square sq) {
+  return static_cast<Square>(int(sq) ^ 0x3F);
+}
+
 constexpr int from_to(Move m) {
  return m & 0xFFF;
 }
@@ -455,6 +570,11 @@ constexpr bool is_ok(Move m) {
   return from_sq(m) != to_sq(m); // Catch MOVE_NULL and MOVE_NONE
 }
 
+/// Based on a congruential pseudo random number generator
+constexpr Key make_key(uint64_t seed) {
+  return seed * 6364136223846793005ULL + 1442695040888963407ULL;
+}
+
 #endif // #ifndef TYPES_H_INCLUDED
 
 #include "tune.h" // Global visibility to tuning setup
diff --git a/src/uci.cpp b/src/uci.cpp
index f1e16a53..19981b35 100644
--- a/src/uci.cpp
+++ b/src/uci.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -19,6 +17,7 @@
 */
 
 #include <cassert>
+#include <cmath>
 #include <iostream>
 #include <sstream>
 #include <string>
@@ -78,6 +77,20 @@ namespace {
     }
   }
 
+  // trace_eval() prints the evaluation trace of the given position, honouring the
+  // UCI options that have been set so far.
+
+  void trace_eval(Position& pos) {
+
+    // Evaluate a fresh copy rebuilt from the FEN, backed by its own state list
+    StateListPtr freshStates(new std::deque<StateInfo>(1));
+    Position copy;
+    copy.set(pos.fen(), Options["UCI_Chess960"], &freshStates->back(), Threads.main());
+
+    Eval::verify_NNUE();
+    sync_cout << "\n" << Eval::trace(copy) << sync_endl;
+  }
+
 
   // setoption() is called when engine receives the "setoption" UCI command. The
   // function updates the UCI option ("name") to the given value ("value").
@@ -167,7 +180,7 @@ namespace {
                nodes += Threads.nodes_searched();
             }
             else if (Cluster::is_root())
-               sync_cout << "\n" << Eval::trace(pos) << sync_endl;
+               trace_eval(pos);
         }
         else if (token == "setoption")  setoption(is);
         else if (token == "position")   position(pos, is, states);
@@ -185,6 +198,28 @@ namespace {
              << "\nNodes/second    : " << 1000 * nodes / elapsed << endl;
   }
 
+  // The win rate model returns the probability (per mille) of winning given an eval
+  // and a game-ply. The model fits the LTC fishtest statistics rather accurately.
+  int win_rate_model(Value v, int ply) {
+
+     // The model captures only up to 240 plies, so limit input (and rescale)
+     double m = std::min(240, ply) / 64.0;
+
+     // Coefficients (highest order first) of 3rd order polynomials in m, fitted
+     // to fishtest data, for the two parameters a and b needed to transform the
+     // eval to the argument of a logistic function.
+     double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679};
+     double bs[] = {-3.37154371, 28.44489198, -56.67657741,  72.05858751};
+     double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3];
+     double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
+
+     // Transform eval to centipawns with limited range
+     double x = Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
+
+     // Return win rate in per mille (rounded to nearest)
+     return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
+  }
+
 } // namespace
 
 
@@ -244,7 +279,7 @@ void UCI::loop(int argc, char* argv[]) {
       else if (token == "d" && Cluster::is_root())
           sync_cout << pos << sync_endl;
       else if (token == "eval" && Cluster::is_root())
-          sync_cout << Eval::trace(pos) << sync_endl;
+          trace_eval(pos);
       else if (token == "compiler" && Cluster::is_root())
           sync_cout << compiler_info() << sync_endl;
       else if (Cluster::is_root())
@@ -276,6 +311,22 @@ string UCI::value(Value v) {
 }
 
 
+/// UCI::wdl() reports WDL statistics given an evaluation and a game ply, based on
+/// data gathered for fishtest LTC games.
+
+string UCI::wdl(Value v, int ply) {
+
+  // Loss is the win rate of the negated eval; draws take the rest of the 1000
+  const int winRate  = win_rate_model( v, ply);
+  const int lossRate = win_rate_model(-v, ply);
+  const int drawRate = 1000 - winRate - lossRate;
+
+  stringstream out;
+  out << " wdl " << winRate << " " << drawRate << " " << lossRate;
+  return out.str();
+}
+
+
 /// UCI::square() converts a Square to a string in algebraic notation (g1, a7, etc.)
 
 std::string UCI::square(Square s) {
diff --git a/src/uci.h b/src/uci.h
index b845889b..eb0b390b 100644
--- a/src/uci.h
+++ b/src/uci.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -73,6 +71,7 @@ std::string value(Value v);
 std::string square(Square s);
 std::string move(Move m, bool chess960);
 std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
+std::string wdl(Value v, int ply);
 Move to_move(const Position& pos, std::string& str);
 
 } // namespace UCI
diff --git a/src/ucioption.cpp b/src/ucioption.cpp
index c268c975..2b66a475 100644
--- a/src/ucioption.cpp
+++ b/src/ucioption.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -42,7 +40,8 @@ void on_hash_size(const Option& o) { TT.resize(size_t(o)); }
 void on_logger(const Option& o) { start_logger(o); }
 void on_threads(const Option& o) { Threads.set(size_t(o)); }
 void on_tb_path(const Option& o) { Tablebases::init(o); }
-
+void on_use_NNUE(const Option& ) { Eval::init_NNUE(); }
+void on_eval_file(const Option& ) { Eval::init_NNUE(); }
 
 /// Our case insensitive less() function as required by UCI protocol
 bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const {
@@ -74,10 +73,15 @@ void init(OptionsMap& o) {
   o["UCI_AnalyseMode"]       << Option(false);
   o["UCI_LimitStrength"]     << Option(false);
   o["UCI_Elo"]               << Option(1350, 1350, 2850);
+  o["UCI_ShowWDL"]           << Option(false);
   o["SyzygyPath"]            << Option("<empty>", on_tb_path);
   o["SyzygyProbeDepth"]      << Option(1, 1, 100);
   o["Syzygy50MoveRule"]      << Option(true);
   o["SyzygyProbeLimit"]      << Option(7, 0, 7);
+  o["Use NNUE"]              << Option(false, on_use_NNUE);
+  // The default must follow the format nn-[SHA256 first 12 digits].nnue
+  // for the build process (profile-build and fishtest) to work.
+  o["EvalFile"]              << Option("nn-82215d0fd0df.nnue", on_eval_file);
 }
 
 
diff --git a/tests/instrumented.sh b/tests/instrumented.sh
index ae6d5c4b..03ded74a 100755
--- a/tests/instrumented.sh
+++ b/tests/instrumented.sh
@@ -70,7 +70,7 @@ for args in "eval" \
             "go depth 10" \
             "go movetime 1000" \
             "go wtime 8000 btime 8000 winc 500 binc 500" \
-            "bench 128 $threads 10 default depth"
+            "bench 128 $threads 8 default depth"
 do
 
    echo "$prefix $exeprefix ./stockfish $args $postfix"
@@ -80,7 +80,7 @@ done
 
 # more general testing, following an uci protocol exchange
 cat << EOF > game.exp
- set timeout 10
+ set timeout 240
  spawn $exeprefix ./stockfish
 
  send "uci\n"
@@ -98,7 +98,7 @@ cat << EOF > game.exp
  expect "bestmove"
 
  send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n"
- send "go depth 30\n"
+ send "go depth 20\n"
  expect "bestmove"
 
  send "quit\n"
@@ -121,7 +121,7 @@ cat << EOF > syzygy.exp
  send "uci\n"
  send "setoption name SyzygyPath value ../tests/syzygy/\n"
  expect "info string Found 35 tablebases" {} timeout {exit 1}
- send "bench 128 1 10 default depth\n"
+ send "bench 128 1 8 default depth\n"
  send "quit\n"
  expect eof