mirror of
https://github.com/sockspls/badfish
synced 2025-04-30 16:53:09 +00:00
Add ARCH x86-64-bmi2 support
Intel Haswell and newer CPUs can calculate slider attacks using special PEXT asm instructions instead of magic bitboards. This gives a +3% speed-up. To enable it, just compile with ARCH=x86-64-bmi2. No functional change.
This commit is contained in:
parent
da2f8880b9
commit
226bbc1e63
3 changed files with 28 additions and 6 deletions
28
src/Makefile
28
src/Makefile
|
@ -60,21 +60,22 @@ OBJS = benchmark.o bitbase.o bitboard.o book.o endgame.o evaluate.o main.o \
|
|||
# with GCC and ICC 64-bit)
|
||||
# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt x86_64 asm-instruction
|
||||
# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions
|
||||
# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction
|
||||
#
|
||||
# Note that Makefile is space sensitive, so when adding new architectures
|
||||
# or modifying existing flags, you have to make sure there are no extra spaces
|
||||
# at the end of the line for flag values.
|
||||
|
||||
### 2.1. General and architecture defaults
|
||||
debug = no
|
||||
optimize = yes
|
||||
|
||||
debug = no
|
||||
os = any
|
||||
bits = 32
|
||||
prefetch = no
|
||||
bsfq = no
|
||||
popcnt = no
|
||||
sse = no
|
||||
pext = no
|
||||
|
||||
### 2.2 Architecture specific
|
||||
|
||||
|
@ -114,6 +115,16 @@ ifeq ($(ARCH),x86-64-modern)
|
|||
sse = yes
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH),x86-64-bmi2)
|
||||
arch = x86_64
|
||||
bits = 64
|
||||
prefetch = yes
|
||||
bsfq = yes
|
||||
popcnt = yes
|
||||
sse = yes
|
||||
pext = yes
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH),armv7)
|
||||
arch = armv7
|
||||
prefetch = yes
|
||||
|
@ -310,7 +321,15 @@ ifeq ($(popcnt),yes)
|
|||
CXXFLAGS += -msse3 -DUSE_POPCNT
|
||||
endif
|
||||
|
||||
### 3.10 Link Time Optimization, it works since gcc 4.5 but not on mingw.
|
||||
### 3.10 pext
|
||||
ifeq ($(pext),yes)
|
||||
CXXFLAGS += -DUSE_PEXT
|
||||
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
|
||||
CXXFLAGS += -mbmi2
|
||||
endif
|
||||
endif
|
||||
|
||||
### 3.11 Link Time Optimization, it works since gcc 4.5 but not on mingw.
|
||||
### This is a mix of compile and link time options because the lto link phase
|
||||
### needs access to the optimization flags.
|
||||
ifeq ($(comp),gcc)
|
||||
|
@ -350,6 +369,7 @@ help:
|
|||
@echo ""
|
||||
@echo "x86-64 > x86 64-bit"
|
||||
@echo "x86-64-modern > x86 64-bit with popcnt support"
|
||||
@echo "x86-64-bmi2 > x86 64-bit with pext support"
|
||||
@echo "x86-32 > x86 32-bit with SSE support"
|
||||
@echo "x86-32-old > x86 32-bit fall back for old hardware"
|
||||
@echo "linux-ppc-64 > PPC-Linux 64 bit"
|
||||
|
@ -448,6 +468,7 @@ config-sanity:
|
|||
@echo "bsfq: '$(bsfq)'"
|
||||
@echo "popcnt: '$(popcnt)'"
|
||||
@echo "sse: '$(sse)'"
|
||||
@echo "pext: '$(pext)'"
|
||||
@echo ""
|
||||
@echo "Flags:"
|
||||
@echo "CXX: $(CXX)"
|
||||
|
@ -466,6 +487,7 @@ config-sanity:
|
|||
@test "$(bsfq)" = "yes" || test "$(bsfq)" = "no"
|
||||
@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
|
||||
@test "$(sse)" = "yes" || test "$(sse)" = "no"
|
||||
@test "$(pext)" = "yes" || test "$(pext)" = "no"
|
||||
@test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang"
|
||||
|
||||
$(EXE): $(OBJS)
|
||||
|
|
|
@ -51,8 +51,8 @@ const string engine_info(bool to_uci) {
|
|||
}
|
||||
|
||||
ss << (Is64Bit ? " 64" : "")
|
||||
<< (HasPopCnt ? " SSE4.2" : "")
|
||||
<< (to_uci ? "\nid author ": " by ")
|
||||
<< (HasPext ? " BMI2" : (HasPopCnt ? " SSE4.2" : ""))
|
||||
<< (to_uci ? "\nid author ": " by ")
|
||||
<< "Tord Romstad, Marco Costalba and Joona Kiiski";
|
||||
|
||||
return ss.str();
|
||||
|
|
|
@ -55,7 +55,7 @@
|
|||
#endif
|
||||
|
||||
#if defined(USE_PEXT)
|
||||
# include <x86intrin.h> // Gcc header for _pext_u64() intrinsic
|
||||
# include <immintrin.h> // Header for _pext_u64() intrinsic
|
||||
#else
|
||||
# define _pext_u64(b, m) (0)
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue