mirror of
https://github.com/sockspls/badfish
synced 2025-04-30 00:33:09 +00:00
Remove custom mutex implementation
As part of the investigation of the hang caused by an incorrect implementation of condition_variable in libwinpthread, it was realized that our custom Mutex implementation is no longer needed. Prior to lazySMP this custom implementation resulted in a 30% speedup, but now no speed difference can be measured as no mutex is used on the hot path in lazySMP. https://github.com/official-stockfish/Stockfish/issues/2291 https://github.com/official-stockfish/Stockfish/issues/2309#issuecomment-533733393 https://github.com/official-stockfish/Stockfish/issues/2309#issuecomment-533737515 The interest of this patch is that it removes platform-specific code, which is always less tested. No functional change.
This commit is contained in:
parent
8726beba59
commit
d703d2b5e7
5 changed files with 16 additions and 61 deletions
|
@ -168,7 +168,7 @@ void dbg_print() {
|
|||
|
||||
std::ostream& operator<<(std::ostream& os, SyncCout sc) {
|
||||
|
||||
static Mutex m;
|
||||
static std::mutex m;
|
||||
|
||||
if (sc == IO_LOCK)
|
||||
m.lock();
|
||||
|
|
|
@ -27,12 +27,12 @@
|
|||
#include <list>
|
||||
#include <sstream>
|
||||
#include <type_traits>
|
||||
#include <mutex>
|
||||
|
||||
#include "../bitboard.h"
|
||||
#include "../movegen.h"
|
||||
#include "../position.h"
|
||||
#include "../search.h"
|
||||
#include "../thread_win32_osx.h"
|
||||
#include "../types.h"
|
||||
#include "../uci.h"
|
||||
|
||||
|
@ -45,7 +45,9 @@
|
|||
#include <sys/stat.h>
|
||||
#else
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define NOMINMAX
|
||||
#ifndef NOMINMAX
|
||||
# define NOMINMAX // Disable macros min() and max()
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
|
@ -1124,14 +1126,14 @@ void set(T& e, uint8_t* data) {
|
|||
template<TBType Type>
|
||||
void* mapped(TBTable<Type>& e, const Position& pos) {
|
||||
|
||||
static Mutex mutex;
|
||||
static std::mutex mutex;
|
||||
|
||||
// Use 'acquire' to avoid a thread reading 'ready' == true while
|
||||
// another is still working. (compiler reordering may cause this).
|
||||
if (e.ready.load(std::memory_order_acquire))
|
||||
return e.baseAddress; // Could be nullptr if file does not exist
|
||||
|
||||
std::unique_lock<Mutex> lk(mutex);
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
|
||||
if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock
|
||||
return e.baseAddress;
|
||||
|
|
|
@ -81,7 +81,7 @@ void Thread::clear() {
|
|||
|
||||
void Thread::start_searching() {
|
||||
|
||||
std::lock_guard<Mutex> lk(mutex);
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
searching = true;
|
||||
cv.notify_one(); // Wake up the thread in idle_loop()
|
||||
}
|
||||
|
@ -92,7 +92,7 @@ void Thread::start_searching() {
|
|||
|
||||
void Thread::wait_for_search_finished() {
|
||||
|
||||
std::unique_lock<Mutex> lk(mutex);
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
cv.wait(lk, [&]{ return !searching; });
|
||||
}
|
||||
|
||||
|
@ -112,7 +112,7 @@ void Thread::idle_loop() {
|
|||
|
||||
while (true)
|
||||
{
|
||||
std::unique_lock<Mutex> lk(mutex);
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
searching = false;
|
||||
cv.notify_one(); // Wake up anyone waiting for search finished
|
||||
cv.wait(lk, [&]{ return searching; });
|
||||
|
|
|
@ -42,8 +42,8 @@
|
|||
|
||||
class Thread {
|
||||
|
||||
Mutex mutex;
|
||||
ConditionVariable cv;
|
||||
std::mutex mutex;
|
||||
std::condition_variable cv;
|
||||
size_t idx;
|
||||
bool exit = false, searching = true; // Set before starting std::thread
|
||||
NativeThread stdThread;
|
||||
|
|
|
@ -21,60 +21,13 @@
|
|||
#ifndef THREAD_WIN32_OSX_H_INCLUDED
|
||||
#define THREAD_WIN32_OSX_H_INCLUDED
|
||||
|
||||
/// STL thread library used by mingw and gcc when cross compiling for Windows
|
||||
/// relies on libwinpthread. Currently libwinpthread implements mutexes directly
|
||||
/// on top of Windows semaphores. Semaphores, being kernel objects, require kernel
|
||||
/// mode transition in order to lock or unlock, which is very slow compared to
|
||||
/// interlocked operations (about 30% slower on bench test). To work around this
|
||||
/// issue, we define our wrappers to the low level Win32 calls. We use critical
|
||||
/// sections to support Windows XP and older versions. Unfortunately, cond_wait()
|
||||
/// is racy between unlock() and WaitForSingleObject() but they have the same
|
||||
/// speed performance as the SRW locks.
|
||||
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
#if defined(_WIN32) && !defined(_MSC_VER)
|
||||
|
||||
#ifndef NOMINMAX
|
||||
# define NOMINMAX // Disable macros min() and max()
|
||||
#endif
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#undef WIN32_LEAN_AND_MEAN
|
||||
#undef NOMINMAX
|
||||
|
||||
/// Mutex and ConditionVariable struct are wrappers of the low level locking
|
||||
/// machinery and are modeled after the corresponding C++11 classes.
|
||||
|
||||
struct Mutex {
|
||||
Mutex() { InitializeCriticalSection(&cs); }
|
||||
~Mutex() { DeleteCriticalSection(&cs); }
|
||||
void lock() { EnterCriticalSection(&cs); }
|
||||
void unlock() { LeaveCriticalSection(&cs); }
|
||||
|
||||
private:
|
||||
CRITICAL_SECTION cs;
|
||||
};
|
||||
|
||||
typedef std::condition_variable_any ConditionVariable;
|
||||
|
||||
#else // Default case: use STL classes
|
||||
|
||||
typedef std::mutex Mutex;
|
||||
typedef std::condition_variable ConditionVariable;
|
||||
|
||||
#endif
|
||||
|
||||
/// On OSX threads other than the main thread are created with a reduced stack
|
||||
/// size of 512KB by default, this is dangerously low for deep searches, so
|
||||
/// adjust it to TH_STACK_SIZE. The implementation calls pthread_create() with
|
||||
/// proper stack size parameter.
|
||||
|
||||
/// On toolchains where pthread is always available, ensure minimum stack size
|
||||
/// instead of relying on linker defaults which may be platform-specific.
|
||||
/// size of 512KB by default, this is too low for deep searches, which require
|
||||
/// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE.
|
||||
/// The implementation calls pthread_create() with the stack size parameter
|
||||
/// equal to the linux 8MB default, on platforms that support it.
|
||||
|
||||
#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue