mirror of
https://github.com/sockspls/badfish
synced 2025-04-29 16:23:09 +00:00

Previously, we had two type aliases, LargePagePtr and AlignedPtr, which required manually initializing the aligned memory for the pointer. The new helpers: - make_unique_aligned - make_unique_large_page are now available for allocating aligned memory (with large pages). They behave similarly to std::make_unique, ensuring objects allocated with these functions follow RAII. The old approach had issues with initializing non-trivial types or arrays of objects. The evaluation function of the network is now a unique pointer to an array instead of an array of unique pointers. Memory related functions have been moved into memory.h Passed High Hash Pressure Test Non-Regression STC: https://tests.stockfishchess.org/tests/view/665b2b36586058766677cfd2 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 476992 W: 122426 L: 122677 D: 231889 Ptnml(0-2): 1145, 51027, 134419, 50744, 1161 Failed Normal Non-Regression STC: https://tests.stockfishchess.org/tests/view/665b2997586058766677cfc8 LLR: -2.94 (-2.94,2.94) <-1.75,0.25> Total: 877312 W: 225233 L: 226395 D: 425684 Ptnml(0-2): 2110, 94642, 246239, 93630, 2035 Probably a fluke since there shouldn't be a real slowndown and it has also passed the high hash pressure test. closes https://github.com/official-stockfish/Stockfish/pull/5332 No functional change
229 lines
7.1 KiB
C++
229 lines
7.1 KiB
C++
/*
|
|
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
|
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
|
|
|
|
Stockfish is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Stockfish is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "memory.h"
|
|
|
|
#include <cstdlib>
|
|
|
|
#if __has_include("features.h")
|
|
#include <features.h>
|
|
#endif
|
|
|
|
#if defined(__linux__) && !defined(__ANDROID__)
|
|
#include <sys/mman.h>
|
|
#endif
|
|
|
|
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
|
|
|| (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
|
|
|| defined(__e2k__)
|
|
#define POSIXALIGNEDALLOC
|
|
#include <stdlib.h>
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
#if _WIN32_WINNT < 0x0601
|
|
#undef _WIN32_WINNT
|
|
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
|
|
#endif
|
|
|
|
#ifndef NOMINMAX
|
|
#define NOMINMAX
|
|
#endif
|
|
|
|
#include <ios> // std::hex, std::dec
|
|
#include <iostream> // std::cerr
|
|
#include <ostream> // std::endl
|
|
#include <windows.h>
|
|
// The needed Windows API for processor groups could be missed from old Windows
|
|
// versions, so instead of calling them directly (forcing the linker to resolve
|
|
// the calls at compile time), try to load them at runtime. To do this we need
|
|
// first to define the corresponding function pointers.
|
|
extern "C" {
|
|
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
|
|
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
|
|
using AdjustTokenPrivileges_t =
|
|
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
|
|
}
|
|
#endif
|
|
|
|
|
|
namespace Stockfish {
|
|
|
|
// Wrapper for systems where the c++17 implementation
|
|
// does not guarantee the availability of aligned_alloc(). Memory allocated with
|
|
// std_aligned_alloc() must be freed with std_aligned_free().
|
|
void* std_aligned_alloc(size_t alignment, size_t size) {
|
|
// Apple requires 10.15, which is enforced in the makefile
|
|
#if defined(_ISOC11_SOURCE) || defined(__APPLE__)
|
|
return aligned_alloc(alignment, size);
|
|
#elif defined(POSIXALIGNEDALLOC)
|
|
void* mem;
|
|
return posix_memalign(&mem, alignment, size) ? nullptr : mem;
|
|
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
|
|
return _mm_malloc(size, alignment);
|
|
#elif defined(_WIN32)
|
|
return _aligned_malloc(size, alignment);
|
|
#else
|
|
return std::aligned_alloc(alignment, size);
|
|
#endif
|
|
}
|
|
|
|
void std_aligned_free(void* ptr) {
|
|
|
|
#if defined(POSIXALIGNEDALLOC)
|
|
free(ptr);
|
|
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
|
|
_mm_free(ptr);
|
|
#elif defined(_WIN32)
|
|
_aligned_free(ptr);
|
|
#else
|
|
free(ptr);
|
|
#endif
|
|
}
|
|
|
|
// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
|
|
|
|
#if defined(_WIN32)
|
|
|
|
static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {
|
|
|
|
#if !defined(_WIN64)
|
|
return nullptr;
|
|
#else
|
|
|
|
HANDLE hProcessToken{};
|
|
LUID luid{};
|
|
void* mem = nullptr;
|
|
|
|
const size_t largePageSize = GetLargePageMinimum();
|
|
if (!largePageSize)
|
|
return nullptr;
|
|
|
|
// Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges
|
|
|
|
HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
|
|
|
|
if (!hAdvapi32)
|
|
hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
|
|
|
|
auto OpenProcessToken_f =
|
|
OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
|
|
if (!OpenProcessToken_f)
|
|
return nullptr;
|
|
auto LookupPrivilegeValueA_f =
|
|
LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
|
|
if (!LookupPrivilegeValueA_f)
|
|
return nullptr;
|
|
auto AdjustTokenPrivileges_f =
|
|
AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
|
|
if (!AdjustTokenPrivileges_f)
|
|
return nullptr;
|
|
|
|
// We need SeLockMemoryPrivilege, so try to enable it for the process
|
|
if (!OpenProcessToken_f( // OpenProcessToken()
|
|
GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
|
|
return nullptr;
|
|
|
|
if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
|
|
{
|
|
TOKEN_PRIVILEGES tp{};
|
|
TOKEN_PRIVILEGES prevTp{};
|
|
DWORD prevTpLen = 0;
|
|
|
|
tp.PrivilegeCount = 1;
|
|
tp.Privileges[0].Luid = luid;
|
|
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
|
|
|
|
// Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
|
|
// we still need to query GetLastError() to ensure that the privileges were actually obtained.
|
|
if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
|
|
&prevTpLen)
|
|
&& GetLastError() == ERROR_SUCCESS)
|
|
{
|
|
// Round up size to full pages and allocate
|
|
allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
|
|
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
|
|
PAGE_READWRITE);
|
|
|
|
// Privilege no longer needed, restore previous state
|
|
AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
|
|
}
|
|
}
|
|
|
|
CloseHandle(hProcessToken);
|
|
|
|
return mem;
|
|
|
|
#endif
|
|
}
|
|
|
|
void* aligned_large_pages_alloc(size_t allocSize) {
|
|
|
|
// Try to allocate large pages
|
|
void* mem = aligned_large_pages_alloc_windows(allocSize);
|
|
|
|
// Fall back to regular, page-aligned, allocation if necessary
|
|
if (!mem)
|
|
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
|
|
|
|
return mem;
|
|
}
|
|
|
|
#else
|
|
|
|
void* aligned_large_pages_alloc(size_t allocSize) {
|
|
|
|
#if defined(__linux__)
|
|
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
|
|
#else
|
|
constexpr size_t alignment = 4096; // assumed small page size
|
|
#endif
|
|
|
|
// Round up to multiples of alignment
|
|
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
|
|
void* mem = std_aligned_alloc(alignment, size);
|
|
#if defined(MADV_HUGEPAGE)
|
|
madvise(mem, size, MADV_HUGEPAGE);
|
|
#endif
|
|
return mem;
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
// aligned_large_pages_free() will free the previously allocated ttmem
|
|
|
|
#if defined(_WIN32)
|
|
|
|
void aligned_large_pages_free(void* mem) {
|
|
|
|
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
|
|
{
|
|
DWORD err = GetLastError();
|
|
std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
|
|
<< std::dec << std::endl;
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
#else
|
|
|
|
void aligned_large_pages_free(void* mem) { std_aligned_free(mem); }
|
|
|
|
#endif
|
|
} // namespace Stockfish
|