From e67431e4555b9597143bd881bdc5f414ba040723 Mon Sep 17 00:00:00 2001
From: Danila Kutenin
Date: Thu, 27 Jun 2019 18:13:19 +0300
Subject: [PATCH 1/5] Use mimalloc instead of lfallocator

---
 .gitmodules                                   |    3 +
 CMakeLists.txt                                |    2 +-
 cmake/find_lfalloc.cmake                      |   11 -
 cmake/find_mimalloc.cmake                     |   14 +
 contrib/CMakeLists.txt                        |   12 +-
 contrib/lfalloc/src/lf_allocX64.h             | 1820 -----------------
 contrib/lfalloc/src/lfmalloc.h                |   23 -
 contrib/lfalloc/src/util/README.md            |   33 -
 contrib/lfalloc/src/util/system/atomic.h      |   51 -
 contrib/lfalloc/src/util/system/atomic_gcc.h  |   90 -
 contrib/lfalloc/src/util/system/atomic_ops.h  |  189 --
 contrib/lfalloc/src/util/system/atomic_win.h  |  114 --
 contrib/lfalloc/src/util/system/compiler.h    |  617 ------
 contrib/lfalloc/src/util/system/defaults.h    |  168 --
 contrib/lfalloc/src/util/system/platform.h    |  242 ---
 contrib/lfalloc/src/util/system/types.h       |  117 --
 contrib/mimalloc                              |    1 +
 dbms/CMakeLists.txt                           |    5 +-
 dbms/src/Common/LFAllocator.cpp               |   53 -
 dbms/src/Common/LFAllocator.h                 |   22 -
 dbms/src/Common/MiAllocator.h                 |   49 +
 dbms/src/Common/config.h.in                   |    3 +-
 dbms/src/DataStreams/MarkInCompressedFile.h   |    8 +-
 dbms/src/IO/UncompressedCache.h               |    8 +-
 dbms/src/Interpreters/Compiler.cpp            |    4 +-
 ...StorageSystemBuildOptions.generated.cpp.in |    3 +-
 26 files changed, 91 insertions(+), 3571 deletions(-)
 delete mode 100644 cmake/find_lfalloc.cmake
 create mode 100644 cmake/find_mimalloc.cmake
 delete mode 100644 contrib/lfalloc/src/lf_allocX64.h
 delete mode 100644 contrib/lfalloc/src/lfmalloc.h
 delete mode 100644 contrib/lfalloc/src/util/README.md
 delete mode 100644 contrib/lfalloc/src/util/system/atomic.h
 delete mode 100644 contrib/lfalloc/src/util/system/atomic_gcc.h
 delete mode 100644 contrib/lfalloc/src/util/system/atomic_ops.h
 delete mode 100644 contrib/lfalloc/src/util/system/atomic_win.h
 delete mode 100644 contrib/lfalloc/src/util/system/compiler.h
 delete mode 100644 contrib/lfalloc/src/util/system/defaults.h
 delete mode 100644 contrib/lfalloc/src/util/system/platform.h
 delete mode 100644 contrib/lfalloc/src/util/system/types.h
 create mode 160000 contrib/mimalloc
 delete mode 100644 dbms/src/Common/LFAllocator.cpp
 delete mode 100644 dbms/src/Common/LFAllocator.h
 create mode 100644 dbms/src/Common/MiAllocator.h

diff --git a/.gitmodules b/.gitmodules
index 0fda654f07c..1dc3af6dfad 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -85,3 +85,6 @@
 [submodule "contrib/rapidjson"]
     path = contrib/rapidjson
     url = https://github.com/Tencent/rapidjson
+[submodule "contrib/mimalloc"]
+    path = contrib/mimalloc
+    url = https://github.com/ClickHouse-Extras/mimalloc
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45c48ed6f35..56a434151d4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -335,7 +335,7 @@ include (cmake/find_hdfs3.cmake) # uses protobuf
 include (cmake/find_consistent-hashing.cmake)
 include (cmake/find_base64.cmake)
 include (cmake/find_hyperscan.cmake)
-include (cmake/find_lfalloc.cmake)
+include (cmake/find_mimalloc.cmake)
 include (cmake/find_simdjson.cmake)
 include (cmake/find_rapidjson.cmake)
 find_contrib_lib(cityhash)
diff --git a/cmake/find_lfalloc.cmake b/cmake/find_lfalloc.cmake
deleted file mode 100644
index 32cb1e7d5d5..00000000000
--- a/cmake/find_lfalloc.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-# TODO(danlark1).
Disable LFAlloc for a while to fix mmap count problem -if (NOT OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE AND NOT OS_FREEBSD AND NOT APPLE) - option (ENABLE_LFALLOC "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED}) -endif () - -if (ENABLE_LFALLOC) - set (USE_LFALLOC 1) - set (USE_LFALLOC_RANDOM_HINT 1) - set (LFALLOC_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lfalloc/src) - message (STATUS "Using lfalloc=${USE_LFALLOC}: ${LFALLOC_INCLUDE_DIR}") -endif () diff --git a/cmake/find_mimalloc.cmake b/cmake/find_mimalloc.cmake new file mode 100644 index 00000000000..7d4c00ac09a --- /dev/null +++ b/cmake/find_mimalloc.cmake @@ -0,0 +1,14 @@ +if (OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE) + option (ENABLE_MIMALLOC "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED}) +endif () + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mimalloc/include/mimalloc.h") + message (WARNING "submodule contrib/mimalloc is missing. to fix try run: \n git submodule update --init --recursive") +endif () + +if (ENABLE_MIMALLOC) + set (MIMALLOC_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/mimalloc/include) + set (USE_MIMALLOC 1) + set (MIMALLOC_LIBRARY mimalloc-static) + message (STATUS "Using mimalloc: ${MIMALLOC_INCLUDE_DIR} : ${MIMALLOC_LIBRARY}") +endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 8618d2b3f89..eb3712f4c2b 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,11 +1,11 @@ # Third-party libraries may have substandard code. if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation -Wno-stringop-overflow -Wno-implicit-function-declaration -Wno-return-type -Wno-array-bounds -Wno-bool-compare -Wno-int-conversion -Wno-switch -Wno-stringop-truncation") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation -Wno-implicit-fallthrough -Wno-class-memaccess -Wno-sign-compare -Wno-array-bounds -Wno-missing-attributes -Wno-stringop-truncation -std=c++1z") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -std=c++1z") elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-format -Wno-parentheses-equality -Wno-tautological-constant-compare -Wno-tautological-constant-out-of-range-compare -Wno-implicit-function-declaration -Wno-return-type -Wno-pointer-bool-conversion -Wno-enum-conversion -Wno-int-conversion -Wno-switch -Wno-string-plus-int") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-format -Wno-inconsistent-missing-override -std=c++1z") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -std=c++1z") endif () set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) @@ -317,3 +317,7 @@ endif() if (USE_SIMDJSON) add_subdirectory (simdjson-cmake) endif() + +if (USE_MIMALLOC) + add_subdirectory 
(mimalloc) +endif() diff --git a/contrib/lfalloc/src/lf_allocX64.h b/contrib/lfalloc/src/lf_allocX64.h deleted file mode 100644 index 12190f0712f..00000000000 --- a/contrib/lfalloc/src/lf_allocX64.h +++ /dev/null @@ -1,1820 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "lfmalloc.h" - -#include "util/system/compiler.h" -#include "util/system/types.h" -#include - -#ifdef _MSC_VER -#ifndef _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_WARNINGS -#endif -#ifdef _M_X64 -#define _64_ -#endif -#include -#define WIN32_LEAN_AND_MEAN -#include -#pragma intrinsic(_InterlockedCompareExchange) -#pragma intrinsic(_InterlockedExchangeAdd) - -#include -#include -#include - -#define PERTHREAD __declspec(thread) -#define _win_ -#define Y_FORCE_INLINE __forceinline - -using TAtomic = volatile long; - -static inline long AtomicAdd(TAtomic& a, long b) { - return _InterlockedExchangeAdd(&a, b) + b; -} - -static inline long AtomicSub(TAtomic& a, long b) { - return AtomicAdd(a, -b); -} - -#define Y_ASSERT_NOBT(x) ((void)0) - -#else - -#include "util/system/defaults.h" -#include "util/system/atomic.h" -#include - -#if !defined(NDEBUG) && !defined(__GCCXML__) -#define Y_ASSERT_NOBT(a) \ - do { \ - if (Y_UNLIKELY(!(a))) { \ - assert(false && (a)); \ - } \ - } while (0) -#else -#define Y_ASSERT_NOBT(a) \ - do { \ - if (false) { \ - bool __xxx = static_cast(a); \ - Y_UNUSED(__xxx); \ - } \ - } while (0) -#endif - -#include -#include -#include -#include -#include -#include - -#if defined(_linux_) -#if !defined(MADV_HUGEPAGE) -#define MADV_HUGEPAGE 14 -#endif -#if !defined(MAP_HUGETLB) -#define MAP_HUGETLB 0x40000 -#endif -#endif - -#define PERTHREAD __thread - -#endif - -#ifndef _darwin_ - -#ifndef Y_ARRAY_SIZE -#define Y_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) -#endif - -#ifndef NDEBUG -#define DBG_FILL_MEMORY -static bool FillMemoryOnAllocation = true; -#endif - -static bool TransparentHugePages = false; // force MADV_HUGEPAGE for large allocs -static bool MapHugeTLB = false; // force MAP_HUGETLB for small allocs -static bool EnableDefrag = true; - -// Buffers that are larger than this size will not be filled with 0xcf -#ifndef DBG_FILL_MAX_SIZE -#define DBG_FILL_MAX_SIZE 0x01000000000000ULL -#endif - -template -inline T* DoCas(T* volatile* target, T* exchange, T* compare) { -#if defined(_linux_) - return __sync_val_compare_and_swap(target, compare, exchange); -#elif defined(_WIN32) -#ifdef _64_ - return (T*)_InterlockedCompareExchange64((__int64*)target, (__int64)exchange, (__int64)compare); -#else - //return (T*)InterlockedCompareExchangePointer(targetVoidP, exchange, compare); - return (T*)_InterlockedCompareExchange((LONG*)target, (LONG)exchange, (LONG)compare); -#endif -#elif defined(__i386) || defined(__x86_64__) - union { - T* volatile* NP; - void* volatile* VoidP; - } gccSucks; - gccSucks.NP = target; - void* volatile* targetVoidP = gccSucks.VoidP; - - __asm__ __volatile__( - "lock\n\t" - "cmpxchg %2,%0\n\t" - : "+m"(*(targetVoidP)), "+a"(compare) - : "r"(exchange) - : "cc", "memory"); - return compare; -#else -#error inline_cas not defined for this platform -#endif -} - -#ifdef _64_ -const uintptr_t N_MAX_WORKSET_SIZE = 0x100000000ll * 200; -const uintptr_t N_HUGE_AREA_FINISH = 0x700000000000ll; -#ifndef _freebsd_ -const uintptr_t LINUX_MMAP_AREA_START = 0x100000000ll; -static uintptr_t volatile linuxAllocPointer = LINUX_MMAP_AREA_START; -static uintptr_t volatile linuxAllocPointerHuge = LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE; -#endif -#else -const uintptr_t N_MAX_WORKSET_SIZE = 
0xffffffff; -#endif -#define ALLOC_START ((char*)0) - -const size_t N_CHUNK_SIZE = 1024 * 1024; -const size_t N_CHUNKS = N_MAX_WORKSET_SIZE / N_CHUNK_SIZE; -const size_t N_LARGE_ALLOC_SIZE = N_CHUNK_SIZE * 128; - -// map size idx to size in bytes -#ifdef LFALLOC_YT -const int N_SIZES = 27; -#else -const int N_SIZES = 25; -#endif -const int nSizeIdxToSize[N_SIZES] = { - -1, -#if defined(_64_) - 16, 16, 32, 32, 48, 64, 96, 128, -#else - 8, - 16, - 24, - 32, - 48, - 64, - 96, - 128, -#endif - 192, 256, 384, 512, 768, 1024, 1536, 2048, - 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, -#ifdef LFALLOC_YT - 49152, 65536 -#endif -}; -#ifdef LFALLOC_YT -const size_t N_MAX_FAST_SIZE = 65536; -#else -const size_t N_MAX_FAST_SIZE = 32768; -#endif -const unsigned char size2idxArr1[64 + 1] = { - 1, -#if defined(_64_) - 2, 2, 4, 4, // 16, 16, 32, 32 -#else - 1, 2, 3, 4, // 8, 16, 24, 32 -#endif - 5, 5, 6, 6, // 48, 64 - 7, 7, 7, 7, 8, 8, 8, 8, // 96, 128 - 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, // 192, 256 - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, // 384 - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 // 512 -}; -#ifdef LFALLOC_YT -const unsigned char size2idxArr2[256] = { -#else -const unsigned char size2idxArr2[128] = { -#endif - 12, 12, 13, 14, // 512, 512, 768, 1024 - 15, 15, 16, 16, // 1536, 2048 - 17, 17, 17, 17, 18, 18, 18, 18, // 3072, 4096 - 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, // 6144, 8192 - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, // 12288 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, // 16384 - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, // 24576 - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, // 32768 -#ifdef LFALLOC_YT - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 49152 - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, // 65536 -#endif -}; - -// map entry number to size idx -// special size idx's: 0 = not used, -1 = mem locked, but not allocated -static volatile char chunkSizeIdx[N_CHUNKS]; -const int FREE_CHUNK_ARR_BUF = 0x20000; // this is effectively 128G of free memory (with 1M chunks), should not be exhausted actually -static volatile uintptr_t freeChunkArr[FREE_CHUNK_ARR_BUF]; -static volatile int freeChunkCount; - -static void AddFreeChunk(uintptr_t chunkId) { - chunkSizeIdx[chunkId] = -1; - if (Y_UNLIKELY(freeChunkCount == FREE_CHUNK_ARR_BUF)) - NMalloc::AbortFromCorruptedAllocator(); // free chunks arrray overflowed - freeChunkArr[freeChunkCount++] = chunkId; -} - -static bool GetFreeChunk(uintptr_t* res) { - if (freeChunkCount == 0) { - *res = 0; - return false; - } - *res = freeChunkArr[--freeChunkCount]; - return true; -} - -////////////////////////////////////////////////////////////////////////// -enum ELFAllocCounter { - CT_USER_ALLOC, // accumulated size requested by user code - CT_MMAP, // accumulated mmapped size - CT_MMAP_CNT, // number of mmapped 
regions - CT_MUNMAP, // accumulated unmmapped size - CT_MUNMAP_CNT, // number of munmaped regions - CT_SYSTEM_ALLOC, // accumulated allocated size for internal lfalloc needs - CT_SYSTEM_FREE, // accumulated deallocated size for internal lfalloc needs - CT_SMALL_ALLOC, // accumulated allocated size for fixed-size blocks - CT_SMALL_FREE, // accumulated deallocated size for fixed-size blocks - CT_LARGE_ALLOC, // accumulated allocated size for large blocks - CT_LARGE_FREE, // accumulated deallocated size for large blocks - CT_SLOW_ALLOC_CNT, // number of slow (not LF) allocations - CT_DEGRAGMENT_CNT, // number of memory defragmentations - CT_MAX -}; - -static Y_FORCE_INLINE void IncrementCounter(ELFAllocCounter counter, size_t value); - -////////////////////////////////////////////////////////////////////////// -enum EMMapMode { - MM_NORMAL, // memory for small allocs - MM_HUGE // memory for large allocs -}; - -#ifndef _MSC_VER -inline void VerifyMmapResult(void* result) { - if (Y_UNLIKELY(result == MAP_FAILED)) - NMalloc::AbortFromCorruptedAllocator(); // negative size requested? or just out of mem -} -#endif - -#if !defined(_MSC_VER) && !defined(_freebsd_) && defined(_64_) -static char* AllocWithMMapLinuxImpl(uintptr_t sz, EMMapMode mode) { - char* volatile* areaPtr; - char* areaStart; - uintptr_t areaFinish; - - int mapProt = PROT_READ | PROT_WRITE; - int mapFlags = MAP_PRIVATE | MAP_ANON; - - if (mode == MM_HUGE) { - areaPtr = reinterpret_cast(&linuxAllocPointerHuge); - areaStart = reinterpret_cast(LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE); - areaFinish = N_HUGE_AREA_FINISH; - } else { - areaPtr = reinterpret_cast(&linuxAllocPointer); - areaStart = reinterpret_cast(LINUX_MMAP_AREA_START); - areaFinish = N_MAX_WORKSET_SIZE; - - if (MapHugeTLB) { - mapFlags |= MAP_HUGETLB; - } - } - - bool wrapped = false; - for (;;) { - char* prevAllocPtr = *areaPtr; - char* nextAllocPtr = prevAllocPtr + sz; - if (uintptr_t(nextAllocPtr - (char*)nullptr) >= areaFinish) { - if (Y_UNLIKELY(wrapped)) { - // virtual memory is over fragmented - NMalloc::AbortFromCorruptedAllocator(); - } - // wrap after all area is used - DoCas(areaPtr, areaStart, prevAllocPtr); - wrapped = true; - continue; - } - - if (DoCas(areaPtr, nextAllocPtr, prevAllocPtr) != prevAllocPtr) - continue; - - char* largeBlock = (char*)mmap(prevAllocPtr, sz, mapProt, mapFlags, -1, 0); - VerifyMmapResult(largeBlock); - if (largeBlock == prevAllocPtr) - return largeBlock; - if (largeBlock) - munmap(largeBlock, sz); - - if (sz < 0x80000) { - // skip utilized area with big steps - DoCas(areaPtr, nextAllocPtr + 0x10 * 0x10000, nextAllocPtr); - } - } -} -#endif - -static char* AllocWithMMap(uintptr_t sz, EMMapMode mode) { - (void)mode; -#ifdef _MSC_VER - char* largeBlock = (char*)VirtualAlloc(0, sz, MEM_RESERVE, PAGE_READWRITE); - if (Y_UNLIKELY(largeBlock == nullptr)) - NMalloc::AbortFromCorruptedAllocator(); // out of memory - if (Y_UNLIKELY(uintptr_t(((char*)largeBlock - ALLOC_START) + sz) >= N_MAX_WORKSET_SIZE)) - NMalloc::AbortFromCorruptedAllocator(); // out of working set, something has broken -#else -#if defined(_freebsd_) || !defined(_64_) || defined(USE_LFALLOC_RANDOM_HINT) - uintptr_t areaStart; - uintptr_t areaFinish; - if (mode == MM_HUGE) { - areaStart = LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE; - areaFinish = N_HUGE_AREA_FINISH; - } else { - areaStart = LINUX_MMAP_AREA_START; - areaFinish = N_MAX_WORKSET_SIZE; - } -#if defined(USE_LFALLOC_RANDOM_HINT) - static thread_local std::mt19937_64 generator(std::random_device{}()); - 
std::uniform_int_distribution distr(areaStart, areaFinish - sz - 1); - char* largeBlock; - static constexpr size_t MaxAttempts = 100; - size_t attempt = 0; - do - { - largeBlock = (char*)mmap(reinterpret_cast(distr(generator)), sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); - ++attempt; - } while (uintptr_t(((char*)largeBlock - ALLOC_START) + sz) >= areaFinish && attempt < MaxAttempts && munmap(largeBlock, sz) == 0); -#else - char* largeBlock = (char*)mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); -#endif - VerifyMmapResult(largeBlock); - if (Y_UNLIKELY(uintptr_t(((char*)largeBlock - ALLOC_START) + sz) >= areaFinish)) - NMalloc::AbortFromCorruptedAllocator(); // out of working set, something has broken -#else - char* largeBlock = AllocWithMMapLinuxImpl(sz, mode); - if (TransparentHugePages) { - madvise(largeBlock, sz, MADV_HUGEPAGE); - } -#endif -#endif - Y_ASSERT_NOBT(largeBlock); - IncrementCounter(CT_MMAP, sz); - IncrementCounter(CT_MMAP_CNT, 1); - return largeBlock; -} - -enum class ELarge : ui8 { - Free = 0, // block in free cache - Alloc = 1, // block is allocated - Gone = 2, // block was unmapped -}; - -struct TLargeBlk { - - static TLargeBlk* As(void *raw) { - return reinterpret_cast((char*)raw - 4096ll); - } - - static const TLargeBlk* As(const void *raw) { - return reinterpret_cast((const char*)raw - 4096ll); - } - - void SetSize(size_t bytes, size_t pages) { - Pages = pages; - Bytes = bytes; - } - - void Mark(ELarge state) { - const ui64 marks[] = { - 0x8b38aa5ca4953c98, // ELarge::Free - 0xf916d33584eb5087, // ELarge::Alloc - 0xd33b0eca7651bc3f // ELarge::Gone - }; - - Token = size_t(marks[ui8(state)]); - } - - size_t Pages; // Total pages allocated with mmap like call - size_t Bytes; // Actually requested bytes by user - size_t Token; // Block state token, see ELarge enum. 
-}; - - -static void LargeBlockUnmap(void* p, size_t pages) { - const auto bytes = (pages + 1) * uintptr_t(4096); - - IncrementCounter(CT_MUNMAP, bytes); - IncrementCounter(CT_MUNMAP_CNT, 1); -#ifdef _MSC_VER - Y_ASSERT_NOBT(0); -#else - TLargeBlk::As(p)->Mark(ELarge::Gone); - munmap((char*)p - 4096ll, bytes); -#endif -} - -////////////////////////////////////////////////////////////////////////// -const size_t LB_BUF_SIZE = 250; -const size_t LB_BUF_HASH = 977; -static int LB_LIMIT_TOTAL_SIZE = 500 * 1024 * 1024 / 4096; // do not keep more then this mem total in lbFreePtrs[] -static void* volatile lbFreePtrs[LB_BUF_HASH][LB_BUF_SIZE]; -static TAtomic lbFreePageCount; - - -static void* LargeBlockAlloc(size_t _nSize, ELFAllocCounter counter) { - size_t pgCount = (_nSize + 4095) / 4096; -#ifdef _MSC_VER - char* pRes = (char*)VirtualAlloc(0, (pgCount + 1) * 4096ll, MEM_COMMIT, PAGE_READWRITE); - if (Y_UNLIKELY(pRes == 0)) { - NMalloc::AbortFromCorruptedAllocator(); // out of memory - } -#else - - IncrementCounter(counter, pgCount * 4096ll); - IncrementCounter(CT_SYSTEM_ALLOC, 4096ll); - - int lbHash = pgCount % LB_BUF_HASH; - for (int i = 0; i < LB_BUF_SIZE; ++i) { - void* p = lbFreePtrs[lbHash][i]; - if (p == nullptr) - continue; - if (DoCas(&lbFreePtrs[lbHash][i], (void*)nullptr, p) == p) { - size_t realPageCount = TLargeBlk::As(p)->Pages; - if (realPageCount == pgCount) { - AtomicAdd(lbFreePageCount, -pgCount); - TLargeBlk::As(p)->Mark(ELarge::Alloc); - return p; - } else { - if (DoCas(&lbFreePtrs[lbHash][i], p, (void*)nullptr) != (void*)nullptr) { - // block was freed while we were busy - AtomicAdd(lbFreePageCount, -realPageCount); - LargeBlockUnmap(p, realPageCount); - --i; - } - } - } - } - char* pRes = AllocWithMMap((pgCount + 1) * 4096ll, MM_HUGE); -#endif - pRes += 4096ll; - TLargeBlk::As(pRes)->SetSize(_nSize, pgCount); - TLargeBlk::As(pRes)->Mark(ELarge::Alloc); - - return pRes; -} - -#ifndef _MSC_VER -static void FreeAllLargeBlockMem() { - for (auto& lbFreePtr : lbFreePtrs) { - for (int i = 0; i < LB_BUF_SIZE; ++i) { - void* p = lbFreePtr[i]; - if (p == nullptr) - continue; - if (DoCas(&lbFreePtr[i], (void*)nullptr, p) == p) { - int pgCount = TLargeBlk::As(p)->Pages; - AtomicAdd(lbFreePageCount, -pgCount); - LargeBlockUnmap(p, pgCount); - } - } - } -} -#endif - -static void LargeBlockFree(void* p, ELFAllocCounter counter) { - if (p == nullptr) - return; -#ifdef _MSC_VER - VirtualFree((char*)p - 4096ll, 0, MEM_RELEASE); -#else - size_t pgCount = TLargeBlk::As(p)->Pages; - - TLargeBlk::As(p)->Mark(ELarge::Free); - IncrementCounter(counter, pgCount * 4096ll); - IncrementCounter(CT_SYSTEM_FREE, 4096ll); - - if (lbFreePageCount > LB_LIMIT_TOTAL_SIZE) - FreeAllLargeBlockMem(); - int lbHash = pgCount % LB_BUF_HASH; - for (int i = 0; i < LB_BUF_SIZE; ++i) { - if (lbFreePtrs[lbHash][i] == nullptr) { - if (DoCas(&lbFreePtrs[lbHash][i], p, (void*)nullptr) == nullptr) { - AtomicAdd(lbFreePageCount, pgCount); - return; - } - } - } - - LargeBlockUnmap(p, pgCount); -#endif -} - -static void* SystemAlloc(size_t _nSize) { - //HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, _nSize); - return LargeBlockAlloc(_nSize, CT_SYSTEM_ALLOC); -} -static void SystemFree(void* p) { - //HeapFree(GetProcessHeap(), 0, p); - LargeBlockFree(p, CT_SYSTEM_FREE); -} - -////////////////////////////////////////////////////////////////////////// -static int* volatile nLock = nullptr; -static int nLockVar; -inline void RealEnterCriticalDefault(int* volatile* lockPtr) { - while (DoCas(lockPtr, &nLockVar, 
(int*)nullptr) != nullptr) - ; //pthread_yield(); -} -inline void RealLeaveCriticalDefault(int* volatile* lockPtr) { - *lockPtr = nullptr; -} -static void (*RealEnterCritical)(int* volatile* lockPtr) = RealEnterCriticalDefault; -static void (*RealLeaveCritical)(int* volatile* lockPtr) = RealLeaveCriticalDefault; -static void (*BeforeLFAllocGlobalLockAcquired)() = nullptr; -static void (*AfterLFAllocGlobalLockReleased)() = nullptr; -class CCriticalSectionLockMMgr { -public: - CCriticalSectionLockMMgr() { - if (BeforeLFAllocGlobalLockAcquired) { - BeforeLFAllocGlobalLockAcquired(); - } - RealEnterCritical(&nLock); - } - ~CCriticalSectionLockMMgr() { - RealLeaveCritical(&nLock); - if (AfterLFAllocGlobalLockReleased) { - AfterLFAllocGlobalLockReleased(); - } - } -}; - -////////////////////////////////////////////////////////////////////////// -class TLFAllocFreeList { - struct TNode { - TNode* Next; - }; - - TNode* volatile Head; - TNode* volatile Pending; - TAtomic PendingToFreeListCounter; - TAtomic AllocCount; - - static Y_FORCE_INLINE void Enqueue(TNode* volatile* headPtr, TNode* n) { - for (;;) { - TNode* volatile prevHead = *headPtr; - n->Next = prevHead; - if (DoCas(headPtr, n, prevHead) == prevHead) - break; - } - } - Y_FORCE_INLINE void* DoAlloc() { - TNode* res; - for (res = Head; res; res = Head) { - TNode* keepNext = res->Next; - if (DoCas(&Head, keepNext, res) == res) { - //Y_VERIFY(keepNext == res->Next); - break; - } - } - return res; - } - void FreeList(TNode* fl) { - if (!fl) - return; - TNode* flTail = fl; - while (flTail->Next) - flTail = flTail->Next; - for (;;) { - TNode* volatile prevHead = Head; - flTail->Next = prevHead; - if (DoCas(&Head, fl, prevHead) == prevHead) - break; - } - } - -public: - Y_FORCE_INLINE void Free(void* ptr) { - TNode* newFree = (TNode*)ptr; - if (AtomicAdd(AllocCount, 0) == 0) - Enqueue(&Head, newFree); - else - Enqueue(&Pending, newFree); - } - Y_FORCE_INLINE void* Alloc() { - TAtomic keepCounter = AtomicAdd(PendingToFreeListCounter, 0); - TNode* fl = Pending; - if (AtomicAdd(AllocCount, 1) == 1) { - // No other allocs in progress. - // If (keepCounter == PendingToFreeListCounter) then Pending was not freed by other threads. 
- // Hence Pending is not used in any concurrent DoAlloc() atm and can be safely moved to FreeList - if (fl && keepCounter == AtomicAdd(PendingToFreeListCounter, 0) && DoCas(&Pending, (TNode*)nullptr, fl) == fl) { - // pick first element from Pending and return it - void* res = fl; - fl = fl->Next; - // if there are other elements in Pending list, add them to main free list - FreeList(fl); - AtomicAdd(PendingToFreeListCounter, 1); - AtomicAdd(AllocCount, -1); - return res; - } - } - void* res = DoAlloc(); - AtomicAdd(AllocCount, -1); - return res; - } - void* GetWholeList() { - TNode* res; - for (res = Head; res; res = Head) { - if (DoCas(&Head, (TNode*)nullptr, res) == res) - break; - } - return res; - } - void ReturnWholeList(void* ptr) { - while (AtomicAdd(AllocCount, 0) != 0) // theoretically can run into problems with parallel DoAlloc() - ; //ThreadYield(); - for (;;) { - TNode* prevHead = Head; - if (DoCas(&Head, (TNode*)ptr, prevHead) == prevHead) { - FreeList(prevHead); - break; - } - } - } -}; - -///////////////////////////////////////////////////////////////////////// -static TLFAllocFreeList globalFreeLists[N_SIZES]; -static char* volatile globalCurrentPtr[N_SIZES]; -static TLFAllocFreeList blockFreeList; - -// globalFreeLists[] contains TFreeListGroup, each of them points up to 15 free blocks -const int FL_GROUP_SIZE = 15; -struct TFreeListGroup { - TFreeListGroup* Next; - char* Ptrs[FL_GROUP_SIZE]; -}; -#ifdef _64_ -const int FREE_LIST_GROUP_SIZEIDX = 8; -#else -const int FREE_LIST_GROUP_SIZEIDX = 6; -#endif - -////////////////////////////////////////////////////////////////////////// -// find free chunks and reset chunk size so they can be reused by different sized allocations -// do not look at blockFreeList (TFreeListGroup has same size for any allocations) -static bool DefragmentMem() { - if (!EnableDefrag) { - return false; - } - - IncrementCounter(CT_DEGRAGMENT_CNT, 1); - - int* nFreeCount = (int*)SystemAlloc(N_CHUNKS * sizeof(int)); - if (Y_UNLIKELY(!nFreeCount)) { - //__debugbreak(); - NMalloc::AbortFromCorruptedAllocator(); - } - memset(nFreeCount, 0, N_CHUNKS * sizeof(int)); - - TFreeListGroup* wholeLists[N_SIZES]; - for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx) { - wholeLists[nSizeIdx] = (TFreeListGroup*)globalFreeLists[nSizeIdx].GetWholeList(); - for (TFreeListGroup* g = wholeLists[nSizeIdx]; g; g = g->Next) { - for (auto pData : g->Ptrs) { - if (pData) { - uintptr_t nChunk = (pData - ALLOC_START) / N_CHUNK_SIZE; - ++nFreeCount[nChunk]; - Y_ASSERT_NOBT(chunkSizeIdx[nChunk] == nSizeIdx); - } - } - } - } - - bool bRes = false; - for (size_t nChunk = 0; nChunk < N_CHUNKS; ++nChunk) { - int fc = nFreeCount[nChunk]; - nFreeCount[nChunk] = 0; - if (chunkSizeIdx[nChunk] <= 0) - continue; - int nEntries = N_CHUNK_SIZE / nSizeIdxToSize[static_cast(chunkSizeIdx[nChunk])]; - Y_ASSERT_NOBT(fc <= nEntries); // can not have more free blocks then total count - if (fc == nEntries) { - bRes = true; - nFreeCount[nChunk] = 1; - } - } - if (bRes) { - for (auto& wholeList : wholeLists) { - TFreeListGroup** ppPtr = &wholeList; - while (*ppPtr) { - TFreeListGroup* g = *ppPtr; - int dst = 0; - for (auto pData : g->Ptrs) { - if (pData) { - uintptr_t nChunk = (pData - ALLOC_START) / N_CHUNK_SIZE; - if (nFreeCount[nChunk] == 0) - g->Ptrs[dst++] = pData; // block is not freed, keep pointer - } - } - if (dst == 0) { - // no valid pointers in group, free it - *ppPtr = g->Next; - blockFreeList.Free(g); - } else { - // reset invalid pointers to 0 - for (int i = dst; i < FL_GROUP_SIZE; 
++i) - g->Ptrs[i] = nullptr; - ppPtr = &g->Next; - } - } - } - for (uintptr_t nChunk = 0; nChunk < N_CHUNKS; ++nChunk) { - if (!nFreeCount[nChunk]) - continue; - char* pStart = ALLOC_START + nChunk * N_CHUNK_SIZE; -#ifdef _win_ - VirtualFree(pStart, N_CHUNK_SIZE, MEM_DECOMMIT); -#elif defined(_freebsd_) - madvise(pStart, N_CHUNK_SIZE, MADV_FREE); -#else - madvise(pStart, N_CHUNK_SIZE, MADV_DONTNEED); -#endif - AddFreeChunk(nChunk); - } - } - - for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx) - globalFreeLists[nSizeIdx].ReturnWholeList(wholeLists[nSizeIdx]); - - SystemFree(nFreeCount); - return bRes; -} - -static Y_FORCE_INLINE void* LFAllocFromCurrentChunk(int nSizeIdx, int blockSize, int count) { - char* volatile* pFreeArray = &globalCurrentPtr[nSizeIdx]; - while (char* newBlock = *pFreeArray) { - char* nextFree = newBlock + blockSize * count; - - // check if there is space in chunk - char* globalEndPtr = ALLOC_START + ((newBlock - ALLOC_START) & ~((uintptr_t)N_CHUNK_SIZE - 1)) + N_CHUNK_SIZE; - if (nextFree >= globalEndPtr) { - if (nextFree > globalEndPtr) - break; - nextFree = nullptr; // it was last block in chunk - } - if (DoCas(pFreeArray, nextFree, newBlock) == newBlock) - return newBlock; - } - return nullptr; -} - -enum EDefrag { - MEM_DEFRAG, - NO_MEM_DEFRAG, -}; - -static void* SlowLFAlloc(int nSizeIdx, int blockSize, EDefrag defrag) { - IncrementCounter(CT_SLOW_ALLOC_CNT, 1); - - CCriticalSectionLockMMgr ls; - void* res = LFAllocFromCurrentChunk(nSizeIdx, blockSize, 1); - if (res) - return res; // might happen when other thread allocated new current chunk - - for (;;) { - uintptr_t nChunk; - if (GetFreeChunk(&nChunk)) { - char* newPlace = ALLOC_START + nChunk * N_CHUNK_SIZE; -#ifdef _MSC_VER - void* pTest = VirtualAlloc(newPlace, N_CHUNK_SIZE, MEM_COMMIT, PAGE_READWRITE); - Y_ASSERT_NOBT(pTest == newPlace); -#endif - chunkSizeIdx[nChunk] = (char)nSizeIdx; - globalCurrentPtr[nSizeIdx] = newPlace + blockSize; - return newPlace; - } - - // out of luck, try to defrag - if (defrag == MEM_DEFRAG && DefragmentMem()) { - continue; - } - - char* largeBlock = AllocWithMMap(N_LARGE_ALLOC_SIZE, MM_NORMAL); - uintptr_t addr = ((largeBlock - ALLOC_START) + N_CHUNK_SIZE - 1) & (~(N_CHUNK_SIZE - 1)); - uintptr_t endAddr = ((largeBlock - ALLOC_START) + N_LARGE_ALLOC_SIZE) & (~(N_CHUNK_SIZE - 1)); - for (uintptr_t p = addr; p < endAddr; p += N_CHUNK_SIZE) { - uintptr_t chunk = p / N_CHUNK_SIZE; - Y_ASSERT_NOBT(chunk * N_CHUNK_SIZE == p); - Y_ASSERT_NOBT(chunkSizeIdx[chunk] == 0); - AddFreeChunk(chunk); - } - } - return nullptr; -} - -// allocate single block -static Y_FORCE_INLINE void* LFAllocNoCache(int nSizeIdx, EDefrag defrag) { - int blockSize = nSizeIdxToSize[nSizeIdx]; - void* res = LFAllocFromCurrentChunk(nSizeIdx, blockSize, 1); - if (res) - return res; - - return SlowLFAlloc(nSizeIdx, blockSize, defrag); -} - -// allocate multiple blocks, returns number of blocks allocated (max FL_GROUP_SIZE) -// buf should have space for at least FL_GROUP_SIZE elems -static Y_FORCE_INLINE int LFAllocNoCacheMultiple(int nSizeIdx, char** buf) { - int blockSize = nSizeIdxToSize[nSizeIdx]; - void* res = LFAllocFromCurrentChunk(nSizeIdx, blockSize, FL_GROUP_SIZE); - if (res) { - char* resPtr = (char*)res; - for (int k = 0; k < FL_GROUP_SIZE; ++k) { - buf[k] = resPtr; - resPtr += blockSize; - } - return FL_GROUP_SIZE; - } - buf[0] = (char*)SlowLFAlloc(nSizeIdx, blockSize, MEM_DEFRAG); - return 1; -} - -// take several blocks from global free list (max FL_GROUP_SIZE blocks), returns number of blocks 
taken -// buf should have space for at least FL_GROUP_SIZE elems -static Y_FORCE_INLINE int TakeBlocksFromGlobalFreeList(int nSizeIdx, char** buf) { - TLFAllocFreeList& fl = globalFreeLists[nSizeIdx]; - TFreeListGroup* g = (TFreeListGroup*)fl.Alloc(); - if (g) { - int resCount = 0; - for (auto& ptr : g->Ptrs) { - if (ptr) - buf[resCount++] = ptr; - else - break; - } - blockFreeList.Free(g); - return resCount; - } - return 0; -} - -// add several blocks to global free list -static Y_FORCE_INLINE void PutBlocksToGlobalFreeList(ptrdiff_t nSizeIdx, char** buf, int count) { - for (int startIdx = 0; startIdx < count;) { - TFreeListGroup* g = (TFreeListGroup*)blockFreeList.Alloc(); - Y_ASSERT_NOBT(sizeof(TFreeListGroup) == nSizeIdxToSize[FREE_LIST_GROUP_SIZEIDX]); - if (!g) { - g = (TFreeListGroup*)LFAllocNoCache(FREE_LIST_GROUP_SIZEIDX, NO_MEM_DEFRAG); - } - - int groupSize = count - startIdx; - if (groupSize > FL_GROUP_SIZE) - groupSize = FL_GROUP_SIZE; - for (int i = 0; i < groupSize; ++i) - g->Ptrs[i] = buf[startIdx + i]; - for (int i = groupSize; i < FL_GROUP_SIZE; ++i) - g->Ptrs[i] = nullptr; - - // add free group to the global list - TLFAllocFreeList& fl = globalFreeLists[nSizeIdx]; - fl.Free(g); - - startIdx += groupSize; - } -} - -////////////////////////////////////////////////////////////////////////// -static TAtomic GlobalCounters[CT_MAX]; -const int MAX_LOCAL_UPDATES = 100; - -struct TLocalCounter { - intptr_t Value; - int Updates; - TAtomic* Parent; - - Y_FORCE_INLINE void Init(TAtomic* parent) { - Parent = parent; - Value = 0; - Updates = 0; - } - - Y_FORCE_INLINE void Increment(size_t value) { - Value += value; - if (++Updates > MAX_LOCAL_UPDATES) { - Flush(); - } - } - - Y_FORCE_INLINE void Flush() { - AtomicAdd(*Parent, Value); - Value = 0; - Updates = 0; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -// DBG stuff -//////////////////////////////////////////////////////////////////////////////// - -#if defined(LFALLOC_DBG) - -struct TPerTagAllocCounter { - TAtomic Size; - TAtomic Count; - - Y_FORCE_INLINE void Alloc(size_t size) { - AtomicAdd(Size, size); - AtomicAdd(Count, 1); - } - - Y_FORCE_INLINE void Free(size_t size) { - AtomicSub(Size, size); - AtomicSub(Count, 1); - } -}; - -struct TLocalPerTagAllocCounter { - intptr_t Size; - int Count; - int Updates; - - Y_FORCE_INLINE void Init() { - Size = 0; - Count = 0; - Updates = 0; - } - - Y_FORCE_INLINE void Alloc(TPerTagAllocCounter& parent, size_t size) { - Size += size; - ++Count; - if (++Updates > MAX_LOCAL_UPDATES) { - Flush(parent); - } - } - - Y_FORCE_INLINE void Free(TPerTagAllocCounter& parent, size_t size) { - Size -= size; - --Count; - if (++Updates > MAX_LOCAL_UPDATES) { - Flush(parent); - } - } - - Y_FORCE_INLINE void Flush(TPerTagAllocCounter& parent) { - AtomicAdd(parent.Size, Size); - Size = 0; - AtomicAdd(parent.Count, Count); - Count = 0; - Updates = 0; - } -}; - -static const int DBG_ALLOC_MAX_TAG = 1000; -static const int DBG_ALLOC_NUM_SIZES = 30; -static TPerTagAllocCounter GlobalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES]; - -#endif // LFALLOC_DBG - -////////////////////////////////////////////////////////////////////////// -const int THREAD_BUF = 256; -static int borderSizes[N_SIZES]; -const int MAX_MEM_PER_SIZE_PER_THREAD = 512 * 1024; -struct TThreadAllocInfo { - // FreePtrs - pointers to first free blocks in per thread block list - // LastFreePtrs - pointers to last blocks in lists, may be invalid if FreePtr is zero - char* 
FreePtrs[N_SIZES][THREAD_BUF]; - int FreePtrIndex[N_SIZES]; - TThreadAllocInfo* pNextInfo; - TLocalCounter LocalCounters[CT_MAX]; - -#if defined(LFALLOC_DBG) - TLocalPerTagAllocCounter LocalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES]; -#endif -#ifdef _win_ - HANDLE hThread; -#endif - - void Init(TThreadAllocInfo** pHead) { - memset(this, 0, sizeof(*this)); - for (auto& i : FreePtrIndex) - i = THREAD_BUF; -#ifdef _win_ - BOOL b = DuplicateHandle( - GetCurrentProcess(), GetCurrentThread(), - GetCurrentProcess(), &hThread, - 0, FALSE, DUPLICATE_SAME_ACCESS); - Y_ASSERT_NOBT(b); -#endif - pNextInfo = *pHead; - *pHead = this; - for (int k = 0; k < N_SIZES; ++k) { - int maxCount = MAX_MEM_PER_SIZE_PER_THREAD / nSizeIdxToSize[k]; - if (maxCount > THREAD_BUF) - maxCount = THREAD_BUF; - borderSizes[k] = THREAD_BUF - maxCount; - } - for (int i = 0; i < CT_MAX; ++i) { - LocalCounters[i].Init(&GlobalCounters[i]); - } -#if defined(LFALLOC_DBG) - for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { - for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { - auto& local = LocalPerTagAllocCounters[tag][sizeIdx]; - local.Init(); - } - } -#endif - } - void Done() { - for (auto sizeIdx : FreePtrIndex) { - Y_ASSERT_NOBT(sizeIdx == THREAD_BUF); - } - for (auto& localCounter : LocalCounters) { - localCounter.Flush(); - } -#if defined(LFALLOC_DBG) - for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { - for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { - auto& local = LocalPerTagAllocCounters[tag][sizeIdx]; - auto& global = GlobalPerTagAllocCounters[tag][sizeIdx]; - local.Flush(global); - } - } -#endif -#ifdef _win_ - if (hThread) - CloseHandle(hThread); -#endif - } -}; -PERTHREAD TThreadAllocInfo* pThreadInfo; -static TThreadAllocInfo* pThreadInfoList; - -static int* volatile nLockThreadInfo = nullptr; -class TLockThreadListMMgr { -public: - TLockThreadListMMgr() { - RealEnterCritical(&nLockThreadInfo); - } - ~TLockThreadListMMgr() { - RealLeaveCritical(&nLockThreadInfo); - } -}; - -static Y_FORCE_INLINE void IncrementCounter(ELFAllocCounter counter, size_t value) { -#ifdef LFALLOC_YT - TThreadAllocInfo* thr = pThreadInfo; - if (thr) { - thr->LocalCounters[counter].Increment(value); - } else { - AtomicAdd(GlobalCounters[counter], value); - } -#endif -} - -extern "C" i64 GetLFAllocCounterFast(int counter) { -#ifdef LFALLOC_YT - return GlobalCounters[counter]; -#else - return 0; -#endif -} - -extern "C" i64 GetLFAllocCounterFull(int counter) { -#ifdef LFALLOC_YT - i64 ret = GlobalCounters[counter]; - { - TLockThreadListMMgr ll; - for (TThreadAllocInfo** p = &pThreadInfoList; *p;) { - TThreadAllocInfo* pInfo = *p; - ret += pInfo->LocalCounters[counter].Value; - p = &pInfo->pNextInfo; - } - } - return ret; -#else - return 0; -#endif -} - -static void MoveSingleThreadFreeToGlobal(TThreadAllocInfo* pInfo) { - for (int sizeIdx = 0; sizeIdx < N_SIZES; ++sizeIdx) { - int& freePtrIdx = pInfo->FreePtrIndex[sizeIdx]; - char** freePtrs = pInfo->FreePtrs[sizeIdx]; - PutBlocksToGlobalFreeList(sizeIdx, freePtrs + freePtrIdx, THREAD_BUF - freePtrIdx); - freePtrIdx = THREAD_BUF; - } -} - -#ifdef _win_ -static bool IsDeadThread(TThreadAllocInfo* pInfo) { - DWORD dwExit; - bool isDead = !GetExitCodeThread(pInfo->hThread, &dwExit) || dwExit != STILL_ACTIVE; - return isDead; -} - -static void CleanupAfterDeadThreads() { - TLockThreadListMMgr ls; - for (TThreadAllocInfo** p = &pThreadInfoList; *p;) { - TThreadAllocInfo* pInfo = *p; - if (IsDeadThread(pInfo)) { - 
MoveSingleThreadFreeToGlobal(pInfo); - pInfo->Done(); - *p = pInfo->pNextInfo; - SystemFree(pInfo); - } else - p = &pInfo->pNextInfo; - } -} -#endif - -#ifndef _win_ -static pthread_key_t ThreadCacheCleaner; -static void* volatile ThreadCacheCleanerStarted; // 0 = not started, -1 = started, -2 = is starting -static PERTHREAD bool IsStoppingThread; - -static void FreeThreadCache(void*) { - TThreadAllocInfo* pToDelete = nullptr; - { - TLockThreadListMMgr ls; - pToDelete = pThreadInfo; - if (pToDelete == nullptr) - return; - - // remove from the list - for (TThreadAllocInfo** p = &pThreadInfoList; *p; p = &(*p)->pNextInfo) { - if (*p == pToDelete) { - *p = pToDelete->pNextInfo; - break; - } - } - IsStoppingThread = true; - pThreadInfo = nullptr; - } - - // free per thread buf - MoveSingleThreadFreeToGlobal(pToDelete); - pToDelete->Done(); - SystemFree(pToDelete); -} -#endif - -static void AllocThreadInfo() { -#ifndef _win_ - if (DoCas(&ThreadCacheCleanerStarted, (void*)-2, (void*)nullptr) == (void*)nullptr) { - pthread_key_create(&ThreadCacheCleaner, FreeThreadCache); - ThreadCacheCleanerStarted = (void*)-1; - } - if (ThreadCacheCleanerStarted != (void*)-1) - return; // do not use ThreadCacheCleaner until it is constructed - - { - if (IsStoppingThread) - return; - TLockThreadListMMgr ls; - if (IsStoppingThread) // better safe than sorry - return; - - pThreadInfo = (TThreadAllocInfo*)SystemAlloc(sizeof(TThreadAllocInfo)); - pThreadInfo->Init(&pThreadInfoList); - } - pthread_setspecific(ThreadCacheCleaner, (void*)-1); // without value destructor will not be called -#else - CleanupAfterDeadThreads(); - { - TLockThreadListMMgr ls; - pThreadInfo = (TThreadAllocInfo*)SystemAlloc(sizeof(TThreadAllocInfo)); - pThreadInfo->Init(&pThreadInfoList); - } -#endif -} - - ////////////////////////////////////////////////////////////////////////// - // DBG stuff - ////////////////////////////////////////////////////////////////////////// - -#if defined(LFALLOC_DBG) - -struct TAllocHeader { - size_t Size; - int Tag; - int Cookie; -}; - -static inline void* GetAllocPtr(TAllocHeader* p) { - return p + 1; -} - -static inline TAllocHeader* GetAllocHeader(void* p) { - return ((TAllocHeader*)p) - 1; -} - -PERTHREAD int AllocationTag; -extern "C" int SetThreadAllocTag(int tag) { - int prevTag = AllocationTag; - if (tag < DBG_ALLOC_MAX_TAG && tag >= 0) { - AllocationTag = tag; - } - return prevTag; -} - -PERTHREAD bool ProfileCurrentThread; -extern "C" bool SetProfileCurrentThread(bool newVal) { - bool prevVal = ProfileCurrentThread; - ProfileCurrentThread = newVal; - return prevVal; -} - -static volatile bool ProfileAllThreads; -extern "C" bool SetProfileAllThreads(bool newVal) { - bool prevVal = ProfileAllThreads; - ProfileAllThreads = newVal; - return prevVal; -} - -static volatile bool AllocationSamplingEnabled; -extern "C" bool SetAllocationSamplingEnabled(bool newVal) { - bool prevVal = AllocationSamplingEnabled; - AllocationSamplingEnabled = newVal; - return prevVal; -} - -static size_t AllocationSampleRate = 1000; -extern "C" size_t SetAllocationSampleRate(size_t newVal) { - size_t prevVal = AllocationSampleRate; - AllocationSampleRate = newVal; - return prevVal; -} - -static size_t AllocationSampleMaxSize = N_MAX_FAST_SIZE; -extern "C" size_t SetAllocationSampleMaxSize(size_t newVal) { - size_t prevVal = AllocationSampleMaxSize; - AllocationSampleMaxSize = newVal; - return prevVal; -} - -using TAllocationCallback = int(int tag, size_t size, int sizeIdx); -static TAllocationCallback* AllocationCallback; -extern 
"C" TAllocationCallback* SetAllocationCallback(TAllocationCallback* newVal) { - TAllocationCallback* prevVal = AllocationCallback; - AllocationCallback = newVal; - return prevVal; -} - -using TDeallocationCallback = void(int cookie, int tag, size_t size, int sizeIdx); -static TDeallocationCallback* DeallocationCallback; -extern "C" TDeallocationCallback* SetDeallocationCallback(TDeallocationCallback* newVal) { - TDeallocationCallback* prevVal = DeallocationCallback; - DeallocationCallback = newVal; - return prevVal; -} - -PERTHREAD TAtomic AllocationsCount; -PERTHREAD bool InAllocationCallback; - -static const int DBG_ALLOC_INVALID_COOKIE = -1; -static inline int SampleAllocation(TAllocHeader* p, int sizeIdx) { - int cookie = DBG_ALLOC_INVALID_COOKIE; - if (AllocationSamplingEnabled && (ProfileCurrentThread || ProfileAllThreads) && !InAllocationCallback) { - if (p->Size > AllocationSampleMaxSize || ++AllocationsCount % AllocationSampleRate == 0) { - if (AllocationCallback) { - InAllocationCallback = true; - cookie = AllocationCallback(p->Tag, p->Size, sizeIdx); - InAllocationCallback = false; - } - } - } - return cookie; -} - -static inline void SampleDeallocation(TAllocHeader* p, int sizeIdx) { - if (p->Cookie != DBG_ALLOC_INVALID_COOKIE && !InAllocationCallback) { - if (DeallocationCallback) { - InAllocationCallback = true; - DeallocationCallback(p->Cookie, p->Tag, p->Size, sizeIdx); - InAllocationCallback = false; - } - } -} - -static inline void TrackPerTagAllocation(TAllocHeader* p, int sizeIdx) { - if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { - Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES); - auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; - - TThreadAllocInfo* thr = pThreadInfo; - if (thr) { - auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx]; - local.Alloc(global, p->Size); - } else { - global.Alloc(p->Size); - } - } -} - -static inline void TrackPerTagDeallocation(TAllocHeader* p, int sizeIdx) { - if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) { - Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES); - auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx]; - - TThreadAllocInfo* thr = pThreadInfo; - if (thr) { - auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx]; - local.Free(global, p->Size); - } else { - global.Free(p->Size); - } - } -} - -static void* TrackAllocation(void* ptr, size_t size, int sizeIdx) { - TAllocHeader* p = (TAllocHeader*)ptr; - p->Size = size; - p->Tag = AllocationTag; - p->Cookie = SampleAllocation(p, sizeIdx); - TrackPerTagAllocation(p, sizeIdx); - return GetAllocPtr(p); -} - -static void TrackDeallocation(void* ptr, int sizeIdx) { - TAllocHeader* p = (TAllocHeader*)ptr; - SampleDeallocation(p, sizeIdx); - TrackPerTagDeallocation(p, sizeIdx); -} - -struct TPerTagAllocInfo { - ssize_t Count; - ssize_t Size; -}; - -extern "C" void GetPerTagAllocInfo( - bool flushPerThreadCounters, - TPerTagAllocInfo* info, - int& maxTag, - int& numSizes) { - maxTag = DBG_ALLOC_MAX_TAG; - numSizes = DBG_ALLOC_NUM_SIZES; - - if (info) { - if (flushPerThreadCounters) { - TLockThreadListMMgr ll; - for (TThreadAllocInfo** p = &pThreadInfoList; *p;) { - TThreadAllocInfo* pInfo = *p; - for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { - for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { - auto& local = pInfo->LocalPerTagAllocCounters[tag][sizeIdx]; - auto& global = GlobalPerTagAllocCounters[tag][sizeIdx]; - local.Flush(global); - } - } - p = &pInfo->pNextInfo; - } - } - - for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) { - for (int 
sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) { - auto& global = GlobalPerTagAllocCounters[tag][sizeIdx]; - auto& res = info[tag * DBG_ALLOC_NUM_SIZES + sizeIdx]; - res.Count = global.Count; - res.Size = global.Size; - } - } - } -} - -#endif // LFALLOC_DBG - -////////////////////////////////////////////////////////////////////////// -static Y_FORCE_INLINE void* LFAllocImpl(size_t _nSize) { -#if defined(LFALLOC_DBG) - size_t size = _nSize; - _nSize += sizeof(TAllocHeader); -#endif - - IncrementCounter(CT_USER_ALLOC, _nSize); - - int nSizeIdx; - if (_nSize > 512) { - if (_nSize > N_MAX_FAST_SIZE) { - void* ptr = LargeBlockAlloc(_nSize, CT_LARGE_ALLOC); -#if defined(LFALLOC_DBG) - ptr = TrackAllocation(ptr, size, N_SIZES); -#endif - return ptr; - } - nSizeIdx = size2idxArr2[(_nSize - 1) >> 8]; - } else - nSizeIdx = size2idxArr1[1 + (((int)_nSize - 1) >> 3)]; - - IncrementCounter(CT_SMALL_ALLOC, nSizeIdxToSize[nSizeIdx]); - - // check per thread buffer - TThreadAllocInfo* thr = pThreadInfo; - if (!thr) { - AllocThreadInfo(); - thr = pThreadInfo; - if (!thr) { - void* ptr = LFAllocNoCache(nSizeIdx, MEM_DEFRAG); -#if defined(LFALLOC_DBG) - ptr = TrackAllocation(ptr, size, nSizeIdx); -#endif - return ptr; - } - } - { - int& freePtrIdx = thr->FreePtrIndex[nSizeIdx]; - if (freePtrIdx < THREAD_BUF) { - void* ptr = thr->FreePtrs[nSizeIdx][freePtrIdx++]; -#if defined(LFALLOC_DBG) - ptr = TrackAllocation(ptr, size, nSizeIdx); -#endif - return ptr; - } - - // try to alloc from global free list - char* buf[FL_GROUP_SIZE]; - int count = TakeBlocksFromGlobalFreeList(nSizeIdx, buf); - if (count == 0) { - count = LFAllocNoCacheMultiple(nSizeIdx, buf); - if (count == 0) { - NMalloc::AbortFromCorruptedAllocator(); // no way LFAllocNoCacheMultiple() can fail - } - } - char** dstBuf = thr->FreePtrs[nSizeIdx] + freePtrIdx - 1; - for (int i = 0; i < count - 1; ++i) - dstBuf[-i] = buf[i]; - freePtrIdx -= count - 1; - void* ptr = buf[count - 1]; -#if defined(LFALLOC_DBG) - ptr = TrackAllocation(ptr, size, nSizeIdx); -#endif - return ptr; - } -} - -static Y_FORCE_INLINE void* LFAlloc(size_t _nSize) { - void* res = LFAllocImpl(_nSize); -#ifdef DBG_FILL_MEMORY - if (FillMemoryOnAllocation && res && (_nSize <= DBG_FILL_MAX_SIZE)) { - memset(res, 0xcf, _nSize); - } -#endif - return res; -} - -static Y_FORCE_INLINE void LFFree(void* p) { -#if defined(LFALLOC_DBG) - if (p == nullptr) - return; - p = GetAllocHeader(p); -#endif - - uintptr_t chkOffset = ((char*)p - ALLOC_START) - 1ll; - if (chkOffset >= N_MAX_WORKSET_SIZE) { - if (p == nullptr) - return; -#if defined(LFALLOC_DBG) - TrackDeallocation(p, N_SIZES); -#endif - LargeBlockFree(p, CT_LARGE_FREE); - return; - } - - uintptr_t chunk = ((char*)p - ALLOC_START) / N_CHUNK_SIZE; - ptrdiff_t nSizeIdx = chunkSizeIdx[chunk]; - if (nSizeIdx <= 0) { -#if defined(LFALLOC_DBG) - TrackDeallocation(p, N_SIZES); -#endif - LargeBlockFree(p, CT_LARGE_FREE); - return; - } - -#if defined(LFALLOC_DBG) - TrackDeallocation(p, nSizeIdx); -#endif - -#ifdef DBG_FILL_MEMORY - memset(p, 0xfe, nSizeIdxToSize[nSizeIdx]); -#endif - - IncrementCounter(CT_SMALL_FREE, nSizeIdxToSize[nSizeIdx]); - - // try to store info to per thread buf - TThreadAllocInfo* thr = pThreadInfo; - if (thr) { - int& freePtrIdx = thr->FreePtrIndex[nSizeIdx]; - if (freePtrIdx > borderSizes[nSizeIdx]) { - thr->FreePtrs[nSizeIdx][--freePtrIdx] = (char*)p; - return; - } - - // move several pointers to global free list - int freeCount = FL_GROUP_SIZE; - if (freeCount > THREAD_BUF - freePtrIdx) - freeCount = THREAD_BUF 
- freePtrIdx; - char** freePtrs = thr->FreePtrs[nSizeIdx]; - PutBlocksToGlobalFreeList(nSizeIdx, freePtrs + freePtrIdx, freeCount); - freePtrIdx += freeCount; - - freePtrs[--freePtrIdx] = (char*)p; - - } else { - AllocThreadInfo(); - PutBlocksToGlobalFreeList(nSizeIdx, (char**)&p, 1); - } -} - -static size_t LFGetSize(const void* p) { -#if defined(LFALLOC_DBG) - if (p == nullptr) - return 0; - return GetAllocHeader(const_cast(p))->Size; -#endif - - uintptr_t chkOffset = ((const char*)p - ALLOC_START); - if (chkOffset >= N_MAX_WORKSET_SIZE) { - if (p == nullptr) - return 0; - return TLargeBlk::As(p)->Pages * 4096ll; - } - uintptr_t chunk = ((const char*)p - ALLOC_START) / N_CHUNK_SIZE; - ptrdiff_t nSizeIdx = chunkSizeIdx[chunk]; - if (nSizeIdx <= 0) - return TLargeBlk::As(p)->Pages * 4096ll; - return nSizeIdxToSize[nSizeIdx]; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Output mem alloc stats -const int N_PAGE_SIZE = 4096; -static void DebugTraceMMgr(const char* pszFormat, ...) // __cdecl -{ - static char buff[20000]; - va_list va; - // - va_start(va, pszFormat); - vsprintf(buff, pszFormat, va); - va_end(va); -// -#ifdef _win_ - OutputDebugStringA(buff); -#else - fprintf(stderr, buff); -#endif -} - -struct TChunkStats { - char *Start, *Finish; - i64 Size; - char* Entries; - i64 FreeCount; - - TChunkStats(size_t chunk, i64 size, char* entries) - : Size(size) - , Entries(entries) - , FreeCount(0) - { - Start = ALLOC_START + chunk * N_CHUNK_SIZE; - Finish = Start + N_CHUNK_SIZE; - } - void CheckBlock(char* pBlock) { - if (pBlock && pBlock >= Start && pBlock < Finish) { - ++FreeCount; - i64 nShift = pBlock - Start; - i64 nOffsetInStep = nShift & (N_CHUNK_SIZE - 1); - Entries[nOffsetInStep / Size] = 1; - } - } - void SetGlobalFree(char* ptr) { - i64 nShift = ptr - Start; - i64 nOffsetInStep = nShift & (N_CHUNK_SIZE - 1); - while (nOffsetInStep + Size <= N_CHUNK_SIZE) { - ++FreeCount; - Entries[nOffsetInStep / Size] = 1; - nOffsetInStep += Size; - } - } -}; - -static void DumpMemoryBlockUtilizationLocked() { - TFreeListGroup* wholeLists[N_SIZES]; - for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx) { - wholeLists[nSizeIdx] = (TFreeListGroup*)globalFreeLists[nSizeIdx].GetWholeList(); - } - char* bfList = (char*)blockFreeList.GetWholeList(); - - DebugTraceMMgr("memory blocks utilisation stats:\n"); - i64 nTotalAllocated = 0, nTotalFree = 0, nTotalBadPages = 0, nTotalPages = 0, nTotalUsed = 0, nTotalLocked = 0; - i64 nTotalGroupBlocks = 0; - char* entries; - entries = (char*)SystemAlloc((N_CHUNK_SIZE / 4)); - for (size_t k = 0; k < N_CHUNKS; ++k) { - if (chunkSizeIdx[k] <= 0) { - if (chunkSizeIdx[k] == -1) - nTotalLocked += N_CHUNK_SIZE; - continue; - } - i64 nSizeIdx = chunkSizeIdx[k]; - i64 nSize = nSizeIdxToSize[nSizeIdx]; - TChunkStats cs(k, nSize, entries); - int nEntriesTotal = N_CHUNK_SIZE / nSize; - memset(entries, 0, nEntriesTotal); - for (TFreeListGroup* g = wholeLists[nSizeIdx]; g; g = g->Next) { - for (auto& ptr : g->Ptrs) - cs.CheckBlock(ptr); - } - TChunkStats csGB(k, nSize, entries); - if (nSizeIdx == FREE_LIST_GROUP_SIZEIDX) { - for (auto g : wholeLists) { - for (; g; g = g->Next) - csGB.CheckBlock((char*)g); - } - for (char* blk = bfList; blk; blk = *(char**)blk) - csGB.CheckBlock(blk); - nTotalGroupBlocks += csGB.FreeCount * nSize; - } - if (((globalCurrentPtr[nSizeIdx] - ALLOC_START) / N_CHUNK_SIZE) == k) - cs.SetGlobalFree(globalCurrentPtr[nSizeIdx]); - nTotalUsed += (nEntriesTotal - cs.FreeCount - 
csGB.FreeCount) * nSize; - - char pages[N_CHUNK_SIZE / N_PAGE_SIZE]; - memset(pages, 0, sizeof(pages)); - for (int i = 0, nShift = 0; i < nEntriesTotal; ++i, nShift += nSize) { - int nBit = 0; - if (entries[i]) - nBit = 1; // free entry - else - nBit = 2; // used entry - for (i64 nDelta = nSize - 1; nDelta >= 0; nDelta -= N_PAGE_SIZE) - pages[(nShift + nDelta) / N_PAGE_SIZE] |= nBit; - } - i64 nBadPages = 0; - for (auto page : pages) { - nBadPages += page == 3; - nTotalPages += page != 1; - } - DebugTraceMMgr("entry = %lld; size = %lld; free = %lld; system %lld; utilisation = %g%%, fragmentation = %g%%\n", - k, nSize, cs.FreeCount * nSize, csGB.FreeCount * nSize, - (N_CHUNK_SIZE - cs.FreeCount * nSize) * 100.0f / N_CHUNK_SIZE, 100.0f * nBadPages / Y_ARRAY_SIZE(pages)); - nTotalAllocated += N_CHUNK_SIZE; - nTotalFree += cs.FreeCount * nSize; - nTotalBadPages += nBadPages; - } - SystemFree(entries); - DebugTraceMMgr("Total allocated = %llu, free = %lld, system = %lld, locked for future use %lld, utilisation = %g, fragmentation = %g\n", - nTotalAllocated, nTotalFree, nTotalGroupBlocks, nTotalLocked, - 100.0f * (nTotalAllocated - nTotalFree) / nTotalAllocated, 100.0f * nTotalBadPages / nTotalPages); - DebugTraceMMgr("Total %lld bytes used, %lld bytes in used pages\n", nTotalUsed, nTotalPages * N_PAGE_SIZE); - - for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx) - globalFreeLists[nSizeIdx].ReturnWholeList(wholeLists[nSizeIdx]); - blockFreeList.ReturnWholeList(bfList); -} - -void FlushThreadFreeList() { - if (pThreadInfo) - MoveSingleThreadFreeToGlobal(pThreadInfo); -} - -void DumpMemoryBlockUtilization() { - // move current thread free to global lists to get better statistics - FlushThreadFreeList(); - { - CCriticalSectionLockMMgr ls; - DumpMemoryBlockUtilizationLocked(); - } -} - -////////////////////////////////////////////////////////////////////////// -// malloc api - -static bool LFAlloc_SetParam(const char* param, const char* value) { - if (!strcmp(param, "LB_LIMIT_TOTAL_SIZE")) { - LB_LIMIT_TOTAL_SIZE = atoi(value); - return true; - } - if (!strcmp(param, "LB_LIMIT_TOTAL_SIZE_BYTES")) { - LB_LIMIT_TOTAL_SIZE = (atoi(value) + N_PAGE_SIZE - 1) / N_PAGE_SIZE; - return true; - } -#ifdef DBG_FILL_MEMORY - if (!strcmp(param, "FillMemoryOnAllocation")) { - FillMemoryOnAllocation = !strcmp(value, "true"); - return true; - } -#endif - if (!strcmp(param, "BeforeLFAllocGlobalLockAcquired")) { - BeforeLFAllocGlobalLockAcquired = (decltype(BeforeLFAllocGlobalLockAcquired))(value); - return true; - } - if (!strcmp(param, "AfterLFAllocGlobalLockReleased")) { - AfterLFAllocGlobalLockReleased = (decltype(AfterLFAllocGlobalLockReleased))(value); - return true; - } - if (!strcmp(param, "EnterCritical")) { - assert(value); - RealEnterCritical = (decltype(RealEnterCritical))(value); - return true; - } - if (!strcmp(param, "LeaveCritical")) { - assert(value); - RealLeaveCritical = (decltype(RealLeaveCritical))(value); - return true; - } - if (!strcmp(param, "TransparentHugePages")) { - TransparentHugePages = !strcmp(value, "true"); - return true; - } - if (!strcmp(param, "MapHugeTLB")) { - MapHugeTLB = !strcmp(value, "true"); - return true; - } - if (!strcmp(param, "EnableDefrag")) { - EnableDefrag = !strcmp(value, "true"); - return true; - } - return false; -}; - -static const char* LFAlloc_GetParam(const char* param) { - struct TParam { - const char* Name; - const char* Value; - }; - - static const TParam Params[] = { - {"GetLFAllocCounterFast", (const char*)&GetLFAllocCounterFast}, - 
{"GetLFAllocCounterFull", (const char*)&GetLFAllocCounterFull}, -#if defined(LFALLOC_DBG) - {"SetThreadAllocTag", (const char*)&SetThreadAllocTag}, - {"SetProfileCurrentThread", (const char*)&SetProfileCurrentThread}, - {"SetProfileAllThreads", (const char*)&SetProfileAllThreads}, - {"SetAllocationSamplingEnabled", (const char*)&SetAllocationSamplingEnabled}, - {"SetAllocationSampleRate", (const char*)&SetAllocationSampleRate}, - {"SetAllocationSampleMaxSize", (const char*)&SetAllocationSampleMaxSize}, - {"SetAllocationCallback", (const char*)&SetAllocationCallback}, - {"SetDeallocationCallback", (const char*)&SetDeallocationCallback}, - {"GetPerTagAllocInfo", (const char*)&GetPerTagAllocInfo}, -#endif // LFALLOC_DBG - }; - - for (int i = 0; i < Y_ARRAY_SIZE(Params); ++i) { - if (strcmp(param, Params[i].Name) == 0) { - return Params[i].Value; - } - } - return nullptr; -} - -static Y_FORCE_INLINE void* LFVAlloc(size_t size) { - const size_t pg = N_PAGE_SIZE; - size_t bigsize = (size + pg - 1) & (~(pg - 1)); - void* p = LFAlloc(bigsize); - - Y_ASSERT_NOBT((intptr_t)p % N_PAGE_SIZE == 0); - return p; -} - -static Y_FORCE_INLINE int LFPosixMemalign(void** memptr, size_t alignment, size_t size) { - if (Y_UNLIKELY(alignment > 4096)) { -#ifdef _win_ - OutputDebugStringA("Larger alignment are not guaranteed with this implementation\n"); -#else - fprintf(stderr, "Larger alignment are not guaranteed with this implementation\n"); -#endif - NMalloc::AbortFromCorruptedAllocator(); - } - size_t bigsize = size; - if (bigsize <= alignment) { - bigsize = alignment; - } else if (bigsize < 2 * alignment) { - bigsize = 2 * alignment; - } - *memptr = LFAlloc(bigsize); - return 0; -} -#endif diff --git a/contrib/lfalloc/src/lfmalloc.h b/contrib/lfalloc/src/lfmalloc.h deleted file mode 100644 index 1e6a0d55773..00000000000 --- a/contrib/lfalloc/src/lfmalloc.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include -#include -#include "util/system/compiler.h" - -namespace NMalloc { - volatile inline bool IsAllocatorCorrupted = false; - - static inline void AbortFromCorruptedAllocator() { - IsAllocatorCorrupted = true; - abort(); - } - - struct TAllocHeader { - void* Block; - size_t AllocSize; - void Y_FORCE_INLINE Encode(void* block, size_t size, size_t signature) { - Block = block; - AllocSize = size | signature; - } - }; -} diff --git a/contrib/lfalloc/src/util/README.md b/contrib/lfalloc/src/util/README.md deleted file mode 100644 index c367cb4b439..00000000000 --- a/contrib/lfalloc/src/util/README.md +++ /dev/null @@ -1,33 +0,0 @@ -Style guide for the util folder is a stricter version of general style guide (mostly in terms of ambiguity resolution). 
- - * all {} must be in K&R style - * &, * tied closer to a type, not to variable - * always use `using` not `typedef` - * even a single line block must be in braces {}: - ``` - if (A) { - B(); - } - ``` - * _ at the end of private data member of a class - `First_`, `Second_` - * every .h file must be accompanied with corresponding .cpp to avoid a leakage and check that it is self contained - * prohibited to use `printf`-like functions - - -Things declared in the general style guide, which sometimes are missed: - - * `template <`, not `template<` - * `noexcept`, not `throw ()` nor `throw()`, not required for destructors - * indents inside `namespace` same as inside `class` - - -Requirements for a new code (and for corrections in an old code which involves change of behaviour) in util: - - * presence of UNIT-tests - * presence of comments in Doxygen style - * accessors without Get prefix (`Length()`, but not `GetLength()`) - -This guide is not a mandatory as there is the general style guide. -Nevertheless if it is not followed, then a next `ya style .` run in the util folder will undeservedly update authors of some lines of code. - -Thus before a commit it is recommended to run `ya style .` in the util folder. diff --git a/contrib/lfalloc/src/util/system/atomic.h b/contrib/lfalloc/src/util/system/atomic.h deleted file mode 100644 index 9876515a54d..00000000000 --- a/contrib/lfalloc/src/util/system/atomic.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "defaults.h" - -using TAtomicBase = intptr_t; -using TAtomic = volatile TAtomicBase; - -#if defined(__GNUC__) -#include "atomic_gcc.h" -#elif defined(_MSC_VER) -#include "atomic_win.h" -#else -#error unsupported platform -#endif - -#if !defined(ATOMIC_COMPILER_BARRIER) -#define ATOMIC_COMPILER_BARRIER() -#endif - -static inline TAtomicBase AtomicSub(TAtomic& a, TAtomicBase v) { - return AtomicAdd(a, -v); -} - -static inline TAtomicBase AtomicGetAndSub(TAtomic& a, TAtomicBase v) { - return AtomicGetAndAdd(a, -v); -} - -#if defined(USE_GENERIC_SETGET) -static inline TAtomicBase AtomicGet(const TAtomic& a) { - return a; -} - -static inline void AtomicSet(TAtomic& a, TAtomicBase v) { - a = v; -} -#endif - -static inline bool AtomicTryLock(TAtomic* a) { - return AtomicCas(a, 1, 0); -} - -static inline bool AtomicTryAndTryLock(TAtomic* a) { - return (AtomicGet(*a) == 0) && AtomicTryLock(a); -} - -static inline void AtomicUnlock(TAtomic* a) { - ATOMIC_COMPILER_BARRIER(); - AtomicSet(*a, 0); -} - -#include "atomic_ops.h" diff --git a/contrib/lfalloc/src/util/system/atomic_gcc.h b/contrib/lfalloc/src/util/system/atomic_gcc.h deleted file mode 100644 index ed8dc2bdc53..00000000000 --- a/contrib/lfalloc/src/util/system/atomic_gcc.h +++ /dev/null @@ -1,90 +0,0 @@ -#pragma once - -#define ATOMIC_COMPILER_BARRIER() __asm__ __volatile__("" \ - : \ - : \ - : "memory") - -static inline TAtomicBase AtomicGet(const TAtomic& a) { - TAtomicBase tmp; -#if defined(_arm64_) - __asm__ __volatile__( - "ldar %x[value], %[ptr] \n\t" - : [value] "=r"(tmp) - : [ptr] "Q"(a) - : "memory"); -#else - __atomic_load(&a, &tmp, __ATOMIC_ACQUIRE); -#endif - return tmp; -} - -static inline void AtomicSet(TAtomic& a, TAtomicBase v) { -#if defined(_arm64_) - __asm__ __volatile__( - "stlr %x[value], %[ptr] \n\t" - : [ptr] "=Q"(a) - : [value] "r"(v) - : "memory"); -#else - __atomic_store(&a, &v, __ATOMIC_RELEASE); -#endif -} - -static inline intptr_t AtomicIncrement(TAtomic& p) { - return __atomic_add_fetch(&p, 1, __ATOMIC_SEQ_CST); -} - -static inline intptr_t 
AtomicGetAndIncrement(TAtomic& p) { - return __atomic_fetch_add(&p, 1, __ATOMIC_SEQ_CST); -} - -static inline intptr_t AtomicDecrement(TAtomic& p) { - return __atomic_sub_fetch(&p, 1, __ATOMIC_SEQ_CST); -} - -static inline intptr_t AtomicGetAndDecrement(TAtomic& p) { - return __atomic_fetch_sub(&p, 1, __ATOMIC_SEQ_CST); -} - -static inline intptr_t AtomicAdd(TAtomic& p, intptr_t v) { - return __atomic_add_fetch(&p, v, __ATOMIC_SEQ_CST); -} - -static inline intptr_t AtomicGetAndAdd(TAtomic& p, intptr_t v) { - return __atomic_fetch_add(&p, v, __ATOMIC_SEQ_CST); -} - -static inline intptr_t AtomicSwap(TAtomic* p, intptr_t v) { - (void)p; // disable strange 'parameter set but not used' warning on gcc - intptr_t ret; - __atomic_exchange(p, &v, &ret, __ATOMIC_SEQ_CST); - return ret; -} - -static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) { - (void)a; // disable strange 'parameter set but not used' warning on gcc - return __atomic_compare_exchange(a, &compare, &exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); -} - -static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) { - (void)a; // disable strange 'parameter set but not used' warning on gcc - __atomic_compare_exchange(a, &compare, &exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); - return compare; -} - -static inline intptr_t AtomicOr(TAtomic& a, intptr_t b) { - return __atomic_or_fetch(&a, b, __ATOMIC_SEQ_CST); -} - -static inline intptr_t AtomicXor(TAtomic& a, intptr_t b) { - return __atomic_xor_fetch(&a, b, __ATOMIC_SEQ_CST); -} - -static inline intptr_t AtomicAnd(TAtomic& a, intptr_t b) { - return __atomic_and_fetch(&a, b, __ATOMIC_SEQ_CST); -} - -static inline void AtomicBarrier() { - __sync_synchronize(); -} diff --git a/contrib/lfalloc/src/util/system/atomic_ops.h b/contrib/lfalloc/src/util/system/atomic_ops.h deleted file mode 100644 index 425b643e14d..00000000000 --- a/contrib/lfalloc/src/util/system/atomic_ops.h +++ /dev/null @@ -1,189 +0,0 @@ -#pragma once - -#include - -template -inline TAtomic* AsAtomicPtr(T volatile* target) { - return reinterpret_cast(target); -} - -template -inline const TAtomic* AsAtomicPtr(T const volatile* target) { - return reinterpret_cast(target); -} - -// integral types - -template -struct TAtomicTraits { - enum { - Castable = std::is_integral::value && sizeof(T) == sizeof(TAtomicBase) && !std::is_const::value, - }; -}; - -template -using TEnableIfCastable = std::enable_if_t::Castable, TT>; - -template -inline TEnableIfCastable AtomicGet(T const volatile& target) { - return static_cast(AtomicGet(*AsAtomicPtr(&target))); -} - -template -inline TEnableIfCastable AtomicSet(T volatile& target, TAtomicBase value) { - AtomicSet(*AsAtomicPtr(&target), value); -} - -template -inline TEnableIfCastable AtomicIncrement(T volatile& target) { - return static_cast(AtomicIncrement(*AsAtomicPtr(&target))); -} - -template -inline TEnableIfCastable AtomicGetAndIncrement(T volatile& target) { - return static_cast(AtomicGetAndIncrement(*AsAtomicPtr(&target))); -} - -template -inline TEnableIfCastable AtomicDecrement(T volatile& target) { - return static_cast(AtomicDecrement(*AsAtomicPtr(&target))); -} - -template -inline TEnableIfCastable AtomicGetAndDecrement(T volatile& target) { - return static_cast(AtomicGetAndDecrement(*AsAtomicPtr(&target))); -} - -template -inline TEnableIfCastable AtomicAdd(T volatile& target, TAtomicBase value) { - return static_cast(AtomicAdd(*AsAtomicPtr(&target), value)); -} - -template -inline TEnableIfCastable 
AtomicGetAndAdd(T volatile& target, TAtomicBase value) { - return static_cast(AtomicGetAndAdd(*AsAtomicPtr(&target), value)); -} - -template -inline TEnableIfCastable AtomicSub(T volatile& target, TAtomicBase value) { - return static_cast(AtomicSub(*AsAtomicPtr(&target), value)); -} - -template -inline TEnableIfCastable AtomicGetAndSub(T volatile& target, TAtomicBase value) { - return static_cast(AtomicGetAndSub(*AsAtomicPtr(&target), value)); -} - -template -inline TEnableIfCastable AtomicSwap(T volatile* target, TAtomicBase exchange) { - return static_cast(AtomicSwap(AsAtomicPtr(target), exchange)); -} - -template -inline TEnableIfCastable AtomicCas(T volatile* target, TAtomicBase exchange, TAtomicBase compare) { - return AtomicCas(AsAtomicPtr(target), exchange, compare); -} - -template -inline TEnableIfCastable AtomicGetAndCas(T volatile* target, TAtomicBase exchange, TAtomicBase compare) { - return static_cast(AtomicGetAndCas(AsAtomicPtr(target), exchange, compare)); -} - -template -inline TEnableIfCastable AtomicTryLock(T volatile* target) { - return AtomicTryLock(AsAtomicPtr(target)); -} - -template -inline TEnableIfCastable AtomicTryAndTryLock(T volatile* target) { - return AtomicTryAndTryLock(AsAtomicPtr(target)); -} - -template -inline TEnableIfCastable AtomicUnlock(T volatile* target) { - AtomicUnlock(AsAtomicPtr(target)); -} - -template -inline TEnableIfCastable AtomicOr(T volatile& target, TAtomicBase value) { - return static_cast(AtomicOr(*AsAtomicPtr(&target), value)); -} - -template -inline TEnableIfCastable AtomicAnd(T volatile& target, TAtomicBase value) { - return static_cast(AtomicAnd(*AsAtomicPtr(&target), value)); -} - -template -inline TEnableIfCastable AtomicXor(T volatile& target, TAtomicBase value) { - return static_cast(AtomicXor(*AsAtomicPtr(&target), value)); -} - -// pointer types - -template -inline T* AtomicGet(T* const volatile& target) { - return reinterpret_cast(AtomicGet(*AsAtomicPtr(&target))); -} - -template -inline void AtomicSet(T* volatile& target, T* value) { - AtomicSet(*AsAtomicPtr(&target), reinterpret_cast(value)); -} - -using TNullPtr = decltype(nullptr); - -template -inline void AtomicSet(T* volatile& target, TNullPtr) { - AtomicSet(*AsAtomicPtr(&target), 0); -} - -template -inline T* AtomicSwap(T* volatile* target, T* exchange) { - return reinterpret_cast(AtomicSwap(AsAtomicPtr(target), reinterpret_cast(exchange))); -} - -template -inline T* AtomicSwap(T* volatile* target, TNullPtr) { - return reinterpret_cast(AtomicSwap(AsAtomicPtr(target), 0)); -} - -template -inline bool AtomicCas(T* volatile* target, T* exchange, T* compare) { - return AtomicCas(AsAtomicPtr(target), reinterpret_cast(exchange), reinterpret_cast(compare)); -} - -template -inline T* AtomicGetAndCas(T* volatile* target, T* exchange, T* compare) { - return reinterpret_cast(AtomicGetAndCas(AsAtomicPtr(target), reinterpret_cast(exchange), reinterpret_cast(compare))); -} - -template -inline bool AtomicCas(T* volatile* target, T* exchange, TNullPtr) { - return AtomicCas(AsAtomicPtr(target), reinterpret_cast(exchange), 0); -} - -template -inline T* AtomicGetAndCas(T* volatile* target, T* exchange, TNullPtr) { - return reinterpret_cast(AtomicGetAndCas(AsAtomicPtr(target), reinterpret_cast(exchange), 0)); -} - -template -inline bool AtomicCas(T* volatile* target, TNullPtr, T* compare) { - return AtomicCas(AsAtomicPtr(target), 0, reinterpret_cast(compare)); -} - -template -inline T* AtomicGetAndCas(T* volatile* target, TNullPtr, T* compare) { - return 
reinterpret_cast(AtomicGetAndCas(AsAtomicPtr(target), 0, reinterpret_cast(compare))); -} - -template -inline bool AtomicCas(T* volatile* target, TNullPtr, TNullPtr) { - return AtomicCas(AsAtomicPtr(target), 0, 0); -} - -template -inline T* AtomicGetAndCas(T* volatile* target, TNullPtr, TNullPtr) { - return reinterpret_cast(AtomicGetAndCas(AsAtomicPtr(target), 0, 0)); -} diff --git a/contrib/lfalloc/src/util/system/atomic_win.h b/contrib/lfalloc/src/util/system/atomic_win.h deleted file mode 100644 index 1abebd87b38..00000000000 --- a/contrib/lfalloc/src/util/system/atomic_win.h +++ /dev/null @@ -1,114 +0,0 @@ -#pragma once - -#include - -#define USE_GENERIC_SETGET - -#if defined(_i386_) - -#pragma intrinsic(_InterlockedIncrement) -#pragma intrinsic(_InterlockedDecrement) -#pragma intrinsic(_InterlockedExchangeAdd) -#pragma intrinsic(_InterlockedExchange) -#pragma intrinsic(_InterlockedCompareExchange) - -static inline intptr_t AtomicIncrement(TAtomic& a) { - return _InterlockedIncrement((volatile long*)&a); -} - -static inline intptr_t AtomicGetAndIncrement(TAtomic& a) { - return _InterlockedIncrement((volatile long*)&a) - 1; -} - -static inline intptr_t AtomicDecrement(TAtomic& a) { - return _InterlockedDecrement((volatile long*)&a); -} - -static inline intptr_t AtomicGetAndDecrement(TAtomic& a) { - return _InterlockedDecrement((volatile long*)&a) + 1; -} - -static inline intptr_t AtomicAdd(TAtomic& a, intptr_t b) { - return _InterlockedExchangeAdd((volatile long*)&a, b) + b; -} - -static inline intptr_t AtomicGetAndAdd(TAtomic& a, intptr_t b) { - return _InterlockedExchangeAdd((volatile long*)&a, b); -} - -static inline intptr_t AtomicSwap(TAtomic* a, intptr_t b) { - return _InterlockedExchange((volatile long*)a, b); -} - -static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) { - return _InterlockedCompareExchange((volatile long*)a, exchange, compare) == compare; -} - -static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) { - return _InterlockedCompareExchange((volatile long*)a, exchange, compare); -} - -#else // _x86_64_ - -#pragma intrinsic(_InterlockedIncrement64) -#pragma intrinsic(_InterlockedDecrement64) -#pragma intrinsic(_InterlockedExchangeAdd64) -#pragma intrinsic(_InterlockedExchange64) -#pragma intrinsic(_InterlockedCompareExchange64) - -static inline intptr_t AtomicIncrement(TAtomic& a) { - return _InterlockedIncrement64((volatile __int64*)&a); -} - -static inline intptr_t AtomicGetAndIncrement(TAtomic& a) { - return _InterlockedIncrement64((volatile __int64*)&a) - 1; -} - -static inline intptr_t AtomicDecrement(TAtomic& a) { - return _InterlockedDecrement64((volatile __int64*)&a); -} - -static inline intptr_t AtomicGetAndDecrement(TAtomic& a) { - return _InterlockedDecrement64((volatile __int64*)&a) + 1; -} - -static inline intptr_t AtomicAdd(TAtomic& a, intptr_t b) { - return _InterlockedExchangeAdd64((volatile __int64*)&a, b) + b; -} - -static inline intptr_t AtomicGetAndAdd(TAtomic& a, intptr_t b) { - return _InterlockedExchangeAdd64((volatile __int64*)&a, b); -} - -static inline intptr_t AtomicSwap(TAtomic* a, intptr_t b) { - return _InterlockedExchange64((volatile __int64*)a, b); -} - -static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) { - return _InterlockedCompareExchange64((volatile __int64*)a, exchange, compare) == compare; -} - -static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) { - return _InterlockedCompareExchange64((volatile __int64*)a, 
exchange, compare); -} - -static inline intptr_t AtomicOr(TAtomic& a, intptr_t b) { - return _InterlockedOr64(&a, b) | b; -} - -static inline intptr_t AtomicAnd(TAtomic& a, intptr_t b) { - return _InterlockedAnd64(&a, b) & b; -} - -static inline intptr_t AtomicXor(TAtomic& a, intptr_t b) { - return _InterlockedXor64(&a, b) ^ b; -} - -#endif // _x86_ - -//TODO -static inline void AtomicBarrier() { - TAtomic val = 0; - - AtomicSwap(&val, 0); -} diff --git a/contrib/lfalloc/src/util/system/compiler.h b/contrib/lfalloc/src/util/system/compiler.h deleted file mode 100644 index b5cec600923..00000000000 --- a/contrib/lfalloc/src/util/system/compiler.h +++ /dev/null @@ -1,617 +0,0 @@ -#pragma once - -// useful cross-platfrom definitions for compilers - -/** - * @def Y_FUNC_SIGNATURE - * - * Use this macro to get pretty function name (see example). - * - * @code - * void Hi() { - * Cout << Y_FUNC_SIGNATURE << Endl; - * } - - * template - * void Do() { - * Cout << Y_FUNC_SIGNATURE << Endl; - * } - - * int main() { - * Hi(); // void Hi() - * Do(); // void Do() [T = int] - * Do(); // void Do() [T = TString] - * } - * @endcode - */ -#if defined(__GNUC__) -#define Y_FUNC_SIGNATURE __PRETTY_FUNCTION__ -#elif defined(_MSC_VER) -#define Y_FUNC_SIGNATURE __FUNCSIG__ -#else -#define Y_FUNC_SIGNATURE "" -#endif - -#ifdef __GNUC__ -#define Y_PRINTF_FORMAT(n, m) __attribute__((__format__(__printf__, n, m))) -#endif - -#ifndef Y_PRINTF_FORMAT -#define Y_PRINTF_FORMAT(n, m) -#endif - -#if defined(__clang__) -#define Y_NO_SANITIZE(...) __attribute__((no_sanitize(__VA_ARGS__))) -#endif - -#if !defined(Y_NO_SANITIZE) -#define Y_NO_SANITIZE(...) -#endif - -/** - * @def Y_DECLARE_UNUSED - * - * Macro is needed to silence compiler warning about unused entities (e.g. function or argument). - * - * @code - * Y_DECLARE_UNUSED int FunctionUsedSolelyForDebugPurposes(); - * assert(FunctionUsedSolelyForDebugPurposes() == 42); - * - * void Foo(const int argumentUsedOnlyForDebugPurposes Y_DECLARE_UNUSED) { - * assert(argumentUsedOnlyForDebugPurposes == 42); - * // however you may as well omit `Y_DECLARE_UNUSED` and use `UNUSED` macro instead - * Y_UNUSED(argumentUsedOnlyForDebugPurposes); - * } - * @endcode - */ -#ifdef __GNUC__ -#define Y_DECLARE_UNUSED __attribute__((unused)) -#endif - -#ifndef Y_DECLARE_UNUSED -#define Y_DECLARE_UNUSED -#endif - -#if defined(__GNUC__) -#define Y_LIKELY(Cond) __builtin_expect(!!(Cond), 1) -#define Y_UNLIKELY(Cond) __builtin_expect(!!(Cond), 0) -#define Y_PREFETCH_READ(Pointer, Priority) __builtin_prefetch((const void*)(Pointer), 0, Priority) -#define Y_PREFETCH_WRITE(Pointer, Priority) __builtin_prefetch((const void*)(Pointer), 1, Priority) -#endif - -/** - * @def Y_FORCE_INLINE - * - * Macro to use in place of 'inline' in function declaration/definition to force - * it to be inlined. - */ -#if !defined(Y_FORCE_INLINE) -#if defined(CLANG_COVERAGE) -#/* excessive __always_inline__ might significantly slow down compilation of an instrumented unit */ -#define Y_FORCE_INLINE inline -#elif defined(_MSC_VER) -#define Y_FORCE_INLINE __forceinline -#elif defined(__GNUC__) -#/* Clang also defines __GNUC__ (as 4) */ -#define Y_FORCE_INLINE inline __attribute__((__always_inline__)) -#else -#define Y_FORCE_INLINE inline -#endif -#endif - -/** - * @def Y_NO_INLINE - * - * Macro to use in place of 'inline' in function declaration/definition to - * prevent it from being inlined. 
- */ -#if !defined(Y_NO_INLINE) -#if defined(_MSC_VER) -#define Y_NO_INLINE __declspec(noinline) -#elif defined(__GNUC__) || defined(__INTEL_COMPILER) -#/* Clang also defines __GNUC__ (as 4) */ -#define Y_NO_INLINE __attribute__((__noinline__)) -#else -#define Y_NO_INLINE -#endif -#endif - -//to cheat compiler about strict aliasing or similar problems -#if defined(__GNUC__) -#define Y_FAKE_READ(X) \ - do { \ - __asm__ __volatile__("" \ - : \ - : "m"(X)); \ - } while (0) - -#define Y_FAKE_WRITE(X) \ - do { \ - __asm__ __volatile__("" \ - : "=m"(X)); \ - } while (0) -#endif - -#if !defined(Y_FAKE_READ) -#define Y_FAKE_READ(X) -#endif - -#if !defined(Y_FAKE_WRITE) -#define Y_FAKE_WRITE(X) -#endif - -#ifndef Y_PREFETCH_READ -#define Y_PREFETCH_READ(Pointer, Priority) (void)(const void*)(Pointer), (void)Priority -#endif - -#ifndef Y_PREFETCH_WRITE -#define Y_PREFETCH_WRITE(Pointer, Priority) (void)(const void*)(Pointer), (void)Priority -#endif - -#ifndef Y_LIKELY -#define Y_LIKELY(Cond) (Cond) -#define Y_UNLIKELY(Cond) (Cond) -#endif - -#ifdef __GNUC__ -#define _packed __attribute__((packed)) -#else -#define _packed -#endif - -#if defined(__GNUC__) -#define Y_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) -#endif - -#ifndef Y_WARN_UNUSED_RESULT -#define Y_WARN_UNUSED_RESULT -#endif - -#if defined(__GNUC__) -#define Y_HIDDEN __attribute__((visibility("hidden"))) -#endif - -#if !defined(Y_HIDDEN) -#define Y_HIDDEN -#endif - -#if defined(__GNUC__) -#define Y_PUBLIC __attribute__((visibility("default"))) -#endif - -#if !defined(Y_PUBLIC) -#define Y_PUBLIC -#endif - -#if !defined(Y_UNUSED) && !defined(__cplusplus) -#define Y_UNUSED(var) (void)(var) -#endif -#if !defined(Y_UNUSED) && defined(__cplusplus) -template -constexpr Y_FORCE_INLINE int Y_UNUSED(Types&&...) { - return 0; -}; -#endif - -/** - * @def Y_ASSUME - * - * Macro that tells the compiler that it can generate optimized code - * as if the given expression will always evaluate true. - * The behavior is undefined if it ever evaluates false. - * - * @code - * // factored into a function so that it's testable - * inline int Avg(int x, int y) { - * if (x >= 0 && y >= 0) { - * return (static_cast(x) + static_cast(y)) >> 1; - * } else { - * // a slower implementation - * } - * } - * - * // we know that xs and ys are non-negative from domain knowledge, - * // but we can't change the types of xs and ys because of API constrains - * int Foo(const TVector& xs, const TVector& ys) { - * TVector avgs; - * avgs.resize(xs.size()); - * for (size_t i = 0; i < xs.size(); ++i) { - * auto x = xs[i]; - * auto y = ys[i]; - * Y_ASSUME(x >= 0); - * Y_ASSUME(y >= 0); - * xs[i] = Avg(x, y); - * } - * } - * @endcode - */ -#if defined(__GNUC__) -#define Y_ASSUME(condition) ((condition) ? (void)0 : __builtin_unreachable()) -#elif defined(_MSC_VER) -#define Y_ASSUME(condition) __assume(condition) -#else -#define Y_ASSUME(condition) Y_UNUSED(condition) -#endif - -#ifdef __cplusplus -[[noreturn]] -#endif -Y_HIDDEN void _YandexAbort(); - -/** - * @def Y_UNREACHABLE - * - * Macro that marks the rest of the code branch unreachable. - * The behavior is undefined if it's ever reached. 
- * - * @code - * switch (i % 3) { - * case 0: - * return foo; - * case 1: - * return bar; - * case 2: - * return baz; - * default: - * Y_UNREACHABLE(); - * } - * @endcode - */ -#if defined(__GNUC__) || defined(_MSC_VER) -#define Y_UNREACHABLE() Y_ASSUME(0) -#else -#define Y_UNREACHABLE() _YandexAbort() -#endif - -#if defined(undefined_sanitizer_enabled) -#define _ubsan_enabled_ -#endif - -#ifdef __clang__ - -#if __has_feature(thread_sanitizer) -#define _tsan_enabled_ -#endif -#if __has_feature(memory_sanitizer) -#define _msan_enabled_ -#endif -#if __has_feature(address_sanitizer) -#define _asan_enabled_ -#endif - -#else - -#if defined(thread_sanitizer_enabled) || defined(__SANITIZE_THREAD__) -#define _tsan_enabled_ -#endif -#if defined(memory_sanitizer_enabled) -#define _msan_enabled_ -#endif -#if defined(address_sanitizer_enabled) || defined(__SANITIZE_ADDRESS__) -#define _asan_enabled_ -#endif - -#endif - -#if defined(_asan_enabled_) || defined(_msan_enabled_) || defined(_tsan_enabled_) || defined(_ubsan_enabled_) -#define _san_enabled_ -#endif - -#if defined(_MSC_VER) -#define __PRETTY_FUNCTION__ __FUNCSIG__ -#endif - -#if defined(__GNUC__) -#define Y_WEAK __attribute__((weak)) -#else -#define Y_WEAK -#endif - -#if defined(__CUDACC_VER_MAJOR__) -#define Y_CUDA_AT_LEAST(x, y) (__CUDACC_VER_MAJOR__ > x || (__CUDACC_VER_MAJOR__ == x && __CUDACC_VER_MINOR__ >= y)) -#else -#define Y_CUDA_AT_LEAST(x, y) 0 -#endif - -// NVidia CUDA C++ Compiler did not know about noexcept keyword until version 9.0 -#if !Y_CUDA_AT_LEAST(9, 0) -#if defined(__CUDACC__) && !defined(noexcept) -#define noexcept throw () -#endif -#endif - -#if defined(__GNUC__) -#define Y_COLD __attribute__((cold)) -#define Y_LEAF __attribute__((leaf)) -#define Y_WRAPPER __attribute__((artificial)) -#else -#define Y_COLD -#define Y_LEAF -#define Y_WRAPPER -#endif - -/** - * @def Y_PRAGMA - * - * Macro for use in other macros to define compiler pragma - * See below for other usage examples - * - * @code - * #if defined(__clang__) || defined(__GNUC__) - * #define Y_PRAGMA_NO_WSHADOW \ - * Y_PRAGMA("GCC diagnostic ignored \"-Wshadow\"") - * #elif defined(_MSC_VER) - * #define Y_PRAGMA_NO_WSHADOW \ - * Y_PRAGMA("warning(disable:4456 4457") - * #else - * #define Y_PRAGMA_NO_WSHADOW - * #endif - * @endcode - */ -#if defined(__clang__) || defined(__GNUC__) -#define Y_PRAGMA(x) _Pragma(x) -#elif defined(_MSC_VER) -#define Y_PRAGMA(x) __pragma(x) -#else -#define Y_PRAGMA(x) -#endif - -/** - * @def Y_PRAGMA_DIAGNOSTIC_PUSH - * - * Cross-compiler pragma to save diagnostic settings - * - * @see - * GCC: https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Pragmas.html - * MSVC: https://msdn.microsoft.com/en-us/library/2c8f766e.aspx - * Clang: https://clang.llvm.org/docs/UsersManual.html#controlling-diagnostics-via-pragmas - * - * @code - * Y_PRAGMA_DIAGNOSTIC_PUSH - * @endcode - */ -#if defined(__clang__) || defined(__GNUC__) -#define Y_PRAGMA_DIAGNOSTIC_PUSH \ - Y_PRAGMA("GCC diagnostic push") -#elif defined(_MSC_VER) -#define Y_PRAGMA_DIAGNOSTIC_PUSH \ - Y_PRAGMA(warning(push)) -#else -#define Y_PRAGMA_DIAGNOSTIC_PUSH -#endif - -/** - * @def Y_PRAGMA_DIAGNOSTIC_POP - * - * Cross-compiler pragma to restore diagnostic settings - * - * @see - * GCC: https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Pragmas.html - * MSVC: https://msdn.microsoft.com/en-us/library/2c8f766e.aspx - * Clang: https://clang.llvm.org/docs/UsersManual.html#controlling-diagnostics-via-pragmas - * - * @code - * Y_PRAGMA_DIAGNOSTIC_POP - * @endcode - */ -#if defined(__clang__) || 
defined(__GNUC__) -#define Y_PRAGMA_DIAGNOSTIC_POP \ - Y_PRAGMA("GCC diagnostic pop") -#elif defined(_MSC_VER) -#define Y_PRAGMA_DIAGNOSTIC_POP \ - Y_PRAGMA(warning(pop)) -#else -#define Y_PRAGMA_DIAGNOSTIC_POP -#endif - -/** - * @def Y_PRAGMA_NO_WSHADOW - * - * Cross-compiler pragma to disable warnings about shadowing variables - * - * @code - * Y_PRAGMA_DIAGNOSTIC_PUSH - * Y_PRAGMA_NO_WSHADOW - * - * // some code which use variable shadowing, e.g.: - * - * for (int i = 0; i < 100; ++i) { - * Use(i); - * - * for (int i = 42; i < 100500; ++i) { // this i is shadowing previous i - * AnotherUse(i); - * } - * } - * - * Y_PRAGMA_DIAGNOSTIC_POP - * @endcode - */ -#if defined(__clang__) || defined(__GNUC__) -#define Y_PRAGMA_NO_WSHADOW \ - Y_PRAGMA("GCC diagnostic ignored \"-Wshadow\"") -#elif defined(_MSC_VER) -#define Y_PRAGMA_NO_WSHADOW \ - Y_PRAGMA(warning(disable : 4456 4457)) -#else -#define Y_PRAGMA_NO_WSHADOW -#endif - -/** - * @ def Y_PRAGMA_NO_UNUSED_FUNCTION - * - * Cross-compiler pragma to disable warnings about unused functions - * - * @see - * GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html - * Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wunused-function - * MSVC: there is no such warning - * - * @code - * Y_PRAGMA_DIAGNOSTIC_PUSH - * Y_PRAGMA_NO_UNUSED_FUNCTION - * - * // some code which introduces a function which later will not be used, e.g.: - * - * void Foo() { - * } - * - * int main() { - * return 0; // Foo() never called - * } - * - * Y_PRAGMA_DIAGNOSTIC_POP - * @endcode - */ -#if defined(__clang__) || defined(__GNUC__) -#define Y_PRAGMA_NO_UNUSED_FUNCTION \ - Y_PRAGMA("GCC diagnostic ignored \"-Wunused-function\"") -#else -#define Y_PRAGMA_NO_UNUSED_FUNCTION -#endif - -/** - * @ def Y_PRAGMA_NO_UNUSED_PARAMETER - * - * Cross-compiler pragma to disable warnings about unused function parameters - * - * @see - * GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html - * Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wunused-parameter - * MSVC: https://msdn.microsoft.com/en-us/library/26kb9fy0.aspx - * - * @code - * Y_PRAGMA_DIAGNOSTIC_PUSH - * Y_PRAGMA_NO_UNUSED_PARAMETER - * - * // some code which introduces a function with unused parameter, e.g.: - * - * void foo(int a) { - * // a is not referenced - * } - * - * int main() { - * foo(1); - * return 0; - * } - * - * Y_PRAGMA_DIAGNOSTIC_POP - * @endcode - */ -#if defined(__clang__) || defined(__GNUC__) -#define Y_PRAGMA_NO_UNUSED_PARAMETER \ - Y_PRAGMA("GCC diagnostic ignored \"-Wunused-parameter\"") -#elif defined(_MSC_VER) -#define Y_PRAGMA_NO_UNUSED_PARAMETER \ - Y_PRAGMA(warning(disable : 4100)) -#else -#define Y_PRAGMA_NO_UNUSED_PARAMETER -#endif - -/** - * @def Y_PRAGMA_NO_DEPRECATED - * - * Cross compiler pragma to disable warnings and errors about deprecated - * - * @see - * GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html - * Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wdeprecated - * MSVC: https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4996?view=vs-2017 - * - * @code - * Y_PRAGMA_DIAGNOSTIC_PUSH - * Y_PRAGMA_NO_DEPRECATED - * - * [deprecated] void foo() { - * // ... 
- * } - * - * int main() { - * foo(); - * return 0; - * } - * - * Y_PRAGMA_DIAGNOSTIC_POP - * @endcode - */ -#if defined(__clang__) || defined(__GNUC__) -#define Y_PRAGMA_NO_DEPRECATED \ - Y_PRAGMA("GCC diagnostic ignored \"-Wdeprecated\"") -#elif defined(_MSC_VER) -#define Y_PRAGMA_NO_DEPRECATED \ - Y_PRAGMA(warning(disable : 4996)) -#else -#define Y_PRAGMA_NO_DEPRECATED -#endif - -#if defined(__clang__) || defined(__GNUC__) -/** - * @def Y_CONST_FUNCTION - methods and functions, marked with this method are promised to: - 1. do not have side effects - 2. this method do not read global memory - NOTE: this attribute can't be set for methods that depend on data, pointed by this - this allow compilers to do hard optimization of that functions - NOTE: in common case this attribute can't be set if method have pointer-arguments - NOTE: as result there no any reason to discard result of such method -*/ -#define Y_CONST_FUNCTION [[gnu::const]] -#endif - -#if !defined(Y_CONST_FUNCTION) -#define Y_CONST_FUNCTION -#endif - -#if defined(__clang__) || defined(__GNUC__) -/** - * @def Y_PURE_FUNCTION - methods and functions, marked with this method are promised to: - 1. do not have side effects - 2. result will be the same if no global memory changed - this allow compilers to do hard optimization of that functions - NOTE: as result there no any reason to discard result of such method -*/ -#define Y_PURE_FUNCTION [[gnu::pure]] -#endif - -#if !defined(Y_PURE_FUNCTION) -#define Y_PURE_FUNCTION -#endif - -/** - * @ def Y_HAVE_INT128 - * - * Defined when the compiler supports __int128 extension - * - * @code - * - * #if defined(Y_HAVE_INT128) - * __int128 myVeryBigInt = 12345678901234567890; - * #endif - * - * @endcode - */ -#if defined(__SIZEOF_INT128__) -#define Y_HAVE_INT128 1 -#endif - -/** - * XRAY macro must be passed to compiler if XRay is enabled. - * - * Define everything XRay-specific as a macro so that it doesn't cause errors - * for compilers that doesn't support XRay. 
- */ -#if defined(XRAY) && defined(__cplusplus) -#include -#define Y_XRAY_ALWAYS_INSTRUMENT [[clang::xray_always_instrument]] -#define Y_XRAY_NEVER_INSTRUMENT [[clang::xray_never_instrument]] -#define Y_XRAY_CUSTOM_EVENT(__string, __length) \ - do { \ - __xray_customevent(__string, __length); \ - } while (0) -#else -#define Y_XRAY_ALWAYS_INSTRUMENT -#define Y_XRAY_NEVER_INSTRUMENT -#define Y_XRAY_CUSTOM_EVENT(__string, __length) \ - do { \ - } while (0) -#endif diff --git a/contrib/lfalloc/src/util/system/defaults.h b/contrib/lfalloc/src/util/system/defaults.h deleted file mode 100644 index 19196a28b2b..00000000000 --- a/contrib/lfalloc/src/util/system/defaults.h +++ /dev/null @@ -1,168 +0,0 @@ -#pragma once - -#include "platform.h" - -#if defined _unix_ -#define LOCSLASH_C '/' -#define LOCSLASH_S "/" -#else -#define LOCSLASH_C '\\' -#define LOCSLASH_S "\\" -#endif // _unix_ - -#if defined(__INTEL_COMPILER) && defined(__cplusplus) -#include -#endif - -// low and high parts of integers -#if !defined(_win_) -#include -#endif - -#if defined(BSD) || defined(_android_) - -#if defined(BSD) -#include -#endif - -#if defined(_android_) -#include -#endif - -#if (BYTE_ORDER == LITTLE_ENDIAN) -#define _little_endian_ -#elif (BYTE_ORDER == BIG_ENDIAN) -#define _big_endian_ -#else -#error unknown endian not supported -#endif - -#elif (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(WHATEVER_THAT_HAS_BIG_ENDIAN) -#define _big_endian_ -#else -#define _little_endian_ -#endif - -// alignment -#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_QUADS) -#define _must_align8_ -#endif - -#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_LONGS) -#define _must_align4_ -#endif - -#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_SHORTS) -#define _must_align2_ -#endif - -#if defined(__GNUC__) -#define alias_hack __attribute__((__may_alias__)) -#endif - -#ifndef alias_hack -#define alias_hack -#endif - -#include "types.h" - -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) -#define PRAGMA(x) _Pragma(#x) -#define RCSID(idstr) PRAGMA(comment(exestr, idstr)) -#else -#define RCSID(idstr) static const char rcsid[] = idstr -#endif - -#include "compiler.h" - -#ifdef _win_ -#include -#elif defined(_sun_) -#include -#endif - -#ifdef NDEBUG -#define Y_IF_DEBUG(X) -#else -#define Y_IF_DEBUG(X) X -#endif - -/** - * @def Y_ARRAY_SIZE - * - * This macro is needed to get number of elements in a statically allocated fixed size array. The - * expression is a compile-time constant and therefore can be used in compile time computations. - * - * @code - * enum ENumbers { - * EN_ONE, - * EN_TWO, - * EN_SIZE - * } - * - * const char* NAMES[] = { - * "one", - * "two" - * } - * - * static_assert(Y_ARRAY_SIZE(NAMES) == EN_SIZE, "you should define `NAME` for each enumeration"); - * @endcode - * - * This macro also catches type errors. If you see a compiler error like "warning: division by zero - * is undefined" when using `Y_ARRAY_SIZE` then you are probably giving it a pointer. - * - * Since all of our code is expected to work on a 64 bit platform where pointers are 8 bytes we may - * falsefully accept pointers to types of sizes that are divisors of 8 (1, 2, 4 and 8). 
- */ -#if defined(__cplusplus) -namespace NArraySizePrivate { - template - struct TArraySize; - - template - struct TArraySize { - enum { - Result = N - }; - }; - - template - struct TArraySize { - enum { - Result = N - }; - }; -} - -#define Y_ARRAY_SIZE(arr) ((size_t)::NArraySizePrivate::TArraySize::Result) -#else -#undef Y_ARRAY_SIZE -#define Y_ARRAY_SIZE(arr) \ - ((sizeof(arr) / sizeof((arr)[0])) / static_cast(!(sizeof(arr) % sizeof((arr)[0])))) -#endif - -#undef Y_ARRAY_BEGIN -#define Y_ARRAY_BEGIN(arr) (arr) - -#undef Y_ARRAY_END -#define Y_ARRAY_END(arr) ((arr) + Y_ARRAY_SIZE(arr)) - -/** - * Concatenates two symbols, even if one of them is itself a macro. - */ -#define Y_CAT(X, Y) Y_CAT_I(X, Y) -#define Y_CAT_I(X, Y) Y_CAT_II(X, Y) -#define Y_CAT_II(X, Y) X##Y - -#define Y_STRINGIZE(X) UTIL_PRIVATE_STRINGIZE_AUX(X) -#define UTIL_PRIVATE_STRINGIZE_AUX(X) #X - -#if defined(__COUNTER__) -#define Y_GENERATE_UNIQUE_ID(N) Y_CAT(N, __COUNTER__) -#endif - -#if !defined(Y_GENERATE_UNIQUE_ID) -#define Y_GENERATE_UNIQUE_ID(N) Y_CAT(N, __LINE__) -#endif - -#define NPOS ((size_t)-1) diff --git a/contrib/lfalloc/src/util/system/platform.h b/contrib/lfalloc/src/util/system/platform.h deleted file mode 100644 index 0687f239a2e..00000000000 --- a/contrib/lfalloc/src/util/system/platform.h +++ /dev/null @@ -1,242 +0,0 @@ -#pragma once - -// What OS ? -// our definition has the form _{osname}_ - -#if defined(_WIN64) -#define _win64_ -#define _win32_ -#elif defined(__WIN32__) || defined(_WIN32) // _WIN32 is also defined by the 64-bit compiler for backward compatibility -#define _win32_ -#else -#define _unix_ -#if defined(__sun__) || defined(sun) || defined(sparc) || defined(__sparc) -#define _sun_ -#endif -#if defined(__hpux__) -#define _hpux_ -#endif -#if defined(__linux__) -#define _linux_ -#endif -#if defined(__FreeBSD__) -#define _freebsd_ -#endif -#if defined(__CYGWIN__) -#define _cygwin_ -#endif -#if defined(__APPLE__) -#define _darwin_ -#endif -#if defined(__ANDROID__) -#define _android_ -#endif -#endif - -#if defined(__IOS__) -#define _ios_ -#endif - -#if defined(_linux_) -#if defined(_musl_) -//nothing to do -#elif defined(_android_) -#define _bionic_ -#else -#define _glibc_ -#endif -#endif - -#if defined(_darwin_) -#define unix -#define __unix__ -#endif - -#if defined(_win32_) || defined(_win64_) -#define _win_ -#endif - -#if defined(__arm__) || defined(__ARM__) || defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM) -#if defined(__arm64) || defined(__arm64__) || defined(__aarch64__) -#define _arm64_ -#else -#define _arm32_ -#endif -#endif - -#if defined(_arm64_) || defined(_arm32_) -#define _arm_ -#endif - -/* __ia64__ and __x86_64__ - defined by GNU C. - * _M_IA64, _M_X64, _M_AMD64 - defined by Visual Studio. - * - * Microsoft can define _M_IX86, _M_AMD64 (before Visual Studio 8) - * or _M_X64 (starting in Visual Studio 8). 
- */ -#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) -#define _x86_64_ -#endif - -#if defined(__i386__) || defined(_M_IX86) -#define _i386_ -#endif - -#if defined(__ia64__) || defined(_M_IA64) -#define _ia64_ -#endif - -#if defined(__powerpc__) -#define _ppc_ -#endif - -#if defined(__powerpc64__) -#define _ppc64_ -#endif - -#if !defined(sparc) && !defined(__sparc) && !defined(__hpux__) && !defined(__alpha__) && !defined(_ia64_) && !defined(_x86_64_) && !defined(_arm_) && !defined(_i386_) && !defined(_ppc_) && !defined(_ppc64_) -#error "platform not defined, please, define one" -#endif - -#if defined(_x86_64_) || defined(_i386_) -#define _x86_ -#endif - -#if defined(__MIC__) -#define _mic_ -#define _k1om_ -#endif - -// stdio or MessageBox -#if defined(__CONSOLE__) || defined(_CONSOLE) -#define _console_ -#endif -#if (defined(_win_) && !defined(_console_)) -#define _windows_ -#elif !defined(_console_) -#define _console_ -#endif - -#if defined(__SSE__) || defined(SSE_ENABLED) -#define _sse_ -#endif - -#if defined(__SSE2__) || defined(SSE2_ENABLED) -#define _sse2_ -#endif - -#if defined(__SSE3__) || defined(SSE3_ENABLED) -#define _sse3_ -#endif - -#if defined(__SSSE3__) || defined(SSSE3_ENABLED) -#define _ssse3_ -#endif - -#if defined(POPCNT_ENABLED) -#define _popcnt_ -#endif - -#if defined(__DLL__) || defined(_DLL) -#define _dll_ -#endif - -// 16, 32 or 64 -#if defined(__sparc_v9__) || defined(_x86_64_) || defined(_ia64_) || defined(_arm64_) || defined(_ppc64_) -#define _64_ -#else -#define _32_ -#endif - -/* All modern 64-bit Unix systems use scheme LP64 (long, pointers are 64-bit). - * Microsoft uses a different scheme: LLP64 (long long, pointers are 64-bit). - * - * Scheme LP64 LLP64 - * char 8 8 - * short 16 16 - * int 32 32 - * long 64 32 - * long long 64 64 - * pointer 64 64 - */ - -#if defined(_32_) -#define SIZEOF_PTR 4 -#elif defined(_64_) -#define SIZEOF_PTR 8 -#endif - -#define PLATFORM_DATA_ALIGN SIZEOF_PTR - -#if !defined(SIZEOF_PTR) -#error todo -#endif - -#define SIZEOF_CHAR 1 -#define SIZEOF_UNSIGNED_CHAR 1 -#define SIZEOF_SHORT 2 -#define SIZEOF_UNSIGNED_SHORT 2 -#define SIZEOF_INT 4 -#define SIZEOF_UNSIGNED_INT 4 - -#if defined(_32_) -#define SIZEOF_LONG 4 -#define SIZEOF_UNSIGNED_LONG 4 -#elif defined(_64_) -#if defined(_win_) -#define SIZEOF_LONG 4 -#define SIZEOF_UNSIGNED_LONG 4 -#else -#define SIZEOF_LONG 8 -#define SIZEOF_UNSIGNED_LONG 8 -#endif // _win_ -#endif // _32_ - -#if !defined(SIZEOF_LONG) -#error todo -#endif - -#define SIZEOF_LONG_LONG 8 -#define SIZEOF_UNSIGNED_LONG_LONG 8 - -#undef SIZEOF_SIZE_T // in case we include which defines it, too -#define SIZEOF_SIZE_T SIZEOF_PTR - -#if defined(__INTEL_COMPILER) -#pragma warning(disable 1292) -#pragma warning(disable 1469) -#pragma warning(disable 193) -#pragma warning(disable 271) -#pragma warning(disable 383) -#pragma warning(disable 424) -#pragma warning(disable 444) -#pragma warning(disable 584) -#pragma warning(disable 593) -#pragma warning(disable 981) -#pragma warning(disable 1418) -#pragma warning(disable 304) -#pragma warning(disable 810) -#pragma warning(disable 1029) -#pragma warning(disable 1419) -#pragma warning(disable 177) -#pragma warning(disable 522) -#pragma warning(disable 858) -#pragma warning(disable 111) -#pragma warning(disable 1599) -#pragma warning(disable 411) -#pragma warning(disable 304) -#pragma warning(disable 858) -#pragma warning(disable 444) -#pragma warning(disable 913) -#pragma warning(disable 310) -#pragma warning(disable 167) -#pragma warning(disable 180) 
-#pragma warning(disable 1572) -#endif - -#if defined(_MSC_VER) -#undef _WINSOCKAPI_ -#define _WINSOCKAPI_ -#undef NOMINMAX -#define NOMINMAX -#endif diff --git a/contrib/lfalloc/src/util/system/types.h b/contrib/lfalloc/src/util/system/types.h deleted file mode 100644 index af4f0adb13d..00000000000 --- a/contrib/lfalloc/src/util/system/types.h +++ /dev/null @@ -1,117 +0,0 @@ -#pragma once - -// DO_NOT_STYLE - -#include "platform.h" - -#include - -typedef int8_t i8; -typedef int16_t i16; -typedef uint8_t ui8; -typedef uint16_t ui16; - -typedef int yssize_t; -#define PRIYSZT "d" - -#if defined(_darwin_) && defined(_32_) -typedef unsigned long ui32; -typedef long i32; -#else -typedef uint32_t ui32; -typedef int32_t i32; -#endif - -#if defined(_darwin_) && defined(_64_) -typedef unsigned long ui64; -typedef long i64; -#else -typedef uint64_t ui64; -typedef int64_t i64; -#endif - -#define LL(number) INT64_C(number) -#define ULL(number) UINT64_C(number) - -// Macro for size_t and ptrdiff_t types -#if defined(_32_) -# if defined(_darwin_) -# define PRISZT "lu" -# undef PRIi32 -# define PRIi32 "li" -# undef SCNi32 -# define SCNi32 "li" -# undef PRId32 -# define PRId32 "li" -# undef SCNd32 -# define SCNd32 "li" -# undef PRIu32 -# define PRIu32 "lu" -# undef SCNu32 -# define SCNu32 "lu" -# undef PRIx32 -# define PRIx32 "lx" -# undef SCNx32 -# define SCNx32 "lx" -# elif !defined(_cygwin_) -# define PRISZT PRIu32 -# else -# define PRISZT "u" -# endif -# define SCNSZT SCNu32 -# define PRIPDT PRIi32 -# define SCNPDT SCNi32 -# define PRITMT PRIi32 -# define SCNTMT SCNi32 -#elif defined(_64_) -# if defined(_darwin_) -# define PRISZT "lu" -# undef PRIu64 -# define PRIu64 PRISZT -# undef PRIx64 -# define PRIx64 "lx" -# undef PRIX64 -# define PRIX64 "lX" -# undef PRId64 -# define PRId64 "ld" -# undef PRIi64 -# define PRIi64 "li" -# undef SCNi64 -# define SCNi64 "li" -# undef SCNu64 -# define SCNu64 "lu" -# undef SCNx64 -# define SCNx64 "lx" -# else -# define PRISZT PRIu64 -# endif -# define SCNSZT SCNu64 -# define PRIPDT PRIi64 -# define SCNPDT SCNi64 -# define PRITMT PRIi64 -# define SCNTMT SCNi64 -#else -# error "Unsupported platform" -#endif - -// SUPERLONG -#if !defined(DONT_USE_SUPERLONG) && !defined(SUPERLONG_MAX) -#define SUPERLONG_MAX ~LL(0) -typedef i64 SUPERLONG; -#endif - -// UNICODE -// UCS-2, native byteorder -typedef ui16 wchar16; -// internal symbol type: UTF-16LE -typedef wchar16 TChar; -typedef ui32 wchar32; - -#if defined(_MSC_VER) -#include -typedef SSIZE_T ssize_t; -#define HAVE_SSIZE_T 1 -#include -#endif - -#include diff --git a/contrib/mimalloc b/contrib/mimalloc new file mode 160000 index 00000000000..cb76e966941 --- /dev/null +++ b/contrib/mimalloc @@ -0,0 +1 @@ +Subproject commit cb76e966941328bf7242ce9939ebe986555de2bf diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index e2cc16fe122..4b47b77dec2 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -223,8 +223,9 @@ if(RE2_INCLUDE_DIR) target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR}) endif() -if (USE_LFALLOC) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${LFALLOC_INCLUDE_DIR}) +if (USE_MIMALLOC) + target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MIMALLOC_INCLUDE_DIR}) + target_link_libraries (clickhouse_common_io PRIVATE ${MIMALLOC_LIBRARY}) endif () if(CPUID_LIBRARY) diff --git a/dbms/src/Common/LFAllocator.cpp b/dbms/src/Common/LFAllocator.cpp deleted file mode 100644 index 71396d341ab..00000000000 --- 
a/dbms/src/Common/LFAllocator.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include - -#if USE_LFALLOC -#include "LFAllocator.h" - -#include -#include - -namespace DB -{ - -void * LFAllocator::alloc(size_t size, size_t alignment) -{ - if (alignment == 0) - return LFAlloc(size); - else - { - void * ptr; - int res = LFPosixMemalign(&ptr, alignment, size); - return res ? nullptr : ptr; - } -} - -void LFAllocator::free(void * buf, size_t) -{ - LFFree(buf); -} - -void * LFAllocator::realloc(void * old_ptr, size_t, size_t new_size, size_t alignment) -{ - if (old_ptr == nullptr) - { - void * result = LFAllocator::alloc(new_size, alignment); - return result; - } - if (new_size == 0) - { - LFFree(old_ptr); - return nullptr; - } - - void * new_ptr = LFAllocator::alloc(new_size, alignment); - if (new_ptr == nullptr) - return nullptr; - size_t old_size = LFGetSize(old_ptr); - memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); - LFFree(old_ptr); - return new_ptr; -} - -} - -#endif diff --git a/dbms/src/Common/LFAllocator.h b/dbms/src/Common/LFAllocator.h deleted file mode 100644 index f2a10cc4508..00000000000 --- a/dbms/src/Common/LFAllocator.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include - -#if !USE_LFALLOC -#error "do not include this file until USE_LFALLOC is set to 1" -#endif - -#include - -namespace DB -{ -struct LFAllocator -{ - static void * alloc(size_t size, size_t alignment = 0); - - static void free(void * buf, size_t); - - static void * realloc(void * buf, size_t, size_t new_size, size_t alignment = 0); -}; - -} diff --git a/dbms/src/Common/MiAllocator.h b/dbms/src/Common/MiAllocator.h new file mode 100644 index 00000000000..384e3bddcf4 --- /dev/null +++ b/dbms/src/Common/MiAllocator.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +#if !USE_MIMALLOC +#error "do not include this file until USE_MIMALLOC is set to 1" +#endif + +#include +#include + +namespace DB +{ +struct MiAllocator +{ + + static void * alloc(size_t size, size_t alignment = 0) + { + if (alignment == 0) + return mi_malloc(size); + else + return mi_malloc_aligned(size, alignment); + } + + static void free(void * buf, size_t) + { + mi_free(buf); + } + + static void * realloc(void * old_ptr, size_t, size_t new_size, size_t alignment = 0) + { + if (old_ptr == nullptr) + return alloc(new_size, alignment); + + if (new_size == 0) + { + mi_free(old_ptr); + return nullptr; + } + + if (alignment == 0) + return mi_realloc(old_ptr, alignment); + + return mi_realloc_aligned(old_ptr, new_size, alignment); + } + +}; + +} diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 08d8e7e9af1..9b38dd9fc04 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -8,7 +8,6 @@ #cmakedefine01 USE_CPUID #cmakedefine01 USE_CPUINFO #cmakedefine01 USE_BROTLI -#cmakedefine01 USE_LFALLOC -#cmakedefine01 USE_LFALLOC_RANDOM_HINT +#cmakedefine01 USE_MIMALLOC #cmakedefine01 CLICKHOUSE_SPLIT_BINARY diff --git a/dbms/src/DataStreams/MarkInCompressedFile.h b/dbms/src/DataStreams/MarkInCompressedFile.h index ff07b2afbe1..a5970a89738 100644 --- a/dbms/src/DataStreams/MarkInCompressedFile.h +++ b/dbms/src/DataStreams/MarkInCompressedFile.h @@ -7,8 +7,8 @@ #include #include -#if USE_LFALLOC -#include +#if USE_MIMALLOC +#include #endif namespace DB @@ -43,8 +43,8 @@ struct MarkInCompressedFile } }; -#if USE_LFALLOC -using MarksInCompressedFile = PODArray; +#if USE_MIMALLOC +using MarksInCompressedFile = PODArray; #else using MarksInCompressedFile = PODArray; #endif diff --git 
a/dbms/src/IO/UncompressedCache.h b/dbms/src/IO/UncompressedCache.h index 2347c6d7a28..1f17c5e61b6 100644 --- a/dbms/src/IO/UncompressedCache.h +++ b/dbms/src/IO/UncompressedCache.h @@ -7,8 +7,8 @@ #include #include -#if USE_LFALLOC -#include +#if USE_MIMALLOC +#include #endif @@ -25,8 +25,8 @@ namespace DB struct UncompressedCacheCell { -#if USE_LFALLOC - Memory data; +#if USE_MIMALLOC + Memory data; #else Memory<> data; #endif diff --git a/dbms/src/Interpreters/Compiler.cpp b/dbms/src/Interpreters/Compiler.cpp index abdb0969121..ee6845767e6 100644 --- a/dbms/src/Interpreters/Compiler.cpp +++ b/dbms/src/Interpreters/Compiler.cpp @@ -262,8 +262,8 @@ void Compiler::compile( " -I " << compiler_headers << "/dbms/src/" " -isystem " << compiler_headers << "/contrib/cityhash102/include/" " -isystem " << compiler_headers << "/contrib/libpcg-random/include/" - #if USE_LFALLOC - " -isystem " << compiler_headers << "/contrib/lfalloc/src/" + #if USE_MIMALLOC + " -isystem " << compiler_headers << "/contrib/mimalloc/include/" #endif " -isystem " << compiler_headers << INTERNAL_DOUBLE_CONVERSION_INCLUDE_DIR " -isystem " << compiler_headers << INTERNAL_Poco_Foundation_INCLUDE_DIR diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 758408114a8..99591648bae 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -37,8 +37,7 @@ const char * auto_config_build[] "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", "USE_JEMALLOC", "@USE_JEMALLOC@", "USE_TCMALLOC", "@USE_TCMALLOC@", - "USE_LFALLOC", "@USE_LFALLOC@", - "USE_LFALLOC_RANDOM_HINT", "@USE_LFALLOC_RANDOM_HINT@", + "USE_MIMALLOC", "@USE_MIMALLOC@", "USE_UNWIND", "@USE_UNWIND@", "USE_ICU", "@USE_ICU@", "USE_MYSQL", "@USE_MYSQL@", From 35630f2abd23eb7c44c6154de25c99e39daa28d0 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Thu, 27 Jun 2019 18:25:44 +0300 Subject: [PATCH 2/5] MI_SECURE added --- contrib/mimalloc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/mimalloc b/contrib/mimalloc index cb76e966941..b4ece3482f9 160000 --- a/contrib/mimalloc +++ b/contrib/mimalloc @@ -1 +1 @@ -Subproject commit cb76e966941328bf7242ce9939ebe986555de2bf +Subproject commit b4ece3482f944b5d07d889cbaebaf9aa6c56cc03 From ebaced106521436fcbf22dc6fdf030e00daf8ca0 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 27 Jun 2019 21:16:15 +0300 Subject: [PATCH 3/5] Update find_mimalloc.cmake --- cmake/find_mimalloc.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find_mimalloc.cmake b/cmake/find_mimalloc.cmake index 7d4c00ac09a..9ee785e0753 100644 --- a/cmake/find_mimalloc.cmake +++ b/cmake/find_mimalloc.cmake @@ -1,5 +1,5 @@ if (OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE) - option (ENABLE_MIMALLOC "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED}) + option (ENABLE_MIMALLOC "Set to FALSE to disable usage of mimalloc for internal ClickHouse caches" ${NOT_UNBUNDLED}) endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mimalloc/include/mimalloc.h") From 407cf9a22986f7c81bdb41d9a5e4e06f436be380 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Fri, 28 Jun 2019 00:43:50 +0300 Subject: [PATCH 4/5] Comments --- cmake/find_mimalloc.cmake | 2 +- dbms/src/Common/MiAllocator.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git 
a/cmake/find_mimalloc.cmake b/cmake/find_mimalloc.cmake index 9ee785e0753..7d4c00ac09a 100644 --- a/cmake/find_mimalloc.cmake +++ b/cmake/find_mimalloc.cmake @@ -1,5 +1,5 @@ if (OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE) - option (ENABLE_MIMALLOC "Set to FALSE to disable usage of mimalloc for internal ClickHouse caches" ${NOT_UNBUNDLED}) + option (ENABLE_MIMALLOC "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED}) endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mimalloc/include/mimalloc.h") diff --git a/dbms/src/Common/MiAllocator.h b/dbms/src/Common/MiAllocator.h index 384e3bddcf4..075328e5d94 100644 --- a/dbms/src/Common/MiAllocator.h +++ b/dbms/src/Common/MiAllocator.h @@ -11,6 +11,12 @@ namespace DB { + +/* + * This is a different allocator that is based on mimalloc (Microsoft malloc). + * It can be used separately from main allocator to catch heap corruptions and vulnerabilities (for example, for caches). + * We use MI_SECURE mode in mimalloc to achieve such behaviour. + */ struct MiAllocator { From 5cc91161e913a7f338d7659ed1ad51cb28156780 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Fri, 28 Jun 2019 00:44:21 +0300 Subject: [PATCH 5/5] Comments --- cmake/find_mimalloc.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find_mimalloc.cmake b/cmake/find_mimalloc.cmake index 7d4c00ac09a..9ee785e0753 100644 --- a/cmake/find_mimalloc.cmake +++ b/cmake/find_mimalloc.cmake @@ -1,5 +1,5 @@ if (OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE) - option (ENABLE_MIMALLOC "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED}) + option (ENABLE_MIMALLOC "Set to FALSE to disable usage of mimalloc for internal ClickHouse caches" ${NOT_UNBUNDLED}) endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mimalloc/include/mimalloc.h")
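
Usage sketch (illustrative, not part of the patch series): the mimalloc entry points that the new DB::MiAllocator wraps, called directly. The standalone main(), the buffer sizes and linking with -lmimalloc are assumptions made only to keep the example self-contained; mi_malloc_aligned, mi_realloc_aligned and mi_free are the same calls the MiAllocator hunks above use.

#include <mimalloc.h>
#include <cstring>

int main()
{
    // Aligned allocation, the path MiAllocator::alloc takes when alignment != 0.
    void * buf = mi_malloc_aligned(4096, 64);
    if (buf == nullptr)
        return 1;
    std::memset(buf, 0, 4096);

    // Grow the buffer while keeping the requested alignment,
    // the path MiAllocator::realloc takes for aligned blocks.
    buf = mi_realloc_aligned(buf, 8192, 64);

    // Hand the block back to mimalloc, as MiAllocator::free does.
    mi_free(buf);
    return 0;
}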