#pragma once

#include <string.h>

#ifdef NDEBUG
    #define ALLOCATOR_ASLR 0
#else
    #define ALLOCATOR_ASLR 1
#endif

#include <pcg_random.hpp>
#include <Common/thread_local_rng.h>

#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif

#include <cstdlib>
#include <algorithm>
#include <sys/mman.h>

#include <Core/Defines.h>

#if defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER)
    /// Thread and memory sanitizers do not intercept mremap. The usage of
    /// mremap will lead to false positives.
    #define DISABLE_MREMAP 1
#endif
#include <common/mremap.h>

#include <Common/MemoryTracker.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
#include <Common/getPageSize.h>

#include <Common/Allocator_fwd.h>

/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif

/** Many modern allocators (for example, tcmalloc) do not use mremap for
  * realloc, even for large enough chunks of memory, although doing so would
  * improve performance and reduce memory consumption during realloc.
  * To fix this, we do mremap manually if the chunk of memory is large enough.
  * The threshold (64 MB) is chosen quite large, since changing the address
  * space is very slow, especially in the case of a large number of threads. We
  * expect that the sequence of operations mmap / do something / mremap can only
  * be performed about 1000 times per second.
  *
  * P.S. This is also required because tcmalloc cannot allocate a chunk of
  * memory greater than 16 GB.
  *
  * P.P.S. Note that the MMAP_THRESHOLD symbol is intentionally made weak. This
  * allows it to be overridden at link time when ClickHouse is used as a library
  * in third-party applications which may already use their own allocator doing
  * mmaps in the implementation of alloc/realloc.
  */
extern const size_t MMAP_THRESHOLD;
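
/** Example (an illustrative sketch, not part of this header): because the symbol is
  * weak, an application embedding ClickHouse as a library could provide its own
  * strong definition in one of its translation units, which wins at link time:
  *
  *     /// somewhere in the embedding application (value chosen for illustration)
  *     extern const size_t MMAP_THRESHOLD = 128 * 1024 * 1024;
  *
  * After that, the allocator below switches to mmap/mremap only for chunks of at
  * least that size.
  */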

static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;

namespace DB
{
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int CANNOT_MUNMAP;
    extern const int CANNOT_MREMAP;
    extern const int LOGICAL_ERROR;
}
}

/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
  * Also used in hash tables.
  * The interface is different from std::allocator:
  * - the presence of the method realloc, which for large chunks of memory uses mremap;
  * - passing the size into the `free` method;
  * - the presence of the `alignment` argument;
  * - the possibility of zeroing memory (used in hash tables);
  * - a random hint address for mmap;
  * - an mmap_threshold to control how eagerly mmap is used.
  */
template <bool clear_memory_, bool mmap_populate>
class Allocator
{
public:
    /// Allocate memory range.
    void * alloc(size_t size, size_t alignment = 0)
    {
        checkSize(size);
        CurrentMemoryTracker::alloc(size);
        return allocNoTrack(size, alignment);
    }

    /// Free memory range.
    void free(void * buf, size_t size)
    {
        checkSize(size);
        freeNoTrack(buf, size);
        CurrentMemoryTracker::free(size);
    }

    /** Enlarge memory range.
      * Data from old range is moved to the beginning of new range.
      * Address of memory range could change.
      */
    void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
    {
        checkSize(new_size);

        if (old_size == new_size)
        {
            /// nothing to do.
            /// BTW, it's not possible to change alignment while doing realloc.
        }
        else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
                 && alignment <= MALLOC_MIN_ALIGNMENT)
        {
            /// Resize malloc'd memory region with no special alignment requirement.
            CurrentMemoryTracker::realloc(old_size, new_size);

            void * new_buf = ::realloc(buf, new_size);
            if (nullptr == new_buf)
                DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);

            buf = new_buf;
            if constexpr (clear_memory)
                if (new_size > old_size)
                    memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
        }
        else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
        {
            /// Resize mmap'd memory region.
            CurrentMemoryTracker::realloc(old_size, new_size);

            // On Apple and FreeBSD, a self-implemented mremap is used (common/mremap.h).
            buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
                                    PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
            if (MAP_FAILED == buf)
                DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
                    ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);

            /// No need for zero-fill, because mmap guarantees it.
        }
        else if (new_size < MMAP_THRESHOLD)
        {
            /// Small allocs that require a copy. Assume there's enough memory in the system. Call CurrentMemoryTracker once.
            CurrentMemoryTracker::realloc(old_size, new_size);

            void * new_buf = allocNoTrack(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            freeNoTrack(buf, old_size);
            buf = new_buf;
        }
        else
        {
            /// Big allocs that require a copy. MemoryTracker is called inside 'alloc', 'free' methods.
            void * new_buf = alloc(new_size, alignment);
            memcpy(new_buf, buf, std::min(old_size, new_size));
            free(buf, old_size);
            buf = new_buf;
        }

        return buf;
    }

protected:
    static constexpr size_t getStackThreshold()
    {
        return 0;
    }

    static constexpr bool clear_memory = clear_memory_;

    // Freshly mmapped pages are copy-on-write references to a global zero page.
    // On the first write, a page fault occurs, and an actual writable page is
    // allocated. If we are going to use this memory soon, such as when resizing
    // hash tables, it makes sense to pre-fault the pages by passing
    // MAP_POPULATE to mmap(). This takes some time, but should be faster
    // overall than having a hot loop interrupted by page faults.
    // It is only supported on Linux.
    static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
        | (mmap_populate ? MAP_POPULATE : 0)
#endif
        ;

private:
    void * allocNoTrack(size_t size, size_t alignment)
    {
        void * buf;
        size_t mmap_min_alignment = ::getPageSize();

        if (size >= MMAP_THRESHOLD)
        {
            if (alignment > mmap_min_alignment)
                throw DB::Exception(fmt::format("Too large alignment {}: more than page size when allocating {}.",
                    ReadableSize(alignment), ReadableSize(size)), DB::ErrorCodes::BAD_ARGUMENTS);

            buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
                       mmap_flags, -1, 0);
            if (MAP_FAILED == buf)
                DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);

            /// No need for zero-fill, because mmap guarantees it.
        }
        else
        {
            if (alignment <= MALLOC_MIN_ALIGNMENT)
            {
                if constexpr (clear_memory)
                    buf = ::calloc(size, 1);
                else
                    buf = ::malloc(size);

                if (nullptr == buf)
                    DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
            }
            else
            {
                buf = nullptr;
                int res = posix_memalign(&buf, alignment, size);

                if (0 != res)
                    DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
                        DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);

                if constexpr (clear_memory)
                    memset(buf, 0, size);
            }
        }
        return buf;
    }

    void freeNoTrack(void * buf, size_t size)
    {
        if (size >= MMAP_THRESHOLD)
        {
            if (0 != munmap(buf, size))
                DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
        }
        else
        {
            ::free(buf);
        }
    }

    void checkSize(size_t size)
    {
        /// More obvious exception in case of possible overflow (instead of just "Cannot mmap").
        if (size >= 0x8000000000000000ULL)
            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
    }

#ifndef NDEBUG
    /// In debug builds, request mmap() at random addresses (a kind of ASLR), to
    /// reproduce more memory stomping bugs. Note that Linux doesn't do it by
    /// default. This may lead to worse TLB performance.
    void * getMmapHint()
    {
        return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
    }
#else
    void * getMmapHint()
    {
        return nullptr;
    }
#endif
};
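
/** Example (an illustrative sketch, not part of this header): typical direct usage,
  * assuming the default 64 MB mmap threshold mentioned above. Sizes are illustrative.
  *
  *     Allocator<false, false> allocator;               /// no zero-fill, no MAP_POPULATE
  *     void * buf = allocator.alloc(1024);              /// below MMAP_THRESHOLD: plain malloc
  *     buf = allocator.realloc(buf, 1024, 256 * 1024 * 1024);  /// crosses the threshold: alloc + memcpy + free
  *     allocator.free(buf, 256 * 1024 * 1024);          /// the size must be passed back so munmap can be used
  */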

/** When using AllocatorWithStackMemory, located on the stack,
  * GCC 4.9 mistakenly assumes that we can call `free` on a pointer to the stack.
  * In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
  */
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
#endif

/** Allocator with optimization to place small memory ranges in automatic memory.
  */
template <typename Base, size_t _initial_bytes, size_t Alignment>
class AllocatorWithStackMemory : private Base
{
private:
    alignas(Alignment) char stack_memory[_initial_bytes];

public:
    static constexpr size_t initial_bytes = _initial_bytes;

    /// Do not use boost::noncopyable to avoid the warning about direct base
    /// being inaccessible due to ambiguity, when derived classes are also
    /// noncopiable (-Winaccessible-base).
    AllocatorWithStackMemory(const AllocatorWithStackMemory &) = delete;
    AllocatorWithStackMemory & operator=(const AllocatorWithStackMemory &) = delete;
    AllocatorWithStackMemory() = default;
    ~AllocatorWithStackMemory() = default;

    void * alloc(size_t size)
    {
        if (size <= initial_bytes)
        {
            if constexpr (Base::clear_memory)
                memset(stack_memory, 0, initial_bytes);
            return stack_memory;
        }

        return Base::alloc(size, Alignment);
    }

    void free(void * buf, size_t size)
    {
        if (size > initial_bytes)
            Base::free(buf, size);
    }

    void * realloc(void * buf, size_t old_size, size_t new_size)
    {
        /// Was in stack_memory, will remain there.
        if (new_size <= initial_bytes)
            return buf;

        /// Already was big enough to not fit in stack_memory.
        if (old_size > initial_bytes)
            return Base::realloc(buf, old_size, new_size, Alignment);

        /// Was in stack memory, but now will not fit there.
        void * new_buf = Base::alloc(new_size, Alignment);
        memcpy(new_buf, buf, old_size);
        return new_buf;
    }

protected:
    static constexpr size_t getStackThreshold()
    {
        return initial_bytes;
    }
};
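
/** Example (an illustrative sketch, not part of this header): small ranges are served
  * from the internal stack buffer until they outgrow initial_bytes. Parameters are
  * chosen only for illustration.
  *
  *     AllocatorWithStackMemory<Allocator<false, false>, 64, 8> allocator;
  *     void * buf = allocator.alloc(32);        /// returns the internal stack_memory
  *     buf = allocator.realloc(buf, 32, 48);    /// still fits: the same stack pointer is returned
  *     buf = allocator.realloc(buf, 48, 4096);  /// spills to the heap via Base::alloc + memcpy
  *     allocator.free(buf, 4096);               /// a heap-sized range is forwarded to Base::free
  */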

// A constant that gives the number of initially available bytes in
// the allocator. Used to check that this number is in sync with the
// initial size of array or hash table that uses the allocator.
template <typename TAllocator>
constexpr size_t allocatorInitialBytes = 0;

template <typename Base, size_t initial_bytes, size_t Alignment>
constexpr size_t allocatorInitialBytes<AllocatorWithStackMemory<
    Base, initial_bytes, Alignment>> = initial_bytes;
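
/** Example (a sketch with hypothetical names): a container can use this constant to
  * assert that its inline capacity stays in sync with the allocator's stack buffer:
  *
  *     using MyAllocator = AllocatorWithStackMemory<Allocator<false, false>, 4096, 8>;
  *     static_assert(allocatorInitialBytes<MyAllocator> == 4096,
  *         "initial container size must match the allocator's stack buffer");
  */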

#if !__clang__
#pragma GCC diagnostic pop
#endif