mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
dbms: ArenaWithFreeLists basic implementation. [#METR-17382]
This commit is contained in:
parent
274ea51f9d
commit
2e8cc93664
188
dbms/include/DB/Common/ArenaWithFreeLists.h
Normal file
188
dbms/include/DB/Common/ArenaWithFreeLists.h
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
#include <cstdlib>
#include <limits>
#include <memory>
#include <new>
#include <vector>
#include <Poco/SharedPtr.h>
#include <common/likely.h>
#include <DB/Core/Defines.h>
#include <DB/Common/ProfileEvents.h>
#include <DB/Common/Allocator.h>
#include <ext/range.hpp>
#include <ext/size.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
class ArenaWithFreeLists
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
/// Непрерывный кусок памяти и указатель на свободное место в нём. Односвязный список.
|
||||||
|
struct Chunk : private Allocator<false> /// empty base optimization
|
||||||
|
{
|
||||||
|
char * begin;
|
||||||
|
char * end;
|
||||||
|
|
||||||
|
Chunk * prev;
|
||||||
|
|
||||||
|
Chunk(const std::size_t size_, Chunk * prev_)
|
||||||
|
{
|
||||||
|
ProfileEvents::increment(ProfileEvents::ArenaAllocChunks);
|
||||||
|
ProfileEvents::increment(ProfileEvents::ArenaAllocBytes, size_);
|
||||||
|
|
||||||
|
begin = static_cast<char *>(Allocator::alloc(size_));
|
||||||
|
end = begin + size_;
|
||||||
|
prev = prev_;
|
||||||
|
}
|
||||||
|
|
||||||
|
~Chunk()
|
||||||
|
{
|
||||||
|
Allocator::free(begin, size());
|
||||||
|
|
||||||
|
delete prev;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t size() { return end - begin; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Block
|
||||||
|
{
|
||||||
|
std::size_t size;
|
||||||
|
Block * next;
|
||||||
|
};
|
||||||
|
|
||||||
|
static size_t roundUpToPageSize(size_t s)
|
||||||
|
{
|
||||||
|
return (s + 4096 - 1) / 4096 * 4096;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t growth_factor;
|
||||||
|
size_t linear_growth_threshold;
|
||||||
|
|
||||||
|
/// Последний непрерывный кусок памяти.
|
||||||
|
Chunk * head;
|
||||||
|
size_t size_in_bytes;
|
||||||
|
|
||||||
|
static constexpr std::size_t sizes[] {
|
||||||
|
16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536,
|
||||||
|
std::numeric_limits<std::size_t>::max()
|
||||||
|
};
|
||||||
|
static_assert(sizes[0] >= sizeof(Block), "Can't make allocations smaller than sizeof(Block)");
|
||||||
|
static constexpr auto min_bucket_num = 3;
|
||||||
|
static constexpr auto max_fixed_block_size = 65536;
|
||||||
|
|
||||||
|
Block * free_lists[ext::size(sizes)] {};
|
||||||
|
|
||||||
|
/// Если размер чанка меньше linear_growth_threshold, то рост экспоненциальный, иначе - линейный, для уменьшения потребления памяти.
|
||||||
|
size_t nextSize(size_t min_next_size) const
|
||||||
|
{
|
||||||
|
size_t size_after_grow = 0;
|
||||||
|
|
||||||
|
if (head->size() < linear_growth_threshold)
|
||||||
|
size_after_grow = head->size() * growth_factor;
|
||||||
|
else
|
||||||
|
size_after_grow = linear_growth_threshold;
|
||||||
|
|
||||||
|
if (size_after_grow < min_next_size)
|
||||||
|
size_after_grow = min_next_size;
|
||||||
|
|
||||||
|
return roundUpToPageSize(size_after_grow);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Добавить следующий непрерывный кусок памяти размера не меньше заданного.
|
||||||
|
void NO_INLINE addChunk(size_t min_size)
|
||||||
|
{
|
||||||
|
head = new Chunk(nextSize(min_size), head);
|
||||||
|
size_in_bytes += head->size();
|
||||||
|
|
||||||
|
putBlock(head->begin, head->size());
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::size_t findFreeListIndex(const std::size_t size)
|
||||||
|
{
|
||||||
|
/// last free list is for any blocks > 64k
|
||||||
|
if (size > max_fixed_block_size)
|
||||||
|
return ext::size(sizes) - 1;
|
||||||
|
|
||||||
|
/// shift powers of two into previous bucket by subtracting 1
|
||||||
|
const auto bucket_num = _bit_scan_reverse(size - 1);
|
||||||
|
|
||||||
|
return std::max(bucket_num, min_bucket_num) - min_bucket_num;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @todo coalesce blocks
|
||||||
|
void putBlock(const void * ptr, const std::size_t size)
|
||||||
|
{
|
||||||
|
const auto list_idx = findFreeListIndex(size);
|
||||||
|
|
||||||
|
union {
|
||||||
|
const void * p_c;
|
||||||
|
Block * block;
|
||||||
|
};
|
||||||
|
|
||||||
|
p_c = ptr;
|
||||||
|
block->size = size;
|
||||||
|
block->next = free_lists[list_idx];
|
||||||
|
|
||||||
|
free_lists[list_idx] = block;
|
||||||
|
}
|
||||||
|
|
||||||
|
char * splitBlock(Block * & block, const std::size_t size)
|
||||||
|
{
|
||||||
|
const auto block_pos = reinterpret_cast<char *>(block);
|
||||||
|
/// calculate size of block remaining after cutting `size` bytes
|
||||||
|
const auto remaining_size = block->size - size;
|
||||||
|
|
||||||
|
/// we have claimed this block, redirect pointer to next block
|
||||||
|
block = block->next;
|
||||||
|
|
||||||
|
/// put remaining block to appropriate free list
|
||||||
|
if (remaining_size != 0)
|
||||||
|
putBlock(block_pos + size, remaining_size);
|
||||||
|
|
||||||
|
/// return cut block to caller
|
||||||
|
return block_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
ArenaWithFreeLists(
|
||||||
|
const std::size_t initial_size = 4096, const std::size_t growth_factor = 2,
|
||||||
|
const std::size_t linear_growth_threshold = 128 * 1024 * 1024)
|
||||||
|
: growth_factor{growth_factor}, linear_growth_threshold{linear_growth_threshold},
|
||||||
|
head{new Chunk(initial_size, nullptr)}, size_in_bytes{head->size()}
|
||||||
|
{
|
||||||
|
putBlock(head->begin, head->size());
|
||||||
|
}
|
||||||
|
|
||||||
|
~ArenaWithFreeLists()
|
||||||
|
{
|
||||||
|
delete head;
|
||||||
|
}
|
||||||
|
|
||||||
|
char * alloc(const std::size_t size)
|
||||||
|
{
|
||||||
|
/// find existing list of required size, possibly split a larger one
|
||||||
|
for (const auto list_idx : ext::range(findFreeListIndex(size), ext::size(free_lists)))
|
||||||
|
/// reference to a pointer to head of corresponding free list
|
||||||
|
if (auto & block = free_lists[list_idx])
|
||||||
|
return splitBlock(block, size);
|
||||||
|
|
||||||
|
/// no block of corresponding size, add another chunk
|
||||||
|
addChunk(size);
|
||||||
|
|
||||||
|
/// find the newly created block and split it
|
||||||
|
auto & newly_created_block = free_lists[findFreeListIndex(head->size())];
|
||||||
|
|
||||||
|
return splitBlock(newly_created_block, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void free(const void * ptr, const std::size_t size)
|
||||||
|
{
|
||||||
|
putBlock(ptr, size);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@ -3,6 +3,8 @@
|
|||||||
#include <DB/Dictionaries/IDictionary.h>
|
#include <DB/Dictionaries/IDictionary.h>
|
||||||
#include <DB/Dictionaries/IDictionarySource.h>
|
#include <DB/Dictionaries/IDictionarySource.h>
|
||||||
#include <DB/Dictionaries/DictionaryStructure.h>
|
#include <DB/Dictionaries/DictionaryStructure.h>
|
||||||
|
#include <DB/Common/Arena.h>
|
||||||
|
#include <DB/Common/ArenaWithFreeLists.h>
|
||||||
#include <DB/Common/HashTable/HashMap.h>
|
#include <DB/Common/HashTable/HashMap.h>
|
||||||
#include <DB/Columns/ColumnString.h>
|
#include <DB/Columns/ColumnString.h>
|
||||||
#include <DB/Core/StringRef.h>
|
#include <DB/Core/StringRef.h>
|
||||||
@ -599,15 +601,15 @@ private:
|
|||||||
/// handle memory allocated for old key
|
/// handle memory allocated for old key
|
||||||
if (key == cell.key)
|
if (key == cell.key)
|
||||||
{
|
{
|
||||||
/// new key is same as old key, rollback memory allocated for the new key
|
keys_pool.free(key.data, key.size);
|
||||||
keys_pool.rollback(key.size);
|
|
||||||
key = cell.key;
|
key = cell.key;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/// new key is different from the old one
|
/// new key is different from the old one
|
||||||
/// @todo need a pool capable of deallocations
|
if (cell.key.data)
|
||||||
/*keys_pool.dealloc(cell.key.data, cell.key.size);*/
|
keys_pool.free(cell.key.data, cell.key.size);
|
||||||
|
|
||||||
cell.key = key;
|
cell.key = key;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -650,8 +652,9 @@ private:
|
|||||||
key = cell.key;
|
key = cell.key;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/// @todo need a pool capable of deallocations
|
if (cell.key.data)
|
||||||
/*keys_pool.dealloc(cell.key.data, cell.key.size);*/
|
keys_pool.free(cell.key.data, cell.key.size);
|
||||||
|
|
||||||
/// copy key from temporary pool to `keys_pool`
|
/// copy key from temporary pool to `keys_pool`
|
||||||
key = copyKeyToPool(key, keys_pool);
|
key = copyKeyToPool(key, keys_pool);
|
||||||
cell.key = key;
|
cell.key = key;
|
||||||
@ -786,6 +789,7 @@ private:
|
|||||||
return ts.tv_nsec ^ getpid();
|
return ts.tv_nsec ^ getpid();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Arena>
|
||||||
static StringRef placeKeysInPool(
|
static StringRef placeKeysInPool(
|
||||||
const std::size_t row, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
|
const std::size_t row, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool)
|
||||||
{
|
{
|
||||||
@ -809,6 +813,7 @@ private:
|
|||||||
return { res, sum_keys_size };
|
return { res, sum_keys_size };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Arena>
|
||||||
static StringRef copyKeyToPool(const StringRef key, Arena & pool)
|
static StringRef copyKeyToPool(const StringRef key, Arena & pool)
|
||||||
{
|
{
|
||||||
const auto res = pool.alloc(key.size);
|
const auto res = pool.alloc(key.size);
|
||||||
@ -829,7 +834,7 @@ private:
|
|||||||
std::map<std::string, std::size_t> attribute_index_by_name;
|
std::map<std::string, std::size_t> attribute_index_by_name;
|
||||||
mutable std::vector<attribute_t> attributes;
|
mutable std::vector<attribute_t> attributes;
|
||||||
mutable std::vector<cell_metadata_t> cells;
|
mutable std::vector<cell_metadata_t> cells;
|
||||||
mutable Arena keys_pool;
|
mutable ArenaWithFreeLists keys_pool;
|
||||||
|
|
||||||
mutable std::mt19937_64 rnd_engine{getSeed()};
|
mutable std::mt19937_64 rnd_engine{getSeed()};
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <DB/Core/StringRef.h>
|
#include <DB/Core/StringRef.h>
|
||||||
#include <DB/Common/HashTable/HashMap.h>
|
#include <DB/Common/HashTable/HashMap.h>
|
||||||
#include <DB/Columns/ColumnString.h>
|
#include <DB/Columns/ColumnString.h>
|
||||||
|
#include <DB/Common/Arena.h>
|
||||||
#include <ext/range.hpp>
|
#include <ext/range.hpp>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
@ -9,6 +9,6 @@ namespace ext
|
|||||||
template <typename T, std::size_t N>
|
template <typename T, std::size_t N>
|
||||||
constexpr std::size_t size(const T (&)[N]) noexcept { return N; }
|
constexpr std::size_t size(const T (&)[N]) noexcept { return N; }
|
||||||
|
|
||||||
/** \brief Returns number of elements in a container providing size() member function. */
|
/** \brief Returns number of elements in a container providing size() member function. */
|
||||||
template <typename T> constexpr auto size(const T & t) { return t.size(); }
|
template <typename T> constexpr auto size(const T & t) { return t.size(); }
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user