ClickHouse/src/Dictionaries/CacheDictionaryStorage.h

339 lines
12 KiB
C++
Raw Normal View History

2021-02-16 21:33:02 +00:00
#pragma once
#include <chrono>
#include <pcg_random.hpp>
#include <Common/randomSeed.h>
#include <Common/Arena.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/HashTable/LRUHashMap.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/ICacheDictionaryStorage.h>
#include <Dictionaries/DictionaryHelpers.h>
2021-02-16 21:33:02 +00:00
namespace DB
{
/// Error codes referenced in this header. They are only declared here (extern);
/// NOT_IMPLEMENTED is used when a storage is asked for keys of the type it was not instantiated for.
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
struct CacheDictionaryStorageConfiguration
{
2021-02-17 11:48:06 +00:00
/// Max size of storage in cells
2021-02-16 21:33:02 +00:00
const size_t max_size_in_cells;
2021-02-17 11:48:06 +00:00
/// Needed to perform check if cell is expired or not found. Default value is dictionary max lifetime.
2021-02-16 21:33:02 +00:00
const size_t strict_max_lifetime_seconds;
2021-02-17 11:48:06 +00:00
/// Lifetime of dictionary. Cell deadline is random value between lifetime min and max seconds.
2021-02-16 21:33:02 +00:00
const DictionaryLifetime lifetime;
};
/** Keys are stored in LRUCache and column values are serialized into arena.
  *
  * Cell in LRUCache consists of allocated size and place in arena where columns serialized data is stored.
  *
  * Columns are serialized by rows.
  *
  * When cell is removed from LRUCache data associated with it is also removed from arena.
  *
  * In case of complex key we also store key data in arena, and it is removed from arena together with the cell.
  */
template <DictionaryKeyType dictionary_key_type>
class CacheDictionaryStorage final : public ICacheDictionaryStorage
{
public:
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::simple, UInt64, StringRef>;
static_assert(dictionary_key_type != DictionaryKeyType::range, "Range key type is not supported by CacheDictionaryStorage");
explicit CacheDictionaryStorage(CacheDictionaryStorageConfiguration & configuration_)
: configuration(configuration_)
, rnd_engine(randomSeed())
2021-02-17 11:48:06 +00:00
, cache(configuration.max_size_in_cells, false, { *this })
{
}
bool returnsFetchedColumnsInOrderOfRequestedKeys() const override { return true; }
2021-02-16 21:33:02 +00:00
String getName() const override
{
if (dictionary_key_type == DictionaryKeyType::simple)
return "Cache";
else
return "ComplexKeyCache";
}
bool supportsSimpleKeys() const override { return dictionary_key_type == DictionaryKeyType::simple; }
2021-02-16 21:33:02 +00:00
SimpleKeysStorageFetchResult fetchColumnsForKeys(
const PaddedPODArray<UInt64> & keys,
2021-02-17 11:48:06 +00:00
const DictionaryStorageFetchRequest & fetch_request) override
2021-02-16 21:33:02 +00:00
{
if constexpr (dictionary_key_type == DictionaryKeyType::simple)
{
2021-02-27 16:04:32 +00:00
return fetchColumnsForKeysImpl<SimpleKeysStorageFetchResult>(keys, fetch_request);
2021-02-16 21:33:02 +00:00
}
else
2021-02-27 16:04:32 +00:00
throw Exception("Method fetchColumnsForKeys is not supported for complex key storage", ErrorCodes::NOT_IMPLEMENTED);
2021-02-16 21:33:02 +00:00
}
void insertColumnsForKeys(const PaddedPODArray<UInt64> & keys, Columns columns) override
{
if constexpr (dictionary_key_type == DictionaryKeyType::simple)
insertColumnsForKeysImpl(keys, columns);
else
throw Exception("Method insertColumnsForKeys is not supported for complex key storage", ErrorCodes::NOT_IMPLEMENTED);
}
PaddedPODArray<UInt64> getCachedSimpleKeys() const override
{
if constexpr (dictionary_key_type == DictionaryKeyType::simple)
return getCachedKeysImpl();
else
throw Exception("Method getCachedSimpleKeys is not supported for complex key storage", ErrorCodes::NOT_IMPLEMENTED);
}
bool supportsComplexKeys() const override { return dictionary_key_type == DictionaryKeyType::complex; }
2021-02-16 21:33:02 +00:00
ComplexKeysStorageFetchResult fetchColumnsForKeys(
const PaddedPODArray<StringRef> & keys,
2021-02-17 11:48:06 +00:00
const DictionaryStorageFetchRequest & column_fetch_requests) override
2021-02-16 21:33:02 +00:00
{
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
{
2021-02-27 16:04:32 +00:00
return fetchColumnsForKeysImpl<ComplexKeysStorageFetchResult>(keys, column_fetch_requests);
2021-02-16 21:33:02 +00:00
}
else
throw Exception("Method fetchColumnsForKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
}
void insertColumnsForKeys(const PaddedPODArray<StringRef> & keys, Columns columns) override
{
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
insertColumnsForKeysImpl(keys, columns);
else
throw Exception("Method insertColumnsForKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
}
PaddedPODArray<StringRef> getCachedComplexKeys() const override
{
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
return getCachedKeysImpl();
else
2021-02-27 16:04:32 +00:00
throw Exception("Method getCachedComplexKeys is not supported for simple key storage", ErrorCodes::NOT_IMPLEMENTED);
2021-02-16 21:33:02 +00:00
}
size_t getSize() const override { return cache.size(); }
size_t getMaxSize() const override { return cache.getMaxSize(); }
size_t getBytesAllocated() const override { return arena.size() + cache.getSizeInBytes(); }
2021-02-16 21:33:02 +00:00
private:
template <typename KeysStorageFetchResult>
2021-02-27 16:04:32 +00:00
ALWAYS_INLINE KeysStorageFetchResult fetchColumnsForKeysImpl(
2021-02-16 21:33:02 +00:00
const PaddedPODArray<KeyType> & keys,
2021-02-27 16:04:32 +00:00
const DictionaryStorageFetchRequest & fetch_request)
2021-02-16 21:33:02 +00:00
{
2021-02-27 16:04:32 +00:00
KeysStorageFetchResult result;
2021-02-16 21:33:02 +00:00
result.fetched_columns = fetch_request.makeAttributesResultColumns();
result.key_index_to_state.resize_fill(keys.size(), {KeyState::not_found});
2021-02-16 21:33:02 +00:00
const auto now = std::chrono::system_clock::now();
size_t fetched_columns_index = 0;
2021-02-27 16:04:32 +00:00
std::chrono::seconds max_lifetime_seconds(configuration.strict_max_lifetime_seconds);
size_t keys_size = keys.size();
for (size_t key_index = 0; key_index < keys_size; ++key_index)
2021-02-16 21:33:02 +00:00
{
auto key = keys[key_index];
auto * it = cache.find(key);
if (it)
{
/// Columns values for key are serialized in cache now deserialize them
2021-02-19 10:30:53 +00:00
const auto & cell = it->getMapped();
bool has_deadline = cellHasDeadline(cell);
2021-02-16 21:33:02 +00:00
2021-02-27 16:04:32 +00:00
if (has_deadline && now > cell.deadline + max_lifetime_seconds)
2021-02-16 21:33:02 +00:00
{
result.key_index_to_state[key_index] = {KeyState::not_found};
++result.not_found_keys_size;
2021-02-16 21:33:02 +00:00
continue;
}
2021-02-19 10:30:53 +00:00
else if (has_deadline && now > cell.deadline)
2021-02-16 21:33:02 +00:00
{
result.key_index_to_state[key_index] = {KeyState::expired, fetched_columns_index};
++result.expired_keys_size;
2021-02-16 21:33:02 +00:00
}
else
{
result.key_index_to_state[key_index] = {KeyState::found, fetched_columns_index};
++result.found_keys_size;
}
2021-02-16 21:33:02 +00:00
++fetched_columns_index;
const char * place_for_serialized_columns = cell.place_for_serialized_columns;
deserializeAndInsertIntoColumns(result.fetched_columns, fetch_request, place_for_serialized_columns);
2021-02-16 21:33:02 +00:00
}
else
{
result.key_index_to_state[key_index] = {KeyState::not_found};
++result.not_found_keys_size;
2021-02-16 21:33:02 +00:00
}
}
2021-02-27 16:04:32 +00:00
return result;
2021-02-16 21:33:02 +00:00
}
void insertColumnsForKeysImpl(const PaddedPODArray<KeyType> & keys, Columns columns)
{
Arena temporary_values_pool;
size_t columns_to_serialize_size = columns.size();
PaddedPODArray<StringRef> temporary_column_data(columns_to_serialize_size);
const auto now = std::chrono::system_clock::now();
size_t keys_size = keys.size();
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
size_t allocated_size_for_columns = 0;
const char * block_start = nullptr;
auto key = keys[key_index];
auto * it = cache.find(key);
for (size_t column_index = 0; column_index < columns_to_serialize_size; ++column_index)
{
auto & column = columns[column_index];
temporary_column_data[column_index] = column->serializeValueIntoArena(key_index, temporary_values_pool, block_start);
allocated_size_for_columns += temporary_column_data[column_index].size;
}
char * place_for_serialized_columns = arena.alloc(allocated_size_for_columns);
memcpy(reinterpret_cast<void*>(place_for_serialized_columns), reinterpret_cast<const void*>(block_start), allocated_size_for_columns);
if (it)
{
/// Cell exists need to free previous serialized place and update deadline
auto & cell = it->getMapped();
arena.free(cell.place_for_serialized_columns, cell.allocated_size_for_columns);
setCellDeadline(cell, now);
cell.allocated_size_for_columns = allocated_size_for_columns;
cell.place_for_serialized_columns = place_for_serialized_columns;
}
else
{
/// No cell exists so create and put in cache
Cell cell;
setCellDeadline(cell, now);
cell.allocated_size_for_columns = allocated_size_for_columns;
cell.place_for_serialized_columns = place_for_serialized_columns;
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
{
/// Copy complex key into arena and put in cache
size_t key_size = key.size;
char * place_for_key = arena.alloc(key_size);
memcpy(reinterpret_cast<void*>(place_for_key), reinterpret_cast<const void*>(key.data), key_size);
KeyType updated_key { place_for_key, key_size };
key = updated_key;
}
cache.insert(key, cell);
}
temporary_values_pool.rollback(allocated_size_for_columns);
}
}
PaddedPODArray<KeyType> getCachedKeysImpl() const
{
PaddedPODArray<KeyType> result;
result.reserve(cache.size());
for (auto & node : cache)
result.emplace_back(node.getKey());
return result;
}
using TimePoint = std::chrono::system_clock::time_point;
struct Cell
{
TimePoint deadline;
size_t allocated_size_for_columns;
char * place_for_serialized_columns;
};
2021-02-19 10:30:53 +00:00
inline static bool cellHasDeadline(const Cell & cell)
{
return cell.deadline != std::chrono::system_clock::from_time_t(0);
}
2021-02-16 21:33:02 +00:00
inline void setCellDeadline(Cell & cell, TimePoint now)
{
2021-02-19 10:30:53 +00:00
if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0)
cell.deadline = std::chrono::system_clock::from_time_t(0);
2021-02-16 21:33:02 +00:00
size_t min_sec_lifetime = configuration.lifetime.min_sec;
size_t max_sec_lifetime = configuration.lifetime.max_sec;
std::uniform_int_distribution<UInt64> distribution{min_sec_lifetime, max_sec_lifetime};
2021-02-19 10:30:53 +00:00
cell.deadline = now + std::chrono::seconds(distribution(rnd_engine));
2021-02-16 21:33:02 +00:00
}
template <typename>
friend class ArenaCellDisposer;
CacheDictionaryStorageConfiguration configuration;
ArenaWithFreeLists arena;
pcg64 rnd_engine;
2021-02-17 11:48:06 +00:00
class ArenaCellDisposer
{
public:
CacheDictionaryStorage<dictionary_key_type> & storage;
template <typename Key, typename Value>
void operator()(const Key & key, const Value & value) const
{
/// In case of complex key we keep it in arena
if constexpr (std::is_same_v<Key, StringRef>)
{
storage.arena.free(const_cast<char *>(key.data), key.size);
}
storage.arena.free(value.place_for_serialized_columns, value.allocated_size_for_columns);
}
};
using SimpleKeyLRUHashMap = LRUHashMap<UInt64, Cell, ArenaCellDisposer>;
using ComplexKeyLRUHashMap = LRUHashMapWithSavedHash<StringRef, Cell, ArenaCellDisposer>;
2021-02-16 21:33:02 +00:00
using CacheLRUHashMap = std::conditional_t<
dictionary_key_type == DictionaryKeyType::simple,
SimpleKeyLRUHashMap,
ComplexKeyLRUHashMap>;
CacheLRUHashMap cache;
};
}