2021-02-16 21:33:02 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <chrono>
|
2021-03-09 22:58:19 +00:00
|
|
|
#include <variant>
|
2021-02-16 21:33:02 +00:00
|
|
|
|
|
|
|
#include <pcg_random.hpp>
|
|
|
|
|
|
|
|
#include <Common/randomSeed.h>
|
|
|
|
#include <Common/Arena.h>
|
|
|
|
#include <Common/ArenaWithFreeLists.h>
|
|
|
|
#include <Common/HashTable/LRUHashMap.h>
|
|
|
|
#include <Dictionaries/DictionaryStructure.h>
|
|
|
|
#include <Dictionaries/ICacheDictionaryStorage.h>
|
2021-02-17 18:19:04 +00:00
|
|
|
#include <Dictionaries/DictionaryHelpers.h>
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2022-01-08 10:26:11 +00:00
|
|
|
|
2021-02-16 21:33:02 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes used by this header; they are only declared here.
namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}
|
|
|
|
|
|
|
|
struct CacheDictionaryStorageConfiguration
|
|
|
|
{
|
2021-02-17 11:48:06 +00:00
|
|
|
/// Max size of storage in cells
|
2021-02-16 21:33:02 +00:00
|
|
|
const size_t max_size_in_cells;
|
2021-02-17 11:48:06 +00:00
|
|
|
/// Needed to perform check if cell is expired or not found. Default value is dictionary max lifetime.
|
2021-02-16 21:33:02 +00:00
|
|
|
const size_t strict_max_lifetime_seconds;
|
2021-02-17 11:48:06 +00:00
|
|
|
/// Lifetime of dictionary. Cell deadline is random value between lifetime min and max seconds.
|
2021-02-16 21:33:02 +00:00
|
|
|
const DictionaryLifetime lifetime;
|
|
|
|
};
|
|
|
|
|
2021-03-23 12:14:37 +00:00
|
|
|
/** ICacheDictionaryStorage implementation that keeps keys in a hash table with a fixed collision length.
  * Each value in the hash table points to an index in the attribute arrays.
  */
|
2021-02-16 21:33:02 +00:00
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
|
|
class CacheDictionaryStorage final : public ICacheDictionaryStorage
|
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
|
|
|
|
/// Number of consecutive cells that are probed for a key, starting from its hashed position.
static constexpr size_t max_collision_length = 10;
|
|
|
|
|
2021-02-16 21:33:02 +00:00
|
|
|
public:
|
2021-08-17 17:35:43 +00:00
|
|
|
/// Simple dictionaries are keyed by UInt64, every other key type is kept as a StringRef.
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, UInt64, StringRef>;
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-03-09 22:58:19 +00:00
|
|
|
explicit CacheDictionaryStorage(
|
|
|
|
const DictionaryStructure & dictionary_structure,
|
|
|
|
CacheDictionaryStorageConfiguration & configuration_)
|
2021-02-16 21:33:02 +00:00
|
|
|
: configuration(configuration_)
|
|
|
|
, rnd_engine(randomSeed())
|
2021-02-17 11:48:06 +00:00
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
size_t cells_size = roundUpToPowerOfTwoOrZero(std::max(configuration.max_size_in_cells, max_collision_length));
|
|
|
|
|
|
|
|
cells.resize_fill(cells_size);
|
|
|
|
size_overlap_mask = cells_size - 1;
|
|
|
|
|
2021-06-09 22:19:31 +00:00
|
|
|
createAttributes(dictionary_structure);
|
2021-02-17 11:48:06 +00:00
|
|
|
}
|
|
|
|
|
2021-02-17 18:19:04 +00:00
|
|
|
/// This storage always reports that fetched columns follow the order of the requested keys.
bool returnsFetchedColumnsInOrderOfRequestedKeys() const override
{
    return true;
}
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-02-26 15:56:41 +00:00
|
|
|
/// Returns "Cache" for simple keys and "ComplexKeyCache" otherwise.
String getName() const override
{
    return dictionary_key_type == DictionaryKeyType::Simple ? "Cache" : "ComplexKeyCache";
}
|
|
|
|
|
2021-08-17 17:35:43 +00:00
|
|
|
/// True when the storage is instantiated for simple keys.
bool supportsSimpleKeys() const override
{
    return dictionary_key_type == DictionaryKeyType::Simple;
}
|
2021-02-16 21:33:02 +00:00
|
|
|
|
|
|
|
/// Fetches the requested attribute columns for simple keys.
/// Throws NOT_IMPLEMENTED when the storage is not instantiated for simple keys.
SimpleKeysStorageFetchResult fetchColumnsForKeys(
    const PaddedPODArray<UInt64> & keys,
    const DictionaryStorageFetchRequest & fetch_request) override
{
    if constexpr (dictionary_key_type != DictionaryKeyType::Simple)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method fetchColumnsForKeys is not supported for complex key storage");
    }
    else
    {
        return fetchColumnsForKeysImpl<SimpleKeysStorageFetchResult>(keys, fetch_request);
    }
}
|
|
|
|
|
|
|
|
void insertColumnsForKeys(const PaddedPODArray<UInt64> & keys, Columns columns) override
|
|
|
|
{
|
2021-08-17 17:35:43 +00:00
|
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
2021-02-16 21:33:02 +00:00
|
|
|
insertColumnsForKeysImpl(keys, columns);
|
|
|
|
else
|
2021-04-10 18:48:36 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertColumnsForKeys is not supported for complex key storage");
|
2021-02-16 21:33:02 +00:00
|
|
|
}
|
|
|
|
|
2021-03-03 18:58:43 +00:00
|
|
|
void insertDefaultKeys(const PaddedPODArray<UInt64> & keys) override
|
|
|
|
{
|
2021-08-17 17:35:43 +00:00
|
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
2021-03-03 18:58:43 +00:00
|
|
|
insertDefaultKeysImpl(keys);
|
|
|
|
else
|
2021-04-10 18:48:36 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertDefaultKeysImpl is not supported for complex key storage");
|
2021-03-03 18:58:43 +00:00
|
|
|
}
|
|
|
|
|
2021-02-16 21:33:02 +00:00
|
|
|
/// Returns the simple keys currently held by the storage.
/// Throws NOT_IMPLEMENTED when the storage is not instantiated for simple keys.
PaddedPODArray<UInt64> getCachedSimpleKeys() const override
{
    if constexpr (dictionary_key_type != DictionaryKeyType::Simple)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getCachedSimpleKeys is not supported for complex key storage");
    }
    else
    {
        return getCachedKeysImpl();
    }
}
|
|
|
|
|
2021-08-17 17:35:43 +00:00
|
|
|
/// True when the storage is instantiated for complex keys.
bool supportsComplexKeys() const override
{
    return dictionary_key_type == DictionaryKeyType::Complex;
}
|
2021-02-16 21:33:02 +00:00
|
|
|
|
|
|
|
/// Fetches the requested attribute columns for complex keys.
/// Throws NOT_IMPLEMENTED when the storage is not instantiated for complex keys.
ComplexKeysStorageFetchResult fetchColumnsForKeys(
    const PaddedPODArray<StringRef> & keys,
    const DictionaryStorageFetchRequest & column_fetch_requests) override
{
    if constexpr (dictionary_key_type != DictionaryKeyType::Complex)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method fetchColumnsForKeys is not supported for simple key storage");
    }
    else
    {
        return fetchColumnsForKeysImpl<ComplexKeysStorageFetchResult>(keys, column_fetch_requests);
    }
}
|
|
|
|
|
|
|
|
void insertColumnsForKeys(const PaddedPODArray<StringRef> & keys, Columns columns) override
|
|
|
|
{
|
2021-08-17 17:35:43 +00:00
|
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Complex)
|
2021-02-16 21:33:02 +00:00
|
|
|
insertColumnsForKeysImpl(keys, columns);
|
|
|
|
else
|
2021-04-10 18:48:36 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertColumnsForKeys is not supported for simple key storage");
|
2021-02-16 21:33:02 +00:00
|
|
|
}
|
|
|
|
|
2021-03-03 18:58:43 +00:00
|
|
|
void insertDefaultKeys(const PaddedPODArray<StringRef> & keys) override
|
|
|
|
{
|
2021-08-17 17:35:43 +00:00
|
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Complex)
|
2021-03-03 18:58:43 +00:00
|
|
|
insertDefaultKeysImpl(keys);
|
|
|
|
else
|
2021-04-10 18:48:36 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertDefaultKeysImpl is not supported for simple key storage");
|
2021-03-03 18:58:43 +00:00
|
|
|
}
|
|
|
|
|
2021-02-16 21:33:02 +00:00
|
|
|
/// Returns the complex keys currently held by the storage.
/// Throws NOT_IMPLEMENTED when the storage is not instantiated for complex keys.
PaddedPODArray<StringRef> getCachedComplexKeys() const override
{
    if constexpr (dictionary_key_type != DictionaryKeyType::Complex)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getCachedComplexKeys is not supported for simple key storage");
    }
    else
    {
        return getCachedKeysImpl();
    }
}
|
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
/// Number of cells that have been filled at least once.
size_t getSize() const override
{
    return size;
}
|
2021-02-17 18:19:04 +00:00
|
|
|
|
2021-03-18 09:55:17 +00:00
|
|
|
/// Ratio of filled cells to the configured maximum number of cells.
double getLoadFactor() const override
{
    const auto filled_cells = static_cast<double>(size);
    return filled_cells / configuration.max_size_in_cells;
}
|
2021-02-17 18:19:04 +00:00
|
|
|
|
2021-03-09 22:58:19 +00:00
|
|
|
size_t getBytesAllocated() const override
|
|
|
|
{
|
|
|
|
size_t attributes_size_in_bytes = 0;
|
|
|
|
size_t attributes_size = attributes.size();
|
|
|
|
|
|
|
|
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
|
|
|
|
{
|
|
|
|
getAttributeContainer(attribute_index, [&](const auto & container)
|
|
|
|
{
|
|
|
|
attributes_size_in_bytes += container.capacity() * sizeof(container[0]);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
return arena.size() + sizeof(Cell) * configuration.max_size_in_cells + attributes_size_in_bytes;
|
2021-03-09 22:58:19 +00:00
|
|
|
}
|
2021-02-16 21:33:02 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
|
2021-03-09 22:58:19 +00:00
|
|
|
/// What fetchColumnsForKeysImpl remembers about a key whose cell was located.
struct FetchedKey
{
    FetchedKey(size_t element_index_, bool is_default_)
        : element_index(element_index_), is_default(is_default_)
    {
    }

    /// Copy of the cell's index into the attribute containers.
    size_t element_index;

    /// Copy of the cell's is_default flag.
    bool is_default;
};
|
|
|
|
|
2021-02-16 21:33:02 +00:00
|
|
|
template <typename KeysStorageFetchResult>
|
2021-03-13 21:49:45 +00:00
|
|
|
KeysStorageFetchResult fetchColumnsForKeysImpl(
|
2021-02-16 21:33:02 +00:00
|
|
|
const PaddedPODArray<KeyType> & keys,
|
2021-02-27 16:04:32 +00:00
|
|
|
const DictionaryStorageFetchRequest & fetch_request)
|
2021-02-16 21:33:02 +00:00
|
|
|
{
|
2021-02-27 16:04:32 +00:00
|
|
|
KeysStorageFetchResult result;
|
|
|
|
|
2021-02-16 21:33:02 +00:00
|
|
|
result.fetched_columns = fetch_request.makeAttributesResultColumns();
|
2021-03-17 17:14:36 +00:00
|
|
|
result.key_index_to_state.resize_fill(keys.size());
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
|
2021-02-16 21:33:02 +00:00
|
|
|
|
|
|
|
size_t fetched_columns_index = 0;
|
2021-03-09 22:58:19 +00:00
|
|
|
size_t keys_size = keys.size();
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-03-09 22:58:19 +00:00
|
|
|
PaddedPODArray<FetchedKey> fetched_keys;
|
2021-03-15 22:59:04 +00:00
|
|
|
fetched_keys.resize_fill(keys_size);
|
2021-02-27 16:04:32 +00:00
|
|
|
|
|
|
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
2021-02-16 21:33:02 +00:00
|
|
|
{
|
|
|
|
auto key = keys[key_index];
|
2021-03-17 17:14:36 +00:00
|
|
|
auto [key_state, cell_index] = getKeyStateAndCellIndex(key, now);
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
if (unlikely(key_state == KeyState::not_found))
|
2021-02-16 21:33:02 +00:00
|
|
|
{
|
2021-02-26 15:56:41 +00:00
|
|
|
result.key_index_to_state[key_index] = {KeyState::not_found};
|
|
|
|
++result.not_found_keys_size;
|
2021-03-13 21:49:45 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
auto & cell = cells[cell_index];
|
2021-03-13 21:49:45 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
result.expired_keys_size += static_cast<size_t>(key_state == KeyState::expired);
|
2021-03-13 21:49:45 +00:00
|
|
|
|
|
|
|
result.key_index_to_state[key_index] = {key_state, fetched_columns_index};
|
2021-03-17 17:14:36 +00:00
|
|
|
fetched_keys[fetched_columns_index] = FetchedKey(cell.element_index, cell.is_default);
|
2021-03-13 21:49:45 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
++fetched_columns_index;
|
2021-03-13 21:49:45 +00:00
|
|
|
|
|
|
|
result.key_index_to_state[key_index].setDefaultValue(cell.is_default);
|
|
|
|
result.default_keys_size += cell.is_default;
|
2021-02-16 21:33:02 +00:00
|
|
|
}
|
2021-02-27 16:04:32 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
result.found_keys_size = keys_size - (result.expired_keys_size + result.not_found_keys_size);
|
|
|
|
|
2021-03-09 22:58:19 +00:00
|
|
|
for (size_t attribute_index = 0; attribute_index < fetch_request.attributesSize(); ++attribute_index)
|
|
|
|
{
|
|
|
|
if (!fetch_request.shouldFillResultColumnWithIndex(attribute_index))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
auto & attribute = attributes[attribute_index];
|
|
|
|
const auto & default_value_provider = fetch_request.defaultValueProviderAtIndex(attribute_index);
|
2021-03-17 17:14:36 +00:00
|
|
|
|
2021-03-09 22:58:19 +00:00
|
|
|
auto & fetched_column = *result.fetched_columns[attribute_index];
|
2021-06-09 17:21:32 +00:00
|
|
|
fetched_column.reserve(fetched_columns_index);
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-06-09 22:19:31 +00:00
|
|
|
if (unlikely(attribute.is_nullable))
|
2021-03-09 22:58:19 +00:00
|
|
|
{
|
2021-06-09 17:21:32 +00:00
|
|
|
getItemsForFetchedKeys<Field>(
|
|
|
|
attribute,
|
|
|
|
fetched_columns_index,
|
|
|
|
fetched_keys,
|
|
|
|
[&](Field & value) { fetched_column.insert(value); },
|
|
|
|
default_value_provider);
|
2021-03-09 22:58:19 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
auto type_call = [&](const auto & dictionary_attribute_type)
|
|
|
|
{
|
|
|
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
|
|
|
using AttributeType = typename Type::AttributeType;
|
2021-06-09 17:21:32 +00:00
|
|
|
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
|
|
|
using ColumnType = typename ColumnProvider::ColumnType;
|
2021-03-09 22:58:19 +00:00
|
|
|
using ValueType = DictionaryValueType<AttributeType>;
|
|
|
|
|
|
|
|
ColumnType & column_typed = static_cast<ColumnType &>(fetched_column);
|
|
|
|
|
2021-06-09 17:21:32 +00:00
|
|
|
if constexpr (std::is_same_v<ValueType, Array>)
|
2021-03-09 22:58:19 +00:00
|
|
|
{
|
2021-06-09 17:21:32 +00:00
|
|
|
getItemsForFetchedKeys<ValueType>(
|
|
|
|
attribute,
|
|
|
|
fetched_columns_index,
|
|
|
|
fetched_keys,
|
|
|
|
[&](Array & value) { fetched_column.insert(value); },
|
|
|
|
default_value_provider);
|
|
|
|
}
|
|
|
|
else if constexpr (std::is_same_v<ValueType, StringRef>)
|
|
|
|
{
|
|
|
|
getItemsForFetchedKeys<ValueType>(
|
|
|
|
attribute,
|
|
|
|
fetched_columns_index,
|
|
|
|
fetched_keys,
|
|
|
|
[&](StringRef value) { fetched_column.insertData(value.data, value.size); },
|
|
|
|
default_value_provider);
|
2021-03-09 22:58:19 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-03-21 16:26:06 +00:00
|
|
|
auto & data = column_typed.getData();
|
|
|
|
|
2021-06-09 17:21:32 +00:00
|
|
|
getItemsForFetchedKeys<ValueType>(
|
|
|
|
attribute,
|
|
|
|
fetched_columns_index,
|
|
|
|
fetched_keys,
|
|
|
|
[&](auto value) { data.push_back(value); },
|
|
|
|
default_value_provider);
|
2021-03-09 22:58:19 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
callOnDictionaryAttributeType(attribute.type, type_call);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-27 16:04:32 +00:00
|
|
|
return result;
|
2021-02-16 21:33:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void insertColumnsForKeysImpl(const PaddedPODArray<KeyType> & keys, Columns columns)
|
|
|
|
{
|
|
|
|
const auto now = std::chrono::system_clock::now();
|
2021-03-15 22:59:04 +00:00
|
|
|
|
|
|
|
Field column_value;
|
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
for (size_t key_index = 0; key_index < keys.size(); ++key_index)
|
2021-02-16 21:33:02 +00:00
|
|
|
{
|
|
|
|
auto key = keys[key_index];
|
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
size_t cell_index = getCellIndexForInsert(key);
|
|
|
|
auto & cell = cells[cell_index];
|
|
|
|
|
2021-03-21 16:26:06 +00:00
|
|
|
bool cell_was_default = cell.is_default;
|
2021-03-17 17:14:36 +00:00
|
|
|
cell.is_default = false;
|
|
|
|
|
|
|
|
bool was_inserted = cell.deadline == 0;
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-03-15 22:59:04 +00:00
|
|
|
if (was_inserted)
|
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
if constexpr (std::is_same_v<KeyType, StringRef>)
|
2022-01-08 10:26:11 +00:00
|
|
|
cell.key = copyStringInArena(arena, key);
|
2021-03-17 17:14:36 +00:00
|
|
|
else
|
|
|
|
cell.key = key;
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index)
|
2021-03-15 22:59:04 +00:00
|
|
|
{
|
|
|
|
auto & column = columns[attribute_index];
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-15 22:59:04 +00:00
|
|
|
getAttributeContainer(attribute_index, [&](auto & container)
|
|
|
|
{
|
|
|
|
container.emplace_back();
|
|
|
|
cell.element_index = container.size() - 1;
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-03-15 22:59:04 +00:00
|
|
|
using ElementType = std::decay_t<decltype(container[0])>;
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-15 22:59:04 +00:00
|
|
|
column->get(key_index, column_value);
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-15 22:59:04 +00:00
|
|
|
if constexpr (std::is_same_v<ElementType, Field>)
|
2021-06-09 22:19:31 +00:00
|
|
|
{
|
2021-03-15 22:59:04 +00:00
|
|
|
container.back() = column_value;
|
2021-06-09 22:19:31 +00:00
|
|
|
}
|
2021-03-15 22:59:04 +00:00
|
|
|
else if constexpr (std::is_same_v<ElementType, StringRef>)
|
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
const String & string_value = column_value.get<String>();
|
2022-01-08 10:26:11 +00:00
|
|
|
StringRef inserted_value = copyStringInArena(arena, string_value);
|
2021-03-15 22:59:04 +00:00
|
|
|
container.back() = inserted_value;
|
|
|
|
}
|
|
|
|
else
|
2021-06-09 22:19:31 +00:00
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
container.back() = column_value.get<NearestFieldType<ElementType>>();
|
2021-06-09 22:19:31 +00:00
|
|
|
}
|
2021-03-15 22:59:04 +00:00
|
|
|
});
|
2021-03-09 22:58:19 +00:00
|
|
|
}
|
2021-03-17 17:14:36 +00:00
|
|
|
|
|
|
|
++size;
|
2021-02-16 21:33:02 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
if (cell.key != key)
|
2021-03-09 22:58:19 +00:00
|
|
|
{
|
2021-03-15 22:59:04 +00:00
|
|
|
if constexpr (std::is_same_v<KeyType, StringRef>)
|
2021-03-17 17:14:36 +00:00
|
|
|
{
|
|
|
|
char * data = const_cast<char *>(cell.key.data);
|
|
|
|
arena.free(data, cell.key.size);
|
2022-01-08 10:26:11 +00:00
|
|
|
cell.key = copyStringInArena(arena, key);
|
2021-03-17 17:14:36 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
cell.key = key;
|
2021-03-15 22:59:04 +00:00
|
|
|
}
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
/// Put values into existing index
|
|
|
|
size_t index_to_use = cell.element_index;
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
for (size_t attribute_index = 0; attribute_index < columns.size(); ++attribute_index)
|
2021-03-15 22:59:04 +00:00
|
|
|
{
|
|
|
|
auto & column = columns[attribute_index];
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-15 22:59:04 +00:00
|
|
|
getAttributeContainer(attribute_index, [&](auto & container)
|
2021-03-09 22:58:19 +00:00
|
|
|
{
|
2021-03-15 22:59:04 +00:00
|
|
|
using ElementType = std::decay_t<decltype(container[0])>;
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-15 22:59:04 +00:00
|
|
|
column->get(key_index, column_value);
|
|
|
|
|
|
|
|
if constexpr (std::is_same_v<ElementType, Field>)
|
2021-06-09 22:19:31 +00:00
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
container[index_to_use] = column_value;
|
2021-06-09 22:19:31 +00:00
|
|
|
}
|
2021-03-15 22:59:04 +00:00
|
|
|
else if constexpr (std::is_same_v<ElementType, StringRef>)
|
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
const String & string_value = column_value.get<String>();
|
2022-01-08 10:26:11 +00:00
|
|
|
StringRef inserted_value = copyStringInArena(arena, string_value);
|
2021-03-17 17:14:36 +00:00
|
|
|
|
2021-03-21 16:26:06 +00:00
|
|
|
if (!cell_was_default)
|
|
|
|
{
|
|
|
|
StringRef previous_value = container[index_to_use];
|
|
|
|
arena.free(const_cast<char *>(previous_value.data), previous_value.size);
|
|
|
|
}
|
2021-03-17 17:14:36 +00:00
|
|
|
|
|
|
|
container[index_to_use] = inserted_value;
|
2021-03-15 22:59:04 +00:00
|
|
|
}
|
|
|
|
else
|
2021-06-09 22:19:31 +00:00
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
container[index_to_use] = column_value.get<NearestFieldType<ElementType>>();
|
2021-06-09 22:19:31 +00:00
|
|
|
}
|
2021-03-15 22:59:04 +00:00
|
|
|
});
|
|
|
|
}
|
2021-02-16 21:33:02 +00:00
|
|
|
}
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
setCellDeadline(cell, now);
|
2021-03-15 22:59:04 +00:00
|
|
|
}
|
2021-02-16 21:33:02 +00:00
|
|
|
}
|
|
|
|
|
2021-03-03 18:58:43 +00:00
|
|
|
void insertDefaultKeysImpl(const PaddedPODArray<KeyType> & keys)
|
|
|
|
{
|
|
|
|
const auto now = std::chrono::system_clock::now();
|
|
|
|
|
2021-03-15 22:59:04 +00:00
|
|
|
size_t keys_size = keys.size();
|
|
|
|
|
|
|
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
2021-03-03 18:58:43 +00:00
|
|
|
{
|
2021-03-15 22:59:04 +00:00
|
|
|
auto key = keys[key_index];
|
2021-03-03 18:58:43 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
size_t cell_index = getCellIndexForInsert(key);
|
|
|
|
auto & cell = cells[cell_index];
|
2021-03-03 18:58:43 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
bool was_inserted = cell.deadline == 0;
|
2021-03-22 13:02:32 +00:00
|
|
|
bool cell_was_default = cell.is_default;
|
2021-03-17 17:14:36 +00:00
|
|
|
|
|
|
|
cell.is_default = true;
|
2021-03-03 18:58:43 +00:00
|
|
|
|
2021-03-15 22:59:04 +00:00
|
|
|
if (was_inserted)
|
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
if constexpr (std::is_same_v<KeyType, StringRef>)
|
2022-01-08 10:26:11 +00:00
|
|
|
cell.key = copyStringInArena(arena, key);
|
2021-03-17 17:14:36 +00:00
|
|
|
else
|
|
|
|
cell.key = key;
|
2021-03-15 22:59:04 +00:00
|
|
|
|
|
|
|
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
|
|
|
|
{
|
|
|
|
getAttributeContainer(attribute_index, [&](auto & container)
|
|
|
|
{
|
|
|
|
container.emplace_back();
|
2021-03-17 17:14:36 +00:00
|
|
|
cell.element_index = container.size() - 1;
|
2021-03-15 22:59:04 +00:00
|
|
|
});
|
|
|
|
}
|
2021-03-17 17:14:36 +00:00
|
|
|
|
|
|
|
++size;
|
2021-03-15 22:59:04 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-03-22 13:02:32 +00:00
|
|
|
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
|
|
|
|
{
|
|
|
|
getAttributeContainer(attribute_index, [&](const auto & container)
|
|
|
|
{
|
|
|
|
using ElementType = std::decay_t<decltype(container[0])>;
|
|
|
|
|
|
|
|
if constexpr (std::is_same_v<ElementType, StringRef>)
|
|
|
|
{
|
|
|
|
if (!cell_was_default)
|
|
|
|
{
|
|
|
|
StringRef previous_value = container[cell.element_index];
|
|
|
|
arena.free(const_cast<char *>(previous_value.data), previous_value.size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
if (cell.key != key)
|
2021-03-15 22:59:04 +00:00
|
|
|
{
|
|
|
|
if constexpr (std::is_same_v<KeyType, StringRef>)
|
2021-03-17 17:14:36 +00:00
|
|
|
{
|
|
|
|
char * data = const_cast<char *>(cell.key.data);
|
|
|
|
arena.free(data, cell.key.size);
|
2022-01-08 10:26:11 +00:00
|
|
|
cell.key = copyStringInArena(arena, key);
|
2021-03-17 17:14:36 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
cell.key = key;
|
2021-03-15 22:59:04 +00:00
|
|
|
}
|
|
|
|
}
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
setCellDeadline(cell, now);
|
2021-03-15 22:59:04 +00:00
|
|
|
}
|
2021-03-03 18:58:43 +00:00
|
|
|
}
|
|
|
|
|
2021-02-16 21:33:02 +00:00
|
|
|
/// Collects the keys of all cells that have been filled and are not marked as default.
PaddedPODArray<KeyType> getCachedKeysImpl() const
{
    PaddedPODArray<KeyType> cached_keys;
    cached_keys.reserve(size);

    for (const auto & cell : cells)
    {
        /// A zero deadline marks a cell that has never been filled.
        const bool is_filled = cell.deadline != 0;

        if (is_filled && !cell.is_default)
            cached_keys.emplace_back(cell.key);
    }

    return cached_keys;
}
|
|
|
|
|
2021-03-09 22:58:19 +00:00
|
|
|
template <typename GetContainerFunc>
|
|
|
|
void getAttributeContainer(size_t attribute_index, GetContainerFunc && func)
|
2021-02-16 21:33:02 +00:00
|
|
|
{
|
2021-03-09 22:58:19 +00:00
|
|
|
auto & attribute = attributes[attribute_index];
|
|
|
|
auto & attribute_type = attribute.type;
|
2021-03-03 18:58:43 +00:00
|
|
|
|
2021-06-09 22:19:31 +00:00
|
|
|
if (unlikely(attribute.is_nullable))
|
2021-03-09 22:58:19 +00:00
|
|
|
{
|
2021-06-09 17:21:32 +00:00
|
|
|
auto & container = std::get<ContainerType<Field>>(attribute.attribute_container);
|
2021-03-09 22:58:19 +00:00
|
|
|
std::forward<GetContainerFunc>(func)(container);
|
|
|
|
}
|
|
|
|
else
|
2021-03-03 18:58:43 +00:00
|
|
|
{
|
2021-03-09 22:58:19 +00:00
|
|
|
auto type_call = [&](const auto & dictionary_attribute_type)
|
|
|
|
{
|
|
|
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
|
|
|
using AttributeType = typename Type::AttributeType;
|
|
|
|
using ValueType = DictionaryValueType<AttributeType>;
|
|
|
|
|
2021-06-09 17:21:32 +00:00
|
|
|
auto & container = std::get<ContainerType<ValueType>>(attribute.attribute_container);
|
2021-03-09 22:58:19 +00:00
|
|
|
std::forward<GetContainerFunc>(func)(container);
|
|
|
|
};
|
|
|
|
|
|
|
|
callOnDictionaryAttributeType(attribute_type, type_call);
|
2021-03-03 18:58:43 +00:00
|
|
|
}
|
2021-03-09 22:58:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GetContainerFunc>
|
|
|
|
void getAttributeContainer(size_t attribute_index, GetContainerFunc && func) const
|
|
|
|
{
|
|
|
|
return const_cast<std::decay_t<decltype(*this)> *>(this)->template getAttributeContainer(attribute_index, std::forward<GetContainerFunc>(func));
|
|
|
|
}
|
|
|
|
|
2021-06-09 17:21:32 +00:00
|
|
|
template<typename ValueType>
|
2021-06-10 09:49:33 +00:00
|
|
|
using ContainerType = std::conditional_t<
|
|
|
|
std::is_same_v<ValueType, Field> || std::is_same_v<ValueType, Array>,
|
|
|
|
std::vector<ValueType>,
|
|
|
|
PaddedPODArray<ValueType>>;
|
2021-06-09 17:21:32 +00:00
|
|
|
|
|
|
|
struct Attribute
|
|
|
|
{
|
|
|
|
AttributeUnderlyingType type;
|
2021-06-09 22:19:31 +00:00
|
|
|
bool is_nullable;
|
2021-06-09 17:21:32 +00:00
|
|
|
|
|
|
|
std::variant<
|
|
|
|
ContainerType<UInt8>,
|
|
|
|
ContainerType<UInt16>,
|
|
|
|
ContainerType<UInt32>,
|
|
|
|
ContainerType<UInt64>,
|
|
|
|
ContainerType<UInt128>,
|
|
|
|
ContainerType<UInt256>,
|
|
|
|
ContainerType<Int8>,
|
|
|
|
ContainerType<Int16>,
|
|
|
|
ContainerType<Int32>,
|
|
|
|
ContainerType<Int64>,
|
|
|
|
ContainerType<Int128>,
|
|
|
|
ContainerType<Int256>,
|
|
|
|
ContainerType<Decimal32>,
|
|
|
|
ContainerType<Decimal64>,
|
|
|
|
ContainerType<Decimal128>,
|
|
|
|
ContainerType<Decimal256>,
|
|
|
|
ContainerType<Float32>,
|
|
|
|
ContainerType<Float64>,
|
|
|
|
ContainerType<UUID>,
|
|
|
|
ContainerType<StringRef>,
|
|
|
|
ContainerType<Array>,
|
|
|
|
ContainerType<Field>> attribute_container;
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename ValueType, typename ValueSetter>
|
|
|
|
void getItemsForFetchedKeys(
|
|
|
|
Attribute & attribute,
|
|
|
|
size_t fetched_keys_size,
|
|
|
|
PaddedPODArray<FetchedKey> & fetched_keys,
|
|
|
|
ValueSetter && value_setter,
|
|
|
|
const DefaultValueProvider & default_value_provider)
|
|
|
|
{
|
|
|
|
auto & container = std::get<ContainerType<ValueType>>(attribute.attribute_container);
|
|
|
|
|
|
|
|
for (size_t fetched_key_index = 0; fetched_key_index < fetched_keys_size; ++fetched_key_index)
|
|
|
|
{
|
|
|
|
auto fetched_key = fetched_keys[fetched_key_index];
|
|
|
|
|
|
|
|
if (unlikely(fetched_key.is_default))
|
|
|
|
{
|
|
|
|
auto default_value = default_value_provider.getDefaultValue(fetched_key_index);
|
|
|
|
|
|
|
|
if constexpr (std::is_same_v<ValueType, Field>)
|
|
|
|
{
|
|
|
|
value_setter(default_value);
|
|
|
|
}
|
|
|
|
else if constexpr (std::is_same_v<ValueType, StringRef>)
|
|
|
|
{
|
|
|
|
auto & value = default_value.get<String>();
|
|
|
|
value_setter(value);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
value_setter(default_value.get<ValueType>());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
value_setter(container[fetched_key.element_index]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-09 22:19:31 +00:00
|
|
|
void createAttributes(const DictionaryStructure & dictionary_structure)
|
2021-03-13 21:49:45 +00:00
|
|
|
{
|
|
|
|
/// For each dictionary attribute create storage attribute
|
|
|
|
/// For simple attributes create PODArray, for complex vector of Fields
|
|
|
|
|
|
|
|
attributes.reserve(dictionary_structure.attributes.size());
|
|
|
|
|
|
|
|
for (const auto & dictionary_attribute : dictionary_structure.attributes)
|
|
|
|
{
|
|
|
|
auto attribute_type = dictionary_attribute.underlying_type;
|
|
|
|
|
|
|
|
auto type_call = [&](const auto & dictionary_attribute_type)
|
|
|
|
{
|
|
|
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
|
|
|
using AttributeType = typename Type::AttributeType;
|
|
|
|
using ValueType = DictionaryValueType<AttributeType>;
|
|
|
|
|
|
|
|
attributes.emplace_back();
|
|
|
|
auto & last_attribute = attributes.back();
|
|
|
|
last_attribute.type = attribute_type;
|
2021-06-09 22:19:31 +00:00
|
|
|
last_attribute.is_nullable = dictionary_attribute.is_nullable;
|
2021-03-13 21:49:45 +00:00
|
|
|
|
|
|
|
if (dictionary_attribute.is_nullable)
|
2021-06-09 17:21:32 +00:00
|
|
|
last_attribute.attribute_container = ContainerType<Field>();
|
2021-03-13 21:49:45 +00:00
|
|
|
else
|
2021-06-09 17:21:32 +00:00
|
|
|
last_attribute.attribute_container = ContainerType<ValueType>();
|
2021-03-13 21:49:45 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
callOnDictionaryAttributeType(attribute_type, type_call);
|
|
|
|
}
|
|
|
|
}
|
2021-03-09 22:58:19 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
/// Time point type taken by setCellDeadline.
using TimePoint = std::chrono::system_clock::time_point;
|
|
|
|
|
2021-03-09 22:58:19 +00:00
|
|
|
struct Cell
|
|
|
|
{
|
2021-03-17 17:14:36 +00:00
|
|
|
KeyType key;
|
2021-03-09 22:58:19 +00:00
|
|
|
size_t element_index;
|
|
|
|
bool is_default;
|
2021-03-17 17:14:36 +00:00
|
|
|
time_t deadline;
|
2021-02-16 21:33:02 +00:00
|
|
|
};
|
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
CacheDictionaryStorageConfiguration configuration;
|
2021-02-17 11:48:06 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
pcg64 rnd_engine;
|
|
|
|
|
|
|
|
size_t size_overlap_mask = 0;
|
2021-02-17 11:48:06 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
size_t size = 0;
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
PaddedPODArray<Cell> cells;
|
2021-03-15 22:59:04 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
ArenaWithFreeLists arena;
|
|
|
|
|
|
|
|
std::vector<Attribute> attributes;
|
2021-03-15 22:59:04 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
/// Assigns the expiration time of `cell`.
///
/// With a zero min and max lifetime the deadline is pushed to the far future. Otherwise it is `now`
/// plus a random number of seconds drawn uniformly from [lifetime.min_sec, lifetime.max_sec].
inline void setCellDeadline(Cell & cell, TimePoint now)
{
    const bool has_no_lifetime = configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0;

    if (has_no_lifetime)
    {
        /// The lookup code adds strict_max_lifetime_seconds to the deadline before comparing it with
        /// the current time, so the deadline stays twice that amount below the maximum time point
        /// to keep that addition from overflowing.
        auto far_future = std::chrono::time_point<std::chrono::system_clock>::max() - 2 * std::chrono::seconds(configuration.strict_max_lifetime_seconds);
        cell.deadline = std::chrono::system_clock::to_time_t(far_future);
    }
    else
    {
        size_t min_sec_lifetime = configuration.lifetime.min_sec;
        size_t max_sec_lifetime = configuration.lifetime.max_sec;

        std::uniform_int_distribution<UInt64> distribution{min_sec_lifetime, max_sec_lifetime};

        auto random_deadline = now + std::chrono::seconds(distribution(rnd_engine));
        cell.deadline = std::chrono::system_clock::to_time_t(random_deadline);
    }
}
|
2021-02-16 21:33:02 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
inline size_t getCellIndex(const KeyType key) const
|
|
|
|
{
|
|
|
|
const size_t hash = DefaultHash<KeyType>()(key);
|
|
|
|
const size_t index = hash & size_overlap_mask;
|
|
|
|
return index;
|
|
|
|
}
|
2021-03-13 21:49:45 +00:00
|
|
|
|
2021-03-17 17:14:36 +00:00
|
|
|
using KeyStateAndCellIndex = std::pair<KeyState::State, size_t>;
|
|
|
|
|
|
|
|
inline KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const
|
|
|
|
{
|
|
|
|
size_t place_value = getCellIndex(key);
|
|
|
|
const size_t place_value_end = place_value + max_collision_length;
|
|
|
|
|
|
|
|
time_t max_lifetime_seconds = static_cast<time_t>(configuration.strict_max_lifetime_seconds);
|
|
|
|
|
|
|
|
for (; place_value < place_value_end; ++place_value)
|
|
|
|
{
|
|
|
|
const auto cell_place_value = place_value & size_overlap_mask;
|
|
|
|
const auto & cell = cells[cell_place_value];
|
|
|
|
|
|
|
|
if (cell.key != key)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (unlikely(now > cell.deadline + max_lifetime_seconds))
|
|
|
|
return std::make_pair(KeyState::not_found, cell_place_value);
|
|
|
|
|
|
|
|
if (unlikely(now > cell.deadline))
|
|
|
|
return std::make_pair(KeyState::expired, cell_place_value);
|
|
|
|
|
|
|
|
return std::make_pair(KeyState::found, cell_place_value);
|
|
|
|
}
|
|
|
|
|
2021-03-18 09:55:17 +00:00
|
|
|
return std::make_pair(KeyState::not_found, place_value & size_overlap_mask);
|
2021-03-17 17:14:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
inline size_t getCellIndexForInsert(const KeyType & key) const
|
|
|
|
{
|
|
|
|
size_t place_value = getCellIndex(key);
|
|
|
|
const size_t place_value_end = place_value + max_collision_length;
|
|
|
|
size_t oldest_place_value = place_value;
|
|
|
|
|
|
|
|
time_t oldest_time = std::numeric_limits<time_t>::max();
|
|
|
|
|
|
|
|
for (; place_value < place_value_end; ++place_value)
|
|
|
|
{
|
|
|
|
const size_t cell_place_value = place_value & size_overlap_mask;
|
|
|
|
const Cell cell = cells[cell_place_value];
|
|
|
|
|
|
|
|
if (cell.deadline == 0)
|
|
|
|
return cell_place_value;
|
|
|
|
|
|
|
|
if (cell.key == key)
|
2021-03-18 09:55:17 +00:00
|
|
|
return cell_place_value;
|
2021-03-17 17:14:36 +00:00
|
|
|
|
|
|
|
if (cell.deadline < oldest_time)
|
|
|
|
{
|
|
|
|
oldest_time = cell.deadline;
|
|
|
|
oldest_place_value = cell_place_value;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return oldest_place_value;
|
|
|
|
}
|
2021-02-16 21:33:02 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|