mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-30 19:42:00 +00:00
875 lines
32 KiB
C++
875 lines
32 KiB
C++
#include "HashedArrayDictionary.h"
|
|
|
|
#include <Common/ArenaUtils.h>
|
|
#include <Core/Defines.h>
|
|
#include <DataTypes/DataTypesDecimal.h>
|
|
#include <Columns/ColumnsNumber.h>
|
|
#include <Columns/ColumnNullable.h>
|
|
#include <Functions/FunctionHelpers.h>
|
|
|
|
#include <Dictionaries/DictionarySource.h>
|
|
#include <Dictionaries/DictionaryFactory.h>
|
|
#include <Dictionaries/HierarchyDictionariesUtils.h>
|
|
|
|
|
|
namespace DB
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
{
|
|
extern const int BAD_ARGUMENTS;
|
|
extern const int DICTIONARY_IS_EMPTY;
|
|
extern const int UNSUPPORTED_METHOD;
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
HashedArrayDictionary<dictionary_key_type>::HashedArrayDictionary(
|
|
const StorageID & dict_id_,
|
|
const DictionaryStructure & dict_struct_,
|
|
DictionarySourcePtr source_ptr_,
|
|
const HashedArrayDictionaryStorageConfiguration & configuration_,
|
|
BlockPtr update_field_loaded_block_)
|
|
: IDictionary(dict_id_)
|
|
, dict_struct(dict_struct_)
|
|
, source_ptr(std::move(source_ptr_))
|
|
, configuration(configuration_)
|
|
, update_field_loaded_block(std::move(update_field_loaded_block_))
|
|
{
|
|
createAttributes();
|
|
loadData();
|
|
buildHierarchyParentToChildIndexIfNeeded();
|
|
calculateBytesAllocated();
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getColumn(
|
|
const std::string & attribute_name,
|
|
const DataTypePtr & result_type,
|
|
const Columns & key_columns,
|
|
const DataTypes & key_types [[maybe_unused]],
|
|
const ColumnPtr & default_values_column) const
|
|
{
|
|
if (dictionary_key_type == DictionaryKeyType::Complex)
|
|
dict_struct.validateKeyTypes(key_types);
|
|
|
|
ColumnPtr result;
|
|
|
|
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
|
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
|
|
|
|
const size_t keys_size = extractor.getKeysSize();
|
|
|
|
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
|
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
|
|
auto & attribute = attributes[attribute_index];
|
|
|
|
return getAttributeColumn(attribute, dictionary_attribute, keys_size, default_values_column, extractor);
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
Columns HashedArrayDictionary<dictionary_key_type>::getColumns(
|
|
const Strings & attribute_names,
|
|
const DataTypes & result_types,
|
|
const Columns & key_columns,
|
|
const DataTypes & key_types,
|
|
const Columns & default_values_columns) const
|
|
{
|
|
if (dictionary_key_type == DictionaryKeyType::Complex)
|
|
dict_struct.validateKeyTypes(key_types);
|
|
|
|
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
|
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
|
|
|
|
const size_t keys_size = extractor.getKeysSize();
|
|
|
|
PaddedPODArray<ssize_t> key_index_to_element_index;
|
|
|
|
/** Optimization for multiple attributes.
|
|
* For each key save element index in key_index_to_element_index array.
|
|
* Later in type_call for attribute use getItemsImpl specialization with key_index_to_element_index array
|
|
* instead of DictionaryKeyExtractor.
|
|
*/
|
|
if (attribute_names.size() > 1)
|
|
{
|
|
const auto & key_attribute_container = key_attribute.container;
|
|
size_t keys_found = 0;
|
|
|
|
key_index_to_element_index.resize(keys_size);
|
|
|
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
|
{
|
|
auto key = extractor.extractCurrentKey();
|
|
|
|
auto it = key_attribute_container.find(key);
|
|
if (it == key_attribute_container.end())
|
|
{
|
|
key_index_to_element_index[key_index] = -1;
|
|
}
|
|
else
|
|
{
|
|
key_index_to_element_index[key_index] = it->getMapped();
|
|
++keys_found;
|
|
}
|
|
|
|
extractor.rollbackCurrentKey();
|
|
}
|
|
|
|
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
}
|
|
|
|
size_t attribute_names_size = attribute_names.size();
|
|
|
|
Columns result_columns;
|
|
result_columns.reserve(attribute_names_size);
|
|
|
|
for (size_t i = 0; i < attribute_names_size; ++i)
|
|
{
|
|
ColumnPtr result_column;
|
|
|
|
const auto & attribute_name = attribute_names[i];
|
|
const auto & result_type = result_types[i];
|
|
const auto & default_values_column = default_values_columns[i];
|
|
|
|
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
|
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
|
|
auto & attribute = attributes[attribute_index];
|
|
|
|
if (attribute_names_size > 1)
|
|
result_column = getAttributeColumn(attribute, dictionary_attribute, keys_size, default_values_column, key_index_to_element_index);
|
|
else
|
|
result_column = getAttributeColumn(attribute, dictionary_attribute, keys_size, default_values_column, extractor);
|
|
|
|
result_columns.emplace_back(std::move(result_column));
|
|
}
|
|
|
|
return result_columns;
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
|
{
|
|
if (dictionary_key_type == DictionaryKeyType::Complex)
|
|
dict_struct.validateKeyTypes(key_types);
|
|
|
|
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
|
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
|
|
|
|
size_t keys_size = extractor.getKeysSize();
|
|
|
|
auto result = ColumnUInt8::create(keys_size, false);
|
|
auto & out = result->getData();
|
|
|
|
size_t keys_found = 0;
|
|
|
|
for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index)
|
|
{
|
|
auto requested_key = extractor.extractCurrentKey();
|
|
|
|
out[requested_key_index] = key_attribute.container.find(requested_key) != key_attribute.container.end();
|
|
|
|
keys_found += out[requested_key_index];
|
|
extractor.rollbackCurrentKey();
|
|
}
|
|
|
|
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
|
|
return result;
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const
|
|
{
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
|
{
|
|
PaddedPODArray<UInt64> keys_backup_storage;
|
|
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
|
|
|
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
|
|
|
|
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
|
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
|
|
|
std::optional<UInt64> null_value;
|
|
|
|
if (!dictionary_attribute.null_value.isNull())
|
|
null_value = dictionary_attribute.null_value.get<UInt64>();
|
|
|
|
const auto & key_attribute_container = key_attribute.container;
|
|
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
|
|
|
|
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
|
|
|
|
size_t keys_found = 0;
|
|
|
|
auto get_parent_func = [&](auto & hierarchy_key)
|
|
{
|
|
std::optional<UInt64> result;
|
|
|
|
auto it = key_attribute_container.find(hierarchy_key);
|
|
|
|
if (it == key_attribute_container.end())
|
|
return result;
|
|
|
|
size_t key_index = it->getMapped();
|
|
|
|
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
|
|
return result;
|
|
|
|
UInt64 parent_key = parent_keys_container[key_index];
|
|
if (null_value && *null_value == parent_key)
|
|
return result;
|
|
|
|
result = parent_key;
|
|
keys_found += 1;
|
|
|
|
return result;
|
|
};
|
|
|
|
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_func);
|
|
|
|
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
|
|
return dictionary_hierarchy_array;
|
|
}
|
|
else
|
|
{
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
|
|
ColumnPtr key_column [[maybe_unused]],
|
|
ColumnPtr in_key_column [[maybe_unused]],
|
|
const DataTypePtr &) const
|
|
{
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
|
{
|
|
PaddedPODArray<UInt64> keys_backup_storage;
|
|
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
|
|
|
PaddedPODArray<UInt64> keys_in_backup_storage;
|
|
const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage);
|
|
|
|
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
|
|
|
|
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
|
auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
|
|
|
std::optional<UInt64> null_value;
|
|
|
|
if (!dictionary_attribute.null_value.isNull())
|
|
null_value = dictionary_attribute.null_value.get<UInt64>();
|
|
|
|
const auto & key_attribute_container = key_attribute.container;
|
|
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
|
|
|
|
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
|
|
|
|
size_t keys_found = 0;
|
|
|
|
auto get_parent_func = [&](auto & hierarchy_key)
|
|
{
|
|
std::optional<UInt64> result;
|
|
|
|
auto it = key_attribute_container.find(hierarchy_key);
|
|
|
|
if (it == key_attribute_container.end())
|
|
return result;
|
|
|
|
size_t key_index = it->getMapped();
|
|
|
|
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
|
|
return result;
|
|
|
|
UInt64 parent_key = parent_keys_container[key_index];
|
|
if (null_value && *null_value == parent_key)
|
|
return result;
|
|
|
|
result = parent_key;
|
|
keys_found += 1;
|
|
|
|
return result;
|
|
};
|
|
|
|
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_func);
|
|
|
|
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
|
|
return result;
|
|
}
|
|
else
|
|
{
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary<dictionary_key_type>::getHierarchicalIndex() const
|
|
{
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
|
{
|
|
if (hierarchical_index)
|
|
return hierarchical_index;
|
|
|
|
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
|
|
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
|
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
|
|
|
|
const auto & key_attribute_container = key_attribute.container;
|
|
|
|
HashMap<size_t, UInt64> index_to_key;
|
|
index_to_key.reserve(key_attribute.container.size());
|
|
|
|
for (auto & [key, value] : key_attribute_container)
|
|
index_to_key[value] = key;
|
|
|
|
HashMap<UInt64, PaddedPODArray<UInt64>> parent_to_child;
|
|
parent_to_child.reserve(index_to_key.size());
|
|
|
|
size_t parent_keys_container_size = parent_keys_container.size();
|
|
for (size_t i = 0; i < parent_keys_container_size; ++i)
|
|
{
|
|
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[i])
|
|
continue;
|
|
|
|
const auto * it = index_to_key.find(i);
|
|
if (it == index_to_key.end())
|
|
continue;
|
|
|
|
auto child_key = it->getMapped();
|
|
auto parent_key = parent_keys_container[i];
|
|
parent_to_child[parent_key].emplace_back(child_key);
|
|
}
|
|
|
|
return std::make_shared<DictionaryHierarchicalParentToChildIndex>(parent_to_child);
|
|
}
|
|
else
|
|
{
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getDescendants(
|
|
ColumnPtr key_column [[maybe_unused]],
|
|
const DataTypePtr &,
|
|
size_t level [[maybe_unused]],
|
|
DictionaryHierarchicalParentToChildIndexPtr parent_to_child_index [[maybe_unused]]) const
|
|
{
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
|
{
|
|
PaddedPODArray<UInt64> keys_backup;
|
|
const auto & keys = getColumnVectorData(this, key_column, keys_backup);
|
|
|
|
size_t keys_found = 0;
|
|
auto result = getKeysDescendantsArray(keys, *parent_to_child_index, level, keys_found);
|
|
|
|
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
|
|
return result;
|
|
}
|
|
else
|
|
{
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
void HashedArrayDictionary<dictionary_key_type>::createAttributes()
|
|
{
|
|
const auto size = dict_struct.attributes.size();
|
|
attributes.reserve(size);
|
|
|
|
for (const auto & dictionary_attribute : dict_struct.attributes)
|
|
{
|
|
auto type_call = [&, this](const auto & dictionary_attribute_type)
|
|
{
|
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
|
using AttributeType = typename Type::AttributeType;
|
|
using ValueType = DictionaryValueType<AttributeType>;
|
|
|
|
auto is_index_null = dictionary_attribute.is_nullable ? std::make_optional<std::vector<bool>>() : std::optional<std::vector<bool>>{};
|
|
Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerType<ValueType>(), std::move(is_index_null)};
|
|
attributes.emplace_back(std::move(attribute));
|
|
};
|
|
|
|
callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
|
|
}
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
void HashedArrayDictionary<dictionary_key_type>::updateData()
|
|
{
|
|
if (!update_field_loaded_block || update_field_loaded_block->rows() == 0)
|
|
{
|
|
QueryPipeline pipeline(source_ptr->loadUpdatedAll());
|
|
|
|
PullingPipelineExecutor executor(pipeline);
|
|
Block block;
|
|
while (executor.pull(block))
|
|
{
|
|
/// We are using this to keep saved data if input stream consists of multiple blocks
|
|
if (!update_field_loaded_block)
|
|
update_field_loaded_block = std::make_shared<DB::Block>(block.cloneEmpty());
|
|
|
|
for (size_t attribute_index = 0; attribute_index < block.columns(); ++attribute_index)
|
|
{
|
|
const IColumn & update_column = *block.getByPosition(attribute_index).column.get();
|
|
MutableColumnPtr saved_column = update_field_loaded_block->getByPosition(attribute_index).column->assumeMutable();
|
|
saved_column->insertRangeFrom(update_column, 0, update_column.size());
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
auto pipe = source_ptr->loadUpdatedAll();
|
|
mergeBlockWithPipe<dictionary_key_type>(
|
|
dict_struct.getKeysSize(),
|
|
*update_field_loaded_block,
|
|
std::move(pipe));
|
|
}
|
|
|
|
if (update_field_loaded_block)
|
|
{
|
|
resize(update_field_loaded_block->rows());
|
|
blockToAttributes(*update_field_loaded_block.get());
|
|
}
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
void HashedArrayDictionary<dictionary_key_type>::blockToAttributes(const Block & block [[maybe_unused]])
|
|
{
|
|
size_t skip_keys_size_offset = dict_struct.getKeysSize();
|
|
|
|
Columns key_columns;
|
|
key_columns.reserve(skip_keys_size_offset);
|
|
|
|
/// Split into keys columns and attribute columns
|
|
for (size_t i = 0; i < skip_keys_size_offset; ++i)
|
|
key_columns.emplace_back(block.safeGetByPosition(i).column);
|
|
|
|
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
|
DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns, arena_holder.getComplexKeyArena());
|
|
const size_t keys_size = keys_extractor.getKeysSize();
|
|
|
|
Field column_value_to_insert;
|
|
|
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
|
{
|
|
auto key = keys_extractor.extractCurrentKey();
|
|
|
|
auto it = key_attribute.container.find(key);
|
|
|
|
if (it != key_attribute.container.end())
|
|
{
|
|
keys_extractor.rollbackCurrentKey();
|
|
continue;
|
|
}
|
|
|
|
if constexpr (std::is_same_v<KeyType, StringRef>)
|
|
key = copyStringInArena(string_arena, key);
|
|
|
|
key_attribute.container.insert({key, element_count});
|
|
|
|
for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index)
|
|
{
|
|
const IColumn & attribute_column = *block.safeGetByPosition(skip_keys_size_offset + attribute_index).column;
|
|
auto & attribute = attributes[attribute_index];
|
|
bool attribute_is_nullable = attribute.is_index_null.has_value();
|
|
|
|
attribute_column.get(key_index, column_value_to_insert);
|
|
|
|
auto type_call = [&](const auto & dictionary_attribute_type)
|
|
{
|
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
|
using AttributeType = typename Type::AttributeType;
|
|
using AttributeValueType = DictionaryValueType<AttributeType>;
|
|
|
|
auto & attribute_container = std::get<AttributeContainerType<AttributeValueType>>(attribute.container);
|
|
attribute_container.emplace_back();
|
|
|
|
if (attribute_is_nullable)
|
|
{
|
|
attribute.is_index_null->emplace_back();
|
|
|
|
if (column_value_to_insert.isNull())
|
|
{
|
|
(*attribute.is_index_null).back() = true;
|
|
return;
|
|
}
|
|
}
|
|
|
|
if constexpr (std::is_same_v<AttributeValueType, StringRef>)
|
|
{
|
|
String & value_to_insert = column_value_to_insert.get<String>();
|
|
StringRef string_in_arena_reference = copyStringInArena(string_arena, value_to_insert);
|
|
attribute_container.back() = string_in_arena_reference;
|
|
}
|
|
else
|
|
{
|
|
auto value_to_insert = static_cast<AttributeValueType>(column_value_to_insert.get<AttributeValueType>());
|
|
attribute_container.back() = value_to_insert;
|
|
}
|
|
};
|
|
|
|
callOnDictionaryAttributeType(attribute.type, type_call);
|
|
}
|
|
|
|
++element_count;
|
|
keys_extractor.rollbackCurrentKey();
|
|
}
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
void HashedArrayDictionary<dictionary_key_type>::resize(size_t added_rows)
|
|
{
|
|
if (unlikely(!added_rows))
|
|
return;
|
|
|
|
key_attribute.container.reserve(added_rows);
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
template <typename KeysProvider>
|
|
ColumnPtr HashedArrayDictionary<dictionary_key_type>::getAttributeColumn(
|
|
const Attribute & attribute,
|
|
const DictionaryAttribute & dictionary_attribute,
|
|
size_t keys_size,
|
|
ColumnPtr default_values_column,
|
|
KeysProvider && keys_object) const
|
|
{
|
|
ColumnPtr result;
|
|
|
|
bool is_attribute_nullable = attribute.is_index_null.has_value();
|
|
|
|
ColumnUInt8::MutablePtr col_null_map_to;
|
|
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
|
if (attribute.is_index_null)
|
|
{
|
|
col_null_map_to = ColumnUInt8::create(keys_size, false);
|
|
vec_null_map_to = &col_null_map_to->getData();
|
|
}
|
|
|
|
auto type_call = [&](const auto & dictionary_attribute_type)
|
|
{
|
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
|
using AttributeType = typename Type::AttributeType;
|
|
using ValueType = DictionaryValueType<AttributeType>;
|
|
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
|
|
|
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(dictionary_attribute.null_value, default_values_column);
|
|
|
|
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
|
|
|
|
if constexpr (std::is_same_v<ValueType, Array>)
|
|
{
|
|
auto * out = column.get();
|
|
|
|
getItemsImpl<ValueType, false>(
|
|
attribute,
|
|
keys_object,
|
|
[&](const size_t, const Array & value, bool) { out->insert(value); },
|
|
default_value_extractor);
|
|
}
|
|
else if constexpr (std::is_same_v<ValueType, StringRef>)
|
|
{
|
|
auto * out = column.get();
|
|
|
|
if (is_attribute_nullable)
|
|
getItemsImpl<ValueType, true>(
|
|
attribute,
|
|
keys_object,
|
|
[&](size_t row, StringRef value, bool is_null)
|
|
{
|
|
(*vec_null_map_to)[row] = is_null;
|
|
out->insertData(value.data, value.size);
|
|
},
|
|
default_value_extractor);
|
|
else
|
|
getItemsImpl<ValueType, false>(
|
|
attribute,
|
|
keys_object,
|
|
[&](size_t, StringRef value, bool) { out->insertData(value.data, value.size); },
|
|
default_value_extractor);
|
|
}
|
|
else
|
|
{
|
|
auto & out = column->getData();
|
|
|
|
if (is_attribute_nullable)
|
|
getItemsImpl<ValueType, true>(
|
|
attribute,
|
|
keys_object,
|
|
[&](size_t row, const auto value, bool is_null)
|
|
{
|
|
(*vec_null_map_to)[row] = is_null;
|
|
out[row] = value;
|
|
},
|
|
default_value_extractor);
|
|
else
|
|
getItemsImpl<ValueType, false>(
|
|
attribute,
|
|
keys_object,
|
|
[&](size_t row, const auto value, bool) { out[row] = value; },
|
|
default_value_extractor);
|
|
}
|
|
|
|
result = std::move(column);
|
|
};
|
|
|
|
callOnDictionaryAttributeType(attribute.type, type_call);
|
|
|
|
if (is_attribute_nullable)
|
|
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
|
|
|
return result;
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
|
|
void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
|
|
const Attribute & attribute,
|
|
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
|
|
ValueSetter && set_value [[maybe_unused]],
|
|
DefaultValueExtractor & default_value_extractor) const
|
|
{
|
|
const auto & key_attribute_container = key_attribute.container;
|
|
const auto & attribute_container = std::get<AttributeContainerType<AttributeType>>(attribute.container);
|
|
const size_t keys_size = keys_extractor.getKeysSize();
|
|
|
|
size_t keys_found = 0;
|
|
|
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
|
{
|
|
auto key = keys_extractor.extractCurrentKey();
|
|
|
|
const auto it = key_attribute_container.find(key);
|
|
|
|
if (it != key_attribute_container.end())
|
|
{
|
|
size_t element_index = it->getMapped();
|
|
|
|
const auto & element = attribute_container[element_index];
|
|
|
|
if constexpr (is_nullable)
|
|
set_value(key_index, element, (*attribute.is_index_null)[element_index]);
|
|
else
|
|
set_value(key_index, element, false);
|
|
|
|
++keys_found;
|
|
}
|
|
else
|
|
{
|
|
if constexpr (is_nullable)
|
|
set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index));
|
|
else
|
|
set_value(key_index, default_value_extractor[key_index], false);
|
|
}
|
|
|
|
keys_extractor.rollbackCurrentKey();
|
|
}
|
|
|
|
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
|
|
void HashedArrayDictionary<dictionary_key_type>::getItemsImpl(
|
|
const Attribute & attribute,
|
|
const PaddedPODArray<ssize_t> & key_index_to_element_index,
|
|
ValueSetter && set_value,
|
|
DefaultValueExtractor & default_value_extractor) const
|
|
{
|
|
const auto & attribute_container = std::get<AttributeContainerType<AttributeType>>(attribute.container);
|
|
const size_t keys_size = key_index_to_element_index.size();
|
|
|
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
|
{
|
|
bool key_exists = key_index_to_element_index[key_index] != -1;
|
|
|
|
if (key_exists)
|
|
{
|
|
size_t element_index = static_cast<size_t>(key_index_to_element_index[key_index]);
|
|
const auto & element = attribute_container[element_index];
|
|
|
|
if constexpr (is_nullable)
|
|
set_value(key_index, element, (*attribute.is_index_null)[element_index]);
|
|
else
|
|
set_value(key_index, element, false);
|
|
}
|
|
else
|
|
{
|
|
if constexpr (is_nullable)
|
|
set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index));
|
|
else
|
|
set_value(key_index, default_value_extractor[key_index], false);
|
|
}
|
|
}
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
void HashedArrayDictionary<dictionary_key_type>::loadData()
|
|
{
|
|
if (!source_ptr->hasUpdateField())
|
|
{
|
|
QueryPipeline pipeline;
|
|
pipeline = QueryPipeline(source_ptr->loadAll());
|
|
|
|
PullingPipelineExecutor executor(pipeline);
|
|
Block block;
|
|
while (executor.pull(block))
|
|
{
|
|
resize(block.rows());
|
|
blockToAttributes(block);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
updateData();
|
|
}
|
|
|
|
if (configuration.require_nonempty && 0 == element_count)
|
|
throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY,
|
|
"{}: dictionary source is empty and 'require_nonempty' property is set.",
|
|
getFullName());
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
void HashedArrayDictionary<dictionary_key_type>::buildHierarchyParentToChildIndexIfNeeded()
|
|
{
|
|
if (!dict_struct.hierarchical_attribute_index)
|
|
return;
|
|
|
|
if (dict_struct.attributes[*dict_struct.hierarchical_attribute_index].bidirectional)
|
|
hierarchical_index = getHierarchicalIndex();
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
void HashedArrayDictionary<dictionary_key_type>::calculateBytesAllocated()
|
|
{
|
|
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
|
|
|
bytes_allocated += key_attribute.container.size();
|
|
|
|
for (auto & attribute : attributes)
|
|
{
|
|
auto type_call = [&](const auto & dictionary_attribute_type)
|
|
{
|
|
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
|
using AttributeType = typename Type::AttributeType;
|
|
using ValueType = DictionaryValueType<AttributeType>;
|
|
|
|
const auto & container = std::get<AttributeContainerType<ValueType>>(attribute.container);
|
|
bytes_allocated += sizeof(AttributeContainerType<ValueType>);
|
|
|
|
if constexpr (std::is_same_v<ValueType, Array>)
|
|
{
|
|
/// It is not accurate calculations
|
|
bytes_allocated += sizeof(Array) * container.size();
|
|
}
|
|
else
|
|
{
|
|
bytes_allocated += container.allocated_bytes();
|
|
}
|
|
|
|
bucket_count = container.capacity();
|
|
};
|
|
|
|
callOnDictionaryAttributeType(attribute.type, type_call);
|
|
|
|
if (attribute.is_index_null.has_value())
|
|
bytes_allocated += (*attribute.is_index_null).size();
|
|
}
|
|
|
|
if (update_field_loaded_block)
|
|
bytes_allocated += update_field_loaded_block->allocatedBytes();
|
|
|
|
if (hierarchical_index)
|
|
{
|
|
hierarchical_index_bytes_allocated = hierarchical_index->getSizeInBytes();
|
|
bytes_allocated += hierarchical_index_bytes_allocated;
|
|
}
|
|
|
|
bytes_allocated += string_arena.allocatedBytes();
|
|
}
|
|
|
|
template <DictionaryKeyType dictionary_key_type>
|
|
Pipe HashedArrayDictionary<dictionary_key_type>::read(const Names & column_names, size_t max_block_size, size_t num_streams) const
|
|
{
|
|
PaddedPODArray<HashedArrayDictionary::KeyType> keys;
|
|
keys.reserve(key_attribute.container.size());
|
|
|
|
for (auto & [key, _] : key_attribute.container)
|
|
keys.emplace_back(key);
|
|
|
|
ColumnsWithTypeAndName key_columns;
|
|
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
|
{
|
|
auto keys_column = getColumnFromPODArray(std::move(keys));
|
|
key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
|
|
}
|
|
else
|
|
{
|
|
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size());
|
|
}
|
|
|
|
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
|
|
auto coordinator = std::make_shared<DictionarySourceCoordinator>(dictionary, column_names, std::move(key_columns), max_block_size);
|
|
auto result = coordinator->read(num_streams);
|
|
|
|
return result;
|
|
}
|
|
|
|
template class HashedArrayDictionary<DictionaryKeyType::Simple>;
|
|
template class HashedArrayDictionary<DictionaryKeyType::Complex>;
|
|
|
|
void registerDictionaryArrayHashed(DictionaryFactory & factory)
|
|
{
|
|
auto create_layout = [](const std::string & full_name,
|
|
const DictionaryStructure & dict_struct,
|
|
const Poco::Util::AbstractConfiguration & config,
|
|
const std::string & config_prefix,
|
|
DictionarySourcePtr source_ptr,
|
|
DictionaryKeyType dictionary_key_type) -> DictionaryPtr
|
|
{
|
|
if (dictionary_key_type == DictionaryKeyType::Simple && dict_struct.key)
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is not supported for simple key hashed array dictionary");
|
|
else if (dictionary_key_type == DictionaryKeyType::Complex && dict_struct.id)
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for complex key hashed array dictionary");
|
|
|
|
if (dict_struct.range_min || dict_struct.range_max)
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
"{}: elements .structure.range_min and .structure.range_max should be defined only "
|
|
"for a dictionary of layout 'range_hashed'",
|
|
full_name);
|
|
|
|
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
|
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
|
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
|
|
|
HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime};
|
|
|
|
if (dictionary_key_type == DictionaryKeyType::Simple)
|
|
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Simple>>(dict_id, dict_struct, std::move(source_ptr), configuration);
|
|
else
|
|
return std::make_unique<HashedArrayDictionary<DictionaryKeyType::Complex>>(dict_id, dict_struct, std::move(source_ptr), configuration);
|
|
};
|
|
|
|
using namespace std::placeholders;
|
|
|
|
factory.registerLayout("hashed_array",
|
|
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple); }, false);
|
|
factory.registerLayout("complex_key_hashed_array",
|
|
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex); }, true);
|
|
}
|
|
|
|
}
|