mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Hierarchical dictionaries support nullable parent key
This commit is contained in:
parent
0b40e05ffc
commit
20b55a45b2
@ -177,6 +177,9 @@ ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const Data
|
||||
|
||||
ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &) const
|
||||
{
|
||||
if (key_column->isNullable())
|
||||
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
|
||||
|
||||
PaddedPODArray<UInt64> keys_backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
||||
|
||||
@ -184,7 +187,11 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &
|
||||
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
||||
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
||||
|
||||
const UInt64 null_value = dictionary_attribute.null_value.get<UInt64>();
|
||||
std::optional<UInt64> null_value;
|
||||
|
||||
if (!dictionary_attribute.null_value.isNull())
|
||||
null_value = dictionary_attribute.null_value.get<UInt64>();
|
||||
|
||||
const ContainerType<UInt64> & parent_keys = std::get<ContainerType<UInt64>>(hierarchical_attribute.container);
|
||||
|
||||
auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; };
|
||||
@ -193,13 +200,26 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &
|
||||
|
||||
auto get_parent_key_func = [&, this](auto & hierarchy_key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
|
||||
bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key];
|
||||
std::optional<UInt64> result = is_key_valid ? std::make_optional(parent_keys[hierarchy_key]) : std::nullopt;
|
||||
keys_found += result.has_value();
|
||||
|
||||
if (!is_key_valid)
|
||||
return result;
|
||||
|
||||
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = parent_keys[hierarchy_key];
|
||||
if (null_value && *null_value == parent_key)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
keys_found += 1;
|
||||
return result;
|
||||
};
|
||||
|
||||
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_key_func);
|
||||
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_key_func);
|
||||
|
||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
@ -212,9 +232,22 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
|
||||
ColumnPtr in_key_column,
|
||||
const DataTypePtr &) const
|
||||
{
|
||||
if (key_column->isNullable())
|
||||
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
|
||||
|
||||
PaddedPODArray<UInt64> keys_backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
||||
|
||||
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
|
||||
|
||||
if (in_key_column->isNullable())
|
||||
{
|
||||
const auto * in_key_column_typed = assert_cast<const ColumnNullable *>(in_key_column.get());
|
||||
|
||||
in_key_column = in_key_column_typed->getNestedColumnPtr();
|
||||
in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData();
|
||||
}
|
||||
|
||||
PaddedPODArray<UInt64> keys_in_backup_storage;
|
||||
const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage);
|
||||
|
||||
@ -222,7 +255,11 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
|
||||
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
||||
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
||||
|
||||
const UInt64 null_value = dictionary_attribute.null_value.get<UInt64>();
|
||||
std::optional<UInt64> null_value;
|
||||
|
||||
if (!dictionary_attribute.null_value.isNull())
|
||||
null_value = dictionary_attribute.null_value.get<UInt64>();
|
||||
|
||||
const ContainerType<UInt64> & parent_keys = std::get<ContainerType<UInt64>>(hierarchical_attribute.container);
|
||||
|
||||
auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; };
|
||||
@ -231,13 +268,37 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
|
||||
|
||||
auto get_parent_key_func = [&, this](auto & hierarchy_key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
|
||||
bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key];
|
||||
std::optional<UInt64> result = is_key_valid ? std::make_optional(parent_keys[hierarchy_key]) : std::nullopt;
|
||||
keys_found += result.has_value();
|
||||
|
||||
if (!is_key_valid)
|
||||
return result;
|
||||
|
||||
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = parent_keys[hierarchy_key];
|
||||
if (null_value && *null_value == parent_key)
|
||||
return result;
|
||||
|
||||
result = parent_keys[hierarchy_key];
|
||||
keys_found += 1;
|
||||
return result;
|
||||
};
|
||||
|
||||
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_key_func);
|
||||
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func);
|
||||
|
||||
if (unlikely(in_key_column_nullable_mask))
|
||||
{
|
||||
auto mutable_result_ptr = result->assumeMutable();
|
||||
auto & mutable_result = assert_cast<ColumnUInt8 &>(*mutable_result_ptr);
|
||||
auto & mutable_result_data = mutable_result.getData();
|
||||
size_t mutable_result_data_size = mutable_result_data.size();
|
||||
|
||||
for (size_t i = 0; i < mutable_result_data_size; ++i)
|
||||
mutable_result_data[i] &= !(static_cast<bool>((*in_key_column_nullable_mask)[i]));
|
||||
}
|
||||
|
||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
|
@ -183,6 +183,9 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
||||
{
|
||||
if (key_column->isNullable())
|
||||
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
|
||||
|
||||
PaddedPODArray<UInt64> keys_backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
||||
|
||||
@ -191,9 +194,12 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
|
||||
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
||||
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
||||
|
||||
const auto & key_attribute_container = key_attribute.container;
|
||||
std::optional<UInt64> null_value;
|
||||
|
||||
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
|
||||
if (!dictionary_attribute.null_value.isNull())
|
||||
null_value = dictionary_attribute.null_value.get<UInt64>();
|
||||
|
||||
const auto & key_attribute_container = key_attribute.container;
|
||||
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
|
||||
|
||||
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
|
||||
@ -206,15 +212,25 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
|
||||
|
||||
auto it = key_attribute_container.find(hierarchy_key);
|
||||
|
||||
if (it != key_attribute_container.end())
|
||||
result = parent_keys_container[it->getMapped()];
|
||||
if (it == key_attribute_container.end())
|
||||
return result;
|
||||
|
||||
keys_found += result.has_value();
|
||||
size_t key_index = it->getMapped();
|
||||
|
||||
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = parent_keys_container[key_index];
|
||||
if (null_value && *null_value == parent_key)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
keys_found += 1;
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
|
||||
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_func);
|
||||
|
||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
@ -235,9 +251,22 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
||||
{
|
||||
if (key_column->isNullable())
|
||||
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
|
||||
|
||||
PaddedPODArray<UInt64> keys_backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
||||
|
||||
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
|
||||
|
||||
if (in_key_column->isNullable())
|
||||
{
|
||||
const auto * in_key_column_typed = assert_cast<const ColumnNullable *>(in_key_column.get());
|
||||
|
||||
in_key_column = in_key_column_typed->getNestedColumnPtr();
|
||||
in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData();
|
||||
}
|
||||
|
||||
PaddedPODArray<UInt64> keys_in_backup_storage;
|
||||
const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage);
|
||||
|
||||
@ -246,9 +275,12 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
|
||||
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
||||
auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
||||
|
||||
const auto & key_attribute_container = key_attribute.container;
|
||||
std::optional<UInt64> null_value;
|
||||
|
||||
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
|
||||
if (!dictionary_attribute.null_value.isNull())
|
||||
null_value = dictionary_attribute.null_value.get<UInt64>();
|
||||
|
||||
const auto & key_attribute_container = key_attribute.container;
|
||||
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
|
||||
|
||||
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
|
||||
@ -261,15 +293,36 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
|
||||
|
||||
auto it = key_attribute_container.find(hierarchy_key);
|
||||
|
||||
if (it != key_attribute_container.end())
|
||||
result = parent_keys_container[it->getMapped()];
|
||||
if (it == key_attribute_container.end())
|
||||
return result;
|
||||
|
||||
keys_found += result.has_value();
|
||||
size_t key_index = it->getMapped();
|
||||
|
||||
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = parent_keys_container[key_index];
|
||||
if (null_value && *null_value == parent_key)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
keys_found += 1;
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
|
||||
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_func);
|
||||
|
||||
if (unlikely(in_key_column_nullable_mask))
|
||||
{
|
||||
auto mutable_result_ptr = result->assumeMutable();
|
||||
auto & mutable_result = assert_cast<ColumnUInt8 &>(*mutable_result_ptr);
|
||||
auto & mutable_result_data = mutable_result.getData();
|
||||
size_t mutable_result_data_size = mutable_result_data.size();
|
||||
|
||||
for (size_t i = 0; i < mutable_result_data_size; ++i)
|
||||
mutable_result_data[i] &= !(static_cast<bool>((*in_key_column_nullable_mask)[i]));
|
||||
}
|
||||
|
||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
|
@ -227,6 +227,9 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
||||
{
|
||||
if (key_column->isNullable())
|
||||
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
|
||||
|
||||
PaddedPODArray<UInt64> keys_backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
||||
|
||||
@ -235,10 +238,20 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
|
||||
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
||||
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
||||
|
||||
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
|
||||
std::optional<UInt64> null_value;
|
||||
|
||||
if (!dictionary_attribute.null_value.isNull())
|
||||
null_value = dictionary_attribute.null_value.get<UInt64>();
|
||||
|
||||
const CollectionType<UInt64> & parent_keys_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
|
||||
|
||||
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
|
||||
auto is_key_valid_func = [&](auto & hierarchy_key)
|
||||
{
|
||||
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
|
||||
return true;
|
||||
|
||||
return parent_keys_map.find(hierarchy_key) != parent_keys_map.end();
|
||||
};
|
||||
|
||||
size_t keys_found = 0;
|
||||
|
||||
@ -248,15 +261,23 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
|
||||
|
||||
auto it = parent_keys_map.find(hierarchy_key);
|
||||
|
||||
if (it != parent_keys_map.end())
|
||||
result = getValueFromCell(it);
|
||||
if (it == parent_keys_map.end())
|
||||
return result;
|
||||
|
||||
keys_found += result.has_value();
|
||||
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = getValueFromCell(it);
|
||||
if (null_value && *null_value == parent_key)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
keys_found += 1;
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
|
||||
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_func);
|
||||
|
||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
@ -264,7 +285,9 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
|
||||
return dictionary_hierarchy_array;
|
||||
}
|
||||
else
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type, bool sparse>
|
||||
@ -275,6 +298,19 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
|
||||
{
|
||||
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
|
||||
{
|
||||
if (key_column->isNullable())
|
||||
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
|
||||
|
||||
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
|
||||
|
||||
if (in_key_column->isNullable())
|
||||
{
|
||||
const auto * in_key_column_typed = assert_cast<const ColumnNullable *>(in_key_column.get());
|
||||
|
||||
in_key_column = in_key_column_typed->getNestedColumnPtr();
|
||||
in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData();
|
||||
}
|
||||
|
||||
PaddedPODArray<UInt64> keys_backup_storage;
|
||||
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
|
||||
|
||||
@ -286,28 +322,57 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
|
||||
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
|
||||
auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
|
||||
|
||||
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
|
||||
std::optional<UInt64> null_value;
|
||||
|
||||
if (!dictionary_attribute.null_value.isNull())
|
||||
null_value = dictionary_attribute.null_value.get<UInt64>();
|
||||
|
||||
const CollectionType<UInt64> & parent_keys_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
|
||||
|
||||
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
|
||||
auto is_key_valid_func = [&](auto & hierarchy_key)
|
||||
{
|
||||
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
|
||||
return true;
|
||||
|
||||
return parent_keys_map.find(hierarchy_key) != parent_keys_map.end();
|
||||
};
|
||||
|
||||
size_t keys_found = 0;
|
||||
|
||||
auto get_parent_func = [&](auto & hierarchy_key)
|
||||
auto get_parent_key_func = [&](auto & hierarchy_key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
|
||||
auto it = parent_keys_map.find(hierarchy_key);
|
||||
|
||||
if (it != parent_keys_map.end())
|
||||
result = getValueFromCell(it);
|
||||
if (it == parent_keys_map.end())
|
||||
return result;
|
||||
|
||||
keys_found += result.has_value();
|
||||
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = getValueFromCell(it);
|
||||
if (null_value && *null_value == parent_key)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
keys_found += 1;
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
|
||||
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func);
|
||||
|
||||
if (unlikely(in_key_column_nullable_mask))
|
||||
{
|
||||
auto mutable_result_ptr = result->assumeMutable();
|
||||
auto & mutable_result = assert_cast<ColumnUInt8 &>(*mutable_result_ptr);
|
||||
auto & mutable_result_data = mutable_result.getData();
|
||||
size_t mutable_result_data_size = mutable_result_data.size();
|
||||
|
||||
for (size_t i = 0; i < mutable_result_data_size; ++i)
|
||||
mutable_result_data[i] &= !(static_cast<bool>((*in_key_column_nullable_mask)[i]));
|
||||
}
|
||||
|
||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
|
@ -142,18 +142,25 @@ ColumnPtr getKeysHierarchyDefaultImplementation(
|
||||
|
||||
auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; };
|
||||
|
||||
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
|
||||
auto get_parent_key_func = [&](auto & key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
auto it = key_to_parent_key.find(key);
|
||||
std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
|
||||
valid_keys += result.has_value();
|
||||
if (it == nullptr) {
|
||||
return result;
|
||||
}
|
||||
|
||||
UInt64 parent_key = it->getMapped();
|
||||
if (parent_key == null_value)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
valid_keys += 1;
|
||||
return result;
|
||||
};
|
||||
|
||||
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
|
||||
|
||||
auto dictionary_hierarchy_array = getKeysHierarchyArray(requested_keys, null_value, is_key_valid_func, get_parent_key_func);
|
||||
return dictionary_hierarchy_array;
|
||||
return getKeysHierarchyArray(requested_keys, is_key_valid_func, get_parent_key_func);
|
||||
}
|
||||
|
||||
ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
|
||||
@ -185,19 +192,26 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
|
||||
|
||||
auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; };
|
||||
|
||||
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
|
||||
auto get_parent_key_func = [&](auto & key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
auto it = key_to_parent_key.find(key);
|
||||
std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
|
||||
valid_keys += result.has_value();
|
||||
if (it == nullptr) {
|
||||
return result;
|
||||
}
|
||||
|
||||
UInt64 parent_key = it->getMapped();
|
||||
if (parent_key == null_value)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
valid_keys += 1;
|
||||
return result;
|
||||
};
|
||||
|
||||
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
|
||||
const auto & in_keys = in_key_column_typed->getData();
|
||||
|
||||
auto result = getKeysIsInHierarchyColumn(requested_keys, in_keys, null_value, is_key_valid_func, get_parent_key_func);
|
||||
return result;
|
||||
return getKeysIsInHierarchyColumn(requested_keys, in_keys, is_key_valid_func, get_parent_key_func);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ public:
|
||||
keys.reserve(parent_to_children_map_size);
|
||||
parent_to_children_keys_range.reserve(parent_to_children_map_size);
|
||||
|
||||
for (auto & [parent, children] : parent_to_children_map_)
|
||||
for (const auto & [parent, children] : parent_to_children_map_)
|
||||
{
|
||||
size_t keys_size = keys.size();
|
||||
UInt32 start_index = static_cast<UInt32>(keys_size);
|
||||
@ -97,7 +97,6 @@ namespace detail
|
||||
template <typename IsKeyValidFunc, typename GetParentKeyFunc>
|
||||
ElementsAndOffsets getHierarchy(
|
||||
const PaddedPODArray<UInt64> & keys,
|
||||
const UInt64 & hierarchy_null_value,
|
||||
IsKeyValidFunc && is_key_valid_func,
|
||||
GetParentKeyFunc && get_parent_key_func)
|
||||
{
|
||||
@ -156,7 +155,7 @@ namespace detail
|
||||
break;
|
||||
}
|
||||
|
||||
if (hierarchy_key == hierarchy_null_value || current_hierarchy_depth >= DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH)
|
||||
if (current_hierarchy_depth >= DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH)
|
||||
break;
|
||||
|
||||
already_processes_keys_to_offset[hierarchy_key] = {offsets.size(), current_hierarchy_depth};
|
||||
@ -190,7 +189,6 @@ namespace detail
|
||||
PaddedPODArray<UInt8> getIsInHierarchy(
|
||||
const PaddedPODArray<UInt64> & keys,
|
||||
const PaddedPODArray<UInt64> & in_keys,
|
||||
const UInt64 & hierarchy_null_value,
|
||||
IsKeyValidFunc && is_key_valid_func,
|
||||
GetParentKeyFunc && get_parent_func)
|
||||
{
|
||||
@ -201,7 +199,6 @@ namespace detail
|
||||
|
||||
detail::ElementsAndOffsets hierarchy = detail::getHierarchy(
|
||||
keys,
|
||||
hierarchy_null_value,
|
||||
std::forward<IsKeyValidFunc>(is_key_valid_func),
|
||||
std::forward<GetParentKeyFunc>(get_parent_func));
|
||||
|
||||
@ -213,7 +210,7 @@ namespace detail
|
||||
size_t i_elements_start = i > 0 ? offsets[i - 1] : 0;
|
||||
size_t i_elements_end = offsets[i];
|
||||
|
||||
auto & key_to_find = in_keys[i];
|
||||
const auto & key_to_find = in_keys[i];
|
||||
|
||||
const auto * begin = elements.begin() + i_elements_start;
|
||||
const auto * end = elements.begin() + i_elements_end;
|
||||
@ -263,8 +260,8 @@ namespace detail
|
||||
Strategy strategy,
|
||||
size_t & valid_keys)
|
||||
{
|
||||
auto & parent_to_children_keys_range = parent_to_child_index.parent_to_children_keys_range;
|
||||
auto & children_keys = parent_to_child_index.keys;
|
||||
const auto & parent_to_children_keys_range = parent_to_child_index.parent_to_children_keys_range;
|
||||
const auto & children_keys = parent_to_child_index.keys;
|
||||
|
||||
/// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants.
|
||||
/// If strategy is GetDescendantsAtSpecificLevelStrategy we does not use cache strategy.
|
||||
@ -436,13 +433,11 @@ namespace detail
|
||||
template <typename KeyType, typename IsKeyValidFunc, typename GetParentKeyFunc>
|
||||
ColumnPtr getKeysHierarchyArray(
|
||||
const PaddedPODArray<KeyType> & keys,
|
||||
const KeyType & hierarchy_null_value,
|
||||
IsKeyValidFunc && is_key_valid_func,
|
||||
GetParentKeyFunc && get_parent_func)
|
||||
{
|
||||
auto elements_and_offsets = detail::getHierarchy(
|
||||
keys,
|
||||
hierarchy_null_value,
|
||||
std::forward<IsKeyValidFunc>(is_key_valid_func),
|
||||
std::forward<GetParentKeyFunc>(get_parent_func));
|
||||
|
||||
@ -454,14 +449,12 @@ template <typename KeyType, typename IsKeyValidFunc, typename GetParentKeyFunc>
|
||||
ColumnUInt8::Ptr getKeysIsInHierarchyColumn(
|
||||
const PaddedPODArray<KeyType> & hierarchy_keys,
|
||||
const PaddedPODArray<KeyType> & hierarchy_in_keys,
|
||||
const KeyType & hierarchy_null_value,
|
||||
IsKeyValidFunc && is_key_valid_func,
|
||||
GetParentKeyFunc && get_parent_func)
|
||||
{
|
||||
auto is_in_hierarchy_data = detail::getIsInHierarchy(
|
||||
hierarchy_keys,
|
||||
hierarchy_in_keys,
|
||||
hierarchy_null_value,
|
||||
std::forward<IsKeyValidFunc>(is_key_valid_func),
|
||||
std::forward<GetParentKeyFunc>(get_parent_func));
|
||||
|
||||
|
@ -17,19 +17,26 @@ TEST(HierarchyDictionariesUtils, getHierarchy)
|
||||
|
||||
auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; };
|
||||
|
||||
UInt64 hierarchy_null_value_key = 0;
|
||||
auto get_parent_key_func = [&](auto key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
auto it = child_to_parent.find(key);
|
||||
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
|
||||
return value;
|
||||
if (it == nullptr)
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = it->getMapped();
|
||||
if (parent_key == hierarchy_null_value_key)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
return result;
|
||||
};
|
||||
|
||||
UInt64 hierarchy_null_value_key = 0;
|
||||
PaddedPODArray<UInt64> keys = {1, 2, 3, 4, 5};
|
||||
|
||||
auto result = DB::detail::getHierarchy(
|
||||
keys,
|
||||
hierarchy_null_value_key,
|
||||
is_key_valid_func,
|
||||
get_parent_key_func);
|
||||
|
||||
@ -49,19 +56,26 @@ TEST(HierarchyDictionariesUtils, getHierarchy)
|
||||
|
||||
auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; };
|
||||
|
||||
UInt64 hierarchy_null_value_key = 0;
|
||||
auto get_parent_key_func = [&](auto key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
auto it = child_to_parent.find(key);
|
||||
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
|
||||
return value;
|
||||
if (it == nullptr)
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = it->getMapped();
|
||||
if (parent_key == hierarchy_null_value_key)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
return result;
|
||||
};
|
||||
|
||||
UInt64 hierarchy_null_value_key = 0;
|
||||
PaddedPODArray<UInt64> keys = {1, 2, 3};
|
||||
|
||||
auto result = DB::detail::getHierarchy(
|
||||
keys,
|
||||
hierarchy_null_value_key,
|
||||
is_key_valid_func,
|
||||
get_parent_key_func);
|
||||
|
||||
@ -87,21 +101,28 @@ TEST(HierarchyDictionariesUtils, getIsInHierarchy)
|
||||
|
||||
auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; };
|
||||
|
||||
UInt64 hierarchy_null_value_key = 0;
|
||||
auto get_parent_key_func = [&](auto key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
auto it = child_to_parent.find(key);
|
||||
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
|
||||
return value;
|
||||
if (it == nullptr)
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = it->getMapped();
|
||||
if (parent_key == hierarchy_null_value_key)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
return result;
|
||||
};
|
||||
|
||||
UInt64 hierarchy_null_value_key = 0;
|
||||
PaddedPODArray<UInt64> keys = {1, 2, 3, 4, 5};
|
||||
PaddedPODArray<UInt64> keys_in = {1, 1, 1, 2, 5};
|
||||
|
||||
PaddedPODArray<UInt8> actual = DB::detail::getIsInHierarchy(
|
||||
keys,
|
||||
keys_in,
|
||||
hierarchy_null_value_key,
|
||||
is_key_valid_func,
|
||||
get_parent_key_func);
|
||||
|
||||
@ -119,21 +140,28 @@ TEST(HierarchyDictionariesUtils, getIsInHierarchy)
|
||||
return child_to_parent.find(key) != nullptr;
|
||||
};
|
||||
|
||||
UInt64 hierarchy_null_value_key = 0;
|
||||
auto get_parent_key_func = [&](auto key)
|
||||
{
|
||||
std::optional<UInt64> result;
|
||||
auto it = child_to_parent.find(key);
|
||||
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
|
||||
return value;
|
||||
if (it == nullptr)
|
||||
return result;
|
||||
|
||||
UInt64 parent_key = it->getMapped();
|
||||
if (parent_key == hierarchy_null_value_key)
|
||||
return result;
|
||||
|
||||
result = parent_key;
|
||||
return result;
|
||||
};
|
||||
|
||||
UInt64 hierarchy_null_value_key = 0;
|
||||
PaddedPODArray<UInt64> keys = {1, 2, 3};
|
||||
PaddedPODArray<UInt64> keys_in = {1, 2, 3};
|
||||
|
||||
PaddedPODArray<UInt8> actual = DB::detail::getIsInHierarchy(
|
||||
keys,
|
||||
keys_in,
|
||||
hierarchy_null_value_key,
|
||||
is_key_valid_func,
|
||||
get_parent_key_func);
|
||||
|
||||
|
@ -973,7 +973,7 @@ private:
|
||||
auto dictionary = helper.getDictionary(arguments[0].column);
|
||||
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
|
||||
|
||||
return std::make_shared<DataTypeArray>(hierarchical_attribute.type);
|
||||
return std::make_shared<DataTypeArray>(removeNullable(hierarchical_attribute.type));
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
|
@ -0,0 +1,45 @@
|
||||
Flat dictionary
|
||||
Get hierarchy
|
||||
[0]
|
||||
[1,0]
|
||||
[2,1,0]
|
||||
[3]
|
||||
[4,2,1,0]
|
||||
[]
|
||||
Get is in hierarchy
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
Hashed dictionary
|
||||
Get hierarchy
|
||||
[0]
|
||||
[1,0]
|
||||
[2,1,0]
|
||||
[3]
|
||||
[4,2,1,0]
|
||||
[]
|
||||
Get is in hierarchy
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
HashedArray dictionary
|
||||
Get hierarchy
|
||||
[0]
|
||||
[1,0]
|
||||
[2,1,0]
|
||||
[3]
|
||||
[4,2,1,0]
|
||||
[]
|
||||
Get is in hierarchy
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
@ -0,0 +1,67 @@
|
||||
DROP TABLE IF EXISTS test_hierarhical_table;
|
||||
CREATE TABLE test_hierarhical_table
|
||||
(
|
||||
id UInt64,
|
||||
parent_id Nullable(UInt64)
|
||||
) ENGINE=TinyLog;
|
||||
|
||||
INSERT INTO test_hierarhical_table VALUES (0, NULL), (1, 0), (2, 1), (3, NULL), (4, 2);
|
||||
|
||||
DROP DICTIONARY IF EXISTS hierachical_flat_dictionary;
|
||||
CREATE DICTIONARY hierachical_flat_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
parent_id Nullable(UInt64) HIERARCHICAL
|
||||
) PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
|
||||
LAYOUT(FLAT())
|
||||
LIFETIME(0);
|
||||
|
||||
SELECT 'Flat dictionary';
|
||||
|
||||
SELECT 'Get hierarchy';
|
||||
SELECT dictGetHierarchy('hierachical_flat_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get is in hierarchy';
|
||||
SELECT dictIsIn('hierachical_flat_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
|
||||
DROP DICTIONARY hierachical_flat_dictionary;
|
||||
|
||||
DROP DICTIONARY IF EXISTS hierachical_hashed_dictionary;
|
||||
CREATE DICTIONARY hierachical_hashed_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
parent_id Nullable(UInt64) HIERARCHICAL
|
||||
) PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
|
||||
LAYOUT(HASHED())
|
||||
LIFETIME(0);
|
||||
|
||||
SELECT 'Hashed dictionary';
|
||||
|
||||
SELECT 'Get hierarchy';
|
||||
SELECT dictGetHierarchy('hierachical_hashed_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get is in hierarchy';
|
||||
SELECT dictIsIn('hierachical_hashed_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
|
||||
DROP DICTIONARY hierachical_hashed_dictionary;
|
||||
|
||||
DROP DICTIONARY IF EXISTS hierachical_hashed_array_dictionary;
|
||||
CREATE DICTIONARY hierachical_hashed_array_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
parent_id Nullable(UInt64) HIERARCHICAL
|
||||
) PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
|
||||
LAYOUT(HASHED_ARRAY())
|
||||
LIFETIME(0);
|
||||
|
||||
SELECT 'HashedArray dictionary';
|
||||
|
||||
SELECT 'Get hierarchy';
|
||||
SELECT dictGetHierarchy('hierachical_hashed_array_dictionary', number) FROM system.numbers LIMIT 6;
|
||||
SELECT 'Get is in hierarchy';
|
||||
SELECT dictIsIn('hierachical_hashed_array_dictionary', number, number) FROM system.numbers LIMIT 6;
|
||||
|
||||
DROP DICTIONARY hierachical_hashed_array_dictionary;
|
||||
|
||||
DROP TABLE test_hierarhical_table;
|
Loading…
Reference in New Issue
Block a user