Hierarchical dictionaries support nullable parent key

This commit is contained in:
Maksim Kita 2022-06-02 19:24:23 +02:00
parent 0b40e05ffc
commit 20b55a45b2
9 changed files with 400 additions and 74 deletions

View File

@ -177,6 +177,9 @@ ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const Data
ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &) const
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
@ -184,7 +187,11 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const UInt64 null_value = dictionary_attribute.null_value.get<UInt64>();
std::optional<UInt64> null_value;
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const ContainerType<UInt64> & parent_keys = std::get<ContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; };
@ -193,13 +200,26 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &
auto get_parent_key_func = [&, this](auto & hierarchy_key)
{
std::optional<UInt64> result;
bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key];
std::optional<UInt64> result = is_key_valid ? std::make_optional(parent_keys[hierarchy_key]) : std::nullopt;
keys_found += result.has_value();
if (!is_key_valid)
return result;
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return result;
UInt64 parent_key = parent_keys[hierarchy_key];
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_key_func);
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_key_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
@ -212,9 +232,22 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
ColumnPtr in_key_column,
const DataTypePtr &) const
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
if (in_key_column->isNullable())
{
const auto * in_key_column_typed = assert_cast<const ColumnNullable *>(in_key_column.get());
in_key_column = in_key_column_typed->getNestedColumnPtr();
in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData();
}
PaddedPODArray<UInt64> keys_in_backup_storage;
const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage);
@ -222,7 +255,11 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const UInt64 null_value = dictionary_attribute.null_value.get<UInt64>();
std::optional<UInt64> null_value;
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const ContainerType<UInt64> & parent_keys = std::get<ContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; };
@ -231,13 +268,37 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
auto get_parent_key_func = [&, this](auto & hierarchy_key)
{
std::optional<UInt64> result;
bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key];
std::optional<UInt64> result = is_key_valid ? std::make_optional(parent_keys[hierarchy_key]) : std::nullopt;
keys_found += result.has_value();
if (!is_key_valid)
return result;
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return result;
UInt64 parent_key = parent_keys[hierarchy_key];
if (null_value && *null_value == parent_key)
return result;
result = parent_keys[hierarchy_key];
keys_found += 1;
return result;
};
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_key_func);
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func);
if (unlikely(in_key_column_nullable_mask))
{
auto mutable_result_ptr = result->assumeMutable();
auto & mutable_result = assert_cast<ColumnUInt8 &>(*mutable_result_ptr);
auto & mutable_result_data = mutable_result.getData();
size_t mutable_result_data_size = mutable_result_data.size();
for (size_t i = 0; i < mutable_result_data_size; ++i)
mutable_result_data[i] &= !(static_cast<bool>((*in_key_column_nullable_mask)[i]));
}
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);

View File

@ -183,6 +183,9 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
@ -191,9 +194,12 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const auto & key_attribute_container = key_attribute.container;
std::optional<UInt64> null_value;
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const auto & key_attribute_container = key_attribute.container;
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
@ -206,15 +212,25 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
auto it = key_attribute_container.find(hierarchy_key);
if (it != key_attribute_container.end())
result = parent_keys_container[it->getMapped()];
if (it == key_attribute_container.end())
return result;
keys_found += result.has_value();
size_t key_index = it->getMapped();
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
return result;
UInt64 parent_key = parent_keys_container[key_index];
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
@ -235,9 +251,22 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
if (in_key_column->isNullable())
{
const auto * in_key_column_typed = assert_cast<const ColumnNullable *>(in_key_column.get());
in_key_column = in_key_column_typed->getNestedColumnPtr();
in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData();
}
PaddedPODArray<UInt64> keys_in_backup_storage;
const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage);
@ -246,9 +275,12 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const auto & key_attribute_container = key_attribute.container;
std::optional<UInt64> null_value;
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const auto & key_attribute_container = key_attribute.container;
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
@ -261,15 +293,36 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
auto it = key_attribute_container.find(hierarchy_key);
if (it != key_attribute_container.end())
result = parent_keys_container[it->getMapped()];
if (it == key_attribute_container.end())
return result;
keys_found += result.has_value();
size_t key_index = it->getMapped();
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
return result;
UInt64 parent_key = parent_keys_container[key_index];
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_func);
if (unlikely(in_key_column_nullable_mask))
{
auto mutable_result_ptr = result->assumeMutable();
auto & mutable_result = assert_cast<ColumnUInt8 &>(*mutable_result_ptr);
auto & mutable_result_data = mutable_result.getData();
size_t mutable_result_data_size = mutable_result_data.size();
for (size_t i = 0; i < mutable_result_data_size; ++i)
mutable_result_data[i] &= !(static_cast<bool>((*in_key_column_nullable_mask)[i]));
}
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);

View File

@ -227,6 +227,9 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
@ -235,10 +238,20 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
std::optional<UInt64> null_value;
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const CollectionType<UInt64> & parent_keys_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
auto is_key_valid_func = [&](auto & hierarchy_key)
{
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return true;
return parent_keys_map.find(hierarchy_key) != parent_keys_map.end();
};
size_t keys_found = 0;
@ -248,15 +261,23 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
auto it = parent_keys_map.find(hierarchy_key);
if (it != parent_keys_map.end())
result = getValueFromCell(it);
if (it == parent_keys_map.end())
return result;
keys_found += result.has_value();
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return result;
UInt64 parent_key = getValueFromCell(it);
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
@ -264,7 +285,9 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
return dictionary_hierarchy_array;
}
else
{
return nullptr;
}
}
template <DictionaryKeyType dictionary_key_type, bool sparse>
@ -275,6 +298,19 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
if (in_key_column->isNullable())
{
const auto * in_key_column_typed = assert_cast<const ColumnNullable *>(in_key_column.get());
in_key_column = in_key_column_typed->getNestedColumnPtr();
in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData();
}
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
@ -286,28 +322,57 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
std::optional<UInt64> null_value;
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const CollectionType<UInt64> & parent_keys_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
auto is_key_valid_func = [&](auto & hierarchy_key)
{
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return true;
return parent_keys_map.find(hierarchy_key) != parent_keys_map.end();
};
size_t keys_found = 0;
auto get_parent_func = [&](auto & hierarchy_key)
auto get_parent_key_func = [&](auto & hierarchy_key)
{
std::optional<UInt64> result;
auto it = parent_keys_map.find(hierarchy_key);
if (it != parent_keys_map.end())
result = getValueFromCell(it);
if (it == parent_keys_map.end())
return result;
keys_found += result.has_value();
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return result;
UInt64 parent_key = getValueFromCell(it);
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func);
if (unlikely(in_key_column_nullable_mask))
{
auto mutable_result_ptr = result->assumeMutable();
auto & mutable_result = assert_cast<ColumnUInt8 &>(*mutable_result_ptr);
auto & mutable_result_data = mutable_result.getData();
size_t mutable_result_data_size = mutable_result_data.size();
for (size_t i = 0; i < mutable_result_data_size; ++i)
mutable_result_data[i] &= !(static_cast<bool>((*in_key_column_nullable_mask)[i]));
}
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);

View File

@ -142,18 +142,25 @@ ColumnPtr getKeysHierarchyDefaultImplementation(
auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; };
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
auto get_parent_key_func = [&](auto & key)
{
std::optional<UInt64> result;
auto it = key_to_parent_key.find(key);
std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
valid_keys += result.has_value();
if (it == nullptr) {
return result;
}
UInt64 parent_key = it->getMapped();
if (parent_key == null_value)
return result;
result = parent_key;
valid_keys += 1;
return result;
};
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
auto dictionary_hierarchy_array = getKeysHierarchyArray(requested_keys, null_value, is_key_valid_func, get_parent_key_func);
return dictionary_hierarchy_array;
return getKeysHierarchyArray(requested_keys, is_key_valid_func, get_parent_key_func);
}
ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
@ -185,19 +192,26 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; };
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
auto get_parent_key_func = [&](auto & key)
{
std::optional<UInt64> result;
auto it = key_to_parent_key.find(key);
std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
valid_keys += result.has_value();
if (it == nullptr) {
return result;
}
UInt64 parent_key = it->getMapped();
if (parent_key == null_value)
return result;
result = parent_key;
valid_keys += 1;
return result;
};
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
const auto & in_keys = in_key_column_typed->getData();
auto result = getKeysIsInHierarchyColumn(requested_keys, in_keys, null_value, is_key_valid_func, get_parent_key_func);
return result;
return getKeysIsInHierarchyColumn(requested_keys, in_keys, is_key_valid_func, get_parent_key_func);
}
}

View File

@ -33,7 +33,7 @@ public:
keys.reserve(parent_to_children_map_size);
parent_to_children_keys_range.reserve(parent_to_children_map_size);
for (auto & [parent, children] : parent_to_children_map_)
for (const auto & [parent, children] : parent_to_children_map_)
{
size_t keys_size = keys.size();
UInt32 start_index = static_cast<UInt32>(keys_size);
@ -97,7 +97,6 @@ namespace detail
template <typename IsKeyValidFunc, typename GetParentKeyFunc>
ElementsAndOffsets getHierarchy(
const PaddedPODArray<UInt64> & keys,
const UInt64 & hierarchy_null_value,
IsKeyValidFunc && is_key_valid_func,
GetParentKeyFunc && get_parent_key_func)
{
@ -156,7 +155,7 @@ namespace detail
break;
}
if (hierarchy_key == hierarchy_null_value || current_hierarchy_depth >= DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH)
if (current_hierarchy_depth >= DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH)
break;
already_processes_keys_to_offset[hierarchy_key] = {offsets.size(), current_hierarchy_depth};
@ -190,7 +189,6 @@ namespace detail
PaddedPODArray<UInt8> getIsInHierarchy(
const PaddedPODArray<UInt64> & keys,
const PaddedPODArray<UInt64> & in_keys,
const UInt64 & hierarchy_null_value,
IsKeyValidFunc && is_key_valid_func,
GetParentKeyFunc && get_parent_func)
{
@ -201,7 +199,6 @@ namespace detail
detail::ElementsAndOffsets hierarchy = detail::getHierarchy(
keys,
hierarchy_null_value,
std::forward<IsKeyValidFunc>(is_key_valid_func),
std::forward<GetParentKeyFunc>(get_parent_func));
@ -213,7 +210,7 @@ namespace detail
size_t i_elements_start = i > 0 ? offsets[i - 1] : 0;
size_t i_elements_end = offsets[i];
auto & key_to_find = in_keys[i];
const auto & key_to_find = in_keys[i];
const auto * begin = elements.begin() + i_elements_start;
const auto * end = elements.begin() + i_elements_end;
@ -263,8 +260,8 @@ namespace detail
Strategy strategy,
size_t & valid_keys)
{
auto & parent_to_children_keys_range = parent_to_child_index.parent_to_children_keys_range;
auto & children_keys = parent_to_child_index.keys;
const auto & parent_to_children_keys_range = parent_to_child_index.parent_to_children_keys_range;
const auto & children_keys = parent_to_child_index.keys;
/// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants.
/// If strategy is GetDescendantsAtSpecificLevelStrategy we do not use the cache.
@ -436,13 +433,11 @@ namespace detail
template <typename KeyType, typename IsKeyValidFunc, typename GetParentKeyFunc>
ColumnPtr getKeysHierarchyArray(
const PaddedPODArray<KeyType> & keys,
const KeyType & hierarchy_null_value,
IsKeyValidFunc && is_key_valid_func,
GetParentKeyFunc && get_parent_func)
{
auto elements_and_offsets = detail::getHierarchy(
keys,
hierarchy_null_value,
std::forward<IsKeyValidFunc>(is_key_valid_func),
std::forward<GetParentKeyFunc>(get_parent_func));
@ -454,14 +449,12 @@ template <typename KeyType, typename IsKeyValidFunc, typename GetParentKeyFunc>
ColumnUInt8::Ptr getKeysIsInHierarchyColumn(
const PaddedPODArray<KeyType> & hierarchy_keys,
const PaddedPODArray<KeyType> & hierarchy_in_keys,
const KeyType & hierarchy_null_value,
IsKeyValidFunc && is_key_valid_func,
GetParentKeyFunc && get_parent_func)
{
auto is_in_hierarchy_data = detail::getIsInHierarchy(
hierarchy_keys,
hierarchy_in_keys,
hierarchy_null_value,
std::forward<IsKeyValidFunc>(is_key_valid_func),
std::forward<GetParentKeyFunc>(get_parent_func));

View File

@ -17,19 +17,26 @@ TEST(HierarchyDictionariesUtils, getHierarchy)
auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; };
UInt64 hierarchy_null_value_key = 0;
auto get_parent_key_func = [&](auto key)
{
std::optional<UInt64> result;
auto it = child_to_parent.find(key);
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
return value;
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (parent_key == hierarchy_null_value_key)
return result;
result = parent_key;
return result;
};
UInt64 hierarchy_null_value_key = 0;
PaddedPODArray<UInt64> keys = {1, 2, 3, 4, 5};
auto result = DB::detail::getHierarchy(
keys,
hierarchy_null_value_key,
is_key_valid_func,
get_parent_key_func);
@ -49,19 +56,26 @@ TEST(HierarchyDictionariesUtils, getHierarchy)
auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; };
UInt64 hierarchy_null_value_key = 0;
auto get_parent_key_func = [&](auto key)
{
std::optional<UInt64> result;
auto it = child_to_parent.find(key);
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
return value;
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (parent_key == hierarchy_null_value_key)
return result;
result = parent_key;
return result;
};
UInt64 hierarchy_null_value_key = 0;
PaddedPODArray<UInt64> keys = {1, 2, 3};
auto result = DB::detail::getHierarchy(
keys,
hierarchy_null_value_key,
is_key_valid_func,
get_parent_key_func);
@ -87,21 +101,28 @@ TEST(HierarchyDictionariesUtils, getIsInHierarchy)
auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; };
UInt64 hierarchy_null_value_key = 0;
auto get_parent_key_func = [&](auto key)
{
std::optional<UInt64> result;
auto it = child_to_parent.find(key);
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
return value;
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (parent_key == hierarchy_null_value_key)
return result;
result = parent_key;
return result;
};
UInt64 hierarchy_null_value_key = 0;
PaddedPODArray<UInt64> keys = {1, 2, 3, 4, 5};
PaddedPODArray<UInt64> keys_in = {1, 1, 1, 2, 5};
PaddedPODArray<UInt8> actual = DB::detail::getIsInHierarchy(
keys,
keys_in,
hierarchy_null_value_key,
is_key_valid_func,
get_parent_key_func);
@ -119,21 +140,28 @@ TEST(HierarchyDictionariesUtils, getIsInHierarchy)
return child_to_parent.find(key) != nullptr;
};
UInt64 hierarchy_null_value_key = 0;
auto get_parent_key_func = [&](auto key)
{
std::optional<UInt64> result;
auto it = child_to_parent.find(key);
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
return value;
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (parent_key == hierarchy_null_value_key)
return result;
result = parent_key;
return result;
};
UInt64 hierarchy_null_value_key = 0;
PaddedPODArray<UInt64> keys = {1, 2, 3};
PaddedPODArray<UInt64> keys_in = {1, 2, 3};
PaddedPODArray<UInt8> actual = DB::detail::getIsInHierarchy(
keys,
keys_in,
hierarchy_null_value_key,
is_key_valid_func,
get_parent_key_func);

View File

@ -973,7 +973,7 @@ private:
auto dictionary = helper.getDictionary(arguments[0].column);
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
return std::make_shared<DataTypeArray>(hierarchical_attribute.type);
return std::make_shared<DataTypeArray>(removeNullable(hierarchical_attribute.type));
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override

View File

@ -0,0 +1,45 @@
Flat dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0
Hashed dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0
HashedArray dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0

View File

@ -0,0 +1,67 @@
-- Exercises hierarchical dictionaries whose HIERARCHICAL attribute is declared Nullable(UInt64).
-- The same source table backs three dictionaries (FLAT, HASHED, HASHED_ARRAY layouts) and the
-- same two queries are run against each of them.
-- NOTE: the identifiers are spelled "hierarhical" / "hierachical"; the spelling is used consistently
-- by every statement below, so it must be changed everywhere at once or not at all.
DROP TABLE IF EXISTS test_hierarhical_table;
CREATE TABLE test_hierarhical_table
(
id UInt64,
parent_id Nullable(UInt64)
) ENGINE=TinyLog;
-- Keys 0 and 3 have a NULL parent; 1 -> 0, 2 -> 1 and 4 -> 2 form a chain. Key 5 is not inserted.
INSERT INTO test_hierarhical_table VALUES (0, NULL), (1, 0), (2, 1), (3, NULL), (4, 2);
-- FLAT layout.
DROP DICTIONARY IF EXISTS hierachical_flat_dictionary;
CREATE DICTIONARY hierachical_flat_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(FLAT())
LIFETIME(0);
-- The quoted SELECTs only print section headers into the test output.
SELECT 'Flat dictionary';
SELECT 'Get hierarchy';
-- Queries keys 0..5: the five inserted keys plus key 5, which is absent from the source table.
SELECT dictGetHierarchy('hierachical_flat_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
-- Each key is checked against itself (both key arguments are `number`).
SELECT dictIsIn('hierachical_flat_dictionary', number, number) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_flat_dictionary;
-- HASHED layout: same dictionary definition and same queries as above.
DROP DICTIONARY IF EXISTS hierachical_hashed_dictionary;
CREATE DICTIONARY hierachical_hashed_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(HASHED())
LIFETIME(0);
SELECT 'Hashed dictionary';
SELECT 'Get hierarchy';
SELECT dictGetHierarchy('hierachical_hashed_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
SELECT dictIsIn('hierachical_hashed_dictionary', number, number) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_hashed_dictionary;
-- HASHED_ARRAY layout: same dictionary definition and same queries as above.
DROP DICTIONARY IF EXISTS hierachical_hashed_array_dictionary;
CREATE DICTIONARY hierachical_hashed_array_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(HASHED_ARRAY())
LIFETIME(0);
SELECT 'HashedArray dictionary';
SELECT 'Get hierarchy';
SELECT dictGetHierarchy('hierachical_hashed_array_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
SELECT dictIsIn('hierachical_hashed_array_dictionary', number, number) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_hashed_array_dictionary;
-- Clean up the source table once all three dictionaries have been dropped.
DROP TABLE test_hierarhical_table;