From 20b55a45b2e8ba0f5d379c16dae347a4a16fc2ce Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 2 Jun 2022 19:24:23 +0200 Subject: [PATCH 1/4] Hierarchical dictionaries support nullable parent key --- src/Dictionaries/FlatDictionary.cpp | 77 ++++++++++++++-- src/Dictionaries/HashedArrayDictionary.cpp | 77 +++++++++++++--- src/Dictionaries/HashedDictionary.cpp | 91 ++++++++++++++++--- .../HierarchyDictionariesUtils.cpp | 38 +++++--- src/Dictionaries/HierarchyDictionariesUtils.h | 17 +--- .../gtest_hierarchy_dictionaries_utils.cpp | 60 ++++++++---- src/Functions/FunctionsExternalDictionaries.h | 2 +- ...dictionaries_nullable_parent_key.reference | 45 +++++++++ ...hical_dictionaries_nullable_parent_key.sql | 67 ++++++++++++++ 9 files changed, 400 insertions(+), 74 deletions(-) create mode 100644 tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference create mode 100644 tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index bd664224d41..c099f7acf17 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -177,6 +177,9 @@ ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const Data ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &) const { + if (key_column->isNullable()) + key_column = assert_cast(key_column.get())->getNestedColumnPtr(); + PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); @@ -184,7 +187,11 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr & const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; - const UInt64 null_value = dictionary_attribute.null_value.get(); + std::optional null_value; + + if (!dictionary_attribute.null_value.isNull()) + null_value = dictionary_attribute.null_value.get(); + const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; }; @@ -193,13 +200,26 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr & auto get_parent_key_func = [&, this](auto & hierarchy_key) { + std::optional result; + bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key]; - std::optional result = is_key_valid ? std::make_optional(parent_keys[hierarchy_key]) : std::nullopt; - keys_found += result.has_value(); + + if (!is_key_valid) + return result; + + if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) + return result; + + UInt64 parent_key = parent_keys[hierarchy_key]; + if (null_value && *null_value == parent_key) + return result; + + result = parent_key; + keys_found += 1; return result; }; - auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_key_func); + auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_key_func); query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); @@ -212,9 +232,22 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( ColumnPtr in_key_column, const DataTypePtr &) const { + if (key_column->isNullable()) + key_column = assert_cast(key_column.get())->getNestedColumnPtr(); + PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); + const PaddedPODArray * in_key_column_nullable_mask = nullptr; + + if (in_key_column->isNullable()) + { + const auto * in_key_column_typed = assert_cast(in_key_column.get()); + + in_key_column = in_key_column_typed->getNestedColumnPtr(); + in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData(); + } + PaddedPODArray keys_in_backup_storage; const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage); @@ -222,7 +255,11 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; - const UInt64 null_value = dictionary_attribute.null_value.get(); + std::optional null_value; + + if (!dictionary_attribute.null_value.isNull()) + null_value = dictionary_attribute.null_value.get(); + const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; }; @@ -231,13 +268,37 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( auto get_parent_key_func = [&, this](auto & hierarchy_key) { + std::optional result; + bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key]; - std::optional result = is_key_valid ? std::make_optional(parent_keys[hierarchy_key]) : std::nullopt; - keys_found += result.has_value(); + + if (!is_key_valid) + return result; + + if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) + return result; + + UInt64 parent_key = parent_keys[hierarchy_key]; + if (null_value && *null_value == parent_key) + return result; + + result = parent_keys[hierarchy_key]; + keys_found += 1; return result; }; - auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_key_func); + auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func); + + if (unlikely(in_key_column_nullable_mask)) + { + auto mutable_result_ptr = result->assumeMutable(); + auto & mutable_result = assert_cast(*mutable_result_ptr); + auto & mutable_result_data = mutable_result.getData(); + size_t mutable_result_data_size = mutable_result_data.size(); + + for (size_t i = 0; i < mutable_result_data_size; ++i) + mutable_result_data[i] &= !(static_cast((*in_key_column_nullable_mask)[i])); + } query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 66c63b7330d..d2187932c80 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -183,6 +183,9 @@ ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { + if (key_column->isNullable()) + key_column = assert_cast(key_column.get())->getNestedColumnPtr(); + PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); @@ -191,9 +194,12 @@ ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; - const auto & key_attribute_container = key_attribute.container; + std::optional null_value; - const UInt64 null_value = dictionary_attribute.null_value.template get(); + if (!dictionary_attribute.null_value.isNull()) + null_value = dictionary_attribute.null_value.get(); + + const auto & key_attribute_container = key_attribute.container; const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; @@ -206,15 +212,25 @@ ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key auto it = key_attribute_container.find(hierarchy_key); - if (it != key_attribute_container.end()) - result = parent_keys_container[it->getMapped()]; + if (it == key_attribute_container.end()) + return result; - keys_found += result.has_value(); + size_t key_index = it->getMapped(); + + if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index]) + return result; + + UInt64 parent_key = parent_keys_container[key_index]; + if (null_value && *null_value == parent_key) + return result; + + result = parent_key; + keys_found += 1; return result; }; - auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func); + auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_func); query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); @@ -235,9 +251,22 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { + if (key_column->isNullable()) + key_column = assert_cast(key_column.get())->getNestedColumnPtr(); + PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); + const PaddedPODArray * in_key_column_nullable_mask = nullptr; + + if (in_key_column->isNullable()) + { + const auto * in_key_column_typed = assert_cast(in_key_column.get()); + + in_key_column = in_key_column_typed->getNestedColumnPtr(); + in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData(); + } + PaddedPODArray keys_in_backup_storage; const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage); @@ -246,9 +275,12 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; - const auto & key_attribute_container = key_attribute.container; + std::optional null_value; - const UInt64 null_value = dictionary_attribute.null_value.template get(); + if (!dictionary_attribute.null_value.isNull()) + null_value = dictionary_attribute.null_value.get(); + + const auto & key_attribute_container = key_attribute.container; const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; @@ -261,15 +293,36 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( auto it = key_attribute_container.find(hierarchy_key); - if (it != key_attribute_container.end()) - result = parent_keys_container[it->getMapped()]; + if (it == key_attribute_container.end()) + return result; - keys_found += result.has_value(); + size_t key_index = it->getMapped(); + + if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index]) + return result; + + UInt64 parent_key = parent_keys_container[key_index]; + if (null_value && *null_value == parent_key) + return result; + + result = parent_key; + keys_found += 1; return result; }; - auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func); + auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_func); + + if (unlikely(in_key_column_nullable_mask)) + { + auto mutable_result_ptr = result->assumeMutable(); + auto & mutable_result = assert_cast(*mutable_result_ptr); + auto & mutable_result_data = mutable_result.getData(); + size_t mutable_result_data_size = mutable_result_data.size(); + + for (size_t i = 0; i < mutable_result_data_size; ++i) + mutable_result_data[i] &= !(static_cast((*in_key_column_nullable_mask)[i])); + } query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 81d3d42617b..ba31e3e1a0b 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -227,6 +227,9 @@ ColumnPtr HashedDictionary::getHierarchy(ColumnPtr { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { + if (key_column->isNullable()) + key_column = assert_cast(key_column.get())->getNestedColumnPtr(); + PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); @@ -235,10 +238,20 @@ ColumnPtr HashedDictionary::getHierarchy(ColumnPtr const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; - const UInt64 null_value = dictionary_attribute.null_value.template get(); + std::optional null_value; + + if (!dictionary_attribute.null_value.isNull()) + null_value = dictionary_attribute.null_value.get(); + const CollectionType & parent_keys_map = std::get>(hierarchical_attribute.container); - auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); }; + auto is_key_valid_func = [&](auto & hierarchy_key) + { + if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) + return true; + + return parent_keys_map.find(hierarchy_key) != parent_keys_map.end(); + }; size_t keys_found = 0; @@ -248,15 +261,23 @@ ColumnPtr HashedDictionary::getHierarchy(ColumnPtr auto it = parent_keys_map.find(hierarchy_key); - if (it != parent_keys_map.end()) - result = getValueFromCell(it); + if (it == parent_keys_map.end()) + return result; - keys_found += result.has_value(); + if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) + return result; + + UInt64 parent_key = getValueFromCell(it); + if (null_value && *null_value == parent_key) + return result; + + result = parent_key; + keys_found += 1; return result; }; - auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func); + auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_func); query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); @@ -264,7 +285,9 @@ ColumnPtr HashedDictionary::getHierarchy(ColumnPtr return dictionary_hierarchy_array; } else + { return nullptr; + } } template @@ -275,6 +298,19 @@ ColumnUInt8::Ptr HashedDictionary::isInHierarchy( { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { + if (key_column->isNullable()) + key_column = assert_cast(key_column.get())->getNestedColumnPtr(); + + const PaddedPODArray * in_key_column_nullable_mask = nullptr; + + if (in_key_column->isNullable()) + { + const auto * in_key_column_typed = assert_cast(in_key_column.get()); + + in_key_column = in_key_column_typed->getNestedColumnPtr(); + in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData(); + } + PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); @@ -286,28 +322,57 @@ ColumnUInt8::Ptr HashedDictionary::isInHierarchy( const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; - const UInt64 null_value = dictionary_attribute.null_value.template get(); + std::optional null_value; + + if (!dictionary_attribute.null_value.isNull()) + null_value = dictionary_attribute.null_value.get(); + const CollectionType & parent_keys_map = std::get>(hierarchical_attribute.container); - auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); }; + auto is_key_valid_func = [&](auto & hierarchy_key) + { + if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) + return true; + + return parent_keys_map.find(hierarchy_key) != parent_keys_map.end(); + }; size_t keys_found = 0; - auto get_parent_func = [&](auto & hierarchy_key) + auto get_parent_key_func = [&](auto & hierarchy_key) { std::optional result; auto it = parent_keys_map.find(hierarchy_key); - if (it != parent_keys_map.end()) - result = getValueFromCell(it); + if (it == parent_keys_map.end()) + return result; - keys_found += result.has_value(); + if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) + return result; + + UInt64 parent_key = getValueFromCell(it); + if (null_value && *null_value == parent_key) + return result; + + result = parent_key; + keys_found += 1; return result; }; - auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func); + auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func); + + if (unlikely(in_key_column_nullable_mask)) + { + auto mutable_result_ptr = result->assumeMutable(); + auto & mutable_result = assert_cast(*mutable_result_ptr); + auto & mutable_result_data = mutable_result.getData(); + size_t mutable_result_data_size = mutable_result_data.size(); + + for (size_t i = 0; i < mutable_result_data_size; ++i) + mutable_result_data[i] &= !(static_cast((*in_key_column_nullable_mask)[i])); + } query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index 0b5f8478f7a..2cb4a2297fe 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -142,18 +142,25 @@ ColumnPtr getKeysHierarchyDefaultImplementation( auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; }; + UInt64 null_value = hierarchical_attribute.null_value.get(); auto get_parent_key_func = [&](auto & key) { + std::optional result; auto it = key_to_parent_key.find(key); - std::optional result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); - valid_keys += result.has_value(); + if (it == nullptr) { + return result; + } + + UInt64 parent_key = it->getMapped(); + if (parent_key == null_value) + return result; + + result = parent_key; + valid_keys += 1; return result; }; - UInt64 null_value = hierarchical_attribute.null_value.get(); - - auto dictionary_hierarchy_array = getKeysHierarchyArray(requested_keys, null_value, is_key_valid_func, get_parent_key_func); - return dictionary_hierarchy_array; + return getKeysHierarchyArray(requested_keys, is_key_valid_func, get_parent_key_func); } ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( @@ -185,19 +192,26 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; }; + UInt64 null_value = hierarchical_attribute.null_value.get(); auto get_parent_key_func = [&](auto & key) { + std::optional result; auto it = key_to_parent_key.find(key); - std::optional result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); - valid_keys += result.has_value(); + if (it == nullptr) { + return result; + } + + UInt64 parent_key = it->getMapped(); + if (parent_key == null_value) + return result; + + result = parent_key; + valid_keys += 1; return result; }; - UInt64 null_value = hierarchical_attribute.null_value.get(); const auto & in_keys = in_key_column_typed->getData(); - - auto result = getKeysIsInHierarchyColumn(requested_keys, in_keys, null_value, is_key_valid_func, get_parent_key_func); - return result; + return getKeysIsInHierarchyColumn(requested_keys, in_keys, is_key_valid_func, get_parent_key_func); } } diff --git a/src/Dictionaries/HierarchyDictionariesUtils.h b/src/Dictionaries/HierarchyDictionariesUtils.h index 6a59a37b5e7..621290f40f9 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.h +++ b/src/Dictionaries/HierarchyDictionariesUtils.h @@ -33,7 +33,7 @@ public: keys.reserve(parent_to_children_map_size); parent_to_children_keys_range.reserve(parent_to_children_map_size); - for (auto & [parent, children] : parent_to_children_map_) + for (const auto & [parent, children] : parent_to_children_map_) { size_t keys_size = keys.size(); UInt32 start_index = static_cast(keys_size); @@ -97,7 +97,6 @@ namespace detail template ElementsAndOffsets getHierarchy( const PaddedPODArray & keys, - const UInt64 & hierarchy_null_value, IsKeyValidFunc && is_key_valid_func, GetParentKeyFunc && get_parent_key_func) { @@ -156,7 +155,7 @@ namespace detail break; } - if (hierarchy_key == hierarchy_null_value || current_hierarchy_depth >= DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH) + if (current_hierarchy_depth >= DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH) break; already_processes_keys_to_offset[hierarchy_key] = {offsets.size(), current_hierarchy_depth}; @@ -190,7 +189,6 @@ namespace detail PaddedPODArray getIsInHierarchy( const PaddedPODArray & keys, const PaddedPODArray & in_keys, - const UInt64 & hierarchy_null_value, IsKeyValidFunc && is_key_valid_func, GetParentKeyFunc && get_parent_func) { @@ -201,7 +199,6 @@ namespace detail detail::ElementsAndOffsets hierarchy = detail::getHierarchy( keys, - hierarchy_null_value, std::forward(is_key_valid_func), std::forward(get_parent_func)); @@ -213,7 +210,7 @@ namespace detail size_t i_elements_start = i > 0 ? offsets[i - 1] : 0; size_t i_elements_end = offsets[i]; - auto & key_to_find = in_keys[i]; + const auto & key_to_find = in_keys[i]; const auto * begin = elements.begin() + i_elements_start; const auto * end = elements.begin() + i_elements_end; @@ -263,8 +260,8 @@ namespace detail Strategy strategy, size_t & valid_keys) { - auto & parent_to_children_keys_range = parent_to_child_index.parent_to_children_keys_range; - auto & children_keys = parent_to_child_index.keys; + const auto & parent_to_children_keys_range = parent_to_child_index.parent_to_children_keys_range; + const auto & children_keys = parent_to_child_index.keys; /// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants. /// If strategy is GetDescendantsAtSpecificLevelStrategy we does not use cache strategy. @@ -436,13 +433,11 @@ namespace detail template ColumnPtr getKeysHierarchyArray( const PaddedPODArray & keys, - const KeyType & hierarchy_null_value, IsKeyValidFunc && is_key_valid_func, GetParentKeyFunc && get_parent_func) { auto elements_and_offsets = detail::getHierarchy( keys, - hierarchy_null_value, std::forward(is_key_valid_func), std::forward(get_parent_func)); @@ -454,14 +449,12 @@ template ColumnUInt8::Ptr getKeysIsInHierarchyColumn( const PaddedPODArray & hierarchy_keys, const PaddedPODArray & hierarchy_in_keys, - const KeyType & hierarchy_null_value, IsKeyValidFunc && is_key_valid_func, GetParentKeyFunc && get_parent_func) { auto is_in_hierarchy_data = detail::getIsInHierarchy( hierarchy_keys, hierarchy_in_keys, - hierarchy_null_value, std::forward(is_key_valid_func), std::forward(get_parent_func)); diff --git a/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp b/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp index a68f23a978f..10d335ebbbc 100644 --- a/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp +++ b/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp @@ -17,19 +17,26 @@ TEST(HierarchyDictionariesUtils, getHierarchy) auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; }; + UInt64 hierarchy_null_value_key = 0; auto get_parent_key_func = [&](auto key) { + std::optional result; auto it = child_to_parent.find(key); - std::optional value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); - return value; + if (it == nullptr) + return result; + + UInt64 parent_key = it->getMapped(); + if (parent_key == hierarchy_null_value_key) + return result; + + result = parent_key; + return result; }; - UInt64 hierarchy_null_value_key = 0; PaddedPODArray keys = {1, 2, 3, 4, 5}; auto result = DB::detail::getHierarchy( keys, - hierarchy_null_value_key, is_key_valid_func, get_parent_key_func); @@ -49,19 +56,26 @@ TEST(HierarchyDictionariesUtils, getHierarchy) auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; }; + UInt64 hierarchy_null_value_key = 0; auto get_parent_key_func = [&](auto key) { + std::optional result; auto it = child_to_parent.find(key); - std::optional value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); - return value; + if (it == nullptr) + return result; + + UInt64 parent_key = it->getMapped(); + if (parent_key == hierarchy_null_value_key) + return result; + + result = parent_key; + return result; }; - UInt64 hierarchy_null_value_key = 0; PaddedPODArray keys = {1, 2, 3}; auto result = DB::detail::getHierarchy( keys, - hierarchy_null_value_key, is_key_valid_func, get_parent_key_func); @@ -87,21 +101,28 @@ TEST(HierarchyDictionariesUtils, getIsInHierarchy) auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; }; + UInt64 hierarchy_null_value_key = 0; auto get_parent_key_func = [&](auto key) { + std::optional result; auto it = child_to_parent.find(key); - std::optional value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); - return value; + if (it == nullptr) + return result; + + UInt64 parent_key = it->getMapped(); + if (parent_key == hierarchy_null_value_key) + return result; + + result = parent_key; + return result; }; - UInt64 hierarchy_null_value_key = 0; PaddedPODArray keys = {1, 2, 3, 4, 5}; PaddedPODArray keys_in = {1, 1, 1, 2, 5}; PaddedPODArray actual = DB::detail::getIsInHierarchy( keys, keys_in, - hierarchy_null_value_key, is_key_valid_func, get_parent_key_func); @@ -119,21 +140,28 @@ TEST(HierarchyDictionariesUtils, getIsInHierarchy) return child_to_parent.find(key) != nullptr; }; + UInt64 hierarchy_null_value_key = 0; auto get_parent_key_func = [&](auto key) { + std::optional result; auto it = child_to_parent.find(key); - std::optional value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); - return value; + if (it == nullptr) + return result; + + UInt64 parent_key = it->getMapped(); + if (parent_key == hierarchy_null_value_key) + return result; + + result = parent_key; + return result; }; - UInt64 hierarchy_null_value_key = 0; PaddedPODArray keys = {1, 2, 3}; PaddedPODArray keys_in = {1, 2, 3}; PaddedPODArray actual = DB::detail::getIsInHierarchy( keys, keys_in, - hierarchy_null_value_key, is_key_valid_func, get_parent_key_func); diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 03833f9a2b2..ea7a9a60105 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -973,7 +973,7 @@ private: auto dictionary = helper.getDictionary(arguments[0].column); const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - return std::make_shared(hierarchical_attribute.type); + return std::make_shared(removeNullable(hierarchical_attribute.type)); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference new file mode 100644 index 00000000000..951540b7b44 --- /dev/null +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference @@ -0,0 +1,45 @@ +Flat dictionary +Get hierarchy +[0] +[1,0] +[2,1,0] +[3] +[4,2,1,0] +[] +Get is in hierarchy +1 +1 +1 +1 +1 +0 +Hashed dictionary +Get hierarchy +[0] +[1,0] +[2,1,0] +[3] +[4,2,1,0] +[] +Get is in hierarchy +1 +1 +1 +1 +1 +0 +HashedArray dictionary +Get hierarchy +[0] +[1,0] +[2,1,0] +[3] +[4,2,1,0] +[] +Get is in hierarchy +1 +1 +1 +1 +1 +0 diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql new file mode 100644 index 00000000000..444384b4417 --- /dev/null +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql @@ -0,0 +1,67 @@ +DROP TABLE IF EXISTS test_hierarhical_table; +CREATE TABLE test_hierarhical_table +( + id UInt64, + parent_id Nullable(UInt64) +) ENGINE=TinyLog; + +INSERT INTO test_hierarhical_table VALUES (0, NULL), (1, 0), (2, 1), (3, NULL), (4, 2); + +DROP DICTIONARY IF EXISTS hierachical_flat_dictionary; +CREATE DICTIONARY hierachical_flat_dictionary +( + id UInt64, + parent_id Nullable(UInt64) HIERARCHICAL +) PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table')) +LAYOUT(FLAT()) +LIFETIME(0); + +SELECT 'Flat dictionary'; + +SELECT 'Get hierarchy'; +SELECT dictGetHierarchy('hierachical_flat_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get is in hierarchy'; +SELECT dictIsIn('hierachical_flat_dictionary', number, number) FROM system.numbers LIMIT 6; + +DROP DICTIONARY hierachical_flat_dictionary; + +DROP DICTIONARY IF EXISTS hierachical_hashed_dictionary; +CREATE DICTIONARY hierachical_hashed_dictionary +( + id UInt64, + parent_id Nullable(UInt64) HIERARCHICAL +) PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table')) +LAYOUT(HASHED()) +LIFETIME(0); + +SELECT 'Hashed dictionary'; + +SELECT 'Get hierarchy'; +SELECT dictGetHierarchy('hierachical_hashed_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get is in hierarchy'; +SELECT dictIsIn('hierachical_hashed_dictionary', number, number) FROM system.numbers LIMIT 6; + +DROP DICTIONARY hierachical_hashed_dictionary; + +DROP DICTIONARY IF EXISTS hierachical_hashed_array_dictionary; +CREATE DICTIONARY hierachical_hashed_array_dictionary +( + id UInt64, + parent_id Nullable(UInt64) HIERARCHICAL +) PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table')) +LAYOUT(HASHED_ARRAY()) +LIFETIME(0); + +SELECT 'HashedArray dictionary'; + +SELECT 'Get hierarchy'; +SELECT dictGetHierarchy('hierachical_hashed_array_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get is in hierarchy'; +SELECT dictIsIn('hierachical_hashed_array_dictionary', number, number) FROM system.numbers LIMIT 6; + +DROP DICTIONARY hierachical_hashed_array_dictionary; + +DROP TABLE test_hierarhical_table; From a0cbbd9edce7c05be039f7a465765e15e406a648 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Jun 2022 17:21:55 +0200 Subject: [PATCH 2/4] Hierarchical Cache, Direct dictionaries added support for nullable parent key --- src/Dictionaries/FlatDictionary.cpp | 27 ----- src/Dictionaries/HashedArrayDictionary.cpp | 27 ----- src/Dictionaries/HashedDictionary.cpp | 46 ++------ .../HierarchyDictionariesUtils.cpp | 110 ++++++++++++++---- src/Functions/FunctionsExternalDictionaries.h | 7 +- ...dictionaries_nullable_parent_key.reference | 30 +++++ ...hical_dictionaries_nullable_parent_key.sql | 37 ++++++ 7 files changed, 164 insertions(+), 120 deletions(-) diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index c099f7acf17..6527269d98c 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -177,9 +177,6 @@ ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const Data ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &) const { - if (key_column->isNullable()) - key_column = assert_cast(key_column.get())->getNestedColumnPtr(); - PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); @@ -232,22 +229,9 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( ColumnPtr in_key_column, const DataTypePtr &) const { - if (key_column->isNullable()) - key_column = assert_cast(key_column.get())->getNestedColumnPtr(); - PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); - const PaddedPODArray * in_key_column_nullable_mask = nullptr; - - if (in_key_column->isNullable()) - { - const auto * in_key_column_typed = assert_cast(in_key_column.get()); - - in_key_column = in_key_column_typed->getNestedColumnPtr(); - in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData(); - } - PaddedPODArray keys_in_backup_storage; const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage); @@ -289,17 +273,6 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func); - if (unlikely(in_key_column_nullable_mask)) - { - auto mutable_result_ptr = result->assumeMutable(); - auto & mutable_result = assert_cast(*mutable_result_ptr); - auto & mutable_result_data = mutable_result.getData(); - size_t mutable_result_data_size = mutable_result_data.size(); - - for (size_t i = 0; i < mutable_result_data_size; ++i) - mutable_result_data[i] &= !(static_cast((*in_key_column_nullable_mask)[i])); - } - query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index d2187932c80..2c1eaf5a0df 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -183,9 +183,6 @@ ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { - if (key_column->isNullable()) - key_column = assert_cast(key_column.get())->getNestedColumnPtr(); - PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); @@ -251,22 +248,9 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { - if (key_column->isNullable()) - key_column = assert_cast(key_column.get())->getNestedColumnPtr(); - PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); - const PaddedPODArray * in_key_column_nullable_mask = nullptr; - - if (in_key_column->isNullable()) - { - const auto * in_key_column_typed = assert_cast(in_key_column.get()); - - in_key_column = in_key_column_typed->getNestedColumnPtr(); - in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData(); - } - PaddedPODArray keys_in_backup_storage; const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage); @@ -313,17 +297,6 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_func); - if (unlikely(in_key_column_nullable_mask)) - { - auto mutable_result_ptr = result->assumeMutable(); - auto & mutable_result = assert_cast(*mutable_result_ptr); - auto & mutable_result_data = mutable_result.getData(); - size_t mutable_result_data_size = mutable_result_data.size(); - - for (size_t i = 0; i < mutable_result_data_size; ++i) - mutable_result_data[i] &= !(static_cast((*in_key_column_nullable_mask)[i])); - } - query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index ba31e3e1a0b..76492027e9b 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -227,9 +227,6 @@ ColumnPtr HashedDictionary::getHierarchy(ColumnPtr { if constexpr (dictionary_key_type == DictionaryKeyType::Simple) { - if (key_column->isNullable()) - key_column = assert_cast(key_column.get())->getNestedColumnPtr(); - PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); @@ -243,14 +240,14 @@ ColumnPtr HashedDictionary::getHierarchy(ColumnPtr if (!dictionary_attribute.null_value.isNull()) null_value = dictionary_attribute.null_value.get(); - const CollectionType & parent_keys_map = std::get>(hierarchical_attribute.container); + const CollectionType & child_key_to_parent_key_map = std::get>(hierarchical_attribute.container); auto is_key_valid_func = [&](auto & hierarchy_key) { if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) return true; - return parent_keys_map.find(hierarchy_key) != parent_keys_map.end(); + return child_key_to_parent_key_map.find(hierarchy_key) != child_key_to_parent_key_map.end(); }; size_t keys_found = 0; @@ -259,12 +256,9 @@ ColumnPtr HashedDictionary::getHierarchy(ColumnPtr { std::optional result; - auto it = parent_keys_map.find(hierarchy_key); + auto it = child_key_to_parent_key_map.find(hierarchy_key); - if (it == parent_keys_map.end()) - return result; - - if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) + if (it == child_key_to_parent_key_map.end()) return result; UInt64 parent_key = getValueFromCell(it); @@ -301,16 +295,6 @@ ColumnUInt8::Ptr HashedDictionary::isInHierarchy( if (key_column->isNullable()) key_column = assert_cast(key_column.get())->getNestedColumnPtr(); - const PaddedPODArray * in_key_column_nullable_mask = nullptr; - - if (in_key_column->isNullable()) - { - const auto * in_key_column_typed = assert_cast(in_key_column.get()); - - in_key_column = in_key_column_typed->getNestedColumnPtr(); - in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData(); - } - PaddedPODArray keys_backup_storage; const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); @@ -327,14 +311,14 @@ ColumnUInt8::Ptr HashedDictionary::isInHierarchy( if (!dictionary_attribute.null_value.isNull()) null_value = dictionary_attribute.null_value.get(); - const CollectionType & parent_keys_map = std::get>(hierarchical_attribute.container); + const CollectionType & child_key_to_parent_key_map = std::get>(hierarchical_attribute.container); auto is_key_valid_func = [&](auto & hierarchy_key) { if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) return true; - return parent_keys_map.find(hierarchy_key) != parent_keys_map.end(); + return child_key_to_parent_key_map.find(hierarchy_key) != child_key_to_parent_key_map.end(); }; size_t keys_found = 0; @@ -343,12 +327,9 @@ ColumnUInt8::Ptr HashedDictionary::isInHierarchy( { std::optional result; - auto it = parent_keys_map.find(hierarchy_key); + auto it = child_key_to_parent_key_map.find(hierarchy_key); - if (it == parent_keys_map.end()) - return result; - - if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key)) + if (it == child_key_to_parent_key_map.end()) return result; UInt64 parent_key = getValueFromCell(it); @@ -363,17 +344,6 @@ ColumnUInt8::Ptr HashedDictionary::isInHierarchy( auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func); - if (unlikely(in_key_column_nullable_mask)) - { - auto mutable_result_ptr = result->assumeMutable(); - auto & mutable_result = assert_cast(*mutable_result_ptr); - auto & mutable_result_data = mutable_result.getData(); - size_t mutable_result_data_size = mutable_result_data.size(); - - for (size_t i = 0; i < mutable_result_data_size; ++i) - mutable_result_data[i] &= !(static_cast((*in_key_column_nullable_mask)[i])); - } - query_count.fetch_add(keys.size(), std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index 2cb4a2297fe..80f0cf9b3f0 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -1,5 +1,8 @@ #include "HierarchyDictionariesUtils.h" +#include + + namespace DB { @@ -26,25 +29,35 @@ namespace detail namespace { + struct ChildToParentHierarchicalContext { + HashMap child_key_to_parent_key; + std::optional> child_key_parent_key_is_null; + }; + /** In case of cache or direct dictionary we does not have structure with child to parent representation. * This function build such structure calling getColumn for initial keys to request and for next keys in hierarchy, * until all keys are requested or result key is null value. * To distinguish null value key and key that is not present in dictionary, we use special default value column * with max UInt64 value, if result column key has such value we assume that current key is not presented in dictionary storage. */ - HashMap getChildToParentHierarchyMapImpl( + ChildToParentHierarchicalContext getChildToParentHierarchicalContext( const IDictionary * dictionary, const DictionaryAttribute & hierarchical_attribute, const PaddedPODArray & initial_keys_to_request, const DataTypePtr & key_type) { - UInt64 null_value = hierarchical_attribute.null_value.get(); + std::optional null_value; + + if (!hierarchical_attribute.null_value.isNull()) + null_value = hierarchical_attribute.null_value.get(); ColumnPtr key_to_request_column = ColumnVector::create(); auto * key_to_request_column_typed = static_cast *>(key_to_request_column->assumeMutable().get()); UInt64 key_not_in_storage_value = std::numeric_limits::max(); ColumnPtr key_not_in_storage_default_value_column = ColumnVector::create(initial_keys_to_request.size(), key_not_in_storage_value); + if (hierarchical_attribute.is_nullable) + key_not_in_storage_default_value_column = makeNullable(key_not_in_storage_default_value_column); PaddedPODArray & keys_to_request = key_to_request_column_typed->getData(); keys_to_request.assign(initial_keys_to_request); @@ -52,20 +65,36 @@ namespace PaddedPODArray next_keys_to_request; HashSet already_requested_keys; - HashMap child_to_parent_key; + ChildToParentHierarchicalContext context; + + if (hierarchical_attribute.is_nullable) + context.child_key_parent_key_is_null = HashSet(); + + HashMap & child_key_to_parent_key = context.child_key_to_parent_key; + std::optional> & child_key_parent_key_is_null = context.child_key_parent_key_is_null; while (!keys_to_request.empty()) { - child_to_parent_key.reserve(child_to_parent_key.size() + keys_to_request.size()); + child_key_to_parent_key.reserve(keys_to_request.size()); - auto parent_key_column = dictionary->getColumn( + auto hierarchical_attribute_parent_key_column = dictionary->getColumn( hierarchical_attribute.name, hierarchical_attribute.type, {key_to_request_column}, {key_type}, key_not_in_storage_default_value_column); - const auto * parent_key_column_typed = checkAndGetColumn>(*parent_key_column); + const PaddedPODArray * in_key_column_nullable_mask = nullptr; + + ColumnPtr parent_key_column_non_null = hierarchical_attribute_parent_key_column; + if (hierarchical_attribute_parent_key_column->isNullable()) + { + const auto * parent_key_column_typed = assert_cast(hierarchical_attribute_parent_key_column.get()); + in_key_column_nullable_mask = &parent_key_column_typed->getNullMapData(); + parent_key_column_non_null = parent_key_column_typed->getNestedColumnPtr(); + } + + const auto * parent_key_column_typed = checkAndGetColumn>(*parent_key_column_non_null); if (!parent_key_column_typed) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Parent key column should be UInt64. Actual {}", @@ -74,17 +103,24 @@ namespace const auto & parent_keys = parent_key_column_typed->getData(); next_keys_to_request.clear(); - for (size_t i = 0; i < keys_to_request.size(); ++i) + size_t keys_to_request_size = keys_to_request.size(); + for (size_t i = 0; i < keys_to_request_size; ++i) { - auto key = keys_to_request[i]; + auto child_key = keys_to_request[i]; auto parent_key = parent_keys[i]; + if (unlikely(in_key_column_nullable_mask) && (*in_key_column_nullable_mask)[i]) + { + child_key_parent_key_is_null->insert(child_key); + continue; + } + if (parent_key == key_not_in_storage_value) continue; - child_to_parent_key[key] = parent_key; + child_key_to_parent_key[child_key] = parent_key; - if (parent_key == null_value || + if ((null_value && parent_key == *null_value) || already_requested_keys.find(parent_key) != nullptr) continue; @@ -96,7 +132,7 @@ namespace keys_to_request.assign(next_keys_to_request); } - return child_to_parent_key; + return context; } } @@ -138,21 +174,33 @@ ColumnPtr getKeysHierarchyDefaultImplementation( const auto & hierarchical_attribute = dictionary_structure.attributes[hierarchical_attribute_index]; const PaddedPODArray & requested_keys = key_column_typed->getData(); - HashMap key_to_parent_key = getChildToParentHierarchyMapImpl(dictionary, hierarchical_attribute, requested_keys, key_type); + ChildToParentHierarchicalContext child_to_parent_hierarchical_context + = getChildToParentHierarchicalContext(dictionary, hierarchical_attribute, requested_keys, key_type); - auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; }; + auto is_key_valid_func = [&](auto & key) + { + if (unlikely(child_to_parent_hierarchical_context.child_key_parent_key_is_null) + && child_to_parent_hierarchical_context.child_key_parent_key_is_null->find(key)) + return true; + + return child_to_parent_hierarchical_context.child_key_to_parent_key.find(key) != nullptr; + }; + + std::optional null_value; + + if (!hierarchical_attribute.null_value.isNull()) + null_value = hierarchical_attribute.null_value.get(); - UInt64 null_value = hierarchical_attribute.null_value.get(); auto get_parent_key_func = [&](auto & key) { std::optional result; - auto it = key_to_parent_key.find(key); - if (it == nullptr) { + + auto it = child_to_parent_hierarchical_context.child_key_to_parent_key.find(key); + if (it == nullptr) return result; - } UInt64 parent_key = it->getMapped(); - if (parent_key == null_value) + if (null_value && parent_key == *null_value) return result; result = parent_key; @@ -188,21 +236,33 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( const auto & hierarchical_attribute = dictionary_structure.attributes[hierarchical_attribute_index]; const PaddedPODArray & requested_keys = key_column_typed->getData(); - HashMap key_to_parent_key = getChildToParentHierarchyMapImpl(dictionary, hierarchical_attribute, requested_keys, key_type); + ChildToParentHierarchicalContext child_to_parent_hierarchical_context + = getChildToParentHierarchicalContext(dictionary, hierarchical_attribute, requested_keys, key_type); - auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; }; + auto is_key_valid_func = [&](auto & key) + { + if (unlikely(child_to_parent_hierarchical_context.child_key_parent_key_is_null) + && child_to_parent_hierarchical_context.child_key_parent_key_is_null->find(key)) + return true; + + return child_to_parent_hierarchical_context.child_key_to_parent_key.find(key) != nullptr; + }; + + std::optional null_value; + + if (!hierarchical_attribute.null_value.isNull()) + null_value = hierarchical_attribute.null_value.get(); - UInt64 null_value = hierarchical_attribute.null_value.get(); auto get_parent_key_func = [&](auto & key) { std::optional result; - auto it = key_to_parent_key.find(key); - if (it == nullptr) { + + auto it = child_to_parent_hierarchical_context.child_key_to_parent_key.find(key); + if (it == nullptr) return result; - } UInt64 parent_key = it->getMapped(); - if (parent_key == null_value) + if (null_value && parent_key == *null_value) return result; result = parent_key; diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index ea7a9a60105..7f8353072cb 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -985,7 +985,7 @@ private: const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); auto key_column = ColumnWithTypeAndName{arguments[1].column, arguments[1].type, arguments[1].name}; - auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); + auto key_column_casted = castColumnAccurate(key_column, removeNullable(hierarchical_attribute.type)); ColumnPtr result = dictionary->getHierarchy(key_column_casted, hierarchical_attribute.type); @@ -1042,8 +1042,9 @@ private: auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[2].name}; auto in_key_column = ColumnWithTypeAndName{arguments[2].column->convertToFullColumnIfConst(), arguments[2].type, arguments[2].name}; - auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); - auto in_key_column_casted = castColumnAccurate(in_key_column, hierarchical_attribute.type); + auto hierarchical_attribute_non_nullable = removeNullable(hierarchical_attribute.type); + auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute_non_nullable); + auto in_key_column_casted = castColumnAccurate(in_key_column, hierarchical_attribute_non_nullable); ColumnPtr result = dictionary->isInHierarchy(key_column_casted, in_key_column_casted, hierarchical_attribute.type); diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference index 951540b7b44..132c03a136f 100644 --- a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference @@ -43,3 +43,33 @@ Get is in hierarchy 1 1 0 +Cache dictionary +Get hierarchy +[0] +[1,0] +[2,1,0] +[3] +[4,2,1,0] +[] +Get is in hierarchy +1 +1 +1 +1 +1 +0 +Direct dictionary +Get hierarchy +[0] +[1,0] +[2,1,0] +[3] +[4,2,1,0] +[] +Get is in hierarchy +1 +1 +1 +1 +1 +0 diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql index 444384b4417..b95c2a22348 100644 --- a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql @@ -64,4 +64,41 @@ SELECT dictIsIn('hierachical_hashed_array_dictionary', number, number) FROM syst DROP DICTIONARY hierachical_hashed_array_dictionary; +DROP DICTIONARY IF EXISTS hierachical_cache_dictionary; +CREATE DICTIONARY hierachical_cache_dictionary +( + id UInt64, + parent_id Nullable(UInt64) HIERARCHICAL +) PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table')) +LAYOUT(CACHE(SIZE_IN_CELLS 10)) +LIFETIME(0); + +SELECT 'Cache dictionary'; + +SELECT 'Get hierarchy'; +SELECT dictGetHierarchy('hierachical_cache_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get is in hierarchy'; +SELECT dictIsIn('hierachical_cache_dictionary', number, number) FROM system.numbers LIMIT 6; + +DROP DICTIONARY hierachical_cache_dictionary; + +DROP DICTIONARY IF EXISTS hierachical_direct_dictionary; +CREATE DICTIONARY hierachical_direct_dictionary +( + id UInt64, + parent_id Nullable(UInt64) HIERARCHICAL +) PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table')) +LAYOUT(DIRECT()); + +SELECT 'Direct dictionary'; + +SELECT 'Get hierarchy'; +SELECT dictGetHierarchy('hierachical_direct_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get is in hierarchy'; +SELECT dictIsIn('hierachical_direct_dictionary', number, number) FROM system.numbers LIMIT 6; + +DROP DICTIONARY hierachical_direct_dictionary; + DROP TABLE test_hierarhical_table; From 6db5c08fdee32e66d0458d866b2b997167dee5fa Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Jun 2022 17:36:16 +0200 Subject: [PATCH 3/4] Functions dictGetChildren, dictGetDescendants added support for nullable parent key --- src/Dictionaries/FlatDictionary.cpp | 3 + src/Dictionaries/HashedArrayDictionary.cpp | 6 +- src/Dictionaries/HashedDictionary.cpp | 8 +-- src/Functions/FunctionsExternalDictionaries.h | 7 +-- ...dictionaries_nullable_parent_key.reference | 63 +++++++++++++++++++ ...hical_dictionaries_nullable_parent_key.sql | 19 ++++++ 6 files changed, 97 insertions(+), 9 deletions(-) diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 6527269d98c..0f75bc27526 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -298,6 +298,9 @@ DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchicalIndex() if (!loaded_keys[child_key]) continue; + if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(child_key)) + continue; + auto parent_key = parent_keys[child_key]; parent_to_child[parent_key].emplace_back(child_key); } diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 2c1eaf5a0df..d702a02bc2e 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -331,8 +331,12 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary> parent_to_child; parent_to_child.reserve(index_to_key.size()); - for (size_t i = 0; i < parent_keys_container.size(); ++i) + size_t parent_keys_container_size = parent_keys_container.size(); + for (size_t i = 0; i < parent_keys_container_size; ++i) { + if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[i]) + continue; + const auto * it = index_to_key.find(i); if (it == index_to_key.end()) continue; diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 76492027e9b..77e0e1c7907 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -363,13 +363,13 @@ DictionaryHierarchyParentToChildIndexPtr HashedDictionary & parent_keys = std::get>(hierarchical_attribute.container); + const CollectionType & child_key_to_parent_key_map = std::get>(hierarchical_attribute.container); HashMap> parent_to_child; - parent_to_child.reserve(parent_keys.size()); + parent_to_child.reserve(child_key_to_parent_key_map.size()); - for (const auto & [key, value] : parent_keys) - parent_to_child[value].emplace_back(key); + for (const auto & [child_key, parent_key] : child_key_to_parent_key_map) + parent_to_child[parent_key].emplace_back(child_key); return std::make_shared(parent_to_child); } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 7f8353072cb..6435948b870 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -1083,10 +1083,9 @@ public: const auto & hierarchical_attribute = dictionary_helper->getDictionaryHierarchicalAttribute(dictionary); auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name}; - auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); + auto key_column_casted = castColumnAccurate(key_column, removeNullable(hierarchical_attribute.type)); - ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, level, hierarchical_parent_to_child_index); - return result; + return dictionary->getDescendants(key_column_casted, removeNullable(hierarchical_attribute.type), level, hierarchical_parent_to_child_index); } String name; @@ -1235,7 +1234,7 @@ public: auto dictionary = dictionary_helper->getDictionary(arguments[0].column); const auto & hierarchical_attribute = dictionary_helper->getDictionaryHierarchicalAttribute(dictionary); - return std::make_shared(hierarchical_attribute.type); + return std::make_shared(removeNullable(hierarchical_attribute.type)); } std::shared_ptr dictionary_helper; diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference index 132c03a136f..60d9fb16c5f 100644 --- a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.reference @@ -13,6 +13,27 @@ Get is in hierarchy 1 1 0 +Get children +[1] +[2] +[4] +[] +[] +[] +Get all descendants +[1,2,4] +[2,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2] +[4] +[] +[] +[] Hashed dictionary Get hierarchy [0] @@ -28,6 +49,27 @@ Get is in hierarchy 1 1 0 +Get children +[1] +[2] +[4] +[] +[] +[] +Get all descendants +[1,2,4] +[2,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2] +[4] +[] +[] +[] HashedArray dictionary Get hierarchy [0] @@ -43,6 +85,27 @@ Get is in hierarchy 1 1 0 +Get children +[1] +[2] +[4] +[] +[] +[] +Get all descendants +[1,2,4] +[2,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2] +[4] +[] +[] +[] Cache dictionary Get hierarchy [0] diff --git a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql index b95c2a22348..d477d58d398 100644 --- a/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql +++ b/tests/queries/0_stateless/02316_hierarchical_dictionaries_nullable_parent_key.sql @@ -23,6 +23,12 @@ SELECT 'Get hierarchy'; SELECT dictGetHierarchy('hierachical_flat_dictionary', number) FROM system.numbers LIMIT 6; SELECT 'Get is in hierarchy'; SELECT dictIsIn('hierachical_flat_dictionary', number, number) FROM system.numbers LIMIT 6; +SELECT 'Get children'; +SELECT dictGetChildren('hierachical_flat_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get all descendants'; +SELECT dictGetDescendants('hierachical_flat_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get descendants at first level'; +SELECT dictGetDescendants('hierachical_flat_dictionary', number, 1) FROM system.numbers LIMIT 6; DROP DICTIONARY hierachical_flat_dictionary; @@ -42,6 +48,12 @@ SELECT 'Get hierarchy'; SELECT dictGetHierarchy('hierachical_hashed_dictionary', number) FROM system.numbers LIMIT 6; SELECT 'Get is in hierarchy'; SELECT dictIsIn('hierachical_hashed_dictionary', number, number) FROM system.numbers LIMIT 6; +SELECT 'Get children'; +SELECT dictGetChildren('hierachical_hashed_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get all descendants'; +SELECT dictGetDescendants('hierachical_hashed_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get descendants at first level'; +SELECT dictGetDescendants('hierachical_hashed_dictionary', number, 1) FROM system.numbers LIMIT 6; DROP DICTIONARY hierachical_hashed_dictionary; @@ -61,6 +73,12 @@ SELECT 'Get hierarchy'; SELECT dictGetHierarchy('hierachical_hashed_array_dictionary', number) FROM system.numbers LIMIT 6; SELECT 'Get is in hierarchy'; SELECT dictIsIn('hierachical_hashed_array_dictionary', number, number) FROM system.numbers LIMIT 6; +SELECT 'Get children'; +SELECT dictGetChildren('hierachical_hashed_array_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get all descendants'; +SELECT dictGetDescendants('hierachical_hashed_array_dictionary', number) FROM system.numbers LIMIT 6; +SELECT 'Get descendants at first level'; +SELECT dictGetDescendants('hierachical_hashed_array_dictionary', number, 1) FROM system.numbers LIMIT 6; DROP DICTIONARY hierachical_hashed_array_dictionary; @@ -81,6 +99,7 @@ SELECT dictGetHierarchy('hierachical_cache_dictionary', number) FROM system.numb SELECT 'Get is in hierarchy'; SELECT dictIsIn('hierachical_cache_dictionary', number, number) FROM system.numbers LIMIT 6; + DROP DICTIONARY hierachical_cache_dictionary; DROP DICTIONARY IF EXISTS hierachical_direct_dictionary; From 3fd82949073fb3b69fd26477988cba7504dc43aa Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Jun 2022 18:05:09 +0200 Subject: [PATCH 4/4] Fixed style check --- src/Dictionaries/HierarchyDictionariesUtils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index 80f0cf9b3f0..fd59a0c37db 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -29,7 +29,8 @@ namespace detail namespace { - struct ChildToParentHierarchicalContext { + struct ChildToParentHierarchicalContext + { HashMap child_key_to_parent_key; std::optional> child_key_parent_key_is_null; };