Merge pull request #37805 from kitaisreal/dictionaries-hierarchy-nullable-key-support

Hierarchical dictionaries support nullable parent key
This commit is contained in:
Maksim Kita 2022-06-08 12:36:09 +02:00 committed by GitHub
commit 4e160105b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 566 additions and 107 deletions

View File

@ -184,7 +184,11 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const UInt64 null_value = dictionary_attribute.null_value.get<UInt64>();
std::optional<UInt64> null_value;
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const ContainerType<UInt64> & parent_keys = std::get<ContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; };
@ -193,13 +197,26 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &
auto get_parent_key_func = [&, this](auto & hierarchy_key)
{
std::optional<UInt64> result;
bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key];
std::optional<UInt64> result = is_key_valid ? std::make_optional(parent_keys[hierarchy_key]) : std::nullopt;
keys_found += result.has_value();
if (!is_key_valid)
return result;
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return result;
UInt64 parent_key = parent_keys[hierarchy_key];
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_key_func);
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_key_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
@ -222,7 +239,11 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const UInt64 null_value = dictionary_attribute.null_value.get<UInt64>();
std::optional<UInt64> null_value;
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const ContainerType<UInt64> & parent_keys = std::get<ContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&, this](auto & key) { return key < loaded_keys.size() && loaded_keys[key]; };
@ -231,13 +252,26 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
auto get_parent_key_func = [&, this](auto & hierarchy_key)
{
std::optional<UInt64> result;
bool is_key_valid = hierarchy_key < loaded_keys.size() && loaded_keys[hierarchy_key];
std::optional<UInt64> result = is_key_valid ? std::make_optional(parent_keys[hierarchy_key]) : std::nullopt;
keys_found += result.has_value();
if (!is_key_valid)
return result;
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return result;
UInt64 parent_key = parent_keys[hierarchy_key];
if (null_value && *null_value == parent_key)
return result;
result = parent_keys[hierarchy_key];
keys_found += 1;
return result;
};
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_key_func);
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
@ -264,6 +298,9 @@ DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchicalIndex()
if (!loaded_keys[child_key])
continue;
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(child_key))
continue;
auto parent_key = parent_keys[child_key];
parent_to_child[parent_key].emplace_back(child_key);
}

View File

@ -191,9 +191,12 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const auto & key_attribute_container = key_attribute.container;
std::optional<UInt64> null_value;
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const auto & key_attribute_container = key_attribute.container;
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
@ -206,15 +209,25 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
auto it = key_attribute_container.find(hierarchy_key);
if (it != key_attribute_container.end())
result = parent_keys_container[it->getMapped()];
if (it == key_attribute_container.end())
return result;
keys_found += result.has_value();
size_t key_index = it->getMapped();
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
return result;
UInt64 parent_key = parent_keys_container[key_index];
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
@ -246,9 +259,12 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const auto & key_attribute_container = key_attribute.container;
std::optional<UInt64> null_value;
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const auto & key_attribute_container = key_attribute.container;
const AttributeContainerType<UInt64> & parent_keys_container = std::get<AttributeContainerType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); };
@ -261,15 +277,25 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
auto it = key_attribute_container.find(hierarchy_key);
if (it != key_attribute_container.end())
result = parent_keys_container[it->getMapped()];
if (it == key_attribute_container.end())
return result;
keys_found += result.has_value();
size_t key_index = it->getMapped();
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[key_index])
return result;
UInt64 parent_key = parent_keys_container[key_index];
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
@ -305,8 +331,12 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary<dictionary_key
HashMap<UInt64, PaddedPODArray<UInt64>> parent_to_child;
parent_to_child.reserve(index_to_key.size());
for (size_t i = 0; i < parent_keys_container.size(); ++i)
size_t parent_keys_container_size = parent_keys_container.size();
for (size_t i = 0; i < parent_keys_container_size; ++i)
{
if (unlikely(hierarchical_attribute.is_index_null) && (*hierarchical_attribute.is_index_null)[i])
continue;
const auto * it = index_to_key.find(i);
if (it == index_to_key.end())
continue;

View File

@ -233,10 +233,20 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
const CollectionType<UInt64> & parent_keys_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
std::optional<UInt64> null_value;
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const CollectionType<UInt64> & child_key_to_parent_key_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & hierarchy_key)
{
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return true;
return child_key_to_parent_key_map.find(hierarchy_key) != child_key_to_parent_key_map.end();
};
size_t keys_found = 0;
@ -244,17 +254,22 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
{
std::optional<UInt64> result;
auto it = parent_keys_map.find(hierarchy_key);
auto it = child_key_to_parent_key_map.find(hierarchy_key);
if (it != parent_keys_map.end())
result = getValueFromCell(it);
if (it == child_key_to_parent_key_map.end())
return result;
keys_found += result.has_value();
UInt64 parent_key = getValueFromCell(it);
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, is_key_valid_func, get_parent_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
@ -262,7 +277,9 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
return dictionary_hierarchy_array;
}
else
{
return nullptr;
}
}
template <DictionaryKeyType dictionary_key_type, bool sparse>
@ -273,6 +290,9 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
@ -284,28 +304,43 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index];
auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const UInt64 null_value = dictionary_attribute.null_value.template get<UInt64>();
const CollectionType<UInt64> & parent_keys_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
std::optional<UInt64> null_value;
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const CollectionType<UInt64> & child_key_to_parent_key_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & hierarchy_key)
{
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return true;
return child_key_to_parent_key_map.find(hierarchy_key) != child_key_to_parent_key_map.end();
};
size_t keys_found = 0;
auto get_parent_func = [&](auto & hierarchy_key)
auto get_parent_key_func = [&](auto & hierarchy_key)
{
std::optional<UInt64> result;
auto it = parent_keys_map.find(hierarchy_key);
auto it = child_key_to_parent_key_map.find(hierarchy_key);
if (it != parent_keys_map.end())
result = getValueFromCell(it);
if (it == child_key_to_parent_key_map.end())
return result;
keys_found += result.has_value();
UInt64 parent_key = getValueFromCell(it);
if (null_value && *null_value == parent_key)
return result;
result = parent_key;
keys_found += 1;
return result;
};
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
@ -326,13 +361,13 @@ DictionaryHierarchyParentToChildIndexPtr HashedDictionary<dictionary_key_type, s
size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index;
const auto & hierarchical_attribute = attributes[hierarchical_attribute_index];
const CollectionType<UInt64> & parent_keys = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
const CollectionType<UInt64> & child_key_to_parent_key_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
HashMap<UInt64, PaddedPODArray<UInt64>> parent_to_child;
parent_to_child.reserve(parent_keys.size());
parent_to_child.reserve(child_key_to_parent_key_map.size());
for (const auto & [key, value] : parent_keys)
parent_to_child[value].emplace_back(key);
for (const auto & [child_key, parent_key] : child_key_to_parent_key_map)
parent_to_child[parent_key].emplace_back(child_key);
return std::make_shared<DictionaryHierarchicalParentToChildIndex>(parent_to_child);
}

View File

@ -1,5 +1,8 @@
#include "HierarchyDictionariesUtils.h"
#include <Columns/ColumnNullable.h>
namespace DB
{
@ -26,25 +29,36 @@ namespace detail
namespace
{
/// Result of resolving a dictionary hierarchy through repeated getColumn requests:
/// the child -> parent links that were discovered, plus the child keys whose parent key came back as NULL.
struct ChildToParentHierarchicalContext
{
/// Maps a requested child key to the parent key returned for it.
/// Keys for which the "not in storage" default value was returned are not stored here.
HashMap<UInt64, UInt64> child_key_to_parent_key;
/// Child keys whose parent key is NULL in the returned column.
/// Only engaged when the hierarchical attribute is nullable; left as std::nullopt otherwise.
std::optional<HashSet<UInt64>> child_key_parent_key_is_null;
};
/** In case of a cache or direct dictionary we do not have a structure with a child to parent representation.
* This function builds such a structure by calling getColumn for the initial keys to request and for the next keys in the hierarchy,
* until all keys are requested or the result key is the null value.
* To distinguish a null value key from a key that is not present in the dictionary, we use a special default value column
* with the max UInt64 value. If a result column key has such a value, we assume that the current key is not present in the dictionary storage.
*/
HashMap<UInt64, UInt64> getChildToParentHierarchyMapImpl(
ChildToParentHierarchicalContext getChildToParentHierarchicalContext(
const IDictionary * dictionary,
const DictionaryAttribute & hierarchical_attribute,
const PaddedPODArray<UInt64> & initial_keys_to_request,
const DataTypePtr & key_type)
{
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
std::optional<UInt64> null_value;
if (!hierarchical_attribute.null_value.isNull())
null_value = hierarchical_attribute.null_value.get<UInt64>();
ColumnPtr key_to_request_column = ColumnVector<UInt64>::create();
auto * key_to_request_column_typed = static_cast<ColumnVector<UInt64> *>(key_to_request_column->assumeMutable().get());
UInt64 key_not_in_storage_value = std::numeric_limits<UInt64>::max();
ColumnPtr key_not_in_storage_default_value_column = ColumnVector<UInt64>::create(initial_keys_to_request.size(), key_not_in_storage_value);
if (hierarchical_attribute.is_nullable)
key_not_in_storage_default_value_column = makeNullable(key_not_in_storage_default_value_column);
PaddedPODArray<UInt64> & keys_to_request = key_to_request_column_typed->getData();
keys_to_request.assign(initial_keys_to_request);
@ -52,20 +66,36 @@ namespace
PaddedPODArray<UInt64> next_keys_to_request;
HashSet<UInt64> already_requested_keys;
HashMap<UInt64, UInt64> child_to_parent_key;
ChildToParentHierarchicalContext context;
if (hierarchical_attribute.is_nullable)
context.child_key_parent_key_is_null = HashSet<UInt64>();
HashMap<UInt64, UInt64> & child_key_to_parent_key = context.child_key_to_parent_key;
std::optional<HashSet<UInt64>> & child_key_parent_key_is_null = context.child_key_parent_key_is_null;
while (!keys_to_request.empty())
{
child_to_parent_key.reserve(child_to_parent_key.size() + keys_to_request.size());
child_key_to_parent_key.reserve(keys_to_request.size());
auto parent_key_column = dictionary->getColumn(
auto hierarchical_attribute_parent_key_column = dictionary->getColumn(
hierarchical_attribute.name,
hierarchical_attribute.type,
{key_to_request_column},
{key_type},
key_not_in_storage_default_value_column);
const auto * parent_key_column_typed = checkAndGetColumn<ColumnVector<UInt64>>(*parent_key_column);
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
ColumnPtr parent_key_column_non_null = hierarchical_attribute_parent_key_column;
if (hierarchical_attribute_parent_key_column->isNullable())
{
const auto * parent_key_column_typed = assert_cast<const ColumnNullable *>(hierarchical_attribute_parent_key_column.get());
in_key_column_nullable_mask = &parent_key_column_typed->getNullMapData();
parent_key_column_non_null = parent_key_column_typed->getNestedColumnPtr();
}
const auto * parent_key_column_typed = checkAndGetColumn<ColumnVector<UInt64>>(*parent_key_column_non_null);
if (!parent_key_column_typed)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Parent key column should be UInt64. Actual {}",
@ -74,17 +104,24 @@ namespace
const auto & parent_keys = parent_key_column_typed->getData();
next_keys_to_request.clear();
for (size_t i = 0; i < keys_to_request.size(); ++i)
size_t keys_to_request_size = keys_to_request.size();
for (size_t i = 0; i < keys_to_request_size; ++i)
{
auto key = keys_to_request[i];
auto child_key = keys_to_request[i];
auto parent_key = parent_keys[i];
if (unlikely(in_key_column_nullable_mask) && (*in_key_column_nullable_mask)[i])
{
child_key_parent_key_is_null->insert(child_key);
continue;
}
if (parent_key == key_not_in_storage_value)
continue;
child_to_parent_key[key] = parent_key;
child_key_to_parent_key[child_key] = parent_key;
if (parent_key == null_value ||
if ((null_value && parent_key == *null_value) ||
already_requested_keys.find(parent_key) != nullptr)
continue;
@ -96,7 +133,7 @@ namespace
keys_to_request.assign(next_keys_to_request);
}
return child_to_parent_key;
return context;
}
}
@ -138,22 +175,41 @@ ColumnPtr getKeysHierarchyDefaultImplementation(
const auto & hierarchical_attribute = dictionary_structure.attributes[hierarchical_attribute_index];
const PaddedPODArray<UInt64> & requested_keys = key_column_typed->getData();
HashMap<UInt64, UInt64> key_to_parent_key = getChildToParentHierarchyMapImpl(dictionary, hierarchical_attribute, requested_keys, key_type);
ChildToParentHierarchicalContext child_to_parent_hierarchical_context
= getChildToParentHierarchicalContext(dictionary, hierarchical_attribute, requested_keys, key_type);
auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; };
auto is_key_valid_func = [&](auto & key)
{
if (unlikely(child_to_parent_hierarchical_context.child_key_parent_key_is_null)
&& child_to_parent_hierarchical_context.child_key_parent_key_is_null->find(key))
return true;
return child_to_parent_hierarchical_context.child_key_to_parent_key.find(key) != nullptr;
};
std::optional<UInt64> null_value;
if (!hierarchical_attribute.null_value.isNull())
null_value = hierarchical_attribute.null_value.get<UInt64>();
auto get_parent_key_func = [&](auto & key)
{
auto it = key_to_parent_key.find(key);
std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
valid_keys += result.has_value();
std::optional<UInt64> result;
auto it = child_to_parent_hierarchical_context.child_key_to_parent_key.find(key);
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (null_value && parent_key == *null_value)
return result;
result = parent_key;
valid_keys += 1;
return result;
};
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
auto dictionary_hierarchy_array = getKeysHierarchyArray(requested_keys, null_value, is_key_valid_func, get_parent_key_func);
return dictionary_hierarchy_array;
return getKeysHierarchyArray(requested_keys, is_key_valid_func, get_parent_key_func);
}
ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
@ -181,23 +237,42 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
const auto & hierarchical_attribute = dictionary_structure.attributes[hierarchical_attribute_index];
const PaddedPODArray<UInt64> & requested_keys = key_column_typed->getData();
HashMap<UInt64, UInt64> key_to_parent_key = getChildToParentHierarchyMapImpl(dictionary, hierarchical_attribute, requested_keys, key_type);
ChildToParentHierarchicalContext child_to_parent_hierarchical_context
= getChildToParentHierarchicalContext(dictionary, hierarchical_attribute, requested_keys, key_type);
auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; };
auto is_key_valid_func = [&](auto & key)
{
if (unlikely(child_to_parent_hierarchical_context.child_key_parent_key_is_null)
&& child_to_parent_hierarchical_context.child_key_parent_key_is_null->find(key))
return true;
return child_to_parent_hierarchical_context.child_key_to_parent_key.find(key) != nullptr;
};
std::optional<UInt64> null_value;
if (!hierarchical_attribute.null_value.isNull())
null_value = hierarchical_attribute.null_value.get<UInt64>();
auto get_parent_key_func = [&](auto & key)
{
auto it = key_to_parent_key.find(key);
std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
valid_keys += result.has_value();
std::optional<UInt64> result;
auto it = child_to_parent_hierarchical_context.child_key_to_parent_key.find(key);
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (null_value && parent_key == *null_value)
return result;
result = parent_key;
valid_keys += 1;
return result;
};
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
const auto & in_keys = in_key_column_typed->getData();
auto result = getKeysIsInHierarchyColumn(requested_keys, in_keys, null_value, is_key_valid_func, get_parent_key_func);
return result;
return getKeysIsInHierarchyColumn(requested_keys, in_keys, is_key_valid_func, get_parent_key_func);
}
}

View File

@ -33,7 +33,7 @@ public:
keys.reserve(parent_to_children_map_size);
parent_to_children_keys_range.reserve(parent_to_children_map_size);
for (auto & [parent, children] : parent_to_children_map_)
for (const auto & [parent, children] : parent_to_children_map_)
{
size_t keys_size = keys.size();
UInt32 start_index = static_cast<UInt32>(keys_size);
@ -97,7 +97,6 @@ namespace detail
template <typename IsKeyValidFunc, typename GetParentKeyFunc>
ElementsAndOffsets getHierarchy(
const PaddedPODArray<UInt64> & keys,
const UInt64 & hierarchy_null_value,
IsKeyValidFunc && is_key_valid_func,
GetParentKeyFunc && get_parent_key_func)
{
@ -156,7 +155,7 @@ namespace detail
break;
}
if (hierarchy_key == hierarchy_null_value || current_hierarchy_depth >= DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH)
if (current_hierarchy_depth >= DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH)
break;
already_processes_keys_to_offset[hierarchy_key] = {offsets.size(), current_hierarchy_depth};
@ -190,7 +189,6 @@ namespace detail
PaddedPODArray<UInt8> getIsInHierarchy(
const PaddedPODArray<UInt64> & keys,
const PaddedPODArray<UInt64> & in_keys,
const UInt64 & hierarchy_null_value,
IsKeyValidFunc && is_key_valid_func,
GetParentKeyFunc && get_parent_func)
{
@ -201,7 +199,6 @@ namespace detail
detail::ElementsAndOffsets hierarchy = detail::getHierarchy(
keys,
hierarchy_null_value,
std::forward<IsKeyValidFunc>(is_key_valid_func),
std::forward<GetParentKeyFunc>(get_parent_func));
@ -213,7 +210,7 @@ namespace detail
size_t i_elements_start = i > 0 ? offsets[i - 1] : 0;
size_t i_elements_end = offsets[i];
auto & key_to_find = in_keys[i];
const auto & key_to_find = in_keys[i];
const auto * begin = elements.begin() + i_elements_start;
const auto * end = elements.begin() + i_elements_end;
@ -263,8 +260,8 @@ namespace detail
Strategy strategy,
size_t & valid_keys)
{
auto & parent_to_children_keys_range = parent_to_child_index.parent_to_children_keys_range;
auto & children_keys = parent_to_child_index.keys;
const auto & parent_to_children_keys_range = parent_to_child_index.parent_to_children_keys_range;
const auto & children_keys = parent_to_child_index.keys;
/// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants.
/// If strategy is GetDescendantsAtSpecificLevelStrategy we do not use the cache.
@ -436,13 +433,11 @@ namespace detail
template <typename KeyType, typename IsKeyValidFunc, typename GetParentKeyFunc>
ColumnPtr getKeysHierarchyArray(
const PaddedPODArray<KeyType> & keys,
const KeyType & hierarchy_null_value,
IsKeyValidFunc && is_key_valid_func,
GetParentKeyFunc && get_parent_func)
{
auto elements_and_offsets = detail::getHierarchy(
keys,
hierarchy_null_value,
std::forward<IsKeyValidFunc>(is_key_valid_func),
std::forward<GetParentKeyFunc>(get_parent_func));
@ -454,14 +449,12 @@ template <typename KeyType, typename IsKeyValidFunc, typename GetParentKeyFunc>
ColumnUInt8::Ptr getKeysIsInHierarchyColumn(
const PaddedPODArray<KeyType> & hierarchy_keys,
const PaddedPODArray<KeyType> & hierarchy_in_keys,
const KeyType & hierarchy_null_value,
IsKeyValidFunc && is_key_valid_func,
GetParentKeyFunc && get_parent_func)
{
auto is_in_hierarchy_data = detail::getIsInHierarchy(
hierarchy_keys,
hierarchy_in_keys,
hierarchy_null_value,
std::forward<IsKeyValidFunc>(is_key_valid_func),
std::forward<GetParentKeyFunc>(get_parent_func));

View File

@ -17,19 +17,26 @@ TEST(HierarchyDictionariesUtils, getHierarchy)
auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; };
UInt64 hierarchy_null_value_key = 0;
auto get_parent_key_func = [&](auto key)
{
std::optional<UInt64> result;
auto it = child_to_parent.find(key);
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
return value;
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (parent_key == hierarchy_null_value_key)
return result;
result = parent_key;
return result;
};
UInt64 hierarchy_null_value_key = 0;
PaddedPODArray<UInt64> keys = {1, 2, 3, 4, 5};
auto result = DB::detail::getHierarchy(
keys,
hierarchy_null_value_key,
is_key_valid_func,
get_parent_key_func);
@ -49,19 +56,26 @@ TEST(HierarchyDictionariesUtils, getHierarchy)
auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; };
UInt64 hierarchy_null_value_key = 0;
auto get_parent_key_func = [&](auto key)
{
std::optional<UInt64> result;
auto it = child_to_parent.find(key);
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
return value;
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (parent_key == hierarchy_null_value_key)
return result;
result = parent_key;
return result;
};
UInt64 hierarchy_null_value_key = 0;
PaddedPODArray<UInt64> keys = {1, 2, 3};
auto result = DB::detail::getHierarchy(
keys,
hierarchy_null_value_key,
is_key_valid_func,
get_parent_key_func);
@ -87,21 +101,28 @@ TEST(HierarchyDictionariesUtils, getIsInHierarchy)
auto is_key_valid_func = [&](auto key) { return child_to_parent.find(key) != nullptr; };
UInt64 hierarchy_null_value_key = 0;
auto get_parent_key_func = [&](auto key)
{
std::optional<UInt64> result;
auto it = child_to_parent.find(key);
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
return value;
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (parent_key == hierarchy_null_value_key)
return result;
result = parent_key;
return result;
};
UInt64 hierarchy_null_value_key = 0;
PaddedPODArray<UInt64> keys = {1, 2, 3, 4, 5};
PaddedPODArray<UInt64> keys_in = {1, 1, 1, 2, 5};
PaddedPODArray<UInt8> actual = DB::detail::getIsInHierarchy(
keys,
keys_in,
hierarchy_null_value_key,
is_key_valid_func,
get_parent_key_func);
@ -119,21 +140,28 @@ TEST(HierarchyDictionariesUtils, getIsInHierarchy)
return child_to_parent.find(key) != nullptr;
};
UInt64 hierarchy_null_value_key = 0;
auto get_parent_key_func = [&](auto key)
{
std::optional<UInt64> result;
auto it = child_to_parent.find(key);
std::optional<UInt64> value = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
return value;
if (it == nullptr)
return result;
UInt64 parent_key = it->getMapped();
if (parent_key == hierarchy_null_value_key)
return result;
result = parent_key;
return result;
};
UInt64 hierarchy_null_value_key = 0;
PaddedPODArray<UInt64> keys = {1, 2, 3};
PaddedPODArray<UInt64> keys_in = {1, 2, 3};
PaddedPODArray<UInt8> actual = DB::detail::getIsInHierarchy(
keys,
keys_in,
hierarchy_null_value_key,
is_key_valid_func,
get_parent_key_func);

View File

@ -973,7 +973,7 @@ private:
auto dictionary = helper.getDictionary(arguments[0].column);
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
return std::make_shared<DataTypeArray>(hierarchical_attribute.type);
return std::make_shared<DataTypeArray>(removeNullable(hierarchical_attribute.type));
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
@ -985,7 +985,7 @@ private:
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
auto key_column = ColumnWithTypeAndName{arguments[1].column, arguments[1].type, arguments[1].name};
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type);
auto key_column_casted = castColumnAccurate(key_column, removeNullable(hierarchical_attribute.type));
ColumnPtr result = dictionary->getHierarchy(key_column_casted, hierarchical_attribute.type);
@ -1042,8 +1042,9 @@ private:
auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[2].name};
auto in_key_column = ColumnWithTypeAndName{arguments[2].column->convertToFullColumnIfConst(), arguments[2].type, arguments[2].name};
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type);
auto in_key_column_casted = castColumnAccurate(in_key_column, hierarchical_attribute.type);
auto hierarchical_attribute_non_nullable = removeNullable(hierarchical_attribute.type);
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute_non_nullable);
auto in_key_column_casted = castColumnAccurate(in_key_column, hierarchical_attribute_non_nullable);
ColumnPtr result = dictionary->isInHierarchy(key_column_casted, in_key_column_casted, hierarchical_attribute.type);
@ -1082,10 +1083,9 @@ public:
const auto & hierarchical_attribute = dictionary_helper->getDictionaryHierarchicalAttribute(dictionary);
auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name};
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type);
auto key_column_casted = castColumnAccurate(key_column, removeNullable(hierarchical_attribute.type));
ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, level, hierarchical_parent_to_child_index);
return result;
return dictionary->getDescendants(key_column_casted, removeNullable(hierarchical_attribute.type), level, hierarchical_parent_to_child_index);
}
String name;
@ -1234,7 +1234,7 @@ public:
auto dictionary = dictionary_helper->getDictionary(arguments[0].column);
const auto & hierarchical_attribute = dictionary_helper->getDictionaryHierarchicalAttribute(dictionary);
return std::make_shared<DataTypeArray>(hierarchical_attribute.type);
return std::make_shared<DataTypeArray>(removeNullable(hierarchical_attribute.type));
}
std::shared_ptr<FunctionDictHelper> dictionary_helper;

View File

@ -0,0 +1,138 @@
Flat dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0
Get children
[1]
[2]
[4]
[]
[]
[]
Get all descendants
[1,2,4]
[2,4]
[4]
[]
[]
[]
Get descendants at first level
[1]
[2]
[4]
[]
[]
[]
Hashed dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0
Get children
[1]
[2]
[4]
[]
[]
[]
Get all descendants
[1,2,4]
[2,4]
[4]
[]
[]
[]
Get descendants at first level
[1]
[2]
[4]
[]
[]
[]
HashedArray dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0
Get children
[1]
[2]
[4]
[]
[]
[]
Get all descendants
[1,2,4]
[2,4]
[4]
[]
[]
[]
Get descendants at first level
[1]
[2]
[4]
[]
[]
[]
Cache dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0
Direct dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0

View File

@ -0,0 +1,123 @@
-- Source table shared by every dictionary defined in this test.
-- parent_id is Nullable(UInt64), so a row can be stored with a NULL parent.
DROP TABLE IF EXISTS test_hierarhical_table;
CREATE TABLE test_hierarhical_table
(
id UInt64,
parent_id Nullable(UInt64)
) ENGINE=TinyLog;
-- Inserted rows form the chain 4 -> 2 -> 1 -> 0; keys 0 and 3 are inserted with a NULL parent_id.
INSERT INTO test_hierarhical_table VALUES (0, NULL), (1, 0), (2, 1), (3, NULL), (4, 2);
-- FLAT layout: dictionary over test_hierarhical_table whose parent_id attribute
-- is declared both Nullable(UInt64) and HIERARCHICAL.
DROP DICTIONARY IF EXISTS hierachical_flat_dictionary;
CREATE DICTIONARY hierachical_flat_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(FLAT())
LIFETIME(0);
-- The string SELECTs print section labels that appear verbatim in the reference output.
SELECT 'Flat dictionary';
-- Each query below runs over the first 6 rows of system.numbers (0..5);
-- key 5 is not present in the source table.
SELECT 'Get hierarchy';
SELECT dictGetHierarchy('hierachical_flat_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
-- Each key is checked against itself (same column passed as both key arguments).
SELECT dictIsIn('hierachical_flat_dictionary', number, number) FROM system.numbers LIMIT 6;
SELECT 'Get children';
SELECT dictGetChildren('hierachical_flat_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get all descendants';
-- No level argument is passed here.
SELECT dictGetDescendants('hierachical_flat_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get descendants at first level';
-- Third argument 1 is the level, matching the label above.
SELECT dictGetDescendants('hierachical_flat_dictionary', number, 1) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_flat_dictionary;
-- HASHED layout: same dictionary structure and source table as the other sections,
-- with a Nullable(UInt64) HIERARCHICAL parent_id attribute.
DROP DICTIONARY IF EXISTS hierachical_hashed_dictionary;
CREATE DICTIONARY hierachical_hashed_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(HASHED())
LIFETIME(0);
-- The string SELECTs print section labels that appear verbatim in the reference output.
SELECT 'Hashed dictionary';
-- Each query below runs over the first 6 rows of system.numbers (0..5);
-- key 5 is not present in the source table.
SELECT 'Get hierarchy';
SELECT dictGetHierarchy('hierachical_hashed_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
-- Each key is checked against itself (same column passed as both key arguments).
SELECT dictIsIn('hierachical_hashed_dictionary', number, number) FROM system.numbers LIMIT 6;
SELECT 'Get children';
SELECT dictGetChildren('hierachical_hashed_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get all descendants';
-- No level argument is passed here.
SELECT dictGetDescendants('hierachical_hashed_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get descendants at first level';
-- Third argument 1 is the level, matching the label above.
SELECT dictGetDescendants('hierachical_hashed_dictionary', number, 1) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_hashed_dictionary;
-- HASHED_ARRAY layout: same dictionary structure and source table as the other sections,
-- with a Nullable(UInt64) HIERARCHICAL parent_id attribute.
DROP DICTIONARY IF EXISTS hierachical_hashed_array_dictionary;
CREATE DICTIONARY hierachical_hashed_array_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(HASHED_ARRAY())
LIFETIME(0);
-- The string SELECTs print section labels that appear verbatim in the reference output.
SELECT 'HashedArray dictionary';
-- Each query below runs over the first 6 rows of system.numbers (0..5);
-- key 5 is not present in the source table.
SELECT 'Get hierarchy';
SELECT dictGetHierarchy('hierachical_hashed_array_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
-- Each key is checked against itself (same column passed as both key arguments).
SELECT dictIsIn('hierachical_hashed_array_dictionary', number, number) FROM system.numbers LIMIT 6;
SELECT 'Get children';
SELECT dictGetChildren('hierachical_hashed_array_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get all descendants';
-- No level argument is passed here.
SELECT dictGetDescendants('hierachical_hashed_array_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get descendants at first level';
-- Third argument 1 is the level, matching the label above.
SELECT dictGetDescendants('hierachical_hashed_array_dictionary', number, 1) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_hashed_array_dictionary;
-- CACHE layout (SIZE_IN_CELLS 10): same dictionary structure and source table as the
-- other sections, with a Nullable(UInt64) HIERARCHICAL parent_id attribute.
-- Only dictGetHierarchy and dictIsIn are exercised for this layout.
-- NOTE(review): presumably the children/descendants functions are not supported for
-- cache dictionaries — confirm before adding them here.
DROP DICTIONARY IF EXISTS hierachical_cache_dictionary;
CREATE DICTIONARY hierachical_cache_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(CACHE(SIZE_IN_CELLS 10))
LIFETIME(0);
-- The string SELECTs print section labels that appear verbatim in the reference output.
SELECT 'Cache dictionary';
-- Each query below runs over the first 6 rows of system.numbers (0..5);
-- key 5 is not present in the source table.
SELECT 'Get hierarchy';
SELECT dictGetHierarchy('hierachical_cache_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
-- Each key is checked against itself (same column passed as both key arguments).
SELECT dictIsIn('hierachical_cache_dictionary', number, number) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_cache_dictionary;
-- DIRECT layout: same dictionary structure and source table as the other sections,
-- with a Nullable(UInt64) HIERARCHICAL parent_id attribute.
-- Unlike the other definitions in this script, no LIFETIME clause is given.
-- Only dictGetHierarchy and dictIsIn are exercised for this layout.
-- NOTE(review): presumably the children/descendants functions are not supported for
-- direct dictionaries — confirm before adding them here.
DROP DICTIONARY IF EXISTS hierachical_direct_dictionary;
CREATE DICTIONARY hierachical_direct_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(DIRECT());
-- The string SELECTs print section labels that appear verbatim in the reference output.
SELECT 'Direct dictionary';
-- Each query below runs over the first 6 rows of system.numbers (0..5);
-- key 5 is not present in the source table.
SELECT 'Get hierarchy';
SELECT dictGetHierarchy('hierachical_direct_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
-- Each key is checked against itself (same column passed as both key arguments).
SELECT dictIsIn('hierachical_direct_dictionary', number, number) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_direct_dictionary;
-- Teardown: remove the source table used by all dictionaries in this script.
DROP TABLE test_hierarhical_table;