Hierarchical Cache, Direct dictionaries added support for nullable parent key

This commit is contained in:
Maksim Kita 2022-06-03 17:21:55 +02:00
parent 20b55a45b2
commit a0cbbd9edc
7 changed files with 164 additions and 120 deletions

View File

@ -177,9 +177,6 @@ ColumnUInt8::Ptr FlatDictionary::hasKeys(const Columns & key_columns, const Data
ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr &) const
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
@ -232,22 +229,9 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
ColumnPtr in_key_column,
const DataTypePtr &) const
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
if (in_key_column->isNullable())
{
const auto * in_key_column_typed = assert_cast<const ColumnNullable *>(in_key_column.get());
in_key_column = in_key_column_typed->getNestedColumnPtr();
in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData();
}
PaddedPODArray<UInt64> keys_in_backup_storage;
const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage);
@ -289,17 +273,6 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy(
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func);
if (unlikely(in_key_column_nullable_mask))
{
auto mutable_result_ptr = result->assumeMutable();
auto & mutable_result = assert_cast<ColumnUInt8 &>(*mutable_result_ptr);
auto & mutable_result_data = mutable_result.getData();
size_t mutable_result_data_size = mutable_result_data.size();
for (size_t i = 0; i < mutable_result_data_size; ++i)
mutable_result_data[i] &= !(static_cast<bool>((*in_key_column_nullable_mask)[i]));
}
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);

View File

@ -183,9 +183,6 @@ ColumnPtr HashedArrayDictionary<dictionary_key_type>::getHierarchy(ColumnPtr key
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
@ -251,22 +248,9 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
if (in_key_column->isNullable())
{
const auto * in_key_column_typed = assert_cast<const ColumnNullable *>(in_key_column.get());
in_key_column = in_key_column_typed->getNestedColumnPtr();
in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData();
}
PaddedPODArray<UInt64> keys_in_backup_storage;
const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage);
@ -313,17 +297,6 @@ ColumnUInt8::Ptr HashedArrayDictionary<dictionary_key_type>::isInHierarchy(
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_func);
if (unlikely(in_key_column_nullable_mask))
{
auto mutable_result_ptr = result->assumeMutable();
auto & mutable_result = assert_cast<ColumnUInt8 &>(*mutable_result_ptr);
auto & mutable_result_data = mutable_result.getData();
size_t mutable_result_data_size = mutable_result_data.size();
for (size_t i = 0; i < mutable_result_data_size; ++i)
mutable_result_data[i] &= !(static_cast<bool>((*in_key_column_nullable_mask)[i]));
}
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);

View File

@ -227,9 +227,6 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
{
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
@ -243,14 +240,14 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const CollectionType<UInt64> & parent_keys_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
const CollectionType<UInt64> & child_key_to_parent_key_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & hierarchy_key)
{
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return true;
return parent_keys_map.find(hierarchy_key) != parent_keys_map.end();
return child_key_to_parent_key_map.find(hierarchy_key) != child_key_to_parent_key_map.end();
};
size_t keys_found = 0;
@ -259,12 +256,9 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
{
std::optional<UInt64> result;
auto it = parent_keys_map.find(hierarchy_key);
auto it = child_key_to_parent_key_map.find(hierarchy_key);
if (it == parent_keys_map.end())
return result;
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
if (it == child_key_to_parent_key_map.end())
return result;
UInt64 parent_key = getValueFromCell(it);
@ -301,16 +295,6 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
if (key_column->isNullable())
key_column = assert_cast<const ColumnNullable *>(key_column.get())->getNestedColumnPtr();
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
if (in_key_column->isNullable())
{
const auto * in_key_column_typed = assert_cast<const ColumnNullable *>(in_key_column.get());
in_key_column = in_key_column_typed->getNestedColumnPtr();
in_key_column_nullable_mask = &in_key_column_typed->getNullMapColumn().getData();
}
PaddedPODArray<UInt64> keys_backup_storage;
const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage);
@ -327,14 +311,14 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
if (!dictionary_attribute.null_value.isNull())
null_value = dictionary_attribute.null_value.get<UInt64>();
const CollectionType<UInt64> & parent_keys_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
const CollectionType<UInt64> & child_key_to_parent_key_map = std::get<CollectionType<UInt64>>(hierarchical_attribute.container);
auto is_key_valid_func = [&](auto & hierarchy_key)
{
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
return true;
return parent_keys_map.find(hierarchy_key) != parent_keys_map.end();
return child_key_to_parent_key_map.find(hierarchy_key) != child_key_to_parent_key_map.end();
};
size_t keys_found = 0;
@ -343,12 +327,9 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
{
std::optional<UInt64> result;
auto it = parent_keys_map.find(hierarchy_key);
auto it = child_key_to_parent_key_map.find(hierarchy_key);
if (it == parent_keys_map.end())
return result;
if (unlikely(hierarchical_attribute.is_nullable_set) && hierarchical_attribute.is_nullable_set->find(hierarchy_key))
if (it == child_key_to_parent_key_map.end())
return result;
UInt64 parent_key = getValueFromCell(it);
@ -363,17 +344,6 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
auto result = getKeysIsInHierarchyColumn(keys, keys_in, is_key_valid_func, get_parent_key_func);
if (unlikely(in_key_column_nullable_mask))
{
auto mutable_result_ptr = result->assumeMutable();
auto & mutable_result = assert_cast<ColumnUInt8 &>(*mutable_result_ptr);
auto & mutable_result_data = mutable_result.getData();
size_t mutable_result_data_size = mutable_result_data.size();
for (size_t i = 0; i < mutable_result_data_size; ++i)
mutable_result_data[i] &= !(static_cast<bool>((*in_key_column_nullable_mask)[i]));
}
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);

View File

@ -1,5 +1,8 @@
#include "HierarchyDictionariesUtils.h"
#include <Columns/ColumnNullable.h>
namespace DB
{
@ -26,25 +29,35 @@ namespace detail
namespace
{
/// Child-to-parent information collected while requesting hierarchy keys from a dictionary.
struct ChildToParentHierarchicalContext {
/// Child key -> parent key, for requested keys that were found in storage with a non-NULL parent key.
HashMap<UInt64, UInt64> child_key_to_parent_key;
/// Child keys whose parent key was returned as NULL. Engaged only when the hierarchical attribute is nullable.
std::optional<HashSet<UInt64>> child_key_parent_key_is_null;
};
/** In case of cache or direct dictionary we do not have a structure with child to parent representation.
* This function builds such a structure by calling getColumn for the initial keys to request and for the next keys in the hierarchy,
* until all keys are requested or the result key is the null value.
* To distinguish a null value key from a key that is not present in the dictionary, we use a special default value column
* with max UInt64 value; if the result column key has such a value, we assume that the current key is not present in the dictionary storage.
*/
HashMap<UInt64, UInt64> getChildToParentHierarchyMapImpl(
ChildToParentHierarchicalContext getChildToParentHierarchicalContext(
const IDictionary * dictionary,
const DictionaryAttribute & hierarchical_attribute,
const PaddedPODArray<UInt64> & initial_keys_to_request,
const DataTypePtr & key_type)
{
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
std::optional<UInt64> null_value;
if (!hierarchical_attribute.null_value.isNull())
null_value = hierarchical_attribute.null_value.get<UInt64>();
ColumnPtr key_to_request_column = ColumnVector<UInt64>::create();
auto * key_to_request_column_typed = static_cast<ColumnVector<UInt64> *>(key_to_request_column->assumeMutable().get());
UInt64 key_not_in_storage_value = std::numeric_limits<UInt64>::max();
ColumnPtr key_not_in_storage_default_value_column = ColumnVector<UInt64>::create(initial_keys_to_request.size(), key_not_in_storage_value);
if (hierarchical_attribute.is_nullable)
key_not_in_storage_default_value_column = makeNullable(key_not_in_storage_default_value_column);
PaddedPODArray<UInt64> & keys_to_request = key_to_request_column_typed->getData();
keys_to_request.assign(initial_keys_to_request);
@ -52,20 +65,36 @@ namespace
PaddedPODArray<UInt64> next_keys_to_request;
HashSet<UInt64> already_requested_keys;
HashMap<UInt64, UInt64> child_to_parent_key;
ChildToParentHierarchicalContext context;
if (hierarchical_attribute.is_nullable)
context.child_key_parent_key_is_null = HashSet<UInt64>();
HashMap<UInt64, UInt64> & child_key_to_parent_key = context.child_key_to_parent_key;
std::optional<HashSet<UInt64>> & child_key_parent_key_is_null = context.child_key_parent_key_is_null;
while (!keys_to_request.empty())
{
child_to_parent_key.reserve(child_to_parent_key.size() + keys_to_request.size());
child_key_to_parent_key.reserve(keys_to_request.size());
auto parent_key_column = dictionary->getColumn(
auto hierarchical_attribute_parent_key_column = dictionary->getColumn(
hierarchical_attribute.name,
hierarchical_attribute.type,
{key_to_request_column},
{key_type},
key_not_in_storage_default_value_column);
const auto * parent_key_column_typed = checkAndGetColumn<ColumnVector<UInt64>>(*parent_key_column);
const PaddedPODArray<UInt8> * in_key_column_nullable_mask = nullptr;
ColumnPtr parent_key_column_non_null = hierarchical_attribute_parent_key_column;
if (hierarchical_attribute_parent_key_column->isNullable())
{
const auto * parent_key_column_typed = assert_cast<const ColumnNullable *>(hierarchical_attribute_parent_key_column.get());
in_key_column_nullable_mask = &parent_key_column_typed->getNullMapData();
parent_key_column_non_null = parent_key_column_typed->getNestedColumnPtr();
}
const auto * parent_key_column_typed = checkAndGetColumn<ColumnVector<UInt64>>(*parent_key_column_non_null);
if (!parent_key_column_typed)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Parent key column should be UInt64. Actual {}",
@ -74,17 +103,24 @@ namespace
const auto & parent_keys = parent_key_column_typed->getData();
next_keys_to_request.clear();
for (size_t i = 0; i < keys_to_request.size(); ++i)
size_t keys_to_request_size = keys_to_request.size();
for (size_t i = 0; i < keys_to_request_size; ++i)
{
auto key = keys_to_request[i];
auto child_key = keys_to_request[i];
auto parent_key = parent_keys[i];
if (unlikely(in_key_column_nullable_mask) && (*in_key_column_nullable_mask)[i])
{
child_key_parent_key_is_null->insert(child_key);
continue;
}
if (parent_key == key_not_in_storage_value)
continue;
child_to_parent_key[key] = parent_key;
child_key_to_parent_key[child_key] = parent_key;
if (parent_key == null_value ||
if ((null_value && parent_key == *null_value) ||
already_requested_keys.find(parent_key) != nullptr)
continue;
@ -96,7 +132,7 @@ namespace
keys_to_request.assign(next_keys_to_request);
}
return child_to_parent_key;
return context;
}
}
@ -138,21 +174,33 @@ ColumnPtr getKeysHierarchyDefaultImplementation(
const auto & hierarchical_attribute = dictionary_structure.attributes[hierarchical_attribute_index];
const PaddedPODArray<UInt64> & requested_keys = key_column_typed->getData();
HashMap<UInt64, UInt64> key_to_parent_key = getChildToParentHierarchyMapImpl(dictionary, hierarchical_attribute, requested_keys, key_type);
ChildToParentHierarchicalContext child_to_parent_hierarchical_context
= getChildToParentHierarchicalContext(dictionary, hierarchical_attribute, requested_keys, key_type);
auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; };
auto is_key_valid_func = [&](auto & key)
{
if (unlikely(child_to_parent_hierarchical_context.child_key_parent_key_is_null)
&& child_to_parent_hierarchical_context.child_key_parent_key_is_null->find(key))
return true;
return child_to_parent_hierarchical_context.child_key_to_parent_key.find(key) != nullptr;
};
std::optional<UInt64> null_value;
if (!hierarchical_attribute.null_value.isNull())
null_value = hierarchical_attribute.null_value.get<UInt64>();
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
auto get_parent_key_func = [&](auto & key)
{
std::optional<UInt64> result;
auto it = key_to_parent_key.find(key);
if (it == nullptr) {
auto it = child_to_parent_hierarchical_context.child_key_to_parent_key.find(key);
if (it == nullptr)
return result;
}
UInt64 parent_key = it->getMapped();
if (parent_key == null_value)
if (null_value && parent_key == *null_value)
return result;
result = parent_key;
@ -188,21 +236,33 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
const auto & hierarchical_attribute = dictionary_structure.attributes[hierarchical_attribute_index];
const PaddedPODArray<UInt64> & requested_keys = key_column_typed->getData();
HashMap<UInt64, UInt64> key_to_parent_key = getChildToParentHierarchyMapImpl(dictionary, hierarchical_attribute, requested_keys, key_type);
ChildToParentHierarchicalContext child_to_parent_hierarchical_context
= getChildToParentHierarchicalContext(dictionary, hierarchical_attribute, requested_keys, key_type);
auto is_key_valid_func = [&](auto & key) { return key_to_parent_key.find(key) != nullptr; };
auto is_key_valid_func = [&](auto & key)
{
if (unlikely(child_to_parent_hierarchical_context.child_key_parent_key_is_null)
&& child_to_parent_hierarchical_context.child_key_parent_key_is_null->find(key))
return true;
return child_to_parent_hierarchical_context.child_key_to_parent_key.find(key) != nullptr;
};
std::optional<UInt64> null_value;
if (!hierarchical_attribute.null_value.isNull())
null_value = hierarchical_attribute.null_value.get<UInt64>();
UInt64 null_value = hierarchical_attribute.null_value.get<UInt64>();
auto get_parent_key_func = [&](auto & key)
{
std::optional<UInt64> result;
auto it = key_to_parent_key.find(key);
if (it == nullptr) {
auto it = child_to_parent_hierarchical_context.child_key_to_parent_key.find(key);
if (it == nullptr)
return result;
}
UInt64 parent_key = it->getMapped();
if (parent_key == null_value)
if (null_value && parent_key == *null_value)
return result;
result = parent_key;

View File

@ -985,7 +985,7 @@ private:
const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary);
auto key_column = ColumnWithTypeAndName{arguments[1].column, arguments[1].type, arguments[1].name};
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type);
auto key_column_casted = castColumnAccurate(key_column, removeNullable(hierarchical_attribute.type));
ColumnPtr result = dictionary->getHierarchy(key_column_casted, hierarchical_attribute.type);
@ -1042,8 +1042,9 @@ private:
auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[2].name};
auto in_key_column = ColumnWithTypeAndName{arguments[2].column->convertToFullColumnIfConst(), arguments[2].type, arguments[2].name};
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type);
auto in_key_column_casted = castColumnAccurate(in_key_column, hierarchical_attribute.type);
auto hierarchical_attribute_non_nullable = removeNullable(hierarchical_attribute.type);
auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute_non_nullable);
auto in_key_column_casted = castColumnAccurate(in_key_column, hierarchical_attribute_non_nullable);
ColumnPtr result = dictionary->isInHierarchy(key_column_casted, in_key_column_casted, hierarchical_attribute.type);

View File

@ -43,3 +43,33 @@ Get is in hierarchy
1
1
0
Cache dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0
Direct dictionary
Get hierarchy
[0]
[1,0]
[2,1,0]
[3]
[4,2,1,0]
[]
Get is in hierarchy
1
1
1
1
1
0

View File

@ -64,4 +64,41 @@ SELECT dictIsIn('hierachical_hashed_array_dictionary', number, number) FROM syst
DROP DICTIONARY hierachical_hashed_array_dictionary;
DROP DICTIONARY IF EXISTS hierachical_cache_dictionary;
CREATE DICTIONARY hierachical_cache_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(CACHE(SIZE_IN_CELLS 10))
LIFETIME(0);
SELECT 'Cache dictionary';
SELECT 'Get hierarchy';
SELECT dictGetHierarchy('hierachical_cache_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
SELECT dictIsIn('hierachical_cache_dictionary', number, number) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_cache_dictionary;
DROP DICTIONARY IF EXISTS hierachical_direct_dictionary;
CREATE DICTIONARY hierachical_direct_dictionary
(
id UInt64,
parent_id Nullable(UInt64) HIERARCHICAL
) PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'test_hierarhical_table'))
LAYOUT(DIRECT());
SELECT 'Direct dictionary';
SELECT 'Get hierarchy';
SELECT dictGetHierarchy('hierachical_direct_dictionary', number) FROM system.numbers LIMIT 6;
SELECT 'Get is in hierarchy';
SELECT dictIsIn('hierachical_direct_dictionary', number, number) FROM system.numbers LIMIT 6;
DROP DICTIONARY hierachical_direct_dictionary;
DROP TABLE test_hierarhical_table;