mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Count found keys for HashedDictionary (plus utils changes for descendants)
This commit is contained in:
parent
e08389b2d2
commit
c80a06def3
@ -223,7 +223,8 @@ ColumnPtr FlatDictionary::getDescendants(
|
|||||||
parent_to_child[parent_key].emplace_back(static_cast<UInt64>(i));
|
parent_to_child[parent_key].emplace_back(static_cast<UInt64>(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto result = getKeysDescendantsArray(keys, parent_to_child, level);
|
size_t keys_found;
|
||||||
|
auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found);
|
||||||
|
|
||||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||||
|
|
||||||
|
@ -162,6 +162,7 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
|
|||||||
|
|
||||||
const auto & attribute = attributes.front();
|
const auto & attribute = attributes.front();
|
||||||
bool is_attribute_nullable = attribute.is_nullable_set.has_value();
|
bool is_attribute_nullable = attribute.is_nullable_set.has_value();
|
||||||
|
size_t keys_found = 0;
|
||||||
|
|
||||||
getAttributeContainer(0, [&](const auto & container)
|
getAttributeContainer(0, [&](const auto & container)
|
||||||
{
|
{
|
||||||
@ -171,6 +172,8 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
|
|||||||
|
|
||||||
out[requested_key_index] = container.find(requested_key) != container.end();
|
out[requested_key_index] = container.find(requested_key) != container.end();
|
||||||
|
|
||||||
|
keys_found += out[requested_key_index];
|
||||||
|
|
||||||
if (is_attribute_nullable && !out[requested_key_index])
|
if (is_attribute_nullable && !out[requested_key_index])
|
||||||
out[requested_key_index] = attribute.is_nullable_set->find(requested_key) != nullptr;
|
out[requested_key_index] = attribute.is_nullable_set->find(requested_key) != nullptr;
|
||||||
|
|
||||||
@ -179,6 +182,7 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
|
|||||||
});
|
});
|
||||||
|
|
||||||
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||||
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -201,6 +205,8 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
|
|||||||
|
|
||||||
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
|
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
|
||||||
|
|
||||||
|
size_t keys_found = 0;
|
||||||
|
|
||||||
auto get_parent_func = [&](auto & hierarchy_key)
|
auto get_parent_func = [&](auto & hierarchy_key)
|
||||||
{
|
{
|
||||||
std::optional<UInt64> result;
|
std::optional<UInt64> result;
|
||||||
@ -210,12 +216,15 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
|
|||||||
if (it != parent_keys_map.end())
|
if (it != parent_keys_map.end())
|
||||||
result = getValueFromCell(it);
|
result = getValueFromCell(it);
|
||||||
|
|
||||||
|
keys_found +=result.has_value();
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
|
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
|
||||||
|
|
||||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||||
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||||
|
|
||||||
return dictionary_hierarchy_array;
|
return dictionary_hierarchy_array;
|
||||||
}
|
}
|
||||||
@ -247,6 +256,8 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
|
|||||||
|
|
||||||
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
|
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
|
||||||
|
|
||||||
|
size_t keys_found = 0;
|
||||||
|
|
||||||
auto get_parent_func = [&](auto & hierarchy_key)
|
auto get_parent_func = [&](auto & hierarchy_key)
|
||||||
{
|
{
|
||||||
std::optional<UInt64> result;
|
std::optional<UInt64> result;
|
||||||
@ -256,12 +267,15 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
|
|||||||
if (it != parent_keys_map.end())
|
if (it != parent_keys_map.end())
|
||||||
result = getValueFromCell(it);
|
result = getValueFromCell(it);
|
||||||
|
|
||||||
|
keys_found += result.has_value();
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
|
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
|
||||||
|
|
||||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||||
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -290,9 +304,11 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getDescendants(
|
|||||||
for (const auto & [key, value] : parent_keys)
|
for (const auto & [key, value] : parent_keys)
|
||||||
parent_to_child[value].emplace_back(key);
|
parent_to_child[value].emplace_back(key);
|
||||||
|
|
||||||
auto result = getKeysDescendantsArray(keys, parent_to_child, level);
|
size_t keys_found;
|
||||||
|
auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found);
|
||||||
|
|
||||||
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
|
||||||
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -493,6 +509,8 @@ void HashedDictionary<dictionary_key_type, sparse>::getItemsImpl(
|
|||||||
|
|
||||||
bool is_attribute_nullable = attribute.is_nullable_set.has_value();
|
bool is_attribute_nullable = attribute.is_nullable_set.has_value();
|
||||||
|
|
||||||
|
size_t keys_found = 0;
|
||||||
|
|
||||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||||
{
|
{
|
||||||
auto key = keys_extractor.extractCurrentKey();
|
auto key = keys_extractor.extractCurrentKey();
|
||||||
@ -500,7 +518,10 @@ void HashedDictionary<dictionary_key_type, sparse>::getItemsImpl(
|
|||||||
const auto it = attribute_container.find(key);
|
const auto it = attribute_container.find(key);
|
||||||
|
|
||||||
if (it != attribute_container.end())
|
if (it != attribute_container.end())
|
||||||
|
{
|
||||||
set_value(key_index, getValueFromCell(it));
|
set_value(key_index, getValueFromCell(it));
|
||||||
|
++keys_found;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (is_attribute_nullable && attribute.is_nullable_set->find(key) != nullptr)
|
if (is_attribute_nullable && attribute.is_nullable_set->find(key) != nullptr)
|
||||||
@ -513,6 +534,7 @@ void HashedDictionary<dictionary_key_type, sparse>::getItemsImpl(
|
|||||||
}
|
}
|
||||||
|
|
||||||
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||||
|
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <DictionaryKeyType dictionary_key_type, bool sparse>
|
template <DictionaryKeyType dictionary_key_type, bool sparse>
|
||||||
|
@ -196,6 +196,8 @@ namespace detail
|
|||||||
* Hierarchy result is an ElementsAndOffsets structure: for each element there is a descendants array,
|
* Hierarchy result is an ElementsAndOffsets structure: for each element there is a descendants array,
|
||||||
* with size offset[element_index] - (element_index > 0 ? offset[element_index - 1] : 0).
|
* with size offset[element_index] - (element_index > 0 ? offset[element_index - 1] : 0).
|
||||||
*
|
*
|
||||||
|
* @param valid_keys - output parameter: reset to zero, then set to the number of keys that are valid in the parent_to_child map
|
||||||
|
*
|
||||||
* Example:
|
* Example:
|
||||||
* id parent_id
|
* id parent_id
|
||||||
* 1 0
|
* 1 0
|
||||||
@ -218,11 +220,13 @@ namespace detail
|
|||||||
ElementsAndOffsets<KeyType> getDescendants(
|
ElementsAndOffsets<KeyType> getDescendants(
|
||||||
const PaddedPODArray<KeyType> & keys,
|
const PaddedPODArray<KeyType> & keys,
|
||||||
const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child,
|
const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child,
|
||||||
Strategy strategy)
|
Strategy strategy,
|
||||||
|
size_t & valid_keys)
|
||||||
{
|
{
|
||||||
/// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants.
|
/// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants.
|
||||||
/// If strategy is GetDescendantsAtSpecificLevelStrategy we do not use the cache.
|
/// If strategy is GetDescendantsAtSpecificLevelStrategy we do not use the cache.
|
||||||
size_t keys_size = keys.size();
|
size_t keys_size = keys.size();
|
||||||
|
valid_keys = 0;
|
||||||
|
|
||||||
PaddedPODArray<KeyType> descendants;
|
PaddedPODArray<KeyType> descendants;
|
||||||
descendants.reserve(keys_size);
|
descendants.reserve(keys_size);
|
||||||
@ -265,6 +269,7 @@ namespace detail
|
|||||||
descendants_offsets.emplace_back(descendants.size());
|
descendants_offsets.emplace_back(descendants.size());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
++valid_keys;
|
||||||
|
|
||||||
next_keys_to_process_stack.emplace_back(KeyAndDepth{requested_key, 0});
|
next_keys_to_process_stack.emplace_back(KeyAndDepth{requested_key, 0});
|
||||||
|
|
||||||
@ -425,22 +430,25 @@ ColumnUInt8::Ptr getKeysIsInHierarchyColumn(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns descendants array column for keys
|
/// Returns descendants array column for keys
|
||||||
|
///
|
||||||
|
/// @param valid_keys - output parameter, filled in by detail::getDescendants with the number of keys that are valid in the parent_to_child map
|
||||||
template <typename KeyType>
|
template <typename KeyType>
|
||||||
ColumnPtr getKeysDescendantsArray(
|
ColumnPtr getKeysDescendantsArray(
|
||||||
const PaddedPODArray<KeyType> & requested_keys,
|
const PaddedPODArray<KeyType> & requested_keys,
|
||||||
const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child,
|
const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child,
|
||||||
size_t level)
|
size_t level,
|
||||||
|
size_t & valid_keys)
|
||||||
{
|
{
|
||||||
if (level == 0)
|
if (level == 0)
|
||||||
{
|
{
|
||||||
detail::GetAllDescendantsStrategy strategy { .level = level };
|
detail::GetAllDescendantsStrategy strategy { .level = level };
|
||||||
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy);
|
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy, valid_keys);
|
||||||
return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets));
|
return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
detail::GetDescendantsAtSpecificLevelStrategy strategy { .level = level };
|
detail::GetDescendantsAtSpecificLevelStrategy strategy { .level = level };
|
||||||
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy);
|
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy, valid_keys);
|
||||||
return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets));
|
return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -154,10 +154,12 @@ TEST(HierarchyDictionariesUtils, getDescendants)
|
|||||||
PaddedPODArray<UInt64> keys = {0, 1, 2, 3, 4};
|
PaddedPODArray<UInt64> keys = {0, 1, 2, 3, 4};
|
||||||
|
|
||||||
{
|
{
|
||||||
|
size_t keys_found;
|
||||||
auto result = DB::detail::getDescendants(
|
auto result = DB::detail::getDescendants(
|
||||||
keys,
|
keys,
|
||||||
parent_to_child,
|
parent_to_child,
|
||||||
DB::detail::GetAllDescendantsStrategy());
|
DB::detail::GetAllDescendantsStrategy(),
|
||||||
|
keys_found);
|
||||||
|
|
||||||
const auto & actual_elements = result.elements;
|
const auto & actual_elements = result.elements;
|
||||||
const auto & actual_offsets = result.offsets;
|
const auto & actual_offsets = result.offsets;
|
||||||
@ -167,12 +169,15 @@ TEST(HierarchyDictionariesUtils, getDescendants)
|
|||||||
|
|
||||||
ASSERT_EQ(actual_elements, expected_elements);
|
ASSERT_EQ(actual_elements, expected_elements);
|
||||||
ASSERT_EQ(actual_offsets, expected_offsets);
|
ASSERT_EQ(actual_offsets, expected_offsets);
|
||||||
|
ASSERT_EQ(keys_found, 3);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
|
size_t keys_found;
|
||||||
auto result = DB::detail::getDescendants(
|
auto result = DB::detail::getDescendants(
|
||||||
keys,
|
keys,
|
||||||
parent_to_child,
|
parent_to_child,
|
||||||
DB::detail::GetDescendantsAtSpecificLevelStrategy{1});
|
DB::detail::GetDescendantsAtSpecificLevelStrategy{1},
|
||||||
|
keys_found);
|
||||||
|
|
||||||
const auto & actual_elements = result.elements;
|
const auto & actual_elements = result.elements;
|
||||||
const auto & actual_offsets = result.offsets;
|
const auto & actual_offsets = result.offsets;
|
||||||
@ -182,6 +187,7 @@ TEST(HierarchyDictionariesUtils, getDescendants)
|
|||||||
|
|
||||||
ASSERT_EQ(actual_elements, expected_elements);
|
ASSERT_EQ(actual_elements, expected_elements);
|
||||||
ASSERT_EQ(actual_offsets, expected_offsets);
|
ASSERT_EQ(actual_offsets, expected_offsets);
|
||||||
|
ASSERT_EQ(keys_found, 3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
@ -192,10 +198,12 @@ TEST(HierarchyDictionariesUtils, getDescendants)
|
|||||||
PaddedPODArray<UInt64> keys = {1, 2, 3};
|
PaddedPODArray<UInt64> keys = {1, 2, 3};
|
||||||
|
|
||||||
{
|
{
|
||||||
|
size_t keys_found;
|
||||||
auto result = DB::detail::getDescendants(
|
auto result = DB::detail::getDescendants(
|
||||||
keys,
|
keys,
|
||||||
parent_to_child,
|
parent_to_child,
|
||||||
DB::detail::GetAllDescendantsStrategy());
|
DB::detail::GetAllDescendantsStrategy(),
|
||||||
|
keys_found);
|
||||||
|
|
||||||
const auto & actual_elements = result.elements;
|
const auto & actual_elements = result.elements;
|
||||||
const auto & actual_offsets = result.offsets;
|
const auto & actual_offsets = result.offsets;
|
||||||
@ -205,12 +213,15 @@ TEST(HierarchyDictionariesUtils, getDescendants)
|
|||||||
|
|
||||||
ASSERT_EQ(actual_elements, expected_elements);
|
ASSERT_EQ(actual_elements, expected_elements);
|
||||||
ASSERT_EQ(actual_offsets, expected_offsets);
|
ASSERT_EQ(actual_offsets, expected_offsets);
|
||||||
|
ASSERT_EQ(keys_found, 2);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
|
size_t keys_found;
|
||||||
auto result = DB::detail::getDescendants(
|
auto result = DB::detail::getDescendants(
|
||||||
keys,
|
keys,
|
||||||
parent_to_child,
|
parent_to_child,
|
||||||
DB::detail::GetDescendantsAtSpecificLevelStrategy{1});
|
DB::detail::GetDescendantsAtSpecificLevelStrategy{1},
|
||||||
|
keys_found);
|
||||||
|
|
||||||
const auto & actual_elements = result.elements;
|
const auto & actual_elements = result.elements;
|
||||||
const auto & actual_offsets = result.offsets;
|
const auto & actual_offsets = result.offsets;
|
||||||
@ -220,6 +231,7 @@ TEST(HierarchyDictionariesUtils, getDescendants)
|
|||||||
|
|
||||||
ASSERT_EQ(actual_elements, expected_elements);
|
ASSERT_EQ(actual_elements, expected_elements);
|
||||||
ASSERT_EQ(actual_offsets, expected_offsets);
|
ASSERT_EQ(actual_offsets, expected_offsets);
|
||||||
|
ASSERT_EQ(keys_found, 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user