Count found keys for HashedDictionary (plus utils changes for descendants)

This commit is contained in:
Azat Khuzhin 2021-05-06 10:55:27 +03:00
parent e08389b2d2
commit c80a06def3
4 changed files with 53 additions and 10 deletions

View File

@ -223,7 +223,8 @@ ColumnPtr FlatDictionary::getDescendants(
parent_to_child[parent_key].emplace_back(static_cast<UInt64>(i)); parent_to_child[parent_key].emplace_back(static_cast<UInt64>(i));
} }
auto result = getKeysDescendantsArray(keys, parent_to_child, level); size_t keys_found;
auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found);
query_count.fetch_add(keys.size(), std::memory_order_relaxed); query_count.fetch_add(keys.size(), std::memory_order_relaxed);

View File

@ -162,6 +162,7 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
const auto & attribute = attributes.front(); const auto & attribute = attributes.front();
bool is_attribute_nullable = attribute.is_nullable_set.has_value(); bool is_attribute_nullable = attribute.is_nullable_set.has_value();
size_t keys_found = 0;
getAttributeContainer(0, [&](const auto & container) getAttributeContainer(0, [&](const auto & container)
{ {
@ -171,6 +172,8 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
out[requested_key_index] = container.find(requested_key) != container.end(); out[requested_key_index] = container.find(requested_key) != container.end();
keys_found += out[requested_key_index];
if (is_attribute_nullable && !out[requested_key_index]) if (is_attribute_nullable && !out[requested_key_index])
out[requested_key_index] = attribute.is_nullable_set->find(requested_key) != nullptr; out[requested_key_index] = attribute.is_nullable_set->find(requested_key) != nullptr;
@ -179,6 +182,7 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
}); });
query_count.fetch_add(keys_size, std::memory_order_relaxed); query_count.fetch_add(keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result; return result;
} }
@ -201,6 +205,8 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); }; auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
size_t keys_found = 0;
auto get_parent_func = [&](auto & hierarchy_key) auto get_parent_func = [&](auto & hierarchy_key)
{ {
std::optional<UInt64> result; std::optional<UInt64> result;
@ -210,12 +216,15 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
if (it != parent_keys_map.end()) if (it != parent_keys_map.end())
result = getValueFromCell(it); result = getValueFromCell(it);
keys_found +=result.has_value();
return result; return result;
}; };
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func); auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed); query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return dictionary_hierarchy_array; return dictionary_hierarchy_array;
} }
@ -247,6 +256,8 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); }; auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
size_t keys_found = 0;
auto get_parent_func = [&](auto & hierarchy_key) auto get_parent_func = [&](auto & hierarchy_key)
{ {
std::optional<UInt64> result; std::optional<UInt64> result;
@ -256,12 +267,15 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
if (it != parent_keys_map.end()) if (it != parent_keys_map.end())
result = getValueFromCell(it); result = getValueFromCell(it);
keys_found += result.has_value();
return result; return result;
}; };
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func); auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed); query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result; return result;
} }
@ -290,9 +304,11 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getDescendants(
for (const auto & [key, value] : parent_keys) for (const auto & [key, value] : parent_keys)
parent_to_child[value].emplace_back(key); parent_to_child[value].emplace_back(key);
auto result = getKeysDescendantsArray(keys, parent_to_child, level); size_t keys_found;
auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found);
query_count.fetch_add(keys.size(), std::memory_order_relaxed); query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result; return result;
} }
@ -493,6 +509,8 @@ void HashedDictionary<dictionary_key_type, sparse>::getItemsImpl(
bool is_attribute_nullable = attribute.is_nullable_set.has_value(); bool is_attribute_nullable = attribute.is_nullable_set.has_value();
size_t keys_found = 0;
for (size_t key_index = 0; key_index < keys_size; ++key_index) for (size_t key_index = 0; key_index < keys_size; ++key_index)
{ {
auto key = keys_extractor.extractCurrentKey(); auto key = keys_extractor.extractCurrentKey();
@ -500,7 +518,10 @@ void HashedDictionary<dictionary_key_type, sparse>::getItemsImpl(
const auto it = attribute_container.find(key); const auto it = attribute_container.find(key);
if (it != attribute_container.end()) if (it != attribute_container.end())
{
set_value(key_index, getValueFromCell(it)); set_value(key_index, getValueFromCell(it));
++keys_found;
}
else else
{ {
if (is_attribute_nullable && attribute.is_nullable_set->find(key) != nullptr) if (is_attribute_nullable && attribute.is_nullable_set->find(key) != nullptr)
@ -513,6 +534,7 @@ void HashedDictionary<dictionary_key_type, sparse>::getItemsImpl(
} }
query_count.fetch_add(keys_size, std::memory_order_relaxed); query_count.fetch_add(keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
} }
template <DictionaryKeyType dictionary_key_type, bool sparse> template <DictionaryKeyType dictionary_key_type, bool sparse>

View File

@ -196,6 +196,8 @@ namespace detail
* Hierarchy result is ElementsAndOffsets structure, for each element there is descendants array, * Hierarchy result is ElementsAndOffsets structure, for each element there is descendants array,
* with size offset[element_index] - (element_index > 0 ? offset[element_index - 1] : 0). * with size offset[element_index] - (element_index > 0 ? offset[element_index - 1] : 0).
* *
* @param valid_keys - output parameter: reset to zero and then set to the number of requested keys that are valid in parent_to_child map
*
* Example: * Example:
* id parent_id * id parent_id
* 1 0 * 1 0
@ -218,11 +220,13 @@ namespace detail
ElementsAndOffsets<KeyType> getDescendants( ElementsAndOffsets<KeyType> getDescendants(
const PaddedPODArray<KeyType> & keys, const PaddedPODArray<KeyType> & keys,
const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child, const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child,
Strategy strategy) Strategy strategy,
size_t & valid_keys)
{ {
/// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants. /// If strategy is GetAllDescendantsStrategy we try to cache and later reuse previously calculated descendants.
/// If strategy is GetDescendantsAtSpecificLevelStrategy we do not use the cache. /// If strategy is GetDescendantsAtSpecificLevelStrategy we do not use the cache.
size_t keys_size = keys.size(); size_t keys_size = keys.size();
valid_keys = 0;
PaddedPODArray<KeyType> descendants; PaddedPODArray<KeyType> descendants;
descendants.reserve(keys_size); descendants.reserve(keys_size);
@ -265,6 +269,7 @@ namespace detail
descendants_offsets.emplace_back(descendants.size()); descendants_offsets.emplace_back(descendants.size());
continue; continue;
} }
++valid_keys;
next_keys_to_process_stack.emplace_back(KeyAndDepth{requested_key, 0}); next_keys_to_process_stack.emplace_back(KeyAndDepth{requested_key, 0});
@ -425,22 +430,25 @@ ColumnUInt8::Ptr getKeysIsInHierarchyColumn(
} }
/// Returns descendants array column for keys /// Returns descendants array column for keys
///
/// @param valid_keys - output parameter: set to the number of requested keys that are valid in parent_to_child map
template <typename KeyType> template <typename KeyType>
ColumnPtr getKeysDescendantsArray( ColumnPtr getKeysDescendantsArray(
const PaddedPODArray<KeyType> & requested_keys, const PaddedPODArray<KeyType> & requested_keys,
const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child, const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child,
size_t level) size_t level,
size_t & valid_keys)
{ {
if (level == 0) if (level == 0)
{ {
detail::GetAllDescendantsStrategy strategy { .level = level }; detail::GetAllDescendantsStrategy strategy { .level = level };
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy); auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy, valid_keys);
return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets)); return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets));
} }
else else
{ {
detail::GetDescendantsAtSpecificLevelStrategy strategy { .level = level }; detail::GetDescendantsAtSpecificLevelStrategy strategy { .level = level };
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy); auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy, valid_keys);
return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets)); return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets));
} }
} }

View File

@ -154,10 +154,12 @@ TEST(HierarchyDictionariesUtils, getDescendants)
PaddedPODArray<UInt64> keys = {0, 1, 2, 3, 4}; PaddedPODArray<UInt64> keys = {0, 1, 2, 3, 4};
{ {
size_t keys_found;
auto result = DB::detail::getDescendants( auto result = DB::detail::getDescendants(
keys, keys,
parent_to_child, parent_to_child,
DB::detail::GetAllDescendantsStrategy()); DB::detail::GetAllDescendantsStrategy(),
keys_found);
const auto & actual_elements = result.elements; const auto & actual_elements = result.elements;
const auto & actual_offsets = result.offsets; const auto & actual_offsets = result.offsets;
@ -167,12 +169,15 @@ TEST(HierarchyDictionariesUtils, getDescendants)
ASSERT_EQ(actual_elements, expected_elements); ASSERT_EQ(actual_elements, expected_elements);
ASSERT_EQ(actual_offsets, expected_offsets); ASSERT_EQ(actual_offsets, expected_offsets);
ASSERT_EQ(keys_found, 3);
} }
{ {
size_t keys_found;
auto result = DB::detail::getDescendants( auto result = DB::detail::getDescendants(
keys, keys,
parent_to_child, parent_to_child,
DB::detail::GetDescendantsAtSpecificLevelStrategy{1}); DB::detail::GetDescendantsAtSpecificLevelStrategy{1},
keys_found);
const auto & actual_elements = result.elements; const auto & actual_elements = result.elements;
const auto & actual_offsets = result.offsets; const auto & actual_offsets = result.offsets;
@ -182,6 +187,7 @@ TEST(HierarchyDictionariesUtils, getDescendants)
ASSERT_EQ(actual_elements, expected_elements); ASSERT_EQ(actual_elements, expected_elements);
ASSERT_EQ(actual_offsets, expected_offsets); ASSERT_EQ(actual_offsets, expected_offsets);
ASSERT_EQ(keys_found, 3);
} }
} }
{ {
@ -192,10 +198,12 @@ TEST(HierarchyDictionariesUtils, getDescendants)
PaddedPODArray<UInt64> keys = {1, 2, 3}; PaddedPODArray<UInt64> keys = {1, 2, 3};
{ {
size_t keys_found;
auto result = DB::detail::getDescendants( auto result = DB::detail::getDescendants(
keys, keys,
parent_to_child, parent_to_child,
DB::detail::GetAllDescendantsStrategy()); DB::detail::GetAllDescendantsStrategy(),
keys_found);
const auto & actual_elements = result.elements; const auto & actual_elements = result.elements;
const auto & actual_offsets = result.offsets; const auto & actual_offsets = result.offsets;
@ -205,12 +213,15 @@ TEST(HierarchyDictionariesUtils, getDescendants)
ASSERT_EQ(actual_elements, expected_elements); ASSERT_EQ(actual_elements, expected_elements);
ASSERT_EQ(actual_offsets, expected_offsets); ASSERT_EQ(actual_offsets, expected_offsets);
ASSERT_EQ(keys_found, 2);
} }
{ {
size_t keys_found;
auto result = DB::detail::getDescendants( auto result = DB::detail::getDescendants(
keys, keys,
parent_to_child, parent_to_child,
DB::detail::GetDescendantsAtSpecificLevelStrategy{1}); DB::detail::GetDescendantsAtSpecificLevelStrategy{1},
keys_found);
const auto & actual_elements = result.elements; const auto & actual_elements = result.elements;
const auto & actual_offsets = result.offsets; const auto & actual_offsets = result.offsets;
@ -220,6 +231,7 @@ TEST(HierarchyDictionariesUtils, getDescendants)
ASSERT_EQ(actual_elements, expected_elements); ASSERT_EQ(actual_elements, expected_elements);
ASSERT_EQ(actual_offsets, expected_offsets); ASSERT_EQ(actual_offsets, expected_offsets);
ASSERT_EQ(keys_found, 2);
} }
} }
} }