Count found keys for HashedDictionary (plus utils changes for descendants)

This commit is contained in:
Azat Khuzhin 2021-05-06 10:55:27 +03:00
parent e08389b2d2
commit c80a06def3
4 changed files with 53 additions and 10 deletions

View File

@ -223,7 +223,8 @@ ColumnPtr FlatDictionary::getDescendants(
parent_to_child[parent_key].emplace_back(static_cast<UInt64>(i));
}
auto result = getKeysDescendantsArray(keys, parent_to_child, level);
size_t keys_found;
auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);

View File

@ -162,6 +162,7 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
const auto & attribute = attributes.front();
bool is_attribute_nullable = attribute.is_nullable_set.has_value();
size_t keys_found = 0;
getAttributeContainer(0, [&](const auto & container)
{
@ -171,6 +172,8 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
out[requested_key_index] = container.find(requested_key) != container.end();
keys_found += out[requested_key_index];
if (is_attribute_nullable && !out[requested_key_index])
out[requested_key_index] = attribute.is_nullable_set->find(requested_key) != nullptr;
@ -179,6 +182,7 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::hasKeys(const Co
});
query_count.fetch_add(keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result;
}
@ -201,6 +205,8 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
size_t keys_found = 0;
auto get_parent_func = [&](auto & hierarchy_key)
{
std::optional<UInt64> result;
@ -210,12 +216,15 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getHierarchy(ColumnPtr
if (it != parent_keys_map.end())
result = getValueFromCell(it);
keys_found +=result.has_value();
return result;
};
auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return dictionary_hierarchy_array;
}
@ -247,6 +256,8 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
auto is_key_valid_func = [&](auto & key) { return parent_keys_map.find(key) != parent_keys_map.end(); };
size_t keys_found = 0;
auto get_parent_func = [&](auto & hierarchy_key)
{
std::optional<UInt64> result;
@ -256,12 +267,15 @@ ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse>::isInHierarchy(
if (it != parent_keys_map.end())
result = getValueFromCell(it);
keys_found += result.has_value();
return result;
};
auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result;
}
@ -290,9 +304,11 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse>::getDescendants(
for (const auto & [key, value] : parent_keys)
parent_to_child[value].emplace_back(key);
auto result = getKeysDescendantsArray(keys, parent_to_child, level);
size_t keys_found;
auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found);
query_count.fetch_add(keys.size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result;
}
@ -493,6 +509,8 @@ void HashedDictionary<dictionary_key_type, sparse>::getItemsImpl(
bool is_attribute_nullable = attribute.is_nullable_set.has_value();
size_t keys_found = 0;
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
auto key = keys_extractor.extractCurrentKey();
@ -500,7 +518,10 @@ void HashedDictionary<dictionary_key_type, sparse>::getItemsImpl(
const auto it = attribute_container.find(key);
if (it != attribute_container.end())
{
set_value(key_index, getValueFromCell(it));
++keys_found;
}
else
{
if (is_attribute_nullable && attribute.is_nullable_set->find(key) != nullptr)
@ -513,6 +534,7 @@ void HashedDictionary<dictionary_key_type, sparse>::getItemsImpl(
}
query_count.fetch_add(keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
template <DictionaryKeyType dictionary_key_type, bool sparse>

View File

@ -196,6 +196,8 @@ namespace detail
* The result is an ElementsAndOffsets structure: for each element there is a descendants array
* of size offset[element_index] - (element_index > 0 ? offset[element_index - 1] : 0).
*
* @param valid_keys - output parameter: reset to 0 on entry, then set to the number of requested keys that are valid in the parent_to_child map
*
* Example:
* id parent_id
* 1 0
@ -218,11 +220,13 @@ namespace detail
ElementsAndOffsets<KeyType> getDescendants(
const PaddedPODArray<KeyType> & keys,
const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child,
Strategy strategy)
Strategy strategy,
size_t & valid_keys)
{
/// If the strategy is GetAllDescendantsStrategy, we try to cache and later reuse previously calculated descendants.
/// If the strategy is GetDescendantsAtSpecificLevelStrategy, we do not use the cache.
size_t keys_size = keys.size();
valid_keys = 0;
PaddedPODArray<KeyType> descendants;
descendants.reserve(keys_size);
@ -265,6 +269,7 @@ namespace detail
descendants_offsets.emplace_back(descendants.size());
continue;
}
++valid_keys;
next_keys_to_process_stack.emplace_back(KeyAndDepth{requested_key, 0});
@ -425,22 +430,25 @@ ColumnUInt8::Ptr getKeysIsInHierarchyColumn(
}
/// Returns descendants array column for keys
///
/// @param valid_keys - output parameter: receives the number of requested keys that are valid in the parent_to_child map
template <typename KeyType>
ColumnPtr getKeysDescendantsArray(
const PaddedPODArray<KeyType> & requested_keys,
const HashMap<KeyType, PaddedPODArray<KeyType>> & parent_to_child,
size_t level)
size_t level,
size_t & valid_keys)
{
if (level == 0)
{
detail::GetAllDescendantsStrategy strategy { .level = level };
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy);
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy, valid_keys);
return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets));
}
else
{
detail::GetDescendantsAtSpecificLevelStrategy strategy { .level = level };
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy);
auto elements_and_offsets = detail::getDescendants(requested_keys, parent_to_child, strategy, valid_keys);
return detail::convertElementsAndOffsetsIntoArray(std::move(elements_and_offsets));
}
}

View File

@ -154,10 +154,12 @@ TEST(HierarchyDictionariesUtils, getDescendants)
PaddedPODArray<UInt64> keys = {0, 1, 2, 3, 4};
{
size_t keys_found;
auto result = DB::detail::getDescendants(
keys,
parent_to_child,
DB::detail::GetAllDescendantsStrategy());
DB::detail::GetAllDescendantsStrategy(),
keys_found);
const auto & actual_elements = result.elements;
const auto & actual_offsets = result.offsets;
@ -167,12 +169,15 @@ TEST(HierarchyDictionariesUtils, getDescendants)
ASSERT_EQ(actual_elements, expected_elements);
ASSERT_EQ(actual_offsets, expected_offsets);
ASSERT_EQ(keys_found, 3);
}
{
size_t keys_found;
auto result = DB::detail::getDescendants(
keys,
parent_to_child,
DB::detail::GetDescendantsAtSpecificLevelStrategy{1});
DB::detail::GetDescendantsAtSpecificLevelStrategy{1},
keys_found);
const auto & actual_elements = result.elements;
const auto & actual_offsets = result.offsets;
@ -182,6 +187,7 @@ TEST(HierarchyDictionariesUtils, getDescendants)
ASSERT_EQ(actual_elements, expected_elements);
ASSERT_EQ(actual_offsets, expected_offsets);
ASSERT_EQ(keys_found, 3);
}
}
{
@ -192,10 +198,12 @@ TEST(HierarchyDictionariesUtils, getDescendants)
PaddedPODArray<UInt64> keys = {1, 2, 3};
{
size_t keys_found;
auto result = DB::detail::getDescendants(
keys,
parent_to_child,
DB::detail::GetAllDescendantsStrategy());
DB::detail::GetAllDescendantsStrategy(),
keys_found);
const auto & actual_elements = result.elements;
const auto & actual_offsets = result.offsets;
@ -205,12 +213,15 @@ TEST(HierarchyDictionariesUtils, getDescendants)
ASSERT_EQ(actual_elements, expected_elements);
ASSERT_EQ(actual_offsets, expected_offsets);
ASSERT_EQ(keys_found, 2);
}
{
size_t keys_found;
auto result = DB::detail::getDescendants(
keys,
parent_to_child,
DB::detail::GetDescendantsAtSpecificLevelStrategy{1});
DB::detail::GetDescendantsAtSpecificLevelStrategy{1},
keys_found);
const auto & actual_elements = result.elements;
const auto & actual_offsets = result.offsets;
@ -220,6 +231,7 @@ TEST(HierarchyDictionariesUtils, getDescendants)
ASSERT_EQ(actual_elements, expected_elements);
ASSERT_EQ(actual_offsets, expected_offsets);
ASSERT_EQ(keys_found, 2);
}
}
}