From b767d9faac58bd0c79c44b6f06469ede6e6d73ae Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 6 May 2021 09:24:09 +0300 Subject: [PATCH] Count found keys for DirectDictionary (plus utils changes) ---
/*
 * Review notes (free text placed after the `---` separator, ahead of the diffstat):
 * - The two default hierarchy helpers gain an out-parameter `valid_keys`. Each assigns
 *   `valid_keys = 0` as its first statement and adds one for every lookup in
 *   `key_to_parent_key` that yields a parent.
 * - `keys_found` is initialised to 0 at every call site so that all four hierarchy call
 *   sites agree and none relies on the helper's reset to give the variable a value.
 * - The CacheDictionary hunks pass `keys_found` to the helpers but do not publish it;
 *   only the DirectDictionary hunks add it to `found_count`.
 * - DirectDictionary::hasKeys adds `!result_data[index]` before setting the slot to true,
 *   so each result slot is counted at most once.
 * - NOTE(review): in DirectDictionary::getColumns the `++keys_found` sits in the branch
 *   that reads the per-attribute `fetched_column_from_storage`. If that branch is inside
 *   the attribute loop (the loop body is only partly visible in these hunks), a found key
 *   is counted once per filled attribute while `query_count` grows by
 *   `requested_keys_size` only once - confirm against the full function.
 * - NOTE(review): whether `valid_keys` equals "number of keys from key_column" depends on
 *   how often the traversal (not shown) calls the parent-lookup lambda - confirm.
 */
src/Dictionaries/CacheDictionary.cpp | 6 ++++-- src/Dictionaries/DirectDictionary.cpp | 19 +++++++++++++++++-- .../HierarchyDictionariesUtils.cpp | 15 +++++++++++++-- src/Dictionaries/HierarchyDictionariesUtils.h | 14 ++++++++++---- 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 916631a961d..eadee988533 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -365,7 +365,8 @@ ColumnPtr CacheDictionary::getHierarchy( { if (dictionary_key_type == DictionaryKeyType::simple) { - auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type); + size_t keys_found = 0; + auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type, keys_found); query_count.fetch_add(key_column->size(), std::memory_order_relaxed); return result; } @@ -381,7 +382,8 @@ ColumnUInt8::Ptr CacheDictionary::isInHierarchy( { if (dictionary_key_type == DictionaryKeyType::simple) { - auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type); + size_t keys_found = 0; + auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type, keys_found); query_count.fetch_add(key_column->size(), std::memory_order_relaxed); return result; } diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 2038704414e..c5efaef56f7 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -106,6 +106,8 @@ Columns DirectDictionary::getColumns( auto result_columns = request.makeAttributesResultColumns(); + size_t keys_found = 0; + for (size_t attribute_index = 0; attribute_index < result_columns.size(); 
++attribute_index) { if (!request.shouldFillResultColumnWithIndex(attribute_index)) @@ -124,7 +126,10 @@ Columns DirectDictionary::getColumns( const auto * it = key_to_fetched_index.find(requested_key); if (it) + { fetched_column_from_storage->get(it->getMapped(), value_to_insert); + ++keys_found; + } else value_to_insert = default_value_provider.getDefaultValue(requested_key_index); @@ -133,6 +138,7 @@ Columns DirectDictionary::getColumns( } query_count.fetch_add(requested_keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); return request.filterRequestedColumns(result_columns); } @@ -181,6 +187,8 @@ ColumnUInt8::Ptr DirectDictionary::hasKeys( stream->readPrefix(); + size_t keys_found = 0; + while (const auto block = stream->read()) { /// Split into keys columns and attribute columns @@ -198,6 +206,8 @@ ColumnUInt8::Ptr DirectDictionary::hasKeys( assert(it); size_t result_data_found_index = it->getMapped(); + /// block_keys_size cannot be used, due to duplicates. 
+ keys_found += !result_data[result_data_found_index]; result_data[result_data_found_index] = true; block_keys_extractor.rollbackCurrentKey(); @@ -209,6 +219,7 @@ ColumnUInt8::Ptr DirectDictionary::hasKeys( stream->readSuffix(); query_count.fetch_add(requested_keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); return result; } @@ -220,8 +231,10 @@ ColumnPtr DirectDictionary::getHierarchy( { if (dictionary_key_type == DictionaryKeyType::simple) { - auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type); + size_t keys_found = 0; + auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type, keys_found); query_count.fetch_add(key_column->size(), std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); return result; } else @@ -236,8 +249,10 @@ ColumnUInt8::Ptr DirectDictionary::isInHierarchy( { if (dictionary_key_type == DictionaryKeyType::simple) { - auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type); + size_t keys_found = 0; + auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type, keys_found); query_count.fetch_add(key_column->size(), std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); return result; } else diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index 5bca6a5ac1a..dd729fe636c 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -84,8 +84,14 @@ namespace } } -ColumnPtr getKeysHierarchyDefaultImplementation(const IDictionary * dictionary, ColumnPtr key_column, const DataTypePtr & key_type) +ColumnPtr getKeysHierarchyDefaultImplementation( + const IDictionary * dictionary, + ColumnPtr key_column, + const DataTypePtr & key_type, + size_t & valid_keys) { + valid_keys = 0; + 
key_column = key_column->convertToFullColumnIfConst(); const auto * key_column_typed = checkAndGetColumn>(*key_column); if (!key_column_typed) @@ -104,6 +110,7 @@ ColumnPtr getKeysHierarchyDefaultImplementation(const IDictionary * dictionary, { auto it = key_to_parent_key.find(key); std::optional result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); + valid_keys += result.has_value(); return result; }; @@ -117,8 +124,11 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( const IDictionary * dictionary, ColumnPtr key_column, ColumnPtr in_key_column, - const DataTypePtr & key_type) + const DataTypePtr & key_type, + size_t & valid_keys) { + valid_keys = 0; + key_column = key_column->convertToFullColumnIfConst(); in_key_column = in_key_column->convertToFullColumnIfConst(); @@ -143,6 +153,7 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( { auto it = key_to_parent_key.find(key); std::optional result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); + valid_keys += result.has_value(); return result; }; diff --git a/src/Dictionaries/HierarchyDictionariesUtils.h b/src/Dictionaries/HierarchyDictionariesUtils.h index 97335a3c35f..44382f4720f 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.h +++ b/src/Dictionaries/HierarchyDictionariesUtils.h @@ -455,21 +455,27 @@ ColumnPtr getKeysDescendantsArray( /** Default getHierarchy implementation for dictionaries that does not have structure with child to parent representation. * Implementation will build such structure with getColumn calls, and then getHierarchy for such structure. - * Returns ColumnArray with hierarchy arrays for keys from key_column. + * + * @param valid_keys - number of keys (from @key_column) for which information about parent exists. + * @return ColumnArray with hierarchy arrays for keys from key_column. 
*/ ColumnPtr getKeysHierarchyDefaultImplementation( const IDictionary * dictionary, ColumnPtr key_column, - const DataTypePtr & key_type); + const DataTypePtr & key_type, + size_t & valid_keys); /** Default isInHierarchy implementation for dictionaries that does not have structure with child to parent representation. * Implementation will build such structure with getColumn calls, and then getHierarchy for such structure. - * Returns UInt8 column if key from in_key_column is in key hierarchy from key_column. + * + * @param valid_keys - number of keys (from @key_column) for which information about parent exists. + * @return UInt8 column if key from in_key_column is in key hierarchy from key_column. */ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( const IDictionary * dictionary, ColumnPtr key_column, ColumnPtr in_key_column, - const DataTypePtr & key_type); + const DataTypePtr & key_type, + size_t & valid_keys); }