Count found keys for DirectDictionary (plus utils changes)

This commit is contained in:
Azat Khuzhin 2021-05-06 09:24:09 +03:00
parent a6a1bf2847
commit b767d9faac
4 changed files with 44 additions and 10 deletions

View File

@ -365,7 +365,8 @@ ColumnPtr CacheDictionary<dictionary_key_type>::getHierarchy(
{ {
if (dictionary_key_type == DictionaryKeyType::simple) if (dictionary_key_type == DictionaryKeyType::simple)
{ {
auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type); size_t keys_found;
auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type, keys_found);
query_count.fetch_add(key_column->size(), std::memory_order_relaxed); query_count.fetch_add(key_column->size(), std::memory_order_relaxed);
return result; return result;
} }
@ -381,7 +382,8 @@ ColumnUInt8::Ptr CacheDictionary<dictionary_key_type>::isInHierarchy(
{ {
if (dictionary_key_type == DictionaryKeyType::simple) if (dictionary_key_type == DictionaryKeyType::simple)
{ {
auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type); size_t keys_found;
auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type, keys_found);
query_count.fetch_add(key_column->size(), std::memory_order_relaxed); query_count.fetch_add(key_column->size(), std::memory_order_relaxed);
return result; return result;
} }

View File

@ -106,6 +106,8 @@ Columns DirectDictionary<dictionary_key_type>::getColumns(
auto result_columns = request.makeAttributesResultColumns(); auto result_columns = request.makeAttributesResultColumns();
size_t keys_found = 0;
for (size_t attribute_index = 0; attribute_index < result_columns.size(); ++attribute_index) for (size_t attribute_index = 0; attribute_index < result_columns.size(); ++attribute_index)
{ {
if (!request.shouldFillResultColumnWithIndex(attribute_index)) if (!request.shouldFillResultColumnWithIndex(attribute_index))
@ -124,7 +126,10 @@ Columns DirectDictionary<dictionary_key_type>::getColumns(
const auto * it = key_to_fetched_index.find(requested_key); const auto * it = key_to_fetched_index.find(requested_key);
if (it) if (it)
{
fetched_column_from_storage->get(it->getMapped(), value_to_insert); fetched_column_from_storage->get(it->getMapped(), value_to_insert);
++keys_found;
}
else else
value_to_insert = default_value_provider.getDefaultValue(requested_key_index); value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
@ -133,6 +138,7 @@ Columns DirectDictionary<dictionary_key_type>::getColumns(
} }
query_count.fetch_add(requested_keys_size, std::memory_order_relaxed); query_count.fetch_add(requested_keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return request.filterRequestedColumns(result_columns); return request.filterRequestedColumns(result_columns);
} }
@ -181,6 +187,8 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
stream->readPrefix(); stream->readPrefix();
size_t keys_found = 0;
while (const auto block = stream->read()) while (const auto block = stream->read())
{ {
/// Split into keys columns and attribute columns /// Split into keys columns and attribute columns
@ -198,6 +206,8 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
assert(it); assert(it);
size_t result_data_found_index = it->getMapped(); size_t result_data_found_index = it->getMapped();
/// block_keys_size cannot be used, due to duplicates.
keys_found += !result_data[result_data_found_index];
result_data[result_data_found_index] = true; result_data[result_data_found_index] = true;
block_keys_extractor.rollbackCurrentKey(); block_keys_extractor.rollbackCurrentKey();
@ -209,6 +219,7 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
stream->readSuffix(); stream->readSuffix();
query_count.fetch_add(requested_keys_size, std::memory_order_relaxed); query_count.fetch_add(requested_keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result; return result;
} }
@ -220,8 +231,10 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getHierarchy(
{ {
if (dictionary_key_type == DictionaryKeyType::simple) if (dictionary_key_type == DictionaryKeyType::simple)
{ {
auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type); size_t keys_found;
auto result = getKeysHierarchyDefaultImplementation(this, key_column, key_type, keys_found);
query_count.fetch_add(key_column->size(), std::memory_order_relaxed); query_count.fetch_add(key_column->size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result; return result;
} }
else else
@ -236,8 +249,10 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::isInHierarchy(
{ {
if (dictionary_key_type == DictionaryKeyType::simple) if (dictionary_key_type == DictionaryKeyType::simple)
{ {
auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type); size_t keys_found = 0;
auto result = getKeysIsInHierarchyDefaultImplementation(this, key_column, in_key_column, key_type, keys_found);
query_count.fetch_add(key_column->size(), std::memory_order_relaxed); query_count.fetch_add(key_column->size(), std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
return result; return result;
} }
else else

View File

@ -84,8 +84,14 @@ namespace
} }
} }
ColumnPtr getKeysHierarchyDefaultImplementation(const IDictionary * dictionary, ColumnPtr key_column, const DataTypePtr & key_type) ColumnPtr getKeysHierarchyDefaultImplementation(
const IDictionary * dictionary,
ColumnPtr key_column,
const DataTypePtr & key_type,
size_t & valid_keys)
{ {
valid_keys = 0;
key_column = key_column->convertToFullColumnIfConst(); key_column = key_column->convertToFullColumnIfConst();
const auto * key_column_typed = checkAndGetColumn<ColumnVector<UInt64>>(*key_column); const auto * key_column_typed = checkAndGetColumn<ColumnVector<UInt64>>(*key_column);
if (!key_column_typed) if (!key_column_typed)
@ -104,6 +110,7 @@ ColumnPtr getKeysHierarchyDefaultImplementation(const IDictionary * dictionary,
{ {
auto it = key_to_parent_key.find(key); auto it = key_to_parent_key.find(key);
std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
valid_keys += result.has_value();
return result; return result;
}; };
@ -117,8 +124,11 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
const IDictionary * dictionary, const IDictionary * dictionary,
ColumnPtr key_column, ColumnPtr key_column,
ColumnPtr in_key_column, ColumnPtr in_key_column,
const DataTypePtr & key_type) const DataTypePtr & key_type,
size_t & valid_keys)
{ {
valid_keys = 0;
key_column = key_column->convertToFullColumnIfConst(); key_column = key_column->convertToFullColumnIfConst();
in_key_column = in_key_column->convertToFullColumnIfConst(); in_key_column = in_key_column->convertToFullColumnIfConst();
@ -143,6 +153,7 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
{ {
auto it = key_to_parent_key.find(key); auto it = key_to_parent_key.find(key);
std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt); std::optional<UInt64> result = (it != nullptr ? std::make_optional(it->getMapped()) : std::nullopt);
valid_keys += result.has_value();
return result; return result;
}; };

View File

@ -455,21 +455,27 @@ ColumnPtr getKeysDescendantsArray(
/** Default getHierarchy implementation for dictionaries that do not have a structure with child-to-parent representation. /** Default getHierarchy implementation for dictionaries that do not have a structure with child-to-parent representation.
* Implementation will build such structure with getColumn calls, and then getHierarchy for such structure. * Implementation will build such structure with getColumn calls, and then getHierarchy for such structure.
* Returns ColumnArray with hierarchy arrays for keys from key_column. *
* @param valid_keys - number of keys (from @key_column) for which information about parent exists.
* @return ColumnArray with hierarchy arrays for keys from key_column.
*/ */
ColumnPtr getKeysHierarchyDefaultImplementation( ColumnPtr getKeysHierarchyDefaultImplementation(
const IDictionary * dictionary, const IDictionary * dictionary,
ColumnPtr key_column, ColumnPtr key_column,
const DataTypePtr & key_type); const DataTypePtr & key_type,
size_t & valid_keys);
/** Default isInHierarchy implementation for dictionaries that do not have a structure with child-to-parent representation. /** Default isInHierarchy implementation for dictionaries that do not have a structure with child-to-parent representation.
* Implementation will build such structure with getColumn calls, and then getHierarchy for such structure. * Implementation will build such structure with getColumn calls, and then getHierarchy for such structure.
* Returns UInt8 column if key from in_key_column is in key hierarchy from key_column. *
* @param valid_keys - number of keys (from @key_column) for which information about parent exists.
* @return UInt8 column indicating whether the key from in_key_column is in the hierarchy of the key from key_column.
*/ */
ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation(
const IDictionary * dictionary, const IDictionary * dictionary,
ColumnPtr key_column, ColumnPtr key_column,
ColumnPtr in_key_column, ColumnPtr in_key_column,
const DataTypePtr & key_type); const DataTypePtr & key_type,
size_t & valid_keys);
} }