Merge pull request #22630 from kitaisreal/direct-dictionary-dict-get-multiple-columns-optimization

DirectDictionary dictGet multiple columns optimization
This commit is contained in:
Maksim Kita 2021-04-07 10:29:42 +03:00 committed by GitHub
commit 7baafcb567
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 91 additions and 31 deletions

View File

@ -133,7 +133,7 @@ ColumnPtr CacheDictionary<dictionary_key_type>::getColumn(
template <DictionaryKeyType dictionary_key_type>
Columns CacheDictionary<dictionary_key_type>::getColumns(
const Strings & attribute_names,
const DataTypes &,
const DataTypes & result_types,
const Columns & key_columns,
const DataTypes & key_types,
const Columns & default_values_columns) const
@ -159,7 +159,7 @@ Columns CacheDictionary<dictionary_key_type>::getColumns(
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
auto keys = extractor.extractAllKeys();
DictionaryStorageFetchRequest request(dict_struct, attribute_names, default_values_columns);
DictionaryStorageFetchRequest request(dict_struct, attribute_names, result_types, default_values_columns);
FetchResult result_of_fetch_from_storage;
@ -277,7 +277,7 @@ ColumnUInt8::Ptr CacheDictionary<dictionary_key_type>::hasKeys(const Columns & k
const auto keys = extractor.extractAllKeys();
/// We make empty request just to fetch if keys exists
DictionaryStorageFetchRequest request(dict_struct, {}, {});
DictionaryStorageFetchRequest request(dict_struct, {}, {}, {});
FetchResult result_of_fetch_from_storage;

View File

@ -60,7 +60,11 @@ private:
class DictionaryStorageFetchRequest
{
public:
DictionaryStorageFetchRequest(const DictionaryStructure & structure, const Strings & attributes_names_to_fetch, Columns attributes_default_values_columns)
DictionaryStorageFetchRequest(
const DictionaryStructure & structure,
const Strings & attributes_names_to_fetch,
DataTypes attributes_to_fetch_result_types,
Columns attributes_default_values_columns)
: attributes_to_fetch_names_set(attributes_names_to_fetch.begin(), attributes_names_to_fetch.end())
, attributes_to_fetch_filter(structure.attributes.size(), false)
{
@ -73,7 +77,7 @@ public:
dictionary_attributes_types.reserve(attributes_size);
attributes_default_value_providers.reserve(attributes_to_fetch_names_set.size());
size_t default_values_column_index = 0;
size_t attributes_to_fetch_index = 0;
for (size_t i = 0; i < attributes_size; ++i)
{
const auto & dictionary_attribute = structure.attributes[i];
@ -84,8 +88,16 @@ public:
if (attributes_to_fetch_names_set.find(name) != attributes_to_fetch_names_set.end())
{
attributes_to_fetch_filter[i] = true;
attributes_default_value_providers.emplace_back(dictionary_attribute.null_value, attributes_default_values_columns[default_values_column_index]);
++default_values_column_index;
auto & attribute_to_fetch_result_type = attributes_to_fetch_result_types[attributes_to_fetch_index];
if (!attribute_to_fetch_result_type->equals(*type))
throw Exception(ErrorCodes::TYPE_MISMATCH,
"Attribute type does not match, expected ({}), found ({})",
attribute_to_fetch_result_type->getName(),
type->getName());
attributes_default_value_providers.emplace_back(dictionary_attribute.null_value, attributes_default_values_columns[attributes_to_fetch_index]);
++attributes_to_fetch_index;
}
else
attributes_default_value_providers.emplace_back(dictionary_attribute.null_value);

View File

@ -31,12 +31,12 @@ DirectDictionary<dictionary_key_type>::DirectDictionary(
}
template <DictionaryKeyType dictionary_key_type>
ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types [[maybe_unused]],
const ColumnPtr & default_values_column) const
Columns DirectDictionary<dictionary_key_type>::getColumns(
const Strings & attribute_names,
const DataTypes & result_types,
const Columns & key_columns,
const DataTypes & key_types [[maybe_unused]],
const Columns & default_values_columns) const
{
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
dict_struct.validateKeyTypes(key_types);
@ -45,16 +45,14 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
const auto requested_keys = extractor.extractAllKeys();
const DictionaryAttribute & attribute = dict_struct.getAttribute(attribute_name, result_type);
DefaultValueProvider default_value_provider(attribute.null_value, default_values_column);
DictionaryStorageFetchRequest request(dict_struct, attribute_names, result_types, default_values_columns);
HashMap<KeyType, size_t> key_to_fetched_index;
key_to_fetched_index.reserve(requested_keys.size());
auto fetched_from_storage = attribute.type->createColumn();
auto fetched_columns_from_storage = request.makeAttributesResultColumns();
size_t fetched_key_index = 0;
size_t requested_attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
Columns block_key_columns;
size_t dictionary_keys_size = dict_struct.getKeysNames().size();
@ -73,8 +71,14 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
DictionaryKeysExtractor<dictionary_key_type> block_keys_extractor(block_key_columns, arena_holder.getComplexKeyArena());
auto block_keys = block_keys_extractor.extractAllKeys();
const auto & block_column = block.safeGetByPosition(dictionary_keys_size + requested_attribute_index).column;
fetched_from_storage->insertRangeFrom(*block_column, 0, block_keys.size());
for (size_t attribute_index = 0; attribute_index < request.attributesSize(); ++attribute_index)
{
if (!request.shouldFillResultColumnWithIndex(attribute_index))
continue;
const auto & block_column = block.safeGetByPosition(dictionary_keys_size + attribute_index).column;
fetched_columns_from_storage[attribute_index]->insertRangeFrom(*block_column, 0, block_keys.size());
}
for (size_t block_key_index = 0; block_key_index < block_keys.size(); ++block_key_index)
{
@ -92,29 +96,54 @@ ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
size_t requested_keys_size = requested_keys.size();
auto result = fetched_from_storage->cloneEmpty();
result->reserve(requested_keys_size);
auto result_columns = request.makeAttributesResultColumns();
for (size_t requested_key_index = 0; requested_key_index < requested_keys_size; ++requested_key_index)
for (size_t attribute_index = 0; attribute_index < result_columns.size(); ++attribute_index)
{
const auto requested_key = requested_keys[requested_key_index];
const auto * it = key_to_fetched_index.find(requested_key);
if (!request.shouldFillResultColumnWithIndex(attribute_index))
continue;
if (it)
fetched_from_storage->get(it->getMapped(), value_to_insert);
else
value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
auto & result_column = result_columns[attribute_index];
result->insert(value_to_insert);
const auto & fetched_column_from_storage = fetched_columns_from_storage[attribute_index];
const auto & default_value_provider = request.defaultValueProviderAtIndex(attribute_index);
result_column->reserve(requested_keys_size);
for (size_t requested_key_index = 0; requested_key_index < requested_keys_size; ++requested_key_index)
{
const auto requested_key = requested_keys[requested_key_index];
const auto * it = key_to_fetched_index.find(requested_key);
if (it)
fetched_column_from_storage->get(it->getMapped(), value_to_insert);
else
value_to_insert = default_value_provider.getDefaultValue(requested_key_index);
result_column->insert(value_to_insert);
}
}
query_count.fetch_add(requested_keys_size, std::memory_order_relaxed);
return result;
return request.filterRequestedColumns(result_columns);
}
template <DictionaryKeyType dictionary_key_type>
ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(const Columns & key_columns, const DataTypes & key_types [[maybe_unused]]) const
ColumnPtr DirectDictionary<dictionary_key_type>::getColumn(
    const std::string & attribute_name,
    const DataTypePtr & result_type,
    const Columns & key_columns,
    const DataTypes & key_types,
    const ColumnPtr & default_values_column) const
{
    /// Single-attribute lookup is a one-element call to getColumns:
    /// wrap each per-attribute argument into a list of size one and
    /// hand back the first column of the result.
    const Strings single_attribute_name{attribute_name};
    const DataTypes single_result_type{result_type};
    const Columns single_default_values_column{default_values_column};

    const Columns fetched_columns = getColumns(
        single_attribute_name,
        single_result_type,
        key_columns,
        key_types,
        single_default_values_column);

    return fetched_columns.front();
}
template <DictionaryKeyType dictionary_key_type>
ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
const Columns & key_columns,
const DataTypes & key_types [[maybe_unused]]) const
{
if constexpr (dictionary_key_type == DictionaryKeyType::complex)
dict_struct.validateKeyTypes(key_types);

View File

@ -66,6 +66,13 @@ public:
DictionaryKeyType getKeyType() const override { return dictionary_key_type; }
/// Multi-attribute counterpart of getColumn: looks up several attributes for the same keys.
/// attribute_names, result_types and default_values_columns are per-attribute lists that are
/// forwarded together to DictionaryStorageFetchRequest; key_columns / key_types describe the keys.
/// For a requested key that was not fetched from storage, the value comes from the attribute's
/// default value provider.
/// NOTE(review): DictionaryStorageFetchRequest appears to match result_types and
/// default_values_columns to attributes while walking the dictionary structure, i.e. in
/// structure order rather than in the order of attribute_names - confirm callers cannot pass
/// names in a different order.
Columns getColumns(
const Strings & attribute_names,
const DataTypes & result_types,
const Columns & key_columns,
const DataTypes & key_types,
const Columns & default_values_columns) const override;
ColumnPtr getColumn(
const std::string& attribute_name,
const DataTypePtr & result_type,

View File

@ -93,6 +93,12 @@
LIMIT {elements_count}
FORMAT Null;
</query>
<query>
SELECT dictGet('default.simple_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), number)
FROM system.numbers
LIMIT {elements_count}
FORMAT Null;
</query>
<query>
SELECT dictHas('default.simple_key_direct_dictionary', number)
FROM system.numbers
@ -106,6 +112,12 @@
LIMIT {elements_count}
FORMAT Null;
</query>
<query>
SELECT dictGet('default.complex_key_direct_dictionary', ('value_int', 'value_string', 'value_decimal', 'value_string_nullable'), (number, toString(number)))
FROM system.numbers
LIMIT {elements_count}
FORMAT Null;
</query>
<query>
SELECT dictHas('default.complex_key_direct_dictionary', (number, toString(number)))
FROM system.numbers