Merge pull request #33791 from kitaisreal/dictionaries-read-keys-array-copy-fix

Dictionaries: remove unnecessary copy of keys during read
This commit is contained in:
Kruglov Pavel 2022-01-20 13:59:41 +03:00 committed by GitHub
commit dd2971791c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 41 additions and 11 deletions

View File

@ -494,7 +494,8 @@ Pipe CacheDictionary<dictionary_key_type>::read(const Names & column_names, size
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
{
auto keys = cache_storage_ptr->getCachedSimpleKeys();
key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
auto keys_column = getColumnFromPODArray(std::move(keys));
key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
}
else
{

View File

@ -682,6 +682,15 @@ static ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array)
return column_vector;
}
/// Rvalue overload: wraps `array` in a freshly created ColumnVector<T> by
/// move-assigning it into the column's data, rather than taking it by const reference.
/// The caller's array is left in a moved-from state and should not be read afterwards.
template <typename T>
static ColumnPtr getColumnFromPODArray(PaddedPODArray<T> && array)
{
    auto result_column = ColumnVector<T>::create();

    /// Hand the caller's array over to the column.
    auto & column_data = result_column->getData();
    column_data = std::move(array);

    return result_column;
}
template <typename T>
static ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array, size_t start, size_t length)
{

View File

@ -547,7 +547,8 @@ Pipe FlatDictionary::read(const Names & column_names, size_t max_block_size, siz
if (loaded_keys[key_index])
keys.push_back(key_index);
ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
auto keys_column = getColumnFromPODArray(std::move(keys));
ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size);

View File

@ -753,9 +753,14 @@ Pipe HashedArrayDictionary<dictionary_key_type>::read(const Names & column_names
ColumnsWithTypeAndName key_columns;
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
{
auto keys_column = getColumnFromPODArray(std::move(keys));
key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
}
else
{
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size());
}
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size);

View File

@ -661,9 +661,14 @@ Pipe HashedDictionary<dictionary_key_type, sparse>::read(const Names & column_na
ColumnsWithTypeAndName key_columns;
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
{
auto keys_column = getColumnFromPODArray(std::move(keys));
key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
}
else
{
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size());
}
std::shared_ptr<const IDictionary> dictionary = shared_from_this();
auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size);

View File

@ -715,19 +715,28 @@ Pipe RangeHashedDictionary<dictionary_key_type>::read(const Names & column_names
using RangeType = typename LeftDataType::FieldType;
PaddedPODArray<KeyType> keys;
PaddedPODArray<RangeType> start_dates;
PaddedPODArray<RangeType> end_dates;
getKeysAndDates(keys, start_dates, end_dates);
PaddedPODArray<RangeType> range_start;
PaddedPODArray<RangeType> range_end;
getKeysAndDates(keys, range_start, range_end);
range_min_column = ColumnWithTypeAndName{getColumnFromPODArray(start_dates), dict_struct.range_min->type, dict_struct.range_min->name};
range_max_column = ColumnWithTypeAndName{getColumnFromPODArray(end_dates), dict_struct.range_max->type, dict_struct.range_max->name};
auto date_column = getColumnFromPODArray(makeDateKeys(range_start, range_end));
auto range_start_column = getColumnFromPODArray(std::move(range_start));
range_min_column = ColumnWithTypeAndName{std::move(range_start_column), dict_struct.range_min->type, dict_struct.range_min->name};
auto range_end_column = getColumnFromPODArray(std::move(range_end));
range_max_column = ColumnWithTypeAndName{std::move(range_end_column), dict_struct.range_max->type, dict_struct.range_max->name};
if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
key_columns = {ColumnWithTypeAndName(getColumnFromPODArray(keys), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
{
auto keys_column = getColumnFromPODArray(std::move(keys));
key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared<DataTypeUInt64>(), dict_struct.id->name)};
}
else
{
key_columns = deserializeColumnsWithTypeAndNameFromKeys(dict_struct, keys, 0, keys.size());
}
auto date_column = getColumnFromPODArray(makeDateKeys(start_dates, end_dates));
key_columns.emplace_back(ColumnWithTypeAndName{std::move(date_column), std::make_shared<DataTypeInt64>(), ""});
return true;