From ad31d86a15f865cca2b18d6240cfbf17adc26435 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 13 Sep 2024 19:58:19 +0200 Subject: [PATCH] move the block inserting --- src/Storages/VirtualColumnUtils.cpp | 44 ++++++++++++++++++----------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 523f236bf59..2daffb43c84 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -207,7 +207,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto return desc; } -static void addFilterDataToVirtualColumns(Block & block, const String & path, size_t idx, const NamesAndTypesList & virtual_columns, const ContextPtr & context) +static void addFilterDataToVirtualColumns(Block & block, const String & path, size_t idx, ColumnWithTypeAndName partitioning_column, const ContextPtr & context) { if (block.has("_path")) block.getByName("_path").column->assumeMutableRef().insert(path); @@ -224,21 +224,11 @@ static void addFilterDataToVirtualColumns(Block & block, const String & path, si block.getByName("_file").column->assumeMutableRef().insert(file); } - std::unordered_map keys; - if (context->getSettingsRef().use_hive_partitioning) - keys = parseHivePartitioningKeysAndValues(path); - - for (const auto & virt_column : virtual_columns) + if (block.has(partitioning_column.name)) { - auto it = keys.find(virt_column.name); - if (it != keys.end()) - { - if (!block.has(virt_column.name)) - block.insert({virt_column.type->createColumn(), virt_column.type, virt_column.name}); - auto & column = block.getByName(virt_column.name).column; - ReadBufferFromString buf(it->second); - virt_column.type->getDefaultSerialization()->deserializeWholeText(column->assumeMutableRef(), buf, getFormatSettings(context)); - } + auto & column = block.getByName(partitioning_column.name).column; + ReadBufferFromString buf(partitioning_column.column->getDataAt(0).toView()); + partitioning_column.type->getDefaultSerialization()->deserializeWholeText(column->assumeMutableRef(), buf, getFormatSettings(context)); } block.getByName("_idx").column->assumeMutableRef().insert(idx); @@ -266,15 +256,37 @@ std::optional createPathAndFileFilterDAG(const ActionsDAG::Node * pr ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const ExpressionActionsPtr & actions, const NamesAndTypesList & virtual_columns, const ContextPtr & context) { Block block; + std::vector> keys_vec; + ColumnsWithTypeAndName partitioning_columns; + if (context->getSettingsRef().use_hive_partitioning) + { + for (const auto & path : paths) + keys_vec.push_back(parseHivePartitioningKeysAndValues(path)); + } for (const auto & column : virtual_columns) { if (column.name == "_file" || column.name == "_path") block.insert({column.type->createColumn(), column.type, column.name}); + else + { + for (auto & keys : keys_vec) + { + const auto & it = keys.find(column.name); + if (it != keys.end()) + { + auto string_column = std::make_shared()->createColumn(); + string_column->insert(it->second); + block.insert({column.type->createColumn(), column.type, column.name}); + partitioning_columns.push_back({string_column->getPtr(), column.type, column.name}); + keys.erase(it); + } + } + } } block.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); for (size_t i = 0; i != paths.size(); ++i) - addFilterDataToVirtualColumns(block, paths[i], i, virtual_columns, context); + addFilterDataToVirtualColumns(block, paths[i], i, partitioning_columns[i], context); filterBlockWithExpression(actions, block);