From 50f2e488bdd05d758ee94e65a628d1bd517034a1 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 20 Apr 2021 22:53:17 +0800 Subject: [PATCH] Fix invalid virtual column expr --- src/Storages/MergeTree/MergeTreeData.cpp | 6 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 22 +++++++++------ .../MergeTree/MergeTreeDataSelectExecutor.h | 8 ++---- src/Storages/VirtualColumnUtils.cpp | 28 ++++++++++++++++--- ...1515_force_data_skipping_indices.reference | 1 + .../01515_force_data_skipping_indices.sql | 2 ++ .../01748_partition_id_pruning.reference | 10 +++++++ .../01748_partition_id_pruning.sql | 12 ++++++++ 8 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f28d87bb9be..62afb30add9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -688,9 +688,11 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( const SelectQueryInfo & query_info, ContextPtr local_context, const DataPartsVector & parts) const { + if (parts.empty()) + return 0u; auto metadata_snapshot = getInMemoryMetadataPtr(); ASTPtr expression_ast; - Block virtual_columns_block = MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns(); + Block virtual_columns_block = MergeTreeDataSelectExecutor::getBlockWithVirtualPartColumns(parts, true /* one_part */); // Generate valid expressions for filtering bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, local_context, virtual_columns_block, expression_ast); @@ -702,7 +704,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( std::unordered_set part_values; if (valid && expression_ast) { - MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(parts, virtual_columns_block); + virtual_columns_block = MergeTreeDataSelectExecutor::getBlockWithVirtualPartColumns(parts, false /* one_part */); VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, local_context, expression_ast); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index af72b3e53f2..bfecbe32d82 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -71,17 +71,13 @@ MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & d { } - -Block MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns() +Block MergeTreeDataSelectExecutor::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part) { - return Block(std::initializer_list{ + Block block(std::initializer_list{ ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_part"), ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_partition_id"), ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared(), "_part_uuid")}); -} -void MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, Block & block) -{ MutableColumns columns = block.mutateColumns(); auto & part_column = columns[0]; @@ -93,9 +89,17 @@ void MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(const MergeTre part_column->insert(part->name); partition_id_column->insert(part->info.partition_id); part_uuid_column->insert(part->uuid); + if (one_part) + { + part_column = ColumnConst::create(std::move(part_column), 1); + partition_id_column = ColumnConst::create(std::move(partition_id_column), 1); + part_uuid_column = ColumnConst::create(std::move(part_uuid_column), 1); + break; + } } block.setColumns(std::move(columns)); + return block; } @@ -179,6 +183,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( Names real_column_names; size_t total_parts = parts.size(); + if (total_parts == 0) + return std::make_unique(); bool sample_factor_column_queried = false; Float64 used_sample_factor = 1; @@ -220,7 +226,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( std::unordered_set part_values; ASTPtr expression_ast; - auto virtual_columns_block = getSampleBlockWithVirtualPartColumns(); + auto virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */); // Generate valid expressions for filtering VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, context, virtual_columns_block, expression_ast); @@ -228,7 +234,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( // If there is still something left, fill the virtual block and do the filtering. if (expression_ast) { - fillBlockWithVirtualPartColumns(parts, virtual_columns_block); + virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */); VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context, expression_ast); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 4129b3ea2a0..d7193fbfbfa 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -45,11 +45,9 @@ public: unsigned num_streams, const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr) const; - /// Construct a sample block consisting only of possible virtual columns for part pruning. - static Block getSampleBlockWithVirtualPartColumns(); - - /// Fill in values of possible virtual columns for part pruning. - static void fillBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, Block & block); + /// Construct a block consisting only of possible virtual columns for part pruning. + /// If one_part is true, fill in at most one part. + static Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part); private: const MergeTreeData & data; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index a6a68f598c7..0c6cb563525 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -27,6 +28,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace { @@ -124,6 +130,21 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block block, ASTPtr & expression_ast) { + if (block.rows() == 0) + throw Exception("Cannot prepare filter with empty block", ErrorCodes::LOGICAL_ERROR); + + /// Take the first row of the input block to build a constant block + auto columns = block.getColumns(); + Columns const_columns(columns.size()); + for (size_t i = 0; i < columns.size(); ++i) + { + if (isColumnConst(*columns[i])) + const_columns[i] = columns[i]->cloneResized(1); + else + const_columns[i] = ColumnConst::create(columns[i]->cloneResized(1), 1); + } + block.setColumns(const_columns); + bool unmodified = true; const auto & select = query->as(); if (!select.where() && !select.prewhere()) @@ -135,10 +156,6 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block else condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone(); - // Prepare a constant block with valid expressions - for (size_t i = 0; i < block.columns(); ++i) - block.getByPosition(i).column = block.getByPosition(i).type->createColumnConstWithDefaultValue(1); - // Provide input columns as constant columns to check if an expression is constant. std::function is_constant = [&block, &context](const ASTPtr & node) { @@ -169,6 +186,9 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr context, ASTPtr expression_ast) { + if (block.rows() == 0) + return; + if (!expression_ast) prepareFilterBlockWithQuery(query, context, block, expression_ast); diff --git a/tests/queries/0_stateless/01515_force_data_skipping_indices.reference b/tests/queries/0_stateless/01515_force_data_skipping_indices.reference index e69de29bb2d..d43017edcc5 100644 --- a/tests/queries/0_stateless/01515_force_data_skipping_indices.reference +++ b/tests/queries/0_stateless/01515_force_data_skipping_indices.reference @@ -0,0 +1 @@ +1 2 3 diff --git a/tests/queries/0_stateless/01515_force_data_skipping_indices.sql b/tests/queries/0_stateless/01515_force_data_skipping_indices.sql index 53d3e5c736f..40b66b0ff7b 100644 --- a/tests/queries/0_stateless/01515_force_data_skipping_indices.sql +++ b/tests/queries/0_stateless/01515_force_data_skipping_indices.sql @@ -10,6 +10,8 @@ CREATE TABLE data_01515 Engine=MergeTree() ORDER BY key; +INSERT INTO data_01515 VALUES (1, 2, 3); + SELECT * FROM data_01515; SELECT * FROM data_01515 SETTINGS force_data_skipping_indices=''; -- { serverError 6 } SELECT * FROM data_01515 SETTINGS force_data_skipping_indices='d1_idx'; -- { serverError 277 } diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.reference b/tests/queries/0_stateless/01748_partition_id_pruning.reference index 192e33c03c9..c1e4e2c78ef 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.reference +++ b/tests/queries/0_stateless/01748_partition_id_pruning.reference @@ -5,3 +5,13 @@ 1 2 1 3 3 +1 +11 +21 +31 +41 +51 +61 +71 +81 +91 diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql index 0a3f7d2713c..17a405e17ad 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.sql +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -17,3 +17,15 @@ set max_rows_to_read = 1; -- one row for subquery select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1)); drop table x; + +drop table if exists mt; + +create table mt (n UInt64) engine=MergeTree order by n partition by n % 10; + +set max_rows_to_read = 200; + +insert into mt select * from numbers(100); + +select * from mt where toUInt64(substr(_part, 1, position(_part, '_') - 1)) = 1; + +drop table mt;