From 93b661ad5a39bd94e1d85c4b971ee32111bfde7b Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 3 Mar 2021 16:36:20 +0800 Subject: [PATCH 01/12] partition id pruning --- src/Functions/partitionID.cpp | 75 +++++++++++++++++ .../registerFunctionsMiscellaneous.cpp | 2 + src/Functions/ya.make | 1 + src/Interpreters/ExpressionAnalyzer.cpp | 6 +- src/Interpreters/ExpressionAnalyzer.h | 26 +++--- src/Interpreters/TreeRewriter.cpp | 5 +- src/Storages/MergeTree/MergeTreeData.cpp | 33 ++++++++ src/Storages/MergeTree/MergeTreeData.h | 3 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 59 +++++++------ .../MergeTree/MergeTreeDataSelectExecutor.h | 6 ++ src/Storages/StorageMergeTree.cpp | 14 +--- src/Storages/StorageReplicatedMergeTree.cpp | 14 +--- src/Storages/VirtualColumnUtils.cpp | 82 +++++++++++++++---- src/Storages/VirtualColumnUtils.h | 7 +- .../01748_partition_id_pruning.reference | 7 ++ .../01748_partition_id_pruning.sql | 19 +++++ 16 files changed, 278 insertions(+), 81 deletions(-) create mode 100644 src/Functions/partitionID.cpp create mode 100644 tests/queries/0_stateless/01748_partition_id_pruning.reference create mode 100644 tests/queries/0_stateless/01748_partition_id_pruning.sql diff --git a/src/Functions/partitionID.cpp b/src/Functions/partitionID.cpp new file mode 100644 index 00000000000..9f5a3d09b86 --- /dev/null +++ b/src/Functions/partitionID.cpp @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +/** partitionID(x, y, ...) is a function that computes partition ids of arguments. + */ +class FunctionPartitionID : public IFunction +{ +public: + static constexpr auto name = "partitionID"; + + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override { return true; } + + bool useDefaultImplementationForNulls() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception("Function " + getName() + " requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + return std::make_shared(); + } + + virtual ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + size_t size = arguments.size(); + Columns columns; + Block sample_block; + for (const auto & argument : arguments) + { + sample_block.insert(argument); + columns.push_back(argument.column); + } + + auto result_column = ColumnString::create(); + for (size_t j = 0; j < input_rows_count; ++j) + { + Row row(size); + for (size_t i = 0; i < size; ++i) + columns[i]->get(j, row[i]); + MergeTreePartition partition(std::move(row)); + result_column->insert(partition.getID(sample_block)); + } + return result_column; + } +}; + +void registerFunctionPartitionID(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 592f0d6774d..3557b0fb865 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -70,6 +70,7 @@ void registerFunctionTcpPort(FunctionFactory &); void registerFunctionByteSize(FunctionFactory &); void registerFunctionFile(FunctionFactory & factory); void registerFunctionConnectionID(FunctionFactory & factory); +void registerFunctionPartitionID(FunctionFactory & factory); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -140,6 +141,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionByteSize(factory); registerFunctionFile(factory); registerFunctionConnectionID(factory); + registerFunctionPartitionID(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 7a4deae4d04..01857423dc1 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -373,6 +373,7 @@ SRCS( now.cpp now64.cpp nullIf.cpp + partitionID.cpp pi.cpp plus.cpp pointInEllipses.cpp diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 2e2c3354d4c..92388a46872 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -305,7 +305,7 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) } -void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) +void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name, const SelectQueryOptions & query_options) { auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); @@ -341,7 +341,7 @@ void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr prepared_sets[set_key] = std::move(set); } -SetPtr SelectQueryExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name) +SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name) { const auto * table = subquery_or_table_name->as(); if (!table) @@ -387,7 +387,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) if (arg->as() || arg->as()) { if (settings.use_index_for_in_with_subqueries) - tryMakeSetForIndexFromSubquery(arg); + tryMakeSetForIndexFromSubquery(arg, query_options); } else { diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index dc7afb183fc..441274200bd 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -127,6 +127,19 @@ public: void makeWindowDescriptions(ActionsDAGPtr actions); + /** + * Create Set from a subquery or a table expression in the query. The created set is suitable for using the index. + * The set will not be created if its size hits the limit. + */ + void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name, const SelectQueryOptions & query_options = {}); + + /** + * Checks if subquery is not a plain StorageSet. + * Because while making set we will read data from StorageSet which is not allowed. + * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise. + */ + SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name); + protected: ExpressionAnalyzer( const ASTPtr & query_, @@ -297,19 +310,6 @@ private: NameSet required_result_columns; SelectQueryOptions query_options; - /** - * Create Set from a subquery or a table expression in the query. The created set is suitable for using the index. - * The set will not be created if its size hits the limit. - */ - void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name); - - /** - * Checks if subquery is not a plain StorageSet. - * Because while making set we will read data from StorageSet which is not allowed. - * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise. - */ - SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name); - /// Create Set-s that we make from IN section to use index on them. void makeSetsForIndex(const ASTPtr & node); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index bcfdf6869c3..5c9ba8fae57 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -656,7 +656,10 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select const auto & partition_desc = metadata_snapshot->getPartitionKey(); if (partition_desc.expression) { - const auto & partition_source_columns = partition_desc.expression->getRequiredColumns(); + auto partition_source_columns = partition_desc.expression->getRequiredColumns(); + partition_source_columns.push_back("_part"); + partition_source_columns.push_back("_partition_id"); + partition_source_columns.push_back("_part_uuid"); optimize_trivial_count = true; for (const auto & required_column : required) { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0c22d5fbc0f..49e129e5fac 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -675,6 +676,38 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat } +std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( + const SelectQueryInfo & query_info, const Context & context, const DataPartsVector & parts) const +{ + auto metadata_snapshot = getInMemoryMetadataPtr(); + Block part_block = MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns(); + ASTPtr expression_ast; + bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, part_block, expression_ast); + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); + if (partition_pruner.isUseless() && !valid) + return {}; + + std::unordered_set part_values; + if (valid) + { + MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(parts, part_block); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, part_block, context, expression_ast); + part_values = VirtualColumnUtils::extractSingleValueFromBlock(part_block, "_part"); + if (part_values.empty()) + return 0; + } + // At this point, empty `part_values` means all parts. + + size_t res = 0; + for (const auto & part : parts) + { + if ((part_values.empty() || part_values.find(part->name) != part_values.end()) && !partition_pruner.canBePruned(part)) + res += part->rows_count; + } + return res; +} + + String MergeTreeData::MergingParams::getModeName() const { switch (mode) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 15059ab47e5..77df4ac5026 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -825,6 +825,9 @@ protected: return {begin, end}; } + std::optional totalRowsByPartitionPredicateImpl( + const SelectQueryInfo & query_info, const Context & context, const DataPartsVector & parts) const; + static decltype(auto) getStateModifier(DataPartState state) { return [state] (const DataPartPtr & part) { part->setState(state); }; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index b1f3f524beb..917c0bcec81 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -71,28 +71,30 @@ MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & d } -/// Construct a block consisting only of possible values of virtual columns -static Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool with_uuid) +Block MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns() { - auto part_column = ColumnString::create(); - auto part_uuid_column = ColumnUUID::create(); + return Block(std::initializer_list{ + ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_part"), + ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_partition_id"), + ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared(), "_part_uuid")}); +} + +void MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, Block & block) +{ + MutableColumns columns = block.mutateColumns(); + + auto & part_column = columns[0]; + auto & partition_id_column = columns[1]; + auto & part_uuid_column = columns[2]; for (const auto & part : parts) { part_column->insert(part->name); - if (with_uuid) - part_uuid_column->insert(part->uuid); + partition_id_column->insert(part->info.partition_id); + part_uuid_column->insert(part->uuid); } - if (with_uuid) - { - return Block(std::initializer_list{ - ColumnWithTypeAndName(std::move(part_column), std::make_shared(), "_part"), - ColumnWithTypeAndName(std::move(part_uuid_column), std::make_shared(), "_part_uuid"), - }); - } - - return Block{ColumnWithTypeAndName(std::move(part_column), std::make_shared(), "_part")}; + block.setColumns(std::move(columns)); } @@ -176,8 +178,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( Names real_column_names; size_t total_parts = parts.size(); - bool part_column_queried = false; - bool part_uuid_column_queried = false; bool sample_factor_column_queried = false; Float64 used_sample_factor = 1; @@ -186,7 +186,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( { if (name == "_part") { - part_column_queried = true; virt_column_names.push_back(name); } else if (name == "_part_index") @@ -199,7 +198,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( } else if (name == "_part_uuid") { - part_uuid_column_queried = true; virt_column_names.push_back(name); } else if (name == "_sample_factor") @@ -219,12 +217,21 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( if (real_column_names.empty()) real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns)); - /// If `_part` or `_part_uuid` virtual columns are requested, we try to filter out data by them. - Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, part_uuid_column_queried); - if (part_column_queried || part_uuid_column_queried) - VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context); + std::unordered_set part_values; + ASTPtr expression_ast; + Block virtual_columns_block = getSampleBlockWithVirtualPartColumns(); + VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, virtual_columns_block, expression_ast); - auto part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); + // If there is still something left, fill the virtual block and do the filtering. + if (expression_ast) + { + fillBlockWithVirtualPartColumns(parts, virtual_columns_block); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context, expression_ast); + part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); + if (part_values.empty()) + return {}; + } + // At this point, empty `part_values` means all parts. metadata_snapshot->check(real_column_names, data.getVirtuals(), data.getStorageID()); @@ -1899,7 +1906,7 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( for (const auto & part : prev_parts) { - if (part_values.find(part->name) == part_values.end()) + if (!part_values.empty() && part_values.find(part->name) == part_values.end()) continue; if (part->isEmpty()) @@ -1950,7 +1957,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( for (const auto & part : prev_parts) { - if (part_values.find(part->name) == part_values.end()) + if (!part_values.empty() && part_values.find(part->name) == part_values.end()) continue; if (part->isEmpty()) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 634719639ad..0702605a539 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -44,6 +44,12 @@ public: unsigned num_streams, const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr) const; + /// Construct a sample block consisting only of possible virtual columns for part pruning. + static Block getSampleBlockWithVirtualPartColumns(); + + /// Fill in values of possible virtual columns for part pruning. + static void fillBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, Block & block); + private: const MergeTreeData & data; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c8f44c78e6e..ce81849708b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -208,18 +208,8 @@ std::optional StorageMergeTree::totalRows(const Settings &) const std::optional StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { - auto metadata_snapshot = getInMemoryMetadataPtr(); - PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); - if (partition_pruner.isUseless()) - return {}; - size_t res = 0; - auto lock = lockParts(); - for (const auto & part : getDataPartsStateRange(DataPartState::Committed)) - { - if (!partition_pruner.canBePruned(part)) - res += part->rows_count; - } - return res; + auto parts = getDataPartsVector({DataPartState::Committed}); + return totalRowsByPartitionPredicateImpl(query_info, context, parts); } std::optional StorageMergeTree::totalBytes(const Settings &) const diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 68f3b6d80d1..9f18d9ead21 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3890,17 +3890,9 @@ std::optional StorageReplicatedMergeTree::totalRows(const Settings & set std::optional StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { - auto metadata_snapshot = getInMemoryMetadataPtr(); - PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); - if (partition_pruner.isUseless()) - return {}; - size_t res = 0; - foreachCommittedParts([&](auto & part) - { - if (!partition_pruner.canBePruned(part)) - res += part->rows_count; - }, context.getSettingsRef().select_sequential_consistency); - return res; + DataPartsVector parts; + foreachCommittedParts([&](auto & part) { parts.push_back(part); }, context.getSettingsRef().select_sequential_consistency); + return totalRowsByPartitionPredicateImpl(query_info, context, parts); } std::optional StorageReplicatedMergeTree::totalBytes(const Settings & settings) const diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 6b99dc25e37..ba35bde6d51 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -5,12 +5,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -30,29 +32,52 @@ namespace /// Verifying that the function depends only on the specified columns bool isValidFunction(const ASTPtr & expression, const NameSet & columns) { - for (const auto & child : expression->children) - if (!isValidFunction(child, columns)) - return false; - - if (auto opt_name = IdentifierSemantic::getColumnName(expression)) - return columns.count(*opt_name); + const auto * function = expression->as(); + if (function) + { + if (functionIsInOrGlobalInOperator(function->name)) + { + // Second argument of IN can be a scalar subquery + if (!isValidFunction(function->arguments->children[0], columns)) + return false; + } + else + { + if (function->arguments) + { + for (const auto & child : function->arguments->children) + if (!isValidFunction(child, columns)) + return false; + } + } + } + else + { + if (auto opt_name = IdentifierSemantic::getColumnName(expression)) + return columns.count(*opt_name); + } return true; } /// Extract all subfunctions of the main conjunction, but depending only on the specified columns -void extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector & result) +bool extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector & result) { const auto * function = expression->as(); - if (function && function->name == "and") + if (function && (function->name == "and" || function->name == "indexHint")) { + bool ret = true; for (const auto & child : function->arguments->children) - extractFunctions(child, columns, result); + ret &= extractFunctions(child, columns, result); + return ret; } else if (isValidFunction(expression, columns)) { result.push_back(expression->clone()); + return true; } + else + return false; } /// Construct a conjunction from given functions @@ -65,6 +90,25 @@ ASTPtr buildWhereExpression(const ASTs & functions) return makeASTFunction("and", functions); } +void buildSets(const ASTPtr & expression, ExpressionAnalyzer & analyzer) +{ + const auto * func = expression->as(); + if (func && functionIsInOrGlobalInOperator(func->name)) + { + const IAST & args = *func->arguments; + const ASTPtr & arg = args.children.at(1); + if (arg->as() || arg->as()) + { + analyzer.tryMakeSetForIndexFromSubquery(arg); + } + } + else + { + for (const auto & child : expression->children) + buildSets(child, analyzer); + } +} + } namespace VirtualColumnUtils @@ -96,11 +140,12 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va } } -void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context) +bool prepareFilterBlockWithQuery(const ASTPtr & query, const Block & block, ASTPtr & expression_ast) { + bool ret = true; const auto & select = query->as(); if (!select.where() && !select.prewhere()) - return; + return ret; NameSet columns; for (const auto & it : block.getNamesAndTypesList()) @@ -109,17 +154,26 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c /// We will create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. std::vector functions; if (select.where()) - extractFunctions(select.where(), columns, functions); + ret &= extractFunctions(select.where(), columns, functions); if (select.prewhere()) - extractFunctions(select.prewhere(), columns, functions); + ret &= extractFunctions(select.prewhere(), columns, functions); + + expression_ast = buildWhereExpression(functions); + return ret; +} + +void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context, ASTPtr expression_ast) +{ + if (!expression_ast) + prepareFilterBlockWithQuery(query, block, expression_ast); - ASTPtr expression_ast = buildWhereExpression(functions); if (!expression_ast) return; /// Let's analyze and calculate the expression. auto syntax_result = TreeRewriter(context).analyze(expression_ast, block.getNamesAndTypesList()); ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); + buildSets(expression_ast, analyzer); ExpressionActionsPtr actions = analyzer.getActions(false); Block block_with_filter = block; diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 445a996ab87..326738f4259 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -24,9 +24,14 @@ namespace VirtualColumnUtils /// - `WITH toUInt16(9000) as _port`. void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value, const String & func = ""); +/// Prepare `expression_ast` to filter block. Returns true if `expression_ast` is not trimmed, that is, +/// the sample block provides all needed columns, else return false. +bool prepareFilterBlockWithQuery(const ASTPtr & query, const Block & sample_block, ASTPtr & expression_ast); + /// Leave in the block only the rows that fit under the WHERE clause and the PREWHERE clause of the query. /// Only elements of the outer conjunction are considered, depending only on the columns present in the block. -void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context); +/// If `expression_ast` is passed, use it to filter block. +void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context, ASTPtr expression_ast = {}); /// Extract from the input stream a set of `name` column values template diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.reference b/tests/queries/0_stateless/01748_partition_id_pruning.reference new file mode 100644 index 00000000000..192e33c03c9 --- /dev/null +++ b/tests/queries/0_stateless/01748_partition_id_pruning.reference @@ -0,0 +1,7 @@ +1 1 +1 2 +1 3 +1 1 +1 2 +1 3 +3 diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql new file mode 100644 index 00000000000..cca366c5168 --- /dev/null +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -0,0 +1,19 @@ +drop table if exists x; + +create table x (i int, j int) engine MergeTree partition by i order by j settings index_granularity = 1; + +insert into x values (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); + +set max_rows_to_read = 3; + +select * from x where _partition_id = partitionID(1); + +set max_rows_to_read = 4; -- one row for subquery + +select * from x where _partition_id in (select partitionID(number + 1) from numbers(1)); + +-- trivial count optimization test +set max_rows_to_read = 1; -- one row for subquery +select count() from x where _partition_id in (select partitionID(number + 1) from numbers(1)); + +drop table x; From 2f8f4e96973a48b1900e08cee6b0094381713542 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 3 Mar 2021 22:26:55 +0800 Subject: [PATCH 02/12] Fix tests --- src/Storages/VirtualColumnUtils.cpp | 4 ++++ tests/queries/0_stateless/01651_bugs_from_15889.reference | 1 + tests/queries/0_stateless/01651_bugs_from_15889.sql | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ba35bde6d51..196455c70f0 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -41,6 +41,10 @@ bool isValidFunction(const ASTPtr & expression, const NameSet & columns) if (!isValidFunction(function->arguments->children[0], columns)) return false; } + else if (function->name == "ignore") + { + return false; + } else { if (function->arguments) diff --git a/tests/queries/0_stateless/01651_bugs_from_15889.reference b/tests/queries/0_stateless/01651_bugs_from_15889.reference index 77ac542d4fb..28271a697e2 100644 --- a/tests/queries/0_stateless/01651_bugs_from_15889.reference +++ b/tests/queries/0_stateless/01651_bugs_from_15889.reference @@ -1,2 +1,3 @@ 0 +0 diff --git a/tests/queries/0_stateless/01651_bugs_from_15889.sql b/tests/queries/0_stateless/01651_bugs_from_15889.sql index 1fbf669a1b8..97da4d78ab6 100644 --- a/tests/queries/0_stateless/01651_bugs_from_15889.sql +++ b/tests/queries/0_stateless/01651_bugs_from_15889.sql @@ -8,7 +8,7 @@ INSERT INTO xp SELECT '2020-01-01', number, '' FROM numbers(100000); CREATE TABLE xp_d AS xp ENGINE = Distributed(test_shard_localhost, currentDatabase(), xp); -SELECT count(7 = (SELECT number FROM numbers(0) ORDER BY number ASC NULLS FIRST LIMIT 7)) FROM xp_d PREWHERE toYYYYMM(A) GLOBAL IN (SELECT NULL = (SELECT number FROM numbers(1) ORDER BY number DESC NULLS LAST LIMIT 1), toYYYYMM(min(A)) FROM xp_d) WHERE B > NULL; -- { serverError 20 } +SELECT count(7 = (SELECT number FROM numbers(0) ORDER BY number ASC NULLS FIRST LIMIT 7)) FROM xp_d PREWHERE toYYYYMM(A) GLOBAL IN (SELECT NULL = (SELECT number FROM numbers(1) ORDER BY number DESC NULLS LAST LIMIT 1), toYYYYMM(min(A)) FROM xp_d) WHERE B > NULL; -- B > NULL is evaluated to 0 and this works SELECT count() FROM xp_d WHERE A GLOBAL IN (SELECT NULL); -- { serverError 53 } From 4c9b0f666ca165d51dd7c9f395b5fc1da8fb9485 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 3 Mar 2021 23:32:37 +0800 Subject: [PATCH 03/12] Fix error --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 917c0bcec81..0d4c4c4aa9a 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -229,7 +229,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context, expression_ast); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) - return {}; + return std::make_unique(); } // At this point, empty `part_values` means all parts. @@ -380,7 +380,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( { LOG_DEBUG(log, "Will use no data on this replica because parallel replicas processing has been requested" " (the setting 'max_parallel_replicas') but the table does not support sampling and this replica is not the first."); - return {}; + return std::make_unique(); } bool use_sampling = relative_sample_size > 0 || (settings.parallel_replicas_count > 1 && data.supportsSampling()); From 9205fad8c762644d374936dbba39a3730118dcea Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 4 Mar 2021 13:59:57 +0800 Subject: [PATCH 04/12] Better --- .../ExecuteScalarSubqueriesVisitor.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 4 +- src/Interpreters/ExpressionAnalyzer.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 15 +++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 18 +++--- src/Storages/StorageMerge.cpp | 9 +-- src/Storages/StorageMerge.h | 2 +- src/Storages/System/StorageSystemColumns.cpp | 4 +- .../System/StorageSystemDistributionQueue.cpp | 2 +- .../System/StorageSystemMutations.cpp | 2 +- .../System/StorageSystemPartsBase.cpp | 4 +- src/Storages/System/StorageSystemReplicas.cpp | 2 +- .../System/StorageSystemReplicationQueue.cpp | 2 +- src/Storages/System/StorageSystemTables.cpp | 6 +- src/Storages/VirtualColumnUtils.cpp | 59 +++++++------------ src/Storages/VirtualColumnUtils.h | 7 ++- ...read_distribution_and_max_rows_to_read.sql | 5 +- 17 files changed, 68 insertions(+), 77 deletions(-) diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 7ee7bb1f301..94b3a5218b4 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -201,7 +201,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr void ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data) { /// Don't descend into subqueries in arguments of IN operator. - /// But if an argument is not subquery, than deeper may be scalar subqueries and we need to descend in them. + /// But if an argument is not subquery, then deeper may be scalar subqueries and we need to descend in them. std::vector out; if (checkFunctionIsInOrGlobalInOperator(func)) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 92388a46872..50dd6ede2a1 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1324,9 +1324,9 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje } -ExpressionActionsPtr ExpressionAnalyzer::getConstActions() +ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const NamesAndTypesList & constant_inputs) { - auto actions = std::make_shared(NamesAndTypesList()); + auto actions = std::make_shared(constant_inputs); getRootActions(query, true, actions, true); return std::make_shared(actions); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 441274200bd..c44507cc4e2 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -110,7 +110,7 @@ public: /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants. /// Does not execute subqueries. - ExpressionActionsPtr getConstActions(); + ExpressionActionsPtr getConstActions(const NamesAndTypesList & constant_inputs = {}); /** Sets that require a subquery to be create. * Only the sets needed to perform actions returned from already executed `append*` or `getActions`. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 49e129e5fac..945fb5639c1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -680,19 +680,22 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( const SelectQueryInfo & query_info, const Context & context, const DataPartsVector & parts) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - Block part_block = MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns(); ASTPtr expression_ast; - bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, part_block, expression_ast); + Block virtual_columns_block = MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns(); + + // Generate valid expressions for filtering + bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info, context, virtual_columns_block, expression_ast); + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); if (partition_pruner.isUseless() && !valid) return {}; std::unordered_set part_values; - if (valid) + if (valid && expression_ast) { - MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(parts, part_block); - VirtualColumnUtils::filterBlockWithQuery(query_info.query, part_block, context, expression_ast); - part_values = VirtualColumnUtils::extractSingleValueFromBlock(part_block, "_part"); + MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(parts, virtual_columns_block); + VirtualColumnUtils::filterBlockWithQuery(query_info, virtual_columns_block, context, expression_ast); + part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) return 0; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 0d4c4c4aa9a..59bf32fc14a 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -39,6 +39,7 @@ #include #include #include +#include namespace ProfileEvents { @@ -74,14 +75,15 @@ MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & d Block MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns() { return Block(std::initializer_list{ - ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_part"), - ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_partition_id"), - ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared(), "_part_uuid")}); + ColumnWithTypeAndName(DataTypeString().createColumnConstWithDefaultValue(1), std::make_shared(), "_part"), + ColumnWithTypeAndName(DataTypeString().createColumnConstWithDefaultValue(1), std::make_shared(), "_partition_id"), + ColumnWithTypeAndName(DataTypeUUID().createColumnConstWithDefaultValue(1), std::make_shared(), "_part_uuid")}); } void MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, Block & block) { - MutableColumns columns = block.mutateColumns(); + materializeBlockInplace(block); + MutableColumns columns = block.cloneEmptyColumns(); auto & part_column = columns[0]; auto & partition_id_column = columns[1]; @@ -219,14 +221,16 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( std::unordered_set part_values; ASTPtr expression_ast; - Block virtual_columns_block = getSampleBlockWithVirtualPartColumns(); - VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, virtual_columns_block, expression_ast); + auto virtual_columns_block = getSampleBlockWithVirtualPartColumns(); + + // Generate valid expressions for filtering + VirtualColumnUtils::prepareFilterBlockWithQuery(query_info, context, virtual_columns_block, expression_ast); // If there is still something left, fill the virtual block and do the filtering. if (expression_ast) { fillBlockWithVirtualPartColumns(parts, virtual_columns_block); - VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context, expression_ast); + VirtualColumnUtils::filterBlockWithQuery(query_info, virtual_columns_block, context, expression_ast); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) return std::make_unique(); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 46be91ba258..54f87d4e459 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -219,8 +219,8 @@ Pipe StorageMerge::read( /** First we make list of selected tables to find out its size. * This is necessary to correctly pass the recommended number of threads to each table. */ - StorageListWithLocks selected_tables = getSelectedTables( - query_info.query, has_table_virtual_column, context.getCurrentQueryId(), context.getSettingsRef()); + StorageListWithLocks selected_tables + = getSelectedTables(query_info, has_table_virtual_column, context.getCurrentQueryId(), context.getSettingsRef()); if (selected_tables.empty()) /// FIXME: do we support sampling in this case? @@ -407,8 +407,9 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( - const ASTPtr & query, bool has_virtual_column, const String & query_id, const Settings & settings) const + const SelectQueryInfo & query_info, bool has_virtual_column, const String & query_id, const Settings & settings) const { + const ASTPtr & query = query_info.query; StorageListWithLocks selected_tables; DatabaseTablesIteratorPtr iterator = getDatabaseIterator(global_context); @@ -436,7 +437,7 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( if (has_virtual_column) { Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(virtual_column), std::make_shared(), "_table")}; - VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, global_context); + VirtualColumnUtils::filterBlockWithQuery(query_info, virtual_columns_block, global_context); auto values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_table"); /// Remove unused tables from the list diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index eaffd34a379..ea8667aa186 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -59,7 +59,7 @@ private: StorageListWithLocks getSelectedTables(const String & query_id, const Settings & settings) const; StorageMerge::StorageListWithLocks getSelectedTables( - const ASTPtr & query, bool has_virtual_column, const String & query_id, const Settings & settings) const; + const SelectQueryInfo & query_info, bool has_virtual_column, const String & query_id, const Settings & settings) const; template StoragePtr getFirstTable(F && predicate) const; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 6726d502071..75d90ffe1f8 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -299,7 +299,7 @@ Pipe StorageSystemColumns::read( block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared(), "database")); /// Filter block with `database` column. - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithQuery(query_info, block_to_filter, context); if (!block_to_filter.rows()) { @@ -345,7 +345,7 @@ Pipe StorageSystemColumns::read( } /// Filter block with `database` and `table` columns. - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithQuery(query_info, block_to_filter, context); if (!block_to_filter.rows()) { diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index db649e7e1ba..58b468ab7f6 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -155,7 +155,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, cons { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index f66f57ef5d1..50498f49c81 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -89,7 +89,7 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex { col_table, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 39cc651e147..e88cf8b2865 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -93,7 +93,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const std::move(database_column_mut), std::make_shared(), "database")); /// Filter block_to_filter with column 'database'. - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithQuery(query_info, block_to_filter, context); rows = block_to_filter.rows(); /// Block contains new columns, update database_column. @@ -162,7 +162,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const if (rows) { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. - VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); + VirtualColumnUtils::filterBlockWithQuery(query_info, block_to_filter, context); rows = block_to_filter.rows(); } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 0af67ab6986..b66e578ea1a 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -135,7 +135,7 @@ Pipe StorageSystemReplicas::read( { col_engine, std::make_shared(), "engine" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info, filtered_block, context); if (!filtered_block.rows()) return {}; diff --git a/src/Storages/System/StorageSystemReplicationQueue.cpp b/src/Storages/System/StorageSystemReplicationQueue.cpp index 9cd5e8b8ff3..549b93b5ebc 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -98,7 +98,7 @@ void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, const { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 132ed234323..af25d299c2e 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -62,7 +62,7 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) } -static ColumnPtr getFilteredDatabases(const ASTPtr & query, const Context & context) +static ColumnPtr getFilteredDatabases(const SelectQueryInfo & query_info, const Context & context) { MutableColumnPtr column = ColumnString::create(); @@ -76,7 +76,7 @@ static ColumnPtr getFilteredDatabases(const ASTPtr & query, const Context & cont } Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; - VirtualColumnUtils::filterBlockWithQuery(query, block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info, block, context); return block.getByPosition(0).column; } @@ -524,7 +524,7 @@ Pipe StorageSystemTables::read( } } - ColumnPtr filtered_databases_column = getFilteredDatabases(query_info.query, context); + ColumnPtr filtered_databases_column = getFilteredDatabases(query_info, context); return Pipe(std::make_shared( std::move(columns_mask), std::move(res_block), max_block_size, std::move(filtered_databases_column), context)); diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 196455c70f0..af817c52805 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -30,52 +30,33 @@ namespace { /// Verifying that the function depends only on the specified columns -bool isValidFunction(const ASTPtr & expression, const NameSet & columns) +bool isValidFunction(const ASTPtr & expression, const Block & block) { const auto * function = expression->as(); - if (function) + if (function && functionIsInOrGlobalInOperator(function->name)) { - if (functionIsInOrGlobalInOperator(function->name)) - { - // Second argument of IN can be a scalar subquery - if (!isValidFunction(function->arguments->children[0], columns)) - return false; - } - else if (function->name == "ignore") - { - return false; - } - else - { - if (function->arguments) - { - for (const auto & child : function->arguments->children) - if (!isValidFunction(child, columns)) - return false; - } - } + // Second argument of IN can be a scalar subquery + return isValidFunction(function->arguments->children[0], block); } else { - if (auto opt_name = IdentifierSemantic::getColumnName(expression)) - return columns.count(*opt_name); + auto column_name = expression->getColumnName(); + return block.has(column_name) && isColumnConst(*block.getByName(column_name).column); } - - return true; } /// Extract all subfunctions of the main conjunction, but depending only on the specified columns -bool extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector & result) +bool extractFunctions(const ASTPtr & expression, const Block & block, std::vector & result) { const auto * function = expression->as(); if (function && (function->name == "and" || function->name == "indexHint")) { bool ret = true; for (const auto & child : function->arguments->children) - ret &= extractFunctions(child, columns, result); + ret &= extractFunctions(child, block, result); return ret; } - else if (isValidFunction(expression, columns)) + else if (isValidFunction(expression, block)) { result.push_back(expression->clone()); return true; @@ -124,7 +105,6 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va if (!select.with()) select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared()); - if (func.empty()) { auto literal = std::make_shared(value); @@ -144,37 +124,38 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va } } -bool prepareFilterBlockWithQuery(const ASTPtr & query, const Block & block, ASTPtr & expression_ast) +bool prepareFilterBlockWithQuery(const SelectQueryInfo & query_info, const Context & context, Block block, ASTPtr & expression_ast) { bool ret = true; - const auto & select = query->as(); + const auto & select = query_info.query->as(); if (!select.where() && !select.prewhere()) return ret; - NameSet columns; - for (const auto & it : block.getNamesAndTypesList()) - columns.insert(it.name); + // Prepare a block with valid expressions + ExpressionAnalyzer(query_info.query, query_info.syntax_analyzer_result, context) + .getConstActions(block.getNamesAndTypesList()) + ->execute(block); /// We will create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. std::vector functions; if (select.where()) - ret &= extractFunctions(select.where(), columns, functions); + ret &= extractFunctions(select.where(), block, functions); if (select.prewhere()) - ret &= extractFunctions(select.prewhere(), columns, functions); + ret &= extractFunctions(select.prewhere(), block, functions); expression_ast = buildWhereExpression(functions); return ret; } -void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context, ASTPtr expression_ast) +void filterBlockWithQuery(const SelectQueryInfo & query_info, Block & block, const Context & context, ASTPtr expression_ast) { if (!expression_ast) - prepareFilterBlockWithQuery(query, block, expression_ast); + prepareFilterBlockWithQuery(query_info, context, block, expression_ast); if (!expression_ast) return; - /// Let's analyze and calculate the expression. + /// Let's analyze and calculate the prepared expression. auto syntax_result = TreeRewriter(context).analyze(expression_ast, block.getNamesAndTypesList()); ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); buildSets(expression_ast, analyzer); diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 326738f4259..7c414aa0984 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -4,6 +4,7 @@ #include #include +#include namespace DB @@ -25,13 +26,13 @@ namespace VirtualColumnUtils void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value, const String & func = ""); /// Prepare `expression_ast` to filter block. Returns true if `expression_ast` is not trimmed, that is, -/// the sample block provides all needed columns, else return false. -bool prepareFilterBlockWithQuery(const ASTPtr & query, const Block & sample_block, ASTPtr & expression_ast); +/// `block` provides all needed columns for `expression_ast`, else return false. +bool prepareFilterBlockWithQuery(const SelectQueryInfo & query_info, const Context & context, Block block, ASTPtr & expression_ast); /// Leave in the block only the rows that fit under the WHERE clause and the PREWHERE clause of the query. /// Only elements of the outer conjunction are considered, depending only on the columns present in the block. /// If `expression_ast` is passed, use it to filter block. -void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context, ASTPtr expression_ast = {}); +void filterBlockWithQuery(const SelectQueryInfo & query_info, Block & block, const Context & context, ASTPtr expression_ast = {}); /// Extract from the input stream a set of `name` column values template diff --git a/tests/queries/0_stateless/00971_merge_tree_uniform_read_distribution_and_max_rows_to_read.sql b/tests/queries/0_stateless/00971_merge_tree_uniform_read_distribution_and_max_rows_to_read.sql index d31989d65bd..5abb1af620a 100644 --- a/tests/queries/0_stateless/00971_merge_tree_uniform_read_distribution_and_max_rows_to_read.sql +++ b/tests/queries/0_stateless/00971_merge_tree_uniform_read_distribution_and_max_rows_to_read.sql @@ -10,7 +10,8 @@ SELECT count() FROM merge_tree; SET max_rows_to_read = 900000; -SELECT count() FROM merge_tree WHERE not ignore(); -- { serverError 158 } -SELECT count() FROM merge_tree WHERE not ignore(); -- { serverError 158 } +-- constant ignore will be pruned by part pruner. ignore(*) is used. +SELECT count() FROM merge_tree WHERE not ignore(*); -- { serverError 158 } +SELECT count() FROM merge_tree WHERE not ignore(*); -- { serverError 158 } DROP TABLE merge_tree; From fac832227cd08c7e12170c4ca60fbc33a2b5b7e0 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 4 Mar 2021 16:53:31 +0800 Subject: [PATCH 05/12] Fix again --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.h | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 9 ++++----- src/Storages/VirtualColumnUtils.cpp | 13 +++++++++++-- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 50dd6ede2a1..f931ecf8c21 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1324,7 +1324,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje } -ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const NamesAndTypesList & constant_inputs) +ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAndName & constant_inputs) { auto actions = std::make_shared(constant_inputs); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index c44507cc4e2..69b6d75de35 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -110,7 +110,7 @@ public: /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants. /// Does not execute subqueries. - ExpressionActionsPtr getConstActions(const NamesAndTypesList & constant_inputs = {}); + ExpressionActionsPtr getConstActions(const ColumnsWithTypeAndName & constant_inputs = {}); /** Sets that require a subquery to be create. * Only the sets needed to perform actions returned from already executed `append*` or `getActions`. diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 59bf32fc14a..2b26c59ee08 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -75,15 +75,14 @@ MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & d Block MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns() { return Block(std::initializer_list{ - ColumnWithTypeAndName(DataTypeString().createColumnConstWithDefaultValue(1), std::make_shared(), "_part"), - ColumnWithTypeAndName(DataTypeString().createColumnConstWithDefaultValue(1), std::make_shared(), "_partition_id"), - ColumnWithTypeAndName(DataTypeUUID().createColumnConstWithDefaultValue(1), std::make_shared(), "_part_uuid")}); + ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_part"), + ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "_partition_id"), + ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared(), "_part_uuid")}); } void MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, Block & block) { - materializeBlockInplace(block); - MutableColumns columns = block.cloneEmptyColumns(); + MutableColumns columns = block.mutateColumns(); auto & part_column = columns[0]; auto & partition_id_column = columns[1]; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index af817c52805..c69fa59d93a 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -131,9 +131,18 @@ bool prepareFilterBlockWithQuery(const SelectQueryInfo & query_info, const Conte if (!select.where() && !select.prewhere()) return ret; + ASTPtr condition_ast; + if (select.prewhere() && select.where()) + condition_ast = makeASTFunction("and", select.prewhere()->clone(), select.where()->clone()); + else + condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone(); + // Prepare a block with valid expressions - ExpressionAnalyzer(query_info.query, query_info.syntax_analyzer_result, context) - .getConstActions(block.getNamesAndTypesList()) + for (size_t i = 0; i < block.columns(); ++i) + block.getByPosition(i).column = block.getByPosition(i).type->createColumnConstWithDefaultValue(1); + + ExpressionAnalyzer(condition_ast, query_info.syntax_analyzer_result, context) + .getConstActions(block.getColumnsWithTypeAndName()) ->execute(block); /// We will create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. From 909cb3c24324e923a371255e01cbba6f0730b854 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 4 Mar 2021 22:27:07 +0800 Subject: [PATCH 06/12] Fix again.... --- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- src/Storages/StorageMerge.cpp | 2 +- src/Storages/System/StorageSystemColumns.cpp | 4 ++-- .../System/StorageSystemDistributionQueue.cpp | 2 +- .../System/StorageSystemMutations.cpp | 2 +- .../System/StorageSystemPartsBase.cpp | 4 ++-- src/Storages/System/StorageSystemReplicas.cpp | 2 +- .../System/StorageSystemReplicationQueue.cpp | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 21 ++++++++++++------- src/Storages/VirtualColumnUtils.h | 4 ++-- 12 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 945fb5639c1..e7b83eae85d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -684,7 +684,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( Block virtual_columns_block = MergeTreeDataSelectExecutor::getSampleBlockWithVirtualPartColumns(); // Generate valid expressions for filtering - bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info, context, virtual_columns_block, expression_ast); + bool valid = VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, context, virtual_columns_block, expression_ast); PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); if (partition_pruner.isUseless() && !valid) @@ -694,7 +694,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( if (valid && expression_ast) { MergeTreeDataSelectExecutor::fillBlockWithVirtualPartColumns(parts, virtual_columns_block); - VirtualColumnUtils::filterBlockWithQuery(query_info, virtual_columns_block, context, expression_ast); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context, expression_ast); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) return 0; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2b26c59ee08..6588729e25e 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -223,13 +223,13 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( auto virtual_columns_block = getSampleBlockWithVirtualPartColumns(); // Generate valid expressions for filtering - VirtualColumnUtils::prepareFilterBlockWithQuery(query_info, context, virtual_columns_block, expression_ast); + VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, context, virtual_columns_block, expression_ast); // If there is still something left, fill the virtual block and do the filtering. if (expression_ast) { fillBlockWithVirtualPartColumns(parts, virtual_columns_block); - VirtualColumnUtils::filterBlockWithQuery(query_info, virtual_columns_block, context, expression_ast); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, context, expression_ast); part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); if (part_values.empty()) return std::make_unique(); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 54f87d4e459..6ac8d02e54d 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -437,7 +437,7 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( if (has_virtual_column) { Block virtual_columns_block = Block{ColumnWithTypeAndName(std::move(virtual_column), std::make_shared(), "_table")}; - VirtualColumnUtils::filterBlockWithQuery(query_info, virtual_columns_block, global_context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, virtual_columns_block, global_context); auto values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_table"); /// Remove unused tables from the list diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 75d90ffe1f8..6726d502071 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -299,7 +299,7 @@ Pipe StorageSystemColumns::read( block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared(), "database")); /// Filter block with `database` column. - VirtualColumnUtils::filterBlockWithQuery(query_info, block_to_filter, context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); if (!block_to_filter.rows()) { @@ -345,7 +345,7 @@ Pipe StorageSystemColumns::read( } /// Filter block with `database` and `table` columns. - VirtualColumnUtils::filterBlockWithQuery(query_info, block_to_filter, context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); if (!block_to_filter.rows()) { diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index 58b468ab7f6..db649e7e1ba 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -155,7 +155,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, cons { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info, filtered_block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index 50498f49c81..f66f57ef5d1 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -89,7 +89,7 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex { col_table, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info, filtered_block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index e88cf8b2865..39cc651e147 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -93,7 +93,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const std::move(database_column_mut), std::make_shared(), "database")); /// Filter block_to_filter with column 'database'. - VirtualColumnUtils::filterBlockWithQuery(query_info, block_to_filter, context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); rows = block_to_filter.rows(); /// Block contains new columns, update database_column. @@ -162,7 +162,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const if (rows) { /// Filter block_to_filter with columns 'database', 'table', 'engine', 'active'. - VirtualColumnUtils::filterBlockWithQuery(query_info, block_to_filter, context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context); rows = block_to_filter.rows(); } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index b66e578ea1a..0af67ab6986 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -135,7 +135,7 @@ Pipe StorageSystemReplicas::read( { col_engine, std::make_shared(), "engine" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info, filtered_block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); if (!filtered_block.rows()) return {}; diff --git a/src/Storages/System/StorageSystemReplicationQueue.cpp b/src/Storages/System/StorageSystemReplicationQueue.cpp index 549b93b5ebc..9cd5e8b8ff3 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -98,7 +98,7 @@ void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, const { col_table_to_filter, std::make_shared(), "table" }, }; - VirtualColumnUtils::filterBlockWithQuery(query_info, filtered_block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, filtered_block, context); if (!filtered_block.rows()) return; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index af25d299c2e..2b1788f991d 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -76,7 +76,7 @@ static ColumnPtr getFilteredDatabases(const SelectQueryInfo & query_info, const } Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; - VirtualColumnUtils::filterBlockWithQuery(query_info, block, context); + VirtualColumnUtils::filterBlockWithQuery(query_info.query, block, context); return block.getByPosition(0).column; } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index c69fa59d93a..537ef91f4f5 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -21,6 +21,7 @@ #include #include #include +#include namespace DB @@ -124,10 +125,10 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va } } -bool prepareFilterBlockWithQuery(const SelectQueryInfo & query_info, const Context & context, Block block, ASTPtr & expression_ast) +bool prepareFilterBlockWithQuery(const ASTPtr & query, const Context & context, Block block, ASTPtr & expression_ast) { bool ret = true; - const auto & select = query_info.query->as(); + const auto & select = query->as(); if (!select.where() && !select.prewhere()) return ret; @@ -141,9 +142,15 @@ bool prepareFilterBlockWithQuery(const SelectQueryInfo & query_info, const Conte for (size_t i = 0; i < block.columns(); ++i) block.getByPosition(i).column = block.getByPosition(i).type->createColumnConstWithDefaultValue(1); - ExpressionAnalyzer(condition_ast, query_info.syntax_analyzer_result, context) - .getConstActions(block.getColumnsWithTypeAndName()) - ->execute(block); + auto actions = std::make_shared(block.getColumnsWithTypeAndName()); + PreparedSets prepared_sets; + SubqueriesForSets subqueries_for_sets; + ActionsVisitor::Data visitor_data( + context, SizeLimits{}, 1, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); + ActionsVisitor(visitor_data).visit(condition_ast); + actions = visitor_data.getActions(); + auto expression_actions = std::make_shared(actions); + expression_actions->execute(block); /// We will create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. std::vector functions; @@ -156,10 +163,10 @@ bool prepareFilterBlockWithQuery(const SelectQueryInfo & query_info, const Conte return ret; } -void filterBlockWithQuery(const SelectQueryInfo & query_info, Block & block, const Context & context, ASTPtr expression_ast) +void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context, ASTPtr expression_ast) { if (!expression_ast) - prepareFilterBlockWithQuery(query_info, context, block, expression_ast); + prepareFilterBlockWithQuery(query, context, block, expression_ast); if (!expression_ast) return; diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 7c414aa0984..78e3d62472e 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -27,12 +27,12 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va /// Prepare `expression_ast` to filter block. Returns true if `expression_ast` is not trimmed, that is, /// `block` provides all needed columns for `expression_ast`, else return false. -bool prepareFilterBlockWithQuery(const SelectQueryInfo & query_info, const Context & context, Block block, ASTPtr & expression_ast); +bool prepareFilterBlockWithQuery(const ASTPtr & query, const Context & context, Block block, ASTPtr & expression_ast); /// Leave in the block only the rows that fit under the WHERE clause and the PREWHERE clause of the query. /// Only elements of the outer conjunction are considered, depending only on the columns present in the block. /// If `expression_ast` is passed, use it to filter block. -void filterBlockWithQuery(const SelectQueryInfo & query_info, Block & block, const Context & context, ASTPtr expression_ast = {}); +void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context, ASTPtr expression_ast = {}); /// Extract from the input stream a set of `name` column values template From 091894f8cac17fffe5dfdd7aa1f088aaf0347439 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 8 Mar 2021 11:09:06 +0800 Subject: [PATCH 07/12] Add more comments --- src/Storages/VirtualColumnUtils.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 537ef91f4f5..4e2c9577089 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -127,10 +127,10 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va bool prepareFilterBlockWithQuery(const ASTPtr & query, const Context & context, Block block, ASTPtr & expression_ast) { - bool ret = true; + bool unmodified = true; const auto & select = query->as(); if (!select.where() && !select.prewhere()) - return ret; + return unmodified; ASTPtr condition_ast; if (select.prewhere() && select.where()) @@ -138,10 +138,11 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, const Context & context, else condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone(); - // Prepare a block with valid expressions + // Prepare a constant block with valid expressions for (size_t i = 0; i < block.columns(); ++i) block.getByPosition(i).column = block.getByPosition(i).type->createColumnConstWithDefaultValue(1); + // Collect all expression columns in expression_ast. Constant expressions will have constant columns. auto actions = std::make_shared(block.getColumnsWithTypeAndName()); PreparedSets prepared_sets; SubqueriesForSets subqueries_for_sets; @@ -152,15 +153,15 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, const Context & context, auto expression_actions = std::make_shared(actions); expression_actions->execute(block); - /// We will create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. + /// Create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. std::vector functions; if (select.where()) - ret &= extractFunctions(select.where(), block, functions); + unmodified &= extractFunctions(select.where(), block, functions); if (select.prewhere()) - ret &= extractFunctions(select.prewhere(), block, functions); + unmodified &= extractFunctions(select.prewhere(), block, functions); expression_ast = buildWhereExpression(functions); - return ret; + return unmodified; } void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context, ASTPtr expression_ast) From b936619fa963a128633b960b9b1b7134f2ed9307 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 9 Mar 2021 20:09:35 +0800 Subject: [PATCH 08/12] Rename --- .../{connectionID.cpp => connectionId.cpp} | 16 ++++++++-------- .../{partitionID.cpp => partitionId.cpp} | 12 ++++++------ src/Functions/registerFunctionsMiscellaneous.cpp | 8 ++++---- .../0_stateless/01748_partition_id_pruning.sql | 6 +++--- 4 files changed, 21 insertions(+), 21 deletions(-) rename src/Functions/{connectionID.cpp => connectionId.cpp} (64%) rename src/Functions/{partitionID.cpp => partitionId.cpp} (86%) diff --git a/src/Functions/connectionID.cpp b/src/Functions/connectionId.cpp similarity index 64% rename from src/Functions/connectionID.cpp rename to src/Functions/connectionId.cpp index 8e9c81aed6c..a2587afdb1b 100644 --- a/src/Functions/connectionID.cpp +++ b/src/Functions/connectionId.cpp @@ -7,15 +7,15 @@ namespace DB { -/// Get the connection ID. It's used for MySQL handler only. -class FunctionConnectionID : public IFunction +/// Get the connection Id. It's used for MySQL handler only. +class FunctionConnectionId : public IFunction { public: - static constexpr auto name = "connectionID"; + static constexpr auto name = "connectionId"; - explicit FunctionConnectionID(const Context & context_) : context(context_) {} + explicit FunctionConnectionId(const Context & context_) : context(context_) {} - static FunctionPtr create(const Context & context) { return std::make_shared(context); } + static FunctionPtr create(const Context & context) { return std::make_shared(context); } String getName() const override { return name; } @@ -32,10 +32,10 @@ private: const Context & context; }; -void registerFunctionConnectionID(FunctionFactory & factory) +void registerFunctionConnectionId(FunctionFactory & factory) { - factory.registerFunction(); - factory.registerAlias("connection_id", "connectionID"); + factory.registerFunction(); + factory.registerAlias("connection_id", "connectionId"); } } diff --git a/src/Functions/partitionID.cpp b/src/Functions/partitionId.cpp similarity index 86% rename from src/Functions/partitionID.cpp rename to src/Functions/partitionId.cpp index 9f5a3d09b86..b6d9d1bf4e6 100644 --- a/src/Functions/partitionID.cpp +++ b/src/Functions/partitionId.cpp @@ -15,14 +15,14 @@ namespace ErrorCodes } -/** partitionID(x, y, ...) is a function that computes partition ids of arguments. +/** partitionId(x, y, ...) is a function that computes partition ids of arguments. */ -class FunctionPartitionID : public IFunction +class FunctionPartitionId : public IFunction { public: - static constexpr auto name = "partitionID"; + static constexpr auto name = "partitionId"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } @@ -67,9 +67,9 @@ public: } }; -void registerFunctionPartitionID(FunctionFactory & factory) +void registerFunctionPartitionId(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 3557b0fb865..f2c35420ab6 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -69,8 +69,8 @@ void registerFunctionErrorCodeToName(FunctionFactory &); void registerFunctionTcpPort(FunctionFactory &); void registerFunctionByteSize(FunctionFactory &); void registerFunctionFile(FunctionFactory & factory); -void registerFunctionConnectionID(FunctionFactory & factory); -void registerFunctionPartitionID(FunctionFactory & factory); +void registerFunctionConnectionId(FunctionFactory & factory); +void registerFunctionPartitionId(FunctionFactory & factory); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -140,8 +140,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionTcpPort(factory); registerFunctionByteSize(factory); registerFunctionFile(factory); - registerFunctionConnectionID(factory); - registerFunctionPartitionID(factory); + registerFunctionConnectionId(factory); + registerFunctionPartitionId(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql index cca366c5168..0a3f7d2713c 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.sql +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -6,14 +6,14 @@ insert into x values (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); set max_rows_to_read = 3; -select * from x where _partition_id = partitionID(1); +select * from x where _partition_id = partitionId(1); set max_rows_to_read = 4; -- one row for subquery -select * from x where _partition_id in (select partitionID(number + 1) from numbers(1)); +select * from x where _partition_id in (select partitionId(number + 1) from numbers(1)); -- trivial count optimization test set max_rows_to_read = 1; -- one row for subquery -select count() from x where _partition_id in (select partitionID(number + 1) from numbers(1)); +select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1)); drop table x; From 02604185f288bee8635350a38f7515b22f2a5dc8 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 10 Mar 2021 02:48:46 +0800 Subject: [PATCH 09/12] Correctly check constant expr --- src/Storages/VirtualColumnUtils.cpp | 45 +++++++++++++++-------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 4e2c9577089..0002d7c9c28 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -31,33 +31,30 @@ namespace { /// Verifying that the function depends only on the specified columns -bool isValidFunction(const ASTPtr & expression, const Block & block) +bool isValidFunction(const ASTPtr & expression, const std::function & is_constant) { const auto * function = expression->as(); if (function && functionIsInOrGlobalInOperator(function->name)) { // Second argument of IN can be a scalar subquery - return isValidFunction(function->arguments->children[0], block); + return isValidFunction(function->arguments->children[0], is_constant); } else - { - auto column_name = expression->getColumnName(); - return block.has(column_name) && isColumnConst(*block.getByName(column_name).column); - } + return is_constant(expression); } /// Extract all subfunctions of the main conjunction, but depending only on the specified columns -bool extractFunctions(const ASTPtr & expression, const Block & block, std::vector & result) +bool extractFunctions(const ASTPtr & expression, const std::function & is_constant, std::vector & result) { const auto * function = expression->as(); if (function && (function->name == "and" || function->name == "indexHint")) { bool ret = true; for (const auto & child : function->arguments->children) - ret &= extractFunctions(child, block, result); + ret &= extractFunctions(child, is_constant, result); return ret; } - else if (isValidFunction(expression, block)) + else if (isValidFunction(expression, is_constant)) { result.push_back(expression->clone()); return true; @@ -142,23 +139,29 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, const Context & context, for (size_t i = 0; i < block.columns(); ++i) block.getByPosition(i).column = block.getByPosition(i).type->createColumnConstWithDefaultValue(1); - // Collect all expression columns in expression_ast. Constant expressions will have constant columns. - auto actions = std::make_shared(block.getColumnsWithTypeAndName()); - PreparedSets prepared_sets; - SubqueriesForSets subqueries_for_sets; - ActionsVisitor::Data visitor_data( - context, SizeLimits{}, 1, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); - ActionsVisitor(visitor_data).visit(condition_ast); - actions = visitor_data.getActions(); - auto expression_actions = std::make_shared(actions); - expression_actions->execute(block); + // Provide input columns as constant columns to check if an expression is constant. + std::function is_constant = [&block, &context](const ASTPtr & node) + { + auto actions = std::make_shared(block.getColumnsWithTypeAndName()); + PreparedSets prepared_sets; + SubqueriesForSets subqueries_for_sets; + ActionsVisitor::Data visitor_data( + context, SizeLimits{}, 1, {}, std::move(actions), prepared_sets, subqueries_for_sets, true, true, true, false); + ActionsVisitor(visitor_data).visit(node); + actions = visitor_data.getActions(); + auto expression_actions = std::make_shared(actions); + auto block_with_constants = block; + expression_actions->execute(block_with_constants); + auto column_name = node->getColumnName(); + return block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column); + }; /// Create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. std::vector functions; if (select.where()) - unmodified &= extractFunctions(select.where(), block, functions); + unmodified &= extractFunctions(select.where(), is_constant, functions); if (select.prewhere()) - unmodified &= extractFunctions(select.prewhere(), block, functions); + unmodified &= extractFunctions(select.prewhere(), is_constant, functions); expression_ast = buildWhereExpression(functions); return unmodified; From 466c70fb7cc75c3c1ef3f0d34adc829834c796ad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Mar 2021 02:00:25 +0300 Subject: [PATCH 10/12] Code simplification --- src/Functions/partitionId.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp index b6d9d1bf4e6..c8411fb1860 100644 --- a/src/Functions/partitionId.cpp +++ b/src/Functions/partitionId.cpp @@ -45,21 +45,15 @@ public: virtual ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + Block sample_block(arguments); size_t size = arguments.size(); - Columns columns; - Block sample_block; - for (const auto & argument : arguments) - { - sample_block.insert(argument); - columns.push_back(argument.column); - } auto result_column = ColumnString::create(); for (size_t j = 0; j < input_rows_count; ++j) { Row row(size); for (size_t i = 0; i < size; ++i) - columns[i]->get(j, row[i]); + arguments[i].column->get(j, row[i]); MergeTreePartition partition(std::move(row)); result_column->insert(partition.getID(sample_block)); } From e9c2309c05884f061e45561264e1481771107165 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Mar 2021 02:02:53 +0300 Subject: [PATCH 11/12] A comment --- src/Functions/partitionId.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp index c8411fb1860..39d6f1aff84 100644 --- a/src/Functions/partitionId.cpp +++ b/src/Functions/partitionId.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes /** partitionId(x, y, ...) is a function that computes partition ids of arguments. + * The function is slow and should not be called for large amount of rows. */ class FunctionPartitionId : public IFunction { From 14c4cc2ccbad455af022e99b4d2d8b60fba0240d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Mar 2021 08:19:15 +0300 Subject: [PATCH 12/12] Update ya.make --- src/Functions/ya.make | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/ya.make b/src/Functions/ya.make index e57935743eb..acabd7b3c99 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -210,7 +210,7 @@ SRCS( cbrt.cpp coalesce.cpp concat.cpp - connectionID.cpp + connectionId.cpp convertCharset.cpp cos.cpp cosh.cpp @@ -373,7 +373,7 @@ SRCS( now.cpp now64.cpp nullIf.cpp - partitionID.cpp + partitionId.cpp pi.cpp plus.cpp pointInEllipses.cpp