diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 26a69c1e01b..19bb7ea0554 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -600,7 +600,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query.setFinal(); } - auto analyze = [&] (bool try_move_to_prewhere) + auto analyze = [&] () { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; @@ -618,7 +618,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( required_result_column_names, table_join); - query_info.syntax_analyzer_result = syntax_analyzer_result; context->setDistributed(syntax_analyzer_result->is_remote_storage); @@ -632,38 +631,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( view = nullptr; } - if (try_move_to_prewhere - && storage && storage->canMoveConditionsToPrewhere() - && query.where() && !query.prewhere() - && !query.hasJoin()) /// Join may produce rows with nulls or default values, it's difficult to analyze if they affected or not. - { - /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable - if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) - { - /// Extract column compressed sizes. 
- std::unordered_map column_compressed_sizes; - for (const auto & [name, sizes] : column_sizes) - column_compressed_sizes[name] = sizes.data_compressed; - - SelectQueryInfo current_info; - current_info.query = query_ptr; - current_info.syntax_analyzer_result = syntax_analyzer_result; - - Names queried_columns = syntax_analyzer_result->requiredSourceColumns(); - const auto & supported_prewhere_columns = storage->supportedPrewhereColumns(); - - MergeTreeWhereOptimizer where_optimizer{ - std::move(column_compressed_sizes), - metadata_snapshot, - storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context), - queried_columns, - supported_prewhere_columns, - log}; - - where_optimizer.optimize(current_info, context); - } - } - if (query.prewhere() && query.where()) { /// Filter block in WHERE instead to get better performance @@ -777,7 +744,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( result_header = getSampleBlockImpl(); }; - analyze(shouldMoveToPrewhere()); + analyze(); bool need_analyze_again = false; bool can_analyze_again = false; @@ -823,7 +790,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Do not try move conditions to PREWHERE for the second time. /// Otherwise, we won't be able to fallback from inefficient PREWHERE to WHERE later. 
- analyze(/* try_move_to_prewhere = */ false); + analyze(); } /// If there is no WHERE, filter blocks as usual diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index d16e01ef2d2..abf86659d02 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -98,23 +98,7 @@ Block getHeaderForProcessingStage( case QueryProcessingStage::FetchColumns: { Block header = storage_snapshot->getSampleBlockForColumns(column_names); - - if (query_info.prewhere_info) - { - auto & prewhere_info = *query_info.prewhere_info; - - if (prewhere_info.row_level_filter) - { - header = prewhere_info.row_level_filter->updateHeader(std::move(header)); - header.erase(prewhere_info.row_level_column_name); - } - - if (prewhere_info.prewhere_actions) - header = prewhere_info.prewhere_actions->updateHeader(std::move(header)); - - if (prewhere_info.remove_prewhere_column) - header.erase(prewhere_info.prewhere_column_name); - } + header = SourceStepWithFilter::applyPrewhereActions(header, query_info.prewhere_info); return header; } case QueryProcessingStage::WithMergeableState: @@ -153,7 +137,8 @@ Block getHeaderForProcessingStage( if (context->getSettingsRef().allow_experimental_analyzer) { - auto storage = std::make_shared(storage_snapshot->storage.getStorageID(), storage_snapshot->metadata->getColumns()); + auto storage = std::make_shared( + storage_snapshot->storage.getStorageID(), storage_snapshot->metadata->getColumns(), &storage_snapshot->storage); InterpreterSelectQueryAnalyzer interpreter(query, context, storage, SelectQueryOptions(processed_stage).analyze()); result = interpreter.getSampleBlock(); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 79e76b43e2b..2d4b72720a8 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -135,6 +136,7 @@ void 
checkStoragesSupportTransactions(const PlannerContextPtr & planner_context) * * StorageDistributed skip unused shards optimization relies on this. * Parallel replicas estimation relies on this too. + * StorageMerge common header calculation relies on this too. * * To collect filters that will be applied to specific table in case we have JOINs requires * to run query plan optimization pipeline. @@ -162,7 +164,7 @@ void collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree, const Planne continue; const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); - if (typeid_cast(storage.get()) + if (typeid_cast(storage.get()) || typeid_cast(storage.get()) || (parallel_replicas_estimation_enabled && std::dynamic_pointer_cast(storage))) { collect_filters = true; @@ -210,6 +212,7 @@ void collectFiltersForAnalysis(const QueryTreeNodePtr & query_tree, const Planne auto filter_actions = ActionsDAG::buildFilterActionsDAG(read_from_dummy->getFilterNodes().nodes); auto & table_expression_data = dummy_storage_to_table_expression_data.at(&read_from_dummy->getStorage()); table_expression_data->setFilterActions(std::move(filter_actions)); + table_expression_data->setPrewhereInfo(read_from_dummy->getPrewhereInfo()); } } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index f8d051ad5b2..39da32ea696 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -624,6 +624,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions(); + table_expression_query_info.prewhere_info = table_expression_data.getPrewhereInfo(); table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == 
planner_context->getGlobalPlannerContext()->parallel_replicas_table; size_t max_streams = settings.max_threads; @@ -763,15 +764,16 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } } + PrewhereInfoPtr prewhere_info; const auto & prewhere_actions = table_expression_data.getPrewhereFilterActions(); if (prewhere_actions) { - table_expression_query_info.prewhere_info = std::make_shared(); - table_expression_query_info.prewhere_info->prewhere_actions = prewhere_actions; - table_expression_query_info.prewhere_info->prewhere_column_name = prewhere_actions->getOutputs().at(0)->result_name; - table_expression_query_info.prewhere_info->remove_prewhere_column = true; - table_expression_query_info.prewhere_info->need_filter = true; + prewhere_info = std::make_shared(); + prewhere_info->prewhere_actions = prewhere_actions; + prewhere_info->prewhere_column_name = prewhere_actions->getOutputs().at(0)->result_name; + prewhere_info->remove_prewhere_column = true; + prewhere_info->need_filter = true; } updatePrewhereOutputsIfNeeded(table_expression_query_info, table_expression_data.getColumnNames(), storage_snapshot); @@ -784,32 +786,29 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (!filter_info.actions) return; - bool is_final = table_expression_query_info.table_expression_modifiers && - table_expression_query_info.table_expression_modifiers->hasFinal(); - bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); + bool is_final = table_expression_query_info.table_expression_modifiers + && table_expression_query_info.table_expression_modifiers->hasFinal(); + bool optimize_move_to_prewhere + = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); if (storage->supportsPrewhere() && optimize_move_to_prewhere) { - if (!table_expression_query_info.prewhere_info) - 
table_expression_query_info.prewhere_info = std::make_shared(); + if (!prewhere_info) + prewhere_info = std::make_shared(); - if (!table_expression_query_info.prewhere_info->prewhere_actions) + if (!prewhere_info->prewhere_actions) { - table_expression_query_info.prewhere_info->prewhere_actions = filter_info.actions; - table_expression_query_info.prewhere_info->prewhere_column_name = filter_info.column_name; - table_expression_query_info.prewhere_info->remove_prewhere_column = filter_info.do_remove_column; - table_expression_query_info.prewhere_info->need_filter = true; + prewhere_info->prewhere_actions = filter_info.actions; + prewhere_info->prewhere_column_name = filter_info.column_name; + prewhere_info->remove_prewhere_column = filter_info.do_remove_column; } else if (!table_expression_query_info.prewhere_info->row_level_filter) { - table_expression_query_info.prewhere_info->row_level_filter = filter_info.actions; - table_expression_query_info.prewhere_info->row_level_column_name = filter_info.column_name; - table_expression_query_info.prewhere_info->need_filter = true; - } - else - { - where_filters.emplace_back(filter_info, std::move(description)); + prewhere_info->row_level_filter = filter_info.actions; + prewhere_info->row_level_column_name = filter_info.column_name; } + + prewhere_info->need_filter = true; } else { @@ -817,7 +816,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } }; - auto row_policy_filter_info = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); + auto row_policy_filter_info + = buildRowPolicyFilterIfNeeded(storage, table_expression_query_info, planner_context, used_row_policies); add_filter(row_policy_filter_info, "Row-level security filter"); if (row_policy_filter_info.actions) table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions); @@ -826,25 +826,37 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr 
table_expres { if (settings.parallel_replicas_count > 1) { - auto parallel_replicas_custom_key_filter_info = buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context); + auto parallel_replicas_custom_key_filter_info + = buildCustomKeyFilterIfNeeded(storage, table_expression_query_info, planner_context); add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); } - else + else if (auto * distributed = typeid_cast(storage.get()); + distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) { - if (auto * distributed = typeid_cast(storage.get()); - distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) - { - planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); - } + planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); } } const auto & table_expression_alias = table_expression->getOriginalAlias(); - auto additional_filters_info = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context); + auto additional_filters_info + = buildAdditionalFiltersIfNeeded(storage, table_expression_alias, table_expression_query_info, planner_context); add_filter(additional_filters_info, "additional filter"); - from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); - storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams); + if (!table_expression_query_info.prewhere_info) + table_expression_query_info.prewhere_info = prewhere_info; + + from_stage = storage->getQueryProcessingStage( + query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); + + storage->read( + query_plan, + columns_names, + storage_snapshot, + 
table_expression_query_info, + query_context, + from_stage, + max_block_size, + max_streams); for (const auto & filter_info_and_description : where_filters) { diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index f6ef4017c98..20c4f05ea7e 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -17,6 +17,9 @@ using ColumnIdentifier = std::string; using ColumnIdentifiers = std::vector; using ColumnIdentifierSet = std::unordered_set; +struct PrewhereInfo; +using PrewhereInfoPtr = std::shared_ptr; + /** Table expression data is created for each table expression that take part in query. * Table expression data has information about columns that participate in query, their name to identifier mapping, * and additional table expression properties. @@ -282,6 +285,16 @@ public: filter_actions = std::move(filter_actions_value); } + const PrewhereInfoPtr & getPrewhereInfo() const + { + return prewhere_info; + } + + void setPrewhereInfo(PrewhereInfoPtr prewhere_info_value) + { + prewhere_info = std::move(prewhere_info_value); + } + private: void addColumnImpl(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) { @@ -309,6 +322,9 @@ private: /// Valid for table, table function ActionsDAGPtr filter_actions; + /// Valid for table, table function + PrewhereInfoPtr prewhere_info; + /// Valid for table, table function ActionsDAGPtr prewhere_filter_actions; diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 50ffa83a272..d3a8f68d084 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -410,8 +410,10 @@ QueryTreeNodePtr replaceTableExpressionsWithDummyTables(const QueryTreeNodePtr & const auto & storage_snapshot = table_node ? 
table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - storage_dummy - = std::make_shared(storage_snapshot->storage.getStorageID(), ColumnsDescription(storage_snapshot->getColumns(get_column_options))); + storage_dummy = std::make_shared( + storage_snapshot->storage.getStorageID(), + ColumnsDescription(storage_snapshot->getColumns(get_column_options)), + &storage_snapshot->storage); } else if (subquery_node || union_node) { @@ -429,7 +431,9 @@ QueryTreeNodePtr replaceTableExpressionsWithDummyTables(const QueryTreeNodePtr & storage_dummy_columns.emplace_back(projection_column); } - storage_dummy = std::make_shared(StorageID{"dummy", "subquery_" + std::to_string(subquery_index)}, ColumnsDescription::fromNamesAndTypes(storage_dummy_columns)); + storage_dummy = std::make_shared( + StorageID{"dummy", "subquery_" + std::to_string(subquery_index)}, + ColumnsDescription::fromNamesAndTypes(storage_dummy_columns)); ++subquery_index; } diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index 362f7109f47..c1be3dc1967 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -126,8 +126,10 @@ public: const auto & storage_snapshot = table_node ? 
table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - auto storage_dummy - = std::make_shared(storage_snapshot->storage.getStorageID(), ColumnsDescription(storage_snapshot->getColumns(get_column_options))); + auto storage_dummy = std::make_shared( + storage_snapshot->storage.getStorageID(), + ColumnsDescription(storage_snapshot->getColumns(get_column_options)), + &storage_snapshot->storage); auto dummy_table_node = std::make_shared(std::move(storage_dummy), context); diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index df5c13a7f3b..a3d0ec8d8e2 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -64,6 +65,8 @@ public: using DataStreams = std::vector; +using QueryPlanRawPtrs = std::list; + /// Single step of query plan. class IQueryPlanStep { @@ -109,6 +112,9 @@ public: /// Get description of processors added in current step. Should be called after updatePipeline(). virtual void describePipeline(FormatSettings & /*settings*/) const {} + /// Get child plans contained inside some steps (e.g ReadFromMerge) so that they are visible when doing EXPLAIN. + virtual QueryPlanRawPtrs getChildPlans() { return {}; } + /// Append extra processors for this step. 
void appendExtraProcessors(const Processors & extra_processors); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 49e1a49f131..be942abbcc1 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -1,8 +1,9 @@ #include #include #include -#include +#include #include +#include #include #include #include @@ -38,36 +39,43 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) * * 1. SomeNode * 2. FilterNode - * 3. ReadFromMergeTreeNode + * 3. SourceStepWithFilterNode */ - auto * read_from_merge_tree = typeid_cast(frame.node->step.get()); - if (!read_from_merge_tree) + auto * source_step_with_filter = dynamic_cast(frame.node->step.get()); + if (!source_step_with_filter) return; - const auto & storage_prewhere_info = read_from_merge_tree->getPrewhereInfo(); - if (storage_prewhere_info && storage_prewhere_info->prewhere_actions) + const auto & storage_snapshot = source_step_with_filter->getStorageSnapshot(); + + const auto * maybe_dummy = typeid_cast(&storage_snapshot->storage); + const auto & storage = maybe_dummy && maybe_dummy->getOriginalStorage() ? *maybe_dummy->getOriginalStorage() : storage_snapshot->storage; + if (!storage.canMoveConditionsToPrewhere()) return; + const auto & storage_prewhere_info = source_step_with_filter->getPrewhereInfo(); + + /// If existing PrewhereInfo is generated by previous optimization pass, + /// we still need to optimize because it can modify current plan. You may + /// wonder why there are previous optimization passes. See collectFiltersForAnalysis for example. + if (storage_prewhere_info && storage_prewhere_info->prewhere_actions && !storage_prewhere_info->generated_by_optimizer) + return; + + /// TODO: We can also check for UnionStep, such as StorageBuffer and local distributed plans. 
QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node; const auto * filter_step = typeid_cast(filter_node->step.get()); if (!filter_step) return; - const auto & context = read_from_merge_tree->getContext(); + const auto & context = source_step_with_filter->getContext(); const auto & settings = context->getSettingsRef(); - if (!settings.allow_experimental_analyzer) - return; - - bool is_final = read_from_merge_tree->isQueryWithFinal(); + bool is_final = source_step_with_filter->isQueryWithFinal(); bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); if (!optimize_move_to_prewhere) return; - const auto & storage_snapshot = read_from_merge_tree->getStorageSnapshot(); - ColumnsWithTypeAndName required_columns_after_filter; - if (read_from_merge_tree->isQueryWithSampling()) + if (source_step_with_filter->isQueryWithSampling()) { const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey(); const auto & sampling_source_columns = sampling_key.expression->getRequiredColumnsWithTypes(); @@ -77,7 +85,6 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) required_columns_after_filter.insert(required_columns_after_filter.end(), sampling_result_columns.begin(), sampling_result_columns.end()); } - const auto & storage = storage_snapshot->storage; const auto & storage_metadata = storage_snapshot->metadata; auto column_sizes = storage.getColumnSizes(); if (column_sizes.empty()) @@ -88,19 +95,19 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) for (const auto & [name, sizes] : column_sizes) column_compressed_sizes[name] = sizes.data_compressed; - Names queried_columns = read_from_merge_tree->getRealColumnNames(); + Names queried_columns = source_step_with_filter->requiredSourceColumns(); MergeTreeWhereOptimizer where_optimizer{ std::move(column_compressed_sizes), storage_metadata, - 
storage.getConditionEstimatorByPredicate(read_from_merge_tree->getQueryInfo(), storage_snapshot, context), + storage.getConditionEstimatorByPredicate(source_step_with_filter->getQueryInfo(), storage_snapshot, context), queried_columns, storage.supportedPrewhereColumns(), getLogger("QueryPlanOptimizePrewhere")}; auto optimize_result = where_optimizer.optimize(filter_step->getExpression(), filter_step->getFilterColumnName(), - read_from_merge_tree->getContext(), + source_step_with_filter->getContext(), is_final); if (optimize_result.prewhere_nodes.empty()) @@ -142,7 +149,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) /// /// So, here we restore removed inputs for PREWHERE actions { - std::unordered_set first_outputs(split_result.first->getOutputs().begin(), split_result.first->getOutputs().end()); + std::unordered_set first_outputs( + split_result.first->getOutputs().begin(), split_result.first->getOutputs().end()); for (const auto * input : split_result.first->getInputs()) { if (!first_outputs.contains(input)) @@ -157,7 +165,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) ActionsDAG::NodeRawConstPtrs conditions; conditions.reserve(split_result.split_nodes_mapping.size()); - for (const auto * condition : optimize_result.prewhere_nodes) + for (const auto * condition : optimize_result.prewhere_nodes_list) conditions.push_back(split_result.split_nodes_mapping.at(condition)); prewhere_info->prewhere_actions = std::move(split_result.first); @@ -178,20 +186,22 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info->prewhere_actions->getOutputs().push_back(node); } - read_from_merge_tree->updatePrewhereInfo(prewhere_info); + prewhere_info->generated_by_optimizer = true; + source_step_with_filter->updatePrewhereInfo(prewhere_info); if (!optimize_result.fully_moved_to_prewhere) { filter_node->step = std::make_unique( - read_from_merge_tree->getOutputStream(), + source_step_with_filter->getOutputStream(), 
std::move(split_result.second), filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); } else { + /// Have to keep this expression to change column names to column identifiers filter_node->step = std::make_unique( - read_from_merge_tree->getOutputStream(), + source_step_with_filter->getOutputStream(), std::move(split_result.second)); } } diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 2d2dc66a8c9..6261f6c0ada 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -360,7 +360,7 @@ std::string debugExplainStep(const IQueryPlanStep & step) return out.str(); } -void QueryPlan::explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & options) +void QueryPlan::explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & options, size_t indent) { checkInitialized(); @@ -382,7 +382,7 @@ void QueryPlan::explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & opt if (!frame.is_description_printed) { - settings.offset = (stack.size() - 1) * settings.indent; + settings.offset = (indent + stack.size() - 1) * settings.indent; explainStep(*frame.node->step, settings, options); frame.is_description_printed = true; } @@ -393,7 +393,14 @@ void QueryPlan::explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & opt ++frame.next_child; } else + { + auto child_plans = frame.node->step->getChildPlans(); + + for (const auto & child_plan : child_plans) + child_plan->explainPlan(buffer, options, indent + stack.size()); + stack.pop(); + } } } diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index 62d658ddccd..bf135ba3cd6 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -82,7 +82,7 @@ public: }; JSONBuilder::ItemPtr explainPlan(const ExplainPlanOptions & options); - void explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & options); + void explainPlan(WriteBuffer & buffer, 
const ExplainPlanOptions & options, size_t indent = 0); void explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptions & options); void explainEstimate(MutableColumns & columns); diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 188956b34fc..22ad53a39e0 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -95,17 +95,24 @@ private: InitializerFunc initializer_func; }; -ReadFromMemoryStorageStep::ReadFromMemoryStorageStep(const Names & columns_to_read_, - StoragePtr storage_, - const StorageSnapshotPtr & storage_snapshot_, - const size_t num_streams_, - const bool delay_read_for_global_sub_queries_) : - SourceStepWithFilter(DataStream{.header=storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}), - columns_to_read(columns_to_read_), - storage(std::move(storage_)), - storage_snapshot(storage_snapshot_), - num_streams(num_streams_), - delay_read_for_global_sub_queries(delay_read_for_global_sub_queries_) +ReadFromMemoryStorageStep::ReadFromMemoryStorageStep( + const Names & columns_to_read_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + StoragePtr storage_, + const size_t num_streams_, + const bool delay_read_for_global_sub_queries_) + : SourceStepWithFilter( + DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}, + columns_to_read_, + query_info_, + storage_snapshot_, + context_) + , columns_to_read(columns_to_read_) + , storage(std::move(storage_)) + , num_streams(num_streams_) + , delay_read_for_global_sub_queries(delay_read_for_global_sub_queries_) { } diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h index 1122bfbb2a5..238c1a3aad0 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h +++ 
b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.h @@ -15,11 +15,14 @@ class QueryPipelineBuilder; class ReadFromMemoryStorageStep final : public SourceStepWithFilter { public: - ReadFromMemoryStorageStep(const Names & columns_to_read_, - StoragePtr storage_, - const StorageSnapshotPtr & storage_snapshot_, - size_t num_streams_, - bool delay_read_for_global_sub_queries_); + ReadFromMemoryStorageStep( + const Names & columns_to_read_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + StoragePtr storage_, + size_t num_streams_, + bool delay_read_for_global_sub_queries_); ReadFromMemoryStorageStep() = delete; ReadFromMemoryStorageStep(const ReadFromMemoryStorageStep &) = delete; @@ -37,7 +40,6 @@ private: Names columns_to_read; StoragePtr storage; - StorageSnapshotPtr storage_snapshot; size_t num_streams; bool delay_read_for_global_sub_queries; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index eaf3e1af457..c8720f555f9 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -260,12 +260,13 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c ReadFromMergeTree::ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, std::vector alter_conversions_, + const Names & column_names_, Names real_column_names_, Names virt_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, - StorageSnapshotPtr storage_snapshot_, - ContextPtr context_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, size_t max_block_size_, size_t num_streams_, bool sample_factor_column_queried_, @@ -277,18 +278,15 @@ ReadFromMergeTree::ReadFromMergeTree( storage_snapshot_->getSampleBlockForColumns(real_column_names_), query_info_.prewhere_info, data_.getPartitionValueType(), - virt_column_names_)}, query_info_.prewhere_info) 
+ virt_column_names_)}, column_names_, query_info_, storage_snapshot_, context_) , reader_settings(getMergeTreeReaderSettings(context_, query_info_)) , prepared_parts(std::move(parts_)) , alter_conversions_for_parts(std::move(alter_conversions_)) , real_column_names(std::move(real_column_names_)) , virt_column_names(std::move(virt_column_names_)) , data(data_) - , query_info(query_info_) , actions_settings(ExpressionActionsSettings::fromContext(context_)) - , storage_snapshot(std::move(storage_snapshot_)) , metadata_for_reading(storage_snapshot->getMetadataForQuery()) - , context(std::move(context_)) , block_size{ .max_block_size_rows = max_block_size_, .preferred_block_size_bytes = context->getSettingsRef().preferred_block_size_bytes, @@ -1464,8 +1462,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, - ContextPtr context, + const SelectQueryInfo & query_info_, + ContextPtr context_, size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, @@ -1478,8 +1476,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( std::move(parts), std::move(alter_conversions), metadata_snapshot, - query_info, - context, + query_info_, + context_, num_streams, max_block_numbers_to_read, data, @@ -1493,8 +1491,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( MergeTreeData::DataPartsVector parts, std::vector alter_conversions, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, - ContextPtr context, + const SelectQueryInfo & query_info_, + ContextPtr context_, size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, @@ -1504,7 +1502,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( std::optional & indexes) { 
AnalysisResult result; - const auto & settings = context->getSettingsRef(); + const auto & settings = context_->getSettingsRef(); size_t total_parts = parts.size(); @@ -1522,7 +1520,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( const Names & primary_key_column_names = primary_key.column_names; if (!indexes) - buildIndexes(indexes, query_info.filter_actions_dag, data, parts, context, query_info, metadata_snapshot); + buildIndexes(indexes, query_info_.filter_actions_dag, data, parts, context_, query_info_, metadata_snapshot); if (indexes->part_values && indexes->part_values->empty()) return std::make_shared(std::move(result)); @@ -1554,19 +1552,19 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( indexes->part_values, metadata_snapshot, data, - context, + context_, max_block_numbers_to_read.get(), log, result.index_stats); result.sampling = MergeTreeDataSelectExecutor::getSampling( - query_info, + query_info_, metadata_snapshot->getColumns().getAllPhysical(), parts, indexes->key_condition, data, metadata_snapshot, - context, + context_, sample_factor_column_queried, log); @@ -1577,12 +1575,12 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( total_marks_pk += part->index_granularity.getMarksCountWithoutFinal(); parts_before_pk = parts.size(); - auto reader_settings = getMergeTreeReaderSettings(context, query_info); + auto reader_settings = getMergeTreeReaderSettings(context_, query_info_); result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( std::move(parts), std::move(alter_conversions), metadata_snapshot, - context, + context_, indexes->key_condition, indexes->part_offset_condition, indexes->skip_indexes, @@ -1618,8 +1616,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( result.total_marks_pk = total_marks_pk; result.selected_rows = sum_rows; - if (query_info.input_order_info) - 
result.read_type = (query_info.input_order_info->direction > 0) + if (query_info_.input_order_info) + result.read_type = (query_info_.input_order_info->direction > 0) ? ReadType::InOrder : ReadType::InReverseOrder; @@ -1768,11 +1766,6 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const return *result_ptr; } -bool ReadFromMergeTree::isQueryWithFinal() const -{ - return query_info.isFinal(); -} - bool ReadFromMergeTree::isQueryWithSampling() const { if (context->getSettingsRef().parallel_replicas_count > 1 && data.supportsSampling()) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 5a98c7bc300..282d2ce4aa3 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -110,12 +110,13 @@ public: ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, std::vector alter_conversions_, + const Names & column_names_, Names real_column_names_, Names virt_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, - StorageSnapshotPtr storage_snapshot, - ContextPtr context_, + const StorageSnapshotPtr & storage_snapshot, + const ContextPtr & context_, size_t max_block_size_, size_t num_streams_, bool sample_factor_column_queried_, @@ -139,7 +140,6 @@ public: const Names & getVirtualColumnNames() const { return virt_column_names; } StorageID getStorageID() const { return data.getStorageID(); } - const StorageSnapshotPtr & getStorageSnapshot() const { return storage_snapshot; } UInt64 getSelectedParts() const { return selected_parts; } UInt64 getSelectedRows() const { return selected_rows; } UInt64 getSelectedMarks() const { return selected_marks; } @@ -173,18 +173,14 @@ public: MergeTreeData::DataPartsVector parts, std::vector alter_conversions) const; - ContextPtr getContext() const { return context; } - const SelectQueryInfo & getQueryInfo() const { return query_info; } StorageMetadataPtr getStorageMetadata() 
const { return metadata_for_reading; } - const PrewhereInfoPtr & getPrewhereInfo() const { return prewhere_info; } /// Returns `false` if requested reading cannot be performed. bool requestReadingInOrder(size_t prefix_size, int direction, size_t limit); bool readsInOrder() const; - void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value); - bool isQueryWithFinal() const; - bool isQueryWithSampling() const; + void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) override; + bool isQueryWithSampling() const override; /// Returns true if the optimization is applicable (and applies it then). bool requestOutputEachPartitionThroughSeparatePort(); @@ -235,13 +231,10 @@ private: Names virt_column_names; const MergeTreeData & data; - SelectQueryInfo query_info; ExpressionActionsSettings actions_settings; - StorageSnapshotPtr storage_snapshot; StorageMetadataPtr metadata_for_reading; - ContextPtr context; const MergeTreeReadTask::BlockSizeParams block_size; size_t requested_num_streams; diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index afc477e0b64..db1cb524e32 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -321,21 +321,24 @@ void shrinkRanges(Ranges & ranges, size_t size) ReadFromSystemNumbersStep::ReadFromSystemNumbersStep( const Names & column_names_, - StoragePtr storage_, + const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, - SelectQueryInfo & query_info, - ContextPtr context_, + const ContextPtr & context_, + StoragePtr storage_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter{DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}} + : SourceStepWithFilter( + DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}, + column_names_, + query_info_, + storage_snapshot_, + 
context_) , column_names{column_names_} , storage{std::move(storage_)} - , storage_snapshot{storage_snapshot_} - , context{std::move(context_)} , key_expression{KeyDescription::parse(column_names[0], storage_snapshot->metadata->columns, context).expression} , max_block_size{max_block_size_} , num_streams{num_streams_} - , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) + , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) , should_pushdown_limit(shouldPushdownLimit(query_info, limit_length_and_offset.first)) , limit(query_info.limit) , storage_limits(query_info.storage_limits) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h index 0a8c62b30af..aac0f386974 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.h @@ -16,10 +16,10 @@ class ReadFromSystemNumbersStep final : public SourceStepWithFilter public: ReadFromSystemNumbersStep( const Names & column_names_, - StoragePtr storage_, + const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, - SelectQueryInfo & query_info, - ContextPtr context_, + const ContextPtr & context_, + StoragePtr storage_, size_t max_block_size_, size_t num_streams_); @@ -36,8 +36,6 @@ private: const Names column_names; StoragePtr storage; - StorageSnapshotPtr storage_snapshot; - ContextPtr context; ExpressionActionsPtr key_expression; size_t max_block_size; size_t num_streams; diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp new file mode 100644 index 00000000000..4df6a00c539 --- /dev/null +++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp @@ -0,0 +1,146 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int 
ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; +} + +Block SourceStepWithFilter::applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info) +{ + if (prewhere_info) + { + if (prewhere_info->row_level_filter) + { + block = prewhere_info->row_level_filter->updateHeader(std::move(block)); + auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); + if (!row_level_column.type->canBeUsedInBooleanContext()) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Invalid type for filter in PREWHERE: {}", + row_level_column.type->getName()); + } + + block.erase(prewhere_info->row_level_column_name); + } + + if (prewhere_info->prewhere_actions) + { + block = prewhere_info->prewhere_actions->updateHeader(std::move(block)); + + auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); + if (!prewhere_column.type->canBeUsedInBooleanContext()) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Invalid type for filter in PREWHERE: {}", + prewhere_column.type->getName()); + } + + if (prewhere_info->remove_prewhere_column) + { + block.erase(prewhere_info->prewhere_column_name); + } + else if (prewhere_info->need_filter) + { + WhichDataType which(removeNullable(recursiveRemoveLowCardinality(prewhere_column.type))); + + if (which.isNativeInt() || which.isNativeUInt()) + prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1u)->convertToFullColumnIfConst(); + else if (which.isFloat()) + prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst(); + else + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Illegal type {} of column for filter", + prewhere_column.type->getName()); + } + } + } + + return block; +} + +void SourceStepWithFilter::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value) +{ + query_info.prewhere_info = prewhere_info_value; + prewhere_info = prewhere_info_value; + 
output_stream = DataStream{.header = applyPrewhereActions(output_stream->header, prewhere_info)}; +} + +void SourceStepWithFilter::describeActions(FormatSettings & format_settings) const +{ + std::string prefix(format_settings.offset, format_settings.indent_char); + + if (prewhere_info) + { + format_settings.out << prefix << "Prewhere info" << '\n'; + format_settings.out << prefix << "Need filter: " << prewhere_info->need_filter << '\n'; + + prefix.push_back(format_settings.indent_char); + prefix.push_back(format_settings.indent_char); + + if (prewhere_info->prewhere_actions) + { + format_settings.out << prefix << "Prewhere filter" << '\n'; + format_settings.out << prefix << "Prewhere filter column: " << prewhere_info->prewhere_column_name; + if (prewhere_info->remove_prewhere_column) + format_settings.out << " (removed)"; + format_settings.out << '\n'; + + auto expression = std::make_shared(prewhere_info->prewhere_actions); + expression->describeActions(format_settings.out, prefix); + } + + if (prewhere_info->row_level_filter) + { + format_settings.out << prefix << "Row level filter" << '\n'; + format_settings.out << prefix << "Row level filter column: " << prewhere_info->row_level_column_name << '\n'; + + auto expression = std::make_shared(prewhere_info->row_level_filter); + expression->describeActions(format_settings.out, prefix); + } + } +} + +void SourceStepWithFilter::describeActions(JSONBuilder::JSONMap & map) const +{ + if (prewhere_info) + { + std::unique_ptr prewhere_info_map = std::make_unique(); + prewhere_info_map->add("Need filter", prewhere_info->need_filter); + + if (prewhere_info->prewhere_actions) + { + std::unique_ptr prewhere_filter_map = std::make_unique(); + prewhere_filter_map->add("Prewhere filter column", prewhere_info->prewhere_column_name); + prewhere_filter_map->add("Prewhere filter remove filter column", prewhere_info->remove_prewhere_column); + auto expression = std::make_shared(prewhere_info->prewhere_actions); + 
prewhere_filter_map->add("Prewhere filter expression", expression->toTree()); + + prewhere_info_map->add("Prewhere filter", std::move(prewhere_filter_map)); + } + + if (prewhere_info->row_level_filter) + { + std::unique_ptr row_level_filter_map = std::make_unique(); + row_level_filter_map->add("Row level filter column", prewhere_info->row_level_column_name); + auto expression = std::make_shared(prewhere_info->row_level_filter); + row_level_filter_map->add("Row level filter expression", expression->toTree()); + + prewhere_info_map->add("Row level filter", std::move(row_level_filter_map)); + } + + map.add("Prewhere info", std::move(prewhere_info_map)); + } +} + +} diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h index b71673e2d16..e284c78530a 100644 --- a/src/Processors/QueryPlan/SourceStepWithFilter.h +++ b/src/Processors/QueryPlan/SourceStepWithFilter.h @@ -1,8 +1,9 @@ #pragma once -#include #include +#include #include +#include namespace DB { @@ -16,25 +17,33 @@ public: using Base = ISourceStep; using Base::Base; - explicit SourceStepWithFilter(DataStream output_stream_, PrewhereInfoPtr prewhere_info_ = nullptr) - : ISourceStep(std::move(output_stream_)), prewhere_info(std::move(prewhere_info_)) + SourceStepWithFilter( + DataStream output_stream_, + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_) + : ISourceStep(std::move(output_stream_)) + , required_source_columns(column_names_) + , query_info(query_info_) + , prewhere_info(query_info.prewhere_info) + , storage_snapshot(storage_snapshot_) + , context(context_) { } - const std::vector & getFilters() const - { - return filter_dags; - } + const std::vector & getFilters() const { return filter_dags; } + const ActionDAGNodes & getFilterNodes() const { return filter_nodes; } - const ActionDAGNodes & getFilterNodes() const - { - return filter_nodes; - } + const 
SelectQueryInfo & getQueryInfo() const { return query_info; } + const PrewhereInfoPtr & getPrewhereInfo() const { return prewhere_info; } + ContextPtr getContext() const { return context; } + const StorageSnapshotPtr & getStorageSnapshot() const { return storage_snapshot; } - const PrewhereInfoPtr & getPrewhereInfo() const - { - return prewhere_info; - } + bool isQueryWithFinal() const { return query_info.isFinal(); } + virtual bool isQueryWithSampling() const { return false; } + + const Names & requiredSourceColumns() const { return required_source_columns; } void addFilter(ActionsDAGPtr filter_dag, std::string column_name) { @@ -49,12 +58,24 @@ public: } /// Apply filters that can optimize reading from storage. - virtual void applyFilters() {} + virtual void applyFilters() { } + + virtual void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value); + + void describeActions(FormatSettings & format_settings) const override; + + void describeActions(JSONBuilder::JSONMap & map) const override; + + static Block applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info); protected: std::vector filter_dags; ActionDAGNodes filter_nodes; + Names required_source_columns; + SelectQueryInfo query_info; PrewhereInfoPtr prewhere_info; + StorageSnapshotPtr storage_snapshot; + ContextPtr context; }; } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 5e937d3d31d..d2082d053d9 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -918,18 +918,25 @@ public: void applyFilters() override; ReadFromHDFS( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, ReadFromFormatInfo info_, bool need_only_count_, std::shared_ptr storage_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : 
SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , info(std::move(info_)) , need_only_count(need_only_count_) , storage(std::move(storage_)) - , context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) { @@ -940,7 +947,6 @@ private: const bool need_only_count; std::shared_ptr storage; - ContextPtr context; size_t max_block_size; size_t num_streams; @@ -976,11 +982,14 @@ void StorageHDFS::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + context_, read_from_format_info.source_header, std::move(read_from_format_info), need_only_count, std::move(this_ptr), - context_, max_block_size, num_streams); diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 6766ecd6b4f..c4600ccc649 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -773,6 +773,10 @@ public: void applyFilters() override; ReadFromHive( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block header, std::shared_ptr storage_, std::shared_ptr sources_info_, @@ -781,10 +785,14 @@ public: HiveMetastoreClient::HiveTableMetadataPtr hive_table_metadata_, Block sample_block_, LoggerPtr log_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(header)}) + : SourceStepWithFilter( + DataStream{.header = std::move(header)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , storage(std::move(storage_)) , sources_info(std::move(sources_info_)) , builder(std::move(builder_)) @@ -792,7 +800,6 @@ public: , hive_table_metadata(std::move(hive_table_metadata_)) , sample_block(std::move(sample_block_)) , log(log_) - , context(std::move(context_)) , 
max_block_size(max_block_size_) , num_streams(num_streams_) { @@ -807,7 +814,6 @@ private: Block sample_block; LoggerPtr log; - ContextPtr context; size_t max_block_size; size_t num_streams; @@ -835,7 +841,7 @@ void StorageHive::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo &, + SelectQueryInfo & query_info, ContextPtr context_, QueryProcessingStage::Enum /* processed_stage */, size_t max_block_size, @@ -891,6 +897,10 @@ void StorageHive::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + context_, StorageHiveSource::getHeader(sample_block, sources_info), std::move(this_ptr), std::move(sources_info), @@ -899,7 +909,6 @@ void StorageHive::read( std::move(hive_table_metadata), std::move(sample_block), log, - context_, max_block_size, num_streams); diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index 3129da30f54..32b858bd486 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -47,20 +47,27 @@ public: void applyFilters() override; ReadFromCluster( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, std::shared_ptr storage_, ASTPtr query_to_send_, QueryProcessingStage::Enum processed_stage_, ClusterPtr cluster_, - LoggerPtr log_, - ContextPtr context_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + LoggerPtr log_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , storage(std::move(storage_)) , query_to_send(std::move(query_to_send_)) , processed_stage(processed_stage_) , cluster(std::move(cluster_)) , log(log_) - , context(std::move(context_)) { } @@ -70,7 +77,6 @@ private: 
QueryProcessingStage::Enum processed_stage; ClusterPtr cluster; LoggerPtr log; - ContextPtr context; std::optional extension; @@ -143,13 +149,16 @@ void IStorageCluster::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + context, sample_block, std::move(this_ptr), std::move(query_to_send), processed_stage, cluster, - log, - context); + log); query_plan.addStep(std::move(reading)); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index f2424834e6f..f99fefbed0c 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1007,6 +1007,7 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( return std::make_unique( std::move(parts), std::move(alter_conversions), + column_names_to_return, real_column_names, virt_column_names, data, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 4e93bd267ec..64cec946991 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -19,7 +20,6 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; extern const int LOGICAL_ERROR; extern const int QUERY_WAS_CANCELLED; } @@ -71,7 +71,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( lightweight_delete_filter_step = std::make_shared(std::move(step)); } - header_without_const_virtual_columns = applyPrewhereActions(pool->getHeader(), prewhere_info); + header_without_const_virtual_columns = SourceStepWithFilter::applyPrewhereActions(pool->getHeader(), prewhere_info); size_t non_const_columns_offset = header_without_const_virtual_columns.columns(); 
injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names); @@ -395,62 +395,11 @@ void MergeTreeSelectProcessor::injectVirtualColumns( injectPartConstVirtualColumns(row_count, block, task, partition_value_type, virtual_columns); } -Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info) -{ - if (prewhere_info) - { - if (prewhere_info->row_level_filter) - { - block = prewhere_info->row_level_filter->updateHeader(std::move(block)); - auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); - if (!row_level_column.type->canBeUsedInBooleanContext()) - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", - row_level_column.type->getName()); - } - - block.erase(prewhere_info->row_level_column_name); - } - - if (prewhere_info->prewhere_actions) - { - block = prewhere_info->prewhere_actions->updateHeader(std::move(block)); - - auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); - if (!prewhere_column.type->canBeUsedInBooleanContext()) - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", - prewhere_column.type->getName()); - } - - if (prewhere_info->remove_prewhere_column) - { - block.erase(prewhere_info->prewhere_column_name); - } - else if (prewhere_info->need_filter) - { - WhichDataType which(removeNullable(recursiveRemoveLowCardinality(prewhere_column.type))); - - if (which.isNativeInt() || which.isNativeUInt()) - prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1u)->convertToFullColumnIfConst(); - else if (which.isFloat()) - prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst(); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, - "Illegal type {} of column for filter", - prewhere_column.type->getName()); - } - } - } 
- - return block; -} - Block MergeTreeSelectProcessor::transformHeader( Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns) { injectVirtualColumns(block, 0, nullptr, partition_value_type, virtual_columns); - auto transformed = applyPrewhereActions(std::move(block), prewhere_info); + auto transformed = SourceStepWithFilter::applyPrewhereActions(std::move(block), prewhere_info); return transformed; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index b06ae788e91..b1606f983a1 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -83,7 +83,6 @@ private: /// Used for filling header with no rows as well as block with data static void injectVirtualColumns(Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns); - static Block applyPrewhereActions(Block block, const PrewhereInfoPtr & prewhere_info); /// Sets up range readers corresponding to data readers void initializeRangeReaders(); diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index d9a89b9d4ef..6f1c5302b0e 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -133,10 +133,18 @@ MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::op return {}; std::unordered_set prewhere_conditions; + std::list prewhere_conditions_list; for (const auto & condition : optimize_result->prewhere_conditions) - prewhere_conditions.insert(condition.node.getDAGNode()); + { + const ActionsDAG::Node * condition_node = condition.node.getDAGNode(); + if (prewhere_conditions.insert(condition_node).second) + prewhere_conditions_list.push_back(condition_node); + } - return {.prewhere_nodes = 
std::move(prewhere_conditions), .fully_moved_to_prewhere = optimize_result->where_conditions.empty()}; + return { + .prewhere_nodes = std::move(prewhere_conditions), + .prewhere_nodes_list = std::move(prewhere_conditions_list), + .fully_moved_to_prewhere = optimize_result->where_conditions.empty()}; } static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & columns_names, NameSet & result_set, bool & has_invalid_column) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 84afa4cda17..6c5ff29bc76 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -48,6 +48,7 @@ public: struct FilterActionsOptimizeResult { std::unordered_set prewhere_nodes; + std::list prewhere_nodes_list; /// Keep insertion order of moved prewhere_nodes bool fully_moved_to_prewhere = false; }; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 77526565fc8..f9e9b3281e9 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -482,28 +482,23 @@ public: void applyFilters() override; ReadFromEmbeddedRocksDB( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, - StorageSnapshotPtr storage_snapshot_, const StorageEmbeddedRocksDB & storage_, - SelectQueryInfo query_info_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , storage_snapshot(std::move(storage_snapshot_)) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , storage(storage_) - , query_info(std::move(query_info_)) - , context(std::move(context_)) , max_block_size(max_block_size_) , 
num_streams(num_streams_) { } private: - StorageSnapshotPtr storage_snapshot; const StorageEmbeddedRocksDB & storage; - SelectQueryInfo query_info; - ContextPtr context; size_t max_block_size; size_t num_streams; @@ -526,13 +521,7 @@ void StorageEmbeddedRocksDB::read( Block sample_block = storage_snapshot->metadata->getSampleBlock(); auto reading = std::make_unique( - std::move(sample_block), - storage_snapshot, - *this, - query_info, - context_, - max_block_size, - num_streams); + column_names, query_info, storage_snapshot, context_, std::move(sample_block), *this, max_block_size, num_streams); query_plan.addStep(std::move(reading)); } diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 4f73f4ee205..d5597aa647d 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -243,15 +243,22 @@ public: void applyFilters() override; ReadFromS3Queue( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, ReadFromFormatInfo info_, std::shared_ptr storage_, - ContextPtr context_, size_t max_block_size_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , info(std::move(info_)) , storage(std::move(storage_)) - , context(std::move(context_)) , max_block_size(max_block_size_) { } @@ -259,7 +266,6 @@ public: private: ReadFromFormatInfo info; std::shared_ptr storage; - ContextPtr context; size_t max_block_size; std::shared_ptr iterator; @@ -290,7 +296,7 @@ void StorageS3Queue::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, 
size_t max_block_size, @@ -312,10 +318,13 @@ void StorageS3Queue::read( auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, read_from_format_info.source_header, read_from_format_info, std::move(this_ptr), - local_context, max_block_size); query_plan.addStep(std::move(reading)); diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 4cb88a6d3fc..ae8f68988b7 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -53,6 +53,7 @@ struct PrewhereInfo String prewhere_column_name; bool remove_prewhere_column = false; bool need_filter = false; + bool generated_by_optimizer = false; PrewhereInfo() = default; explicit PrewhereInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) @@ -74,6 +75,7 @@ struct PrewhereInfo prewhere_info->prewhere_column_name = prewhere_column_name; prewhere_info->remove_prewhere_column = remove_prewhere_column; prewhere_info->need_filter = need_filter; + prewhere_info->generated_by_optimizer = generated_by_optimizer; return prewhere_info; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index f5fcf01c59e..cbc75af24e0 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -680,18 +680,20 @@ public: void applyFilters() override; ReadFromAzureBlob( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, std::shared_ptr storage_, ReadFromFormatInfo info_, const bool need_only_count_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, 
query_info_, storage_snapshot_, context_) , storage(std::move(storage_)) , info(std::move(info_)) , need_only_count(need_only_count_) - , context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) { @@ -702,8 +704,6 @@ private: ReadFromFormatInfo info; const bool need_only_count; - ContextPtr context; - size_t max_block_size; const size_t num_streams; @@ -742,11 +742,14 @@ void StorageAzureBlob::read( && local_context->getSettingsRef().optimize_count_from_files; auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, read_from_format_info.source_header, std::move(this_ptr), std::move(read_from_format_info), need_only_count, - local_context, max_block_size, num_streams); diff --git a/src/Storages/StorageDummy.cpp b/src/Storages/StorageDummy.cpp index e2396a54acb..8b5cd98db15 100644 --- a/src/Storages/StorageDummy.cpp +++ b/src/Storages/StorageDummy.cpp @@ -10,9 +10,8 @@ namespace DB { -StorageDummy::StorageDummy(const StorageID & table_id_, const ColumnsDescription & columns_, ColumnsDescription object_columns_) - : IStorage(table_id_) - , object_columns(std::move(object_columns_)) +StorageDummy::StorageDummy(const StorageID & table_id_, const ColumnsDescription & columns_, StorageRawPtr original_storage_) + : IStorage(table_id_), original_storage(original_storage_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); @@ -31,23 +30,36 @@ QueryProcessingStage::Enum StorageDummy::getQueryProcessingStage( void StorageDummy::read(QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo &, - ContextPtr, + SelectQueryInfo & query_info, + ContextPtr local_context, QueryProcessingStage::Enum, size_t, size_t) { - query_plan.addStep(std::make_unique(*this, storage_snapshot, column_names)); + query_plan.addStep(std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, + *this)); } 
-ReadFromDummy::ReadFromDummy(const StorageDummy & storage_, - StorageSnapshotPtr storage_snapshot_, - Names column_names_) - : SourceStepWithFilter(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}) +ReadFromDummy::ReadFromDummy( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + const StorageDummy & storage_) + : SourceStepWithFilter( + DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , storage(storage_) - , storage_snapshot(std::move(storage_snapshot_)) - , column_names(std::move(column_names_)) -{} + , column_names(column_names_) +{ +} void ReadFromDummy::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { diff --git a/src/Storages/StorageDummy.h b/src/Storages/StorageDummy.h index aa2201a196b..d737f7b79b9 100644 --- a/src/Storages/StorageDummy.h +++ b/src/Storages/StorageDummy.h @@ -8,10 +8,12 @@ namespace DB { +using StorageRawPtr = const IStorage *; + class StorageDummy final : public IStorage { public: - StorageDummy(const StorageID & table_id_, const ColumnsDescription & columns_, ColumnsDescription object_columns_ = {}); + StorageDummy(const StorageID & table_id_, const ColumnsDescription & columns_, StorageRawPtr orignal_storage = nullptr); std::string getName() const override { return "StorageDummy"; } @@ -20,7 +22,7 @@ public: bool supportsPrewhere() const override { return true; } bool supportsSubcolumns() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } - bool canMoveConditionsToPrewhere() const override { return false; } + bool canMoveConditionsToPrewhere() const override { return original_storage ? 
original_storage->canMoveConditionsToPrewhere() : false; } StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const override { @@ -42,16 +44,25 @@ public: QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) override; + + StorageRawPtr getOriginalStorage() const { return original_storage; } + private: const ColumnsDescription object_columns; + + /// The original storage which is replaced during planning. See collectFiltersForAnalysis. + StorageRawPtr original_storage; }; class ReadFromDummy final : public SourceStepWithFilter { public: - explicit ReadFromDummy(const StorageDummy & storage_, - StorageSnapshotPtr storage_snapshot_, - Names column_names_); + explicit ReadFromDummy( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + const StorageDummy & storage_); const StorageDummy & getStorage() const { @@ -74,7 +85,6 @@ public: private: const StorageDummy & storage; - StorageSnapshotPtr storage_snapshot; Names column_names; }; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7d674fea9ca..01de3e804dd 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1495,7 +1495,7 @@ std::optional StorageFileSource::tryGetNumRowsFromCache(const String & p return schema_cache.tryGetNumRows(key, get_last_mod_time); } -class ReadFromFile : public SourceStepWithFilter, WithContext +class ReadFromFile : public SourceStepWithFilter { public: std::string getName() const override { return "ReadFromFile"; } @@ -1503,14 +1503,17 @@ public: void applyFilters() override; ReadFromFile( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, std::shared_ptr storage_, ReadFromFormatInfo info_, const bool need_only_count_, - const 
ContextPtr & context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}), WithContext(context_) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , storage(std::move(storage_)) , info(std::move(info_)) , need_only_count(need_only_count_) @@ -1583,11 +1586,14 @@ void StorageFile::read( && context->getSettingsRef().optimize_count_from_files; auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + context, read_from_format_info.source_header, std::move(this_ptr), std::move(read_from_format_info), need_only_count, - context, max_block_size, num_streams); @@ -1604,7 +1610,7 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate) storage->archive_info, predicate, storage->virtual_columns, - getContext(), + context, storage->distributed_processing); } diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 02d81eda59a..cf23d17fc1d 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -152,13 +152,14 @@ void StorageMemory::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, + SelectQueryInfo & query_info, + ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, size_t num_streams) { - query_plan.addStep(std::make_unique(column_names, shared_from_this(), storage_snapshot, num_streams, delay_read_for_global_subqueries)); + query_plan.addStep(std::make_unique( + column_names, query_info, storage_snapshot, context, shared_from_this(), num_streams, delay_read_for_global_subqueries)); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 1b0623d2697..285fb39704f 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -212,6 +212,7 
@@ bool StorageMerge::tableSupportsPrewhere() const /// If new table that matches regexp for current storage and doesn't support PREWHERE /// will appear after this check and before calling "read" method, the optimized query may fail. /// Since it's quite rare case, we just ignore this possibility. + /// TODO: Store tables inside StorageSnapshot /// /// NOTE: Type can be different, and in this case, PREWHERE cannot be /// applied for those columns, but there a separate method to return @@ -219,11 +220,6 @@ bool StorageMerge::tableSupportsPrewhere() const return getFirstTable([](const auto & table) { return !table->canMoveConditionsToPrewhere(); }) == nullptr; } -bool StorageMerge::canMoveConditionsToPrewhere() const -{ - return tableSupportsPrewhere(); -} - std::optional StorageMerge::supportedPrewhereColumns() const { bool supports_prewhere = true; @@ -320,49 +316,40 @@ void StorageMerge::read( const size_t max_block_size, size_t num_streams) { - /** Just in case, turn off optimization "transfer to PREWHERE", - * since there is no certainty that it works when one of table is MergeTree and other is not. - */ - auto modified_context = Context::createCopy(local_context); - modified_context->setSetting("optimize_move_to_prewhere", false); - query_plan.addInterpreterContext(modified_context); - /// What will be result structure depending on query processed stage in source tables? 
Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage); auto step = std::make_unique( - common_header, column_names, + query_info, + storage_snapshot, + local_context, + common_header, max_block_size, num_streams, shared_from_this(), - storage_snapshot, - query_info, - std::move(modified_context), processed_stage); query_plan.addStep(std::move(step)); } ReadFromMerge::ReadFromMerge( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block common_header_, - Names all_column_names_, size_t max_block_size, size_t num_streams, StoragePtr storage, - StorageSnapshotPtr storage_snapshot, - const SelectQueryInfo & query_info_, - ContextMutablePtr context_, QueryProcessingStage::Enum processed_stage) - : SourceStepWithFilter(DataStream{.header = common_header_}, query_info_.prewhere_info) + : SourceStepWithFilter(DataStream{.header = common_header_}, column_names_, query_info_, storage_snapshot_, context_) , required_max_block_size(max_block_size) , requested_num_streams(num_streams) , common_header(std::move(common_header_)) - , all_column_names(std::move(all_column_names_)) + , all_column_names(column_names_) , storage_merge(std::move(storage)) - , merge_storage_snapshot(std::move(storage_snapshot)) - , query_info(query_info_) - , context(std::move(context_)) + , merge_storage_snapshot(storage_snapshot) , common_processed_stage(processed_stage) { } @@ -407,8 +394,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu common_header, child_plan.table_aliases, child_plan.row_policy_data_opt, - table, - context); + table); if (source_pipeline && source_pipeline->initialized()) { @@ -506,6 +492,8 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ query_info_.input_order_info = input_sorting_info; } + /// Settings will be modified when planning children tables. 
+ auto modified_context = Context::createCopy(context); for (const auto & table : selected_tables) { size_t current_need_streams = tables_count >= num_streams ? 1 : (num_streams / tables_count); @@ -545,7 +533,8 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt->extendNames(real_column_names); } - auto modified_query_info = getModifiedQueryInfo(context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); + auto modified_query_info + = getModifiedQueryInfo(context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); if (!context->getSettingsRef().allow_experimental_analyzer) { @@ -612,7 +601,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ table, column_names_as_aliases.empty() ? std::move(real_column_names) : std::move(column_names_as_aliases), row_policy_data_opt, - context, + modified_context, current_streams); } @@ -819,7 +808,7 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin( SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot_, Names required_column_names, Names & column_names_as_aliases, Aliases & aliases) const @@ -831,7 +820,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ if (modified_query_info.table_expression) { - auto replacement_table_expression = std::make_shared(storage, storage_lock, storage_snapshot); + auto replacement_table_expression = std::make_shared(storage, storage_lock, storage_snapshot_); if (query_info.table_expression_modifiers) replacement_table_expression->setTableExpressionModifiers(*query_info.table_expression_modifiers); @@ -840,26 +829,26 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ 
modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - if (storage_snapshot->storage.supportsSubcolumns()) + if (storage_snapshot_->storage.supportsSubcolumns()) get_column_options.withSubcolumns(); std::unordered_map column_name_to_node; - if (!storage_snapshot->tryGetColumn(get_column_options, "_table")) + if (!storage_snapshot_->tryGetColumn(get_column_options, "_table")) { auto table_name_node = std::make_shared(current_storage_id.table_name); table_name_node->setAlias("_table"); column_name_to_node.emplace("_table", table_name_node); } - if (!storage_snapshot->tryGetColumn(get_column_options, "_database")) + if (!storage_snapshot_->tryGetColumn(get_column_options, "_database")) { auto database_name_node = std::make_shared(current_storage_id.database_name); database_name_node->setAlias("_database"); column_name_to_node.emplace("_database", database_name_node); } - auto storage_columns = storage_snapshot->metadata->getColumns(); + auto storage_columns = storage_snapshot_->metadata->getColumns(); bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty(); if (with_aliases) @@ -903,7 +892,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ } column_names_as_aliases = filter_actions_dag->getRequiredColumnsNames(); if (column_names_as_aliases.empty()) - column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_snapshot_->metadata->getColumns().getAllPhysical()).name); } if (!column_name_to_node.empty()) @@ -956,14 +945,13 @@ bool recursivelyApplyToReadingSteps(QueryPlan::Node * node, const std::function< QueryPipelineBuilderPtr ReadFromMerge::createSources( 
QueryPlan & plan, - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot_, SelectQueryInfo & modified_query_info, QueryProcessingStage::Enum processed_stage, const Block & header, const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, const StorageWithLockAndName & storage_with_lock, - ContextMutablePtr modified_context, bool concat_streams) const { if (!plan.isInitialized()) @@ -972,12 +960,12 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( QueryPipelineBuilderPtr builder; const auto & [database_name, storage, _, table_name] = storage_with_lock; - bool allow_experimental_analyzer = modified_context->getSettingsRef().allow_experimental_analyzer; + bool allow_experimental_analyzer = context->getSettingsRef().allow_experimental_analyzer; auto storage_stage - = storage->getQueryProcessingStage(modified_context, QueryProcessingStage::Complete, storage_snapshot, modified_query_info); + = storage->getQueryProcessingStage(context, QueryProcessingStage::Complete, storage_snapshot_, modified_query_info); builder = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(modified_context), BuildQueryPipelineSettings::fromContext(modified_context)); + QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) { @@ -1010,7 +998,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); builder->addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, 
adding_column_actions); }); @@ -1025,7 +1013,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); builder->addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, adding_column_actions); }); @@ -1033,14 +1021,15 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. - convertAndFilterSourceStream(header, modified_query_info, storage_snapshot, aliases, row_policy_data_opt, modified_context, *builder, processed_stage); + convertAndFilterSourceStream( + header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, processed_stage); } return builder; } QueryPlan ReadFromMerge::createPlanForTable( - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot_, SelectQueryInfo & modified_query_info, QueryProcessingStage::Enum processed_stage, UInt64 max_block_size, @@ -1065,7 +1054,7 @@ QueryPlan ReadFromMerge::createPlanForTable( auto storage_stage = storage->getQueryProcessingStage(modified_context, QueryProcessingStage::Complete, - storage_snapshot, + storage_snapshot_, modified_query_info); QueryPlan plan; @@ -1074,14 +1063,14 @@ QueryPlan ReadFromMerge::createPlanForTable( { /// If there are only virtual columns in query, you must request at least one other column. 
if (real_column_names.empty()) - real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot_->metadata->getColumns().getAllPhysical()).name); StorageView * view = dynamic_cast(storage.get()); if (!view || allow_experimental_analyzer) { storage->read(plan, real_column_names, - storage_snapshot, + storage_snapshot_, modified_query_info, modified_context, processed_stage, @@ -1111,10 +1100,8 @@ QueryPlan ReadFromMerge::createPlanForTable( if (row_policy_data_opt) { - if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) - { + if (auto * source_step_with_filter = dynamic_cast((plan.getRootNode()->step.get()))) row_policy_data_opt->addStorageFilter(source_step_with_filter); - } } } else if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) @@ -1386,7 +1373,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const StorageSnapshotPtr & snapshot, const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, - ContextMutablePtr local_context, + ContextPtr local_context, QueryPipelineBuilder & builder, QueryProcessingStage::Enum processed_stage) { @@ -1527,6 +1514,18 @@ void ReadFromMerge::applyFilters() applyFilters(child_plan.plan); } +QueryPlanRawPtrs ReadFromMerge::getChildPlans() +{ + filterTablesAndCreateChildrenPlans(); + + QueryPlanRawPtrs plans; + for (auto & child_plan : *child_plans) + if (child_plan.plan.isInitialized()) + plans.push_back(&child_plan.plan); + + return plans; +} + IStorage::ColumnSizeByName StorageMerge::getColumnSizes() const { ColumnSizeByName column_sizes; diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index f5b6c3a7ca9..5e6fe95c189 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -49,11 +49,9 @@ public: bool 
supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsSubcolumns() const override { return true; } - bool supportsPrewhere() const override { return true; } + bool supportsPrewhere() const override { return tableSupportsPrewhere(); } std::optional supportedPrewhereColumns() const override; - bool canMoveConditionsToPrewhere() const override; - QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; @@ -142,14 +140,14 @@ public: using DatabaseTablesIterators = std::vector; ReadFromMerge( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block common_header_, - Names all_column_names_, size_t max_block_size, size_t num_streams, StoragePtr storage, - StorageSnapshotPtr storage_snapshot, - const SelectQueryInfo & query_info_, - ContextMutablePtr context_, QueryProcessingStage::Enum processed_stage); void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; @@ -161,6 +159,8 @@ public: void applyFilters() override; + QueryPlanRawPtrs getChildPlans() override; + private: const size_t required_max_block_size; const size_t requested_num_streams; @@ -174,8 +174,6 @@ private: StoragePtr storage_merge; StorageSnapshotPtr merge_storage_snapshot; - SelectQueryInfo query_info; - ContextMutablePtr context; QueryProcessingStage::Enum common_processed_stage; InputOrderInfoPtr order_info; @@ -265,7 +263,6 @@ private: const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, const StorageWithLockAndName & storage_with_lock, - ContextMutablePtr modified_context, bool concat_streams = false) const; static void convertAndFilterSourceStream( @@ -274,7 +271,7 @@ private: const StorageSnapshotPtr & snapshot, const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, 
- ContextMutablePtr context, + ContextPtr context, QueryPipelineBuilder & builder, QueryProcessingStage::Enum processed_stage); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d8ef3df1c8..8d8a6df52c3 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -146,40 +146,36 @@ public: void applyFilters() override; ReadFromStorageS3Step( - Block sample_block, const Names & column_names_, - StorageSnapshotPtr storage_snapshot_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, StorageS3 & storage_, ReadFromFormatInfo read_from_format_info_, bool need_only_count_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , column_names(column_names_) - , storage_snapshot(std::move(storage_snapshot_)) , storage(storage_) , read_from_format_info(std::move(read_from_format_info_)) , need_only_count(need_only_count_) - , local_context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) { - query_configuration = storage.updateConfigurationAndGetCopy(local_context); + query_configuration = storage.updateConfigurationAndGetCopy(context); virtual_columns = storage.getVirtuals(); } private: Names column_names; - StorageSnapshotPtr storage_snapshot; StorageS3 & storage; ReadFromFormatInfo read_from_format_info; bool need_only_count; StorageS3::Configuration query_configuration; NamesAndTypesList virtual_columns; - ContextPtr local_context; - size_t max_block_size; size_t num_streams; @@ -1153,13 +1149,14 @@ void StorageS3::read( && local_context->getSettingsRef().optimize_count_from_files; auto reading = std::make_unique( - read_from_format_info.source_header, column_names, + query_info, storage_snapshot, + 
local_context, + read_from_format_info.source_header, *this, std::move(read_from_format_info), need_only_count, - local_context, max_block_size, num_streams); @@ -1182,8 +1179,8 @@ void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) return; iterator_wrapper = createFileIterator( - query_configuration, storage.distributed_processing, local_context, predicate, - virtual_columns, nullptr, local_context->getFileProgressCallback()); + query_configuration, storage.distributed_processing, context, predicate, + virtual_columns, nullptr, context->getFileProgressCallback()); } void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) @@ -1200,7 +1197,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case. num_streams = 1; - const size_t max_threads = local_context->getSettingsRef().max_threads; + const size_t max_threads = context->getSettingsRef().max_threads; const size_t max_parsing_threads = num_streams >= max_threads ? 
1 : (max_threads / std::max(num_streams, 1ul)); LOG_DEBUG(getLogger("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); @@ -1212,7 +1209,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, read_from_format_info, query_configuration.format, storage.getName(), - local_context, + context, storage.format_settings, max_block_size, query_configuration.request_settings, @@ -1225,7 +1222,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, max_parsing_threads, need_only_count); - source->setKeyCondition(filter_nodes.nodes, local_context); + source->setKeyCondition(filter_nodes.nodes, context); pipes.emplace_back(std::move(source)); } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 608e44c3cd0..31822c3b68a 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -992,6 +992,10 @@ public: void applyFilters() override; ReadFromURL( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, std::shared_ptr storage_, std::vector * uri_options_, @@ -999,17 +1003,15 @@ public: const bool need_only_count_, std::vector> read_uri_params_, std::function read_post_data_callback_, - ContextPtr context_, size_t max_block_size_, size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , storage(std::move(storage_)) , uri_options(uri_options_) , info(std::move(info_)) , need_only_count(need_only_count_) , read_uri_params(std::move(read_uri_params_)) , read_post_data_callback(std::move(read_post_data_callback_)) - , context(std::move(context_)) , max_block_size(max_block_size_) , num_streams(num_streams_) , max_num_streams(num_streams_) @@ -1025,8 +1027,6 @@ 
private: std::vector> read_uri_params; std::function read_post_data_callback; - ContextPtr context; - size_t max_block_size; size_t num_streams; const size_t max_num_streams; @@ -1075,6 +1075,10 @@ void IStorageURLBase::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, read_from_format_info.source_header, std::move(this_ptr), nullptr, @@ -1082,7 +1086,6 @@ void IStorageURLBase::read( need_only_count, std::move(params), std::move(read_post_data_callback), - local_context, max_block_size, num_streams); @@ -1243,6 +1246,10 @@ void StorageURLWithFailover::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( + column_names, + query_info, + storage_snapshot, + local_context, read_from_format_info.source_header, std::move(this_ptr), &uri_options, @@ -1250,7 +1257,6 @@ void StorageURLWithFailover::read( need_only_count, std::move(params), std::move(read_post_data_callback), - local_context, max_block_size, num_streams); diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index b100be7cdf4..c5d8b307368 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -34,7 +34,7 @@ void StorageSystemNumbers::read( size_t num_streams) { query_plan.addStep(std::make_unique( - column_names, shared_from_this(), storage_snapshot, query_info, std::move(context), max_block_size, num_streams)); + column_names, query_info, storage_snapshot, context, shared_from_this(), max_block_size, num_streams)); } } diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 82a5fd4e33f..e6054235c89 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -1,6 +1,5 @@ #ifdef OS_LINUX /// Because of 'rt_tgsigqueueinfo' 
functions and RT signals. -#include #include #include @@ -274,15 +273,21 @@ bool isSignalBlocked(UInt64 tid, int signal) class StackTraceSource : public ISource { public: - StackTraceSource(const Names & column_names, Block header_, ASTPtr && query_, ActionsDAGPtr && filter_dag_, ContextPtr context_, UInt64 max_block_size_, LoggerPtr log_) + StackTraceSource( + const Names & column_names, + Block header_, + ActionsDAGPtr && filter_dag_, + ContextPtr context_, + UInt64 max_block_size_, + LoggerPtr log_) : ISource(header_) , context(context_) , header(std::move(header_)) - , query(std::move(query_)) , filter_dag(std::move(filter_dag_)) , predicate(filter_dag ? filter_dag->getOutputs().at(0) : nullptr) , max_block_size(max_block_size_) - , pipe_read_timeout_ms(static_cast(context->getSettingsRef().storage_system_stack_trace_pipe_read_timeout_ms.totalMilliseconds())) + , pipe_read_timeout_ms( + static_cast(context->getSettingsRef().storage_system_stack_trace_pipe_read_timeout_ms.totalMilliseconds())) , log(log_) , proc_it("/proc/self/task") /// It shouldn't be possible to do concurrent reads from this table. 
@@ -417,7 +422,6 @@ protected: private: ContextPtr context; Block header; - const ASTPtr query; const ActionsDAGPtr filter_dag; const ActionsDAG::Node * predicate; @@ -467,7 +471,6 @@ public: Pipe pipe(std::make_shared( column_names, getOutputStream().header, - std::move(query), std::move(filter_actions_dag), context, max_block_size, @@ -477,15 +480,14 @@ public: ReadFromSystemStackTrace( const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, Block sample_block, - ASTPtr && query_, - ContextPtr context_, size_t max_block_size_, LoggerPtr log_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) , column_names(column_names_) - , query(query_) - , context(std::move(context_)) , max_block_size(max_block_size_) , log(log_) { @@ -493,8 +495,6 @@ public: private: Names column_names; - ASTPtr query; - ContextPtr context; size_t max_block_size; LoggerPtr log; }; @@ -548,12 +548,7 @@ void StorageSystemStackTrace::read( Block sample_block = storage_snapshot->metadata->getSampleBlock(); auto reading = std::make_unique( - column_names, - sample_block, - query_info.query->clone(), - context, - max_block_size, - log); + column_names, query_info, storage_snapshot, context, sample_block, max_block_size, log); query_plan.addStep(std::move(reading)); } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 47c4a03a595..47108fff348 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -696,12 +696,19 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; ReadFromSystemTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & 
storage_snapshot_, + const ContextPtr & context_, Block sample_block, - ContextPtr context_, std::vector columns_mask_, size_t max_block_size_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , context(std::move(context_)) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) , columns_mask(std::move(columns_mask_)) , max_block_size(max_block_size_) { @@ -710,7 +717,6 @@ public: void applyFilters() override; private: - ContextPtr context; std::vector columns_mask; size_t max_block_size; @@ -722,7 +728,7 @@ void StorageSystemTables::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, + SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, @@ -734,10 +740,7 @@ void StorageSystemTables::read( auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); auto reading = std::make_unique( - std::move(res_block), - context, - std::move(columns_mask), - max_block_size); + column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size); query_plan.addStep(std::move(reading)); } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index abf93bf1ac0..a112c405d37 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -32,7 +32,6 @@ #include #include #include -#include namespace DB @@ -180,7 +179,13 @@ using Paths = std::deque>; class ReadFromSystemZooKeeper final : public SourceStepWithFilter { public: - ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info_, UInt64 max_block_size_, ContextPtr context_); + ReadFromSystemZooKeeper( + const Names & column_names_, + const SelectQueryInfo & 
query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + const Block & header, + UInt64 max_block_size_); String getName() const override { return "ReadFromSystemZooKeeper"; } @@ -191,7 +196,6 @@ public: private: std::shared_ptr storage_limits; const UInt64 max_block_size; - ContextPtr context; Paths paths; }; @@ -235,7 +239,7 @@ StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) void StorageSystemZooKeeper::read( QueryPlan & query_plan, - const Names & /*column_names*/, + const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, @@ -244,7 +248,13 @@ void StorageSystemZooKeeper::read( size_t /*num_streams*/) { auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); - auto read_step = std::make_unique(header, query_info, max_block_size, context); + auto read_step = std::make_unique( + column_names, + query_info, + storage_snapshot, + context, + header, + max_block_size); query_plan.addStep(std::move(read_step)); } @@ -646,11 +656,21 @@ Chunk SystemZooKeeperSource::generate() return Chunk(std::move(res_columns), row_count); } -ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info, UInt64 max_block_size_, ContextPtr context_) - : SourceStepWithFilter({.header = header}) +ReadFromSystemZooKeeper::ReadFromSystemZooKeeper( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + const Block & header, + UInt64 max_block_size_) + : SourceStepWithFilter( + {.header = header}, + column_names_, + query_info_, + storage_snapshot_, + context_) , storage_limits(query_info.storage_limits) , max_block_size(max_block_size_) - , context(std::move(context_)) { } diff --git a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference 
b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference index 30b5ae9c648..25d8f62f5dd 100644 --- a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference +++ b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference @@ -1,12 +1,8 @@ -SELECT - x, - y -FROM prewhere_move -PREWHERE x > 100 -SELECT - x1, - x2, - x3, - x4 -FROM prewhere_move -PREWHERE (x1 > 100) AND (x2 > 100) AND (x3 > 100) AND (x4 > 100) + Prewhere info + Prewhere filter + Prewhere filter column: greater(x, 100) (removed) + Filter + Filter column: and(greater(x1, 100), greater(x2, 100), greater(x3, 100), greater(x4, \'100\')) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(x1, 100) diff --git a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql index bd3e651e0dc..9a4f8d1f734 100644 --- a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql +++ b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-merge-tree-settings + SET optimize_move_to_prewhere = 1; SET convert_query_to_cnf = 0; @@ -5,14 +7,15 @@ DROP TABLE IF EXISTS prewhere_move; CREATE TABLE prewhere_move (x Int, y String) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO prewhere_move SELECT number, toString(number) FROM numbers(1000); -EXPLAIN SYNTAX SELECT * FROM prewhere_move WHERE x > 100; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move WHERE x > 100) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; DROP TABLE prewhere_move; -CREATE TABLE prewhere_move (x1 Int, x2 Int, x3 Int, x4 Int) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO prewhere_move SELECT number, number, number, number FROM numbers(1000); +CREATE TABLE prewhere_move (x1 Int, x2 Int, x3 Int, x4 String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO 
prewhere_move SELECT number, number, number, repeat('a', 1024) FROM numbers(1000); -- Not all conditions moved -EXPLAIN SYNTAX SELECT * FROM prewhere_move WHERE x1 > 100 AND x2 > 100 AND x3 > 100 AND x4 > 100; +SET move_all_conditions_to_prewhere = 0; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move WHERE x1 > 100 AND x2 > 100 AND x3 > 100 AND x4 > '100') WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; DROP TABLE prewhere_move; diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference index 98c76cc2a50..26a0e97729c 100644 --- a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference +++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.reference @@ -1,114 +1,38 @@ optimize_move_to_prewhere_if_final = 1 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -PREWHERE x > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE x > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -PREWHERE y > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE y > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -PREWHERE (x + y) > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE (x + y) > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -WHERE z > 400 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE y > 100 -WHERE (y > 100) AND (z > 400) - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE x > 50 -WHERE (x > 50) AND (z > 400) - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -PREWHERE (x + y) > 50 -WHERE ((x + y) > 50) AND (z > 400) - + Prewhere info + Prewhere filter + Prewhere filter column: greater(x, 100) (removed) + Prewhere info + Prewhere filter + 
Prewhere filter column: greater(x, 100) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(y, 100) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(y, 100) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(plus(x, y), 100) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(plus(x, y), 100) (removed) + Filter + Filter column: and(greater(y, 100), greater(z, 400)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(y, 100) + Filter + Filter column: and(greater(x, 50), greater(z, 400)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(x, 50) + Filter + Filter column: and(greater(plus(x, y), 50), greater(z, 400)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: greater(plus(x, y), 50) optimize_move_to_prewhere_if_final = 0 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -PREWHERE y > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -WHERE y > 100 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -WHERE z > 400 - -SELECT - x, - y, - z -FROM prewhere_move_select_final -FINAL -WHERE (y > 100) AND (z > 400) + Prewhere info + Prewhere filter + Prewhere filter column: greater(y, 100) (removed) diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql index ede15738c5b..d4830e9e357 100644 --- a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql +++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql @@ -10,42 +10,27 @@ select 'optimize_move_to_prewhere_if_final = 1'; SET optimize_move_to_prewhere_if_final = 1; -- order key can be pushed down with final -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE x > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM 
prewhere_move_select_final FINAL WHERE x > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE x + y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 100; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final WHERE x > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE x > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final WHERE y > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final WHERE x + y > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 100) WHERE explain LIKE '%Prewhere%'; -- can not be pushed down -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400) WHERE explain LIKE '%Prewhere filter'; -- only condition with x/y can be pushed down -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x > 50 
and z > 400; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 50 and z > 400; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE x > 50 and z > 400) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE x + y > 50 and z > 400) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; -select ''; select 'optimize_move_to_prewhere_if_final = 0'; SET optimize_move_to_prewhere_if_final = 0; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final WHERE y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400; -select ''; -EXPLAIN SYNTAX SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final WHERE y > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE z > 400) WHERE explain LIKE '%Prewhere%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_UInt16', '') FROM (EXPLAIN actions=1 SELECT * FROM prewhere_move_select_final FINAL WHERE y > 100 and z > 400) WHERE explain 
LIKE '%Prewhere%'; DROP TABLE prewhere_move_select_final; diff --git a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference index 686a864f222..ccd51cba776 100644 --- a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference +++ b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference @@ -1,12 +1,19 @@ 1 Wide 2 Compact 35 -SELECT count() -FROM t_move_to_prewhere -PREWHERE a AND b AND c AND (NOT ignore(fat_string)) + Filter + Filter column: and(a, b, c, not(ignore(fat_string))) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(a, b, c) (removed) 1 Compact 2 Compact 35 SELECT count() FROM t_move_to_prewhere -PREWHERE a AND b AND c AND (NOT ignore(fat_string)) +WHERE a AND b AND c AND (NOT ignore(fat_string)) + Filter + Filter column: and(a, b, c, not(ignore(fat_string))) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: a diff --git a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql index 2987c541aef..6ad804ac1b3 100644 --- a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql +++ b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql @@ -2,6 +2,7 @@ SET optimize_move_to_prewhere = 1; SET convert_query_to_cnf = 0; +SET move_all_conditions_to_prewhere = 0; DROP TABLE IF EXISTS t_move_to_prewhere; @@ -17,7 +18,7 @@ WHERE table = 't_move_to_prewhere' AND database = currentDatabase() ORDER BY partition; SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string); -EXPLAIN SYNTAX SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string); +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string)) WHERE explain LIKE '%Prewhere%' OR explain 
LIKE '%Filter%'; DROP TABLE IF EXISTS t_move_to_prewhere; @@ -38,5 +39,6 @@ ORDER BY partition; SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string); EXPLAIN SYNTAX SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string); +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_move_to_prewhere WHERE a AND b AND c AND NOT ignore(fat_string)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter%'; DROP TABLE IF EXISTS t_move_to_prewhere; diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference index 74ba452d783..86a36a9392c 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference @@ -1,12 +1,15 @@ -SELECT count() -FROM t_02156_merge1 -PREWHERE notEmpty(v) AND (k = 3) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) 2 -SELECT count() -FROM t_02156_merge2 -WHERE (k = 3) AND notEmpty(v) + Filter column: and(equals(k, 3), notEmpty(v)) (removed) 2 -SELECT count() -FROM t_02156_merge3 -WHERE (k = 3) AND notEmpty(v) + Filter column: and(equals(k, 3), notEmpty(v)) (removed) + Filter column: and(equals(k, 3), notEmpty(v)) (removed) 2 diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index 83d88a68d9b..f443d9569ce 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -23,13 +23,13 @@ INSERT INTO t_02156_mt1 SELECT number, toString(number) FROM numbers(10000); INSERT INTO 
t_02156_mt2 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_log SELECT number, toString(number) FROM numbers(10000); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v); +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v); +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v); DROP TABLE IF EXISTS t_02156_mt1; diff --git a/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere.sql b/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere.sql index 1e969afac33..805186edcbd 100644 --- a/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere.sql +++ b/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS test_02559; -CREATE TABLE test_02559 (id1 UInt64, id2 UInt64) ENGINE=MergeTree ORDER BY id1; +CREATE TABLE test_02559 (id1 UInt64, id2 UInt64) ENGINE=MergeTree ORDER BY id1 SETTINGS min_bytes_for_wide_part = 0; INSERT INTO test_02559 SELECT number, number FROM numbers(10); diff --git a/tests/queries/0_stateless/02809_prewhere_and_in.reference 
b/tests/queries/0_stateless/02809_prewhere_and_in.reference index 3080ae862bb..54ea18b3eab 100644 --- a/tests/queries/0_stateless/02809_prewhere_and_in.reference +++ b/tests/queries/0_stateless/02809_prewhere_and_in.reference @@ -1,8 +1,8 @@ -PREWHERE a IN -PREWHERE a IN -PREWHERE a IN -PREWHERE a IN -PREWHERE b NOT IN -PREWHERE b NOT IN -PREWHERE b NOT IN -PREWHERE b NOT IN + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter + Prewhere filter diff --git a/tests/queries/0_stateless/02809_prewhere_and_in.sql b/tests/queries/0_stateless/02809_prewhere_and_in.sql index 345577d6c7c..448f9512cf6 100644 --- a/tests/queries/0_stateless/02809_prewhere_and_in.sql +++ b/tests/queries/0_stateless/02809_prewhere_and_in.sql @@ -16,40 +16,16 @@ AS SELECT * FROM numbers(10); SET optimize_move_to_prewhere=1; -- Queries with 'IN' -SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE a IN (SELECT * FROM system.one) -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE a IN (1,2,3) -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE a IN t_02809_set -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 13) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE a IN t_02809_aux -) WHERE explain LIKE '%WHERE%'; - +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a IN (SELECT * FROM system.one)) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a IN (1,2,3)) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a IN t_02809_set) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE a IN t_02809_aux) WHERE explain LIKE '%Prewhere filter'; -- Queries with 'NOT IN' -SELECT substring(explain, 1, 
17) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE b NOT IN (SELECT * FROM system.one) -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE b NOT IN (1,2,3) -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE b NOT IN t_02809_set -) WHERE explain LIKE '%WHERE%'; - -SELECT substring(explain, 1, 17) FROM (EXPLAIN SYNTAX - SELECT * FROM t_02809 WHERE b NOT IN t_02809_aux -) WHERE explain LIKE '%WHERE%'; - +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE b NOT IN (SELECT * FROM system.one)) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE b NOT IN (1,2,3)) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE b NOT IN t_02809_set) WHERE explain LIKE '%Prewhere filter'; +SELECT * FROM (EXPLAIN actions=1 SELECT * FROM t_02809 WHERE b NOT IN t_02809_aux) WHERE explain LIKE '%Prewhere filter'; DROP TABLE t_02809; DROP TABLE t_02809_set; diff --git a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference index 6e04d969e67..b91a4dd2f68 100644 --- a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference +++ b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference @@ -1,20 +1,15 @@ -SELECT count() -FROM t_02848_mt1 -PREWHERE notEmpty(v) AND (k = 3) + Prewhere filter + Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed) 1 -SELECT count() -FROM t_02848_mt2 -PREWHERE (d LIKE \'%es%\') AND (c < 20) AND (b = \'3\') AND (a = 3) + Prewhere filter + Prewhere filter column: and(like(d, \'%es%\'), less(c, 20), equals(b, \'3\'), equals(a, 3)) (removed) 1 -SELECT count() -FROM t_02848_mt2 -PREWHERE (d LIKE \'%es%\') AND (c < 20) AND (c > 0) AND (a = 3) + Prewhere filter + Prewhere filter column: and(like(d, \'%es%\'), 
less(c, 20), greater(c, 0), equals(a, 3)) (removed) 1 -SELECT count() -FROM t_02848_mt2 -PREWHERE (d LIKE \'%es%\') AND (b = \'3\') AND (c < 20) + Prewhere filter + Prewhere filter column: and(like(d, \'%es%\'), equals(b, \'3\'), less(c, 20)) (removed) 1 -SELECT count() -FROM t_02848_mt2 -PREWHERE (d LIKE \'%es%\') AND (b = \'3\') AND (a = 3) + Prewhere filter + Prewhere filter column: and(like(d, \'%es%\'), equals(b, \'3\'), equals(a, 3)) (removed) 1 diff --git a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql index bc9d7e5664e..f863d765798 100644 --- a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql +++ b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.sql @@ -8,7 +8,7 @@ CREATE TABLE t_02848_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETT INSERT INTO t_02848_mt1 SELECT number, toString(number) FROM numbers(100); -EXPLAIN SYNTAX SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v); +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt1 WHERE k = 3 AND notEmpty(v); CREATE TABLE t_02848_mt2 (a UInt32, b String, c Int32, d String) ENGINE = MergeTree ORDER BY (a,b,c) SETTINGS min_bytes_for_wide_part=0; @@ -18,16 +18,16 @@ INSERT INTO t_02848_mt2 SELECT number, toString(number), number, 'aaaabbbbccccdd -- the estimated column sizes are: {a: 428, b: 318, c: 428, d: 73} -- it's not correct but let's fix it in the future. 
-EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%') WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND c < 20 AND d like '%es%'; -EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%') WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt2 WHERE a = 3 AND c < 20 AND c > 0 AND d like '%es%'; -EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%') WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt2 WHERE b == '3' AND c < 20 AND d like '%es%'; -EXPLAIN SYNTAX SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8|_String', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%') WHERE explain LIKE '%Prewhere filter%' OR explain LIKE '%Filter%'; SELECT count() FROM t_02848_mt2 WHERE a = 3 AND b == '3' AND d like '%es%'; DROP TABLE t_02848_mt1;