From 483bd165e7ab7ad5b050525bd1dc343eed7ea202 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 23 Apr 2021 01:04:12 +0800 Subject: [PATCH] Check if pipeline is simple and add more comments --- src/Interpreters/InterpreterSelectQuery.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 24 ++++++++++++++++--- .../queries/0_stateless/01710_projections.sql | 3 +++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index f29074f1f2f..2119c1cbc41 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -93,6 +93,8 @@ public: const SelectQueryExpressionAnalyzer * getQueryAnalyzer() const { return query_analyzer.get(); } + const ExpressionAnalysisResult & getAnalysisResult() const { return analysis_result; } + const Names & getRequiredColumns() const { return required_columns; } bool hasAggregation() const { return query_analyzer->hasAggregation(); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 77809e5f7a1..9ea14470db1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3815,10 +3815,24 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( InterpreterSelectQuery select( query_ptr, query_context, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreProjections().ignoreAlias()); + const auto & analysis_result = select.getAnalysisResult(); + /// If first staging query pipeline is more complex than Aggregating - Expression - Filter - ReadFromStorage, return false + if (analysis_result.join != nullptr || analysis_result.array_join != nullptr || analysis_result.before_aggregation == nullptr) + return false; + auto query_block = select.getSampleBlock(); auto required_query = select.getQuery(); - // The ownership of ProjectionDescription is hold in metadata_snapshot which lives with InterpreterSelect + /// Check if all needed columns can be provided by some aggregate projection. Here we also try + /// to find expression matches. For example, suppose an aggregate projection contains a column + /// named sum(x) and the given query also has an expression called sum(x), it's a match. This is + /// why we need to ignore all aliases during projection creation and the above query planning. + /// It's also worth noting that, sqrt(sum(x)) will also work because we can treat sum(x) as a + /// required column. + /// TODO we can use ActionsDAG here to make proper check + + /// The ownership of ProjectionDescription is hold in metadata_snapshot which lives along with + /// InterpreterSelect, thus we can store the raw pointer here. std::vector> candidates; ParserFunction parse_function; for (const auto & projection : metadata_snapshot->projections) @@ -3845,17 +3859,20 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( /// Check if the missing columns can be produced by key columns in projection. for (const auto & expr_name : maybe_dimension_column_exprs) { + /// XXX We need AST out from string names. Have to resort to the parser here. Tokens tokens_number(expr_name.data(), expr_name.data() + expr_name.size()); IParser::Pos pos(tokens_number, settings.max_parser_depth); Expected expected; ASTPtr ast; - // It should be a function call. + + /// It should be a function call, or else we would match them already. if (!parse_function.parse(pos, ast, expected)) { covered = false; break; } - // It should be a function call that only requires unused key columns. + // It should be a function call that only requires unused key columns, or else some key + // column might be transformed in different ways, which is not injective. if (!key_actions.add(ast, expr_name, key_block)) { covered = false; @@ -3890,6 +3907,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( } } + // Let's select the best aggregate projection to execute the query. if (!candidates.empty()) { size_t min_key_size = std::numeric_limits::max(); diff --git a/tests/queries/0_stateless/01710_projections.sql b/tests/queries/0_stateless/01710_projections.sql index c94ac58a146..5740bd05f5c 100644 --- a/tests/queries/0_stateless/01710_projections.sql +++ b/tests/queries/0_stateless/01710_projections.sql @@ -6,6 +6,9 @@ insert into projection_test with rowNumberInAllBlocks() as id select toDateTime( set allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +select * from projection_test; -- { serverError 584 } +select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) from projection_test join (select 1) x using (1) where domain = '1' group by dt_m order by dt_m; -- { serverError 584 } + select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration) from projection_test where domain = '1' group by dt_m order by dt_m; select toStartOfMinute(datetime) dt_m, count(), sum(block_count) / sum(duration), avg(block_count / duration) from projection_test group by dt_m order by dt_m;