Check if pipeline is simple and add more comments

Amos Bird 2021-04-23 01:04:12 +08:00
parent 427f444ba4
commit 483bd165e7
3 changed files with 26 additions and 3 deletions

View File

@@ -93,6 +93,8 @@ public:
const SelectQueryExpressionAnalyzer * getQueryAnalyzer() const { return query_analyzer.get(); }
const ExpressionAnalysisResult & getAnalysisResult() const { return analysis_result; }
const Names & getRequiredColumns() const { return required_columns; }
bool hasAggregation() const { return query_analyzer->hasAggregation(); }
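
The added accessor exposes whether the analyzed query aggregates at all, which is the precondition for using an aggregate projection. A minimal sketch of a plausible call site follows; only the accessor and the interpreter construction come from this change, the early-return placement is an assumption.

/// Sketch only: the placement of this check is an assumption, not part of the change.
InterpreterSelectQuery select(
    query_ptr, query_context,
    SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreProjections().ignoreAlias());

/// Aggregate projections can only help queries that aggregate in the first place.
if (!select.hasAggregation())
    return false;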

View File

@@ -3815,10 +3815,24 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection(
InterpreterSelectQuery select(
query_ptr, query_context, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreProjections().ignoreAlias());
const auto & analysis_result = select.getAnalysisResult();
/// If the first-stage query pipeline is more complex than Aggregating - Expression - Filter - ReadFromStorage, return false
if (analysis_result.join != nullptr || analysis_result.array_join != nullptr || analysis_result.before_aggregation == nullptr)
return false;
auto query_block = select.getSampleBlock();
auto required_query = select.getQuery();
// The ownership of ProjectionDescription is held in metadata_snapshot which lives with InterpreterSelect
/// Check if all needed columns can be provided by some aggregate projection. Here we also try
/// to find expression matches. For example, suppose an aggregate projection contains a column
/// named sum(x) and the given query also has an expression called sum(x); that's a match. This is
/// why we need to ignore all aliases during projection creation and the above query planning.
/// It's also worth noting that sqrt(sum(x)) will work too, because we can treat sum(x) as a
/// required column.
/// TODO: use ActionsDAG here to make a proper check.
/// The ownership of ProjectionDescription is held in metadata_snapshot, which lives along with
/// InterpreterSelect, so we can store the raw pointer here.
std::vector<std::pair<const ProjectionDescription *, ProjectionKeyActions>> candidates;
ParserFunction parse_function;
for (const auto & projection : metadata_snapshot->projections)
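
To make the name-matching comment above concrete, here is a small self-contained toy (not the actual ClickHouse code; all names are illustrative). Projection columns are matched purely by serialized expression name, so a query expression literally called sum(x) matches a projection column sum(x), and sqrt(sum(x)) is covered as long as its required input sum(x) is:

#include <string>
#include <unordered_set>
#include <vector>

/// Toy illustration of matching by expression name. Anything not covered here
/// would have to be produced from the projection's unused key columns instead.
bool covered_by_projection(const std::vector<std::string> & required_columns,
                           const std::unordered_set<std::string> & projection_columns)
{
    for (const auto & name : required_columns)
        if (projection_columns.count(name) == 0)
            return false;
    return true;
}

/// covered_by_projection({"dt_m", "sum(x)"}, {"dt_m", "sum(x)", "count()"}) -> true;
/// sqrt(sum(x)) only needs "sum(x)" as input, so it is covered as well.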
@@ -3845,17 +3859,20 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection(
/// Check if the missing columns can be produced by key columns in projection.
for (const auto & expr_name : maybe_dimension_column_exprs)
{
/// XXX We need to build an AST from string names, so we have to resort to the parser here.
Tokens tokens_number(expr_name.data(), expr_name.data() + expr_name.size());
IParser::Pos pos(tokens_number, settings.max_parser_depth);
Expected expected;
ASTPtr ast;
// It should be a function call.
/// It should be a function call; otherwise it would have already been matched by name.
if (!parse_function.parse(pos, ast, expected))
{
covered = false;
break;
}
// It should be a function call that only requires unused key columns.
// It should be a function call that only requires unused key columns, or else some key
// column might be transformed in different ways, which breaks injectivity.
if (!key_actions.add(ast, expr_name, key_block))
{
covered = false;
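
Read in isolation, the parsing step in the hunk above looks like the following sketch. The classes and calls (Tokens, IParser::Pos, Expected, ParserFunction::parse) are exactly the ones used in the hunk; the header paths, the helper name, and its signature are assumptions.

#include <string>

#include <Parsers/ExpressionElementParsers.h>  /// assumed location of ParserFunction
#include <Parsers/IParser.h>
#include <Parsers/Lexer.h>

using namespace DB;

/// Hypothetical helper: turn a serialized expression name such as
/// "toStartOfMinute(datetime)" back into an AST. Plain column names fail the
/// ParserFunction check, which is fine: they were already matched by name earlier.
ASTPtr tryParseFunctionName(const std::string & expr_name, size_t max_parser_depth)
{
    Tokens tokens(expr_name.data(), expr_name.data() + expr_name.size());
    IParser::Pos pos(tokens, max_parser_depth);
    Expected expected;
    ASTPtr ast;
    ParserFunction parser;
    if (!parser.parse(pos, ast, expected))
        return nullptr;
    return ast;
}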
@@ -3890,6 +3907,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection(
}
}
// Let's select the best aggregate projection to execute the query.
if (!candidates.empty())
{
size_t min_key_size = std::numeric_limits<size_t>::max();
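
The last hunk starts selecting the best candidate by minimal key size; the loop body is outside this diff, so below is a self-contained toy of what such a selection typically looks like. The Candidate struct and its field are made up for illustration, and the heuristic, preferring the smallest key because it means the coarsest pre-aggregation, is an assumption.

#include <cstddef>
#include <limits>
#include <vector>

/// Toy stand-in for the real candidate type; the field name is illustrative only.
struct Candidate
{
    std::size_t key_size = 0;
};

/// Pick the candidate whose projection has the fewest key columns.
const Candidate * selectBest(const std::vector<Candidate> & candidates)
{
    const Candidate * best = nullptr;
    std::size_t min_key_size = std::numeric_limits<std::size_t>::max();
    for (const auto & candidate : candidates)
    {
        if (candidate.key_size < min_key_size)
        {
            min_key_size = candidate.key_size;
            best = &candidate;
        }
    }
    return best;  /// nullptr when there are no candidates
}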

View File

@@ -6,6 +6,9 @@ insert into projection_test with rowNumberInAllBlocks() as id select toDateTime(
set allow_experimental_projection_optimization = 1, force_optimize_projection = 1;
select * from projection_test; -- { serverError 584 }
select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) from projection_test join (select 1) x using (1) where domain = '1' group by dt_m order by dt_m; -- { serverError 584 }
select toStartOfMinute(datetime) dt_m, countIf(first_time = 0) / count(), avg((kbytes * 8) / duration) from projection_test where domain = '1' group by dt_m order by dt_m;
select toStartOfMinute(datetime) dt_m, count(), sum(block_count) / sum(duration), avg(block_count / duration) from projection_test group by dt_m order by dt_m;