Apply review advice

2024-11-24 08:32:02 +00:00 · 2020-12-15 16:35:19 +08:00 · 2020-12-15 16:35:19 +08:00 · d72d8ee7d8
commit d72d8ee7d8
parent 041f5c8843
6 changed files with 71 additions and 17 deletions
--- a/src/Interpreters/ColumnAliasesVisitor.cpp
+++ b/src/Interpreters/ColumnAliasesVisitor.cpp
@ -33,7 +33,6 @@ bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)

 void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data)
 {
-    auto aa = queryToString(ast);
    // If it's select query, only replace filters.
    if (auto * query = ast->as<ASTSelectQuery>())
    {
@ -64,11 +63,14 @@ void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & da
    if (node.name == "lambda")
    {
        Names local_aliases;
-        for (const auto & name : RequiredSourceColumnsMatcher::extractNamesFromLambda(node))
+        auto names_from_lambda = RequiredSourceColumnsMatcher::extractNamesFromLambda(node);
+        for (const auto & name : names_from_lambda)
+        {
            if (data.private_aliases.insert(name).second)
            {
                local_aliases.push_back(name);
            }
+        }
        /// visit child with masked local aliases
        Visitor(data).visit(node.arguments->children[1]);
        for (const auto & name : local_aliases)
--- a/src/Interpreters/ColumnAliasesVisitor.h
+++ b/src/Interpreters/ColumnAliasesVisitor.h
@ -51,9 +51,14 @@ public:
    struct Data
    {
        const ColumnsDescription & columns;
+
+        /// forbidden_columns come from ARRAY JOIN; alias columns involved in ARRAY JOIN cannot be rewritten.
+        /// Do not analyze joined columns.
+        /// They may have aliases and come to description as is.
        const NameSet & forbidden_columns;
        const Context & context;

+        /// private_aliases are from lambda, so these are local names.
        NameSet private_aliases;

        Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, const Context & context_)
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@ -725,7 +725,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
                        result.analyzed_join->table_join);
    collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);

-    /// rewrite filters for select query, must after getArrayJoinedColumns
+    /// rewrite filters for select query, must go after getArrayJoinedColumns
    if (settings.optimize_alias_column_prediction && result.metadata_snapshot)
    {
        replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context);
--- a/src/Storages/ReadInOrderOptimizer.cpp
+++ b/src/Storages/ReadInOrderOptimizer.cpp
@ -123,6 +123,8 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
        bool ok;
        /// check if it's alias column
        /// currently we only support alias column without any function wrapper
+        /// i.e., `order by aliased_column` can have this optimization, but `order by function(aliased_column)` cannot.
+        /// This suits most cases.
        if (context.getSettingsRef().optimize_alias_column_prediction && aliase_columns.contains(required_sort_description[i].column_name))
        {
            auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone();
--- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference
+++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference
@ -21,14 +21,54 @@ array-join
 lambda
 1
 optimize_read_in_order
-2020-01-01
+Expression (Projection)
+  Limit (preliminary LIMIT)
+    MergingSorted (Merge sorted streams for ORDER BY)
+      MergeSorting (Merge sorted blocks for ORDER BY)
+        PartialSorting (Sort each block for ORDER BY)
+          Expression (Before ORDER BY and SELECT + Add table aliases)
+            SettingQuotaAndLimits (Set limits and quota after reading from storage)
+              ReadFromStorage (MergeTree)
+Expression (Projection)
+  Limit (preliminary LIMIT)
+    FinishSorting
+      Expression (Before ORDER BY and SELECT + Add table aliases)
+        SettingQuotaAndLimits (Set limits and quota after reading from storage)
+          Union
+            ReadFromStorage (MergeTree  with order)
+            ReadFromStorage (MergeTree  with order)
+            ReadFromStorage (MergeTree  with order)
+Expression (Projection)
+  Limit (preliminary LIMIT)
+    FinishSorting
+      Expression (Before ORDER BY and SELECT)
+        SettingQuotaAndLimits (Set limits and quota after reading from storage)
+          Union
+            ReadFromStorage (MergeTree  with order)
+            ReadFromStorage (MergeTree  with order)
+            ReadFromStorage (MergeTree  with order)
 optimize_aggregation_in_order
-2020-01-01	10
-2020-01-02	10
-2020-01-03	10
-2020-01-01	10
-2020-01-02	10
-2020-01-03	10
+Expression (Projection + Before ORDER BY and SELECT)
+  Aggregating
+    Expression (Before GROUP BY + Add table aliases)
+      SettingQuotaAndLimits (Set limits and quota after reading from storage)
+        ReadFromStorage (MergeTree)
+Expression (Projection + Before ORDER BY and SELECT)
+  Aggregating
+    Expression (Before GROUP BY + Add table aliases)
+      SettingQuotaAndLimits (Set limits and quota after reading from storage)
+        Union
+          ReadFromStorage (MergeTree  with order)
+          ReadFromStorage (MergeTree  with order)
+          ReadFromStorage (MergeTree  with order)
+Expression (Projection + Before ORDER BY and SELECT)
+  Aggregating
+    Expression (Before GROUP BY)
+      SettingQuotaAndLimits (Set limits and quota after reading from storage)
+        Union
+          ReadFromStorage (MergeTree  with order)
+          ReadFromStorage (MergeTree  with order)
+          ReadFromStorage (MergeTree  with order)
 second-index
 1
 1
--- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql
+++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql
@ -69,28 +69,33 @@ SELECT day1 = '2020-01-04' FROM test_table PREWHERE day1 = '2020-01-04'  WHERE d
 ALTER TABLE test_table add column array Array(UInt8) default [1, 2, 3];
 ALTER TABLE test_table add column struct.key Array(UInt8) default [2, 4, 6], add column struct.value Array(UInt8) alias array;

+
 SELECT 'array-join';
 set max_rows_to_read = 10;
 SELECT count() == 10 FROM test_table WHERE day = '2020-01-01';
 SELECT sum(struct.key) == 30, sum(struct.value) == 30 FROM (SELECT struct.key, struct.value FROM test_table array join struct WHERE day = '2020-01-01');

+
 SELECT 'lambda';
 -- lambda parameters in filter should not be rewrite
 SELECT count() == 10 FROM test_table WHERE  arrayMap((day) -> day + 1, [1,2,3]) [1] = 2 AND day = '2020-01-03';

 set max_rows_to_read = 0;
-- how to test it? currently just check logs, eg: 00940_order_by_read_in_order
+
 SELECT 'optimize_read_in_order';
-SET optimize_read_in_order = 1;
-SELECT day AS s FROM test_table ORDER BY s LIMIT 1;
+EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 0;
+EXPLAIN SELECT day AS s FROM test_table ORDER BY s LIMIT 1 SETTINGS optimize_read_in_order = 1;
+EXPLAIN SELECT toDate(timestamp) AS s FROM test_table ORDER BY toDate(timestamp) LIMIT 1 SETTINGS optimize_read_in_order = 1;
+

 SELECT 'optimize_aggregation_in_order';
-SET optimize_aggregation_in_order = 1;
-SELECT day, count() AS s FROM test_table GROUP BY day;
-SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp);
+EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 0;
+EXPLAIN SELECT day, count() AS s FROM test_table GROUP BY day SETTINGS optimize_aggregation_in_order = 1;
+EXPLAIN SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp) SETTINGS optimize_aggregation_in_order = 1;

 DROP TABLE test_table;

+
 SELECT 'second-index';
 DROP TABLE IF EXISTS test_index;
 CREATE TABLE test_index