From e3ae2f6e7aa58c281bbd87e628fe990ffc833317 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 22 May 2021 01:01:21 +0800 Subject: [PATCH] Rewrite more alias columns --- src/Interpreters/ColumnAliasesVisitor.cpp | 44 ++++++++----------- src/Interpreters/ColumnAliasesVisitor.h | 21 +++++---- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 7 +-- src/Interpreters/addTypeConversionToAST.cpp | 2 +- .../replaceAliasColumnsInQuery.cpp | 5 ++- src/Interpreters/replaceAliasColumnsInQuery.h | 3 +- src/Storages/ReadInOrderOptimizer.cpp | 5 ++- src/Storages/ReadInOrderOptimizer.h | 1 + .../01576_alias_column_rewrite.reference | 9 ++-- .../01576_alias_column_rewrite.sql | 12 +++++ 11 files changed, 63 insertions(+), 48 deletions(-) diff --git a/src/Interpreters/ColumnAliasesVisitor.cpp b/src/Interpreters/ColumnAliasesVisitor.cpp index f41dbd19047..ab72595a491 100644 --- a/src/Interpreters/ColumnAliasesVisitor.cpp +++ b/src/Interpreters/ColumnAliasesVisitor.cpp @@ -25,35 +25,15 @@ bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) return !(node->as() || node->as() - || node->as() - || node->as() - || node->as()); + || node->as()); } void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data) { - // If it's select query, only replace filters. - if (auto * query = ast->as()) - { - if (query->where()) - Visitor(data).visit(query->refWhere()); - if (query->prewhere()) - Visitor(data).visit(query->refPrewhere()); - - return; - } - - if (auto * node = ast->as()) - { - visit(*node, ast, data); - return; - } - - if (auto * node = ast->as()) - { - visit(*node, ast, data); - return; - } + if (auto * func = ast->as()) + visit(*func, ast, data); + else if (auto * ident = ast->as()) + visit(*ident, ast, data); } void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data) @@ -81,13 +61,25 @@ void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data { if (auto column_name = IdentifierSemantic::getColumnName(node)) { - if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name) || !data.columns.has(*column_name)) + if (data.array_join_result_columns.count(*column_name) || data.array_join_source_columns.count(*column_name) + || data.private_aliases.count(*column_name) || !data.columns.has(*column_name)) return; const auto & col = data.columns.get(*column_name); if (col.default_desc.kind == ColumnDefaultKind::Alias) { + auto alias = node.tryGetAlias(); + auto alias_expr = col.default_desc.expression->clone(); + auto original_column = alias_expr->getColumnName(); + // If expanded alias is used in array join, avoid expansion, otherwise the column will be mis-array joined + if (data.array_join_result_columns.count(original_column) || data.array_join_source_columns.count(original_column)) + return; ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context); + if (!alias.empty()) + ast->setAlias(alias); + else + ast->setAlias(*column_name); + // revisit ast to track recursive alias columns Visitor(data).visit(ast); } diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index a1f8e79f64c..e340ab0daa0 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -52,20 +52,23 @@ public: { const ColumnsDescription & columns; - /// forbidden_columns are from array join, we can't rewrite alias columns involved in array join. - /// Do not analyze joined columns. - /// They may have aliases and come to description as is. - const NameSet & forbidden_columns; + /// columns from array_join_result_to_source cannot be expanded. + NameSet array_join_result_columns; + NameSet array_join_source_columns; ContextPtr context; /// private_aliases are from lambda, so these are local names. NameSet private_aliases; - Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, ContextPtr context_) - : columns(columns_) - , forbidden_columns(forbidden_columns_) - , context(context_) - {} + Data(const ColumnsDescription & columns_, const NameToNameMap & array_join_result_columns_, ContextPtr context_) + : columns(columns_), context(context_) + { + for (const auto & [result, source] : array_join_result_columns_) + { + array_join_result_columns.insert(result); + array_join_source_columns.insert(source); + } + } }; static void visit(ASTPtr & ast, Data & data); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 85b9026c642..a7c1202276e 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1636,7 +1636,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() column_expr = column_default->expression->clone(); // recursive visit for alias to alias replaceAliasColumnsInQuery( - column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), context); + column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->array_join_result_to_source, context); column_expr = addTypeConversionToAST( std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), context); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 81186a239c9..a8f5389ed10 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -929,14 +929,15 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( /// array_join_alias_to_name, array_join_result_to_source. getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set); - setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, - result.analyzed_join->table_join); + setJoinStrictness( + *select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join); + collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); /// rewrite filters for select query, must go after getArrayJoinedColumns if (settings.optimize_respect_aliases && result.metadata_snapshot) { - replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), getContext()); + replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext()); } result.aggregates = getAggregates(query, *select_query); diff --git a/src/Interpreters/addTypeConversionToAST.cpp b/src/Interpreters/addTypeConversionToAST.cpp index 295ac858e28..ba67ec762a9 100644 --- a/src/Interpreters/addTypeConversionToAST.cpp +++ b/src/Interpreters/addTypeConversionToAST.cpp @@ -45,7 +45,7 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const Nam auto block = actions->getSampleBlock(); - auto desc_type = block.getByName(ast->getColumnName()).type; + auto desc_type = block.getByName(ast->getAliasOrColumnName()).type; if (desc_type->getName() != type_name) return addTypeConversionToAST(std::move(ast), type_name); diff --git a/src/Interpreters/replaceAliasColumnsInQuery.cpp b/src/Interpreters/replaceAliasColumnsInQuery.cpp index 0bc8828c878..3f789ec3d4f 100644 --- a/src/Interpreters/replaceAliasColumnsInQuery.cpp +++ b/src/Interpreters/replaceAliasColumnsInQuery.cpp @@ -6,9 +6,10 @@ namespace DB { -void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, ContextPtr context) +void replaceAliasColumnsInQuery( + ASTPtr & ast, const ColumnsDescription & columns, const NameToNameMap & array_join_result_to_source, ContextPtr context) { - ColumnAliasesVisitor::Data aliases_column_data(columns, forbidden_columns, context); + ColumnAliasesVisitor::Data aliases_column_data(columns, array_join_result_to_source, context); ColumnAliasesVisitor aliases_column_visitor(aliases_column_data); aliases_column_visitor.visit(ast); } diff --git a/src/Interpreters/replaceAliasColumnsInQuery.h b/src/Interpreters/replaceAliasColumnsInQuery.h index 92d2686b45b..fadebe3c9e6 100644 --- a/src/Interpreters/replaceAliasColumnsInQuery.h +++ b/src/Interpreters/replaceAliasColumnsInQuery.h @@ -10,6 +10,7 @@ namespace DB class ColumnsDescription; -void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, ContextPtr context); +void replaceAliasColumnsInQuery( + ASTPtr & ast, const ColumnsDescription & columns, const NameToNameMap & array_join_result_to_source, ContextPtr context); } diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index 3bb7034b588..87273330b34 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -32,6 +32,9 @@ ReadInOrderOptimizer::ReadInOrderOptimizer( /// They may have aliases and come to description as is. /// We can mismatch them with order key columns at stage of fetching columns. forbidden_columns = syntax_result->getArrayJoinSourceNameSet(); + + // array join result columns cannot be used in alias expansion. + array_join_result_to_source = syntax_result->array_join_result_to_source; } InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot, ContextPtr context) const @@ -133,7 +136,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & if (context->getSettingsRef().optimize_respect_aliases && aliased_columns.contains(required_sort_description[i].column_name)) { auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone(); - replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), forbidden_columns, context); + replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), array_join_result_to_source, context); auto syntax_analyzer_result = TreeRewriter(context).analyze(column_expr, metadata_snapshot->getColumns().getAll()); const auto expression_analyzer = ExpressionAnalyzer(column_expr, syntax_analyzer_result, context).getActions(true); diff --git a/src/Storages/ReadInOrderOptimizer.h b/src/Storages/ReadInOrderOptimizer.h index 0af1121db32..0abf2923a98 100644 --- a/src/Storages/ReadInOrderOptimizer.h +++ b/src/Storages/ReadInOrderOptimizer.h @@ -28,6 +28,7 @@ private: /// Actions for every element of order expression to analyze functions for monotonicity ManyExpressionActions elements_actions; NameSet forbidden_columns; + NameToNameMap array_join_result_to_source; SortDescription required_sort_description; }; } diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 9d3db8c1d00..ef598570b10 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -26,13 +26,13 @@ Expression (Projection) MergingSorted (Merge sorted streams for ORDER BY) MergeSorting (Merge sorted blocks for ORDER BY) PartialSorting (Sort each block for ORDER BY) - Expression ((Before ORDER BY + Add table aliases)) + Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromMergeTree Expression (Projection) Limit (preliminary LIMIT) FinishSorting - Expression ((Before ORDER BY + Add table aliases)) + Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromMergeTree Expression (Projection) @@ -44,12 +44,12 @@ Expression (Projection) optimize_aggregation_in_order Expression ((Projection + Before ORDER BY)) Aggregating - Expression ((Before GROUP BY + Add table aliases)) + Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromMergeTree Expression ((Projection + Before ORDER BY)) Aggregating - Expression ((Before GROUP BY + Add table aliases)) + Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromMergeTree Expression ((Projection + Before ORDER BY)) @@ -60,3 +60,4 @@ Expression ((Projection + Before ORDER BY)) second-index 1 1 +1 diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index fabe20010af..cab32db0192 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -115,3 +115,15 @@ SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1; SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1; DROP TABLE IF EXISTS test_index; + +-- check alias column can be used to match projections +drop table if exists p; +create table pd (dt DateTime, i int, dt_m DateTime alias toStartOfMinute(dt)) engine Distributed(test_shard_localhost, currentDatabase(), 'pl'); +create table pl (dt DateTime, i int, projection p (select sum(i) group by toStartOfMinute(dt))) engine MergeTree order by dt; + +insert into pl values ('2020-10-24', 1); + +select sum(i) from pd group by dt_m settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; + +drop table pd; +drop table pl;