Rewrite more alias columns

This commit is contained in:
Amos Bird 2021-05-22 01:01:21 +08:00
parent e2ecc51a1f
commit e3ae2f6e7a
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
11 changed files with 63 additions and 48 deletions

View File

@ -25,35 +25,15 @@ bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
return !(node->as<ASTTableExpression>() return !(node->as<ASTTableExpression>()
|| node->as<ASTSubquery>() || node->as<ASTSubquery>()
|| node->as<ASTArrayJoin>() || node->as<ASTArrayJoin>());
|| node->as<ASTSelectQuery>()
|| node->as<ASTSelectWithUnionQuery>());
} }
void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data) void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data)
{ {
// If it's select query, only replace filters. if (auto * func = ast->as<ASTFunction>())
if (auto * query = ast->as<ASTSelectQuery>()) visit(*func, ast, data);
{ else if (auto * ident = ast->as<ASTIdentifier>())
if (query->where()) visit(*ident, ast, data);
Visitor(data).visit(query->refWhere());
if (query->prewhere())
Visitor(data).visit(query->refPrewhere());
return;
}
if (auto * node = ast->as<ASTFunction>())
{
visit(*node, ast, data);
return;
}
if (auto * node = ast->as<ASTIdentifier>())
{
visit(*node, ast, data);
return;
}
} }
void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data) void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data)
@ -81,13 +61,25 @@ void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data
{ {
if (auto column_name = IdentifierSemantic::getColumnName(node)) if (auto column_name = IdentifierSemantic::getColumnName(node))
{ {
if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name) || !data.columns.has(*column_name)) if (data.array_join_result_columns.count(*column_name) || data.array_join_source_columns.count(*column_name)
|| data.private_aliases.count(*column_name) || !data.columns.has(*column_name))
return; return;
const auto & col = data.columns.get(*column_name); const auto & col = data.columns.get(*column_name);
if (col.default_desc.kind == ColumnDefaultKind::Alias) if (col.default_desc.kind == ColumnDefaultKind::Alias)
{ {
auto alias = node.tryGetAlias();
auto alias_expr = col.default_desc.expression->clone();
auto original_column = alias_expr->getColumnName();
// If expanded alias is used in array join, avoid expansion, otherwise the column will be mis-array joined
if (data.array_join_result_columns.count(original_column) || data.array_join_source_columns.count(original_column))
return;
ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context); ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context);
if (!alias.empty())
ast->setAlias(alias);
else
ast->setAlias(*column_name);
// revisit ast to track recursive alias columns // revisit ast to track recursive alias columns
Visitor(data).visit(ast); Visitor(data).visit(ast);
} }

View File

@ -52,20 +52,23 @@ public:
{ {
const ColumnsDescription & columns; const ColumnsDescription & columns;
/// forbidden_columns are from array join, we can't rewrite alias columns involved in array join. /// columns from array_join_result_to_source cannot be expanded.
/// Do not analyze joined columns. NameSet array_join_result_columns;
/// They may have aliases and come to description as is. NameSet array_join_source_columns;
const NameSet & forbidden_columns;
ContextPtr context; ContextPtr context;
/// private_aliases are from lambda, so these are local names. /// private_aliases are from lambda, so these are local names.
NameSet private_aliases; NameSet private_aliases;
Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, ContextPtr context_) Data(const ColumnsDescription & columns_, const NameToNameMap & array_join_result_columns_, ContextPtr context_)
: columns(columns_) : columns(columns_), context(context_)
, forbidden_columns(forbidden_columns_) {
, context(context_) for (const auto & [result, source] : array_join_result_columns_)
{} {
array_join_result_columns.insert(result);
array_join_source_columns.insert(source);
}
}
}; };
static void visit(ASTPtr & ast, Data & data); static void visit(ASTPtr & ast, Data & data);

View File

@ -1636,7 +1636,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
column_expr = column_default->expression->clone(); column_expr = column_default->expression->clone();
// recursive visit for alias to alias // recursive visit for alias to alias
replaceAliasColumnsInQuery( replaceAliasColumnsInQuery(
column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), context); column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->array_join_result_to_source, context);
column_expr = addTypeConversionToAST( column_expr = addTypeConversionToAST(
std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), context); std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), context);

View File

@ -929,14 +929,15 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
/// array_join_alias_to_name, array_join_result_to_source. /// array_join_alias_to_name, array_join_result_to_source.
getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set); getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set);
setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, setJoinStrictness(
result.analyzed_join->table_join); *select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
/// rewrite filters for select query, must go after getArrayJoinedColumns /// rewrite filters for select query, must go after getArrayJoinedColumns
if (settings.optimize_respect_aliases && result.metadata_snapshot) if (settings.optimize_respect_aliases && result.metadata_snapshot)
{ {
replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), getContext()); replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext());
} }
result.aggregates = getAggregates(query, *select_query); result.aggregates = getAggregates(query, *select_query);

View File

@ -45,7 +45,7 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const Nam
auto block = actions->getSampleBlock(); auto block = actions->getSampleBlock();
auto desc_type = block.getByName(ast->getColumnName()).type; auto desc_type = block.getByName(ast->getAliasOrColumnName()).type;
if (desc_type->getName() != type_name) if (desc_type->getName() != type_name)
return addTypeConversionToAST(std::move(ast), type_name); return addTypeConversionToAST(std::move(ast), type_name);

View File

@ -6,9 +6,10 @@
namespace DB namespace DB
{ {
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, ContextPtr context) void replaceAliasColumnsInQuery(
ASTPtr & ast, const ColumnsDescription & columns, const NameToNameMap & array_join_result_to_source, ContextPtr context)
{ {
ColumnAliasesVisitor::Data aliases_column_data(columns, forbidden_columns, context); ColumnAliasesVisitor::Data aliases_column_data(columns, array_join_result_to_source, context);
ColumnAliasesVisitor aliases_column_visitor(aliases_column_data); ColumnAliasesVisitor aliases_column_visitor(aliases_column_data);
aliases_column_visitor.visit(ast); aliases_column_visitor.visit(ast);
} }

View File

@ -10,6 +10,7 @@ namespace DB
class ColumnsDescription; class ColumnsDescription;
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, ContextPtr context); void replaceAliasColumnsInQuery(
ASTPtr & ast, const ColumnsDescription & columns, const NameToNameMap & array_join_result_to_source, ContextPtr context);
} }

View File

@ -32,6 +32,9 @@ ReadInOrderOptimizer::ReadInOrderOptimizer(
/// They may have aliases and come to description as is. /// They may have aliases and come to description as is.
/// We can mismatch them with order key columns at stage of fetching columns. /// We can mismatch them with order key columns at stage of fetching columns.
forbidden_columns = syntax_result->getArrayJoinSourceNameSet(); forbidden_columns = syntax_result->getArrayJoinSourceNameSet();
// array join result columns cannot be used in alias expansion.
array_join_result_to_source = syntax_result->array_join_result_to_source;
} }
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot, ContextPtr context) const InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot, ContextPtr context) const
@ -133,7 +136,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
if (context->getSettingsRef().optimize_respect_aliases && aliased_columns.contains(required_sort_description[i].column_name)) if (context->getSettingsRef().optimize_respect_aliases && aliased_columns.contains(required_sort_description[i].column_name))
{ {
auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone(); auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone();
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), forbidden_columns, context); replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), array_join_result_to_source, context);
auto syntax_analyzer_result = TreeRewriter(context).analyze(column_expr, metadata_snapshot->getColumns().getAll()); auto syntax_analyzer_result = TreeRewriter(context).analyze(column_expr, metadata_snapshot->getColumns().getAll());
const auto expression_analyzer = ExpressionAnalyzer(column_expr, syntax_analyzer_result, context).getActions(true); const auto expression_analyzer = ExpressionAnalyzer(column_expr, syntax_analyzer_result, context).getActions(true);

View File

@ -28,6 +28,7 @@ private:
/// Actions for every element of order expression to analyze functions for monotonicity /// Actions for every element of order expression to analyze functions for monotonicity
ManyExpressionActions elements_actions; ManyExpressionActions elements_actions;
NameSet forbidden_columns; NameSet forbidden_columns;
NameToNameMap array_join_result_to_source;
SortDescription required_sort_description; SortDescription required_sort_description;
}; };
} }

View File

@ -26,13 +26,13 @@ Expression (Projection)
MergingSorted (Merge sorted streams for ORDER BY) MergingSorted (Merge sorted streams for ORDER BY)
MergeSorting (Merge sorted blocks for ORDER BY) MergeSorting (Merge sorted blocks for ORDER BY)
PartialSorting (Sort each block for ORDER BY) PartialSorting (Sort each block for ORDER BY)
Expression ((Before ORDER BY + Add table aliases)) Expression (Before ORDER BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage) SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromMergeTree ReadFromMergeTree
Expression (Projection) Expression (Projection)
Limit (preliminary LIMIT) Limit (preliminary LIMIT)
FinishSorting FinishSorting
Expression ((Before ORDER BY + Add table aliases)) Expression (Before ORDER BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage) SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromMergeTree ReadFromMergeTree
Expression (Projection) Expression (Projection)
@ -44,12 +44,12 @@ Expression (Projection)
optimize_aggregation_in_order optimize_aggregation_in_order
Expression ((Projection + Before ORDER BY)) Expression ((Projection + Before ORDER BY))
Aggregating Aggregating
Expression ((Before GROUP BY + Add table aliases)) Expression (Before GROUP BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage) SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromMergeTree ReadFromMergeTree
Expression ((Projection + Before ORDER BY)) Expression ((Projection + Before ORDER BY))
Aggregating Aggregating
Expression ((Before GROUP BY + Add table aliases)) Expression (Before GROUP BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage) SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromMergeTree ReadFromMergeTree
Expression ((Projection + Before ORDER BY)) Expression ((Projection + Before ORDER BY))
@ -60,3 +60,4 @@ Expression ((Projection + Before ORDER BY))
second-index second-index
1 1
1 1
1

View File

@ -115,3 +115,15 @@ SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1;
SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1; SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1;
DROP TABLE IF EXISTS test_index; DROP TABLE IF EXISTS test_index;
-- Check that an alias column can be used to match projections.
-- pd is a Distributed table pointing at pl; its alias column dt_m is defined as
-- toStartOfMinute(dt), the same expression used as the GROUP BY key of projection p on pl.
-- Clean up the tables this test actually creates (pd and pl), so that a rerun after an
-- interrupted run does not fail on the create statements below.
drop table if exists pd;
drop table if exists pl;
create table pd (dt DateTime, i int, dt_m DateTime alias toStartOfMinute(dt)) engine Distributed(test_shard_localhost, currentDatabase(), 'pl');
create table pl (dt DateTime, i int, projection p (select sum(i) group by toStartOfMinute(dt))) engine MergeTree order by dt;
insert into pl values ('2020-10-24', 1);
-- Group by the alias column through the Distributed table.
-- NOTE(review): force_optimize_projection = 1 presumably makes the query fail when no
-- projection is matched, instead of silently falling back -- confirm against the settings docs.
select sum(i) from pd group by dt_m settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1;
drop table pd;
drop table pl;