Rewrite more alias columns

This commit is contained in:
Amos Bird 2021-05-22 01:01:21 +08:00
parent e2ecc51a1f
commit e3ae2f6e7a
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
11 changed files with 63 additions and 48 deletions

View File

@ -25,35 +25,15 @@ bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
return !(node->as<ASTTableExpression>()
|| node->as<ASTSubquery>()
|| node->as<ASTArrayJoin>()
|| node->as<ASTSelectQuery>()
|| node->as<ASTSelectWithUnionQuery>());
|| node->as<ASTArrayJoin>());
}
void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data)
{
// If it's select query, only replace filters.
if (auto * query = ast->as<ASTSelectQuery>())
{
if (query->where())
Visitor(data).visit(query->refWhere());
if (query->prewhere())
Visitor(data).visit(query->refPrewhere());
return;
}
if (auto * node = ast->as<ASTFunction>())
{
visit(*node, ast, data);
return;
}
if (auto * node = ast->as<ASTIdentifier>())
{
visit(*node, ast, data);
return;
}
if (auto * func = ast->as<ASTFunction>())
visit(*func, ast, data);
else if (auto * ident = ast->as<ASTIdentifier>())
visit(*ident, ast, data);
}
void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data)
@ -81,13 +61,25 @@ void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data
{
if (auto column_name = IdentifierSemantic::getColumnName(node))
{
if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name) || !data.columns.has(*column_name))
if (data.array_join_result_columns.count(*column_name) || data.array_join_source_columns.count(*column_name)
|| data.private_aliases.count(*column_name) || !data.columns.has(*column_name))
return;
const auto & col = data.columns.get(*column_name);
if (col.default_desc.kind == ColumnDefaultKind::Alias)
{
auto alias = node.tryGetAlias();
auto alias_expr = col.default_desc.expression->clone();
auto original_column = alias_expr->getColumnName();
// If expanded alias is used in array join, avoid expansion, otherwise the column will be mis-array joined
if (data.array_join_result_columns.count(original_column) || data.array_join_source_columns.count(original_column))
return;
ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context);
if (!alias.empty())
ast->setAlias(alias);
else
ast->setAlias(*column_name);
// revisit ast to track recursive alias columns
Visitor(data).visit(ast);
}

View File

@ -52,20 +52,23 @@ public:
{
const ColumnsDescription & columns;
/// forbidden_columns are from array join, we can't rewrite alias columns involved in array join.
/// Do not analyze joined columns.
/// They may have aliases and come to description as is.
const NameSet & forbidden_columns;
/// columns from array_join_result_to_source cannot be expanded.
NameSet array_join_result_columns;
NameSet array_join_source_columns;
ContextPtr context;
/// private_aliases are from lambda, so these are local names.
NameSet private_aliases;
Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, ContextPtr context_)
: columns(columns_)
, forbidden_columns(forbidden_columns_)
, context(context_)
{}
Data(const ColumnsDescription & columns_, const NameToNameMap & array_join_result_columns_, ContextPtr context_)
: columns(columns_), context(context_)
{
for (const auto & [result, source] : array_join_result_columns_)
{
array_join_result_columns.insert(result);
array_join_source_columns.insert(source);
}
}
};
static void visit(ASTPtr & ast, Data & data);

View File

@ -1636,7 +1636,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
column_expr = column_default->expression->clone();
// recursive visit for alias to alias
replaceAliasColumnsInQuery(
column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), context);
column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->array_join_result_to_source, context);
column_expr = addTypeConversionToAST(
std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), context);

View File

@ -929,14 +929,15 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
/// array_join_alias_to_name, array_join_result_to_source.
getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set);
setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys,
result.analyzed_join->table_join);
setJoinStrictness(
*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
/// rewrite filters for select query, must go after getArrayJoinedColumns
if (settings.optimize_respect_aliases && result.metadata_snapshot)
{
replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), getContext());
replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext());
}
result.aggregates = getAggregates(query, *select_query);

View File

@ -45,7 +45,7 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const Nam
auto block = actions->getSampleBlock();
auto desc_type = block.getByName(ast->getColumnName()).type;
auto desc_type = block.getByName(ast->getAliasOrColumnName()).type;
if (desc_type->getName() != type_name)
return addTypeConversionToAST(std::move(ast), type_name);

View File

@ -6,9 +6,10 @@
namespace DB
{
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, ContextPtr context)
void replaceAliasColumnsInQuery(
ASTPtr & ast, const ColumnsDescription & columns, const NameToNameMap & array_join_result_to_source, ContextPtr context)
{
ColumnAliasesVisitor::Data aliases_column_data(columns, forbidden_columns, context);
ColumnAliasesVisitor::Data aliases_column_data(columns, array_join_result_to_source, context);
ColumnAliasesVisitor aliases_column_visitor(aliases_column_data);
aliases_column_visitor.visit(ast);
}

View File

@ -10,6 +10,7 @@ namespace DB
class ColumnsDescription;
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, ContextPtr context);
void replaceAliasColumnsInQuery(
ASTPtr & ast, const ColumnsDescription & columns, const NameToNameMap & array_join_result_to_source, ContextPtr context);
}

View File

@ -32,6 +32,9 @@ ReadInOrderOptimizer::ReadInOrderOptimizer(
/// They may have aliases and come to description as is.
/// We can mismatch them with order key columns at stage of fetching columns.
forbidden_columns = syntax_result->getArrayJoinSourceNameSet();
// array join result columns cannot be used in alias expansion.
array_join_result_to_source = syntax_result->array_join_result_to_source;
}
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot, ContextPtr context) const
@ -133,7 +136,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
if (context->getSettingsRef().optimize_respect_aliases && aliased_columns.contains(required_sort_description[i].column_name))
{
auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone();
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), forbidden_columns, context);
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), array_join_result_to_source, context);
auto syntax_analyzer_result = TreeRewriter(context).analyze(column_expr, metadata_snapshot->getColumns().getAll());
const auto expression_analyzer = ExpressionAnalyzer(column_expr, syntax_analyzer_result, context).getActions(true);

View File

@ -28,6 +28,7 @@ private:
/// Actions for every element of order expression to analyze functions for monotonicity
ManyExpressionActions elements_actions;
NameSet forbidden_columns;
NameToNameMap array_join_result_to_source;
SortDescription required_sort_description;
};
}

View File

@ -26,13 +26,13 @@ Expression (Projection)
MergingSorted (Merge sorted streams for ORDER BY)
MergeSorting (Merge sorted blocks for ORDER BY)
PartialSorting (Sort each block for ORDER BY)
Expression ((Before ORDER BY + Add table aliases))
Expression (Before ORDER BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromMergeTree
Expression (Projection)
Limit (preliminary LIMIT)
FinishSorting
Expression ((Before ORDER BY + Add table aliases))
Expression (Before ORDER BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromMergeTree
Expression (Projection)
@ -44,12 +44,12 @@ Expression (Projection)
optimize_aggregation_in_order
Expression ((Projection + Before ORDER BY))
Aggregating
Expression ((Before GROUP BY + Add table aliases))
Expression (Before GROUP BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromMergeTree
Expression ((Projection + Before ORDER BY))
Aggregating
Expression ((Before GROUP BY + Add table aliases))
Expression (Before GROUP BY)
SettingQuotaAndLimits (Set limits and quota after reading from storage)
ReadFromMergeTree
Expression ((Projection + Before ORDER BY))
@ -60,3 +60,4 @@ Expression ((Projection + Before ORDER BY))
second-index
1
1
1

View File

@ -115,3 +115,15 @@ SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1;
SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1;
DROP TABLE IF EXISTS test_index;
-- check alias column can be used to match projections
drop table if exists p;
create table pd (dt DateTime, i int, dt_m DateTime alias toStartOfMinute(dt)) engine Distributed(test_shard_localhost, currentDatabase(), 'pl');
create table pl (dt DateTime, i int, projection p (select sum(i) group by toStartOfMinute(dt))) engine MergeTree order by dt;
insert into pl values ('2020-10-24', 1);
select sum(i) from pd group by dt_m settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1;
drop table pd;
drop table pl;