Merge pull request #27530 from kssenii/col-identifier-as-col-number

Allow positional arguments for group by, order by, limit by
This commit is contained in:
Kseniia Sumarokova 2021-08-20 21:08:58 +03:00 committed by GitHub
commit 38a35c1d9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 194 additions and 4 deletions

View File

@ -114,6 +114,7 @@ class IColumn;
M(UInt64, group_by_two_level_threshold_bytes, 50000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.", 0) \
M(Bool, distributed_aggregation_memory_efficient, true, "Is the memory-saving mode of distributed aggregation enabled.", 0) \
M(UInt64, aggregation_memory_efficient_merge_threads, 0, "Number of threads to use for merge intermediate aggregation results in memory efficient mode. When bigger, then more memory is consumed. 0 means - same as 'max_threads'.", 0) \
M(Bool, enable_positional_arguments, false, "Enable positional arguments in ORDER BY, GROUP BY and LIMIT BY", 0) \
\
M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \
M(UInt64, parallel_replicas_count, 0, "", 0) \

View File

@ -162,6 +162,36 @@ ExpressionAnalyzer::ExpressionAnalyzer(
analyzeAggregation();
}
static ASTPtr checkPositionalArgument(ASTPtr argument, const ASTSelectQuery * select_query, ASTSelectQuery::Expression expression)
{
auto columns = select_query->select()->children;
/// Case when GROUP BY element is position.
/// Do not consider case when GROUP BY element is not a literal, but expression, even if all values are constants.
if (const auto * ast_literal = typeid_cast<const ASTLiteral *>(argument.get()))
{
auto which = ast_literal->value.getType();
if (which == Field::Types::UInt64)
{
auto pos = ast_literal->value.get<UInt64>();
if (pos > 0 && pos <= columns.size())
{
const auto & column = columns[--pos];
if (const auto * literal_ast = typeid_cast<const ASTIdentifier *>(column.get()))
{
return std::make_shared<ASTIdentifier>(literal_ast->name());
}
else
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal value for positional argument in {}",
ASTSelectQuery::expressionToString(expression));
}
}
/// Do not throw if out of bounds, see appendUnusedGroupByColumn.
}
}
return nullptr;
}
void ExpressionAnalyzer::analyzeAggregation()
{
@ -238,13 +268,22 @@ void ExpressionAnalyzer::analyzeAggregation()
{
NameSet unique_keys;
ASTs & group_asts = select_query->groupBy()->children;
for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i)
{
ssize_t size = group_asts.size();
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
if (getContext()->getSettingsRef().enable_positional_arguments)
{
auto new_argument = checkPositionalArgument(group_asts[i], select_query, ASTSelectQuery::Expression::GROUP_BY);
if (new_argument)
group_asts[i] = new_argument;
}
const auto & column_name = group_asts[i]->getColumnName();
const auto * node = temp_actions->tryFindInIndex(column_name);
if (!node)
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
@ -1223,11 +1262,20 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
bool with_fill = false;
NameSet order_by_keys;
for (auto & child : select_query->orderBy()->children)
{
const auto * ast = child->as<ASTOrderByElement>();
auto * ast = child->as<ASTOrderByElement>();
if (!ast || ast->children.empty())
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
if (getContext()->getSettingsRef().enable_positional_arguments)
{
auto new_argument = checkPositionalArgument(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY);
if (new_argument)
ast->children[0] = new_argument;
}
ASTPtr order_expression = ast->children.at(0);
step.addRequiredOutput(order_expression->getColumnName());
@ -1277,8 +1325,16 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
aggregated_names.insert(column.name);
}
for (const auto & child : select_query->limitBy()->children)
auto & children = select_query->limitBy()->children;
for (auto & child : children)
{
if (getContext()->getSettingsRef().enable_positional_arguments)
{
auto new_argument = checkPositionalArgument(child, select_query, ASTSelectQuery::Expression::LIMIT_BY);
if (new_argument)
child = new_argument;
}
auto child_name = child->getColumnName();
if (!aggregated_names.count(child_name))
step.addRequiredOutput(std::move(child_name));

View File

@ -69,7 +69,9 @@ const std::unordered_set<String> possibly_injective_function_names
void appendUnusedGroupByColumn(ASTSelectQuery * select_query, const NameSet & source_columns)
{
/// You must insert a constant that is not the name of the column in the table. Such a case is rare, but it happens.
UInt64 unused_column = 0;
/// Also start unused_column integer from source_columns.size() + 1, because lower numbers ([1, source_columns.size()])
/// might be in positional GROUP BY.
UInt64 unused_column = source_columns.size() + 1;
String unused_column_name = toString(unused_column);
while (source_columns.count(unused_column_name))
@ -111,6 +113,8 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum
group_exprs.pop_back();
};
const auto & settings = context->getSettingsRef();
/// iterate over each GROUP BY expression, eliminate injective function calls and literals
for (size_t i = 0; i < group_exprs.size();)
{
@ -166,6 +170,21 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum
}
else if (is_literal(group_exprs[i]))
{
bool keep_position = false;
if (settings.enable_positional_arguments)
{
const auto & value = group_exprs[i]->as<ASTLiteral>()->value;
if (value.getType() == Field::Types::UInt64)
{
auto pos = value.get<UInt64>();
if (pos > 0 && pos <= select_query->children.size())
keep_position = true;
}
}
if (keep_position)
++i;
else
remove_expr_at_index(i);
}
else

View File

@ -35,6 +35,44 @@ public:
SETTINGS
};
static String expressionToString(Expression expr)
{
switch (expr)
{
case Expression::WITH:
return "WITH";
case Expression::SELECT:
return "SELECT";
case Expression::TABLES:
return "TABLES";
case Expression::PREWHERE:
return "PREWHERE";
case Expression::WHERE:
return "WHERE";
case Expression::GROUP_BY:
return "GROUP BY";
case Expression::HAVING:
return "HAVING";
case Expression::WINDOW:
return "WINDOW";
case Expression::ORDER_BY:
return "ORDER BY";
case Expression::LIMIT_BY_OFFSET:
return "LIMIT BY OFFSET";
case Expression::LIMIT_BY_LENGTH:
return "LIMIT BY LENGTH";
case Expression::LIMIT_BY:
return "LIMIT BY";
case Expression::LIMIT_OFFSET:
return "LIMIT OFFSET";
case Expression::LIMIT_LENGTH:
return "LIMIT LENGTH";
case Expression::SETTINGS:
return "SETTINGS";
}
return "";
}
/** Get the text that identifies this element. */
String getID(char) const override { return "SelectQuery"; }

View File

@ -0,0 +1,50 @@
-- { echo }
select x3, x2, x1 from test order by 1;
1 100 100
10 1 10
100 10 1
select x3, x2, x1 from test order by x3;
1 100 100
10 1 10
100 10 1
select x3, x2, x1 from test order by 1 desc;
100 10 1
10 1 10
1 100 100
select x3, x2, x1 from test order by x3 desc;
100 10 1
10 1 10
1 100 100
insert into test values (1, 10, 200), (10, 1, 200), (100, 100, 1);
select x3, x2 from test group by x3, x2;
200 1
10 1
200 10
1 100
100 10
select x3, x2 from test group by 1, 2;
200 1
10 1
200 10
1 100
100 10
select x1, x2, x3 from test order by x3 limit 1 by x3;
100 100 1
10 1 10
1 10 100
1 10 200
select x1, x2, x3 from test order by 3 limit 1 by 3;
100 100 1
10 1 10
1 10 100
1 10 200
select x1, x2, x3 from test order by x3 limit 1 by x1;
100 100 1
10 1 10
1 10 100
select x1, x2, x3 from test order by 3 limit 1 by 1;
100 100 1
10 1 10
1 10 100
select max(x3), max(x2), max(x1) from test group by 1; -- { serverError 43 }
select max(x1) from test order by 1; -- { serverError 43 }

View File

@ -0,0 +1,26 @@
set enable_positional_arguments = 1;
drop table if exists test;
create table test(x1 Int, x2 Int, x3 Int) engine=Memory();
insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
-- { echo }
select x3, x2, x1 from test order by 1;
select x3, x2, x1 from test order by x3;
select x3, x2, x1 from test order by 1 desc;
select x3, x2, x1 from test order by x3 desc;
insert into test values (1, 10, 200), (10, 1, 200), (100, 100, 1);
select x3, x2 from test group by x3, x2;
select x3, x2 from test group by 1, 2;
select x1, x2, x3 from test order by x3 limit 1 by x3;
select x1, x2, x3 from test order by 3 limit 1 by 3;
select x1, x2, x3 from test order by x3 limit 1 by x1;
select x1, x2, x3 from test order by 3 limit 1 by 1;
select max(x3), max(x2), max(x1) from test group by 1; -- { serverError 43 }
select max(x1) from test order by 1; -- { serverError 43 }