mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Reworking ExpressionAnalyzer (preparations) [#METR-20307].
This commit is contained in:
parent
2436d43637
commit
6e5dd26dda
@ -1,8 +1,13 @@
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
|
||||
#include <DB/Analyzers/OptimizeGroupOrderLimitBy.h>
|
||||
#include <DB/Analyzers/TypeAndConstantInference.h>
|
||||
#include <DB/Interpreters/Context.h>
|
||||
#include <DB/Parsers/ASTSelectQuery.h>
|
||||
#include <DB/Parsers/ASTFunction.h>
|
||||
#include <DB/Parsers/ASTOrderByElement.h>
|
||||
#include <DB/Parsers/ASTLiteral.h>
|
||||
#include <DB/Functions/IFunction.h>
|
||||
|
||||
|
||||
@ -46,7 +51,7 @@ static bool isDeterministicFunctionOfKeys(
|
||||
const ASTFunction * ast_function,
|
||||
const TypeAndConstantInference::ExpressionInfo & function_info,
|
||||
const TypeAndConstantInference::Info & all_info,
|
||||
const ASTs & keys)
|
||||
const std::vector<std::string> & keys)
|
||||
{
|
||||
if (!function_info.function || !function_info.function->isDeterministicInScopeOfQuery())
|
||||
return false;
|
||||
@ -61,8 +66,7 @@ static bool isDeterministicFunctionOfKeys(
|
||||
continue;
|
||||
|
||||
/// Function argument is one of keys.
|
||||
if (keys.end() != std::find_if(keys.begin(), keys.end(),
|
||||
[&child_name](const auto & key) { return key->getColumnName() == child_name; }))
|
||||
if (keys.end() != std::find(keys.begin(), keys.end(), child_name))
|
||||
continue;
|
||||
|
||||
/// Function argument is a function that deterministically depends on keys.
|
||||
@ -134,11 +138,11 @@ static void processGroupByLikeList(ASTPtr & ast, TypeAndConstantInference & expr
|
||||
}
|
||||
|
||||
/// Remove deterministic functions of other keys.
|
||||
ASTs other_keys;
|
||||
std::vector<String> other_keys;
|
||||
other_keys.reserve(elems.size() - 1);
|
||||
for (size_t j = 0, size = elems.size(); j < size; ++j)
|
||||
if (j != i)
|
||||
other_keys.emplace_back(elems[j]);
|
||||
other_keys.emplace_back(elems[j]->getColumnName());
|
||||
|
||||
if (isDeterministicFunctionOfKeys(ast_function, info, expression_info.info, other_keys))
|
||||
{
|
||||
@ -152,6 +156,84 @@ static void processGroupByLikeList(ASTPtr & ast, TypeAndConstantInference & expr
|
||||
}
|
||||
|
||||
|
||||
/** Simplifies the ORDER BY list in place.
  *
  * An element is erased from the list when:
  *  - type inference marked its expression as constant;
  *  - the same column name was already seen with empty collation or with the same collation;
  *  - it has no collation, is a function with children, and isDeterministicFunctionOfKeys
  *    reports it as determined by the preceding keys that have no collation.
  *
  * Does nothing when the list pointer is empty.
  * Throws UNEXPECTED_AST_STRUCTURE for malformed list elements and LOGICAL_ERROR
  * when the type inference results do not match the AST.
  */
static void processOrderByList(ASTPtr & ast, TypeAndConstantInference & expression_info)
{
    if (!ast)
        return;

    ASTs & elems = ast->children;

    /// (sort column name, collation) pairs of the keys met so far.
    std::set<std::pair<std::string, std::string>> unique_keys;

    /// The position is advanced only when the current element is kept:
    /// after an erase the next element moves into the same position.
    for (size_t pos = 0; pos < elems.size();)
    {
        const auto * order_by_elem = typeid_cast<const ASTOrderByElement *>(elems[pos].get());
        if (nullptr == order_by_elem)
            throw Exception("Child of ORDER BY clause is not an ASTOrderByElement", ErrorCodes::UNEXPECTED_AST_STRUCTURE);

        /// ASC|DESC and COLLATE live in the element itself; the sort expression is its first child.
        if (order_by_elem->children.empty())
            throw Exception("ORDER BY element has no children", ErrorCodes::UNEXPECTED_AST_STRUCTURE);

        const ASTPtr & key_expr = order_by_elem->children[0];

        /// Stays empty when the element has no COLLATE.
        String collation;
        if (order_by_elem->collation)
        {
            const auto * collation_literal = typeid_cast<const ASTLiteral *>(order_by_elem->collation.get());
            if (nullptr == collation_literal)
                throw Exception("Collation in ORDER BY clause is not an ASTLiteral", ErrorCodes::UNEXPECTED_AST_STRUCTURE);

            if (Field::Types::String != collation_literal->value.getType())
                throw Exception("Collation in ORDER BY clause is not a string literal", ErrorCodes::UNEXPECTED_AST_STRUCTURE);

            collation = collation_literal->value.get<String>();
        }

        String column_name = key_expr->getColumnName();

        auto found = expression_info.info.find(column_name);
        if (expression_info.info.end() == found)
            throw Exception("Type inference was not done for " + column_name, ErrorCodes::LOGICAL_ERROR);
        const TypeAndConstantInference::ExpressionInfo & key_info = found->second;

        /// Decide whether the key is a constant or a duplicate.
        /// The checks stop at the first hit, so the key is recorded in unique_keys
        /// only when both preceding checks have failed.
        bool is_redundant = false;
        if (key_info.is_constant_expression)
        {
            is_redundant = true;
        }
        else if (unique_keys.count(std::make_pair(column_name, String())))
        {
            /// Same expression was already met with empty collation.
            /// Empty collation is considered more "granular" than any special collation.
            is_redundant = true;
        }
        else if (!unique_keys.emplace(column_name, collation).second)
        {
            /// Same expression with the same collation was already met.
            is_redundant = true;
        }

        if (is_redundant)
        {
            /// key_expr is not used past this point: it refers into the erased element.
            elems.erase(elems.begin() + pos);
            continue;
        }

        if (pos > 0 && collation.empty() && key_info.function && !key_expr->children.empty())
        {
            const auto * ast_function = typeid_cast<const ASTFunction *>(key_expr.get());
            if (nullptr == ast_function)
                throw Exception("Column is marked as function during type inference, but corresponding AST node "
                    + column_name + " is not a function", ErrorCodes::LOGICAL_ERROR);

            /// Collect names of the preceding keys; keys with collation are not taken into account.
            std::vector<String> prev_keys;
            prev_keys.reserve(pos);
            for (size_t prev = 0; prev < pos; ++prev)
            {
                const auto & prev_elem = typeid_cast<const ASTOrderByElement &>(*elems[prev]);
                if (!prev_elem.collation)
                    prev_keys.emplace_back(elems[prev]->children.at(0)->getColumnName());
            }

            /// A deterministic function of the preceding keys is dropped from the list.
            if (isDeterministicFunctionOfKeys(ast_function, key_info, expression_info.info, prev_keys))
            {
                elems.erase(elems.begin() + pos);
                continue;
            }
        }

        ++pos;
    }
}
|
||||
|
||||
|
||||
void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference & expression_info)
|
||||
{
|
||||
ASTSelectQuery * select = typeid_cast<ASTSelectQuery *>(ast.get());
|
||||
@ -162,6 +244,19 @@ void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference &
|
||||
|
||||
processGroupByLikeList(select->group_expression_list, expression_info);
|
||||
processGroupByLikeList(select->limit_by_expression_list, expression_info);
|
||||
|
||||
if (select->order_expression_list)
|
||||
{
|
||||
processOrderByList(select->order_expression_list, expression_info);
|
||||
|
||||
/// ORDER BY could be completely eliminated
|
||||
if (select->order_expression_list->children.empty())
|
||||
{
|
||||
select->children.erase(std::remove(
|
||||
select->children.begin(), select->children.end(), select->order_expression_list), select->children.end());
|
||||
select->order_expression_list.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -59,8 +59,8 @@ static void processClause(ASTPtr & ast, const ASTPtr & select_expression_list, c
|
||||
throw Exception("Child of ORDER BY clause is not an ASTOrderByElement", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
|
||||
|
||||
/// It has ASC|DESC and COLLATE inplace, and expression as its only child.
|
||||
if (child->children.size() != 1)
|
||||
throw Exception("ORDER BY element has more than one children", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
|
||||
if (child->children.empty())
|
||||
throw Exception("ORDER BY element has no children", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
|
||||
|
||||
processElement(child->children[0], select_expression_list, description);
|
||||
}
|
||||
|
@ -9,3 +9,11 @@ FROM
|
||||
FROM system.numbers
|
||||
)
|
||||
GROUP BY number
|
||||
|
||||
SELECT number
|
||||
FROM system.numbers
|
||||
GROUP BY number
|
||||
ORDER BY
|
||||
number ASC,
|
||||
rand() ASC,
|
||||
toString(rand()) ASC COLLATE 'tr'
|
||||
|
@ -1,3 +1,5 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Pipe a query whose GROUP BY list contains functions of other keys, literals and an alias
# of a literal into the ./optimize_group_order_limit_by binary (expected in the current directory).
echo "SELECT number, materialize('abc') FROM (SELECT number, 10 AS b FROM system.numbers) GROUP BY number, toString(number + 1), number + number, 1, 2, 'Hello', b" | ./optimize_group_order_limit_by
|
||||
# Print an empty line between the outputs of the two runs.
echo
|
||||
# Pipe a query whose ORDER BY list mixes a plain key, a literal with COLLATE, functions of
# that key, rand() and a collated function into the same binary.
echo "SELECT number FROM system.numbers GROUP BY 1 ORDER BY number, 'hello' DESC COLLATE 'tr', number + 1, rand(), identity(number * 2, rand()), toString(rand()) COLLATE 'tr'" | ./optimize_group_order_limit_by
|
||||
|
Loading…
Reference in New Issue
Block a user