Reworking ExpressionAnalyzer (preparations) [#METR-20307].

This commit is contained in:
Alexey Milovidov 2017-01-07 02:47:16 +03:00
parent 2436d43637
commit 6e5dd26dda
4 changed files with 112 additions and 7 deletions

View File

@ -1,8 +1,13 @@
#include <set>
#include <unordered_set>
#include <DB/Analyzers/OptimizeGroupOrderLimitBy.h>
#include <DB/Analyzers/TypeAndConstantInference.h>
#include <DB/Interpreters/Context.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTOrderByElement.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Functions/IFunction.h>
@ -46,7 +51,7 @@ static bool isDeterministicFunctionOfKeys(
const ASTFunction * ast_function,
const TypeAndConstantInference::ExpressionInfo & function_info,
const TypeAndConstantInference::Info & all_info,
const ASTs & keys)
const std::vector<std::string> & keys)
{
if (!function_info.function || !function_info.function->isDeterministicInScopeOfQuery())
return false;
@ -61,8 +66,7 @@ static bool isDeterministicFunctionOfKeys(
continue;
/// Function argument is one of keys.
if (keys.end() != std::find_if(keys.begin(), keys.end(),
[&child_name](const auto & key) { return key->getColumnName() == child_name; }))
if (keys.end() != std::find(keys.begin(), keys.end(), child_name))
continue;
/// Function argument is a function, that deterministically depend on keys.
@ -134,11 +138,11 @@ static void processGroupByLikeList(ASTPtr & ast, TypeAndConstantInference & expr
}
/// Remove deterministic functions of another keys.
ASTs other_keys;
std::vector<String> other_keys;
other_keys.reserve(elems.size() - 1);
for (size_t j = 0, size = elems.size(); j < size; ++j)
if (j != i)
other_keys.emplace_back(elems[j]);
other_keys.emplace_back(elems[j]->getColumnName());
if (isDeterministicFunctionOfKeys(ast_function, info, expression_info.info, other_keys))
{
@ -152,6 +156,84 @@ static void processGroupByLikeList(ASTPtr & ast, TypeAndConstantInference & expr
}
/** Simplifies the ORDER BY list in place. An element is dropped when:
  *  - its expression is marked as a constant expression by type inference;
  *  - the same expression was already registered with an empty collation or with the same collation;
  *  - it has no collation, is a function with arguments, and isDeterministicFunctionOfKeys
  *    reports it as a deterministic function of the preceding keys that have no collation.
  *
  * Does nothing if 'ast' is null.
  * Throws UNEXPECTED_AST_STRUCTURE if a child is not an ASTOrderByElement, has no children,
  *  or has a collation that is not a string literal.
  * Throws LOGICAL_ERROR if there is no type inference result for an expression,
  *  or if the inference result says "function" while the AST node is not an ASTFunction.
  */
static void processOrderByList(ASTPtr & ast, TypeAndConstantInference & expression_info)
{
    if (!ast)
        return;

    ASTs & order_by_elems = ast->children;

    /// Registered sort keys as pairs (column name, collation).
    std::set<std::pair<std::string, std::string>> registered_keys;

    /// 'pos' is advanced only when the current element is kept; erasing shifts the next element into 'pos'.
    for (size_t pos = 0; pos < order_by_elems.size();)
    {
        const ASTOrderByElement * order_by_elem = typeid_cast<const ASTOrderByElement *>(order_by_elems[pos].get());
        if (!order_by_elem)
            throw Exception("Child of ORDER BY clause is not an ASTOrderByElement", ErrorCodes::UNEXPECTED_AST_STRUCTURE);

        /// ASC|DESC and COLLATE are stored in the element itself; the sort expression is its first child.
        if (order_by_elem->children.empty())
            throw Exception("ORDER BY element has no children", ErrorCodes::UNEXPECTED_AST_STRUCTURE);

        const ASTPtr & expr = order_by_elem->children[0];

        /// Stays empty when no COLLATE is specified.
        String collation;
        if (order_by_elem->collation)
        {
            const ASTLiteral * collation_literal = typeid_cast<const ASTLiteral *>(order_by_elem->collation.get());
            if (!collation_literal)
                throw Exception("Collation in ORDER BY clause is not an ASTLiteral", ErrorCodes::UNEXPECTED_AST_STRUCTURE);

            if (collation_literal->value.getType() != Field::Types::String)
                throw Exception("Collation in ORDER BY clause is not a string literal", ErrorCodes::UNEXPECTED_AST_STRUCTURE);

            collation = collation_literal->value.get<String>();
        }

        String column_name = expr->getColumnName();

        auto found = expression_info.info.find(column_name);
        if (found == expression_info.info.end())
            throw Exception("Type inference was not done for " + column_name, ErrorCodes::LOGICAL_ERROR);

        const TypeAndConstantInference::ExpressionInfo & info = found->second;

        /// Constants and duplicates are redundant. The checks are evaluated in this exact order,
        ///  so the key is registered only when the two preceding checks did not fire.
        bool redundant = info.is_constant_expression;

        /// Same expression was already registered with empty collation.
        /// Empty collation is considered more "granular" than any special collation.
        if (!redundant)
            redundant = registered_keys.count(std::make_pair(column_name, String())) != 0;

        /// Same expression with the same collation. This also registers the key if it is new.
        if (!redundant)
            redundant = !registered_keys.emplace(column_name, collation).second;

        if (redundant)
        {
            order_by_elems.erase(order_by_elems.begin() + pos);
            continue;
        }

        if (pos > 0 && collation.empty() && info.function && !expr->children.empty())
        {
            const ASTFunction * ast_function = typeid_cast<const ASTFunction *>(expr.get());
            if (!ast_function)
                throw Exception("Column is marked as function during type inference, but corresponding AST node "
                    + column_name + " is not a function", ErrorCodes::LOGICAL_ERROR);

            /// Names of preceding keys; keys with collation are not taken into account.
            std::vector<String> earlier_keys;
            earlier_keys.reserve(pos);
            for (size_t j = 0; j < pos; ++j)
            {
                const ASTOrderByElement & earlier_elem = typeid_cast<const ASTOrderByElement &>(*order_by_elems[j]);
                if (earlier_elem.collation)
                    continue;

                earlier_keys.emplace_back(earlier_elem.children.at(0)->getColumnName());
            }

            /// Drop a function that is deterministic with respect to the preceding keys.
            if (isDeterministicFunctionOfKeys(ast_function, info, expression_info.info, earlier_keys))
            {
                order_by_elems.erase(order_by_elems.begin() + pos);
                continue;
            }
        }

        ++pos;
    }
}
void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference & expression_info)
{
ASTSelectQuery * select = typeid_cast<ASTSelectQuery *>(ast.get());
@ -162,6 +244,19 @@ void OptimizeGroupOrderLimitBy::process(ASTPtr & ast, TypeAndConstantInference &
processGroupByLikeList(select->group_expression_list, expression_info);
processGroupByLikeList(select->limit_by_expression_list, expression_info);
if (select->order_expression_list)
{
processOrderByList(select->order_expression_list, expression_info);
/// ORDER BY could be completely eliminated
if (select->order_expression_list->children.empty())
{
select->children.erase(std::remove(
select->children.begin(), select->children.end(), select->order_expression_list), select->children.end());
select->order_expression_list.reset();
}
}
}

View File

@ -59,8 +59,8 @@ static void processClause(ASTPtr & ast, const ASTPtr & select_expression_list, c
throw Exception("Child of ORDER BY clause is not an ASTOrderByElement", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
/// It has ASC|DESC and COLLATE inplace, and expression as its only child.
if (child->children.size() != 1)
throw Exception("ORDER BY element has more than one children", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
if (child->children.empty())
throw Exception("ORDER BY element has no children", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
processElement(child->children[0], select_expression_list, description);
}

View File

@ -9,3 +9,11 @@ FROM
FROM system.numbers
)
GROUP BY number
SELECT number
FROM system.numbers
GROUP BY number
ORDER BY
number ASC,
rand() ASC,
toString(rand()) ASC COLLATE 'tr'

View File

@ -1,3 +1,5 @@
#!/bin/sh
echo "SELECT number, materialize('abc') FROM (SELECT number, 10 AS b FROM system.numbers) GROUP BY number, toString(number + 1), number + number, 1, 2, 'Hello', b" | ./optimize_group_order_limit_by
echo
echo "SELECT number FROM system.numbers GROUP BY 1 ORDER BY number, 'hello' DESC COLLATE 'tr', number + 1, rand(), identity(number * 2, rand()), toString(rand()) COLLATE 'tr'" | ./optimize_group_order_limit_by