2015-02-18 09:43:36 +00:00
|
|
|
|
#include <DB/Interpreters/LogicalExpressionsOptimizer.h>
|
2015-02-18 16:54:42 +00:00
|
|
|
|
#include <DB/Interpreters/Settings.h>
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
2015-02-18 14:55:11 +00:00
|
|
|
|
#include <DB/Parsers/ASTFunction.h>
|
2015-02-18 09:43:36 +00:00
|
|
|
|
#include <DB/Parsers/ASTSelectQuery.h>
|
|
|
|
|
#include <DB/Parsers/ASTLiteral.h>
|
|
|
|
|
|
2015-02-18 13:16:59 +00:00
|
|
|
|
#include <DB/Core/ErrorCodes.h>
|
|
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
|
#include <deque>
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2015-02-20 11:33:21 +00:00
|
|
|
|
/// Builds the key that identifies one equality chain: the OR function node
/// together with the string identifying the left-hand expression shared by
/// the equalities of the chain.
LogicalExpressionsOptimizer::OrWithExpression::OrWithExpression(ASTFunction * or_function_, const std::string & expression_)
    : or_function(or_function_),
      expression(expression_)
{
}
|
|
|
|
|
|
2015-02-20 11:33:21 +00:00
|
|
|
|
bool LogicalExpressionsOptimizer::OrWithExpression::operator<(const OrWithExpression & rhs) const
|
2015-02-18 09:43:36 +00:00
|
|
|
|
{
|
2015-02-25 14:23:15 +00:00
|
|
|
|
return std::tie(this->or_function, this->expression) < std::tie(rhs.or_function, rhs.expression);
|
2015-02-18 09:43:36 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-18 16:54:42 +00:00
|
|
|
|
/// Stores the query to optimize and the settings consulted by the optimizer.
/// No work is done here; the query is only touched by optimizeDisjunctiveEqualityChains().
LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, const Settings & settings_)
    : select_query(select_query_),
      settings(settings_)
{
}
|
|
|
|
|
|
|
|
|
|
void LogicalExpressionsOptimizer::optimizeDisjunctiveEqualityChains()
|
|
|
|
|
{
|
2015-02-20 10:37:55 +00:00
|
|
|
|
if ((select_query == nullptr) || hasOptimizedDisjunctiveEqualityChains)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
collectDisjunctiveEqualityChains();
|
|
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
|
for (auto & chain : disjunctive_equality_chains_map)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
{
|
2015-02-18 13:53:19 +00:00
|
|
|
|
if (!mayOptimizeDisjunctiveEqualityChain(chain))
|
2015-02-18 09:43:36 +00:00
|
|
|
|
continue;
|
2015-02-19 14:55:47 +00:00
|
|
|
|
addInExpression(chain);
|
|
|
|
|
|
|
|
|
|
auto & equalities = chain.second;
|
|
|
|
|
equalities.is_processed = true;
|
2015-02-20 10:37:55 +00:00
|
|
|
|
++processed_count;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-20 10:37:55 +00:00
|
|
|
|
if (processed_count > 0)
|
|
|
|
|
{
|
|
|
|
|
cleanupOrExpressions();
|
|
|
|
|
fixBrokenOrExpressions();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hasOptimizedDisjunctiveEqualityChains = true;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void LogicalExpressionsOptimizer::collectDisjunctiveEqualityChains()
|
|
|
|
|
{
|
2015-02-18 15:11:47 +00:00
|
|
|
|
using Edge = std::pair<IAST *, IAST *>;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
std::deque<Edge> to_visit;
|
2015-02-18 12:11:16 +00:00
|
|
|
|
|
2015-02-18 15:11:47 +00:00
|
|
|
|
to_visit.push_back(Edge(nullptr, select_query));
|
2015-02-18 09:43:36 +00:00
|
|
|
|
while (!to_visit.empty())
|
|
|
|
|
{
|
2015-02-18 22:13:54 +00:00
|
|
|
|
auto edge = to_visit.back();
|
2015-02-18 15:11:47 +00:00
|
|
|
|
auto from_node = edge.first;
|
|
|
|
|
auto to_node = edge.second;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
2015-02-18 22:13:54 +00:00
|
|
|
|
to_visit.pop_back();
|
2015-02-18 23:15:20 +00:00
|
|
|
|
to_node->is_visited = true;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
2015-02-18 13:53:19 +00:00
|
|
|
|
bool found_chain = false;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
2015-02-18 15:11:47 +00:00
|
|
|
|
auto function = typeid_cast<ASTFunction *>(to_node);
|
2015-02-18 09:43:36 +00:00
|
|
|
|
if ((function != nullptr) && (function->name == "or") && (function->children.size() == 1))
|
|
|
|
|
{
|
2015-02-18 12:11:16 +00:00
|
|
|
|
auto expression_list = typeid_cast<ASTExpressionList *>(&*(function->children[0]));
|
2015-02-18 09:43:36 +00:00
|
|
|
|
if (expression_list != nullptr)
|
|
|
|
|
{
|
|
|
|
|
/// Цепочка элементов выражения OR.
|
2015-02-20 10:37:55 +00:00
|
|
|
|
for (auto & child : expression_list->children)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
{
|
2015-02-18 12:11:16 +00:00
|
|
|
|
auto equals = typeid_cast<ASTFunction *>(&*child);
|
2015-02-18 09:43:36 +00:00
|
|
|
|
if ((equals != nullptr) && (equals->name == "equals") && (equals->children.size() == 1))
|
|
|
|
|
{
|
2015-02-18 12:11:16 +00:00
|
|
|
|
auto equals_expression_list = typeid_cast<ASTExpressionList *>(&*(equals->children[0]));
|
2015-02-18 09:43:36 +00:00
|
|
|
|
if ((equals_expression_list != nullptr) && (equals_expression_list->children.size() == 2))
|
|
|
|
|
{
|
2015-02-18 13:16:59 +00:00
|
|
|
|
/// Равенство expr = xN.
|
2015-02-18 12:11:16 +00:00
|
|
|
|
auto literal = typeid_cast<ASTLiteral *>(&*(equals_expression_list->children[1]));
|
2015-02-18 09:43:36 +00:00
|
|
|
|
if (literal != nullptr)
|
|
|
|
|
{
|
2015-02-19 11:08:49 +00:00
|
|
|
|
auto expr_lhs = equals_expression_list->children[0]->getTreeID();
|
2015-02-18 14:55:11 +00:00
|
|
|
|
OrWithExpression or_with_expression(function, expr_lhs);
|
2015-02-19 14:55:47 +00:00
|
|
|
|
disjunctive_equality_chains_map[or_with_expression].functions.push_back(equals);
|
2015-02-18 13:53:19 +00:00
|
|
|
|
found_chain = true;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-18 22:13:54 +00:00
|
|
|
|
if (found_chain)
|
|
|
|
|
{
|
|
|
|
|
if (from_node != nullptr)
|
|
|
|
|
{
|
|
|
|
|
auto res = or_parent_map.insert(std::make_pair(function, ParentNodes{from_node}));
|
|
|
|
|
if (!res.second)
|
|
|
|
|
throw Exception("Parent node information is corrupted", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2015-02-18 09:43:36 +00:00
|
|
|
|
for (auto & child : to_node->children)
|
2015-02-18 22:13:54 +00:00
|
|
|
|
{
|
2015-02-18 09:43:36 +00:00
|
|
|
|
if (typeid_cast<ASTSelectQuery *>(&*child) == nullptr)
|
2015-02-18 22:13:54 +00:00
|
|
|
|
{
|
2015-02-18 23:15:20 +00:00
|
|
|
|
if (!child->is_visited)
|
2015-02-18 22:13:54 +00:00
|
|
|
|
to_visit.push_back(Edge(to_node, &*child));
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/// Если узел является функцией OR, обновляем информацию про его родителей.
|
|
|
|
|
auto it = or_parent_map.find(&*child);
|
|
|
|
|
if (it != or_parent_map.end())
|
|
|
|
|
{
|
|
|
|
|
auto & parent_nodes = it->second;
|
|
|
|
|
parent_nodes.push_back(to_node);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-02-18 09:43:36 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-18 23:15:20 +00:00
|
|
|
|
select_query->clearVisited();
|
|
|
|
|
|
2015-02-18 18:18:37 +00:00
|
|
|
|
for (auto & chain : disjunctive_equality_chains_map)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
{
|
2015-02-18 12:11:16 +00:00
|
|
|
|
auto & equalities = chain.second;
|
2015-02-19 14:55:47 +00:00
|
|
|
|
auto & equality_functions = equalities.functions;
|
|
|
|
|
std::sort(equality_functions.begin(), equality_functions.end());
|
2015-02-18 09:43:36 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-20 14:22:33 +00:00
|
|
|
|
namespace
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
ASTs & getFunctionOperands(ASTFunction * or_function)
|
|
|
|
|
{
|
|
|
|
|
auto expression_list = static_cast<ASTExpressionList *>(&*(or_function->children[0]));
|
|
|
|
|
return expression_list->children;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-18 13:53:19 +00:00
|
|
|
|
bool LogicalExpressionsOptimizer::mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const
|
2015-02-18 09:43:36 +00:00
|
|
|
|
{
|
2015-02-19 14:55:47 +00:00
|
|
|
|
const auto & equalities = chain.second;
|
|
|
|
|
const auto & equality_functions = equalities.functions;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
2015-02-18 13:53:19 +00:00
|
|
|
|
/// Исключаем слишком короткие цепочки.
|
2015-02-25 14:23:15 +00:00
|
|
|
|
if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
return false;
|
|
|
|
|
|
2015-02-18 13:53:19 +00:00
|
|
|
|
/// Проверяем, что правые части всех равенств имеют один и тот же тип.
|
2015-02-20 14:22:33 +00:00
|
|
|
|
auto & first_operands = getFunctionOperands(equality_functions[0]);
|
|
|
|
|
auto first_literal = static_cast<ASTLiteral *>(&*first_operands[1]);
|
2015-02-20 14:27:05 +00:00
|
|
|
|
for (size_t i = 1; i < equality_functions.size(); ++i)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
{
|
2015-02-20 14:27:05 +00:00
|
|
|
|
auto & operands = getFunctionOperands(equality_functions[i]);
|
2015-02-20 14:22:33 +00:00
|
|
|
|
auto literal = static_cast<ASTLiteral *>(&*operands[1]);
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
|
|
|
|
if (literal->type != first_literal->type)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
|
void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain & chain)
|
2015-02-18 14:55:11 +00:00
|
|
|
|
{
|
|
|
|
|
using ASTFunctionPtr = Poco::SharedPtr<ASTFunction>;
|
|
|
|
|
|
|
|
|
|
const auto & or_with_expression = chain.first;
|
|
|
|
|
const auto & equalities = chain.second;
|
2015-02-19 14:55:47 +00:00
|
|
|
|
const auto & equality_functions = equalities.functions;
|
2015-02-18 14:55:11 +00:00
|
|
|
|
|
2015-02-18 16:54:42 +00:00
|
|
|
|
/// 1. Создать новое выражение IN на основе информации из OR-цепочки.
|
2015-02-18 14:55:11 +00:00
|
|
|
|
|
2015-02-18 16:54:42 +00:00
|
|
|
|
/// Построить список литералов x1, ..., xN из цепочки expr = x1 OR ... OR expr = xN
|
2015-02-18 09:43:36 +00:00
|
|
|
|
ASTPtr value_list = new ASTExpressionList;
|
2015-02-20 10:37:55 +00:00
|
|
|
|
for (const auto function : equality_functions)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
{
|
2015-02-20 14:22:33 +00:00
|
|
|
|
const auto & operands = getFunctionOperands(function);
|
|
|
|
|
value_list->children.push_back(operands[1]);
|
2015-02-18 09:43:36 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-18 16:54:42 +00:00
|
|
|
|
/// Получить выражение expr из цепочки expr = x1 OR ... OR expr = xN
|
|
|
|
|
ASTPtr equals_expr_lhs;
|
|
|
|
|
{
|
2015-02-19 14:55:47 +00:00
|
|
|
|
auto function = equality_functions[0];
|
2015-02-20 14:22:33 +00:00
|
|
|
|
const auto & operands = getFunctionOperands(function);
|
|
|
|
|
equals_expr_lhs = operands[0];
|
2015-02-18 16:54:42 +00:00
|
|
|
|
}
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
|
|
|
|
ASTFunctionPtr tuple_function = new ASTFunction;
|
|
|
|
|
tuple_function->name = "tuple";
|
|
|
|
|
tuple_function->arguments = value_list;
|
|
|
|
|
tuple_function->children.push_back(tuple_function->arguments);
|
|
|
|
|
|
2015-02-18 16:54:42 +00:00
|
|
|
|
ASTPtr expression_list = new ASTExpressionList;
|
|
|
|
|
expression_list->children.push_back(equals_expr_lhs);
|
|
|
|
|
expression_list->children.push_back(tuple_function);
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
2015-02-18 16:54:42 +00:00
|
|
|
|
/// Построить выражение expr IN (x1, ..., xN)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
ASTFunctionPtr in_function = new ASTFunction;
|
|
|
|
|
in_function->name = "in";
|
2015-02-18 16:54:42 +00:00
|
|
|
|
in_function->arguments = expression_list;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
in_function->children.push_back(in_function->arguments);
|
|
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
|
/// 2. Вставить новое выражение IN.
|
2015-02-18 13:16:59 +00:00
|
|
|
|
|
2015-02-20 14:22:33 +00:00
|
|
|
|
auto & operands = getFunctionOperands(or_with_expression.or_function);
|
2015-02-18 14:55:11 +00:00
|
|
|
|
operands.push_back(in_function);
|
2015-02-19 14:55:47 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void LogicalExpressionsOptimizer::cleanupOrExpressions()
|
|
|
|
|
{
|
2015-02-20 10:53:26 +00:00
|
|
|
|
/// Сохраняет для каждой оптимизированной OR-цепочки итератор на первый элемент
|
|
|
|
|
/// списка операндов, которые надо удалить.
|
2015-02-19 14:55:47 +00:00
|
|
|
|
std::unordered_map<ASTFunction *, ASTs::iterator> garbage_map;
|
|
|
|
|
|
|
|
|
|
/// Инициализация.
|
2015-02-20 10:53:26 +00:00
|
|
|
|
garbage_map.reserve(processed_count);
|
2015-02-19 14:55:47 +00:00
|
|
|
|
for (const auto & chain : disjunctive_equality_chains_map)
|
|
|
|
|
{
|
|
|
|
|
const auto & equalities = chain.second;
|
|
|
|
|
if (!chain.second.is_processed)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
const auto & or_with_expression = chain.first;
|
2015-02-20 14:22:33 +00:00
|
|
|
|
auto & operands = getFunctionOperands(or_with_expression.or_function);
|
2015-02-19 14:55:47 +00:00
|
|
|
|
garbage_map.insert(std::make_pair(or_with_expression.or_function, operands.end()));
|
|
|
|
|
}
|
2015-02-18 11:57:44 +00:00
|
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
|
/// Собрать мусор.
|
|
|
|
|
for (const auto & chain : disjunctive_equality_chains_map)
|
2015-02-18 12:11:16 +00:00
|
|
|
|
{
|
2015-02-19 14:55:47 +00:00
|
|
|
|
const auto & equalities = chain.second;
|
|
|
|
|
if (!equalities.is_processed)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
const auto & or_with_expression = chain.first;
|
2015-02-20 14:22:33 +00:00
|
|
|
|
auto & operands = getFunctionOperands(or_with_expression.or_function);
|
2015-02-19 14:55:47 +00:00
|
|
|
|
const auto & equality_functions = equalities.functions;
|
|
|
|
|
|
|
|
|
|
auto it = garbage_map.find(or_with_expression.or_function);
|
|
|
|
|
if (it == garbage_map.end())
|
|
|
|
|
throw Exception("Garbage map is corrupted", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
|
2015-02-20 10:53:26 +00:00
|
|
|
|
auto & first_erased = it->second;
|
|
|
|
|
first_erased = std::remove_if(operands.begin(), first_erased, [&](const ASTPtr & operand)
|
2015-02-19 14:55:47 +00:00
|
|
|
|
{
|
|
|
|
|
return std::binary_search(equality_functions.begin(), equality_functions.end(), &*operand);
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Удалить мусор.
|
|
|
|
|
for (const auto & entry : garbage_map)
|
|
|
|
|
{
|
2015-02-20 10:37:55 +00:00
|
|
|
|
auto function = entry.first;
|
2015-02-20 10:53:26 +00:00
|
|
|
|
auto first_erased = entry.second;
|
2015-02-20 10:37:55 +00:00
|
|
|
|
|
2015-02-20 14:22:33 +00:00
|
|
|
|
auto & operands = getFunctionOperands(function);
|
2015-02-20 10:53:26 +00:00
|
|
|
|
operands.erase(first_erased, operands.end());
|
2015-02-19 14:55:47 +00:00
|
|
|
|
}
|
2015-02-18 11:57:44 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
|
void LogicalExpressionsOptimizer::fixBrokenOrExpressions()
|
|
|
|
|
{
|
2015-02-18 18:18:37 +00:00
|
|
|
|
for (const auto & chain : disjunctive_equality_chains_map)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
{
|
2015-02-20 10:37:55 +00:00
|
|
|
|
const auto & equalities = chain.second;
|
|
|
|
|
if (!equalities.is_processed)
|
|
|
|
|
continue;
|
|
|
|
|
|
2015-02-18 14:55:11 +00:00
|
|
|
|
const auto & or_with_expression = chain.first;
|
|
|
|
|
auto or_function = or_with_expression.or_function;
|
2015-02-20 14:22:33 +00:00
|
|
|
|
auto & operands = getFunctionOperands(or_with_expression.or_function);
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
2015-02-18 13:16:59 +00:00
|
|
|
|
if (operands.size() == 1)
|
2015-02-18 09:43:36 +00:00
|
|
|
|
{
|
2015-02-18 14:55:11 +00:00
|
|
|
|
auto it = or_parent_map.find(or_function);
|
|
|
|
|
if (it == or_parent_map.end())
|
2015-02-18 13:16:59 +00:00
|
|
|
|
throw Exception("Parent node information is corrupted", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
auto & parents = it->second;
|
|
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
|
for (auto & parent : parents)
|
|
|
|
|
{
|
2015-02-18 13:16:59 +00:00
|
|
|
|
parent->children.push_back(operands[0]);
|
2015-02-20 10:53:26 +00:00
|
|
|
|
auto first_erased = std::remove(parent->children.begin(), parent->children.end(), or_function);
|
|
|
|
|
parent->children.erase(first_erased, parent->children.end());
|
2015-02-18 09:43:36 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Если узел OR был корнем выражения WHERE, PREWHERE или HAVING, то следует обновить этот корень.
|
2015-02-18 13:16:59 +00:00
|
|
|
|
/// Из-за того, что имеем дело с направленным ациклическим графом, надо проверить все случаи.
|
2015-02-18 16:54:42 +00:00
|
|
|
|
if (!select_query->where_expression.isNull() && (or_function == &*(select_query->where_expression)))
|
2015-02-18 13:16:59 +00:00
|
|
|
|
select_query->where_expression = operands[0];
|
2015-02-18 16:54:42 +00:00
|
|
|
|
if (!select_query->prewhere_expression.isNull() && (or_function == &*(select_query->prewhere_expression)))
|
2015-02-18 13:16:59 +00:00
|
|
|
|
select_query->prewhere_expression = operands[0];
|
2015-02-18 16:54:42 +00:00
|
|
|
|
if (!select_query->having_expression.isNull() && (or_function == &*(select_query->having_expression)))
|
2015-02-18 13:16:59 +00:00
|
|
|
|
select_query->having_expression = operands[0];
|
2015-02-18 09:43:36 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|