2018-03-04 16:15:31 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
|
|
|
#include <Storages/IStorage.h>
|
|
|
|
#include <Interpreters/PredicateExpressionsOptimizer.h>
|
|
|
|
#include <Interpreters/InterpreterSelectQuery.h>
|
|
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
|
|
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
|
|
|
#include <Parsers/ASTSubquery.h>
|
|
|
|
#include <Parsers/queryToString.h>
|
|
|
|
#include <iostream>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Name of the conjunction function: used to recognise `and` nodes when splitting a predicate
/// and to build a new `and` node when attaching a pushed-down predicate to an existing condition.
static constexpr auto and_function_name = "and";
|
|
|
|
|
|
|
|
/// Stores the SELECT query to be optimized and the settings consulted by optimize().
/// No work is done at construction time.
PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, const Settings & settings_)
    : ast_select(ast_select_)
    , settings(settings_)
{
}
|
|
|
|
|
|
|
|
bool PredicateExpressionsOptimizer::optimize()
|
|
|
|
{
|
|
|
|
if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables)
|
|
|
|
return false;
|
|
|
|
|
2018-08-20 15:49:39 +00:00
|
|
|
SubqueriesProjectionColumns all_subquery_projection_columns;
|
2018-03-04 16:15:31 +00:00
|
|
|
getAllSubqueryProjectionColumns(ast_select->tables.get(), all_subquery_projection_columns);
|
|
|
|
|
2018-08-20 15:49:39 +00:00
|
|
|
bool is_rewrite_subqueries = false;
|
2018-03-04 16:15:31 +00:00
|
|
|
if (!all_subquery_projection_columns.empty())
|
|
|
|
{
|
2018-08-20 15:49:39 +00:00
|
|
|
is_rewrite_subqueries |= optimizeImpl(ast_select->where_expression, all_subquery_projection_columns, false);
|
|
|
|
is_rewrite_subqueries |= optimizeImpl(ast_select->prewhere_expression, all_subquery_projection_columns, true);
|
2018-03-04 16:15:31 +00:00
|
|
|
}
|
2018-08-20 15:49:39 +00:00
|
|
|
return is_rewrite_subqueries;
|
2018-03-04 16:15:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool PredicateExpressionsOptimizer::optimizeImpl(
|
2018-08-20 15:49:39 +00:00
|
|
|
ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, bool is_prewhere)
|
2018-03-04 16:15:31 +00:00
|
|
|
{
|
|
|
|
/// split predicate with `and`
|
|
|
|
PredicateExpressions outer_predicate_expressions = splitConjunctionPredicate(outer_expression);
|
|
|
|
|
|
|
|
bool is_rewrite_subquery = false;
|
|
|
|
for (const auto & outer_predicate : outer_predicate_expressions)
|
|
|
|
{
|
|
|
|
ASTs outer_predicate_dependent;
|
|
|
|
getExpressionDependentColumns(outer_predicate, outer_predicate_dependent);
|
|
|
|
|
|
|
|
/// TODO: remove origin expression
|
2018-08-20 15:49:39 +00:00
|
|
|
for (const auto & subquery_projection_columns : subqueries_projection_columns)
|
2018-03-04 16:15:31 +00:00
|
|
|
{
|
|
|
|
auto subquery = static_cast<ASTSelectQuery *>(subquery_projection_columns.first);
|
|
|
|
const ProjectionsWithAliases projection_columns = subquery_projection_columns.second;
|
|
|
|
|
|
|
|
OptimizeKind optimize_kind = OptimizeKind::NONE;
|
|
|
|
if (!cannotPushDownOuterPredicate(projection_columns, subquery, outer_predicate_dependent, is_prewhere, optimize_kind))
|
|
|
|
{
|
|
|
|
ASTPtr inner_predicate;
|
|
|
|
cloneOuterPredicateForInnerPredicate(outer_predicate, projection_columns, outer_predicate_dependent, inner_predicate);
|
|
|
|
|
|
|
|
switch(optimize_kind)
|
|
|
|
{
|
|
|
|
case OptimizeKind::NONE: continue;
|
|
|
|
case OptimizeKind::PUSH_TO_WHERE: is_rewrite_subquery |= optimizeExpression(inner_predicate, subquery->where_expression, subquery); continue;
|
|
|
|
case OptimizeKind::PUSH_TO_HAVING: is_rewrite_subquery |= optimizeExpression(inner_predicate, subquery->having_expression, subquery); continue;
|
|
|
|
case OptimizeKind::PUSH_TO_PREWHERE: is_rewrite_subquery |= optimizeExpression(inner_predicate, subquery->prewhere_expression, subquery); continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return is_rewrite_subquery;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Flattens a predicate into its conjuncts: every `and` function node is replaced by its arguments,
/// repeatedly, until no `and` node remains in the list.
/// Returns an empty list when `predicate_expression` is null.
PredicateExpressions PredicateExpressionsOptimizer::splitConjunctionPredicate(ASTPtr & predicate_expression)
{
    PredicateExpressions conjuncts;

    if (!predicate_expression)
        return conjuncts;

    conjuncts.emplace_back(predicate_expression);

    size_t position = 0;
    while (position < conjuncts.size())
    {
        /// Hold a copy of the pointer: the list may reallocate while arguments are appended below.
        const auto current = conjuncts.at(position);

        auto * as_function = typeid_cast<ASTFunction *>(current.get());
        if (!as_function || as_function->name != and_function_name)
        {
            ++position;
            continue;
        }

        for (auto & argument : as_function->arguments->children)
            conjuncts.emplace_back(argument);

        /// Drop the `and` node itself: move the last element into its slot and shrink the list.
        /// The same position is examined again on the next iteration.
        if (position + 1 < conjuncts.size())
            std::swap(conjuncts[position], conjuncts.back());
        conjuncts.pop_back();
    }

    return conjuncts;
}
|
|
|
|
|
|
|
|
void PredicateExpressionsOptimizer::getExpressionDependentColumns(const ASTPtr & expression, ASTs & expression_dependent_columns)
|
|
|
|
{
|
|
|
|
if (!typeid_cast<ASTIdentifier *>(expression.get()))
|
|
|
|
{
|
|
|
|
for (const auto & child : expression->children)
|
|
|
|
getExpressionDependentColumns(child, expression_dependent_columns);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
expression_dependent_columns.emplace_back(expression);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PredicateExpressionsOptimizer::cannotPushDownOuterPredicate(
|
|
|
|
const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery,
|
|
|
|
ASTs & expression_dependent_columns, bool & is_prewhere, OptimizeKind & optimize_kind)
|
|
|
|
{
|
|
|
|
if (subquery->final() || subquery->limit_by_expression_list || subquery->limit_offset || subquery->with_expression_list)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
for (auto & dependent_column : expression_dependent_columns)
|
|
|
|
{
|
|
|
|
bool is_found = false;
|
|
|
|
String dependent_column_name = dependent_column->getAliasOrColumnName();
|
|
|
|
|
|
|
|
for (auto projection_column : subquery_projection_columns)
|
|
|
|
{
|
|
|
|
if (projection_column.second == dependent_column_name)
|
|
|
|
{
|
|
|
|
is_found = true;
|
|
|
|
optimize_kind = isAggregateFunction(projection_column.first) ? OptimizeKind::PUSH_TO_HAVING : optimize_kind;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!is_found)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (optimize_kind == OptimizeKind::NONE)
|
|
|
|
optimize_kind = is_prewhere ? OptimizeKind::PUSH_TO_PREWHERE : OptimizeKind::PUSH_TO_WHERE;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if `node`, or any node in its subtree, is a function whose name is registered
/// as an aggregate function in AggregateFunctionFactory.
bool PredicateExpressionsOptimizer::isAggregateFunction(ASTPtr & node)
{
    auto * as_function = typeid_cast<ASTFunction *>(node.get());
    if (as_function && AggregateFunctionFactory::instance().isAggregateFunctionName(as_function->name))
        return true;

    /// Not an aggregate itself: look for one among the descendants.
    for (auto & descendant : node->children)
    {
        if (isAggregateFunction(descendant))
            return true;
    }

    return false;
}
|
|
|
|
|
2018-08-20 15:49:39 +00:00
|
|
|
/// Walks the tree under `node` and, for each subquery node encountered, collects the projections of
/// the query it wraps into `all_subquery_projection_columns`. The search does not descend further
/// below a subquery node through this function.
void PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns(IAST * node, SubqueriesProjectionColumns & all_subquery_projection_columns)
{
    auto * ast_subquery = typeid_cast<ASTSubquery *>(node);

    if (!ast_subquery)
    {
        for (auto & child : node->children)
            getAllSubqueryProjectionColumns(child.get(), all_subquery_projection_columns);
        return;
    }

    /// The first child of the subquery node is the wrapped query.
    IAST * wrapped_query = ast_subquery->children.at(0).get();

    ASTs output_projection;
    getSubqueryProjectionColumns(wrapped_query, all_subquery_projection_columns, output_projection);
}
|
|
|
|
|
|
|
|
void PredicateExpressionsOptimizer::cloneOuterPredicateForInnerPredicate(
|
|
|
|
const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, ASTs & predicate_dependent_columns,
|
|
|
|
ASTPtr & inner_predicate)
|
|
|
|
{
|
2018-08-14 21:48:39 +00:00
|
|
|
inner_predicate = outer_predicate->clone();
|
2018-03-04 16:15:31 +00:00
|
|
|
|
|
|
|
ASTs new_expression_require_columns;
|
|
|
|
new_expression_require_columns.reserve(predicate_dependent_columns.size());
|
|
|
|
getExpressionDependentColumns(inner_predicate, new_expression_require_columns);
|
|
|
|
|
|
|
|
for (auto & expression : new_expression_require_columns)
|
|
|
|
{
|
|
|
|
if (auto identifier = typeid_cast<ASTIdentifier *>(expression.get()))
|
|
|
|
{
|
|
|
|
for (auto projection : projection_columns)
|
|
|
|
{
|
|
|
|
if (identifier->name == projection.second)
|
|
|
|
identifier->name = projection.first->getAliasOrColumnName();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery)
|
|
|
|
{
|
|
|
|
ASTPtr new_subquery_expression = subquery_expression;
|
|
|
|
new_subquery_expression = new_subquery_expression ? makeASTFunction(and_function_name, outer_expression, subquery_expression) : outer_expression;
|
|
|
|
|
|
|
|
if (!subquery_expression)
|
|
|
|
subquery->children.emplace_back(new_subquery_expression);
|
|
|
|
else
|
|
|
|
for (auto & child : subquery->children)
|
|
|
|
if (child == subquery_expression)
|
|
|
|
child = new_subquery_expression;
|
|
|
|
|
|
|
|
subquery_expression = std::move(new_subquery_expression);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-08-20 15:49:39 +00:00
|
|
|
/// Records the projections of `subquery` in `all_subquery_projection_columns`.
///
/// subquery                        - a SELECT query, or a SELECT ... UNION list which is processed
///                                   recursively, one entry per inner SELECT.
/// all_subquery_projection_columns - out: maps each SELECT node to its (projection expression, output name) pairs.
/// output_projections              - in/out: the select list that defines the output names. It is taken
///                                   from the first SELECT visited and reused for the following ones.
///
/// Throws NUMBER_OF_COLUMNS_DOESNT_MATCH when a SELECT has a different number of projections than
/// the one that defined `output_projections`.
void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(IAST * subquery, SubqueriesProjectionColumns & all_subquery_projection_columns, ASTs & output_projections)
{
    if (auto * with_union_subquery = typeid_cast<ASTSelectWithUnionQuery *>(subquery))
    {
        for (auto & select : with_union_subquery->list_of_selects->children)
            getSubqueryProjectionColumns(select.get(), all_subquery_projection_columns, output_projections);

        /// A union node is not a plain SELECT, nothing more to do for it.
        return;
    }

    if (auto * without_union_subquery = typeid_cast<ASTSelectQuery *>(subquery))
    {
        /// Bind by reference: the select list is only read here.
        const auto & expression_list = without_union_subquery->select_expression_list->children;

        /// use first projection as the output projection
        if (output_projections.empty())
            output_projections = expression_list;

        if (output_projections.size() != expression_list.size())
            throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);

        ProjectionsWithAliases subquery_projections;
        subquery_projections.reserve(expression_list.size());

        /// Pair every projection of this SELECT with the output name at the same position.
        for (size_t idx = 0; idx < expression_list.size(); ++idx)
            subquery_projections.emplace_back(std::pair(expression_list.at(idx), output_projections.at(idx)->getAliasOrColumnName()));

        /// The local vector is not used afterwards, so hand it over instead of copying it.
        all_subquery_projection_columns.insert(std::pair(subquery, std::move(subquery_projections)));
    }
}
|
|
|
|
|
|
|
|
}
|