ClickHouse/src/Interpreters/TreeOptimizer.cpp
Zhiguo Zhou 773a5bbbaa Optimize predicate with toYear converter
The date converters, such as toYear, are widely used in the where
clauses of the SQL queries, however, these conversions are often
expensive due to the complexity of the calendar system.

The preimage of a function offers an optimization for predicates
that use these converters. Given a predicate, toYear(c) = y, we could
convert it to its equivalent form: c >= b AND c <= e, where b is
"y-01-01" and e is "y-12-31". The similar transformation applies
to other comparisons (<>, <, >, <=, >=).

This commit implemented the above transformation at the AST level
by adding a new pass in the TreeOptimizer and a new AST visitor
for in-place replacing the predicates of toYear with the converted
ones.
2023-05-25 09:11:51 +08:00

877 lines
30 KiB
C++

#include <Core/Settings.h>
#include <Interpreters/TreeOptimizer.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/OptimizeIfChains.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
#include <Interpreters/WhereConstraintsOptimizer.h>
#include <Interpreters/SubstituteColumnOptimizer.h>
#include <Interpreters/TreeCNFConverter.h>
#include <Interpreters/ArithmeticOperationsInAgrFuncOptimize.h>
#include <Interpreters/DuplicateOrderByVisitor.h>
#include <Interpreters/GroupByFunctionKeysVisitor.h>
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <Interpreters/FunctionMaskingArgumentCheckVisitor.h>
#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
#include <Interpreters/RewriteCountVariantsVisitor.h>
#include <Interpreters/MonotonicityCheckVisitor.h>
#include <Interpreters/ConvertStringsToEnumVisitor.h>
#include <Interpreters/ConvertFunctionOrLikeVisitor.h>
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/GatherFunctionQuantileVisitor.h>
#include <Interpreters/RewriteSumIfFunctionVisitor.h>
#include <Interpreters/RewriteArrayExistsFunctionVisitor.h>
#include <Interpreters/OptimizeDateFilterVisitor.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Functions/FunctionFactory.h>
#include <Functions/UserDefined/UserDefinedExecutableFunctionFactory.h>
#include <Storages/IStorage.h>
namespace DB
{
/// Error codes thrown from this file. They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_TYPE_OF_AST_NODE;
}
namespace
{
/// Names of the dictionary functions whose injectivity is not fixed by the function itself:
/// optimizeGroupBy() looks up the dictionary and asks it whether the requested attribute is injective
/// before removing such a call from GROUP BY.
const std::unordered_set<String> possibly_injective_function_names
{
"dictGet",
"dictGetString",
"dictGetUInt8",
"dictGetUInt16",
"dictGetUInt32",
"dictGetUInt64",
"dictGetInt8",
"dictGetInt16",
"dictGetInt32",
"dictGetInt64",
"dictGetFloat32",
"dictGetFloat64",
"dictGetDate",
"dictGetDateTime"
};
/** GROUP BY must never be dropped entirely: without aggregate functions the query would then
  * stop being an aggregation at all. So the clause is replaced by `GROUP BY <constant>` instead.
  * The constants are dealt with later, see the analyzeAggregation method.
  */
void appendUnusedGroupByColumn(ASTSelectQuery * select_query)
{
    /// Start from a fresh, empty key list.
    select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::make_shared<ASTExpressionList>());

    /// The placeholder must not be the name of a table column (rare, but it happens) and must not
    /// fall into [1, source_columns.size()], which could be taken for a positional GROUP BY argument.
    /// A negative integer satisfies both.
    auto & keys = select_query->groupBy()->children;
    auto unused_constant = std::make_shared<ASTLiteral>(static_cast<Int64>(-1));
    keys.emplace_back(std::move(unused_constant));
}
/// Eliminates injective function calls and constant expressions from group by statement.
///
/// A GROUP BY key that is a call of an injective function is replaced by the non-literal arguments of that call.
/// Literal keys are removed, except literals that are kept as positional arguments (see below).
/// If no key is left at the end, a placeholder constant is appended via appendUnusedGroupByColumn().
/// Does nothing when the query has no GROUP BY.
void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context)
{
const FunctionFactory & function_factory = FunctionFactory::instance();
if (!select_query->groupBy())
return;
const auto is_literal = [] (const ASTPtr & ast) -> bool
{
return ast->as<ASTLiteral>();
};
auto & group_exprs = select_query->groupBy()->children;
/// removes expression at index idx by making it last one and calling .pop_back()
/// (the order of the remaining keys is therefore not preserved)
const auto remove_expr_at_index = [&group_exprs] (const size_t idx)
{
if (idx < group_exprs.size() - 1)
std::swap(group_exprs[idx], group_exprs.back());
group_exprs.pop_back();
};
const auto & settings = context->getSettingsRef();
/// iterate over each GROUP BY expression, eliminate injective function calls and literals
/// Note: `i` is advanced only when the expression at `i` is kept. After a removal the slot `i`
/// holds the element swapped in from the back, which still has to be examined.
for (size_t i = 0; i < group_exprs.size();)
{
if (const auto * function = group_exprs[i]->as<ASTFunction>())
{
/// assert function is injective
if (possibly_injective_function_names.contains(function->name))
{
/// do not handle semantic errors here
if (function->arguments->children.size() < 2)
{
++i;
continue;
}
/// The dictionary name and the attribute name are taken from the first two arguments,
/// and only when both are literals.
const auto * dict_name_ast = function->arguments->children[0]->as<ASTLiteral>();
const auto * attr_name_ast = function->arguments->children[1]->as<ASTLiteral>();
if (!dict_name_ast || !attr_name_ast)
{
++i;
continue;
}
const auto & dict_name = dict_name_ast->value.safeGet<String>();
const auto & attr_name = attr_name_ast->value.safeGet<String>();
const auto & dict_ptr = context->getExternalDictionariesLoader().getDictionary(dict_name, context);
/// Keep the call unless the dictionary reports this attribute as injective.
if (!dict_ptr->isInjective(attr_name))
{
++i;
continue;
}
}
else
{
/// Resolve the function: user defined executable functions are tried first, then the regular factory.
FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context);
if (!function_builder)
function_builder = function_factory.get(function->name, context);
if (!function_builder->isInjective({}))
{
++i;
continue;
}
}
/// don't optimize functions that shadow any of its arguments, e.g.:
/// SELECT toString(dummy) as dummy FROM system.one GROUP BY dummy;
if (!function->alias.empty())
{
FunctionMaskingArgumentCheckVisitor::Data data{.alias=function->alias};
FunctionMaskingArgumentCheckVisitor(data).visit(function->arguments);
if (data.is_rejected)
{
++i;
continue;
}
}
/// copy shared pointer to args in order to ensure lifetime
auto args_ast = function->arguments;
/** remove function call and take a step back to ensure
* next iteration does not skip not yet processed data
*/
remove_expr_at_index(i);
/// copy non-literal arguments
/// (they are appended to the end of the key list, so the loop will visit them as well)
std::remove_copy_if(
std::begin(args_ast->children), std::end(args_ast->children),
std::back_inserter(group_exprs), is_literal
);
}
else if (is_literal(group_exprs[i]))
{
/// A literal key is kept only if positional arguments are enabled and the literal is
/// a UInt64 in the range [1, number of SELECT list elements].
bool keep_position = false;
if (settings.enable_positional_arguments)
{
const auto & value = group_exprs[i]->as<ASTLiteral>()->value;
if (value.getType() == Field::Types::UInt64)
{
auto pos = value.get<UInt64>();
if (pos > 0 && pos <= select_query->select()->children.size())
keep_position = true;
}
}
if (keep_position)
++i;
else
remove_expr_at_index(i);
}
else
{
/// if neither a function nor literal - advance to next expression
++i;
}
}
/// Every key was eliminated: GROUP BY must not become empty, so add a placeholder constant.
if (group_exprs.empty())
appendUnusedGroupByColumn(select_query);
}
/// Summary of the keys of a GROUP BY clause, as filled in by getGroupByKeysInfo().
struct GroupByKeysInfo
{
NameSet key_names; /// Column names (as returned by getColumnName()) of the keys
bool has_function = false; /// Set when a key that is not an expression list is an ASTFunction
};
/// Collects the column names of the given GROUP BY keys.
///
/// A key that is an ASTExpressionList (the GROUPING SETS case) contributes the names of its children.
/// Any other key contributes its own name, and `has_function` is set if such a key is an ASTFunction.
/// Functions inside expression lists do not set `has_function`.
GroupByKeysInfo getGroupByKeysInfo(const ASTs & group_by_keys)
{
    GroupByKeysInfo data;

    for (const auto & group_key : group_by_keys)
    {
        /// Use the pointer from the type check directly: this avoids copying the whole list node.
        if (const auto * expression_list = group_key->as<ASTExpressionList>())
        {
            /// for grouping sets case
            for (const auto & group_elem : expression_list->children)
                data.key_names.insert(group_elem->getColumnName());
        }
        else
        {
            if (group_key->as<ASTFunction>())
                data.has_function = true;

            data.key_names.insert(group_key->getColumnName());
        }
    }

    return data;
}
/// Removes min/max/any aggregate functions applied to functions of GROUP BY keys.
/// The key names of the query's GROUP BY are collected and handed to the visitor that rewrites `node`.
/// No-op for a query without GROUP BY.
void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query, ASTPtr & node)
{
    const auto group_by = select_query->groupBy();
    if (!group_by)
        return;

    GroupByKeysInfo keys_info = getGroupByKeysInfo(group_by->children);

    SelectAggregateFunctionOfGroupByKeysVisitor::Data visitor_data{keys_info.key_names};
    SelectAggregateFunctionOfGroupByKeysVisitor visitor(visitor_data);
    visitor.visit(node);
}
/// Remove duplicate items from ORDER BY.
void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query)
{
if (!select_query->orderBy())
return;
/// Make unique sorting conditions.
using NameAndLocale = std::pair<String, String>;
std::set<NameAndLocale> elems_set;
ASTs & elems = select_query->orderBy()->children;
ASTs unique_elems;
unique_elems.reserve(elems.size());
for (const auto & elem : elems)
{
String name = elem->children.front()->getColumnName();
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
if (order_by_elem.with_fill /// Always keep elements WITH FILL as they affects other.
|| elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second)
unique_elems.emplace_back(elem);
}
if (unique_elems.size() < elems.size())
elems = std::move(unique_elems);
}
/// Runs DuplicateOrderByVisitor over the query to optimize duplicate ORDER BY.
void optimizeDuplicateOrderBy(ASTPtr & query, ContextPtr context)
{
    DuplicateOrderByVisitor::Data visitor_data{context};
    DuplicateOrderByVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any.
///
/// "Simple" means: the query reads from exactly one table element, that element is a subquery,
/// the subquery holds a single SELECT (no UNION) and that SELECT has no SETTINGS clause.
/// Returns nullptr in every other case, including when a node is not of the expected AST type.
const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select)
{
    const auto tables_ast = select.tables();
    if (!tables_ast)
        return nullptr;

    /// More than one element means there is a JOIN.
    const auto & tables = tables_ast->children;
    if (tables.size() != 1)
        return nullptr;

    /// Every cast below is checked before use: as<>() yields nullptr for a node of another type.
    const auto * tables_element = tables[0]->as<ASTTablesInSelectQueryElement>();
    if (!tables_element || !tables_element->table_expression)
        return nullptr;

    const auto * table_expression = tables_element->table_expression->as<ASTTableExpression>();
    if (!table_expression || !table_expression->subquery)
        return nullptr;

    const auto * subquery = table_expression->subquery->as<ASTSubquery>();
    if (!subquery || subquery->children.size() != 1)
        return nullptr;

    const auto * subselect_union = subquery->children[0]->as<ASTSelectWithUnionQuery>();
    if (!subselect_union || !subselect_union->list_of_selects
        || subselect_union->list_of_selects->children.size() != 1)
        return nullptr;

    const auto * subselect = subselect_union->list_of_selects->children[0]->as<ASTSelectQuery>();

    /// A subselect with its own SETTINGS is not considered simple.
    if (subselect && subselect->settings())
        return nullptr;

    return subselect;
}
std::unordered_set<String> getDistinctNames(const ASTSelectQuery & select)
{
if (!select.select() || select.select()->children.empty())
return {};
std::unordered_set<String> names;
std::unordered_set<String> implicit_distinct;
if (!select.distinct)
{
/// SELECT a, b FROM (SELECT DISTINCT a FROM ...)
if (const ASTSelectQuery * subselect = getSimpleSubselect(select))
implicit_distinct = getDistinctNames(*subselect);
if (implicit_distinct.empty())
return {};
}
/// Extract result column names (prefer aliases, ignore table name)
for (const auto & id : select.select()->children)
{
String alias = id->tryGetAlias();
if (const auto * identifier = id->as<ASTIdentifier>())
{
const String & name = identifier->shortName();
if (select.distinct || implicit_distinct.contains(name))
{
if (alias.empty())
names.insert(name);
else
names.insert(alias);
}
}
else if (select.distinct && !alias.empty())
{
/// It's not possible to use getAliasOrColumnName() cause name is context specific (function arguments)
names.insert(alias);
}
}
/// SELECT a FROM (SELECT DISTINCT a, b FROM ...)
if (!select.distinct && names.size() != implicit_distinct.size())
return {};
return names;
}
/// Clears the DISTINCT flag of `select` when its simple subquery already guarantees distinct rows
/// for exactly the selected columns.
void optimizeDuplicateDistinct(ASTSelectQuery & select)
{
    const auto select_list = select.select();
    if (!select_list || select_list->children.empty())
        return;

    const ASTSelectQuery * inner = getSimpleSubselect(select);
    if (inner == nullptr)
        return;

    std::unordered_set<String> inner_distinct = getDistinctNames(*inner);

    /// Collect the source column names of the SELECT list (aliases and table names are ignored).
    /// Anything that is not a plain identifier, or not distinct in the subquery, keeps DISTINCT.
    std::unordered_set<std::string_view> outer_names;
    for (const auto & column : select_list->children)
    {
        const auto * identifier = column->as<ASTIdentifier>();
        if (!identifier)
            return;

        const String & name = identifier->shortName();
        if (!inner_distinct.contains(name))
            return;

        outer_names.emplace(name);
    }

    /// The selected columns must be the whole distinct set:
    /// SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM ...) -- DISTINCT cannot be removed
    if (outer_names.size() == inner_distinct.size())
        select.distinct = false;
}
/// Replaces a monotonous function in ORDER BY by its argument, provided MonotonicityCheckVisitor
/// does not reject it (the hashes of the GROUP BY expressions are passed to the visitor for that check).
/// The sort direction is flipped when the function is monotonically decreasing.
/// Nothing is changed if any ORDER BY element uses WITH FILL.
void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context,
    const TablesWithColumns & tables_with_columns,
    const TreeRewriterResult & result)
{
    auto order_by = select_query->orderBy();
    if (!order_by)
        return;

    /// Distributed and Merge storages are skipped: the sorting key of their underlying tables is
    /// not available here, and removing monotonous functions from a key prefix could break the
    /// matching of the sorting key for the `read_in_order` optimization.
    if (result.is_remote_storage)
        return;
    if (result.storage && result.storage->getName() == "Merge")
        return;

    for (const auto & child : order_by->children)
    {
        auto * element = child->as<ASTOrderByElement>();
        if (!element || element->children.empty())
            throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST");

        if (element->with_fill)
            return;
    }

    /// Tree hashes of all GROUP BY expressions, rendered as "<first>_<second>".
    std::unordered_set<String> group_by_hashes;
    const auto remember_hash = [&group_by_hashes](const ASTPtr & key)
    {
        auto hash = key->getTreeHash();
        String rendered = toString(hash.first) + '_' + toString(hash.second);
        group_by_hashes.insert(rendered);
    };

    if (auto group_by = select_query->groupBy())
    {
        for (auto & key : group_by->children)
        {
            if (select_query->group_by_with_grouping_sets)
            {
                /// With GROUPING SETS every child is a set whose children are the expressions.
                for (auto & set_elem : key->children)
                    remember_hash(set_elem);
            }
            else
            {
                remember_hash(key);
            }
        }
    }

    auto sorting_key_columns = result.storage_snapshot ? result.storage_snapshot->metadata->getSortingKeyColumns() : Names{};

    bool is_sorting_key_prefix = true;
    for (size_t pos = 0; pos < order_by->children.size(); ++pos)
    {
        auto * element = order_by->children[pos]->as<ASTOrderByElement>();
        auto & expression = element->children[0];

        if (!expression->as<ASTFunction>())
            continue;

        const bool matches_sorting_key
            = pos < sorting_key_columns.size() && expression->getColumnName() == sorting_key_columns[pos];
        if (!matches_sorting_key)
            is_sorting_key_prefix = false;

        /// While ORDER BY still follows the sorting key, functions are left in place
        /// so that reading in order of the key stays possible.
        if (is_sorting_key_prefix)
            continue;

        MonotonicityCheckVisitor::Data data{tables_with_columns, context, group_by_hashes};
        MonotonicityCheckVisitor(data).visit(expression);
        if (data.isRejected())
            continue;

        expression = data.identifier->clone();
        expression->setAlias("");

        if (!data.monotonicity.is_positive)
            element->direction *= -1;
    }
}
/// If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x.
/// Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y
/// in case if f(), g(), h(), t() are deterministic (in scope of query).
/// Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x).
void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, ContextPtr context)
{
const auto & order_by = select_query->orderBy();
if (!order_by)
return;
for (const auto & child : order_by->children)
{
auto * order_by_element = child->as<ASTOrderByElement>();
if (!order_by_element || order_by_element->children.empty())
throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST");
if (order_by_element->with_fill)
return;
}
std::unordered_set<String> prev_keys;
ASTs modified;
modified.reserve(order_by->children.size());
for (auto & order_by_element : order_by->children)
{
/// Order by contains ASTOrderByElement as children and meaning item only as a grand child.
ASTPtr & name_or_function = order_by_element->children[0];
if (name_or_function->as<ASTFunction>())
{
if (!prev_keys.empty())
{
RedundantFunctionsInOrderByVisitor::Data data{prev_keys, context};
RedundantFunctionsInOrderByVisitor(data).visit(name_or_function);
if (data.redundant)
continue;
}
}
/// @note Leave duplicate keys unchanged. They would be removed in optimizeDuplicatesInOrderBy()
if (auto * identifier = name_or_function->as<ASTIdentifier>())
prev_keys.emplace(getIdentifierName(identifier));
modified.push_back(order_by_element);
}
if (modified.size() < order_by->children.size())
order_by->children = std::move(modified);
}
/// Remove duplicate items from LIMIT BY.
void optimizeLimitBy(const ASTSelectQuery * select_query)
{
if (!select_query->limitBy())
return;
std::set<String> elems_set;
ASTs & elems = select_query->limitBy()->children;
ASTs unique_elems;
unique_elems.reserve(elems.size());
for (const auto & elem : elems)
{
if (elems_set.emplace(elem->getColumnName()).second)
unique_elems.emplace_back(elem);
}
if (unique_elems.size() < elems.size())
elems = std::move(unique_elems);
}
/// Uses the table constraints to remove useless parts of the query.
/// The work is delegated to WhereConstraintsOptimizer; the unnamed parameters are not used.
void optimizeWithConstraints(ASTSelectQuery * select_query,
    Aliases & /*aliases*/,
    const NameSet & /*source_columns_set*/,
    const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
    const StorageMetadataPtr & metadata_snapshot,
    const bool optimize_append_index)
{
    WhereConstraintsOptimizer optimizer(select_query, metadata_snapshot, optimize_append_index);
    optimizer.perform();
}
/// Runs SubstituteColumnOptimizer on the query, using the storage and its metadata snapshot.
/// The unnamed parameters are not used.
void optimizeSubstituteColumn(ASTSelectQuery * select_query,
    Aliases & /*aliases*/,
    const NameSet & /*source_columns_set*/,
    const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
    const StorageMetadataPtr & metadata_snapshot,
    const ConstStoragePtr & storage)
{
    SubstituteColumnOptimizer optimizer(select_query, metadata_snapshot, storage);
    optimizer.perform();
}
/// Rewrites WHERE into CNF, which is more convenient for the optimizations that follow.
/// Returns true if WHERE was replaced; false if there is no WHERE or the conversion was not possible.
bool convertQueryToCNF(ASTSelectQuery * select_query)
{
    if (!select_query->where())
        return false;

    auto cnf = TreeCNFConverter::tryConvertToCNF(select_query->where());
    if (!cnf)
        return false;

    cnf->pushNotInFunctions();
    select_query->refWhere() = TreeCNFConverter::fromCNF(*cnf);
    return true;
}
/// Remove duplicated columns from USING(...).
void optimizeUsing(const ASTSelectQuery * select_query)
{
if (!select_query->join())
return;
const auto * table_join = select_query->join()->table_join->as<ASTTableJoin>();
if (!(table_join && table_join->using_expression_list))
return;
ASTs & expression_list = table_join->using_expression_list->children;
ASTs uniq_expressions_list;
std::set<String> expressions_names;
for (const auto & expression : expression_list)
{
auto expression_name = expression->getAliasOrColumnName();
if (expressions_names.find(expression_name) == expressions_names.end())
{
uniq_expressions_list.push_back(expression);
expressions_names.insert(expression_name);
}
}
if (uniq_expressions_list.size() < expression_list.size())
expression_list = uniq_expressions_list;
}
/// Move arithmetic operations out of aggregation functions
/// (the rewrite itself is done by ArithmeticOperationsInAgrFuncVisitor).
void optimizeAggregationFunctions(ASTPtr & query)
{
    ArithmeticOperationsInAgrFuncVisitor::Data visitor_data;
    ArithmeticOperationsInAgrFuncVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Applies RewriteAnyFunctionVisitor to the whole query.
void optimizeAnyFunctions(ASTPtr & query)
{
    RewriteAnyFunctionVisitor::Data visitor_data = {};
    RewriteAnyFunctionVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Applies RewriteSumIfFunctionVisitor to the whole query.
void optimizeSumIfFunctions(ASTPtr & query)
{
    RewriteSumIfFunctionVisitor::Data visitor_data = {};
    RewriteSumIfFunctionVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Applies RewriteArrayExistsFunctionVisitor to the whole query.
void optimizeArrayExistsFunctions(ASTPtr & query)
{
    RewriteArrayExistsFunctionVisitor::Data visitor_data = {};
    RewriteArrayExistsFunctionVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Applies OptimizeMultiIfToIfVisitor to the whole query.
void optimizeMultiIfToIf(ASTPtr & query)
{
    OptimizeMultiIfToIfVisitor::Data visitor_data;
    OptimizeMultiIfToIfVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Applies RemoveInjectiveFunctionsVisitor (which is given the query context) to the whole query.
void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context)
{
    RemoveInjectiveFunctionsVisitor::Data visitor_data(context);
    RemoveInjectiveFunctionsVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Rewrites the date predicates of the WHERE and PREWHERE clauses in place.
void optimizeDateFilters(ASTSelectQuery * select_query)
{
    /// Each clause is processed with its own, fresh visitor data.
    const auto rewrite_in_place = [](ASTPtr & predicate)
    {
        OptimizeDateFilterInPlaceVisitor::Data data;
        OptimizeDateFilterInPlaceVisitor(data).visit(predicate);
    };

    /// Predicates of the HAVING clause have already been moved to the WHERE clause.
    if (select_query->where())
        rewrite_in_place(select_query->refWhere());

    if (select_query->prewhere())
        rewrite_in_place(select_query->refPrewhere());
}
/// Two passes over the query: the first one records the usages of `if` / `transform` into
/// `used_as_argument`, the second one (ConvertStringsToEnumVisitor) receives that set and does the conversion.
void transformIfStringsIntoEnum(ASTPtr & query)
{
    std::unordered_set<String> target_functions = {"if", "transform"};
    std::unordered_set<String> used_as_argument;

    FindUsedFunctionsVisitor::Data usage_data{target_functions, used_as_argument};
    FindUsedFunctionsVisitor usage_visitor(usage_data);
    usage_visitor.visit(query);

    ConvertStringsToEnumVisitor::Data conversion_data{used_as_argument};
    ConvertStringsToEnumVisitor conversion_visitor(conversion_data);
    conversion_visitor.visit(query);
}
/// Applies RewriteFunctionToSubcolumnVisitor to the query, using the given table metadata.
void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot)
{
    RewriteFunctionToSubcolumnVisitor::Data visitor_data{metadata_snapshot};
    RewriteFunctionToSubcolumnVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Applies ConvertFunctionOrLikeVisitor to the whole query.
void optimizeOrLikeChain(ASTPtr & query)
{
    ConvertFunctionOrLikeVisitor::Data visitor_data = {};
    ConvertFunctionOrLikeVisitor visitor(visitor_data);
    visitor.visit(query);
}
}
/// Optimizations of `if` / `multiIf`, applied in this order:
///  1. multiIf -> if, only when `multiif_to_if` is set;
///  2. `if` with a constant condition, always;
///  3. chains of `if` -> multiIf, only when `if_chain_to_multiif` is set.
void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif, bool multiif_to_if)
{
    if (multiif_to_if)
        optimizeMultiIfToIf(query);

    /// Done after constants were substituted instead of scalar subqueries.
    OptimizeIfWithConstantConditionVisitor constant_condition_visitor(aliases);
    constant_condition_visitor.visit(query);

    if (if_chain_to_multiif)
    {
        OptimizeIfChainsVisitor chains_visitor{};
        chains_visitor.visit(query);
    }
}
/// Applies RewriteCountVariantsVisitor (constructed with the query context) to the whole query.
void TreeOptimizer::optimizeCountConstantAndSumOne(ASTPtr & query, ContextPtr context)
{
    RewriteCountVariantsVisitor visitor(context);
    visitor.visit(query);
}
/// Eliminate functions of other GROUP BY keys.
///
/// The names of all keys are collected and handed to GroupByFunctionKeysVisitor. Afterwards only the
/// keys whose name is still present in that set are kept (presumably the visitor erases the names of
/// the keys it finds redundant - verify against GroupByFunctionKeysVisitor).
/// No-op when there is no GROUP BY or getGroupByKeysInfo() reports no function among the keys.
void TreeOptimizer::optimizeGroupByFunctionKeys(ASTSelectQuery * select_query)
{
    auto group_by = select_query->groupBy();
    if (!group_by)
        return;

    const auto & group_by_keys = group_by->children;

    GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
    if (!group_by_keys_data.has_function)
        return;

    GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
    GroupByFunctionKeysVisitor(visitor_data).visit(group_by);

    /// Collect the surviving keys, preserving their order.
    ASTs remaining_keys;
    remaining_keys.reserve(group_by_keys.size());
    for (const auto & group_key : group_by_keys)
    {
        if (group_by_keys_data.key_names.contains(group_key->getColumnName()))
            remaining_keys.push_back(group_key);
    }

    /// The temporary list is not needed any more, so move it into place instead of copying.
    group_by->children = std::move(remaining_keys);
}
/// Runs the AST-level optimization passes over a SELECT query, in the fixed order written below.
/// Most passes are enabled by a setting taken from `context`; the date filter rewrite, the GROUP BY
/// injective function elimination and the ORDER BY / LIMIT BY / USING deduplication always run.
/// Throws LOGICAL_ERROR if `query` is not an ASTSelectQuery.
void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns, ContextPtr context)
{
const auto & settings = context->getSettingsRef();
auto * select_query = query->as<ASTSelectQuery>();
if (!select_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts.");
/// NOTE(review): only `result.storage_snapshot` is checked before `result.storage` is dereferenced;
/// presumably a snapshot is never set without a storage - confirm.
if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns())
optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata);
/// Move arithmetic operations out of aggregation functions
if (settings.optimize_arithmetic_operations_in_aggregate_functions)
optimizeAggregationFunctions(query);
/// The constraint based optimizations below run only if WHERE was actually converted to CNF.
bool converted_to_cnf = false;
if (settings.convert_query_to_cnf)
converted_to_cnf = convertQueryToCNF(select_query);
if (converted_to_cnf && settings.optimize_using_constraints && result.storage_snapshot)
{
optimizeWithConstraints(select_query, result.aliases, result.source_columns_set,
tables_with_columns, result.storage_snapshot->metadata, settings.optimize_append_index);
if (settings.optimize_substitute_columns)
optimizeSubstituteColumn(select_query, result.aliases, result.source_columns_set,
tables_with_columns, result.storage_snapshot->metadata, result.storage);
}
/// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, toISOWeek, etc.
optimizeDateFilters(select_query);
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, context);
/// GROUP BY functions of other keys elimination.
if (settings.optimize_group_by_function_keys)
optimizeGroupByFunctionKeys(select_query);
/// Move all operations out of any function
if (settings.optimize_move_functions_out_of_any)
optimizeAnyFunctions(query);
if (settings.optimize_normalize_count_variants)
optimizeCountConstantAndSumOne(query, context);
if (settings.optimize_rewrite_sum_if_to_count_if)
optimizeSumIfFunctions(query);
if (settings.optimize_rewrite_array_exists_to_has)
optimizeArrayExistsFunctions(query);
/// Remove injective functions inside uniq
if (settings.optimize_injective_functions_inside_uniq)
optimizeInjectiveFunctionsInsideUniq(query, context);
/// Eliminate min/max/any aggregators of functions of GROUP BY keys
/// (skipped for GROUP BY ... WITH TOTALS / ROLLUP / CUBE)
if (settings.optimize_aggregators_of_group_by_keys
&& !select_query->group_by_with_totals
&& !select_query->group_by_with_rollup
&& !select_query->group_by_with_cube)
optimizeAggregateFunctionsOfGroupByKeys(select_query, query);
/// Remove duplicate ORDER BY and DISTINCT from subqueries.
if (settings.optimize_duplicate_order_by_and_distinct)
{
optimizeDuplicateOrderBy(query, context);
/// DISTINCT has special meaning in Distributed query with enabled distributed_group_by_no_merge
/// TODO: disable Distributed/remote() tables only
if (!settings.distributed_group_by_no_merge)
optimizeDuplicateDistinct(*select_query);
}
/// Remove functions from ORDER BY if its argument is also in ORDER BY
if (settings.optimize_redundant_functions_in_order_by)
optimizeRedundantFunctionsInOrderBy(select_query, context);
/// Replace monotonous functions with its argument
if (settings.optimize_monotonous_functions_in_order_by)
optimizeMonotonousFunctionsInOrderBy(select_query, context, tables_with_columns, result);
/// Remove duplicate items from ORDER BY.
/// Execute it after all order by optimizations,
/// because they can produce duplicated columns.
optimizeDuplicatesInOrderBy(select_query);
/// If function "if" has String-type arguments, transform them into enum
if (settings.optimize_if_transform_strings_to_enum)
transformIfStringsIntoEnum(query);
/// Remove duplicated elements from LIMIT BY clause.
optimizeLimitBy(select_query);
/// Remove duplicated columns from USING(...).
optimizeUsing(select_query);
/// The OR-of-LIKE rewrite is applied only when hyperscan is allowed and both hyperscan
/// regexp length limits are set to 0.
if (settings.optimize_or_like_chain
&& settings.allow_hyperscan
&& settings.max_hyperscan_regexp_length == 0
&& settings.max_hyperscan_regexp_total_length == 0)
{
optimizeOrLikeChain(query);
}
}
}