// ClickHouse/src/Interpreters/TreeOptimizer.cpp (824 lines, 28 KiB, C++)

#include <Core/Settings.h>

#include <Interpreters/TreeOptimizer.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/OptimizeIfChains.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
#include <Interpreters/WhereConstraintsOptimizer.h>
#include <Interpreters/SubstituteColumnOptimizer.h>
#include <Interpreters/TreeCNFConverter.h>
#include <Interpreters/ArithmeticOperationsInAgrFuncOptimize.h>
#include <Interpreters/DuplicateOrderByVisitor.h>
#include <Interpreters/GroupByFunctionKeysVisitor.h>
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <Interpreters/FunctionMaskingArgumentCheckVisitor.h>
#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
#include <Interpreters/RewriteCountVariantsVisitor.h>
#include <Interpreters/MonotonicityCheckVisitor.h>
#include <Interpreters/ConvertStringsToEnumVisitor.h>
#include <Interpreters/ConvertFunctionOrLikeVisitor.h>
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/GatherFunctionQuantileVisitor.h>
#include <Interpreters/RewriteSumIfFunctionVisitor.h>
#include <Interpreters/RewriteArrayExistsFunctionVisitor.h>
#include <Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h>

#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>

#include <Functions/FunctionFactory.h>
#include <Functions/UserDefined/UserDefinedExecutableFunctionFactory.h>

#include <Storages/IStorage.h>

#include <algorithm>
#include <iterator>
#include <utility>
namespace DB
{
namespace ErrorCodes
{
    /// Declarations of the error codes thrown in this file.
    extern const int LOGICAL_ERROR;
    extern const int UNKNOWN_TYPE_OF_AST_NODE;
}
namespace
{
/// Names of dictionary access functions for which injectivity is not decided by the function name alone:
/// optimizeGroupBy() asks the dictionary whether the requested attribute is injective.
const std::unordered_set<String> possibly_injective_function_names{
    "dictGet", "dictGetString",
    "dictGetUInt8", "dictGetUInt16", "dictGetUInt32", "dictGetUInt64",
    "dictGetInt8", "dictGetInt16", "dictGetInt32", "dictGetInt64",
    "dictGetFloat32", "dictGetFloat64",
    "dictGetDate", "dictGetDateTime",
};
/** You can not completely remove GROUP BY. Because if there were no aggregate functions, then it turns out that there will be no aggregation.
* Instead, leave `GROUP BY const`.
* Next, see deleting the constants in the analyzeAggregation method.
*/
2021-10-22 12:22:16 +00:00
void appendUnusedGroupByColumn(ASTSelectQuery * select_query)
{
/// You must insert a constant that is not the name of the column in the table. Such a case is rare, but it happens.
2021-10-22 12:22:16 +00:00
/// Also start unused_column integer must not intersect with ([1, source_columns.size()])
/// might be in positional GROUP BY.
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::make_shared<ASTExpressionList>());
select_query->groupBy()->children.emplace_back(std::make_shared<ASTLiteral>(static_cast<Int64>(-1)));
}
/// Eliminates injective function calls and constant expressions from group by statement.
2021-10-22 12:22:16 +00:00
void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context)
{
const FunctionFactory & function_factory = FunctionFactory::instance();
if (!select_query->groupBy())
return;
const auto is_literal = [] (const ASTPtr & ast) -> bool
{
return ast->as<ASTLiteral>();
};
auto & group_exprs = select_query->groupBy()->children;
/// removes expression at index idx by making it last one and calling .pop_back()
const auto remove_expr_at_index = [&group_exprs] (const size_t idx)
{
if (idx < group_exprs.size() - 1)
std::swap(group_exprs[idx], group_exprs.back());
group_exprs.pop_back();
};
const auto & settings = context->getSettingsRef();
/// iterate over each GROUP BY expression, eliminate injective function calls and literals
for (size_t i = 0; i < group_exprs.size();)
{
if (const auto * function = group_exprs[i]->as<ASTFunction>())
{
/// assert function is injective
if (possibly_injective_function_names.contains(function->name))
{
/// do not handle semantic errors here
if (function->arguments->children.size() < 2)
{
++i;
continue;
}
const auto * dict_name_ast = function->arguments->children[0]->as<ASTLiteral>();
const auto * attr_name_ast = function->arguments->children[1]->as<ASTLiteral>();
if (!dict_name_ast || !attr_name_ast)
{
++i;
continue;
}
const auto & dict_name = dict_name_ast->value.safeGet<String>();
const auto & attr_name = attr_name_ast->value.safeGet<String>();
const auto & dict_ptr = context->getExternalDictionariesLoader().getDictionary(dict_name, context);
if (!dict_ptr->isInjective(attr_name))
{
++i;
continue;
}
}
else
{
FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context);
if (!function_builder)
function_builder = function_factory.get(function->name, context);
2022-04-21 17:10:11 +00:00
if (!function_builder->isInjective({}))
{
++i;
continue;
}
}
2022-09-05 01:50:24 +00:00
/// don't optimize functions that shadow any of it's arguments, e.g.:
/// SELECT toString(dummy) as dummy FROM system.one GROUP BY dummy;
if (!function->alias.empty())
{
FunctionMaskingArgumentCheckVisitor::Data data{.alias=function->alias};
FunctionMaskingArgumentCheckVisitor(data).visit(function->arguments);
if (data.is_rejected)
{
++i;
continue;
}
}
/// copy shared pointer to args in order to ensure lifetime
auto args_ast = function->arguments;
/** remove function call and take a step back to ensure
* next iteration does not skip not yet processed data
*/
remove_expr_at_index(i);
/// copy non-literal arguments
std::remove_copy_if(
std::begin(args_ast->children), std::end(args_ast->children),
std::back_inserter(group_exprs), is_literal
);
}
2021-08-19 15:47:26 +00:00
else if (is_literal(group_exprs[i]))
{
2021-08-19 15:47:26 +00:00
bool keep_position = false;
if (settings.enable_positional_arguments)
{
2021-08-20 09:08:39 +00:00
const auto & value = group_exprs[i]->as<ASTLiteral>()->value;
2021-08-19 15:47:26 +00:00
if (value.getType() == Field::Types::UInt64)
{
auto pos = value.get<UInt64>();
2021-11-15 12:09:04 +00:00
if (pos > 0 && pos <= select_query->select()->children.size())
2021-08-19 15:47:26 +00:00
keep_position = true;
}
}
if (keep_position)
++i;
else
remove_expr_at_index(i);
}
else
{
/// if neither a function nor literal - advance to next expression
++i;
}
}
if (group_exprs.empty())
2021-10-22 12:22:16 +00:00
appendUnusedGroupByColumn(select_query);
}
/// Summary of a GROUP BY key list, as filled by getGroupByKeysInfo().
struct GroupByKeysInfo
{
    /// Column names of the keys.
    NameSet key_names;

    /// Set when at least one top-level key is an ASTFunction.
    bool has_function{false};
};
/// Collects the column names of all GROUP BY keys and records whether any top-level key is a function.
/// A key that is itself an expression list (the GROUPING SETS case) contributes the names of its elements;
/// such elements do not affect `has_function`.
GroupByKeysInfo getGroupByKeysInfo(const ASTs & group_by_keys)
{
    GroupByKeysInfo data;

    /// filling set with short names of keys
    for (const auto & group_key : group_by_keys)
    {
        /// for grouping sets case
        /// (use the pointer returned by as<>() directly: binding the result to a plain `auto`
        /// would deep-copy the whole expression list node)
        if (const auto * expression_list = group_key->as<ASTExpressionList>())
        {
            for (const auto & group_elem : expression_list->children)
                data.key_names.insert(group_elem->getColumnName());
        }
        else
        {
            if (group_key->as<ASTFunction>())
                data.has_function = true;

            data.key_names.insert(group_key->getColumnName());
        }
    }

    return data;
}
/// Eliminates min/max/any-aggregators of functions of GROUP BY keys.
/// Does nothing when the query has no GROUP BY; otherwise runs
/// SelectAggregateFunctionOfGroupByKeysVisitor over `node` with the set of GROUP BY key names.
void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query, ASTPtr & node)
{
    if (!select_query->groupBy())
        return;

    const auto & group_by_keys = select_query->groupBy()->children;
    GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);

    SelectAggregateFunctionOfGroupByKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
    SelectAggregateFunctionOfGroupByKeysVisitor(visitor_data).visit(node);
}
/// Remove duplicate items from ORDER BY.
void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query)
{
if (!select_query->orderBy())
return;
/// Make unique sorting conditions.
using NameAndLocale = std::pair<String, String>;
std::set<NameAndLocale> elems_set;
ASTs & elems = select_query->orderBy()->children;
ASTs unique_elems;
unique_elems.reserve(elems.size());
for (const auto & elem : elems)
{
String name = elem->children.front()->getColumnName();
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
2021-09-11 23:55:53 +00:00
if (order_by_elem.with_fill /// Always keep elements WITH FILL as they affects other.
|| elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second)
unique_elems.emplace_back(elem);
}
if (unique_elems.size() < elems.size())
elems = std::move(unique_elems);
}
2020-08-20 20:50:53 +00:00
/// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any
const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select)
{
if (!select.tables())
return nullptr;
const auto & tables = select.tables()->children;
if (tables.empty() || tables.size() != 1)
return nullptr;
const auto & ast_table_expression = tables[0]->as<ASTTablesInSelectQueryElement>()->table_expression;
if (!ast_table_expression)
return nullptr;
const auto & table_expression = ast_table_expression->as<ASTTableExpression>();
if (!table_expression->subquery)
return nullptr;
const auto & subquery = table_expression->subquery->as<ASTSubquery>();
if (!subquery || subquery->children.size() != 1)
return nullptr;
const auto & subselect_union = subquery->children[0]->as<ASTSelectWithUnionQuery>();
if (!subselect_union || !subselect_union->list_of_selects ||
subselect_union->list_of_selects->children.size() != 1)
return nullptr;
2020-08-20 20:50:53 +00:00
const auto & subselect = subselect_union->list_of_selects->children[0]->as<ASTSelectQuery>();
if (subselect && subselect->settings())
return nullptr;
return subselect;
}
std::unordered_set<String> getDistinctNames(const ASTSelectQuery & select)
{
if (!select.select() || select.select()->children.empty())
return {};
std::unordered_set<String> names;
std::unordered_set<String> implicit_distinct;
if (!select.distinct)
{
/// SELECT a, b FROM (SELECT DISTINCT a FROM ...)
if (const ASTSelectQuery * subselect = getSimpleSubselect(select))
implicit_distinct = getDistinctNames(*subselect);
if (implicit_distinct.empty())
return {};
}
/// Extract result column names (prefer aliases, ignore table name)
for (const auto & id : select.select()->children)
{
String alias = id->tryGetAlias();
if (const auto * identifier = id->as<ASTIdentifier>())
{
2020-08-20 22:05:06 +00:00
const String & name = identifier->shortName();
if (select.distinct || implicit_distinct.contains(name))
{
if (alias.empty())
names.insert(name);
else
names.insert(alias);
}
}
else if (select.distinct && !alias.empty())
{
/// It's not possible to use getAliasOrColumnName() cause name is context specific (function arguments)
names.insert(alias);
}
}
/// SELECT a FROM (SELECT DISTINCT a, b FROM ...)
2020-08-20 19:04:46 +00:00
if (!select.distinct && names.size() != implicit_distinct.size())
return {};
return names;
}
/// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression,
/// has a single argument and not an aggregate functions.
2021-06-01 12:20:52 +00:00
void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context,
const TablesWithColumns & tables_with_columns,
const TreeRewriterResult & result)
{
auto order_by = select_query->orderBy();
if (!order_by)
return;
/// Do not apply optimization for Distributed and Merge storages,
2022-09-02 08:54:48 +00:00
/// because we can't get the sorting key of their underlying tables
/// and we can break the matching of the sorting key for `read_in_order`
/// optimization by removing monotonous functions from the prefix of key.
if (result.is_remote_storage || (result.storage && result.storage->getName() == "Merge"))
return;
2021-09-11 22:41:37 +00:00
for (const auto & child : order_by->children)
{
auto * order_by_element = child->as<ASTOrderByElement>();
if (!order_by_element || order_by_element->children.empty())
throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST");
2021-09-11 22:41:37 +00:00
if (order_by_element->with_fill)
return;
}
std::unordered_set<String> group_by_hashes;
if (auto group_by = select_query->groupBy())
{
if (select_query->group_by_with_grouping_sets)
{
for (auto & set : group_by->children)
{
for (auto & elem : set->children)
{
auto hash = elem->getTreeHash();
String key = toString(hash.first) + '_' + toString(hash.second);
group_by_hashes.insert(key);
}
}
}
else
{
for (auto & elem : group_by->children)
{
auto hash = elem->getTreeHash();
String key = toString(hash.first) + '_' + toString(hash.second);
group_by_hashes.insert(key);
}
}
}
auto sorting_key_columns = result.storage_snapshot ? result.storage_snapshot->metadata->getSortingKeyColumns() : Names{};
bool is_sorting_key_prefix = true;
for (size_t i = 0; i < order_by->children.size(); ++i)
{
auto * order_by_element = order_by->children[i]->as<ASTOrderByElement>();
2021-09-11 22:41:37 +00:00
auto & ast_func = order_by_element->children[0];
if (!ast_func->as<ASTFunction>())
continue;
if (i >= sorting_key_columns.size() || ast_func->getColumnName() != sorting_key_columns[i])
is_sorting_key_prefix = false;
/// If order by expression matches the sorting key, do not remove
/// functions to allow execute reading in order of key.
if (is_sorting_key_prefix)
continue;
MonotonicityCheckVisitor::Data data{tables_with_columns, context, group_by_hashes};
MonotonicityCheckVisitor(data).visit(ast_func);
if (!data.isRejected())
{
ast_func = data.identifier->clone();
ast_func->setAlias("");
if (!data.monotonicity.is_positive)
order_by_element->direction *= -1;
}
}
}
2020-08-08 01:01:47 +00:00
/// If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x.
/// Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y
/// in case if f(), g(), h(), t() are deterministic (in scope of query).
/// Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x).
2021-06-01 12:20:52 +00:00
void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, ContextPtr context)
{
const auto & order_by = select_query->orderBy();
if (!order_by)
return;
2021-09-11 22:41:37 +00:00
for (const auto & child : order_by->children)
{
auto * order_by_element = child->as<ASTOrderByElement>();
if (!order_by_element || order_by_element->children.empty())
throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST");
2021-09-11 22:41:37 +00:00
if (order_by_element->with_fill)
return;
}
std::unordered_set<String> prev_keys;
ASTs modified;
modified.reserve(order_by->children.size());
for (auto & order_by_element : order_by->children)
{
/// Order by contains ASTOrderByElement as children and meaning item only as a grand child.
ASTPtr & name_or_function = order_by_element->children[0];
if (name_or_function->as<ASTFunction>())
{
if (!prev_keys.empty())
{
RedundantFunctionsInOrderByVisitor::Data data{prev_keys, context};
RedundantFunctionsInOrderByVisitor(data).visit(name_or_function);
if (data.redundant)
continue;
}
}
/// @note Leave duplicate keys unchanged. They would be removed in optimizeDuplicatesInOrderBy()
if (auto * identifier = name_or_function->as<ASTIdentifier>())
prev_keys.emplace(getIdentifierName(identifier));
modified.push_back(order_by_element);
}
if (modified.size() < order_by->children.size())
order_by->children = std::move(modified);
}
/// Remove duplicate items from LIMIT BY.
void optimizeLimitBy(const ASTSelectQuery * select_query)
{
if (!select_query->limitBy())
return;
std::set<String> elems_set;
ASTs & elems = select_query->limitBy()->children;
ASTs unique_elems;
unique_elems.reserve(elems.size());
for (const auto & elem : elems)
{
if (elems_set.emplace(elem->getColumnName()).second)
unique_elems.emplace_back(elem);
}
if (unique_elems.size() < elems.size())
elems = std::move(unique_elems);
}
2021-01-04 20:55:32 +00:00
/// Use constraints to get rid of useless parts of query
2021-05-04 19:18:37 +00:00
void optimizeWithConstraints(ASTSelectQuery * select_query,
Aliases & /*aliases*/,
const NameSet & /*source_columns_set*/,
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
const StorageMetadataPtr & metadata_snapshot,
const bool optimize_append_index)
2021-01-03 10:24:09 +00:00
{
2021-05-04 19:18:37 +00:00
WhereConstraintsOptimizer(select_query, metadata_snapshot, optimize_append_index).perform();
2021-01-04 20:55:32 +00:00
}
2021-01-03 10:24:09 +00:00
2021-05-04 19:18:37 +00:00
void optimizeSubstituteColumn(ASTSelectQuery * select_query,
Aliases & /*aliases*/,
const NameSet & /*source_columns_set*/,
2021-05-04 18:43:58 +00:00
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
2021-04-24 15:24:32 +00:00
const StorageMetadataPtr & metadata_snapshot,
const ConstStoragePtr & storage)
{
2021-05-04 18:43:58 +00:00
SubstituteColumnOptimizer(select_query, metadata_snapshot, storage).perform();
2021-04-24 15:24:32 +00:00
}
2021-11-10 17:57:59 +00:00
/// Transform WHERE to CNF for more convenient optimization.
2021-11-18 14:24:06 +00:00
bool convertQueryToCNF(ASTSelectQuery * select_query)
2021-01-04 20:55:32 +00:00
{
if (select_query->where())
2021-01-03 10:24:09 +00:00
{
2021-11-18 14:24:06 +00:00
auto cnf_form = TreeCNFConverter::tryConvertToCNF(select_query->where());
if (!cnf_form)
return false;
2022-09-05 01:50:24 +00:00
cnf_form->pushNotInFunctions();
2021-11-18 14:24:06 +00:00
select_query->refWhere() = TreeCNFConverter::fromCNF(*cnf_form);
return true;
2021-01-03 10:24:09 +00:00
}
2021-11-18 14:24:06 +00:00
return false;
2021-01-03 10:24:09 +00:00
}
/// Remove duplicated columns from USING(...).
/// Expressions are compared by alias-or-column-name; the first occurrence of each name is kept.
void optimizeUsing(const ASTSelectQuery * select_query)
{
    if (!select_query->join())
        return;

    const auto * table_join = select_query->join()->table_join->as<ASTTableJoin>();
    if (!(table_join && table_join->using_expression_list))
        return;

    ASTs & expression_list = table_join->using_expression_list->children;

    ASTs uniq_expressions_list;
    uniq_expressions_list.reserve(expression_list.size());

    std::set<String> expressions_names;
    for (const auto & expression : expression_list)
    {
        /// insert() reports whether the name was new, so a separate find() is not needed.
        if (expressions_names.insert(expression->getAliasOrColumnName()).second)
            uniq_expressions_list.push_back(expression);
    }

    /// Replace the list only if something was actually removed; move instead of copying the vector.
    if (uniq_expressions_list.size() < expression_list.size())
        expression_list = std::move(uniq_expressions_list);
}
/// Move arithmetic operations out of aggregation functions
/// by running ArithmeticOperationsInAgrFuncVisitor over the whole query.
void optimizeAggregationFunctions(ASTPtr & query)
{
    ArithmeticOperationsInAgrFuncVisitor::Data visitor_data;
    ArithmeticOperationsInAgrFuncVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Runs RewriteAnyFunctionVisitor over the whole query.
void optimizeAnyFunctions(ASTPtr & query)
{
    RewriteAnyFunctionVisitor::Data visitor_data = {};
    RewriteAnyFunctionVisitor visitor(visitor_data);
    visitor.visit(query);
}
/// Runs RewriteSumIfFunctionVisitor over the whole query.
void optimizeSumIfFunctions(ASTPtr & query)
{
    RewriteSumIfFunctionVisitor::Data visitor_data = {};
    RewriteSumIfFunctionVisitor visitor(visitor_data);
    visitor.visit(query);
}
2023-02-09 04:06:42 +00:00
void optimizeArrayExistsFunctions(ASTPtr & query)
{
RewriteArrayExistsFunctionVisitor::Data data = {};
RewriteArrayExistsFunctionVisitor(data).visit(query);
}
/// Runs OptimizeMultiIfToIfVisitor over the whole query.
void optimizeMultiIfToIf(ASTPtr & query)
{
    OptimizeMultiIfToIfVisitor::Data visitor_data;
    OptimizeMultiIfToIfVisitor visitor(visitor_data);
    visitor.visit(query);
}
2021-06-01 12:20:52 +00:00
void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context)
{
RemoveInjectiveFunctionsVisitor::Data data(context);
RemoveInjectiveFunctionsVisitor(data).visit(query);
}
/// Applies OptimizeDateOrDateTimeConverterWithPreimageVisitor to the WHERE and PREWHERE expressions, if present.
void optimizeDateFilters(ASTSelectQuery * select_query, const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns, ContextPtr context)
{
    /// Each filter expression gets its own freshly constructed visitor data.
    const auto rewrite_filter = [&tables_with_columns, &context](ASTPtr & filter)
    {
        OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data visitor_data{tables_with_columns, context};
        OptimizeDateOrDateTimeConverterWithPreimageVisitor(visitor_data).visit(filter);
    };

    /// Predicates in the HAVING clause have been moved to the WHERE clause.
    if (select_query->where())
        rewrite_filter(select_query->refWhere());

    if (select_query->prewhere())
        rewrite_filter(select_query->refPrewhere());
}
/// Two passes over the query: FindUsedFunctionsVisitor fills `used_as_argument` for the functions
/// "if" and "transform", then ConvertStringsToEnumVisitor runs with that set.
void transformIfStringsIntoEnum(ASTPtr & query)
{
    std::unordered_set<String> function_names = {"if", "transform"};
    std::unordered_set<String> used_as_argument;

    /// First pass.
    FindUsedFunctionsVisitor::Data find_data{function_names, used_as_argument};
    FindUsedFunctionsVisitor finder(find_data);
    finder.visit(query);

    /// Second pass.
    ConvertStringsToEnumVisitor::Data conversion_data{used_as_argument};
    ConvertStringsToEnumVisitor converter(conversion_data);
    converter.visit(query);
}
2021-05-21 23:22:22 +00:00
void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot)
2021-05-21 18:48:19 +00:00
{
2021-05-21 23:22:22 +00:00
RewriteFunctionToSubcolumnVisitor::Data data{metadata_snapshot};
2021-05-21 18:48:19 +00:00
RewriteFunctionToSubcolumnVisitor(data).visit(query);
}
/// Runs ConvertFunctionOrLikeVisitor over the whole query.
void optimizeOrLikeChain(ASTPtr & query)
{
    ConvertFunctionOrLikeVisitor::Data visitor_data = {};
    ConvertFunctionOrLikeVisitor visitor(visitor_data);
    visitor.visit(query);
}
}
/// Optimizations of `if`-like functions, applied in this order:
///  1. OptimizeMultiIfToIfVisitor (only when `multiif_to_if` is set);
///  2. OptimizeIfWithConstantConditionVisitor (always);
///  3. OptimizeIfChainsVisitor (only when `if_chain_to_multiif` is set).
void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif, bool multiif_to_if)
{
    if (multiif_to_if)
    {
        optimizeMultiIfToIf(query);
    }

    /// Optimize if with constant condition after constants were substituted instead of scalar subqueries.
    OptimizeIfWithConstantConditionVisitor constant_condition_visitor(aliases);
    constant_condition_visitor.visit(query);

    if (if_chain_to_multiif)
    {
        OptimizeIfChainsVisitor().visit(query);
    }
}
/// Runs RewriteCountVariantsVisitor over the whole query.
void TreeOptimizer::optimizeCountConstantAndSumOne(ASTPtr & query, ContextPtr context)
{
    RewriteCountVariantsVisitor count_variants_visitor(context);
    count_variants_visitor.visit(query);
}
2022-11-29 17:22:24 +00:00
///eliminate functions of other GROUP BY keys
void TreeOptimizer::optimizeGroupByFunctionKeys(ASTSelectQuery * select_query)
{
if (!select_query->groupBy())
return;
auto group_by = select_query->groupBy();
const auto & group_by_keys = group_by->children;
ASTs modified; ///result
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
if (!group_by_keys_data.has_function)
return;
GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
GroupByFunctionKeysVisitor(visitor_data).visit(group_by);
modified.reserve(group_by_keys.size());
/// filling the result
for (const auto & group_key : group_by_keys)
if (group_by_keys_data.key_names.contains(group_key->getColumnName()))
modified.push_back(group_key);
/// modifying the input
group_by->children = modified;
}
2021-05-21 18:48:19 +00:00
void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns, ContextPtr context)
{
const auto & settings = context->getSettingsRef();
auto * select_query = query->as<ASTSelectQuery>();
if (!select_query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts.");
if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns())
optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata);
2021-05-21 18:48:19 +00:00
/// Move arithmetic operations out of aggregation functions
if (settings.optimize_arithmetic_operations_in_aggregate_functions)
optimizeAggregationFunctions(query);
2021-11-18 14:24:06 +00:00
bool converted_to_cnf = false;
2021-01-04 20:55:32 +00:00
if (settings.convert_query_to_cnf)
2021-11-18 14:24:06 +00:00
converted_to_cnf = convertQueryToCNF(select_query);
2021-01-04 20:55:32 +00:00
if (converted_to_cnf && settings.optimize_using_constraints && result.storage_snapshot)
2021-04-24 15:24:32 +00:00
{
optimizeWithConstraints(select_query, result.aliases, result.source_columns_set,
tables_with_columns, result.storage_snapshot->metadata, settings.optimize_append_index);
2021-05-04 19:18:37 +00:00
if (settings.optimize_substitute_columns)
optimizeSubstituteColumn(select_query, result.aliases, result.source_columns_set,
tables_with_columns, result.storage_snapshot->metadata, result.storage);
2021-04-24 15:24:32 +00:00
}
2021-02-14 14:47:15 +00:00
/// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, etc.
optimizeDateFilters(select_query, tables_with_columns, context);
/// GROUP BY injective function elimination.
2021-10-22 12:22:16 +00:00
optimizeGroupBy(select_query, context);
/// GROUP BY functions of other keys elimination.
if (settings.optimize_group_by_function_keys)
optimizeGroupByFunctionKeys(select_query);
/// Move all operations out of any function
if (settings.optimize_move_functions_out_of_any)
optimizeAnyFunctions(query);
2021-02-07 07:22:51 +00:00
if (settings.optimize_normalize_count_variants)
optimizeCountConstantAndSumOne(query, context);
2021-02-07 07:22:51 +00:00
if (settings.optimize_rewrite_sum_if_to_count_if)
optimizeSumIfFunctions(query);
2023-02-09 07:15:08 +00:00
if (settings.optimize_rewrite_array_exists_to_has)
2023-02-09 04:06:42 +00:00
optimizeArrayExistsFunctions(query);
/// Remove injective functions inside uniq
if (settings.optimize_injective_functions_inside_uniq)
optimizeInjectiveFunctionsInsideUniq(query, context);
/// Eliminate min/max/any aggregators of functions of GROUP BY keys
if (settings.optimize_aggregators_of_group_by_keys
&& !select_query->group_by_with_totals
&& !select_query->group_by_with_rollup
&& !select_query->group_by_with_cube)
optimizeAggregateFunctionsOfGroupByKeys(select_query, query);
/// Remove functions from ORDER BY if its argument is also in ORDER BY
if (settings.optimize_redundant_functions_in_order_by)
optimizeRedundantFunctionsInOrderBy(select_query, context);
/// Replace monotonous functions with its argument
if (settings.optimize_monotonous_functions_in_order_by)
optimizeMonotonousFunctionsInOrderBy(select_query, context, tables_with_columns, result);
/// Remove duplicate items from ORDER BY.
/// Execute it after all order by optimizations,
/// because they can produce duplicated columns.
optimizeDuplicatesInOrderBy(select_query);
/// If function "if" has String-type arguments, transform them into enum
if (settings.optimize_if_transform_strings_to_enum)
transformIfStringsIntoEnum(query);
/// Remove duplicated elements from LIMIT BY clause.
optimizeLimitBy(select_query);
/// Remove duplicated columns from USING(...).
optimizeUsing(select_query);
if (settings.optimize_or_like_chain
&& settings.allow_hyperscan
&& settings.max_hyperscan_regexp_length == 0
&& settings.max_hyperscan_regexp_total_length == 0)
{
optimizeOrLikeChain(query);
}
}
}