2020-07-22 17:13:05 +00:00
|
|
|
#include <Core/Settings.h>
|
|
|
|
|
|
|
|
#include <Interpreters/TreeOptimizer.h>
|
2021-05-21 18:48:19 +00:00
|
|
|
#include <Interpreters/TreeRewriter.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
#include <Interpreters/OptimizeIfChains.h>
|
|
|
|
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
|
2021-01-04 20:55:32 +00:00
|
|
|
#include <Interpreters/WhereConstraintsOptimizer.h>
|
2021-11-10 17:57:59 +00:00
|
|
|
#include <Interpreters/SubstituteColumnOptimizer.h>
|
2021-01-04 20:55:32 +00:00
|
|
|
#include <Interpreters/TreeCNFConverter.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
#include <Interpreters/ArithmeticOperationsInAgrFuncOptimize.h>
|
|
|
|
#include <Interpreters/DuplicateOrderByVisitor.h>
|
|
|
|
#include <Interpreters/GroupByFunctionKeysVisitor.h>
|
|
|
|
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
|
2020-07-23 15:15:22 +00:00
|
|
|
#include <Interpreters/RewriteAnyFunctionVisitor.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
|
2022-07-11 15:13:36 +00:00
|
|
|
#include <Interpreters/FunctionMaskingArgumentCheckVisitor.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
|
2021-02-07 07:22:51 +00:00
|
|
|
#include <Interpreters/RewriteCountVariantsVisitor.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
#include <Interpreters/MonotonicityCheckVisitor.h>
|
|
|
|
#include <Interpreters/ConvertStringsToEnumVisitor.h>
|
2022-02-27 16:43:56 +00:00
|
|
|
#include <Interpreters/ConvertFunctionOrLikeVisitor.h>
|
2021-05-21 18:48:19 +00:00
|
|
|
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Interpreters/ExternalDictionariesLoader.h>
|
2021-07-21 06:43:40 +00:00
|
|
|
#include <Interpreters/GatherFunctionQuantileVisitor.h>
|
2023-02-09 04:06:42 +00:00
|
|
|
#include <Interpreters/RewriteSumIfFunctionVisitor.h>
|
|
|
|
#include <Interpreters/RewriteArrayExistsFunctionVisitor.h>
|
Optimize predicate with toYear converter
The date converters, such as toYear, are widely used in the where
clauses of the SQL queries, however, these conversions are often
expensive due to the complexity of the calendar system.
The function preimage is found to be an optimization for the predicates
with the converters. Given a predicate, toYear(c) = y, we could
convert it to its equivalent form: c >= b AND c <= e, where b is
"y-01-01" and e is "y-12-31". The similar transformation applies
to other comparisons (<>, <, >, <=, >=).
This commit implemented the above transformation at the AST level
by adding a new pass in the TreeOptimizer and a new AST visitor
for in-place replacing the predicates of toYear with the converted
ones.
2023-05-19 15:58:32 +00:00
|
|
|
#include <Interpreters/OptimizeDateFilterVisitor.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
|
|
|
|
#include <Parsers/ASTExpressionList.h>
|
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
|
|
|
#include <Parsers/ASTOrderByElement.h>
|
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
2020-08-20 17:04:42 +00:00
|
|
|
#include <Parsers/ASTSubquery.h>
|
|
|
|
#include <Parsers/ASTSelectWithUnionQuery.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
|
|
|
|
|
|
|
#include <Functions/FunctionFactory.h>
|
2022-09-24 21:24:39 +00:00
|
|
|
#include <Functions/UserDefined/UserDefinedExecutableFunctionFactory.h>
|
2021-05-21 18:48:19 +00:00
|
|
|
#include <Storages/IStorage.h>
|
2020-07-22 17:13:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int LOGICAL_ERROR;
|
2021-09-11 22:41:37 +00:00
|
|
|
extern const int UNKNOWN_TYPE_OF_AST_NODE;
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
const std::unordered_set<String> possibly_injective_function_names
|
|
|
|
{
|
|
|
|
"dictGet",
|
|
|
|
"dictGetString",
|
|
|
|
"dictGetUInt8",
|
|
|
|
"dictGetUInt16",
|
|
|
|
"dictGetUInt32",
|
|
|
|
"dictGetUInt64",
|
|
|
|
"dictGetInt8",
|
|
|
|
"dictGetInt16",
|
|
|
|
"dictGetInt32",
|
|
|
|
"dictGetInt64",
|
|
|
|
"dictGetFloat32",
|
|
|
|
"dictGetFloat64",
|
|
|
|
"dictGetDate",
|
|
|
|
"dictGetDateTime"
|
|
|
|
};
|
|
|
|
|
|
|
|
/** You can not completely remove GROUP BY. Because if there were no aggregate functions, then it turns out that there will be no aggregation.
|
|
|
|
* Instead, leave `GROUP BY const`.
|
|
|
|
* Next, see deleting the constants in the analyzeAggregation method.
|
|
|
|
*/
|
2021-10-22 12:22:16 +00:00
|
|
|
void appendUnusedGroupByColumn(ASTSelectQuery * select_query)
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
|
|
|
/// You must insert a constant that is not the name of the column in the table. Such a case is rare, but it happens.
|
2021-10-22 12:22:16 +00:00
|
|
|
/// Also start unused_column integer must not intersect with ([1, source_columns.size()])
|
2021-08-09 14:16:44 +00:00
|
|
|
/// might be in positional GROUP BY.
|
2020-07-22 17:13:05 +00:00
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::make_shared<ASTExpressionList>());
|
2022-04-18 08:18:31 +00:00
|
|
|
select_query->groupBy()->children.emplace_back(std::make_shared<ASTLiteral>(static_cast<Int64>(-1)));
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Eliminates injective function calls and constant expressions from group by statement.
|
2021-10-22 12:22:16 +00:00
|
|
|
void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context)
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
|
|
|
const FunctionFactory & function_factory = FunctionFactory::instance();
|
|
|
|
|
|
|
|
if (!select_query->groupBy())
|
|
|
|
return;
|
|
|
|
|
|
|
|
const auto is_literal = [] (const ASTPtr & ast) -> bool
|
|
|
|
{
|
|
|
|
return ast->as<ASTLiteral>();
|
|
|
|
};
|
|
|
|
|
|
|
|
auto & group_exprs = select_query->groupBy()->children;
|
|
|
|
|
|
|
|
/// removes expression at index idx by making it last one and calling .pop_back()
|
|
|
|
const auto remove_expr_at_index = [&group_exprs] (const size_t idx)
|
|
|
|
{
|
|
|
|
if (idx < group_exprs.size() - 1)
|
|
|
|
std::swap(group_exprs[idx], group_exprs.back());
|
|
|
|
|
|
|
|
group_exprs.pop_back();
|
|
|
|
};
|
|
|
|
|
2021-08-09 14:16:44 +00:00
|
|
|
const auto & settings = context->getSettingsRef();
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
/// iterate over each GROUP BY expression, eliminate injective function calls and literals
|
|
|
|
for (size_t i = 0; i < group_exprs.size();)
|
|
|
|
{
|
|
|
|
if (const auto * function = group_exprs[i]->as<ASTFunction>())
|
|
|
|
{
|
|
|
|
/// assert function is injective
|
2022-04-18 10:18:43 +00:00
|
|
|
if (possibly_injective_function_names.contains(function->name))
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
|
|
|
/// do not handle semantic errors here
|
|
|
|
if (function->arguments->children.size() < 2)
|
|
|
|
{
|
|
|
|
++i;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
const auto * dict_name_ast = function->arguments->children[0]->as<ASTLiteral>();
|
|
|
|
const auto * attr_name_ast = function->arguments->children[1]->as<ASTLiteral>();
|
|
|
|
if (!dict_name_ast || !attr_name_ast)
|
|
|
|
{
|
|
|
|
++i;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
const auto & dict_name = dict_name_ast->value.safeGet<String>();
|
|
|
|
const auto & attr_name = attr_name_ast->value.safeGet<String>();
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
const auto & dict_ptr = context->getExternalDictionariesLoader().getDictionary(dict_name, context);
|
2020-07-22 17:13:05 +00:00
|
|
|
if (!dict_ptr->isInjective(attr_name))
|
|
|
|
{
|
|
|
|
++i;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2022-04-21 11:44:56 +00:00
|
|
|
else
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
2022-04-21 11:44:56 +00:00
|
|
|
FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context);
|
|
|
|
|
|
|
|
if (!function_builder)
|
|
|
|
function_builder = function_factory.get(function->name, context);
|
|
|
|
|
2022-04-21 17:10:11 +00:00
|
|
|
if (!function_builder->isInjective({}))
|
2022-04-21 11:44:56 +00:00
|
|
|
{
|
|
|
|
++i;
|
|
|
|
continue;
|
|
|
|
}
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
2022-09-05 01:50:24 +00:00
|
|
|
/// don't optimize functions that shadow any of it's arguments, e.g.:
|
2022-07-14 15:07:47 +00:00
|
|
|
/// SELECT toString(dummy) as dummy FROM system.one GROUP BY dummy;
|
|
|
|
if (!function->alias.empty())
|
2022-07-11 15:13:36 +00:00
|
|
|
{
|
|
|
|
FunctionMaskingArgumentCheckVisitor::Data data{.alias=function->alias};
|
|
|
|
FunctionMaskingArgumentCheckVisitor(data).visit(function->arguments);
|
|
|
|
|
|
|
|
if (data.is_rejected)
|
|
|
|
{
|
|
|
|
++i;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2020-07-22 17:13:05 +00:00
|
|
|
|
|
|
|
/// copy shared pointer to args in order to ensure lifetime
|
|
|
|
auto args_ast = function->arguments;
|
|
|
|
|
|
|
|
/** remove function call and take a step back to ensure
|
|
|
|
* next iteration does not skip not yet processed data
|
|
|
|
*/
|
|
|
|
remove_expr_at_index(i);
|
|
|
|
|
|
|
|
/// copy non-literal arguments
|
|
|
|
std::remove_copy_if(
|
|
|
|
std::begin(args_ast->children), std::end(args_ast->children),
|
|
|
|
std::back_inserter(group_exprs), is_literal
|
|
|
|
);
|
|
|
|
}
|
2021-08-19 15:47:26 +00:00
|
|
|
else if (is_literal(group_exprs[i]))
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
2021-08-19 15:47:26 +00:00
|
|
|
bool keep_position = false;
|
|
|
|
if (settings.enable_positional_arguments)
|
|
|
|
{
|
2021-08-20 09:08:39 +00:00
|
|
|
const auto & value = group_exprs[i]->as<ASTLiteral>()->value;
|
2021-08-19 15:47:26 +00:00
|
|
|
if (value.getType() == Field::Types::UInt64)
|
|
|
|
{
|
|
|
|
auto pos = value.get<UInt64>();
|
2021-11-15 12:09:04 +00:00
|
|
|
if (pos > 0 && pos <= select_query->select()->children.size())
|
2021-08-19 15:47:26 +00:00
|
|
|
keep_position = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (keep_position)
|
|
|
|
++i;
|
|
|
|
else
|
|
|
|
remove_expr_at_index(i);
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/// if neither a function nor literal - advance to next expression
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (group_exprs.empty())
|
2021-10-22 12:22:16 +00:00
|
|
|
appendUnusedGroupByColumn(select_query);
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
struct GroupByKeysInfo
|
|
|
|
{
|
2020-11-12 19:50:01 +00:00
|
|
|
NameSet key_names; ///set of keys' short names
|
2020-07-22 17:13:05 +00:00
|
|
|
bool has_function = false;
|
|
|
|
};
|
|
|
|
|
2020-11-12 19:50:01 +00:00
|
|
|
/// Collects the column names of all GROUP BY keys.
///
/// A key that is itself an ASTExpressionList is treated as a grouping set: the names of
/// its children are collected. Any other key contributes its own name, and sets
/// `has_function` if it is a function call.
GroupByKeysInfo getGroupByKeysInfo(const ASTs & group_by_keys)
{
    GroupByKeysInfo data;

    /// filling set with short names of keys
    for (const auto & group_key : group_by_keys)
    {
        /// for grouping sets case
        /// A single pointer cast is used: the node is neither cast twice nor copied.
        if (const auto * expression_list = group_key->as<ASTExpressionList>())
        {
            for (const auto & group_elem : expression_list->children)
                data.key_names.insert(group_elem->getColumnName());

            continue;
        }

        if (group_key->as<ASTFunction>())
            data.has_function = true;

        data.key_names.insert(group_key->getColumnName());
    }

    return data;
}
|
|
|
|
|
|
|
|
/// Eliminates min/max/any-aggregators of functions of GROUP BY keys
|
2020-10-13 05:30:56 +00:00
|
|
|
void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query, ASTPtr & node)
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
2021-05-21 14:56:54 +00:00
|
|
|
if (!select_query->groupBy())
|
2020-07-22 17:13:05 +00:00
|
|
|
return;
|
|
|
|
|
2020-11-13 16:30:10 +00:00
|
|
|
const auto & group_by_keys = select_query->groupBy()->children;
|
2020-11-12 19:50:01 +00:00
|
|
|
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
|
2020-07-22 17:13:05 +00:00
|
|
|
|
|
|
|
SelectAggregateFunctionOfGroupByKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
|
2020-10-13 05:30:56 +00:00
|
|
|
SelectAggregateFunctionOfGroupByKeysVisitor(visitor_data).visit(node);
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Remove duplicate items from ORDER BY.
|
|
|
|
void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query)
|
|
|
|
{
|
|
|
|
if (!select_query->orderBy())
|
|
|
|
return;
|
|
|
|
|
|
|
|
/// Make unique sorting conditions.
|
|
|
|
using NameAndLocale = std::pair<String, String>;
|
|
|
|
std::set<NameAndLocale> elems_set;
|
|
|
|
|
|
|
|
ASTs & elems = select_query->orderBy()->children;
|
|
|
|
ASTs unique_elems;
|
|
|
|
unique_elems.reserve(elems.size());
|
|
|
|
|
|
|
|
for (const auto & elem : elems)
|
|
|
|
{
|
|
|
|
String name = elem->children.front()->getColumnName();
|
|
|
|
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
|
|
|
|
|
2021-09-11 23:55:53 +00:00
|
|
|
if (order_by_elem.with_fill /// Always keep elements WITH FILL as they affects other.
|
|
|
|
|| elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second)
|
2020-07-22 17:13:05 +00:00
|
|
|
unique_elems.emplace_back(elem);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unique_elems.size() < elems.size())
|
|
|
|
elems = std::move(unique_elems);
|
|
|
|
}
|
|
|
|
|
2020-08-20 17:04:42 +00:00
|
|
|
/// Optimize duplicate ORDER BY
|
2021-06-01 12:20:52 +00:00
|
|
|
void optimizeDuplicateOrderBy(ASTPtr & query, ContextPtr context)
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
|
|
|
DuplicateOrderByVisitor::Data order_by_data{context};
|
|
|
|
DuplicateOrderByVisitor(order_by_data).visit(query);
|
2020-08-20 17:04:42 +00:00
|
|
|
}
|
|
|
|
|
2020-08-20 20:50:53 +00:00
|
|
|
/// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any
|
2020-08-20 17:04:42 +00:00
|
|
|
const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select)
|
|
|
|
{
|
2020-08-20 18:09:48 +00:00
|
|
|
if (!select.tables())
|
|
|
|
return nullptr;
|
|
|
|
|
2020-08-20 17:04:42 +00:00
|
|
|
const auto & tables = select.tables()->children;
|
|
|
|
if (tables.empty() || tables.size() != 1)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
const auto & ast_table_expression = tables[0]->as<ASTTablesInSelectQueryElement>()->table_expression;
|
|
|
|
if (!ast_table_expression)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
const auto & table_expression = ast_table_expression->as<ASTTableExpression>();
|
|
|
|
if (!table_expression->subquery)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
const auto & subquery = table_expression->subquery->as<ASTSubquery>();
|
|
|
|
if (!subquery || subquery->children.size() != 1)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
const auto & subselect_union = subquery->children[0]->as<ASTSelectWithUnionQuery>();
|
|
|
|
if (!subselect_union || !subselect_union->list_of_selects ||
|
|
|
|
subselect_union->list_of_selects->children.size() != 1)
|
|
|
|
return nullptr;
|
|
|
|
|
2020-08-20 20:50:53 +00:00
|
|
|
const auto & subselect = subselect_union->list_of_selects->children[0]->as<ASTSelectQuery>();
|
|
|
|
if (subselect && subselect->settings())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
return subselect;
|
2020-08-20 17:04:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
std::unordered_set<String> getDistinctNames(const ASTSelectQuery & select)
|
|
|
|
{
|
|
|
|
if (!select.select() || select.select()->children.empty())
|
|
|
|
return {};
|
|
|
|
|
|
|
|
std::unordered_set<String> names;
|
|
|
|
std::unordered_set<String> implicit_distinct;
|
|
|
|
|
|
|
|
if (!select.distinct)
|
|
|
|
{
|
|
|
|
/// SELECT a, b FROM (SELECT DISTINCT a FROM ...)
|
|
|
|
if (const ASTSelectQuery * subselect = getSimpleSubselect(select))
|
|
|
|
implicit_distinct = getDistinctNames(*subselect);
|
|
|
|
|
|
|
|
if (implicit_distinct.empty())
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Extract result column names (prefer aliases, ignore table name)
|
|
|
|
for (const auto & id : select.select()->children)
|
|
|
|
{
|
|
|
|
String alias = id->tryGetAlias();
|
|
|
|
|
|
|
|
if (const auto * identifier = id->as<ASTIdentifier>())
|
|
|
|
{
|
2020-08-20 22:05:06 +00:00
|
|
|
const String & name = identifier->shortName();
|
2020-08-20 17:04:42 +00:00
|
|
|
|
2022-04-18 10:18:43 +00:00
|
|
|
if (select.distinct || implicit_distinct.contains(name))
|
2020-08-20 17:04:42 +00:00
|
|
|
{
|
|
|
|
if (alias.empty())
|
|
|
|
names.insert(name);
|
|
|
|
else
|
|
|
|
names.insert(alias);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (select.distinct && !alias.empty())
|
|
|
|
{
|
|
|
|
/// It's not possible to use getAliasOrColumnName() cause name is context specific (function arguments)
|
|
|
|
names.insert(alias);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-20 18:51:22 +00:00
|
|
|
/// SELECT a FROM (SELECT DISTINCT a, b FROM ...)
|
2020-08-20 19:04:46 +00:00
|
|
|
if (!select.distinct && names.size() != implicit_distinct.size())
|
2020-08-20 18:51:22 +00:00
|
|
|
return {};
|
|
|
|
|
2020-08-20 17:04:42 +00:00
|
|
|
return names;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Remove DISTINCT from query if columns are known as DISTINCT from subquery
|
|
|
|
void optimizeDuplicateDistinct(ASTSelectQuery & select)
|
|
|
|
{
|
|
|
|
if (!select.select() || select.select()->children.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
const ASTSelectQuery * subselect = getSimpleSubselect(select);
|
|
|
|
if (!subselect)
|
|
|
|
return;
|
|
|
|
|
|
|
|
std::unordered_set<String> distinct_names = getDistinctNames(*subselect);
|
2022-09-13 12:06:56 +00:00
|
|
|
std::unordered_set<std::string_view> selected_names;
|
2020-08-20 17:04:42 +00:00
|
|
|
|
|
|
|
/// Check source column names from select list (ignore aliases and table names)
|
|
|
|
for (const auto & id : select.select()->children)
|
|
|
|
{
|
|
|
|
const auto * identifier = id->as<ASTIdentifier>();
|
|
|
|
if (!identifier)
|
|
|
|
return;
|
|
|
|
|
2022-09-13 12:06:56 +00:00
|
|
|
const String & name = identifier->shortName();
|
2022-04-18 10:18:43 +00:00
|
|
|
if (!distinct_names.contains(name))
|
2020-08-20 17:04:42 +00:00
|
|
|
return; /// Not a distinct column, keep DISTINCT for it.
|
|
|
|
|
2022-09-13 12:06:56 +00:00
|
|
|
selected_names.emplace(name);
|
2020-08-20 17:04:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// select columns list != distinct columns list
|
|
|
|
/// SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM ...)) -- cannot remove DISTINCT
|
|
|
|
if (selected_names.size() != distinct_names.size())
|
|
|
|
return;
|
|
|
|
|
|
|
|
select.distinct = false;
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression,
|
|
|
|
/// has a single argument and not an aggregate functions.
|
2021-06-01 12:20:52 +00:00
|
|
|
void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context,
|
2020-11-12 23:33:26 +00:00
|
|
|
const TablesWithColumns & tables_with_columns,
|
2021-12-13 12:35:25 +00:00
|
|
|
const TreeRewriterResult & result)
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
|
|
|
auto order_by = select_query->orderBy();
|
2021-05-21 14:56:54 +00:00
|
|
|
if (!order_by)
|
2020-07-22 17:13:05 +00:00
|
|
|
return;
|
|
|
|
|
2021-12-13 12:35:25 +00:00
|
|
|
/// Do not apply optimization for Distributed and Merge storages,
|
2022-09-02 08:54:48 +00:00
|
|
|
/// because we can't get the sorting key of their underlying tables
|
2021-12-13 12:35:25 +00:00
|
|
|
/// and we can break the matching of the sorting key for `read_in_order`
|
|
|
|
/// optimization by removing monotonous functions from the prefix of key.
|
|
|
|
if (result.is_remote_storage || (result.storage && result.storage->getName() == "Merge"))
|
|
|
|
return;
|
|
|
|
|
2021-09-11 22:41:37 +00:00
|
|
|
for (const auto & child : order_by->children)
|
|
|
|
{
|
|
|
|
auto * order_by_element = child->as<ASTOrderByElement>();
|
|
|
|
|
|
|
|
if (!order_by_element || order_by_element->children.empty())
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST");
|
2021-09-11 22:41:37 +00:00
|
|
|
|
|
|
|
if (order_by_element->with_fill)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
std::unordered_set<String> group_by_hashes;
|
|
|
|
if (auto group_by = select_query->groupBy())
|
|
|
|
{
|
2022-05-24 18:56:22 +00:00
|
|
|
if (select_query->group_by_with_grouping_sets)
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
2022-05-24 18:56:22 +00:00
|
|
|
for (auto & set : group_by->children)
|
|
|
|
{
|
|
|
|
for (auto & elem : set->children)
|
|
|
|
{
|
|
|
|
auto hash = elem->getTreeHash();
|
|
|
|
String key = toString(hash.first) + '_' + toString(hash.second);
|
|
|
|
group_by_hashes.insert(key);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (auto & elem : group_by->children)
|
|
|
|
{
|
|
|
|
auto hash = elem->getTreeHash();
|
|
|
|
String key = toString(hash.first) + '_' + toString(hash.second);
|
|
|
|
group_by_hashes.insert(key);
|
|
|
|
}
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-17 16:02:29 +00:00
|
|
|
auto sorting_key_columns = result.storage_snapshot ? result.storage_snapshot->metadata->getSortingKeyColumns() : Names{};
|
2021-12-13 12:35:25 +00:00
|
|
|
|
2020-11-12 23:33:26 +00:00
|
|
|
bool is_sorting_key_prefix = true;
|
|
|
|
for (size_t i = 0; i < order_by->children.size(); ++i)
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
2020-11-12 23:33:26 +00:00
|
|
|
auto * order_by_element = order_by->children[i]->as<ASTOrderByElement>();
|
2021-09-11 22:41:37 +00:00
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
auto & ast_func = order_by_element->children[0];
|
|
|
|
if (!ast_func->as<ASTFunction>())
|
|
|
|
continue;
|
|
|
|
|
2020-11-12 23:33:26 +00:00
|
|
|
if (i >= sorting_key_columns.size() || ast_func->getColumnName() != sorting_key_columns[i])
|
|
|
|
is_sorting_key_prefix = false;
|
|
|
|
|
|
|
|
/// If order by expression matches the sorting key, do not remove
|
|
|
|
/// functions to allow execute reading in order of key.
|
|
|
|
if (is_sorting_key_prefix)
|
|
|
|
continue;
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
MonotonicityCheckVisitor::Data data{tables_with_columns, context, group_by_hashes};
|
|
|
|
MonotonicityCheckVisitor(data).visit(ast_func);
|
|
|
|
|
|
|
|
if (!data.isRejected())
|
|
|
|
{
|
|
|
|
ast_func = data.identifier->clone();
|
|
|
|
ast_func->setAlias("");
|
|
|
|
if (!data.monotonicity.is_positive)
|
|
|
|
order_by_element->direction *= -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-08 01:01:47 +00:00
|
|
|
/// If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x.
|
2020-07-22 17:13:05 +00:00
|
|
|
/// Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y
|
|
|
|
/// in case if f(), g(), h(), t() are deterministic (in scope of query).
|
|
|
|
/// Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x).
|
2021-06-01 12:20:52 +00:00
|
|
|
void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, ContextPtr context)
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
|
|
|
const auto & order_by = select_query->orderBy();
|
|
|
|
if (!order_by)
|
|
|
|
return;
|
|
|
|
|
2021-09-11 22:41:37 +00:00
|
|
|
for (const auto & child : order_by->children)
|
|
|
|
{
|
|
|
|
auto * order_by_element = child->as<ASTOrderByElement>();
|
|
|
|
|
|
|
|
if (!order_by_element || order_by_element->children.empty())
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST");
|
2021-09-11 22:41:37 +00:00
|
|
|
|
|
|
|
if (order_by_element->with_fill)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
std::unordered_set<String> prev_keys;
|
|
|
|
ASTs modified;
|
|
|
|
modified.reserve(order_by->children.size());
|
|
|
|
|
|
|
|
for (auto & order_by_element : order_by->children)
|
|
|
|
{
|
|
|
|
/// Order by contains ASTOrderByElement as children and meaning item only as a grand child.
|
|
|
|
ASTPtr & name_or_function = order_by_element->children[0];
|
|
|
|
|
|
|
|
if (name_or_function->as<ASTFunction>())
|
|
|
|
{
|
|
|
|
if (!prev_keys.empty())
|
|
|
|
{
|
|
|
|
RedundantFunctionsInOrderByVisitor::Data data{prev_keys, context};
|
|
|
|
RedundantFunctionsInOrderByVisitor(data).visit(name_or_function);
|
|
|
|
if (data.redundant)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// @note Leave duplicate keys unchanged. They would be removed in optimizeDuplicatesInOrderBy()
|
|
|
|
if (auto * identifier = name_or_function->as<ASTIdentifier>())
|
|
|
|
prev_keys.emplace(getIdentifierName(identifier));
|
|
|
|
|
|
|
|
modified.push_back(order_by_element);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (modified.size() < order_by->children.size())
|
|
|
|
order_by->children = std::move(modified);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Remove duplicate items from LIMIT BY.
|
|
|
|
void optimizeLimitBy(const ASTSelectQuery * select_query)
|
|
|
|
{
|
|
|
|
if (!select_query->limitBy())
|
|
|
|
return;
|
|
|
|
|
|
|
|
std::set<String> elems_set;
|
|
|
|
|
|
|
|
ASTs & elems = select_query->limitBy()->children;
|
|
|
|
ASTs unique_elems;
|
|
|
|
unique_elems.reserve(elems.size());
|
|
|
|
|
|
|
|
for (const auto & elem : elems)
|
|
|
|
{
|
|
|
|
if (elems_set.emplace(elem->getColumnName()).second)
|
|
|
|
unique_elems.emplace_back(elem);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unique_elems.size() < elems.size())
|
|
|
|
elems = std::move(unique_elems);
|
|
|
|
}
|
|
|
|
|
2021-01-04 20:55:32 +00:00
|
|
|
/// Use constraints to get rid of useless parts of query
|
2021-05-04 19:18:37 +00:00
|
|
|
void optimizeWithConstraints(ASTSelectQuery * select_query,
|
|
|
|
Aliases & /*aliases*/,
|
|
|
|
const NameSet & /*source_columns_set*/,
|
|
|
|
const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
|
|
|
|
const StorageMetadataPtr & metadata_snapshot,
|
|
|
|
const bool optimize_append_index)
|
2021-01-03 10:24:09 +00:00
|
|
|
{
|
2021-05-04 19:18:37 +00:00
|
|
|
WhereConstraintsOptimizer(select_query, metadata_snapshot, optimize_append_index).perform();
|
2021-01-04 20:55:32 +00:00
|
|
|
}
|
2021-01-03 10:24:09 +00:00
|
|
|
|
2021-05-04 19:18:37 +00:00
|
|
|
/// Runs SubstituteColumnOptimizer on the query using the table metadata and storage.
/// The aliases, source columns and tables parameters are accepted but not used.
void optimizeSubstituteColumn(ASTSelectQuery * select_query,
                              Aliases & /*aliases*/,
                              const NameSet & /*source_columns_set*/,
                              const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
                              const StorageMetadataPtr & metadata_snapshot,
                              const ConstStoragePtr & storage)
{
    SubstituteColumnOptimizer optimizer(select_query, metadata_snapshot, storage);
    optimizer.perform();
}
|
|
|
|
|
2021-11-10 17:57:59 +00:00
|
|
|
/// Transform WHERE to CNF for more convenient optimization.
|
2021-11-18 14:24:06 +00:00
|
|
|
bool convertQueryToCNF(ASTSelectQuery * select_query)
|
2021-01-04 20:55:32 +00:00
|
|
|
{
|
|
|
|
if (select_query->where())
|
2021-01-03 10:24:09 +00:00
|
|
|
{
|
2021-11-18 14:24:06 +00:00
|
|
|
auto cnf_form = TreeCNFConverter::tryConvertToCNF(select_query->where());
|
|
|
|
if (!cnf_form)
|
|
|
|
return false;
|
|
|
|
|
2022-09-05 01:50:24 +00:00
|
|
|
cnf_form->pushNotInFunctions();
|
2021-11-18 14:24:06 +00:00
|
|
|
select_query->refWhere() = TreeCNFConverter::fromCNF(*cnf_form);
|
|
|
|
return true;
|
2021-01-03 10:24:09 +00:00
|
|
|
}
|
2021-11-18 14:24:06 +00:00
|
|
|
|
|
|
|
return false;
|
2021-01-03 10:24:09 +00:00
|
|
|
}
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
/// Remove duplicated columns from USING(...).
|
|
|
|
void optimizeUsing(const ASTSelectQuery * select_query)
|
|
|
|
{
|
|
|
|
if (!select_query->join())
|
|
|
|
return;
|
|
|
|
|
|
|
|
const auto * table_join = select_query->join()->table_join->as<ASTTableJoin>();
|
|
|
|
if (!(table_join && table_join->using_expression_list))
|
|
|
|
return;
|
|
|
|
|
|
|
|
ASTs & expression_list = table_join->using_expression_list->children;
|
|
|
|
ASTs uniq_expressions_list;
|
|
|
|
|
|
|
|
std::set<String> expressions_names;
|
|
|
|
|
|
|
|
for (const auto & expression : expression_list)
|
|
|
|
{
|
|
|
|
auto expression_name = expression->getAliasOrColumnName();
|
|
|
|
if (expressions_names.find(expression_name) == expressions_names.end())
|
|
|
|
{
|
|
|
|
uniq_expressions_list.push_back(expression);
|
|
|
|
expressions_names.insert(expression_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (uniq_expressions_list.size() < expression_list.size())
|
|
|
|
expression_list = uniq_expressions_list;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Move arithmetic operations out of aggregation functions
/// by running ArithmeticOperationsInAgrFuncVisitor over the query.
void optimizeAggregationFunctions(ASTPtr & query)
{
    ArithmeticOperationsInAgrFuncVisitor::Data visitor_data;
    ArithmeticOperationsInAgrFuncVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
2020-07-23 15:15:22 +00:00
|
|
|
/// Runs RewriteAnyFunctionVisitor over the query.
void optimizeAnyFunctions(ASTPtr & query)
{
    RewriteAnyFunctionVisitor::Data visitor_data = {};
    RewriteAnyFunctionVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
2021-01-21 09:01:35 +00:00
|
|
|
/// Runs RewriteSumIfFunctionVisitor over the query.
void optimizeSumIfFunctions(ASTPtr & query)
{
    RewriteSumIfFunctionVisitor::Data visitor_data = {};
    RewriteSumIfFunctionVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
2023-02-09 04:06:42 +00:00
|
|
|
/// Runs RewriteArrayExistsFunctionVisitor over the query.
void optimizeArrayExistsFunctions(ASTPtr & query)
{
    RewriteArrayExistsFunctionVisitor::Data visitor_data = {};
    RewriteArrayExistsFunctionVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
2022-05-31 14:26:17 +00:00
|
|
|
/// Runs OptimizeMultiIfToIfVisitor over the query.
void optimizeMultiIfToIf(ASTPtr & query)
{
    OptimizeMultiIfToIfVisitor::Data visitor_data;
    OptimizeMultiIfToIfVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
2021-06-01 12:20:52 +00:00
|
|
|
/// Runs RemoveInjectiveFunctionsVisitor over the query; the visitor gets the query context.
void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context)
{
    RemoveInjectiveFunctionsVisitor::Data visitor_data(context);
    RemoveInjectiveFunctionsVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
Optimize predicate with toYear converter
The date converters, such as toYear, are widely used in the where
clauses of the SQL queries, however, these conversions are often
expensive due to the complexity of the calendar system.
The function preimage is found to be an optimization for the predicates
with the converters. Given a predicate, toYear(c) = y, we could
convert it to its equivalent form: c >= b AND c <= e, where b is
"y-01-01" and e is "y-12-31". The similar transformation applies
to other comparisons (<>, <, >, <=, >=).
This commit implemented the above transformation at the AST level
by adding a new pass in the TreeOptimizer and a new AST visitor
for in-place replacing the predicates of toYear with the converted
ones.
2023-05-19 15:58:32 +00:00
|
|
|
/// Rewrites date predicates in place in the WHERE and PREWHERE clauses (when present)
/// using OptimizeDateFilterInPlaceVisitor. Each clause gets its own visitor data.
void optimizeDateFilters(ASTSelectQuery * select_query)
{
    const auto rewrite_in_place = [] (ASTPtr & predicate)
    {
        OptimizeDateFilterInPlaceVisitor::Data data;
        OptimizeDateFilterInPlaceVisitor(data).visit(predicate);
    };

    /// Predicates in HAVING clause have been moved to WHERE clause.
    if (select_query->where())
        rewrite_in_place(select_query->refWhere());

    if (select_query->prewhere())
        rewrite_in_place(select_query->refPrewhere());
}
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
/// Two passes over the query: FindUsedFunctionsVisitor fills `used_as_argument` for the
/// functions "if" and "transform", then ConvertStringsToEnumVisitor runs with that set.
void transformIfStringsIntoEnum(ASTPtr & query)
{
    std::unordered_set<String> function_names = {"if", "transform"};
    std::unordered_set<String> used_as_argument;

    /// Pass 1: collect.
    FindUsedFunctionsVisitor::Data used_data{function_names, used_as_argument};
    FindUsedFunctionsVisitor finder(used_data);
    finder.visit(query);

    /// Pass 2: convert.
    ConvertStringsToEnumVisitor::Data convert_data{used_as_argument};
    ConvertStringsToEnumVisitor converter(convert_data);
    converter.visit(query);
}
|
|
|
|
|
2021-05-21 23:22:22 +00:00
|
|
|
/// Runs RewriteFunctionToSubcolumnVisitor over the whole query, giving it the
/// table metadata snapshot as visitor data.
void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot)
{
    RewriteFunctionToSubcolumnVisitor::Data visitor_data{metadata_snapshot};
    RewriteFunctionToSubcolumnVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
2022-02-27 16:43:56 +00:00
|
|
|
/// Runs ConvertFunctionOrLikeVisitor over the whole query with default-initialized visitor data.
void optimizeOrLikeChain(ASTPtr & query)
{
    ConvertFunctionOrLikeVisitor::Data visitor_data = {};
    ConvertFunctionOrLikeVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
|
|
|
|
2023-05-22 23:31:50 +00:00
|
|
|
/// Optimizations of conditional functions, applied in this order:
///  - optimizeMultiIfToIf, only when `multiif_to_if` is set;
///  - OptimizeIfWithConstantConditionVisitor, always;
///  - OptimizeIfChainsVisitor, only when `if_chain_to_multiif` is set.
void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif, bool multiif_to_if)
{
    if (multiif_to_if)
    {
        optimizeMultiIfToIf(query);
    }

    /// Optimize if with constant condition after constants was substituted instead of scalar subqueries.
    OptimizeIfWithConstantConditionVisitor constant_condition_visitor(aliases);
    constant_condition_visitor.visit(query);

    if (if_chain_to_multiif)
    {
        OptimizeIfChainsVisitor().visit(query);
    }
}
|
2022-05-10 14:24:20 +00:00
|
|
|
|
2022-12-02 09:56:23 +00:00
|
|
|
/// Runs RewriteCountVariantsVisitor (constructed with the query context) over the query.
void TreeOptimizer::optimizeCountConstantAndSumOne(ASTPtr & query, ContextPtr context)
{
    RewriteCountVariantsVisitor count_variants_visitor(context);
    count_variants_visitor.visit(query);
}
|
2020-07-22 17:13:05 +00:00
|
|
|
|
2022-11-29 17:22:24 +00:00
|
|
|
///eliminate functions of other GROUP BY keys
|
|
|
|
void TreeOptimizer::optimizeGroupByFunctionKeys(ASTSelectQuery * select_query)
|
|
|
|
{
|
|
|
|
if (!select_query->groupBy())
|
|
|
|
return;
|
|
|
|
|
|
|
|
auto group_by = select_query->groupBy();
|
|
|
|
const auto & group_by_keys = group_by->children;
|
|
|
|
|
|
|
|
ASTs modified; ///result
|
|
|
|
|
|
|
|
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
|
|
|
|
|
|
|
|
if (!group_by_keys_data.has_function)
|
|
|
|
return;
|
|
|
|
|
|
|
|
GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
|
|
|
|
GroupByFunctionKeysVisitor(visitor_data).visit(group_by);
|
|
|
|
|
|
|
|
modified.reserve(group_by_keys.size());
|
|
|
|
|
|
|
|
/// filling the result
|
|
|
|
for (const auto & group_key : group_by_keys)
|
|
|
|
if (group_by_keys_data.key_names.contains(group_key->getColumnName()))
|
|
|
|
modified.push_back(group_key);
|
|
|
|
|
|
|
|
/// modifying the input
|
|
|
|
group_by->children = modified;
|
|
|
|
}
|
|
|
|
|
2021-05-21 18:48:19 +00:00
|
|
|
void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
|
2021-06-12 00:25:09 +00:00
|
|
|
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns, ContextPtr context)
|
2020-07-22 17:13:05 +00:00
|
|
|
{
|
2021-04-10 23:33:54 +00:00
|
|
|
const auto & settings = context->getSettingsRef();
|
2020-07-22 17:13:05 +00:00
|
|
|
|
|
|
|
auto * select_query = query->as<ASTSelectQuery>();
|
|
|
|
if (!select_query)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts.");
|
2020-07-22 17:13:05 +00:00
|
|
|
|
2021-07-09 03:15:41 +00:00
|
|
|
if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns())
|
|
|
|
optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata);
|
2021-05-21 18:48:19 +00:00
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
/// Move arithmetic operations out of aggregation functions
|
|
|
|
if (settings.optimize_arithmetic_operations_in_aggregate_functions)
|
|
|
|
optimizeAggregationFunctions(query);
|
|
|
|
|
2021-11-18 14:24:06 +00:00
|
|
|
bool converted_to_cnf = false;
|
2021-01-04 20:55:32 +00:00
|
|
|
if (settings.convert_query_to_cnf)
|
2021-11-18 14:24:06 +00:00
|
|
|
converted_to_cnf = convertQueryToCNF(select_query);
|
2021-01-04 20:55:32 +00:00
|
|
|
|
2022-03-20 20:07:58 +00:00
|
|
|
if (converted_to_cnf && settings.optimize_using_constraints && result.storage_snapshot)
|
2021-04-24 15:24:32 +00:00
|
|
|
{
|
2021-11-10 16:35:28 +00:00
|
|
|
optimizeWithConstraints(select_query, result.aliases, result.source_columns_set,
|
2021-11-22 14:19:35 +00:00
|
|
|
tables_with_columns, result.storage_snapshot->metadata, settings.optimize_append_index);
|
2021-11-10 16:35:28 +00:00
|
|
|
|
2021-05-04 19:18:37 +00:00
|
|
|
if (settings.optimize_substitute_columns)
|
2021-11-10 16:35:28 +00:00
|
|
|
optimizeSubstituteColumn(select_query, result.aliases, result.source_columns_set,
|
2021-11-22 14:19:35 +00:00
|
|
|
tables_with_columns, result.storage_snapshot->metadata, result.storage);
|
2021-04-24 15:24:32 +00:00
|
|
|
}
|
2021-02-14 14:47:15 +00:00
|
|
|
|
Optimize predicate with toYear converter
The date converters, such as toYear, are widely used in the where
clauses of the SQL queries, however, these conversions are often
expensive due to the complexity of the calendar system.
The function preimage is found an optimization for the predicates
with the converters. Given a predicate, toYear(c) = y, we could
convert it to its equivalent form: c >= b AND c <= e, where b is
"y-01-01" and e is "y-12-31". The similar transformation applies
to other comparisons (<>, <, >, <=, <=).
This commit implemented the above transformation at the AST level
by adding a new pass in the TreeOptimizer and a new AST visitor
for in-place replacing the predicates of toYear with the converted
ones.
2023-05-19 15:58:32 +00:00
|
|
|
/// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, toISOWeek, etc.
|
|
|
|
optimizeDateFilters(select_query);
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
/// GROUP BY injective function elimination.
|
2021-10-22 12:22:16 +00:00
|
|
|
optimizeGroupBy(select_query, context);
|
2020-07-22 17:13:05 +00:00
|
|
|
|
|
|
|
/// GROUP BY functions of other keys elimination.
|
|
|
|
if (settings.optimize_group_by_function_keys)
|
|
|
|
optimizeGroupByFunctionKeys(select_query);
|
|
|
|
|
2020-07-23 15:15:22 +00:00
|
|
|
/// Move all operations out of any function
|
2020-07-22 17:13:05 +00:00
|
|
|
if (settings.optimize_move_functions_out_of_any)
|
2020-07-23 15:15:22 +00:00
|
|
|
optimizeAnyFunctions(query);
|
2021-01-21 09:01:35 +00:00
|
|
|
|
2021-02-07 07:22:51 +00:00
|
|
|
if (settings.optimize_normalize_count_variants)
|
2022-12-02 09:56:23 +00:00
|
|
|
optimizeCountConstantAndSumOne(query, context);
|
2021-02-07 07:22:51 +00:00
|
|
|
|
2022-07-26 15:55:13 +00:00
|
|
|
if (settings.optimize_rewrite_sum_if_to_count_if)
|
|
|
|
optimizeSumIfFunctions(query);
|
|
|
|
|
2023-02-09 07:15:08 +00:00
|
|
|
if (settings.optimize_rewrite_array_exists_to_has)
|
2023-02-09 04:06:42 +00:00
|
|
|
optimizeArrayExistsFunctions(query);
|
|
|
|
|
2020-07-22 17:13:05 +00:00
|
|
|
/// Remove injective functions inside uniq
|
|
|
|
if (settings.optimize_injective_functions_inside_uniq)
|
|
|
|
optimizeInjectiveFunctionsInsideUniq(query, context);
|
|
|
|
|
|
|
|
/// Eliminate min/max/any aggregators of functions of GROUP BY keys
|
2020-10-26 13:44:46 +00:00
|
|
|
if (settings.optimize_aggregators_of_group_by_keys
|
|
|
|
&& !select_query->group_by_with_totals
|
|
|
|
&& !select_query->group_by_with_rollup
|
|
|
|
&& !select_query->group_by_with_cube)
|
2020-10-13 05:30:56 +00:00
|
|
|
optimizeAggregateFunctionsOfGroupByKeys(select_query, query);
|
2020-07-22 17:13:05 +00:00
|
|
|
|
|
|
|
/// Remove duplicate ORDER BY and DISTINCT from subqueries.
|
|
|
|
if (settings.optimize_duplicate_order_by_and_distinct)
|
2020-08-20 17:04:42 +00:00
|
|
|
{
|
|
|
|
optimizeDuplicateOrderBy(query, context);
|
2020-08-20 20:50:53 +00:00
|
|
|
|
|
|
|
/// DISTINCT has special meaning in Distributed query with enabled distributed_group_by_no_merge
|
|
|
|
/// TODO: disable Distributed/remote() tables only
|
|
|
|
if (!settings.distributed_group_by_no_merge)
|
|
|
|
optimizeDuplicateDistinct(*select_query);
|
2020-08-20 17:04:42 +00:00
|
|
|
}
|
2020-07-22 17:13:05 +00:00
|
|
|
|
|
|
|
/// Remove functions from ORDER BY if its argument is also in ORDER BY
|
|
|
|
if (settings.optimize_redundant_functions_in_order_by)
|
|
|
|
optimizeRedundantFunctionsInOrderBy(select_query, context);
|
|
|
|
|
|
|
|
/// Replace monotonous functions with its argument
|
|
|
|
if (settings.optimize_monotonous_functions_in_order_by)
|
2021-12-13 12:35:25 +00:00
|
|
|
optimizeMonotonousFunctionsInOrderBy(select_query, context, tables_with_columns, result);
|
2020-11-12 23:33:26 +00:00
|
|
|
|
|
|
|
/// Remove duplicate items from ORDER BY.
|
|
|
|
/// Execute it after all order by optimizations,
|
|
|
|
/// because they can produce duplicated columns.
|
|
|
|
optimizeDuplicatesInOrderBy(select_query);
|
2020-07-22 17:13:05 +00:00
|
|
|
|
|
|
|
/// If function "if" has String-type arguments, transform them into enum
|
|
|
|
if (settings.optimize_if_transform_strings_to_enum)
|
|
|
|
transformIfStringsIntoEnum(query);
|
|
|
|
|
|
|
|
/// Remove duplicated elements from LIMIT BY clause.
|
|
|
|
optimizeLimitBy(select_query);
|
|
|
|
|
|
|
|
/// Remove duplicated columns from USING(...).
|
|
|
|
optimizeUsing(select_query);
|
2021-07-21 06:43:40 +00:00
|
|
|
|
2022-02-27 16:43:56 +00:00
|
|
|
if (settings.optimize_or_like_chain
|
|
|
|
&& settings.allow_hyperscan
|
|
|
|
&& settings.max_hyperscan_regexp_length == 0
|
|
|
|
&& settings.max_hyperscan_regexp_total_length == 0)
|
|
|
|
{
|
|
|
|
optimizeOrLikeChain(query);
|
|
|
|
}
|
2020-07-22 17:13:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|