mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-03 04:52:10 +00:00
Enable optimize_rewrite_sum_if_to_count_if by default
This commit is contained in:
parent
931fb65762
commit
1ba7e5942b
@ -681,7 +681,7 @@ class IColumn;
|
||||
M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \
|
||||
M(Bool, enable_sharing_sets_for_mutations, true, "Allow sharing set objects build for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption", 0) \
|
||||
\
|
||||
M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
|
||||
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
|
||||
M(Bool, optimize_rewrite_aggregate_function_with_if, true, "Rewrite aggregate functions with if expression as argument when logically equivalent. For example, avg(if(cond, col, null)) can be rewritten to avgIf(cond, col)", 0) \
|
||||
M(Bool, optimize_rewrite_array_exists_to_has, false, "Rewrite arrayExists() functions to has() when logically equivalent. For example, arrayExists(x -> x = 1, arr) can be rewritten to has(arr, 1)", 0) \
|
||||
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
|
||||
|
@ -93,6 +93,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."},
|
||||
{"first_day_of_week", "Monday", "Monday", "Added a setting for the first day of the week for date/time functions"},
|
||||
{"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"},
|
||||
{"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}
|
||||
}},
|
||||
{"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"},
|
||||
{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"},
|
||||
|
@ -1,121 +0,0 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <Interpreters/RewriteSumIfFunctionVisitor.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Visitor entry point for one AST node: forwards plain (non-window) function nodes
/// to the function-specific overload and ignores every other node.
void RewriteSumIfFunctionMatcher::visit(ASTPtr & ast, Data & data)
{
    auto * function_node = ast->as<ASTFunction>();

    /// Not a function, or a window function: leave the node untouched.
    if (!function_node || function_node->is_window_function)
        return;

    visit(*function_node, ast, data);
}
|
||||
|
||||
void RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &)
|
||||
{
|
||||
if (!func.arguments || func.arguments->children.empty())
|
||||
return;
|
||||
|
||||
auto lower_name = Poco::toLower(func.name);
|
||||
|
||||
/// sumIf, SumIf or sUMIf are valid function names, but sumIF or sumiF are not
|
||||
if (lower_name != "sum" && (lower_name != "sumif" || !endsWith(func.name, "If")))
|
||||
return;
|
||||
|
||||
const auto & func_arguments = func.arguments->children;
|
||||
|
||||
if (lower_name == "sumif")
|
||||
{
|
||||
const auto * literal = func_arguments[0]->as<ASTLiteral>();
|
||||
if (!literal || !DB::isInt64OrUInt64FieldType(literal->value.getType()))
|
||||
return;
|
||||
|
||||
if (func_arguments.size() == 2)
|
||||
{
|
||||
std::shared_ptr<ASTFunction> new_func;
|
||||
if (literal->value.get<UInt64>() == 1)
|
||||
{
|
||||
/// sumIf(1, cond) -> countIf(cond)
|
||||
new_func = makeASTFunction("countIf", func_arguments[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// sumIf(123, cond) -> 123 * countIf(cond)
|
||||
auto count_if_func = makeASTFunction("countIf", func_arguments[1]);
|
||||
new_func = makeASTFunction("multiply", func_arguments[0], std::move(count_if_func));
|
||||
}
|
||||
new_func->setAlias(func.alias);
|
||||
ast = std::move(new_func);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto * nested_func = func_arguments[0]->as<ASTFunction>();
|
||||
|
||||
if (!nested_func || Poco::toLower(nested_func->name) != "if" || nested_func->arguments->children.size() != 3)
|
||||
return;
|
||||
|
||||
const auto & if_arguments = nested_func->arguments->children;
|
||||
|
||||
const auto * first_literal = if_arguments[1]->as<ASTLiteral>();
|
||||
const auto * second_literal = if_arguments[2]->as<ASTLiteral>();
|
||||
|
||||
if (first_literal && second_literal)
|
||||
{
|
||||
if (!DB::isInt64OrUInt64FieldType(first_literal->value.getType()) || !DB::isInt64OrUInt64FieldType(second_literal->value.getType()))
|
||||
return;
|
||||
|
||||
auto first_value = first_literal->value.get<UInt64>();
|
||||
auto second_value = second_literal->value.get<UInt64>();
|
||||
|
||||
std::shared_ptr<ASTFunction> new_func;
|
||||
if (second_value == 0)
|
||||
{
|
||||
if (first_value == 1)
|
||||
{
|
||||
/// sum(if(cond, 1, 0)) -> countIf(cond)
|
||||
new_func = makeASTFunction("countIf", if_arguments[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// sum(if(cond, 123, 0)) -> 123 * countIf(cond)
|
||||
auto count_if_func = makeASTFunction("countIf", if_arguments[0]);
|
||||
new_func = makeASTFunction("multiply", if_arguments[1], std::move(count_if_func));
|
||||
}
|
||||
new_func->setAlias(func.alias);
|
||||
ast = std::move(new_func);
|
||||
return;
|
||||
}
|
||||
|
||||
if (first_value == 0)
|
||||
{
|
||||
auto not_func = makeASTFunction("not", if_arguments[0]);
|
||||
if (second_value == 1)
|
||||
{
|
||||
/// sum(if(cond, 0, 1)) -> countIf(not(cond))
|
||||
new_func = makeASTFunction("countIf", std::move(not_func));
|
||||
}
|
||||
else
|
||||
{
|
||||
/// sum(if(cond, 0, 123)) -> 123 * countIf(not(cond))
|
||||
auto count_if_func = makeASTFunction("countIf", std::move(not_func));
|
||||
new_func = makeASTFunction("multiply", if_arguments[2], std::move(count_if_func));
|
||||
}
|
||||
new_func->setAlias(func.alias);
|
||||
ast = std::move(new_func);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTFunction;
|
||||
|
||||
/// Rewrite 'sum(if())' and 'sumIf' functions to countIf.
|
||||
/// sumIf(1, cond) -> countIf(cond)
|
||||
/// sumIf(123, cond) -> 123 * countIf(cond)
|
||||
/// sum(if(cond, 1, 0)) -> countIf(cond)
|
||||
/// sum(if(cond, 123, 0)) -> 123 * countIf(cond)
|
||||
/// sum(if(cond, 0, 1)) -> countIf(not(cond))
|
||||
/// sum(if(cond, 0, 123)) -> 123 * countIf(not(cond))
|
||||
class RewriteSumIfFunctionMatcher
|
||||
{
|
||||
public:
|
||||
struct Data
|
||||
{
|
||||
};
|
||||
|
||||
static void visit(ASTPtr & ast, Data &);
|
||||
static void visit(const ASTFunction &, ASTPtr & ast, Data &);
|
||||
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
|
||||
};
|
||||
|
||||
using RewriteSumIfFunctionVisitor = InDepthNodeVisitor<RewriteSumIfFunctionMatcher, false>;
|
||||
}
|
@ -22,7 +22,6 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/ExternalDictionariesLoader.h>
|
||||
#include <Interpreters/GatherFunctionQuantileVisitor.h>
|
||||
#include <Interpreters/RewriteSumIfFunctionVisitor.h>
|
||||
#include <Interpreters/RewriteArrayExistsFunctionVisitor.h>
|
||||
#include <Interpreters/RewriteSumFunctionWithSumAndCountVisitor.h>
|
||||
#include <Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h>
|
||||
@ -602,12 +601,6 @@ void optimizeAggregationFunctions(ASTPtr & query)
|
||||
ArithmeticOperationsInAgrFuncVisitor(data).visit(query);
|
||||
}
|
||||
|
||||
/// Runs RewriteSumIfFunctionVisitor over `query`, which may be modified in place.
void optimizeSumIfFunctions(ASTPtr & query)
{
    RewriteSumIfFunctionVisitor::Data visitor_data{};
    RewriteSumIfFunctionVisitor visitor(visitor_data);
    visitor.visit(query);
}
|
||||
|
||||
void optimizeArrayExistsFunctions(ASTPtr & query)
|
||||
{
|
||||
RewriteArrayExistsFunctionVisitor::Data data = {};
|
||||
@ -768,9 +761,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
|
||||
if (settings.optimize_normalize_count_variants)
|
||||
optimizeCountConstantAndSumOne(query, context);
|
||||
|
||||
if (settings.optimize_rewrite_sum_if_to_count_if)
|
||||
optimizeSumIfFunctions(query);
|
||||
|
||||
if (settings.optimize_rewrite_array_exists_to_has)
|
||||
optimizeArrayExistsFunctions(query);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user