Enable optimize_rewrite_sum_if_to_count_if by default

This commit is contained in:
Raúl Marín 2024-04-24 19:24:36 +02:00
parent 931fb65762
commit 1ba7e5942b
5 changed files with 2 additions and 165 deletions

View File

@ -681,7 +681,7 @@ class IColumn;
M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \
M(Bool, enable_sharing_sets_for_mutations, true, "Allow sharing set objects build for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption", 0) \
\
M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
M(Bool, optimize_rewrite_aggregate_function_with_if, true, "Rewrite aggregate functions with if expression as argument when logically equivalent. For example, avg(if(cond, col, null)) can be rewritten to avgIf(cond, col)", 0) \
M(Bool, optimize_rewrite_array_exists_to_has, false, "Rewrite arrayExists() functions to has() when logically equivalent. For example, arrayExists(x -> x = 1, arr) can be rewritten to has(arr, 1)", 0) \
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \

View File

@ -93,6 +93,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."},
{"first_day_of_week", "Monday", "Monday", "Added a setting for the first day of the week for date/time functions"},
{"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"},
{"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}
}},
{"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"},
{"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"},

View File

@ -1,121 +0,0 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Interpreters/RewriteSumIfFunctionVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Common/typeid_cast.h>
namespace DB
{
void RewriteSumIfFunctionMatcher::visit(ASTPtr & ast, Data & data)
{
if (auto * func = ast->as<ASTFunction>())
{
if (func->is_window_function)
return;
visit(*func, ast, data);
}
}
void RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &)
{
if (!func.arguments || func.arguments->children.empty())
return;
auto lower_name = Poco::toLower(func.name);
/// sumIf, SumIf or sUMIf are valid function names, but sumIF or sumiF are not
if (lower_name != "sum" && (lower_name != "sumif" || !endsWith(func.name, "If")))
return;
const auto & func_arguments = func.arguments->children;
if (lower_name == "sumif")
{
const auto * literal = func_arguments[0]->as<ASTLiteral>();
if (!literal || !DB::isInt64OrUInt64FieldType(literal->value.getType()))
return;
if (func_arguments.size() == 2)
{
std::shared_ptr<ASTFunction> new_func;
if (literal->value.get<UInt64>() == 1)
{
/// sumIf(1, cond) -> countIf(cond)
new_func = makeASTFunction("countIf", func_arguments[1]);
}
else
{
/// sumIf(123, cond) -> 123 * countIf(cond)
auto count_if_func = makeASTFunction("countIf", func_arguments[1]);
new_func = makeASTFunction("multiply", func_arguments[0], std::move(count_if_func));
}
new_func->setAlias(func.alias);
ast = std::move(new_func);
return;
}
}
else
{
const auto * nested_func = func_arguments[0]->as<ASTFunction>();
if (!nested_func || Poco::toLower(nested_func->name) != "if" || nested_func->arguments->children.size() != 3)
return;
const auto & if_arguments = nested_func->arguments->children;
const auto * first_literal = if_arguments[1]->as<ASTLiteral>();
const auto * second_literal = if_arguments[2]->as<ASTLiteral>();
if (first_literal && second_literal)
{
if (!DB::isInt64OrUInt64FieldType(first_literal->value.getType()) || !DB::isInt64OrUInt64FieldType(second_literal->value.getType()))
return;
auto first_value = first_literal->value.get<UInt64>();
auto second_value = second_literal->value.get<UInt64>();
std::shared_ptr<ASTFunction> new_func;
if (second_value == 0)
{
if (first_value == 1)
{
/// sum(if(cond, 1, 0)) -> countIf(cond)
new_func = makeASTFunction("countIf", if_arguments[0]);
}
else
{
/// sum(if(cond, 123, 0)) -> 123 * countIf(cond)
auto count_if_func = makeASTFunction("countIf", if_arguments[0]);
new_func = makeASTFunction("multiply", if_arguments[1], std::move(count_if_func));
}
new_func->setAlias(func.alias);
ast = std::move(new_func);
return;
}
if (first_value == 0)
{
auto not_func = makeASTFunction("not", if_arguments[0]);
if (second_value == 1)
{
/// sum(if(cond, 0, 1)) -> countIf(not(cond))
new_func = makeASTFunction("countIf", std::move(not_func));
}
else
{
/// sum(if(cond, 0, 123)) -> 123 * countIf(not(cond))
auto count_if_func = makeASTFunction("countIf", std::move(not_func));
new_func = makeASTFunction("multiply", if_arguments[2], std::move(count_if_func));
}
new_func->setAlias(func.alias);
ast = std::move(new_func);
return;
}
}
}
}
}

View File

@ -1,33 +0,0 @@
#pragma once
#include <unordered_set>
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
class ASTFunction;
/// Rewrite 'sum(if())' and 'sumIf' functions to counIf.
/// sumIf(1, cond) -> countIf(1, cond)
/// sumIf(123, cond) -> 123 * countIf(1, cond)
/// sum(if(cond, 1, 0)) -> countIf(cond)
/// sum(if(cond, 123, 0)) -> 123 * countIf(cond)
/// sum(if(cond, 0, 1)) -> countIf(not(cond))
/// sum(if(cond, 0, 123)) -> 123 * countIf(not(cond))
class RewriteSumIfFunctionMatcher
{
public:
struct Data
{
};
static void visit(ASTPtr & ast, Data &);
static void visit(const ASTFunction &, ASTPtr & ast, Data &);
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
};
using RewriteSumIfFunctionVisitor = InDepthNodeVisitor<RewriteSumIfFunctionMatcher, false>;
}

View File

@ -22,7 +22,6 @@
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/GatherFunctionQuantileVisitor.h>
#include <Interpreters/RewriteSumIfFunctionVisitor.h>
#include <Interpreters/RewriteArrayExistsFunctionVisitor.h>
#include <Interpreters/RewriteSumFunctionWithSumAndCountVisitor.h>
#include <Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h>
@ -602,12 +601,6 @@ void optimizeAggregationFunctions(ASTPtr & query)
ArithmeticOperationsInAgrFuncVisitor(data).visit(query);
}
void optimizeSumIfFunctions(ASTPtr & query)
{
RewriteSumIfFunctionVisitor::Data data = {};
RewriteSumIfFunctionVisitor(data).visit(query);
}
void optimizeArrayExistsFunctions(ASTPtr & query)
{
RewriteArrayExistsFunctionVisitor::Data data = {};
@ -768,9 +761,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
if (settings.optimize_normalize_count_variants)
optimizeCountConstantAndSumOne(query, context);
if (settings.optimize_rewrite_sum_if_to_count_if)
optimizeSumIfFunctions(query);
if (settings.optimize_rewrite_array_exists_to_has)
optimizeArrayExistsFunctions(query);