diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 375bdb1c516..28d5f7de8e3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -681,7 +681,7 @@ class IColumn; M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \ M(Bool, enable_sharing_sets_for_mutations, true, "Allow sharing set objects build for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption", 0) \ \ - M(Bool, optimize_rewrite_sum_if_to_count_if, false, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ + M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ M(Bool, optimize_rewrite_aggregate_function_with_if, true, "Rewrite aggregate functions with if expression as argument when logically equivalent. For example, avg(if(cond, col, null)) can be rewritten to avgIf(cond, col)", 0) \ M(Bool, optimize_rewrite_array_exists_to_has, false, "Rewrite arrayExists() functions to has() when logically equivalent. For example, arrayExists(x -> x = 1, arr) can be rewritten to has(arr, 1)", 0) \ M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index d3b5de06e70..cc619124fcb 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,6 +93,7 @@ static std::map sett {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, {"first_day_of_week", "Monday", "Monday", "Added a setting for the first day of the week for date/time functions"}, {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, + {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"} }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, diff --git a/src/Interpreters/RewriteSumIfFunctionVisitor.cpp b/src/Interpreters/RewriteSumIfFunctionVisitor.cpp deleted file mode 100644 index c598fc1a6b7..00000000000 --- a/src/Interpreters/RewriteSumIfFunctionVisitor.cpp +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include -#include -#include -#include - -namespace DB -{ - -void RewriteSumIfFunctionMatcher::visit(ASTPtr & ast, Data & data) -{ - if (auto * func = ast->as()) - { - if (func->is_window_function) - return; - - visit(*func, ast, data); - } -} - -void RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &) -{ - if (!func.arguments || func.arguments->children.empty()) - return; - - auto lower_name = Poco::toLower(func.name); - - /// sumIf, SumIf or sUMIf are valid function names, but sumIF or sumiF are not - if (lower_name != "sum" && (lower_name != "sumif" || !endsWith(func.name, "If"))) - return; - - const auto & func_arguments = func.arguments->children; - - if (lower_name == "sumif") - { - const auto * literal = func_arguments[0]->as(); - if (!literal || !DB::isInt64OrUInt64FieldType(literal->value.getType())) - return; - - if (func_arguments.size() == 2) - { - std::shared_ptr new_func; - if (literal->value.get() == 1) - { - /// sumIf(1, cond) -> countIf(cond) - new_func = makeASTFunction("countIf", func_arguments[1]); - } - else - { - /// sumIf(123, cond) -> 123 * countIf(cond) - auto count_if_func = makeASTFunction("countIf", func_arguments[1]); - new_func = makeASTFunction("multiply", func_arguments[0], std::move(count_if_func)); - } - new_func->setAlias(func.alias); - ast = std::move(new_func); - return; - } - } - else - { - const auto * nested_func = func_arguments[0]->as(); - - if (!nested_func || Poco::toLower(nested_func->name) != "if" || nested_func->arguments->children.size() != 3) - return; - - const auto & if_arguments = nested_func->arguments->children; - - const auto * first_literal = if_arguments[1]->as(); - const auto * second_literal = if_arguments[2]->as(); - - if (first_literal && second_literal) - { - if (!DB::isInt64OrUInt64FieldType(first_literal->value.getType()) || !DB::isInt64OrUInt64FieldType(second_literal->value.getType())) - return; - - auto first_value = first_literal->value.get(); - auto second_value = second_literal->value.get(); - - std::shared_ptr new_func; - if (second_value == 0) - { - if (first_value == 1) - { - /// sum(if(cond, 1, 0)) -> countIf(cond) - new_func = makeASTFunction("countIf", if_arguments[0]); - } - else - { - /// sum(if(cond, 123, 0)) -> 123 * countIf(cond) - auto count_if_func = makeASTFunction("countIf", if_arguments[0]); - new_func = makeASTFunction("multiply", if_arguments[1], std::move(count_if_func)); - } - new_func->setAlias(func.alias); - ast = std::move(new_func); - return; - } - - if (first_value == 0) - { - auto not_func = makeASTFunction("not", if_arguments[0]); - if (second_value == 1) - { - /// sum(if(cond, 0, 1)) -> countIf(not(cond)) - new_func = makeASTFunction("countIf", std::move(not_func)); - } - else - { - /// sum(if(cond, 0, 123)) -> 123 * countIf(not(cond)) - auto count_if_func = makeASTFunction("countIf", std::move(not_func)); - new_func = makeASTFunction("multiply", if_arguments[2], std::move(count_if_func)); - } - new_func->setAlias(func.alias); - ast = std::move(new_func); - return; - } - } - } - -} - -} diff --git a/src/Interpreters/RewriteSumIfFunctionVisitor.h b/src/Interpreters/RewriteSumIfFunctionVisitor.h deleted file mode 100644 index b3280e0c7c6..00000000000 --- a/src/Interpreters/RewriteSumIfFunctionVisitor.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include - -#include -#include - -namespace DB -{ - -class ASTFunction; - -/// Rewrite 'sum(if())' and 'sumIf' functions to counIf. -/// sumIf(1, cond) -> countIf(1, cond) -/// sumIf(123, cond) -> 123 * countIf(1, cond) -/// sum(if(cond, 1, 0)) -> countIf(cond) -/// sum(if(cond, 123, 0)) -> 123 * countIf(cond) -/// sum(if(cond, 0, 1)) -> countIf(not(cond)) -/// sum(if(cond, 0, 123)) -> 123 * countIf(not(cond)) -class RewriteSumIfFunctionMatcher -{ -public: - struct Data - { - }; - - static void visit(ASTPtr & ast, Data &); - static void visit(const ASTFunction &, ASTPtr & ast, Data &); - static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } -}; - -using RewriteSumIfFunctionVisitor = InDepthNodeVisitor; -} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 7b979088170..07c2cc15b95 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -602,12 +601,6 @@ void optimizeAggregationFunctions(ASTPtr & query) ArithmeticOperationsInAgrFuncVisitor(data).visit(query); } -void optimizeSumIfFunctions(ASTPtr & query) -{ - RewriteSumIfFunctionVisitor::Data data = {}; - RewriteSumIfFunctionVisitor(data).visit(query); -} - void optimizeArrayExistsFunctions(ASTPtr & query) { RewriteArrayExistsFunctionVisitor::Data data = {}; @@ -768,9 +761,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (settings.optimize_normalize_count_variants) optimizeCountConstantAndSumOne(query, context); - if (settings.optimize_rewrite_sum_if_to_count_if) - optimizeSumIfFunctions(query); - if (settings.optimize_rewrite_array_exists_to_has) optimizeArrayExistsFunctions(query);