mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Any input optimize (#11529)
* workig commit №1 * working commit №2 * first version of ArithmeticOperationsInAgrFuncOptimize (min, max, sum) * fix conflicts * fix №2 * attempt №3 * Description + little mistake * fix style + expanded optimization + add performance test * style + merge with new version * one more branch * bug fix + merge * just merge * some code style and logic fixes * little changes * tests * test(2) * style * seems to have fixed everything that was required * bug fix * style * build fix * fix typo * fix clang-10 warning * fix clang-10 warning * nolint * add new files to ya.make * extract all functions from Any * add last new lines. * extract all functions from Any * add last new lines. * build + requested changes * code style * build + little perf test + anyLast * minor changes * bug fix * minor changes * another minor changes * fix + experement * experement 2.0 * experement 3.0 * experement 3.1 * experement 4.0 * last experement(think so) * just another attempt to fix UB * minor changes * I think I won * ya.make * fix requested changes Co-authored-by: Artem Zuikov <chertus@gmail.com>
This commit is contained in:
parent
730c22f767
commit
23cd919681
@ -360,6 +360,7 @@ struct Settings : public SettingsCollection<Settings>
|
||||
M(SettingBool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
|
||||
M(SettingBool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
|
||||
M(SettingUInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
|
||||
M(SettingBool, optimize_any_input, true, "removal of any operations from Any", 0) \
|
||||
M(SettingBool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
|
||||
M(SettingBool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
|
||||
M(SettingBool, optimize_if_chain_to_miltiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
|
||||
|
92
src/Interpreters/AnyInputOptimize.cpp
Normal file
92
src/Interpreters/AnyInputOptimize.cpp
Normal file
@ -0,0 +1,92 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Interpreters/AnyInputOptimize.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int ILLEGAL_AGGREGATION;
|
||||
}
|
||||
|
||||
namespace
{
    /// Names of the aggregate functions this optimization recognizes.
    constexpr auto any = "any";
    constexpr auto anyLast = "anyLast";
}
|
||||
|
||||
/// Returns a pointer to the `ind`-th argument slot of the function node `ast`,
/// so that the caller can replace that argument in place.
/// Returns nullptr when `ast` is empty or is not an ASTFunction, when the function
/// has no argument list, when `ind` is out of range, or when the slot itself is empty.
ASTPtr * getExactChild(const ASTPtr & ast, const size_t ind)
{
    if (!ast)
        return nullptr;

    /// as<>() gives nullptr for a node of another type, so it must be checked before use.
    auto * function = ast->as<ASTFunction>();
    if (!function || !function->arguments)
        return nullptr;

    auto & arguments = function->arguments->children;
    if (ind >= arguments.size() || !arguments[ind])
        return nullptr;

    return &arguments[ind];
}
|
||||
|
||||
///recursive searching of identifiers
|
||||
void changeAllIdentifiers(ASTPtr & ast, size_t ind, std::string& mode)
|
||||
{
|
||||
const char * name = mode.c_str();
|
||||
ASTPtr * exact_child = getExactChild(ast, ind);
|
||||
if (!exact_child)
|
||||
return;
|
||||
if ((*exact_child)->as<ASTIdentifier>())
|
||||
{
|
||||
///put new any
|
||||
ASTPtr old_ast = *exact_child;
|
||||
*exact_child = makeASTFunction(name);
|
||||
(*exact_child)->as<ASTFunction>()->arguments->children.push_back(old_ast);
|
||||
}
|
||||
else if ((*exact_child)->as<ASTFunction>() &&
|
||||
!AggregateFunctionFactory::instance().isAggregateFunctionName((*exact_child)->as<ASTFunction>()->name))
|
||||
for (size_t i = 0; i < (*exact_child)->as<ASTFunction>()->arguments->children.size(); i++)
|
||||
changeAllIdentifiers(*exact_child, i, mode);
|
||||
else if ((*exact_child)->as<ASTFunction>() &&
|
||||
AggregateFunctionFactory::instance().isAggregateFunctionName((*exact_child)->as<ASTFunction>()->name))
|
||||
throw Exception("Aggregate function " + (*exact_child)->as<ASTFunction>()->name +
|
||||
" is found inside aggregate function " + name + " in query", ErrorCodes::ILLEGAL_AGGREGATION);
|
||||
}
|
||||
|
||||
|
||||
///cut old any, put any to identifiers. any(functions(x)) -> functions(any(x))
|
||||
void AnyInputMatcher::visit(ASTPtr & current_ast, Data data)
|
||||
{
|
||||
data = {};
|
||||
if (!current_ast)
|
||||
return;
|
||||
|
||||
auto * function_node = current_ast->as<ASTFunction>();
|
||||
if (function_node && (function_node->name == any || function_node->name == anyLast)
|
||||
&& !function_node->arguments->children.empty() && function_node->arguments->children[0] &&
|
||||
function_node->arguments->children[0]->as<ASTFunction>())
|
||||
{
|
||||
std::string mode = function_node->name;
|
||||
///cut any or anyLast
|
||||
if (function_node->arguments->children[0]->as<ASTFunction>() &&
|
||||
!function_node->arguments->children[0]->as<ASTFunction>()->arguments->children.empty())
|
||||
{
|
||||
current_ast = (function_node->arguments->children[0])->clone();
|
||||
for (size_t i = 0; i < current_ast->as<ASTFunction>()->arguments->children.size(); ++i)
|
||||
changeAllIdentifiers(current_ast, i, mode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool AnyInputMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
|
||||
{
|
||||
if (!child)
|
||||
throw Exception("AST item should not have nullptr in children", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (node->as<ASTTableExpression>() || node->as<ASTArrayJoin>())
|
||||
return false; // NOLINT
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
19
src/Interpreters/AnyInputOptimize.h
Normal file
19
src/Interpreters/AnyInputOptimize.h
Normal file
@ -0,0 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
///This optimiser is similar to ArithmeticOperationsInAgrFunc optimizer, but for function any we can extract any functions.
|
||||
class AnyInputMatcher
|
||||
{
|
||||
public:
|
||||
struct Data {};
|
||||
|
||||
static void visit(ASTPtr & ast, Data data);
|
||||
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
|
||||
};
|
||||
using AnyInputVisitor = InDepthNodeVisitor<AnyInputMatcher, true>;
|
||||
}
|
@ -27,6 +27,7 @@
|
||||
#include <Interpreters/DuplicateOrderByVisitor.h>
|
||||
#include <Interpreters/GroupByFunctionKeysVisitor.h>
|
||||
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
|
||||
#include <Interpreters/AnyInputOptimize.h>
|
||||
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
@ -573,6 +574,16 @@ void optimizeArithmeticOperationsInAgr(ASTPtr & query, bool optimize_arithmetic_
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs AnyInputVisitor over the whole query when `optimize_any_input` is set:
/// functions are moved out of any()/anyLast(), any(f(x)) -> f(any(x)).
void optimizeAnyInput(ASTPtr & query, bool optimize_any_input)
{
    if (!optimize_any_input)
        return;

    AnyInputVisitor::Data data{};
    AnyInputVisitor(data).visit(query);
}
|
||||
|
||||
void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
|
||||
const NamesAndTypesList & source_columns, const NameSet & source_columns_set)
|
||||
{
|
||||
@ -964,6 +975,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
|
||||
/// GROUP BY functions of other keys elimination.
|
||||
optimizeGroupByFunctionKeys(select_query, settings.optimize_group_by_function_keys);
|
||||
|
||||
///Move all operations out of any function
|
||||
optimizeAnyInput(query, settings.optimize_any_input);
|
||||
|
||||
/// Eliminate min/max/any aggregators of functions of GROUP BY keys
|
||||
optimizeAggregateFunctionsOfGroupByKeys(select_query, settings.optimize_aggregators_of_group_by_keys);
|
||||
|
||||
|
@ -19,6 +19,7 @@ SRCS(
|
||||
addMissingDefaults.cpp
|
||||
addTypeConversionToAST.cpp
|
||||
Aggregator.cpp
|
||||
AnyInputOptimize.cpp
|
||||
ArithmeticOperationsInAgrFuncOptimize.cpp
|
||||
ArithmeticOperationsInAgrFuncOptimize.h
|
||||
ArrayJoinAction.cpp
|
||||
|
19
tests/performance/any_anyLast.xml
Normal file
19
tests/performance/any_anyLast.xml
Normal file
@ -0,0 +1,19 @@
|
||||
<test>
|
||||
|
||||
<stop_conditions>
|
||||
<all_of>
|
||||
<iterations>10</iterations>
|
||||
</all_of>
|
||||
</stop_conditions>
|
||||
|
||||
|
||||
<query>SELECT any(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000)</query>
|
||||
|
||||
<query>SELECT anyLast(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000)</query>
|
||||
|
||||
<query>SELECT any(number * 2) as n, n * 3 FROM numbers(120000000)</query>
|
||||
|
||||
<query>SELECT any(number * round(toInt64(number), -2)) FROM numbers(120000000)</query>
|
||||
|
||||
|
||||
</test>
|
@ -1,5 +1,6 @@
|
||||
set optimize_aggregators_of_group_by_keys = 1;
|
||||
set enable_debug_queries = 1;
|
||||
set optimize_any_input = 0;
|
||||
|
||||
SELECT min(number % 2) AS a, max(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b;
|
||||
SELECT any(number % 2) AS a, anyLast(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b;
|
||||
|
Loading…
Reference in New Issue
Block a user