Any input optimize (#11529)

* working commit №1

* working commit №2

* first version of ArithmeticOperationsInAgrFuncOptimize (min, max, sum)

* fix conflicts

* fix №2

* attempt №3

* Description + little mistake

* fix style + expanded optimization + add performance test

* style + merge with new version

* one more branch

* bug fix + merge

* just merge

* some code style and logic fixes

* little changes

* tests

* test(2)

* style

* seems to have fixed everything that was required

* bug fix

* style

* build fix

* fix typo

* fix clang-10 warning

* fix clang-10 warning

* nolint

* add new files to ya.make

* extract all functions from Any

* add last new lines.

* extract all functions from Any

* add last new lines.

* build + requested changes

* code style

* build + little perf test + anyLast

* minor changes

* bug fix

* minor changes

* more minor changes

* fix + experiment

* experiment 2.0

* experiment 3.0

* experiment 3.1

* experiment 4.0

* last experiment (think so)

* just another attempt to fix UB

* minor changes

* I think I won

* ya.make

* fix requested changes

Co-authored-by: Artem Zuikov <chertus@gmail.com>
This commit is contained in:
Ruslan 2020-06-18 19:34:29 +03:00 committed by GitHub
parent 730c22f767
commit 23cd919681
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 147 additions and 0 deletions

View File

@ -360,6 +360,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(SettingBool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(SettingUInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(SettingBool, optimize_any_input, true, "removal of any operations from Any", 0) \
M(SettingBool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(SettingBool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
M(SettingBool, optimize_if_chain_to_miltiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \

View File

@ -0,0 +1,92 @@
#include <Common/typeid_cast.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/AnyInputOptimize.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
/// Error codes thrown from this file. They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_AGGREGATION;
}
namespace
{
    /// Names of the aggregate functions whose argument expression gets rewritten by this optimization.
    constexpr auto any = "any";
    constexpr auto anyLast = "anyLast";
}
/// Returns a pointer to the `ind`-th argument slot of the function node `ast`,
/// so that the caller can replace that argument in place.
///
/// Returns nullptr when there is nothing to point at:
///  - `ast` is null or is not an ASTFunction,
///  - the function has no argument list,
///  - `ind` is out of range, or the slot itself holds a null pointer.
/// Checking all of these up front avoids dereferencing a failed cast or indexing past the end
/// of the argument list.
ASTPtr * getExactChild(const ASTPtr & ast, const size_t ind)
{
    if (!ast)
        return nullptr;

    auto * function = ast->as<ASTFunction>();
    if (!function || !function->arguments)
        return nullptr;

    auto & arguments = function->arguments->children;
    if (ind >= arguments.size() || !arguments[ind])
        return nullptr;

    return &arguments[ind];
}
/// Recursively processes the `ind`-th argument of the function node `ast`:
///  - an identifier `x` is replaced in place by `mode(x)`, where `mode` is the aggregate function name;
///  - a non-aggregate function is descended into, argument by argument;
///  - an aggregate function causes an ILLEGAL_AGGREGATION exception;
///  - anything else (or a missing argument slot) is left untouched.
void changeAllIdentifiers(ASTPtr & ast, size_t ind, std::string& mode)
{
    const char * aggregate_name = mode.c_str();

    ASTPtr * slot = getExactChild(ast, ind);
    if (!slot)
        return;

    if ((*slot)->as<ASTIdentifier>())
    {
        /// Wrap the identifier: the old node becomes the single argument of the new function node.
        ASTPtr identifier = *slot;
        *slot = makeASTFunction(aggregate_name);
        (*slot)->as<ASTFunction>()->arguments->children.push_back(identifier);
        return;
    }

    auto * nested_function = (*slot)->as<ASTFunction>();
    if (!nested_function)
        return;

    if (AggregateFunctionFactory::instance().isAggregateFunctionName(nested_function->name))
        throw Exception("Aggregate function " + nested_function->name +
            " is found inside aggregate function " + aggregate_name + " in query", ErrorCodes::ILLEGAL_AGGREGATION);

    /// The recursion only replaces argument slots of `*slot`, never `*slot` itself,
    /// so `nested_function` stays valid for the whole loop.
    for (size_t i = 0; i < nested_function->arguments->children.size(); ++i)
        changeAllIdentifiers(*slot, i, mode);
}
/// Strips the outer any/anyLast and pushes it down to the identifiers:
/// any(functions(x)) -> functions(any(x)).
///
/// The rewrite only happens when the node is an `any` or `anyLast` call whose first argument
/// is itself a function with a non-empty argument list; every other node is left as is.
/// NOTE(review): the replacement node is a clone of the inner function, and nothing here copies
/// an alias from the original any()/anyLast() node onto it — confirm that this is intended.
void AnyInputMatcher::visit(ASTPtr & current_ast, Data data)
{
    /// The parameter is reset and never read in this function.
    data = {};

    if (!current_ast)
        return;

    auto * outer = current_ast->as<ASTFunction>();
    if (!outer || (outer->name != any && outer->name != anyLast))
        return;

    auto & outer_arguments = outer->arguments->children;
    if (outer_arguments.empty() || !outer_arguments[0])
        return;

    auto * inner = outer_arguments[0]->as<ASTFunction>();
    if (!inner || inner->arguments->children.empty())
        return;

    /// Copy the name first: reassigning `current_ast` below may release the node `outer` points to.
    std::string mode = outer->name;

    /// Cut any or anyLast: the node is replaced by a clone of its first argument.
    current_ast = outer_arguments[0]->clone();

    auto * rewritten = current_ast->as<ASTFunction>();
    for (size_t i = 0; i < rewritten->arguments->children.size(); ++i)
        changeAllIdentifiers(current_ast, i, mode);
}
/// Tells the visitor whether to descend from `node` into `child`.
/// Children of table expressions and of ARRAY JOIN nodes are not visited.
/// Throws LOGICAL_ERROR if `child` is a null pointer.
bool AnyInputMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
{
    if (!child)
        throw Exception("AST item should not have nullptr in children", ErrorCodes::LOGICAL_ERROR);

    return !node->as<ASTTableExpression>() && !node->as<ASTArrayJoin>();
}
}

View File

@ -0,0 +1,19 @@
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
/// This optimizer is similar to the ArithmeticOperationsInAgrFunc optimizer, but for the function any
/// arbitrary functions can be extracted: any(functions(x)) -> functions(any(x)).
class AnyInputMatcher
{
public:
/// Visitor state; this matcher keeps none.
struct Data {};
/// Processes a single AST node; `ast` is taken by reference so the node can be replaced in place.
static void visit(ASTPtr & ast, Data data);
/// Returns whether the visitor should descend from `node` into `child`.
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
};
/// Visitor that applies AnyInputMatcher over an AST.
/// NOTE(review): the `true` template argument presumably selects top-to-bottom traversal — confirm against InDepthNodeVisitor.
using AnyInputVisitor = InDepthNodeVisitor<AnyInputMatcher, true>;
}

View File

@ -27,6 +27,7 @@
#include <Interpreters/DuplicateOrderByVisitor.h>
#include <Interpreters/GroupByFunctionKeysVisitor.h>
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
#include <Interpreters/AnyInputOptimize.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -573,6 +574,16 @@ void optimizeArithmeticOperationsInAgr(ASTPtr & query, bool optimize_arithmetic_
}
}
/// Runs AnyInputVisitor over `query` when `optimize_any_input` is true; otherwise does nothing.
void optimizeAnyInput(ASTPtr & query, bool optimize_any_input)
{
    if (!optimize_any_input)
        return;

    /// Move functions out of any/anyLast.
    AnyInputVisitor::Data visitor_data = {};
    AnyInputVisitor(visitor_data).visit(query);
}
void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
const NamesAndTypesList & source_columns, const NameSet & source_columns_set)
{
@ -964,6 +975,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
/// GROUP BY functions of other keys elimination.
optimizeGroupByFunctionKeys(select_query, settings.optimize_group_by_function_keys);
///Move all operations out of any function
optimizeAnyInput(query, settings.optimize_any_input);
/// Eliminate min/max/any aggregators of functions of GROUP BY keys
optimizeAggregateFunctionsOfGroupByKeys(select_query, settings.optimize_aggregators_of_group_by_keys);

View File

@ -19,6 +19,7 @@ SRCS(
addMissingDefaults.cpp
addTypeConversionToAST.cpp
Aggregator.cpp
AnyInputOptimize.cpp
ArithmeticOperationsInAgrFuncOptimize.cpp
ArithmeticOperationsInAgrFuncOptimize.h
ArrayJoinAction.cpp

View File

@ -0,0 +1,19 @@
<test>
<!-- Performance test: any()/anyLast() applied to function and arithmetic expressions over numbers(120000000). -->
<stop_conditions>
<all_of>
<iterations>10</iterations>
</all_of>
</stop_conditions>
<!-- Nested chains of multiplications inside any / anyLast. -->
<query>SELECT any(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000)</query>
<query>SELECT anyLast(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000)</query>
<!-- The any(...) result is aliased and the alias is reused in another expression. -->
<query>SELECT any(number * 2) as n, n * 3 FROM numbers(120000000)</query>
<!-- Non-arithmetic functions (round, toInt64) inside any. -->
<query>SELECT any(number * round(toInt64(number), -2)) FROM numbers(120000000)</query>
</test>

View File

@ -1,5 +1,6 @@
set optimize_aggregators_of_group_by_keys = 1;
set enable_debug_queries = 1;
-- optimize_any_input is switched off here, presumably so that the any()/anyLast() rewrite
-- does not change the expressions this test checks -- TODO confirm.
set optimize_any_input = 0;
SELECT min(number % 2) AS a, max(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b;
SELECT any(number % 2) AS a, anyLast(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b;