Delete injective functions inside uniq (#12337)

This commit is contained in:
Artem Zuikov 2020-07-10 13:42:41 +03:00 committed by GitHub
parent 5cae87e664
commit 01b5c2663c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 307 additions and 0 deletions

View File

@ -367,6 +367,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(SettingUInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(SettingBool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(SettingBool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(SettingBool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(SettingBool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
M(SettingBool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \

View File

@ -0,0 +1,63 @@
#include <Common/typeid_cast.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Functions/FunctionFactory.h>
namespace DB
{
static bool isUniq(const ASTFunction & func)
{
return func.name == "uniq" || func.name == "uniqExact" || func.name == "uniqHLL12"
|| func.name == "uniqCombined" || func.name == "uniqCombined64";
}
/// Remove injective functions of one argument: replace with a child
static bool removeInjectiveFunction(ASTPtr & ast, const Context & context, const FunctionFactory & function_factory)
{
const ASTFunction * func = ast->as<ASTFunction>();
if (!func)
return false;
if (func->arguments->children.size() != 1)
return false;
if (!function_factory.get(func->name, context)->isInjective(Block{}))
return false;
ast = func->arguments->children[0];
return true;
}
void RemoveInjectiveFunctionsMatcher::visit(ASTPtr & ast, const Data & data)
{
if (auto * func = ast->as<ASTFunction>())
visit(*func, ast, data);
}
void RemoveInjectiveFunctionsMatcher::visit(ASTFunction & func, ASTPtr &, const Data & data)
{
if (isUniq(func))
{
const FunctionFactory & function_factory = FunctionFactory::instance();
for (auto & arg : func.arguments->children)
{
while (removeInjectiveFunction(arg, data.context, function_factory))
;
}
}
}
bool RemoveInjectiveFunctionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
if (node->as<ASTSubquery>() ||
node->as<ASTTableExpression>())
return false; // NOLINT
return true;
}
}

View File

@ -0,0 +1,28 @@
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
class ASTFunction;
/// Removes unneded injective functions inside `uniq*()`.
class RemoveInjectiveFunctionsMatcher
{
public:
struct Data
{
const Context & context;
};
static void visit(ASTPtr & ast, const Data & data);
static void visit(ASTFunction &, ASTPtr & ast, const Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
};
using RemoveInjectiveFunctionsVisitor = InDepthNodeVisitor<RemoveInjectiveFunctionsMatcher, true>;
}

View File

@ -28,6 +28,7 @@
#include <Interpreters/GroupByFunctionKeysVisitor.h>
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
#include <Interpreters/AnyInputOptimize.h>
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -601,6 +602,12 @@ void optimizeAnyInput(ASTPtr & query)
AnyInputVisitor(data).visit(query);
}
void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, const Context & context)
{
RemoveInjectiveFunctionsVisitor::Data data = {context};
RemoveInjectiveFunctionsVisitor(data).visit(query);
}
void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
const NamesAndTypesList & source_columns, const NameSet & source_columns_set)
{
@ -998,6 +1005,10 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
if (settings.optimize_move_functions_out_of_any)
optimizeAnyInput(query);
/// Remove injective functions inside uniq
if (settings.optimize_injective_functions_inside_uniq)
optimizeInjectiveFunctionsInsideUniq(query, context);
/// Eliminate min/max/any aggregators of functions of GROUP BY keys
if (settings.optimize_aggregators_of_group_by_keys)
optimizeAggregateFunctionsOfGroupByKeys(select_query);

View File

@ -61,6 +61,7 @@ SRCS(
HashJoin.cpp
IdentifierSemantic.cpp
IExternalLoadable.cpp
RemoveInjectiveFunctionsVisitor.cpp
InJoinSubqueriesPreprocessor.cpp
inplaceBlockConversions.cpp
InternalTextLogsQueue.cpp

View File

@ -0,0 +1,4 @@
<test>
<query>SELECT uniq(negate(negate(negate(number)))) FROM numbers(50000000)</query>
<query>SELECT uniq(hex(hex(number))) FROM numbers(50000000)</query>
</test>

View File

@ -0,0 +1,152 @@
SELECT
uniq(x),
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT
uniq(x + y),
uniqExact(x + y),
uniqHLL12(x + y),
uniqCombined(x + y),
uniqCombined64(x + y)
FROM
(
SELECT
number % 2 AS x,
number % 3 AS y
FROM numbers(10)
)
SELECT
uniq(x),
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT
uniq(x),
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT
uniq(x),
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT
uniq(x),
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT uniqExact(x)
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT uniq(concatAssumeInjective(\'x\', \'y\'))
FROM numbers(10)
SELECT
uniq(x),
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT
uniq(x + y),
uniqExact(x + y),
uniqHLL12(x + y),
uniqCombined(x + y),
uniqCombined64(x + y)
FROM
(
SELECT
number % 2 AS x,
number % 3 AS y
FROM numbers(10)
)
SELECT
uniq(-x),
uniqExact(-x),
uniqHLL12(-x),
uniqCombined(-x),
uniqCombined64(-x)
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT
uniq(bitNot(x)),
uniqExact(bitNot(x)),
uniqHLL12(bitNot(x)),
uniqCombined(bitNot(x)),
uniqCombined64(bitNot(x))
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT
uniq(bitNot(-x)),
uniqExact(bitNot(-x)),
uniqHLL12(bitNot(-x)),
uniqCombined(bitNot(-x)),
uniqCombined64(bitNot(-x))
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT
uniq(-bitNot(-x)),
uniqExact(-bitNot(-x)),
uniqHLL12(-bitNot(-x)),
uniqCombined(-bitNot(-x)),
uniqCombined64(-bitNot(-x))
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT uniqExact(-bitNot(-x))
FROM
(
SELECT number % 2 AS x
FROM numbers(10)
)
SELECT uniq(concatAssumeInjective(\'x\', \'y\'))
FROM numbers(10)

View File

@ -0,0 +1,47 @@
set enable_debug_queries = 1;
set optimize_injective_functions_inside_uniq = 1;
analyze select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x)
from (select number % 2 as x from numbers(10));
analyze select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y)
from (select number % 2 as x, number % 3 y from numbers(10));
analyze select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x)
from (select number % 2 as x from numbers(10));
analyze select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x))
from (select number % 2 as x from numbers(10));
analyze select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x))
from (select number % 2 as x from numbers(10));
analyze select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x))
from (select number % 2 as x from numbers(10));
analyze select count(distinct -bitNot(-x)) from (select number % 2 as x from numbers(10));
analyze select uniq(concatAssumeInjective('x', 'y')) from numbers(10);
set optimize_injective_functions_inside_uniq = 0;
analyze select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x)
from (select number % 2 as x from numbers(10));
analyze select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y)
from (select number % 2 as x, number % 3 y from numbers(10));
analyze select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x)
from (select number % 2 as x from numbers(10));
analyze select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x))
from (select number % 2 as x from numbers(10));
analyze select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x))
from (select number % 2 as x from numbers(10));
analyze select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x))
from (select number % 2 as x from numbers(10));
analyze select count(distinct -bitNot(-x)) from (select number % 2 as x from numbers(10));
analyze select uniq(concatAssumeInjective('x', 'y')) from numbers(10);