Merge pull request #39103 from tonickkozlov/tonickkozlov/37032/do-not-optimize-functions-shadowing-args

Do not optimize GROUP BY functions that shadow their arguments
This commit is contained in:
Igor Nikonov 2022-08-03 15:06:42 +02:00 committed by GitHub
commit 9eef299e11
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 79 additions and 4 deletions

View File

@ -0,0 +1,44 @@
#pragma once
#include <DataTypes/Serializations/ISerialization.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
namespace DB
{
/// Checks from bottom to top if a function's alias shadows the name
/// of one of it's arguments, e.g.
/// SELECT toString(dummy) as dummy FROM system.one GROUP BY dummy;
class FunctionMaskingArgumentCheckMatcher
{
public:
struct Data
{
const String& alias;
bool is_rejected = false;
void reject() { is_rejected = true; }
};
static void visit(const ASTPtr & ast, Data & data)
{
if (data.is_rejected)
return;
if (const auto & identifier = ast->as<ASTIdentifier>())
visit(*identifier, data);
}
static void visit(const ASTIdentifier & ast, Data & data)
{
if (ast.getAliasOrColumnName() == data.alias)
data.reject();
}
static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; }
};
using FunctionMaskingArgumentCheckVisitor = ConstInDepthNodeVisitor<FunctionMaskingArgumentCheckMatcher, false>;
}

View File

@ -13,6 +13,7 @@
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <Interpreters/FunctionMaskingArgumentCheckVisitor.h>
#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
#include <Interpreters/RewriteCountVariantsVisitor.h>
#include <Interpreters/MonotonicityCheckVisitor.h>
@ -153,6 +154,19 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context)
continue;
}
}
/// don't optimise functions that shadow any of it's arguments, e.g.:
/// SELECT toString(dummy) as dummy FROM system.one GROUP BY dummy;
if (!function->alias.empty())
{
FunctionMaskingArgumentCheckVisitor::Data data{.alias=function->alias};
FunctionMaskingArgumentCheckVisitor(data).visit(function->arguments);
if (data.is_rejected)
{
++i;
continue;
}
}
/// copy shared pointer to args in order to ensure lifetime
auto args_ast = function->arguments;

View File

@ -20,17 +20,17 @@ clickhouse-client --query_kind initial_query -q explain plan header=1 select toS
Expression ((Projection + Before ORDER BY))
Header: dummy String
Aggregating
Header: dummy UInt8
Header: toString(dummy) String
Expression (Before GROUP BY)
Header: dummy UInt8
Header: toString(dummy) String
ReadFromStorage (SystemOne)
Header: dummy UInt8
clickhouse-local --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy
Expression ((Projection + Before ORDER BY))
Header: dummy String
Aggregating
Header: dummy UInt8
Header: toString(dummy) String
Expression (Before GROUP BY)
Header: dummy UInt8
Header: toString(dummy) String
ReadFromStorage (SystemOne)
Header: dummy UInt8

View File

@ -0,0 +1,11 @@
-- { echoOn }
SELECT toString(dummy) as dummy FROM remote('127.{1,1}', 'system.one') GROUP BY dummy;
0
SELECT toString(dummy+1) as dummy FROM remote('127.{1,1}', 'system.one') GROUP BY dummy;
1
SELECT toString((toInt8(dummy)+2) * (toInt8(dummy)+2)) as dummy FROM remote('127.{1,1}', system.one) GROUP BY dummy;
4
SELECT round(number % 3) AS number FROM remote('127.{1,1}', numbers(20)) GROUP BY number ORDER BY number ASC;
0
1
2

View File

@ -0,0 +1,6 @@
SET prefer_localhost_replica=0;
-- { echoOn }
SELECT toString(dummy) as dummy FROM remote('127.{1,1}', 'system.one') GROUP BY dummy;
SELECT toString(dummy+1) as dummy FROM remote('127.{1,1}', 'system.one') GROUP BY dummy;
SELECT toString((toInt8(dummy)+2) * (toInt8(dummy)+2)) as dummy FROM remote('127.{1,1}', system.one) GROUP BY dummy;
SELECT round(number % 3) AS number FROM remote('127.{1,1}', numbers(20)) GROUP BY number ORDER BY number ASC;