Merge pull request #10342 from azat/optimizeGroupBy-isInjective

[RFC] Use isInjective() over manual list of such functions for GROUP BY optimization
This commit is contained in:
alexey-milovidov 2020-04-18 14:28:59 +03:00 committed by GitHub
commit b5f8efefa2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 41 additions and 54 deletions

View File

@ -35,7 +35,7 @@ public:
return 1;
}
bool isInjective(const Block &) override
bool isInjective(const Block &) const override
{
return is_injective;
}

View File

@ -115,7 +115,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return is_injective; }
bool isInjective(const Block &) const override { return is_injective; }
bool useDefaultImplementationForConstants() const override { return true; }

View File

@ -72,7 +72,7 @@ public:
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -326,7 +326,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return mask_tail_octets == 0; }
bool isInjective(const Block &) const override { return mask_tail_octets == 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -447,7 +447,7 @@ public:
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -546,7 +546,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -739,7 +739,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -837,7 +837,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -941,7 +941,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -1224,7 +1224,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -1313,7 +1313,7 @@ public:
}
bool isVariadic() const override { return true; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
@ -1408,7 +1408,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{

View File

@ -913,7 +913,7 @@ public:
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool isInjective(const Block &) override { return std::is_same_v<Name, NameToString>; }
bool isInjective(const Block &) const override { return std::is_same_v<Name, NameToString>; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
@ -1268,7 +1268,7 @@ public:
}
size_t getNumberOfArguments() const override { return 2; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{

View File

@ -592,7 +592,7 @@ public:
/// For the purpose of query optimization, we assume this function to be injective
/// even though there are many different cities named Moscow.
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{

View File

@ -243,7 +243,7 @@ private:
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
bool isInjective(const Block & sample_block) override
bool isInjective(const Block & sample_block) const override
{
return isDictGetFunctionInjective(dictionaries_loader, sample_block);
}
@ -769,7 +769,7 @@ private:
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
bool isInjective(const Block & sample_block) override
bool isInjective(const Block & sample_block) const override
{
return isDictGetFunctionInjective(dictionaries_loader, sample_block);
}
@ -1338,7 +1338,7 @@ private:
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
bool isInjective(const Block & sample_block) override
bool isInjective(const Block & sample_block) const override
{
return isDictGetFunctionInjective(dictionaries_loader, sample_block);
}
@ -1486,7 +1486,7 @@ private:
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
bool isInjective(const Block & sample_block) override
bool isInjective(const Block & sample_block) const override
{
return isDictGetFunctionInjective(dictionaries_loader, sample_block);
}
@ -1627,7 +1627,7 @@ public:
private:
size_t getNumberOfArguments() const override { return 2; }
bool isInjective(const Block & /*sample_block*/) override { return true; }
bool isInjective(const Block & /*sample_block*/) const override { return true; }
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; }

View File

@ -42,7 +42,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) override { return true; }
bool isInjective(const Block &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{

View File

@ -134,7 +134,7 @@ public:
*
* sample_block should contain data types of arguments and values of constants, if relevant.
*/
virtual bool isInjective(const Block & /*sample_block*/) { return false; }
virtual bool isInjective(const Block & /*sample_block*/) const { return false; }
/** A function is called "deterministic" if it returns the same result for the same values of arguments.
* Most functions are deterministic. A notable counterexample is rand().
@ -189,6 +189,7 @@ public:
/// See the comment for the same method in IFunctionBase
virtual bool isDeterministic() const = 0;
virtual bool isDeterministicInScopeOfQuery() const = 0;
virtual bool isInjective(const Block &) const = 0;
/// Override and return true if function needs to depend on the state of the data.
virtual bool isStateful() const = 0;

View File

@ -68,7 +68,7 @@ public:
return impl->getResultIfAlwaysReturnsConstantAndHasArguments(block, arguments);
}
bool isInjective(const Block & sample_block) final { return impl->isInjective(sample_block); }
bool isInjective(const Block & sample_block) const final { return impl->isInjective(sample_block); }
bool isDeterministic() const final { return impl->isDeterministic(); }
bool isDeterministicInScopeOfQuery() const final { return impl->isDeterministicInScopeOfQuery(); }
bool hasInformationAboutMonotonicity() const final { return impl->hasInformationAboutMonotonicity(); }
@ -96,6 +96,8 @@ public:
bool isDeterministicInScopeOfQuery() const final { return impl->isDeterministicInScopeOfQuery(); }
bool isInjective(const Block & block) const final { return impl->isInjective(block); }
bool isStateful() const final { return impl->isStateful(); }
bool isVariadic() const final { return impl->isVariadic(); }
@ -195,7 +197,7 @@ public:
bool isStateful() const override { return function->isStateful(); }
bool isInjective(const Block & sample_block) override { return function->isInjective(sample_block); }
bool isInjective(const Block & sample_block) const override { return function->isInjective(sample_block); }
bool isDeterministic() const override { return function->isDeterministic(); }
@ -226,6 +228,7 @@ public:
bool isDeterministic() const override { return function->isDeterministic(); }
bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); }
bool isInjective(const Block &block) const override { return function->isInjective(block); }
String getName() const override { return function->getName(); }
bool isStateful() const override { return function->isStateful(); }

View File

@ -107,7 +107,7 @@ public:
virtual bool isSuitableForConstantFolding() const { return true; }
virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & /*block*/, const ColumnNumbers & /*arguments*/) const { return nullptr; }
virtual bool isInjective(const Block & /*sample_block*/) { return false; }
virtual bool isInjective(const Block & /*sample_block*/) const { return false; }
virtual bool isDeterministic() const { return true; }
virtual bool isDeterministicInScopeOfQuery() const { return true; }
virtual bool hasInformationAboutMonotonicity() const { return false; }
@ -152,6 +152,7 @@ public:
/// Properties from IFunctionOverloadResolver. See comments in IFunction.h
virtual bool isDeterministic() const { return true; }
virtual bool isDeterministicInScopeOfQuery() const { return true; }
virtual bool isInjective(const Block &) const { return false; }
virtual bool isStateful() const { return false; }
virtual bool isVariadic() const { return false; }
@ -256,7 +257,7 @@ public:
/// Properties from IFunctionBase (see IFunction.h)
virtual bool isSuitableForConstantFolding() const { return true; }
virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & /*block*/, const ColumnNumbers & /*arguments*/) const { return nullptr; }
virtual bool isInjective(const Block & /*sample_block*/) { return false; }
virtual bool isInjective(const Block & /*sample_block*/) const { return false; }
virtual bool isDeterministic() const { return true; }
virtual bool isDeterministicInScopeOfQuery() const { return true; }
virtual bool isStateful() const { return false; }

View File

@ -41,7 +41,7 @@ public:
size_t getNumberOfArguments() const override { return 0; }
bool isInjective(const Block &) override { return is_injective; }
bool isInjective(const Block &) const override { return is_injective; }
bool useDefaultImplementationForConstants() const override { return true; }

View File

@ -71,7 +71,7 @@ public:
return 1;
}
bool isInjective(const Block &) override
bool isInjective(const Block &) const override
{
return true;
}

View File

@ -43,7 +43,7 @@ public:
return 0;
}
bool isInjective(const Block &) override
bool isInjective(const Block &) const override
{
return true;
}

View File

@ -510,7 +510,7 @@ bool LLVMFunction::isSuitableForConstantFolding() const
return true;
}
bool LLVMFunction::isInjective(const Block & sample_block)
bool LLVMFunction::isInjective(const Block & sample_block) const
{
for (const auto & f : originals)
if (!f->isInjective(sample_block))

View File

@ -53,7 +53,7 @@ public:
bool isSuitableForConstantFolding() const override;
bool isInjective(const Block & sample_block) override;
bool isInjective(const Block & sample_block) const override;
bool hasInformationAboutMonotonicity() const override;

View File

@ -33,6 +33,8 @@
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeNullable.h>
@ -216,28 +218,6 @@ void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t sub
ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query);
}
/** Calls to these functions in the GROUP BY statement would be
* replaced by their immediate argument.
*/
const std::unordered_set<String> injective_function_names
{
"negate",
"bitNot",
"reverse",
"reverseUTF8",
"toString",
"toFixedString",
"IPv4NumToString",
"IPv4StringToNum",
"hex",
"unhex",
"bitmaskToList",
"bitmaskToArray",
"tuple",
"regionToName",
"concatAssumeInjective",
};
const std::unordered_set<String> possibly_injective_function_names
{
"dictGetString",
@ -278,6 +258,8 @@ void appendUnusedGroupByColumn(ASTSelectQuery * select_query, const NameSet & so
/// Eliminates injective function calls and constant expressions from group by statement.
void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context)
{
const FunctionFactory & function_factory = FunctionFactory::instance();
if (!select_query->groupBy())
{
// If there is a HAVING clause without GROUP BY, make sure that some aggregation happens.
@ -327,7 +309,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum
continue;
}
}
else if (!injective_function_names.count(function->name))
else if (!function_factory.get(function->name, context)->isInjective(Block{}))
{
++i;
continue;