Merge pull request #12148 from ClickHouse/fix-bad-redundant-order-by-optimization

Fix bad code in redundant ORDER BY optimization
This commit is contained in:
alexey-milovidov 2020-07-06 09:21:45 +03:00 committed by GitHub
commit b6a19b5eff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 107 additions and 20 deletions

View File

@ -28,7 +28,8 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons
void registerAggregateFunctionCount(AggregateFunctionFactory & factory)
{
factory.registerFunction("count", {createAggregateFunctionCount, {true}}, AggregateFunctionFactory::CaseInsensitive);
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false };
factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -162,6 +162,52 @@ AggregateFunctionPtr AggregateFunctionFactory::tryGet(
}
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPropertiesImpl(const String & name_param, int recursion_level) const
{
String name = getAliasToOrName(name_param);
Value found;
/// Find by exact match.
if (auto it = aggregate_functions.find(name); it != aggregate_functions.end())
{
found = it->second;
}
/// Find by case-insensitive name.
/// Combinators cannot apply for case insensitive (SQL-style) aggregate function names. Only for native names.
else if (recursion_level == 0)
{
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
found = jt->second;
}
if (found.creator)
return found.properties;
/// Combinators of aggregate functions.
/// For every aggregate function 'agg' and combiner '-Comb' there is combined aggregate function with name 'aggComb',
/// that can have different number and/or types of arguments, different result type and different behaviour.
if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
{
if (combinator->isForInternalUsageOnly())
return {};
String nested_name = name.substr(0, name.size() - combinator->getName().size());
/// NOTE: It's reasonable to also allow to transform properties by combinator.
return tryGetPropertiesImpl(nested_name, recursion_level + 1);
}
return {};
}
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(const String & name) const
{
return tryGetPropertiesImpl(name, 0);
}
bool AggregateFunctionFactory::isAggregateFunctionName(const String & name, int recursion_level) const
{
if (aggregate_functions.count(name) || isAlias(name))

View File

@ -9,6 +9,7 @@
#include <string>
#include <unordered_map>
#include <vector>
#include <optional>
namespace DB
@ -72,6 +73,9 @@ public:
const Array & parameters,
AggregateFunctionProperties & out_properties) const;
/// Get properties if the aggregate function exists.
std::optional<AggregateFunctionProperties> tryGetProperties(const String & name) const;
bool isAggregateFunctionName(const String & name, int recursion_level = 0) const;
private:
@ -83,6 +87,8 @@ private:
bool has_null_arguments,
int recursion_level) const;
std::optional<AggregateFunctionProperties> tryGetPropertiesImpl(const String & name, int recursion_level) const;
private:
using AggregateFunctions = std::unordered_map<String, Value>;

View File

@ -120,8 +120,10 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(const std::string &
void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
{
factory.registerFunction("groupArray", createAggregateFunctionGroupArray);
factory.registerFunction("groupArraySample", createAggregateFunctionGroupArraySample);
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupArray", { createAggregateFunctionGroupArray, properties });
factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties });
}
}

View File

@ -95,8 +95,10 @@ AggregateFunctionPtr createAggregateFunctionMoving(const std::string & name, con
void registerAggregateFunctionMoving(AggregateFunctionFactory & factory)
{
factory.registerFunction("groupArrayMovingSum", createAggregateFunctionMoving<MovingSumTemplate>);
factory.registerFunction("groupArrayMovingAvg", createAggregateFunctionMoving<MovingAvgTemplate>);
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupArrayMovingSum", { createAggregateFunctionMoving<MovingSumTemplate>, properties });
factory.registerFunction("groupArrayMovingAvg", { createAggregateFunctionMoving<MovingAvgTemplate>, properties });
}
}

View File

@ -71,7 +71,6 @@ struct MovingAvgData
void add(T val, Arena * arena)
{
sum += val;
value.push_back(sum, arena);
}
@ -96,7 +95,8 @@ class MovingImpl final
public:
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>; // probably for overflow function in the future
// probably for overflow function in the future
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 win_size_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<Data, MovingImpl<T, Tlimit_num_elems, Data>>({data_type_}, {})

View File

@ -110,7 +110,9 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray(const std::string & n
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory & factory)
{
factory.registerFunction("groupUniqArray", createAggregateFunctionGroupUniqArray);
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupUniqArray", { createAggregateFunctionGroupUniqArray, properties });
}
}

View File

@ -49,13 +49,18 @@ AggregateFunctionPtr createAggregateFunctionArgMax(const std::string & name, con
void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory)
{
factory.registerFunction("any", createAggregateFunctionAny);
factory.registerFunction("anyLast", createAggregateFunctionAnyLast);
factory.registerFunction("anyHeavy", createAggregateFunctionAnyHeavy);
factory.registerFunction("min", createAggregateFunctionMin, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("max", createAggregateFunctionMax, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("argMin", createAggregateFunctionArgMin);
factory.registerFunction("argMax", createAggregateFunctionArgMax);
/// The functions below depend on the order of data.
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("any", { createAggregateFunctionAny, properties });
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
factory.registerFunction("argMin", { createAggregateFunctionArgMin, properties });
factory.registerFunction("argMax", { createAggregateFunctionArgMax, properties });
}
}

View File

@ -117,8 +117,10 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
{
factory.registerFunction("topK", createAggregateFunctionTopK<false>);
factory.registerFunction("topKWeighted", createAggregateFunctionTopK<true>);
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("topK", { createAggregateFunctionTopK<false>, properties });
factory.registerFunction("topKWeighted", { createAggregateFunctionTopK<true>, properties });
}
}

View File

@ -122,14 +122,16 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const
void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false };
factory.registerFunction("uniq",
{createAggregateFunctionUniq<AggregateFunctionUniqUniquesHashSetData, AggregateFunctionUniqUniquesHashSetDataForVariadic>, {true}});
{createAggregateFunctionUniq<AggregateFunctionUniqUniquesHashSetData, AggregateFunctionUniqUniquesHashSetDataForVariadic>, properties});
factory.registerFunction("uniqHLL12",
{createAggregateFunctionUniq<false, AggregateFunctionUniqHLL12Data, AggregateFunctionUniqHLL12DataForVariadic>, {true}});
{createAggregateFunctionUniq<false, AggregateFunctionUniqHLL12Data, AggregateFunctionUniqHLL12DataForVariadic>, properties});
factory.registerFunction("uniqExact",
{createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, {true}});
{createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, properties});
}
}

View File

@ -289,6 +289,11 @@ struct AggregateFunctionProperties
* or we should return non-Nullable type with default value (example: count, countDistinct).
*/
bool returns_default_when_only_null = false;
/** Result varies depending on the data order (example: groupArray).
* Some may also name this property as "non-commutative".
*/
bool is_order_dependent = false;
};

View File

@ -1,6 +1,7 @@
#pragma once
#include <Functions/FunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/ASTFunction.h>
@ -24,7 +25,9 @@ public:
bool & is_stateful;
void visit(ASTFunction & ast_function, ASTPtr &)
{
if (ast_function.name == "any" || ast_function.name == "groupArray")
auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(ast_function.name);
if (aggregate_function_properties && aggregate_function_properties->is_order_dependent)
{
is_stateful = true;
return;
@ -85,7 +88,6 @@ public:
if (done)
return;
/// Disable optimization for distributed tables
for (const auto & elem : select_query.children)
{
if (elem->as<ASTSetQuery>() && !elem->as<ASTSetQuery>()->is_standalone)

View File

@ -0,0 +1 @@
SELECT \n k,\n groupArrayMovingSum(v)\nFROM \n(\n SELECT \n k,\n dt,\n v\n FROM moving_sum_num\n ORDER BY \n k ASC,\n dt ASC\n)\nGROUP BY k\nORDER BY k ASC

View File

@ -0,0 +1,11 @@
CREATE TEMPORARY TABLE moving_sum_num
(
`k` String,
`dt` DateTime,
`v` UInt64
);
SET enable_debug_queries = 1;
-- ORDER BY from subquery shall not be removed.
ANALYZE SELECT k, groupArrayMovingSum(v) FROM (SELECT * FROM moving_sum_num ORDER BY k, dt) GROUP BY k ORDER BY k;