mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #12148 from ClickHouse/fix-bad-redundant-order-by-optimization
Fix bad code in redundant ORDER BY optimization
This commit is contained in:
commit
b6a19b5eff
@ -28,7 +28,8 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons
|
||||
|
||||
void registerAggregateFunctionCount(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("count", {createAggregateFunctionCount, {true}}, AggregateFunctionFactory::CaseInsensitive);
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false };
|
||||
factory.registerFunction("count", {createAggregateFunctionCount, properties}, AggregateFunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -162,6 +162,52 @@ AggregateFunctionPtr AggregateFunctionFactory::tryGet(
|
||||
}
|
||||
|
||||
|
||||
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPropertiesImpl(const String & name_param, int recursion_level) const
|
||||
{
|
||||
String name = getAliasToOrName(name_param);
|
||||
Value found;
|
||||
|
||||
/// Find by exact match.
|
||||
if (auto it = aggregate_functions.find(name); it != aggregate_functions.end())
|
||||
{
|
||||
found = it->second;
|
||||
}
|
||||
/// Find by case-insensitive name.
|
||||
/// Combinators cannot apply for case insensitive (SQL-style) aggregate function names. Only for native names.
|
||||
else if (recursion_level == 0)
|
||||
{
|
||||
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
|
||||
found = jt->second;
|
||||
}
|
||||
|
||||
if (found.creator)
|
||||
return found.properties;
|
||||
|
||||
/// Combinators of aggregate functions.
|
||||
/// For every aggregate function 'agg' and combiner '-Comb' there is combined aggregate function with name 'aggComb',
|
||||
/// that can have different number and/or types of arguments, different result type and different behaviour.
|
||||
|
||||
if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
|
||||
{
|
||||
if (combinator->isForInternalUsageOnly())
|
||||
return {};
|
||||
|
||||
String nested_name = name.substr(0, name.size() - combinator->getName().size());
|
||||
|
||||
/// NOTE: It's reasonable to also allow to transform properties by combinator.
|
||||
return tryGetPropertiesImpl(nested_name, recursion_level + 1);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(const String & name) const
|
||||
{
|
||||
return tryGetPropertiesImpl(name, 0);
|
||||
}
|
||||
|
||||
|
||||
bool AggregateFunctionFactory::isAggregateFunctionName(const String & name, int recursion_level) const
|
||||
{
|
||||
if (aggregate_functions.count(name) || isAlias(name))
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <optional>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -72,6 +73,9 @@ public:
|
||||
const Array & parameters,
|
||||
AggregateFunctionProperties & out_properties) const;
|
||||
|
||||
/// Get properties if the aggregate function exists.
|
||||
std::optional<AggregateFunctionProperties> tryGetProperties(const String & name) const;
|
||||
|
||||
bool isAggregateFunctionName(const String & name, int recursion_level = 0) const;
|
||||
|
||||
private:
|
||||
@ -83,6 +87,8 @@ private:
|
||||
bool has_null_arguments,
|
||||
int recursion_level) const;
|
||||
|
||||
std::optional<AggregateFunctionProperties> tryGetPropertiesImpl(const String & name, int recursion_level) const;
|
||||
|
||||
private:
|
||||
using AggregateFunctions = std::unordered_map<String, Value>;
|
||||
|
||||
|
@ -120,8 +120,10 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample(const std::string &
|
||||
|
||||
void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("groupArray", createAggregateFunctionGroupArray);
|
||||
factory.registerFunction("groupArraySample", createAggregateFunctionGroupArraySample);
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("groupArray", { createAggregateFunctionGroupArray, properties });
|
||||
factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties });
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -95,8 +95,10 @@ AggregateFunctionPtr createAggregateFunctionMoving(const std::string & name, con
|
||||
|
||||
void registerAggregateFunctionMoving(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("groupArrayMovingSum", createAggregateFunctionMoving<MovingSumTemplate>);
|
||||
factory.registerFunction("groupArrayMovingAvg", createAggregateFunctionMoving<MovingAvgTemplate>);
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("groupArrayMovingSum", { createAggregateFunctionMoving<MovingSumTemplate>, properties });
|
||||
factory.registerFunction("groupArrayMovingAvg", { createAggregateFunctionMoving<MovingAvgTemplate>, properties });
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -71,7 +71,6 @@ struct MovingAvgData
|
||||
void add(T val, Arena * arena)
|
||||
{
|
||||
sum += val;
|
||||
|
||||
value.push_back(sum, arena);
|
||||
}
|
||||
|
||||
@ -96,7 +95,8 @@ class MovingImpl final
|
||||
|
||||
public:
|
||||
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>; // probably for overflow function in the future
|
||||
// probably for overflow function in the future
|
||||
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
|
||||
|
||||
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 win_size_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<Data, MovingImpl<T, Tlimit_num_elems, Data>>({data_type_}, {})
|
||||
|
@ -110,7 +110,9 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray(const std::string & n
|
||||
|
||||
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("groupUniqArray", createAggregateFunctionGroupUniqArray);
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("groupUniqArray", { createAggregateFunctionGroupUniqArray, properties });
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -49,13 +49,18 @@ AggregateFunctionPtr createAggregateFunctionArgMax(const std::string & name, con
|
||||
|
||||
void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("any", createAggregateFunctionAny);
|
||||
factory.registerFunction("anyLast", createAggregateFunctionAnyLast);
|
||||
factory.registerFunction("anyHeavy", createAggregateFunctionAnyHeavy);
|
||||
factory.registerFunction("min", createAggregateFunctionMin, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("max", createAggregateFunctionMax, AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("argMin", createAggregateFunctionArgMin);
|
||||
factory.registerFunction("argMax", createAggregateFunctionArgMax);
|
||||
|
||||
/// The functions below depend on the order of data.
|
||||
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("any", { createAggregateFunctionAny, properties });
|
||||
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
|
||||
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
|
||||
factory.registerFunction("argMin", { createAggregateFunctionArgMin, properties });
|
||||
factory.registerFunction("argMax", { createAggregateFunctionArgMax, properties });
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -117,8 +117,10 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
|
||||
|
||||
void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("topK", createAggregateFunctionTopK<false>);
|
||||
factory.registerFunction("topKWeighted", createAggregateFunctionTopK<true>);
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("topK", { createAggregateFunctionTopK<false>, properties });
|
||||
factory.registerFunction("topKWeighted", { createAggregateFunctionTopK<true>, properties });
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -122,14 +122,16 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const
|
||||
|
||||
void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false };
|
||||
|
||||
factory.registerFunction("uniq",
|
||||
{createAggregateFunctionUniq<AggregateFunctionUniqUniquesHashSetData, AggregateFunctionUniqUniquesHashSetDataForVariadic>, {true}});
|
||||
{createAggregateFunctionUniq<AggregateFunctionUniqUniquesHashSetData, AggregateFunctionUniqUniquesHashSetDataForVariadic>, properties});
|
||||
|
||||
factory.registerFunction("uniqHLL12",
|
||||
{createAggregateFunctionUniq<false, AggregateFunctionUniqHLL12Data, AggregateFunctionUniqHLL12DataForVariadic>, {true}});
|
||||
{createAggregateFunctionUniq<false, AggregateFunctionUniqHLL12Data, AggregateFunctionUniqHLL12DataForVariadic>, properties});
|
||||
|
||||
factory.registerFunction("uniqExact",
|
||||
{createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, {true}});
|
||||
{createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, properties});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -289,6 +289,11 @@ struct AggregateFunctionProperties
|
||||
* or we should return non-Nullable type with default value (example: count, countDistinct).
|
||||
*/
|
||||
bool returns_default_when_only_null = false;
|
||||
|
||||
/** Result varies depending on the data order (example: groupArray).
|
||||
* Some may also name this property as "non-commutative".
|
||||
*/
|
||||
bool is_order_dependent = false;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
@ -24,7 +25,9 @@ public:
|
||||
bool & is_stateful;
|
||||
void visit(ASTFunction & ast_function, ASTPtr &)
|
||||
{
|
||||
if (ast_function.name == "any" || ast_function.name == "groupArray")
|
||||
auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(ast_function.name);
|
||||
|
||||
if (aggregate_function_properties && aggregate_function_properties->is_order_dependent)
|
||||
{
|
||||
is_stateful = true;
|
||||
return;
|
||||
@ -85,7 +88,6 @@ public:
|
||||
if (done)
|
||||
return;
|
||||
|
||||
/// Disable optimization for distributed tables
|
||||
for (const auto & elem : select_query.children)
|
||||
{
|
||||
if (elem->as<ASTSetQuery>() && !elem->as<ASTSetQuery>()->is_standalone)
|
||||
|
@ -0,0 +1 @@
|
||||
SELECT \n k,\n groupArrayMovingSum(v)\nFROM \n(\n SELECT \n k,\n dt,\n v\n FROM moving_sum_num\n ORDER BY \n k ASC,\n dt ASC\n)\nGROUP BY k\nORDER BY k ASC
|
11
tests/queries/0_stateless/01372_wrong_order_by_removal.sql
Normal file
11
tests/queries/0_stateless/01372_wrong_order_by_removal.sql
Normal file
@ -0,0 +1,11 @@
|
||||
CREATE TEMPORARY TABLE moving_sum_num
|
||||
(
|
||||
`k` String,
|
||||
`dt` DateTime,
|
||||
`v` UInt64
|
||||
);
|
||||
|
||||
SET enable_debug_queries = 1;
|
||||
|
||||
-- ORDER BY from subquery shall not be removed.
|
||||
ANALYZE SELECT k, groupArrayMovingSum(v) FROM (SELECT * FROM moving_sum_num ORDER BY k, dt) GROUP BY k ORDER BY k;
|
Loading…
Reference in New Issue
Block a user