mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-12 01:12:12 +00:00
GROUP BY with check if aggr func depends on order
+ tests
This commit is contained in:
parent
5684453f09
commit
49694578ba
@ -59,7 +59,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
|
||||
size_t tryDistinctReadInOrder(QueryPlan::Node * node);
|
||||
|
||||
/// Remove redundant ORDER BY clauses in subqueries
|
||||
void tryRemoveRedundantOrderBy(QueryPlan::Node * parent_node);
|
||||
void tryRemoveRedundantOrderBy(QueryPlan::Node * root);
|
||||
|
||||
/// Put some steps under union, so that plan optimisation could be applied to union parts separately.
|
||||
/// For example, the plan can be rewritten like:
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <Processors/QueryPlan/AggregatingStep.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
#include <Processors/QueryPlan/FillingStep.h>
|
||||
@ -25,7 +26,6 @@ void printStepName(const char * prefix, const QueryPlan::Node * node)
|
||||
|
||||
void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
{
|
||||
// do top down find first order by or group by
|
||||
struct Frame
|
||||
{
|
||||
QueryPlan::Node * node = nullptr;
|
||||
@ -36,7 +36,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
std::vector<Frame> stack;
|
||||
stack.push_back({.node = root});
|
||||
|
||||
std::vector<IQueryPlanStep *> steps_affect_order;
|
||||
std::vector<QueryPlan::Node *> nodes_affect_order;
|
||||
|
||||
while (!stack.empty())
|
||||
{
|
||||
@ -53,25 +53,35 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
printStepName("visit", current_node);
|
||||
/// if there is parent node which can affect order and current step is sorting
|
||||
/// then check if we can remove the sorting step (and corresponding expression step)
|
||||
if (!steps_affect_order.empty() && typeid_cast<SortingStep *>(current_step))
|
||||
if (!nodes_affect_order.empty() && typeid_cast<SortingStep *>(current_step))
|
||||
{
|
||||
auto try_to_remove_sorting_step = [&]() -> bool
|
||||
{
|
||||
QueryPlan::Node * node_affect_order = nodes_affect_order.back();
|
||||
IQueryPlanStep * step_affect_order = node_affect_order->step.get();
|
||||
/// if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable
|
||||
/// if ORDER BY is with FILL WITH, it is non-removable
|
||||
if (typeid_cast<LimitStep *>(steps_affect_order.back()) || typeid_cast<LimitByStep *>(steps_affect_order.back())
|
||||
|| typeid_cast<FillingStep *>(steps_affect_order.back()))
|
||||
if (typeid_cast<LimitStep *>(step_affect_order) || typeid_cast<LimitByStep *>(step_affect_order)
|
||||
|| typeid_cast<FillingStep *>(step_affect_order))
|
||||
return false;
|
||||
|
||||
bool remove_sorting = false;
|
||||
/// TODO: (0) check stateful functions
|
||||
/// (1) aggregation
|
||||
if (const AggregatingStep * parent_aggr = typeid_cast<AggregatingStep *>(steps_affect_order.back()); parent_aggr)
|
||||
if (const AggregatingStep * parent_aggr = typeid_cast<AggregatingStep *>(step_affect_order); parent_aggr)
|
||||
{
|
||||
/// TODO: check if it contains aggregation functions which depends on order
|
||||
auto const & aggregates = parent_aggr->getParams().aggregates;
|
||||
for (const auto & aggregate : aggregates)
|
||||
{
|
||||
auto aggregate_function_properties
|
||||
= AggregateFunctionFactory::instance().tryGetProperties(aggregate.function->getName());
|
||||
if (aggregate_function_properties && aggregate_function_properties->is_order_dependent)
|
||||
return false;
|
||||
}
|
||||
remove_sorting = true;
|
||||
}
|
||||
/// (2) sorting
|
||||
else if (SortingStep * parent_sorting = typeid_cast<SortingStep *>(steps_affect_order.back()); parent_sorting)
|
||||
else if (typeid_cast<SortingStep *>(step_affect_order))
|
||||
{
|
||||
remove_sorting = true;
|
||||
}
|
||||
@ -109,7 +119,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
|| typeid_cast<AggregatingStep *>(current_step)) /// (4) aggregation change order
|
||||
{
|
||||
printStepName("steps_affect_order/push", current_node);
|
||||
steps_affect_order.push_back(current_step);
|
||||
nodes_affect_order.push_back(current_node);
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,10 +134,12 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
|
||||
}
|
||||
|
||||
/// bottom-up visit
|
||||
if (!steps_affect_order.empty() && steps_affect_order.back() == current_node->step.get())
|
||||
/// we come here when all children of current_node are visited,
|
||||
/// so it's a node which affect order, remove it from the corresponding stack
|
||||
if (!nodes_affect_order.empty() && nodes_affect_order.back() == current_node)
|
||||
{
|
||||
printStepName("steps_affect_order/pop", current_node);
|
||||
steps_affect_order.pop_back();
|
||||
printStepName("node_affect_order/pop", current_node);
|
||||
nodes_affect_order.pop_back();
|
||||
}
|
||||
|
||||
printStepName("pop", current_node);
|
||||
|
@ -58,15 +58,6 @@ Header: number UInt64
|
||||
Header: number_1 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- GROUP BY removes ORDER BY in _all_ subqueries
|
||||
Expression ((Project names + Projection))
|
||||
Header: number UInt64
|
||||
Aggregating
|
||||
Header: number_0 UInt64
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))))
|
||||
Header: number_0 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed
|
||||
Expression ((Project names + (Projection + DROP unused columns after JOIN)))
|
||||
Header: t1.number UInt64
|
||||
@ -111,3 +102,76 @@ Header: t1.number UInt64
|
||||
Header: t2.number_1 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries
|
||||
Expression ((Project names + Projection))
|
||||
Header: sum(number) UInt64
|
||||
Aggregating
|
||||
Header: number_0 UInt64
|
||||
sum(number_0) UInt64
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))))
|
||||
Header: number_0 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery
|
||||
Expression ((Project names + Projection))
|
||||
Header: any(number) UInt64
|
||||
Aggregating
|
||||
Header: number_0 UInt64
|
||||
any(number_0) UInt64
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
|
||||
Header: number_0 UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_1 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
|
||||
Header: number_1 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- check that optimization is applied recursively to subqueries as well
|
||||
-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here
|
||||
Expression (Project names)
|
||||
Header: a UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: a_0 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + Projection)))))
|
||||
Header: a_0 UInt64
|
||||
Aggregating
|
||||
Header: number_1 UInt64
|
||||
sum(number_1) UInt64
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))
|
||||
Header: number_1 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function
|
||||
Expression (Project names)
|
||||
Header: a UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: a_0 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + Projection)))))
|
||||
Header: a_0 UInt64
|
||||
Aggregating
|
||||
Header: number_1 UInt64
|
||||
any(number_1) UInt64
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
|
||||
Header: number_1 UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_2 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + Change column names to column identifiers)))
|
||||
Header: number_2 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY
|
||||
Expression ((Project names + Projection))
|
||||
Header: a UInt64
|
||||
Filter ((WHERE + (Change column names to column identifiers + (Project names + Projection))))
|
||||
Header: a_0 UInt64
|
||||
Aggregating
|
||||
Header: number_1 UInt64
|
||||
any(number_1) UInt64
|
||||
Expression ((Before GROUP BY + (Change column names to column identifiers + Project names)))
|
||||
Header: number_1 UInt64
|
||||
Sorting (Sorting for ORDER BY)
|
||||
Header: number_2 UInt64
|
||||
Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))
|
||||
Header: number_2 UInt64
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Header: number UInt64
|
||||
|
@ -73,108 +73,6 @@ FROM
|
||||
)
|
||||
ORDER BY number ASC;
|
||||
|
||||
SELECT '-- GROUP BY removes ORDER BY in _all_ subqueries';
|
||||
EXPLAIN header=1
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
GROUP BY number;
|
||||
|
||||
-- SELECT '-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries';
|
||||
-- EXPLAIN
|
||||
-- SELECT sum(number)
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM numbers(3)
|
||||
-- ORDER BY number ASC
|
||||
-- )
|
||||
-- ORDER BY number DESC
|
||||
-- )
|
||||
-- GROUP BY number;
|
||||
|
||||
-- SELECT '-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery';
|
||||
-- EXPLAIN
|
||||
-- SELECT any(number)
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM numbers(3)
|
||||
-- ORDER BY number ASC
|
||||
-- )
|
||||
-- ORDER BY number DESC
|
||||
-- )
|
||||
-- GROUP BY number;
|
||||
|
||||
-- SELECT '-- check that optimization is applied recursively to subqueries as well';
|
||||
-- SELECT '-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here';
|
||||
-- EXPLAIN
|
||||
-- SELECT a
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT sum(number) AS a
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM numbers(3)
|
||||
-- ORDER BY number ASC
|
||||
-- )
|
||||
-- GROUP BY number
|
||||
-- )
|
||||
-- ORDER BY a ASC;
|
||||
|
||||
-- SELECT '-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function';
|
||||
-- EXPLAIN
|
||||
-- SELECT a
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT any(number) AS a
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM numbers(3)
|
||||
-- ORDER BY number ASC
|
||||
-- )
|
||||
-- GROUP BY number
|
||||
-- )
|
||||
-- ORDER BY a ASC;
|
||||
|
||||
-- SELECT '-- Check that optimization works for subqueries as well, - main query have nor ORDER BY nor GROUP BY';
|
||||
-- EXPLAIN
|
||||
-- SELECT a
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT any(number) AS a
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM
|
||||
-- (
|
||||
-- SELECT *
|
||||
-- FROM numbers(3)
|
||||
-- ORDER BY number DESC
|
||||
-- )
|
||||
-- ORDER BY number ASC
|
||||
-- )
|
||||
-- GROUP BY number
|
||||
-- )
|
||||
-- WHERE a > 0;
|
||||
|
||||
SELECT '-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed';
|
||||
EXPLAIN header=1
|
||||
SELECT *
|
||||
@ -225,3 +123,89 @@ FROM
|
||||
ORDER BY number DESC
|
||||
) AS t2
|
||||
ORDER BY t1.number ASC;
|
||||
|
||||
SELECT '-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries';
|
||||
EXPLAIN header=1
|
||||
SELECT sum(number)
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
GROUP BY number;
|
||||
|
||||
SELECT '-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery';
|
||||
EXPLAIN header=1
|
||||
SELECT any(number)
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
GROUP BY number;
|
||||
|
||||
SELECT '-- check that optimization is applied recursively to subqueries as well';
|
||||
SELECT '-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here';
|
||||
EXPLAIN header=1
|
||||
SELECT a
|
||||
FROM
|
||||
(
|
||||
SELECT sum(number) AS a
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
GROUP BY number
|
||||
)
|
||||
ORDER BY a ASC;
|
||||
|
||||
SELECT '-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function';
|
||||
EXPLAIN header=1
|
||||
SELECT a
|
||||
FROM
|
||||
(
|
||||
SELECT any(number) AS a
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
GROUP BY number
|
||||
)
|
||||
ORDER BY a ASC;
|
||||
|
||||
SELECT '-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY';
|
||||
EXPLAIN header=1
|
||||
SELECT a
|
||||
FROM
|
||||
(
|
||||
SELECT any(number) AS a
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number DESC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
GROUP BY number
|
||||
)
|
||||
WHERE a > 0;
|
||||
|
Loading…
Reference in New Issue
Block a user