mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Merge pull request #47135 from ClickHouse/remove-perf-test-duplicate-order-by-and-distinct
Remove duplicate_order_by_and_distinct optimization
This commit is contained in:
commit
9b4357723f
@ -534,7 +534,6 @@ class IColumn;
|
||||
M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \
|
||||
M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \
|
||||
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
|
||||
M(Bool, optimize_duplicate_order_by_and_distinct, false, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
|
||||
M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \
|
||||
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
|
||||
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
|
||||
@ -832,6 +831,7 @@ class IColumn;
|
||||
MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \
|
||||
MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \
|
||||
MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \
|
||||
MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \
|
||||
|
||||
/** The section above is for obsolete settings. Do not add anything there. */
|
||||
|
||||
|
@ -289,13 +289,6 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query)
|
||||
elems = std::move(unique_elems);
|
||||
}
|
||||
|
||||
/// Optimize duplicate ORDER BY
|
||||
void optimizeDuplicateOrderBy(ASTPtr & query, ContextPtr context)
|
||||
{
|
||||
DuplicateOrderByVisitor::Data order_by_data{context};
|
||||
DuplicateOrderByVisitor(order_by_data).visit(query);
|
||||
}
|
||||
|
||||
/// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any
|
||||
const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select)
|
||||
{
|
||||
@ -379,41 +372,6 @@ std::unordered_set<String> getDistinctNames(const ASTSelectQuery & select)
|
||||
return names;
|
||||
}
|
||||
|
||||
/// Remove DISTINCT from query if columns are known as DISTINCT from subquery
|
||||
void optimizeDuplicateDistinct(ASTSelectQuery & select)
|
||||
{
|
||||
if (!select.select() || select.select()->children.empty())
|
||||
return;
|
||||
|
||||
const ASTSelectQuery * subselect = getSimpleSubselect(select);
|
||||
if (!subselect)
|
||||
return;
|
||||
|
||||
std::unordered_set<String> distinct_names = getDistinctNames(*subselect);
|
||||
std::unordered_set<std::string_view> selected_names;
|
||||
|
||||
/// Check source column names from select list (ignore aliases and table names)
|
||||
for (const auto & id : select.select()->children)
|
||||
{
|
||||
const auto * identifier = id->as<ASTIdentifier>();
|
||||
if (!identifier)
|
||||
return;
|
||||
|
||||
const String & name = identifier->shortName();
|
||||
if (!distinct_names.contains(name))
|
||||
return; /// Not a distinct column, keep DISTINCT for it.
|
||||
|
||||
selected_names.emplace(name);
|
||||
}
|
||||
|
||||
/// select columns list != distinct columns list
|
||||
/// SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM ...) -- cannot remove DISTINCT
|
||||
if (selected_names.size() != distinct_names.size())
|
||||
return;
|
||||
|
||||
select.distinct = false;
|
||||
}
|
||||
|
||||
/// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression,
|
||||
/// have a single argument and are not aggregate functions.
|
||||
void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context,
|
||||
@ -830,17 +788,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
|
||||
&& !select_query->group_by_with_cube)
|
||||
optimizeAggregateFunctionsOfGroupByKeys(select_query, query);
|
||||
|
||||
/// Remove duplicate ORDER BY and DISTINCT from subqueries.
|
||||
if (settings.optimize_duplicate_order_by_and_distinct)
|
||||
{
|
||||
optimizeDuplicateOrderBy(query, context);
|
||||
|
||||
/// DISTINCT has special meaning in Distributed query with enabled distributed_group_by_no_merge
|
||||
/// TODO: disable Distributed/remote() tables only
|
||||
if (!settings.distributed_group_by_no_merge)
|
||||
optimizeDuplicateDistinct(*select_query);
|
||||
}
|
||||
|
||||
/// Remove functions from ORDER BY if its argument is also in ORDER BY
|
||||
if (settings.optimize_redundant_functions_in_order_by)
|
||||
optimizeRedundantFunctionsInOrderBy(select_query, context);
|
||||
|
@ -1,8 +0,0 @@
|
||||
<test>
|
||||
<settings><max_threads>1</max_threads></settings>
|
||||
|
||||
<!-- FIXME this should have been an EXPLAIN test, no point in measuring performance to deduce that the query was rewritten -->
|
||||
<query>SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY EventDate, CounterID FORMAT Null</query>
|
||||
<query>SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single) FORMAT Null</query>
|
||||
<query>SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY toStartOfWeek(EventDate) FORMAT Null</query>
|
||||
</test>
|
@ -1,58 +0,0 @@
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(3)
|
||||
)
|
||||
)
|
||||
ORDER BY number ASC
|
||||
0
|
||||
1
|
||||
2
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
0
|
||||
1
|
||||
2
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number % 2 AS number
|
||||
FROM numbers(3)
|
||||
)
|
||||
)
|
||||
ORDER BY number ASC
|
||||
0
|
||||
1
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number % 2 AS number
|
||||
FROM numbers(3)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
)
|
||||
ORDER BY number ASC
|
||||
0
|
||||
1
|
@ -1,123 +0,0 @@
|
||||
set optimize_duplicate_order_by_and_distinct = 1;
|
||||
|
||||
EXPLAIN SYNTAX SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number;
|
||||
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number;
|
||||
|
||||
set optimize_duplicate_order_by_and_distinct = 0;
|
||||
|
||||
EXPLAIN SYNTAX SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number;
|
||||
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM numbers(3)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number;
|
||||
|
||||
set optimize_duplicate_order_by_and_distinct = 1;
|
||||
|
||||
EXPLAIN SYNTAX SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number % 2
|
||||
AS number
|
||||
FROM numbers(3)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number;
|
||||
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number % 2
|
||||
AS number
|
||||
FROM numbers(3)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number;
|
||||
|
||||
set optimize_duplicate_order_by_and_distinct = 0;
|
||||
|
||||
EXPLAIN SYNTAX SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number % 2
|
||||
AS number
|
||||
FROM numbers(3)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number;
|
||||
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT *
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number % 2
|
||||
AS number
|
||||
FROM numbers(3)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number
|
||||
)
|
||||
ORDER BY number;
|
@ -1,4 +0,0 @@
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
@ -1,46 +0,0 @@
|
||||
-- Tags: distributed
|
||||
|
||||
set query_plan_remove_redundant_distinct = 1;
|
||||
set optimize_duplicate_order_by_and_distinct = 0;
|
||||
SET distributed_group_by_no_merge = 0;
|
||||
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM remote('127.0.0.{1,2}', system.numbers)
|
||||
LIMIT 1
|
||||
SETTINGS distributed_group_by_no_merge = 1
|
||||
);
|
||||
|
||||
SET distributed_group_by_no_merge = 1;
|
||||
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM remote('127.0.0.{1,2}', system.numbers)
|
||||
LIMIT 1
|
||||
);
|
||||
|
||||
set optimize_duplicate_order_by_and_distinct = 0;
|
||||
SET distributed_group_by_no_merge = 0;
|
||||
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM remote('127.0.0.{1,2}', system.numbers)
|
||||
LIMIT 1
|
||||
SETTINGS distributed_group_by_no_merge = 1
|
||||
);
|
||||
|
||||
SET distributed_group_by_no_merge = 1;
|
||||
set optimize_duplicate_order_by_and_distinct = 0;
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM remote('127.0.0.{1,2}', system.numbers)
|
||||
LIMIT 1
|
||||
);
|
@ -1,136 +0,0 @@
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(1)
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(1)
|
||||
)
|
||||
SELECT DISTINCT number * 2
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT
|
||||
number * 2,
|
||||
number
|
||||
FROM numbers(1)
|
||||
)
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number * 2 AS number
|
||||
FROM numbers(1)
|
||||
)
|
||||
SELECT
|
||||
b,
|
||||
a
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT
|
||||
number % 2 AS a,
|
||||
number % 3 AS b
|
||||
FROM numbers(100)
|
||||
)
|
||||
SELECT DISTINCT a
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT
|
||||
number % 2 AS a,
|
||||
number % 3 AS b
|
||||
FROM numbers(100)
|
||||
)
|
||||
SELECT a
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT a
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT
|
||||
number % 2 AS a,
|
||||
number % 3 AS b
|
||||
FROM numbers(100)
|
||||
)
|
||||
)
|
||||
SELECT DISTINCT a
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
a,
|
||||
b
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT
|
||||
number % 2 AS a,
|
||||
number % 3 AS b
|
||||
FROM numbers(100)
|
||||
)
|
||||
)
|
||||
SELECT
|
||||
a,
|
||||
b
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
b,
|
||||
a
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT
|
||||
number AS a,
|
||||
number AS b
|
||||
FROM numbers(1)
|
||||
)
|
||||
)
|
||||
SELECT
|
||||
a,
|
||||
b
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
b,
|
||||
a,
|
||||
a + b
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT
|
||||
number % 2 AS a,
|
||||
number % 3 AS b
|
||||
FROM numbers(100)
|
||||
)
|
||||
)
|
||||
SELECT DISTINCT a
|
||||
FROM
|
||||
(
|
||||
SELECT a
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT
|
||||
number % 2 AS a,
|
||||
number % 3 AS b
|
||||
FROM numbers(100)
|
||||
)
|
||||
)
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(1)
|
||||
) AS t1
|
||||
CROSS JOIN numbers(2) AS t2
|
||||
SELECT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(1) AS t1
|
||||
CROSS JOIN numbers(2) AS t2
|
||||
)
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(1)
|
||||
UNION ALL
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(2)
|
||||
)
|
||||
0
|
||||
1
|
@ -1,32 +0,0 @@
|
||||
SET optimize_duplicate_order_by_and_distinct = 1;
|
||||
|
||||
EXPLAIN SYNTAX SELECT DISTINCT number FROM numbers(1);
|
||||
EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number FROM numbers(1));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT number * 2 FROM (SELECT DISTINCT number * 2, number FROM numbers(1));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number * 2 AS number FROM numbers(1));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT b, a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT DISTINCT a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100)));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100)));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT a, b FROM (SELECT DISTINCT b, a FROM (SELECT DISTINCT number a, number b FROM numbers(1)));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT a, b FROM (SELECT b, a, a + b FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100)));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100)));
|
||||
EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number FROM numbers(1)) t1 CROSS JOIN numbers(2) t2;
|
||||
EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number FROM numbers(1) t1 CROSS JOIN numbers(2) t2);
|
||||
|
||||
EXPLAIN SYNTAX SELECT DISTINCT number FROM
|
||||
(
|
||||
(SELECT DISTINCT number FROM numbers(1))
|
||||
UNION ALL
|
||||
(SELECT DISTINCT number FROM numbers(2))
|
||||
);
|
||||
|
||||
--
|
||||
|
||||
SELECT DISTINCT number FROM
|
||||
(
|
||||
(SELECT DISTINCT number FROM numbers(1))
|
||||
UNION ALL
|
||||
(SELECT DISTINCT number FROM numbers(2))
|
||||
)
|
||||
ORDER BY number;
|
@ -477,3 +477,32 @@ Expression (Projection)
|
||||
ReadFromStorage (SystemNumbers)
|
||||
-- execute
|
||||
1
|
||||
-- UNION ALL with DISTINCT => do _not_ remove DISTINCT
|
||||
-- query
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(1)
|
||||
UNION ALL
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(2)
|
||||
)
|
||||
-- explain
|
||||
Expression (Projection)
|
||||
Distinct
|
||||
Distinct (Preliminary DISTINCT)
|
||||
Union
|
||||
Expression ((Before ORDER BY + Projection))
|
||||
Distinct
|
||||
Distinct (Preliminary DISTINCT)
|
||||
Expression (Before ORDER BY)
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Expression (( + Projection))
|
||||
Distinct
|
||||
Distinct (Preliminary DISTINCT)
|
||||
Expression (Before ORDER BY)
|
||||
ReadFromStorage (SystemNumbers)
|
||||
-- execute
|
||||
0
|
||||
1
|
||||
|
@ -264,3 +264,15 @@ run_query "$query"
|
||||
echo "-- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT"
|
||||
query="select distinct count() from numbers(10) group by number"
|
||||
run_query "$query"
|
||||
|
||||
echo "-- UNION ALL with DISTINCT => do _not_ remove DISTINCT"
|
||||
query="SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(1)
|
||||
UNION ALL
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(2)
|
||||
)"
|
||||
run_query "$query"
|
||||
|
@ -479,3 +479,32 @@ Expression (Project names)
|
||||
ReadFromStorage (SystemNumbers)
|
||||
-- execute
|
||||
1
|
||||
-- UNION ALL with DISTINCT => do _not_ remove DISTINCT
|
||||
-- query
|
||||
SELECT DISTINCT number
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(1)
|
||||
UNION ALL
|
||||
SELECT DISTINCT number
|
||||
FROM numbers(2)
|
||||
)
|
||||
-- explain
|
||||
Expression (Project names)
|
||||
Distinct (DISTINCT)
|
||||
Distinct (Preliminary DISTINCT)
|
||||
Union
|
||||
Expression ((Projection + (Change column names to column identifiers + Project names)))
|
||||
Distinct (DISTINCT)
|
||||
Distinct (Preliminary DISTINCT)
|
||||
Expression ((Projection + Change column names to column identifiers))
|
||||
ReadFromStorage (SystemNumbers)
|
||||
Expression (( + ( + Project names)))
|
||||
Distinct (DISTINCT)
|
||||
Distinct (Preliminary DISTINCT)
|
||||
Expression ((Projection + Change column names to column identifiers))
|
||||
ReadFromStorage (SystemNumbers)
|
||||
-- execute
|
||||
0
|
||||
1
|
||||
|
Loading…
Reference in New Issue
Block a user