Merge pull request #52438 from ClickHouse/fix_remove_redundant_distinct_with_view

Fix: remove redundant distinct with views
This commit is contained in:
Alexey Milovidov 2023-07-23 21:16:49 +03:00 committed by GitHub
commit 5c2eb2d749
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 63 additions and 4 deletions

View File

@ -2515,11 +2515,21 @@ FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr
/// find input node which refers to the output node
/// consider only aliases on the path
const auto * node = output_node;
while (node && node->type == ActionsDAG::ActionType::ALIAS)
while (node)
{
/// alias has only one child
chassert(node->children.size() == 1);
node = node->children.front();
if (node->type == ActionsDAG::ActionType::ALIAS)
{
node = node->children.front();
}
/// materialize() function can occur when dealing with views
/// TODO: not sure if it should be done here, looks too generic place
else if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "materialize")
{
chassert(node->children.size() == 1);
node = node->children.front();
}
else
break;
}
if (node && node->type == ActionsDAG::ActionType::INPUT)
index.emplace(output_node->result_name, node);

View File

@ -0,0 +1,21 @@
-- { echoOn }
set query_plan_remove_redundant_distinct=1;
-- DISTINCT has to be removed since the view already has DISTINCT on the same column
SELECT count()
FROM
(
EXPLAIN SELECT DISTINCT x FROM tab_v
)
WHERE explain ILIKE '%distinct%';
2
SELECT DISTINCT x FROM tab_v ORDER BY x;
1
2
-- explicitly checking that materialize() doesn't affect the result, - redundant DISTINCT is still removed
SELECT count()
FROM
(
EXPLAIN SELECT DISTINCT x FROM (SELECT materialize(x) as x FROM (select DISTINCT x from tab))
)
WHERE explain ILIKE '%distinct%';
2

View File

@ -0,0 +1,28 @@
-- Test: a DISTINCT in the outer query is expected to be treated as redundant when the
-- underlying view / subquery already applies DISTINCT on the same column.
-- NOTE(review): the statements after the echoOn marker appear verbatim in the expected-output
-- file, so presumably that part must stay byte-identical (comments included) - confirm before editing.

-- Setup: drop leftovers first (view before its source table) so the test can be re-run.
drop table if exists tab_v;
drop table if exists tab;
create table tab (x UInt64, y UInt64) engine MergeTree() order by (x, y);
-- Three rows with two distinct values of x (x = 1 occurs twice), so DISTINCT on x collapses rows.
insert into tab values(1, 1);
insert into tab values(1, 2);
insert into tab values(2, 1);
-- The view itself already selects DISTINCT x; the queries below add another DISTINCT on top of it.
create view tab_v as select distinct(x) from tab;
-- { echoOn }
set query_plan_remove_redundant_distinct=1;
-- DISTINCT has to be removed since the view already has DISTINCT on the same column
SELECT count()
FROM
(
EXPLAIN SELECT DISTINCT x FROM tab_v
)
WHERE explain ILIKE '%distinct%';
SELECT DISTINCT x FROM tab_v ORDER BY x;
-- explicitly checking that materialize() doesn't affect the result, - redundant DISTINCT is still removed
SELECT count()
FROM
(
EXPLAIN SELECT DISTINCT x FROM (SELECT materialize(x) as x FROM (select DISTINCT x from tab))
)
WHERE explain ILIKE '%distinct%';