ClickHouse/tests/queries/0_stateless/02500_remove_redundant_distinct.sh

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

279 lines
5.9 KiB
Bash
Raw Normal View History

2023-01-26 22:15:02 +00:00
#!/usr/bin/env bash
# Exercises the query_plan_remove_redundant_distinct setting: prints query plans
# and results for a series of nested-DISTINCT queries with the setting off/on.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Default ENABLE_ANALYZER to 0 only when it is completely unset; a value that
# is set (even to an empty string) by the caller is left as-is.
if [ -z "${ENABLE_ANALYZER+x}" ]; then
    ENABLE_ANALYZER=0
fi

# Name of the setting under test, and the two SQL prefixes that turn it off/on.
# Both prefixes pin the analyzer choice and switch off
# optimize_duplicate_order_by_and_distinct, so only the setting under test differs.
OPTIMIZATION_SETTING="query_plan_remove_redundant_distinct"
DISABLE_OPTIMIZATION="set allow_experimental_analyzer=$ENABLE_ANALYZER;SET $OPTIMIZATION_SETTING=0;SET optimize_duplicate_order_by_and_distinct=0"
ENABLE_OPTIMIZATION="set allow_experimental_analyzer=$ENABLE_ANALYZER;SET $OPTIMIZATION_SETTING=1;SET optimize_duplicate_order_by_and_distinct=0"
# Baseline: with the optimization switched off, print the plan for three nested
# DISTINCTs over numbers(3). $query is reused by the first run_query call further down.
echo "-- Disabled $OPTIMIZATION_SETTING"
query="SELECT DISTINCT *
FROM
(
SELECT DISTINCT *
FROM
(
SELECT DISTINCT *
FROM numbers(3)
)
)"
# Only the EXPLAIN is issued here; the query itself is not executed.
$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN $query"
#######################################
# Print a query, then its EXPLAIN output, then its result, all with the
# optimization enabled.
# Globals:   CLICKHOUSE_CLIENT (read), ENABLE_OPTIMIZATION (read)
# Arguments: $1 - SQL text of the query
# Outputs:   section markers, the query text and the client output on stdout
# Returns:   exit status of the final client invocation
#######################################
run_query() {
    local sql="$1"
    local with_settings="${ENABLE_OPTIMIZATION};"

    printf '%s\n' "-- query"
    echo "$sql"

    printf '%s\n' "-- explain"
    # CLICKHOUSE_CLIENT is deliberately left unquoted, as in the rest of the
    # script; presumably it holds the client command plus options.
    $CLICKHOUSE_CLIENT -nq "${with_settings}EXPLAIN ${sql}"

    printf '%s\n' "-- execute"
    $CLICKHOUSE_CLIENT -nq "${with_settings}${sql}"
}
# From here on every case goes through run_query, i.e. with the optimization enabled.
echo "-- Enabled $OPTIMIZATION_SETTING"
# $query still holds the triple-nested DISTINCT over numbers(3) assigned earlier.
echo "-- DISTINCT is only in most inner subquery"
run_query "$query"
# Each UNION ALL branch is DISTINCT on its own, but the concatenation can repeat
# values (numbers(1) and numbers(2) both produce 0), so the outer DISTINCT must stay.
echo "-- do _not_ remove DISTINCT after UNION"
query="SELECT DISTINCT number FROM
(
(SELECT DISTINCT number FROM numbers(1))
UNION ALL
(SELECT DISTINCT number FROM numbers(2))
)
ORDER BY number"
run_query "$query"
# Outer DISTINCT over a comma (cross) join of two subqueries that are each
# DISTINCT on their own; the label states that the outer DISTINCT must be kept.
echo "-- do _not_ remove DISTINCT after JOIN"
query="SELECT DISTINCT *
FROM
(
SELECT DISTINCT number AS n
FROM numbers(2)
) as x,
(
SELECT DISTINCT number AS n
FROM numbers(2)
) as y"
run_query "$query"
# Three nested DISTINCTs over the same pair of columns (a, b); the label treats
# the outer ones as duplicates of the innermost DISTINCT.
echo "-- DISTINCT duplicates with several columns"
query="SELECT DISTINCT *
FROM
(
SELECT DISTINCT *
FROM
(
SELECT DISTINCT number as a, 2*number as b
FROM numbers(3)
)
)"
run_query "$query"
# Same shape, but the innermost and outermost SELECT lists also carry a literal
# constant (1 and 2 respectively) next to a and b.
echo "-- DISTINCT duplicates with constant columns"
query="SELECT DISTINCT 2, a, b
FROM
(
SELECT DISTINCT a, b
FROM
(
SELECT DISTINCT 1, number as a, 2*number as b
FROM numbers(3)
)
)"
run_query "$query"
# The next three cases put a row-generating step between the inner and the
# outer DISTINCT, so per their labels the outer DISTINCT has to be kept.
# Case 1: ARRAY JOIN over a two-element array applied to the DISTINCT subquery.
echo "-- ARRAY JOIN: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs"
query="SELECT DISTINCT *
FROM
(
SELECT DISTINCT *
FROM VALUES('Hello', 'World', 'Goodbye')
) AS words
ARRAY JOIN [0, 1] AS arr"
run_query "$query"
# Case 2: ORDER BY ... WITH FILL inside the subquery, over ids 0 and 2.
echo "-- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs"
query="SELECT DISTINCT *
FROM
(
SELECT DISTINCT *
FROM values('id UInt8', 0, 2)
ORDER BY id ASC WITH FILL
)"
run_query "$query"
# Case 3: arrayJoin() used inside the outer WHERE clause.
echo "-- WHERE with arrayJoin(): do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs"
query="SELECT DISTINCT *
FROM
(
SELECT DISTINCT ['Istanbul', 'Berlin', 'Bensheim'] AS cities
)
WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim']"
run_query "$query"
# Outer DISTINCT on the grouping key: GROUP BY a already yields one row per
# value of a, so the label expects the DISTINCT to be removed.
echo "-- GROUP BY before DISTINCT with on the same columns => remove DISTINCT"
query="SELECT DISTINCT a
FROM
(
SELECT
a,
sum(b) AS c
FROM
(
SELECT
x.number AS a,
y.number AS b
FROM numbers(3) AS x, numbers(3, 3) AS y
)
GROUP BY a
)"
run_query "$query"
# Outer DISTINCT on the aggregate c instead of the key. Every a in 0..2 is
# paired with b in 3..5 (numbers(3, 3)), so sum(b) is 12 in each group and c
# repeats; the DISTINCT is therefore required.
echo "-- GROUP BY before DISTINCT with on different columns => do _not_ remove DISTINCT"
query="SELECT DISTINCT c
FROM
(
SELECT
a,
sum(b) AS c
FROM
(
SELECT
x.number AS a,
y.number AS b
FROM numbers(3) AS x, numbers(3, 3) AS y
)
GROUP BY a
)"
run_query "$query"
# GROUP BY a WITH ROLLUP followed by an outer DISTINCT.
# Case 1: DISTINCT on the aggregate column c (not the grouping key); the label
# expects the DISTINCT to be kept.
echo "-- GROUP BY WITH ROLLUP before DISTINCT with on different columns => do _not_ remove DISTINCT"
query="SELECT DISTINCT c
FROM
(
SELECT
a,
sum(b) AS c
FROM
(
SELECT
x.number AS a,
y.number AS b
FROM numbers(3) AS x, numbers(3, 3) AS y
)
GROUP BY a WITH ROLLUP
)"
run_query "$query"
# Case 2: DISTINCT on the grouping key a; the label expects it to be removed.
echo "-- GROUP BY WITH ROLLUP before DISTINCT with on the same columns => remove DISTINCT"
query="SELECT DISTINCT a
FROM
(
SELECT
a,
sum(b) AS c
FROM
(
SELECT
x.number AS a,
y.number AS b
FROM numbers(3) AS x, numbers(3, 3) AS y
)
GROUP BY a WITH ROLLUP
)"
run_query "$query"
# GROUP BY a WITH CUBE followed by an outer DISTINCT.
# Case 1: DISTINCT on the aggregate column c; the label expects it to be kept.
echo "-- GROUP BY WITH CUBE before DISTINCT with on different columns => do _not_ remove DISTINCT"
query="SELECT DISTINCT c
FROM
(
SELECT
a,
sum(b) AS c
FROM
(
SELECT
x.number AS a,
y.number AS b
FROM numbers(3) AS x, numbers(3, 3) AS y
)
GROUP BY a WITH CUBE
)"
run_query "$query"
# Case 2: DISTINCT on the grouping key a; the label expects it to be removed.
echo "-- GROUP BY WITH CUBE before DISTINCT with on the same columns => remove DISTINCT"
query="SELECT DISTINCT a
FROM
(
SELECT
a,
sum(b) AS c
FROM
(
SELECT
x.number AS a,
y.number AS b
FROM numbers(3) AS x, numbers(3, 3) AS y
)
GROUP BY a WITH CUBE
)"
run_query "$query"
# GROUP BY a WITH TOTALS followed by an outer DISTINCT.
# Case 1: DISTINCT on the aggregate column c; the label expects it to be kept.
echo "-- GROUP BY WITH TOTALS before DISTINCT with on different columns => do _not_ remove DISTINCT"
query="SELECT DISTINCT c
FROM
(
SELECT
a,
sum(b) AS c
FROM
(
SELECT
x.number AS a,
y.number AS b
FROM numbers(3) AS x, numbers(3, 3) AS y
)
GROUP BY a WITH TOTALS
)"
run_query "$query"
# Case 2: DISTINCT on the grouping key a; the label expects it to be removed.
echo "-- GROUP BY WITH TOTALS before DISTINCT with on the same columns => remove DISTINCT"
query="SELECT DISTINCT a
FROM
(
SELECT
a,
sum(b) AS c
FROM
(
SELECT
x.number AS a,
y.number AS b
FROM numbers(3) AS x, numbers(3, 3) AS y
)
GROUP BY a WITH TOTALS
)"
run_query "$query"
# DISTINCT applied to an aggregate (count()) while grouping by a different
# column: each of the 10 groups has count 1, so the DISTINCT is not redundant.
echo "-- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT"
query="select distinct count() from numbers(10) group by number"
run_query "$query"
# Unparenthesised UNION ALL of two DISTINCT branches inside a subquery; the
# branches overlap (both contain 0), so the outer DISTINCT must be kept.
echo "-- UNION ALL with DISTINCT => do _not_ remove DISTINCT"
query="SELECT DISTINCT number
FROM
(
SELECT DISTINCT number
FROM numbers(1)
UNION ALL
SELECT DISTINCT number
FROM numbers(2)
)"
run_query "$query"