2023-01-26 22:15:02 +00:00
|
|
|
#!/usr/bin/env bash
|
|
|
|
|
|
|
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
|
|
|
# shellcheck source=../shell_config.sh
|
|
|
|
. "$CURDIR"/../shell_config.sh
|
|
|
|
|
|
|
|
OPTIMIZATION_SETTING="query_plan_remove_redundant_distinct"
|
2023-02-14 20:36:12 +00:00
|
|
|
DISABLE_OPTIMIZATION="SET $OPTIMIZATION_SETTING=0;SET optimize_duplicate_order_by_and_distinct=0"
|
|
|
|
ENABLE_OPTIMIZATION="SET $OPTIMIZATION_SETTING=1;SET optimize_duplicate_order_by_and_distinct=0"
|
2023-01-26 22:15:02 +00:00
|
|
|
|
|
|
|
echo "-- Disabled $OPTIMIZATION_SETTING"
|
|
|
|
query="SELECT DISTINCT *
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT *
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT *
|
|
|
|
FROM numbers(3)
|
|
|
|
)
|
|
|
|
)"
|
|
|
|
|
|
|
|
$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN $query"
|
|
|
|
|
|
|
|
function run_query {
|
|
|
|
echo "-- query"
|
|
|
|
echo "$1"
|
|
|
|
echo "-- explain"
|
|
|
|
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN $1"
|
|
|
|
echo "-- execute"
|
|
|
|
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1"
|
|
|
|
}
|
|
|
|
|
|
|
|
echo "-- Enabled $OPTIMIZATION_SETTING"
|
|
|
|
echo "-- DISTINCT is only in most inner subquery"
|
|
|
|
run_query "$query"
|
|
|
|
|
|
|
|
echo "-- do _not_ remove DISTINCT after UNION"
|
|
|
|
query="SELECT DISTINCT number FROM
|
|
|
|
(
|
|
|
|
(SELECT DISTINCT number FROM numbers(1))
|
|
|
|
UNION ALL
|
|
|
|
(SELECT DISTINCT number FROM numbers(2))
|
|
|
|
)
|
|
|
|
ORDER BY number"
|
|
|
|
run_query "$query"
|
|
|
|
|
|
|
|
echo "-- do _not_ remove DISTINCT after JOIN"
|
|
|
|
query="SELECT DISTINCT *
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT number AS n
|
|
|
|
FROM numbers(2)
|
2023-02-14 20:36:12 +00:00
|
|
|
) as x,
|
2023-01-26 22:15:02 +00:00
|
|
|
(
|
|
|
|
SELECT DISTINCT number AS n
|
|
|
|
FROM numbers(2)
|
2023-02-14 20:36:12 +00:00
|
|
|
) as y"
|
2023-01-26 22:15:02 +00:00
|
|
|
run_query "$query"
|
2023-01-30 23:08:26 +00:00
|
|
|
|
|
|
|
echo "-- DISTINCT duplicates with several columns"
|
|
|
|
query="SELECT DISTINCT *
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT *
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT number as a, 2*number as b
|
|
|
|
FROM numbers(3)
|
|
|
|
)
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
2023-02-01 21:13:44 +00:00
|
|
|
|
|
|
|
echo "-- DISTINCT duplicates with constant columns"
|
|
|
|
query="SELECT DISTINCT 2, a, b
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT a, b
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT 1, number as a, 2*number as b
|
|
|
|
FROM numbers(3)
|
|
|
|
)
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
2023-02-07 11:48:23 +00:00
|
|
|
|
|
|
|
echo "-- ARRAY JOIN: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs"
|
|
|
|
query="SELECT DISTINCT *
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT *
|
|
|
|
FROM VALUES('Hello', 'World', 'Goodbye')
|
|
|
|
) AS words
|
|
|
|
ARRAY JOIN [0, 1] AS arr"
|
|
|
|
run_query "$query"
|
|
|
|
|
|
|
|
echo "-- WITH FILL: do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs"
|
|
|
|
query="SELECT DISTINCT *
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT *
|
|
|
|
FROM values('id UInt8', 0, 2)
|
|
|
|
ORDER BY id ASC WITH FILL
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
|
|
|
|
|
|
|
echo "-- WHERE with arrayJoin(): do _not_ remove outer DISTINCT because new rows are generated between inner and outer DISTINCTs"
|
|
|
|
query="SELECT DISTINCT *
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT DISTINCT ['Istanbul', 'Berlin', 'Bensheim'] AS cities
|
|
|
|
)
|
|
|
|
WHERE arrayJoin(cities) IN ['Berlin', 'Bensheim']"
|
|
|
|
run_query "$query"
|
2023-02-09 11:37:15 +00:00
|
|
|
|
|
|
|
echo "-- GROUP BY before DISTINCT with on the same columns => remove DISTINCT"
|
|
|
|
query="SELECT DISTINCT a
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
a,
|
|
|
|
sum(b) AS c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
x.number AS a,
|
|
|
|
y.number AS b
|
|
|
|
FROM numbers(3) AS x, numbers(3, 3) AS y
|
|
|
|
)
|
|
|
|
GROUP BY a
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
|
|
|
|
|
|
|
echo "-- GROUP BY before DISTINCT with on different columns => do _not_ remove DISTINCT"
|
|
|
|
query="SELECT DISTINCT c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
a,
|
|
|
|
sum(b) AS c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
x.number AS a,
|
|
|
|
y.number AS b
|
|
|
|
FROM numbers(3) AS x, numbers(3, 3) AS y
|
|
|
|
)
|
|
|
|
GROUP BY a
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
2023-02-14 19:45:57 +00:00
|
|
|
|
|
|
|
echo "-- GROUP BY WITH ROLLUP before DISTINCT with on different columns => do _not_ remove DISTINCT"
|
|
|
|
query="SELECT DISTINCT c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
a,
|
|
|
|
sum(b) AS c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
x.number AS a,
|
|
|
|
y.number AS b
|
|
|
|
FROM numbers(3) AS x, numbers(3, 3) AS y
|
|
|
|
)
|
|
|
|
GROUP BY a WITH ROLLUP
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
|
|
|
|
|
|
|
echo "-- GROUP BY WITH ROLLUP before DISTINCT with on the same columns => remove DISTINCT"
|
|
|
|
query="SELECT DISTINCT a
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
a,
|
|
|
|
sum(b) AS c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
x.number AS a,
|
|
|
|
y.number AS b
|
|
|
|
FROM numbers(3) AS x, numbers(3, 3) AS y
|
|
|
|
)
|
|
|
|
GROUP BY a WITH ROLLUP
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
|
|
|
|
|
|
|
echo "-- GROUP BY WITH CUBE before DISTINCT with on different columns => do _not_ remove DISTINCT"
|
|
|
|
query="SELECT DISTINCT c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
a,
|
|
|
|
sum(b) AS c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
x.number AS a,
|
|
|
|
y.number AS b
|
|
|
|
FROM numbers(3) AS x, numbers(3, 3) AS y
|
|
|
|
)
|
|
|
|
GROUP BY a WITH CUBE
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
|
|
|
|
|
|
|
echo "-- GROUP BY WITH CUBE before DISTINCT with on the same columns => remove DISTINCT"
|
|
|
|
query="SELECT DISTINCT a
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
a,
|
|
|
|
sum(b) AS c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
x.number AS a,
|
|
|
|
y.number AS b
|
|
|
|
FROM numbers(3) AS x, numbers(3, 3) AS y
|
|
|
|
)
|
|
|
|
GROUP BY a WITH CUBE
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
2023-02-14 21:14:41 +00:00
|
|
|
|
|
|
|
echo "-- GROUP BY WITH TOTALS before DISTINCT with on different columns => do _not_ remove DISTINCT"
|
|
|
|
query="SELECT DISTINCT c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
a,
|
|
|
|
sum(b) AS c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
x.number AS a,
|
|
|
|
y.number AS b
|
|
|
|
FROM numbers(3) AS x, numbers(3, 3) AS y
|
|
|
|
)
|
|
|
|
GROUP BY a WITH TOTALS
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
|
|
|
|
|
|
|
echo "-- GROUP BY WITH TOTALS before DISTINCT with on the same columns => remove DISTINCT"
|
|
|
|
query="SELECT DISTINCT a
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
a,
|
|
|
|
sum(b) AS c
|
|
|
|
FROM
|
|
|
|
(
|
|
|
|
SELECT
|
|
|
|
x.number AS a,
|
|
|
|
y.number AS b
|
|
|
|
FROM numbers(3) AS x, numbers(3, 3) AS y
|
|
|
|
)
|
|
|
|
GROUP BY a WITH TOTALS
|
|
|
|
)"
|
|
|
|
run_query "$query"
|
2023-03-15 12:44:22 +00:00
|
|
|
|
|
|
|
echo "-- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT"
|
|
|
|
query="select distinct count() from numbers(10) group by number"
|
|
|
|
run_query "$query"
|