ClickHouse/tests/performance/window_functions.xml

148 lines
4.6 KiB
XML
Raw Permalink Normal View History

<test>
<!--
For some counters, find top 10 users by the number of records.
First with LIMIT BY, next with window functions.
-->
<!-- Baseline: top 10 users per counter by hit count, selected with LIMIT BY. -->
<query><![CDATA[
    select CounterID, UserID, count(*) user_hits
    from hits_100m_single
    where CounterID < 10000
    group by CounterID, UserID
    order by user_hits desc
    limit 10 by CounterID
    format Null
]]></query>
<!--
    Same top-10 selection, but the rank is a running count() over a ROWS
    frame within each CounterID partition; the outer query keeps ranks <= 10.
-->
<query><![CDATA[
    select *
    from (
        select CounterID, UserID, count(*) user_hits,
            count()
                over (partition by CounterID order by user_hits desc
                    rows unbounded preceding)
                user_rank
        from hits_100m_single
        where CounterID < 10000
        group by CounterID, UserID
    )
    where user_rank <= 10
    format Null
]]></query>
<!--
The RANGE version should give (almost) the same result, because counts
for the top ranking users are probably different, so the ranks won't be
influenced by grouping. But it is going to be slower than ROWS because
of the additional work of finding the group boundaries.
-->
<!--
    RANGE-frame variant of the ranking query: identical except that the
    running count() uses `range unbounded preceding` instead of `rows`.
-->
<query><![CDATA[
    select *
    from (
        select CounterID, UserID, count(*) user_hits,
            count()
                over (partition by CounterID order by user_hits desc
                    range unbounded preceding)
                user_rank
        from hits_100m_single
        where CounterID < 10000
        group by CounterID, UserID
    )
    where user_rank <= 10
    format Null
]]></query>
<!--
    For every row, count the rows in the surrounding 21-second interval
    (10 preceding through 10 following by EventTime), to test the
    RANGE OFFSET frame.
-->
<query>
    SELECT * FROM
    (SELECT EventTime,
        count(*) OVER (ORDER BY EventTime ASC
            RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING) AS c
    FROM hits_10m_single)
    FORMAT Null
</query>
<!--
    Roughly the same computation as the RANGE OFFSET query above, expressed
    as fixed buckets of width 20 over toUInt32(EventTime), so the following
    frame boundary is not inclusive. It is expected to be much faster because
    the aggregation state does not have to be reset. Once subtraction of
    aggregate states is supported, the query above should come closer to this
    one in performance.
-->
<query>
    SELECT *
    FROM
    (
        SELECT
            EventTime,
            count(*) OVER (PARTITION BY floor((toUInt32(EventTime) + 10 + 1) / 20)) AS c
        FROM hits_10m_single
    )
    FORMAT Null
</query>
<!-- Some synthetic tests: several aggregates sharing one named window. -->
<query>
    select
        min(number) over w,
        count(*) over w,
        max(number) over w
    from
        (select number, intDiv(number, 1111) p, mod(number, 111) o
            from numbers(10000000)) t
    window w as (partition by p order by o)
    format Null
</query>
<!-- first_value and dense_rank over the same synthetic partitioned, ordered window. -->
<query>
    SELECT
        first_value(number) OVER w,
        dense_rank() OVER w
    FROM
    (
        SELECT
            number,
            intDiv(number, 1111) AS p,
            mod(number, 111) AS o
        FROM numbers(10000000)
    ) AS t
    WINDOW w AS (PARTITION BY p ORDER BY o)
    FORMAT Null
</query>
<!-- Our variant of lead: leadInFrame over a frame that spans the whole partition. -->
<query>
    select leadInFrame(number) over w
    from
        (select number, intDiv(number, 1111) p, mod(number, 111) o
            from numbers(10000000)) t
    window w as (partition by p order by o
        rows between unbounded preceding and unbounded following)
    format Null
</query>
<!--
    Intended as a faster replacement for lead with a constant offset:
    any() evaluated over a frame containing only the next row.
-->
<query>
    SELECT any(number) OVER w
    FROM
    (
        SELECT
            number,
            intDiv(number, 1111) AS p,
            mod(number, 111) AS o
        FROM numbers(10000000)
    ) AS t
    WINDOW w AS
    (
        PARTITION BY p
        ORDER BY o
        ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING
    )
    FORMAT Null
</query>
<!-- leadInFrame with a second argument (a non-constant, per-row value). -->
<query>
    select leadInFrame(number, number) over w
    from
        (select number, intDiv(number, 1111) p, mod(number, 111) o
            from numbers(10000000)) t
    window w as (partition by p order by o
        rows between unbounded preceding and unbounded following)
    format Null
</query>
<!-- leadInFrame with second and third arguments (both non-constant, per-row values). -->
<query>
    select leadInFrame(number, number, number) over w
    from
        (select number, intDiv(number, 1111) p, mod(number, 111) o
            from numbers(10000000)) t
    window w as (partition by p order by o
        rows between unbounded preceding and unbounded following)
    format Null
</query>
</test>