2020-12-24 08:49:55 +00:00
|
|
|
<test>
|
|
|
|
<preconditions>
|
|
|
|
<table_exists>hits_100m_single</table_exists>
|
|
|
|
</preconditions>
|
|
|
|
|
2020-12-25 03:15:36 +00:00
|
|
|
<settings>
|
|
|
|
<allow_experimental_window_functions>1</allow_experimental_window_functions>
|
|
|
|
</settings>
|
|
|
|
|
2020-12-24 08:49:55 +00:00
|
|
|
<!--
|
|
|
|
For some counters, find top 10 users by the numer of records.
|
|
|
|
First with LIMIT BY, next with window functions.
|
|
|
|
-->
|
|
|
|
<query><![CDATA[
|
2020-12-28 10:08:38 +00:00
|
|
|
select CounterID, UserID, count(*) user_hits
|
2020-12-24 08:49:55 +00:00
|
|
|
from hits_100m_single
|
|
|
|
where CounterID < 10000
|
|
|
|
group by CounterID, UserID
|
2020-12-28 10:08:38 +00:00
|
|
|
order by user_hits desc
|
2020-12-24 08:49:55 +00:00
|
|
|
limit 10 by CounterID
|
|
|
|
format Null
|
|
|
|
]]></query>
|
|
|
|
|
|
|
|
<query><![CDATA[
|
|
|
|
select *
|
|
|
|
from (
|
2020-12-28 10:08:38 +00:00
|
|
|
select CounterID, UserID, count(*) user_hits,
|
2021-02-03 05:53:21 +00:00
|
|
|
count()
|
|
|
|
over (partition by CounterID order by user_hits desc
|
|
|
|
rows unbounded preceding)
|
|
|
|
user_rank
|
|
|
|
from hits_100m_single
|
|
|
|
where CounterID < 10000
|
|
|
|
group by CounterID, UserID
|
|
|
|
)
|
|
|
|
where user_rank <= 10
|
|
|
|
format Null
|
|
|
|
]]></query>
|
|
|
|
|
|
|
|
<!--
|
|
|
|
The RANGE version should give (almost) the same result, because counts
|
|
|
|
for the top ranking users are probably different, so the ranks won't be
|
|
|
|
influenced by grouping. But it is going to be slower than ROWS because
|
|
|
|
of the additional work of finding the group boundaries.
|
|
|
|
-->
|
|
|
|
<query><![CDATA[
|
|
|
|
select *
|
|
|
|
from (
|
|
|
|
select CounterID, UserID, count(*) user_hits,
|
|
|
|
count()
|
|
|
|
over (partition by CounterID order by user_hits desc
|
|
|
|
range unbounded preceding)
|
2020-12-28 10:08:38 +00:00
|
|
|
user_rank
|
|
|
|
from hits_100m_single
|
|
|
|
where CounterID < 10000
|
|
|
|
group by CounterID, UserID
|
2020-12-24 08:49:55 +00:00
|
|
|
)
|
2020-12-28 10:08:38 +00:00
|
|
|
where user_rank <= 10
|
2020-12-24 08:49:55 +00:00
|
|
|
format Null
|
|
|
|
]]></query>
|
|
|
|
|
2021-02-09 14:44:04 +00:00
|
|
|
<!--
|
|
|
|
Rows from the hottest 21-second intervals, to test the RANGE OFFSET frame.
|
|
|
|
-->
|
|
|
|
<query>
|
|
|
|
SELECT * FROM
|
|
|
|
(SELECT EventTime,
|
|
|
|
count(*) OVER (ORDER BY EventTime ASC
|
|
|
|
RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING) AS c
|
|
|
|
FROM hits_10m_single)
|
|
|
|
FORMAT Null
|
|
|
|
</query>
|
|
|
|
|
|
|
|
<!--
|
|
|
|
This is kind of the same, except the following frame boundary is not
|
|
|
|
inclusive. Should be much faster, because we don't have to reset the
|
|
|
|
aggregation state. After we support subtraction of aggregate state, the
|
|
|
|
above query should become closer in performance to this one.
|
|
|
|
-->
|
|
|
|
<query>
|
|
|
|
select * from
|
|
|
|
(select EventTime,
|
|
|
|
count(*) over (partition by
|
|
|
|
floor((toUInt32(EventTime) + 10 + 1) / 20)) as c
|
|
|
|
from hits_10m_single)
|
|
|
|
format Null
|
|
|
|
</query>
|
|
|
|
|
2021-02-11 16:20:57 +00:00
|
|
|
<!-- Some synthetic tests.-->
|
|
|
|
<query>
|
|
|
|
select
|
|
|
|
min(number) over w,
|
|
|
|
count(*) over w,
|
|
|
|
max(number) over w
|
|
|
|
from
|
|
|
|
(select number, intDiv(number, 1111) p, mod(number, 111) o
|
|
|
|
from numbers(10000000)) t
|
|
|
|
window w as (partition by p order by o)
|
|
|
|
format Null
|
|
|
|
</query>
|
|
|
|
|
|
|
|
<query>
|
|
|
|
select
|
|
|
|
first_value(number) over w,
|
|
|
|
dense_rank() over w
|
|
|
|
from
|
|
|
|
(select number, intDiv(number, 1111) p, mod(number, 111) o
|
|
|
|
from numbers(10000000)) t
|
|
|
|
window w as (partition by p order by o)
|
|
|
|
format Null
|
|
|
|
</query>
|
|
|
|
|
2021-03-18 23:05:43 +00:00
|
|
|
<!-- Our variant of lead. -->
|
|
|
|
<query>
|
2021-03-25 15:49:01 +00:00
|
|
|
select leadInFrame(number) over w
|
2021-03-18 23:05:43 +00:00
|
|
|
from
|
|
|
|
(select number, intDiv(number, 1111) p, mod(number, 111) o
|
|
|
|
from numbers(10000000)) t
|
|
|
|
window w as (partition by p order by o
|
|
|
|
rows between unbounded preceding and unbounded following)
|
|
|
|
format Null
|
|
|
|
</query>
|
|
|
|
|
|
|
|
<!-- A faster replacement for lead with constant offset. -->
|
|
|
|
<query>
|
|
|
|
select any(number) over w
|
|
|
|
from
|
|
|
|
(select number, intDiv(number, 1111) p, mod(number, 111) o
|
|
|
|
from numbers(10000000)) t
|
|
|
|
window w as (partition by p order by o
|
|
|
|
rows between 1 following and 1 following)
|
|
|
|
format Null
|
|
|
|
</query>
|
|
|
|
|
|
|
|
<query>
|
2021-03-25 15:49:01 +00:00
|
|
|
select leadInFrame(number, number) over w
|
2021-03-18 23:05:43 +00:00
|
|
|
from
|
|
|
|
(select number, intDiv(number, 1111) p, mod(number, 111) o
|
|
|
|
from numbers(10000000)) t
|
|
|
|
window w as (partition by p order by o
|
|
|
|
rows between unbounded preceding and unbounded following)
|
|
|
|
format Null
|
|
|
|
</query>
|
|
|
|
|
|
|
|
<query>
|
2021-03-25 15:49:01 +00:00
|
|
|
select leadInFrame(number, number, number) over w
|
2021-03-18 23:05:43 +00:00
|
|
|
from
|
|
|
|
(select number, intDiv(number, 1111) p, mod(number, 111) o
|
|
|
|
from numbers(10000000)) t
|
|
|
|
window w as (partition by p order by o
|
|
|
|
rows between unbounded preceding and unbounded following)
|
|
|
|
format Null
|
|
|
|
</query>
|
|
|
|
|
2020-12-24 08:49:55 +00:00
|
|
|
</test>
|