mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-02 04:22:03 +00:00
63 lines
1.9 KiB
XML
63 lines
1.9 KiB
XML
<test>
|
|
<!-- Precondition: the hits_100m_single table read by the queries of this test must exist. -->
<preconditions>
    <table_exists>hits_100m_single</table_exists>
</preconditions>
|
|
|
|
<!-- Turns on allow_experimental_window_functions for this test; two of its queries use OVER clauses. -->
<settings>
    <allow_experimental_window_functions>1</allow_experimental_window_functions>
</settings>
|
|
|
|
<!--
    For some counters, find top 10 users by the number of records.
    First with LIMIT BY, next with window functions.
-->
|
|
<!--
    LIMIT BY variant: count hits per (CounterID, UserID) for CounterID < 10000,
    sort by that count descending, and keep the first 10 rows of each CounterID.
-->
<query><![CDATA[
    SELECT
        CounterID,
        UserID,
        count(*) AS user_hits
    FROM hits_100m_single
    WHERE CounterID < 10000
    GROUP BY CounterID, UserID
    ORDER BY user_hits DESC
    LIMIT 10 BY CounterID
    FORMAT Null
]]></query>
|
|
|
|
<!--
    Window-function variant with a ROWS frame: a running count() over each
    CounterID partition, ordered by user_hits descending, numbers the rows;
    the outer query keeps those numbered 10 or lower.
-->
<query><![CDATA[
    SELECT *
    FROM
    (
        SELECT
            CounterID,
            UserID,
            count(*) AS user_hits,
            count() OVER (
                PARTITION BY CounterID
                ORDER BY user_hits DESC
                ROWS UNBOUNDED PRECEDING
            ) AS user_rank
        FROM hits_100m_single
        WHERE CounterID < 10000
        GROUP BY CounterID, UserID
    )
    WHERE user_rank <= 10
    FORMAT Null
]]></query>
|
|
|
|
<!--
    The RANGE version should give (almost) the same result, because the counts
    for the top-ranking users are probably all distinct, so the ranks won't be
    influenced by the grouping of peer rows. But it is going to be slower than
    ROWS because of the additional work of finding the group boundaries.
-->
|
|
<!--
    Window-function variant with a RANGE frame: the same running count() as
    the ROWS query, but rows with equal user_hits fall into one peer group
    and therefore receive the same user_rank value.
-->
<query><![CDATA[
    SELECT *
    FROM
    (
        SELECT
            CounterID,
            UserID,
            count(*) AS user_hits,
            count() OVER (
                PARTITION BY CounterID
                ORDER BY user_hits DESC
                RANGE UNBOUNDED PRECEDING
            ) AS user_rank
        FROM hits_100m_single
        WHERE CounterID < 10000
        GROUP BY CounterID, UserID
    )
    WHERE user_rank <= 10
    FORMAT Null
]]></query>
|
|
|
|
</test>
|