ClickHouse/tests/performance/window_functions.xml

<test>
    <!--
        For some counters, find the top 10 users by the number of records.
        First with LIMIT BY, next with window functions.
    -->
    <!--
        LIMIT BY variant: aggregate hits per (CounterID, UserID), sort by
        user_hits descending, then keep at most 10 rows for each CounterID.
    -->
    <query><![CDATA[
        select CounterID, UserID, count(*) user_hits
        from hits_100m_single
        where CounterID < 10000
        group by CounterID, UserID
        order by user_hits desc
        limit 10 by CounterID
        format Null
    ]]></query>

    <!--
        Window function variant: count() over a "rows unbounded preceding"
        frame grows by one for every row of the CounterID partition (ordered
        by user_hits descending), so it serves as a row number. The outer
        query keeps only the rows with user_rank <= 10.
    -->
    <query><![CDATA[
        select *
        from (
            select CounterID, UserID, count(*) user_hits,
                count()
                    over (partition by CounterID order by user_hits desc
                        rows unbounded preceding)
                    user_rank
            from hits_100m_single
            where CounterID < 10000
            group by CounterID, UserID
        )
        where user_rank <= 10
        format Null
    ]]></query>

    <!--
        The same query with a RANGE frame instead of ROWS. With RANGE, rows
        that have equal user_hits are peers and end up with the same count.
        The result should be (almost) the same as for ROWS, because counts
        for the top ranking users are probably different, so the ranks are
        mostly unaffected by peer groups. It is expected to be slower than
        ROWS because of the additional work of finding the peer group
        boundaries.
    -->
    <query><![CDATA[
        select *
        from (
            select CounterID, UserID, count(*) user_hits,
                count()
                    over (partition by CounterID order by user_hits desc
                        range unbounded preceding)
                    user_rank
            from hits_100m_single
            where CounterID < 10000
            group by CounterID, UserID
        )
        where user_rank <= 10
        format Null
    ]]></query>

    <!--
        Tests the RANGE OFFSET frame: for every row, count the rows whose
        EventTime lies between 10 before and 10 after its own EventTime, both
        bounds inclusive. Presumably EventTime has one-second resolution,
        which makes this a 21-second interval; confirm against the table
        schema. Note that all rows are returned, nothing is filtered by c.
    -->
    <query>
        select * from
            (select EventTime,
                count(*) over (order by EventTime asc
                    range between 10 preceding and 10 following) as c
            from hits_10m_single)
        format Null
    </query>

    <!--
        This is kind of the same, except that rows are assigned to fixed
        buckets (the partition key is toUInt32(EventTime) + 10 + 1, divided by
        20 and floored) instead of using a frame that moves with every row,
        and the following frame boundary is not inclusive. Should be much
        faster, because we don't have to reset the aggregation state. After we
        support subtraction of aggregate state, the above query should become
        closer in performance to this one.
    -->
    <query>
        select * from
            (select EventTime,
                count(*) over (partition by
                    floor((toUInt32(EventTime) + 10 + 1) / 20)) as c
            from hits_10m_single)
        format Null
    </query>

    <!--
        Some synthetic tests. Three aggregate functions share one named
        window w over numbers(10000000): rows are partitioned by
        intDiv(number, 1111) and ordered by mod(number, 111). No frame is
        given in the window definition.
    -->
    <query>
        select
            min(number) over w,
            count(*) over w,
            max(number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o)
        format Null
    </query>

    <!--
        first_value and dense_rank over a named window w on synthetic data:
        numbers(10000000) partitioned by intDiv(number, 1111) and ordered by
        mod(number, 111), with no frame given in the window definition.
    -->
    <query>
        select
            first_value(number) over w,
            dense_rank() over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o)
        format Null
    </query>

    <!--
        Our variant of lead: leadInFrame called with the value argument only.
        The frame is widened to the whole partition (unbounded preceding to
        unbounded following).
    -->
    <query>
        select leadInFrame(number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o
            rows between unbounded preceding and unbounded following)
        format Null
    </query>

    <!--
        A faster replacement for lead with constant offset: any() over a
        frame that consists of exactly the one row following the current row
        (rows between 1 following and 1 following).
    -->
    <query>
        select any(number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o
            rows between 1 following and 1 following)
        format Null
    </query>

    <!--
        leadInFrame with two arguments over a frame spanning the whole
        partition. The second argument is the number column, so it differs
        for every row. NOTE(review): presumably the second argument is the
        offset; confirm against the leadInFrame documentation.
    -->
    <query>
        select leadInFrame(number, number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o
            rows between unbounded preceding and unbounded following)
        format Null
    </query>

    <!--
        leadInFrame with three arguments over a frame spanning the whole
        partition. All three arguments are the number column. NOTE(review):
        presumably the second argument is the offset and the third is the
        default value; confirm against the leadInFrame documentation.
    -->
    <query>
        select leadInFrame(number, number, number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o
            rows between unbounded preceding and unbounded following)
        format Null
    </query>

</test>
some provision for aggregate fns as window fn args (doesn't work yet) also a perf test w/LIMIT BY 2020-12-24 08:49:55 +00:00			`<test>`
			`<!--`
			`For some counters, find top 10 users by the numer of records.`
			`First with LIMIT BY, next with window functions.`
			`-->`
			`<query><![CDATA[`
cleanup 2020-12-28 10:08:38 +00:00			`select CounterID, UserID, count(*) user_hits`
some provision for aggregate fns as window fn args (doesn't work yet) also a perf test w/LIMIT BY 2020-12-24 08:49:55 +00:00			`from hits_100m_single`
			`where CounterID < 10000`
			`group by CounterID, UserID`
cleanup 2020-12-28 10:08:38 +00:00			`order by user_hits desc`
some provision for aggregate fns as window fn args (doesn't work yet) also a perf test w/LIMIT BY 2020-12-24 08:49:55 +00:00			`limit 10 by CounterID`
			`format Null`
			`]]></query>`

			`<query><![CDATA[`
			`select *`
			`from (`
cleanup 2020-12-28 10:08:38 +00:00			`select CounterID, UserID, count(*) user_hits,`
fix the calculation for moving frame start 2021-02-03 05:53:21 +00:00			`count()`
			`over (partition by CounterID order by user_hits desc`
			`rows unbounded preceding)`
			`user_rank`
			`from hits_100m_single`
			`where CounterID < 10000`
			`group by CounterID, UserID`
			`)`
			`where user_rank <= 10`
			`format Null`
			`]]></query>`

			`<!--`
			`The RANGE version should give (almost) the same result, because counts`
			`for the top ranking users are probably different, so the ranks won't be`
			`influenced by grouping. But it is going to be slower than ROWS because`
			`of the additional work of finding the group boundaries.`
			`-->`
			`<query><![CDATA[`
			`select *`
			`from (`
			`select CounterID, UserID, count(*) user_hits,`
			`count()`
			`over (partition by CounterID order by user_hits desc`
			`range unbounded preceding)`
cleanup 2020-12-28 10:08:38 +00:00			`user_rank`
			`from hits_100m_single`
			`where CounterID < 10000`
			`group by CounterID, UserID`
some provision for aggregate fns as window fn args (doesn't work yet) also a perf test w/LIMIT BY 2020-12-24 08:49:55 +00:00			`)`
cleanup 2020-12-28 10:08:38 +00:00			`where user_rank <= 10`
some provision for aggregate fns as window fn args (doesn't work yet) also a perf test w/LIMIT BY 2020-12-24 08:49:55 +00:00			`format Null`
			`]]></query>`

fix a bug and add some tests 2021-02-09 14:44:04 +00:00			`<!--`
			`Rows from the hottest 21-second intervals, to test the RANGE OFFSET frame.`
			`-->`
			`<query>`
			`SELECT * FROM`
			`(SELECT EventTime,`
			`count(*) OVER (ORDER BY EventTime ASC`
			`RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING) AS c`
			`FROM hits_10m_single)`
			`FORMAT Null`
			`</query>`

			`<!--`
			`This is kind of the same, except the following frame boundary is not`
			`inclusive. Should be much faster, because we don't have to reset the`
			`aggregation state. After we support subtraction of aggregate state, the`
			`above query should become closer in performance to this one.`
			`-->`
			`<query>`
			`select * from`
			`(select EventTime,`
			`count(*) over (partition by`
			`floor((toUInt32(EventTime) + 10 + 1) / 20)) as c`
			`from hits_10m_single)`
			`format Null`
			`</query>`

some tests and speedup 2021-02-11 16:20:57 +00:00			`<!-- Some synthetic tests.-->`
			`<query>`
			`select`
			`min(number) over w,`
			`count(*) over w,`
			`max(number) over w`
			`from`
			`(select number, intDiv(number, 1111) p, mod(number, 111) o`
			`from numbers(10000000)) t`
			`window w as (partition by p order by o)`
			`format Null`
			`</query>`

			`<query>`
			`select`
			`first_value(number) over w,`
			`dense_rank() over w`
			`from`
			`(select number, intDiv(number, 1111) p, mod(number, 111) o`
			`from numbers(10000000)) t`
			`window w as (partition by p order by o)`
			`format Null`
			`</query>`

float frames and lag/lead_in_frame 2021-03-18 23:05:43 +00:00			`<!-- Our variant of lead. -->`
			`<query>`
use camelCase 2021-03-25 15:49:01 +00:00			`select leadInFrame(number) over w`
float frames and lag/lead_in_frame 2021-03-18 23:05:43 +00:00			`from`
			`(select number, intDiv(number, 1111) p, mod(number, 111) o`
			`from numbers(10000000)) t`
			`window w as (partition by p order by o`
			`rows between unbounded preceding and unbounded following)`
			`format Null`
			`</query>`

			`<!-- A faster replacement for lead with constant offset. -->`
			`<query>`
			`select any(number) over w`
			`from`
			`(select number, intDiv(number, 1111) p, mod(number, 111) o`
			`from numbers(10000000)) t`
			`window w as (partition by p order by o`
			`rows between 1 following and 1 following)`
			`format Null`
			`</query>`

			`<query>`
use camelCase 2021-03-25 15:49:01 +00:00			`select leadInFrame(number, number) over w`
float frames and lag/lead_in_frame 2021-03-18 23:05:43 +00:00			`from`
			`(select number, intDiv(number, 1111) p, mod(number, 111) o`
			`from numbers(10000000)) t`
			`window w as (partition by p order by o`
			`rows between unbounded preceding and unbounded following)`
			`format Null`
			`</query>`

			`<query>`
use camelCase 2021-03-25 15:49:01 +00:00			`select leadInFrame(number, number, number) over w`
float frames and lag/lead_in_frame 2021-03-18 23:05:43 +00:00			`from`
			`(select number, intDiv(number, 1111) p, mod(number, 111) o`
			`from numbers(10000000)) t`
			`window w as (partition by p order by o`
			`rows between unbounded preceding and unbounded following)`
			`format Null`
			`</query>`

some provision for aggregate fns as window fn args (doesn't work yet) also a perf test w/LIMIT BY 2020-12-24 08:49:55 +00:00			`</test>`