ClickHouse/tests/performance/window_functions.xml

<test>
    <preconditions>
        <table_exists>hits_100m_single</table_exists>
    </preconditions>

    <!--
        For some counters, find top 10 users by the numer of records.
        First with LIMIT BY, next with window functions.
    -->
    <query><![CDATA[
        select CounterID, UserID, count(*) user_hits
        from hits_100m_single
        where CounterID < 10000
        group by CounterID, UserID
        order by user_hits desc
        limit 10 by CounterID
        format Null
    ]]></query>

    <query><![CDATA[
        select *
        from (
            select CounterID, UserID, count(*) user_hits,
                count()
                    over (partition by CounterID order by user_hits desc
                        rows unbounded preceding)
                    user_rank
            from hits_100m_single
            where CounterID < 10000
            group by CounterID, UserID
        )
        where user_rank <= 10
        format Null
    ]]></query>

    <!--
        The RANGE version should give (almost) the same result, because counts
        for the top ranking users are probably different, so the ranks won't be
        influenced by grouping. But it is going to be slower than ROWS because
        of the additional work of finding the group boundaries.
    -->
    <query><![CDATA[
        select *
        from (
            select CounterID, UserID, count(*) user_hits,
                count()
                    over (partition by CounterID order by user_hits desc
                        range unbounded preceding)
                    user_rank
            from hits_100m_single
            where CounterID < 10000
            group by CounterID, UserID
        )
        where user_rank <= 10
        format Null
    ]]></query>

    <!--
        Rows from the hottest 21-second intervals, to test the RANGE OFFSET frame.
    -->
    <query>
        SELECT * FROM
            (SELECT EventTime,
                count(*) OVER (ORDER BY EventTime ASC
                    RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING) AS c
            FROM hits_10m_single)
        FORMAT Null
    </query>

    <!--
        This is kind of the same, except the following frame boundary is not
        inclusive. Should be much faster, because we don't have to reset the
        aggregation state. After we support subtraction of aggregate state, the
        above query should become closer in performance to this one.
    -->
    <query>
        select * from
            (select EventTime,
                count(*) over (partition by
                    floor((toUInt32(EventTime) + 10 + 1) / 20)) as c
            from hits_10m_single)
        format Null
    </query>

    <!-- Some synthetic tests.-->
    <query>
        select
            min(number) over w,
            count(*) over w,
            max(number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o)
        format Null
    </query>

    <query>
        select
            first_value(number) over w,
            dense_rank() over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o)
        format Null
    </query>

    <!-- Our variant of lead. -->
    <query>
        select leadInFrame(number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o
            rows between unbounded preceding and unbounded following)
        format Null
    </query>

    <!-- A faster replacement for lead with constant offset. -->
    <query>
        select any(number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o
            rows between 1 following and 1 following)
        format Null
    </query>

    <query>
        select leadInFrame(number, number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o
            rows between unbounded preceding and unbounded following)
        format Null
    </query>

    <query>
        select leadInFrame(number, number, number) over w
        from
            (select number, intDiv(number, 1111) p, mod(number, 111) o
                from numbers(10000000)) t
        window w as (partition by p order by o
            rows between unbounded preceding and unbounded following)
        format Null
    </query>

</test>