<!-- ClickHouse/tests/performance/uniq.xml -->
<test>
    <settings>
        <max_memory_usage>30000000000</max_memory_usage>
        <!--
            Because of the random distribution of data between threads, the number
            of unique keys per thread might differ. This means that sometimes we
            switch to two-level aggregation and sometimes we don't, depending on
            the "bytes" threshold. Two-level aggregation turns out to be twice
            as fast, because it merges aggregation states in multiple threads.
            Lower the threshold here to avoid this jitter. It is unclear whether it
            would be beneficial to lower the default as well.
        -->
        <group_by_two_level_threshold_bytes>10000000</group_by_two_level_threshold_bytes>
    </settings>
    <substitutions>
        <substitution>
            <name>key</name>
            <values>
                <!--
                    Using 1 as a key doesn't make much sense: the queries are either
                    too long in the case of uniqExact or too short in the case of the
                    limited uniq variants, so the result is drowned out by noise.
                <value>1</value>
                -->
                <value>SearchEngineID</value>
                <value>RegionID</value>
                <value>SearchPhrase</value>
                <value>ClientIP</value>
            </values>
        </substitution>
        <substitution>
            <name>func</name>
            <values>
                <value>sum</value>
                <value>uniq</value>
                <value>uniqExact</value>
                <value>uniqHLL12</value>
                <value>uniqCombined(12)</value>
                <value>uniqCombined(13)</value>
                <value>uniqCombined(14)</value>
                <value>uniqCombined(15)</value>
                <value>uniqCombined(16)</value>
                <value>uniqCombined(17)</value>
                <value>uniqCombined(18)</value>
                <value>uniqUpTo(3)</value>
                <value>uniqUpTo(5)</value>
                <value>uniqUpTo(10)</value>
                <value>uniqUpTo(25)</value>
                <value>uniqUpTo(100)</value>
            </values>
        </substitution>
    </substitutions>
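    <!--
        The {key} and {func} placeholders in the queries below are expanded by the
        performance test runner over the Cartesian product of the substitution values
        defined above, yielding one query per combination. As an illustration only
        (this particular combination is not spelled out in the file), the first query
        expands to something like:

            SELECT RegionID AS k, uniqCombined(14)(UserID) FROM hits_100m_single GROUP BY k FORMAT Null
    -->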
    <query>SELECT {key} AS k, {func}(UserID) FROM hits_100m_single GROUP BY k FORMAT Null</query>
    <query>SELECT {key} AS k, uniqTheta(UserID) FROM hits_10m_single GROUP BY k FORMAT Null</query>
</test>