Merge pull request #24049 from ClickHouse/aku/perf-unstable

complain about unstable perf test queries
Alexander Kuzmenkov 2021-05-24 12:59:12 +03:00 committed by GitHub
commit f227020018
45 changed files with 68 additions and 58 deletions

@@ -44,7 +44,7 @@ parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated l
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
parser.add_argument('--max-query-seconds', type=int, default=10, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
@@ -273,8 +273,14 @@ for query_index in queries_to_run:
prewarm_id = f'{query_prefix}.prewarm0'
try:
# Will also detect too long queries during warmup stage
res = c.execute(q, query_id = prewarm_id, settings = {'max_execution_time': args.max_query_seconds})
# During the warmup runs, we will also:
# * detect queries that are exceedingly long, to fail fast,
# * collect profiler traces, which might be helpful for analyzing
# test coverage. We disable profiler for normal runs because
# it makes the results unstable.
res = c.execute(q, query_id = prewarm_id,
settings = {'max_execution_time': args.max_query_seconds,
'query_profiler_real_time_period_ns': 10000000})
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
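
The prewarm settings above can also be tried outside the harness. A minimal sketch using clickhouse_driver (the server address, query, and query_id are placeholders; the setting names and values are the ones added in this hunk):

# A minimal sketch, assuming a ClickHouse server on localhost and the
# clickhouse_driver package. The query and query_id are placeholders.
import clickhouse_driver

client = clickhouse_driver.Client('localhost')

# Sample stacks every 10 ms (10000000 ns) during this run only, the same
# way the prewarm run above enables the profiler.
client.execute(
    'SELECT count() FROM numbers(10000000)',
    query_id='example.prewarm0',
    settings={'max_execution_time': 15,
              'query_profiler_real_time_period_ns': 10000000})

# The profiler writes its samples to system.trace_log, keyed by query_id,
# which is what makes the prewarm traces usable for coverage analysis.
client.execute('SYSTEM FLUSH LOGS')
print(client.execute(
    "SELECT count() FROM system.trace_log WHERE query_id = 'example.prewarm0'"))
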
@@ -359,10 +365,11 @@ for query_index in queries_to_run:
# For very short queries we have a special mode where we run them for at
# least some time. The recommended lower bound of run time for "normal"
# queries is about 0.1 s, and we run them about 10 times, giving the
# time per query per server of about one second. Use this value as a
# reference for "short" queries.
# time per query per server of about one second. Run "short" queries
# for longer time, because they have a high percentage of overhead and
# might give less stable results.
if is_short[query_index]:
if server_seconds >= 2 * len(this_query_connections):
if server_seconds >= 8 * len(this_query_connections):
break
# Also limit the number of runs, so that we don't go crazy processing
# the results -- 'eqmed.sql' is really suboptimal.
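
In other words, the run loop keeps launching runs until the accumulated server time crosses a per-server budget that is now higher for short queries. A simplified sketch of that stopping rule (server_seconds and the connection count follow the names used above; the normal-query budget shown here is illustrative, the real loop also caps the number of runs):

# Simplified sketch of the stopping rule; server_seconds accumulates the
# server-side elapsed time of all runs of this query so far.
def should_stop(server_seconds, n_connections, is_short):
    if is_short:
        # Short queries are dominated by per-run overhead, so collect
        # about 8 s of server time per server before trusting the result.
        return server_seconds >= 8 * n_connections
    # Normal queries settle after ~10 runs of ~0.1 s, about 1 s per server.
    return server_seconds >= 1 * n_connections
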

@@ -446,6 +446,9 @@ if args.report == 'main':
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[3] = ''
# Just don't add the slightly unstable queries we don't consider
# errors. It's not clear what the user should do with them.
continue
text += tableRow(r, attrs, anchor)
@@ -553,12 +556,11 @@ if args.report == 'main':
error_tests += unstable_partial_queries
status = 'failure'
if unstable_queries:
message_array.append(str(unstable_queries) + ' unstable')
# Disabled before fix.
# if very_unstable_queries:
# status = 'failure'
# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
status = 'failure'
message_array.append(str(very_unstable_queries) + ' unstable')
error_tests += slow_average_tests
if error_tests:
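
After this change, the report's status aggregation boils down to roughly the following. This is a hedged sketch only: the counters are the ones in the diff, but the surrounding report code and its exact control flow are elided here.

# Sketch only: mildly unstable queries are dropped from the report,
# and only the very unstable ones fail the run.
status, message_array, error_tests = 'success', [], 0
error_tests += unstable_partial_queries   # partial failures stay errors
if unstable_partial_queries:
    status = 'failure'
if very_unstable_queries:
    status = 'failure'
    message_array.append(str(very_unstable_queries) + ' unstable')
error_tests += slow_average_tests
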

@@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.3">
<preconditions>
<table_exists>hits_100m_single</table_exists>
</preconditions>
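
Most of the remaining files simply add, retune, or drop this per-test noise threshold on the <test> root element. For reference, a hedged sketch of how a harness could read it (the attribute name comes from the diff; the parsing code and the fallback value are illustrative, not the real perf.py):

# Illustrative only: read the per-test noise threshold from a performance
# test definition. The fallback of 0.1 is a made-up placeholder.
import xml.etree.ElementTree as et

root = et.parse('some_perf_test.xml').getroot()
max_ignored = float(root.attrib.get('max_ignored_relative_change', '0.1'))
# Query runtime changes whose relative magnitude stays under this value
# are treated as noise rather than reported as regressions.
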

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test max_ignored_relative_change="0.7">
<settings>
<max_memory_usage>30000000000</max_memory_usage>
</settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.4">
<test>
<settings>
<allow_experimental_bigint_types>1</allow_experimental_bigint_types>
<max_threads>1</max_threads>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<query>SELECT boundingRatio(number, number) FROM numbers(100000000)</query>
<query>SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(100000000)</query>
</test>

@@ -1,5 +1,4 @@
<!-- FIXME this instability is abysmal, investigate the unstable queries -->
<test max_ignored_relative_change="0.2">
<test>
<settings>
<allow_suspicious_codecs>1</allow_suspicious_codecs>
</settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<allow_suspicious_codecs>1</allow_suspicious_codecs>
</settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<preconditions>
<table_exists>hits_100m_single</table_exists>
</preconditions>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
<substitutions>
<substitution>
<name>datetime_transform</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.5">
<test max_ignored_relative_change="0.2">
<settings>
<max_memory_usage>35G</max_memory_usage>
</settings>

@@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">
<settings>
<max_memory_usage>15G</max_memory_usage>
</settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test max_ignored_relative_change="0.4">
<create_query>
CREATE TABLE simple_key_direct_dictionary_source_table
(

@@ -1,7 +1,7 @@
<test>
<preconditions>
<table_exists>test.hits</table_exists>
<table_exists>hits_100m_single</table_exists>
</preconditions>
<query>SELECT count() FROM test.hits WHERE NOT ignore(encodeXMLComponent(URL))</query>
<query>SELECT count() FROM hits_100m_single WHERE NOT ignore(encodeXMLComponent(URL))</query>
</test>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
<create_query>
CREATE TABLE simple_key_flat_dictionary_source_table
(

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<substitutions>
<substitution>
<name>expr</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>

@@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">
<substitutions>
<substitution>
<name>gp_hash_func</name>

@@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.6">
<substitutions>
<substitution>
<name>hash_func</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 1000000000);</query>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test max_ignored_relative_change="0.2">
<create_query>
CREATE TABLE simple_key_hashed_dictionary_source_table
(

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<preconditions>
<table_exists>hits_100m_single</table_exists>
<table_exists>hits_10m_single</table_exists>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<query>
WITH
bitXor(number, 0x4CF2D2BAAE6DA887) AS x0,

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

@@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">
<substitutions>
<substitution>
<name>json</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test max_ignored_relative_change="0.6">
<substitutions>
<substitution>
<name>func_slow</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<create_query>CREATE TABLE bad_partitions (a UInt64, b UInt64, c UInt64, d UInt64, e UInt64, f UInt64, g UInt64, h UInt64, i UInt64, j UInt64, k UInt64, l UInt64, m UInt64, n UInt64, o UInt64, p UInt64, q UInt64, r UInt64, s UInt64, t UInt64, u UInt64, v UInt64, w UInt64, x UInt64, y UInt64, z UInt64) ENGINE = MergeTree PARTITION BY x ORDER BY x</create_query>
<fill_query>INSERT INTO bad_partitions (x) SELECT * FROM numbers_mt(3000)</fill_query>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test max_ignored_relative_change="0.3">
<substitutions>
<substitution>
<name>format</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
<settings>
<do_not_merge_across_partitions_select_final>1</do_not_merge_across_partitions_select_final>
</settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<parallel_view_processing>1</parallel_view_processing>
</settings>

@@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">
<settings>
<!--
Not sure why it's needed. Maybe it has something to do with the

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<create_query>
CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<create_query>
CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<create_query>
CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)

@@ -1,4 +1,4 @@
<test>
<test max_ignored_relative_change="0.2">
<settings>
<max_threads>4</max_threads>
<max_memory_usage>20G</max_memory_usage>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<query>SELECT count() FROM zeros(100000000) WHERE NOT ignore(randomString(10))</query>
<query>SELECT count() FROM zeros(100000000) WHERE NOT ignore(randomString(100))</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomString(1000))</query>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
<preconditions>
<table_exists>hits_100m_single</table_exists>
</preconditions>

@@ -1,4 +1,5 @@
<test>
<query>SELECT sumIf(1, 0) FROM numbers(100000000)</query>
<query>SELECT sumIf(1, 1) FROM numbers(100000000)</query>
<!-- Shouldn't have been a perf test, but an EXPLAIN one. -->
<query>SELECT sumIf(1, 0) FROM numbers(1000000000)</query>
<query>SELECT sumIf(1, 1) FROM numbers(1000000000)</query>
</test>

File diff suppressed because one or more lines are too long

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test max_ignored_relative_change="0.2">
<settings>
<max_threads>1</max_threads>
</settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test max_ignored_relative_change="0.2">
<settings>
<max_memory_usage>30000000000</max_memory_usage>
</settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<preconditions>
<table_exists>hits_100m_single</table_exists>
<table_exists>hits_10m_single</table_exists>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test max_ignored_relative_change="0.2">
<substitutions>
<substitution>
<name>param</name>