calculate perf test precision thresholds from historical data

2024-11-21 15:12:02 +00:00 · 2021-05-26 16:30:43 +03:00 · 2021-05-26 16:30:43 +03:00 · 5da54c2745
commit 5da54c2745
parent 67044d69f3
40 changed files with 132 additions and 51 deletions
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@ -552,6 +552,63 @@ create table query_metric_stats_denorm engine File(TSVWithNamesAndTypes,
    order by test, query_index, metric_name
    ;
 " 2> >(tee -a analyze/errors.log 1>&2)
+
+# Fetch historical query variability thresholds from the CI database and save
+# them to analyze/historical-thresholds.tsv. When the database is not configured
+# (CHPC_DATABASE_URL is unset), an empty file is created instead, so that the
+# report queries that read this file still work.
+if [ -v CHPC_DATABASE_URL ]
+then
+    set +x # Don't show password in the log
+    client=(clickhouse-client
+        # clickhouse-client doesn't understand a combined --host 127.0.0.1:9000
+        # argument, so the host and the port are extracted from the URL with
+        # clickhouse-local. Using the Poco URI parser in the client was tried
+        # as well, but it is broken and can't parse host:port.
+        $(clickhouse-local --query "with '${CHPC_DATABASE_URL}' as url select '--host ' || domain(url) || ' --port ' || toString(port(url)) format TSV")
+        --secure
+        --user "${CHPC_DATABASE_USER}"
+        --password "${CHPC_DATABASE_PASSWORD}"
+        --config "right/config/client_config.xml"
+        --database perftest
+        --date_time_input_format=best_effort)
+
+    # Precision is about 1.5 times worse for PRs than for runs with
+    # pr_number = 0, hence the 1.5 multiplier below. It was estimated with:
+    # SELECT quantilesExact(0., 0.1, 0.5, 0.75, 0.95, 1.)(p / m)
+    # FROM
+    # (
+    #     SELECT
+    #         quantileIf(0.95)(stat_threshold, pr_number = 0) AS m,
+    #         quantileIf(0.95)(stat_threshold, (pr_number != 0) AND (abs(diff) < stat_threshold)) AS p
+    #     FROM query_metrics_v2
+    #     WHERE (event_date > (today() - toIntervalMonth(1))) AND (metric = 'client_time')
+    #     GROUP BY
+    #         test,
+    #         query_index,
+    #         query_display_name
+    #     HAVING count(*) > 100
+    # )
+    # The file can be empty if the server is inaccessible, so it is read back as
+    # plain TSV with an explicit structure; TSVWithNamesAndTypes can't be used.
+    "${client[@]}" --query "
+            select test, query_index,
+                quantileExact(0.99)(abs(diff)) max_diff,
+                quantileExactIf(0.99)(stat_threshold, abs(diff) < stat_threshold) * 1.5 max_stat_threshold,
+                query_display_name
+            from query_metrics_v2
+            where event_date > now() - interval 1 month
+                and metric = 'client_time'
+                and pr_number = 0
+            group by test, query_index, query_display_name
+            having count(*) > 100
+            " > analyze/historical-thresholds.tsv
+    # Tracing was only switched off to keep the password out of the log.
+    set -x
+else
+    touch analyze/historical-thresholds.tsv
+fi
+
 }

 # Analyze results
@ -596,6 +653,26 @@ create view query_metric_stats as
            diff float, stat_threshold float')
    ;

+-- Per-query thresholds: the largest of a fixed floor, the historical value and the per-test value.
+create table report_thresholds engine File(TSVWithNamesAndTypes, 'report/thresholds.tsv')
+    as select
+        query_display_names.test as test,
+        query_display_names.query_index as query_index,
+        ceil(greatest(0.1, historical_thresholds.max_diff, test_thresholds.report_threshold), 2)
+            as changed_threshold,
+        ceil(greatest(0.2, historical_thresholds.max_stat_threshold, test_thresholds.report_threshold + 0.1), 2)
+            as unstable_threshold,
+        query_display_names.query_display_name as query_display_name
+    from query_display_names
+    left join file('analyze/historical-thresholds.tsv', TSV,
+        'test text, query_index int, max_diff float, max_stat_threshold float,
+            query_display_name text') as historical_thresholds
+        on query_display_names.test = historical_thresholds.test
+            and query_display_names.query_index = historical_thresholds.query_index
+            and query_display_names.query_display_name = historical_thresholds.query_display_name
+    left join file('analyze/report-thresholds.tsv', TSV, 'test text, report_threshold float') as test_thresholds
+        on query_display_names.test = test_thresholds.test;
+
 -- Main statistics for queries -- query time as reported in query log.
 create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
    as select
@ -610,23 +687,23 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
        -- uncaught regressions, because for the default 7 runs we do for PRs,
        -- the randomization distribution has only 16 values, so the max quantile
        -- is actually 0.9375.
-        abs(diff) > report_threshold        and abs(diff) >= stat_threshold as changed_fail,
-        abs(diff) > report_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,
+        abs(diff) > changed_threshold        and abs(diff) >= stat_threshold as changed_fail,
+        abs(diff) > changed_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,

-        not changed_fail and stat_threshold > report_threshold + 0.10 as unstable_fail,
-        not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show,
+        not changed_fail and stat_threshold > unstable_threshold as unstable_fail,
+        not changed_show and stat_threshold > unstable_threshold - 0.05 as unstable_show,

        left, right, diff, stat_threshold,
-        if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
        query_metric_stats.test test, query_metric_stats.query_index query_index,
-        query_display_name
+        query_display_names.query_display_name query_display_name
    from query_metric_stats
-    left join file('analyze/report-thresholds.tsv', TSV,
-            'test text, report_threshold float') thresholds
-        on query_metric_stats.test = thresholds.test
    left join query_display_names
        on query_metric_stats.test = query_display_names.test
            and query_metric_stats.query_index = query_display_names.query_index
+    left join report_thresholds
+        on query_display_names.test = report_thresholds.test
+            and query_display_names.query_index = report_thresholds.query_index
+            and query_display_names.query_display_name = report_thresholds.query_display_name
    -- 'server_time' is rounded down to ms, which might be bad for very short queries.
    -- Use 'client_time' instead.
    where metric_name = 'client_time'
@ -889,7 +966,6 @@ create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.ts
    order by test, query_index;
 " 2> >(tee -a report/errors.log 1>&2)

-
 # Prepare source data for metrics and flamegraphs for queries that were profiled
 # by perf.py.
 for version in {right,left}
--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@ -453,7 +453,10 @@ if args.report == 'main':
            text += tableRow(r, attrs, anchor)

        text += tableEnd()
-        tables.append(text)
+
+        # Don't add an empty table.
+        if very_unstable_queries:
+            tables.append(text)

    add_unstable_queries()

@ -552,13 +555,13 @@ if args.report == 'main':
        message_array.append(str(slower_queries) + ' slower')

    if unstable_partial_queries:
-        unstable_queries += unstable_partial_queries
-        error_tests += unstable_partial_queries
+        very_unstable_queries += unstable_partial_queries
        status = 'failure'

    # Don't show mildly unstable queries, only the very unstable ones we
    # treat as errors.
    if very_unstable_queries:
+        error_tests += very_unstable_queries
        status = 'failure'
        message_array.append(str(very_unstable_queries) + ' unstable')

--- a/src/TableFunctions/ITableFunctionFileLike.cpp
+++ b/src/TableFunctions/ITableFunctionFileLike.cpp
@ -58,7 +58,9 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context

    structure = args[2]->as<ASTLiteral &>().value.safeGet<String>();
    if (structure.empty())
-        throw Exception("Table structure is empty", ErrorCodes::BAD_ARGUMENTS);
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+            "Table structure is empty for table function '{}'",
+            ast_function.formatForErrorMessage());

    if (args.size() == 4)
        compression_method = args[3]->as<ASTLiteral &>().value.safeGet<String>();
--- a/tests/performance/ColumnMap.xml
+++ b/tests/performance/ColumnMap.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>

    <settings>
        <allow_experimental_map_type>1</allow_experimental_map_type>
--- a/tests/performance/agg_functions_min_max_any.xml
+++ b/tests/performance/agg_functions_min_max_any.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.3">
+<test>
    <preconditions>
        <table_exists>hits_100m_single</table_exists>
    </preconditions>
--- a/tests/performance/arithmetic.xml
+++ b/tests/performance/arithmetic.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.7">
+<test>
    <settings>
        <max_memory_usage>30000000000</max_memory_usage>
    </settings>
--- a/tests/performance/array_auc.xml
+++ b/tests/performance/array_auc.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>

    <query>SELECT avg(ifNotFinite(arrayAUC(arrayMap(x -> rand(x) / 0x100000000, range(2 + rand() % 100)), arrayMap(x -> rand(x) % 2, range(2 + rand() % 100))), 0)) FROM numbers(100000)</query>
 </test>
--- a/tests/performance/constant_column_search.xml
+++ b/tests/performance/constant_column_search.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <tags>
        <tag>search</tag>
    </tags>
--- a/tests/performance/decimal_aggregates.xml
+++ b/tests/performance/decimal_aggregates.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <max_memory_usage>35G</max_memory_usage>
    </settings>
--- a/tests/performance/decimal_casts.xml
+++ b/tests/performance/decimal_casts.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <max_memory_usage>15G</max_memory_usage>
    </settings>
--- a/tests/performance/direct_dictionary.xml
+++ b/tests/performance/direct_dictionary.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.4">
+<test>
    <create_query>
        CREATE TABLE simple_key_direct_dictionary_source_table
        (
--- a/tests/performance/format_readable.xml
+++ b/tests/performance/format_readable.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
 <query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableSize(number))</query>
 <query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableQuantity(number))</query>
 <query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableTimeDelta(number))</query>
--- a/tests/performance/general_purpose_hashes.xml
+++ b/tests/performance/general_purpose_hashes.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <substitutions>
        <substitution>
           <name>gp_hash_func</name>
--- a/tests/performance/general_purpose_hashes_on_UUID.xml
+++ b/tests/performance/general_purpose_hashes_on_UUID.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.6">
+<test>
    <substitutions>
        <substitution>
           <name>hash_func</name>
--- a/tests/performance/group_array_moving_sum.xml
+++ b/tests/performance/group_array_moving_sum.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <max_memory_usage>30000000000</max_memory_usage>
    </settings>
--- a/tests/performance/group_by_sundy_li.xml
+++ b/tests/performance/group_by_sundy_li.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.4">
+<test>
    <settings>
        <max_insert_threads>8</max_insert_threads>
    </settings>
--- a/tests/performance/hashed_dictionary.xml
+++ b/tests/performance/hashed_dictionary.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <create_query>
        CREATE TABLE simple_key_hashed_dictionary_source_table
        (
--- a/tests/performance/if_array_string.xml
+++ b/tests/performance/if_array_string.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.3">
+<test>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c'])</query>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : ['a', 'b', 'c'])</query>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c']))</query>
--- a/tests/performance/inserts_arrays_lowcardinality.xml
+++ b/tests/performance/inserts_arrays_lowcardinality.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    
    <create_query>CREATE TABLE lot_of_string_arrays_src (`id` UInt64, `col00` Array(String), `col01` Array(String), `col02` Array(String), `col03` Array(String), `col04` Array(String), `col05` Array(String), `col06` Array(String), `col07` Array(String), `col08` Array(String), `col09` Array(String), `col10` Array(String), `col11` Array(String), `col12` Array(String), `col13` Array(String), `col14` Array(String), `col15` Array(String), `col16` Array(String), `col17` Array(String), `col18` Array(String), `col19` Array(String), `col20` Array(String), `col21` Array(String), `col22` Array(String), `col23` Array(String), `col24` Array(String), `col25` Array(String), `col26` Array(String), `col27` Array(String), `col28` Array(String), `col29` Array(String), `col30` Array(String), `col31` Array(String), `col32` Array(String), `col33` Array(String), `col34` Array(String), `col35` Array(String), `col36` Array(String), `col37` Array(String), `col38` Array(String), `col39` Array(String), `col40` Array(String), `col41` Array(String), `col42` Array(String), `col43` Array(String), `col44` Array(String), `col45` Array(String), `col46` Array(String), `col47` Array(String), `col48` Array(String), `col49` Array(String)) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192;</create_query>
    <create_query>CREATE TABLE lot_of_string_arrays_dst_lowcardinality (`id` UInt64, `col00` Array(LowCardinality(String)), `col01` Array(LowCardinality(String)), `col02` Array(LowCardinality(String)), `col03` Array(LowCardinality(String)), `col04` Array(LowCardinality(String)), `col05` Array(LowCardinality(String)), `col06` Array(LowCardinality(String)), `col07` Array(LowCardinality(String)), `col08` Array(LowCardinality(String)), `col09` Array(LowCardinality(String)), `col10` Array(LowCardinality(String)), `col11` Array(LowCardinality(String)), `col12` Array(LowCardinality(String)), `col13` Array(LowCardinality(String)), `col14` Array(LowCardinality(String)), `col15` Array(LowCardinality(String)), `col16` Array(LowCardinality(String)), `col17` Array(LowCardinality(String)), `col18` Array(LowCardinality(String)), `col19` Array(LowCardinality(String)), `col20` Array(LowCardinality(String)), `col21` Array(LowCardinality(String)), `col22` Array(LowCardinality(String)), `col23` Array(LowCardinality(String)), `col24` Array(LowCardinality(String)), `col25` Array(LowCardinality(String)), `col26` Array(LowCardinality(String)), `col27` Array(LowCardinality(String)), `col28` Array(LowCardinality(String)), `col29` Array(LowCardinality(String)), `col30` Array(LowCardinality(String)), `col31` Array(LowCardinality(String)), `col32` Array(LowCardinality(String)), `col33` Array(LowCardinality(String)), `col34` Array(LowCardinality(String)), `col35` Array(LowCardinality(String)), `col36` Array(LowCardinality(String)), `col37` Array(LowCardinality(String)), `col38` Array(LowCardinality(String)), `col39` Array(LowCardinality(String)), `col40` Array(LowCardinality(String)), `col41` Array(LowCardinality(String)), `col42` Array(LowCardinality(String)), `col43` Array(LowCardinality(String)), `col44` Array(LowCardinality(String)), `col45` Array(LowCardinality(String)), `col46` Array(LowCardinality(String)), `col47` Array(LowCardinality(String)), `col48` Array(LowCardinality(String)), 
`col49` Array(LowCardinality(String))) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192;</create_query>
--- a/tests/performance/joins_in_memory.xml
+++ b/tests/performance/joins_in_memory.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <create_query>CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory</create_query>

    <fill_query>INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000)</fill_query>
--- a/tests/performance/joins_in_memory_pmj.xml
+++ b/tests/performance/joins_in_memory_pmj.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="2">
+<test>
    <create_query>CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory</create_query>

    <settings>
--- a/tests/performance/json_extract_simdjson.xml
+++ b/tests/performance/json_extract_simdjson.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <substitutions>
        <substitution>
           <name>json</name>
--- a/tests/performance/logical_functions_medium.xml
+++ b/tests/performance/logical_functions_medium.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <max_threads>1</max_threads>
    </settings>
--- a/tests/performance/logical_functions_small.xml
+++ b/tests/performance/logical_functions_small.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <max_threads>1</max_threads>
    </settings>
--- a/tests/performance/math.xml
+++ b/tests/performance/math.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.6">
+<test>
    <substitutions>
        <substitution>
           <name>func_slow</name>
--- a/tests/performance/number_formatting_formats.xml
+++ b/tests/performance/number_formatting_formats.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.3">
+<test>
    <substitutions>
        <substitution>
            <name>format</name>
--- a/tests/performance/parallel_index.xml
+++ b/tests/performance/parallel_index.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <create_query>create table test_parallel_index (x UInt64, y UInt64, z UInt64, INDEX a (y) TYPE minmax GRANULARITY 2,
        INDEX b (z) TYPE set(8) GRANULARITY 2) engine = MergeTree order by x partition by bitAnd(x, 63 * 64) settings index_granularity = 4;</create_query>

--- a/tests/performance/parse_engine_file.xml
+++ b/tests/performance/parse_engine_file.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>

 <preconditions>
    <table_exists>test.hits</table_exists>
--- a/tests/performance/point_in_polygon.xml
+++ b/tests/performance/point_in_polygon.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <!--
            Not sure why it's needed. Maybe it has something to do with the
--- a/tests/performance/questdb_sum_int32.xml
+++ b/tests/performance/questdb_sum_int32.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <max_threads>4</max_threads>
        <max_memory_usage>20G</max_memory_usage>
--- a/tests/performance/random_printable_ascii.xml
+++ b/tests/performance/random_printable_ascii.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomPrintableASCII(10))</query>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomPrintableASCII(100))</query>
    <query>SELECT count() FROM zeros(100000) WHERE NOT ignore(randomPrintableASCII(1000))</query>
--- a/tests/performance/reinterpret_as.xml
+++ b/tests/performance/reinterpret_as.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <allow_experimental_bigint_types>1</allow_experimental_bigint_types>
        <max_memory_usage>15G</max_memory_usage>
--- a/tests/performance/select_format.xml
+++ b/tests/performance/select_format.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.3">
+<test>
    <settings>
        <output_format_pretty_max_rows>1000000</output_format_pretty_max_rows>
        <max_threads>1</max_threads>
--- a/tests/performance/set_index.xml
+++ b/tests/performance/set_index.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="2">
+<test>
    <create_query>CREATE TABLE test_in (`a` UInt32) ENGINE = MergeTree() ORDER BY a</create_query>
    <fill_query>INSERT INTO test_in SELECT number FROM numbers(500000000)</fill_query>

--- a/tests/performance/string_join.xml
+++ b/tests/performance/string_join.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>



--- a/tests/performance/string_set.xml
+++ b/tests/performance/string_set.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>



--- a/tests/performance/sum_map.xml
+++ b/tests/performance/sum_map.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <max_threads>1</max_threads>
    </settings>
--- a/tests/performance/synthetic_hardware_benchmark.xml
+++ b/tests/performance/synthetic_hardware_benchmark.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <settings>
        <max_memory_usage>30000000000</max_memory_usage>
    </settings>
--- a/tests/performance/visit_param_extract_raw.xml
+++ b/tests/performance/visit_param_extract_raw.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>
    <substitutions>
        <substitution>
           <name>param</name>
--- a/tests/performance/website.xml
+++ b/tests/performance/website.xml
@ -1,4 +1,4 @@
-<test max_ignored_relative_change="0.2">
+<test>

    <preconditions>
        <table_exists>hits_10m_single</table_exists>