calculate perf test precision thresholds from historical data

This commit is contained in:
Alexander Kuzmenkov 2021-05-26 16:30:43 +03:00
parent 67044d69f3
commit 5da54c2745
40 changed files with 132 additions and 51 deletions

View File

@ -552,6 +552,63 @@ create table query_metric_stats_denorm engine File(TSVWithNamesAndTypes,
order by test, query_index, metric_name
;
" 2> >(tee -a analyze/errors.log 1>&2)
# Fetch historical query variability thresholds from the CI database
clickhouse-local --query "
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') thresholds
on query_metric_stats.test = thresholds.test
"
# Fetch per-query historical thresholds from the CI database into
# analyze/historical-thresholds.tsv. When CHPC_DATABASE_URL is not set, an
# empty file is created instead, so that the file always exists.
if [ -v CHPC_DATABASE_URL ]
then
    # Don't show password in the log. Remember whether command tracing was
    # enabled, so that it can be restored once the credentials are no longer
    # on the command line. Without this, tracing would stay disabled for the
    # rest of the script.
    case "$-" in
        *x*) restore_xtrace=1 ;;
        *) restore_xtrace=0 ;;
    esac
    set +x
    client=(clickhouse-client
        # Surprisingly, clickhouse-client doesn't understand --host 127.0.0.1:9000
        # so I have to extract host and port with clickhouse-local. I tried to use
        # Poco URI parser to support this in the client, but it's broken and can't
        # parse host:port.
        # The substitution is intentionally unquoted: it must split into the
        # separate words --host <host> --port <port>.
        $(clickhouse-local --query "with '${CHPC_DATABASE_URL}' as url select '--host ' || domain(url) || ' --port ' || toString(port(url)) format TSV")
        --secure
        --user "${CHPC_DATABASE_USER}"
        --password "${CHPC_DATABASE_PASSWORD}"
        --config "right/config/client_config.xml"
        --database perftest
        --date_time_input_format=best_effort)
    # Precision is going to be 1.5 times worse for PRs. How do I know it? I ran this:
    # SELECT quantilesExact(0., 0.1, 0.5, 0.75, 0.95, 1.)(p / m)
    # FROM
    # (
    #     SELECT
    #         quantileIf(0.95)(stat_threshold, pr_number = 0) AS m,
    #         quantileIf(0.95)(stat_threshold, (pr_number != 0) AND (abs(diff) < stat_threshold)) AS p
    #     FROM query_metrics_v2
    #     WHERE (event_date > (today() - toIntervalMonth(1))) AND (metric = 'client_time')
    #     GROUP BY
    #         test,
    #         query_index,
    #         query_display_name
    #     HAVING count(*) > 100
    # )
    # The file can be empty if the server is inaccessible, so we can't use TSVWithNamesAndTypes.
    # The query uses only rows with pr_number = 0 from the last month and keeps
    # the queries that have more than 100 such rows. The 1.5 multiplier on
    # max_stat_threshold is the PR precision factor explained above.
    "${client[@]}" --query "
        select test, query_index,
            quantileExact(0.99)(abs(diff)) max_diff,
            quantileExactIf(0.99)(stat_threshold, abs(diff) < stat_threshold) * 1.5 max_stat_threshold,
            query_display_name
        from query_metrics_v2
        where event_date > now() - interval 1 month
            and metric = 'client_time'
            and pr_number = 0
        group by test, query_index, query_display_name
        having count(*) > 100
        " > analyze/historical-thresholds.tsv
    # Turn tracing back on for the rest of the script if it was on before.
    if [ "$restore_xtrace" = 1 ]
    then
        set -x
    fi
else
    touch analyze/historical-thresholds.tsv
fi
}
# Analyze results
@ -596,6 +653,26 @@ create view query_metric_stats as
diff float, stat_threshold float')
;
-- Per-query thresholds used by the report, written to report/thresholds.tsv.
-- Starts from query_display_names and left-joins the historical values from
-- analyze/historical-thresholds.tsv and the per-test values from
-- analyze/report-thresholds.tsv.
create table report_thresholds engine File(TSVWithNamesAndTypes, 'report/thresholds.tsv')
as select
query_display_names.test test, query_display_names.query_index query_index,
-- changed_threshold is the largest of 0.1, the historical max_diff and the
-- per-test report_threshold, rounded up to two decimal places.
ceil(greatest(0.1, historical_thresholds.max_diff,
test_thresholds.report_threshold), 2) changed_threshold,
-- unstable_threshold is the largest of 0.2, the historical max_stat_threshold
-- and the per-test report_threshold plus 0.1, rounded up to two decimal places.
ceil(greatest(0.2, historical_thresholds.max_stat_threshold,
test_thresholds.report_threshold + 0.1), 2) unstable_threshold,
query_display_names.query_display_name query_display_name
from query_display_names
-- The historical file is read as plain TSV with an explicit structure.
-- It is matched on test, query_index and query_display_name. Because this is
-- a left join, queries that are absent from the file are still kept.
left join file('analyze/historical-thresholds.tsv', TSV,
'test text, query_index int, max_diff float, max_stat_threshold float,
query_display_name text') historical_thresholds
on query_display_names.test = historical_thresholds.test
and query_display_names.query_index = historical_thresholds.query_index
and query_display_names.query_display_name = historical_thresholds.query_display_name
-- Per-test thresholds are matched by test name only.
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') test_thresholds
on query_display_names.test = test_thresholds.test
;
-- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
@ -610,23 +687,23 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
-- uncaught regressions, because for the default 7 runs we do for PRs,
-- the randomization distribution has only 16 values, so the max quantile
-- is actually 0.9375.
abs(diff) > report_threshold and abs(diff) >= stat_threshold as changed_fail,
abs(diff) > report_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,
abs(diff) > changed_threshold and abs(diff) >= stat_threshold as changed_fail,
abs(diff) > changed_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,
not changed_fail and stat_threshold > report_threshold + 0.10 as unstable_fail,
not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show,
not changed_fail and stat_threshold > unstable_threshold as unstable_fail,
not changed_show and stat_threshold > unstable_threshold - 0.05 as unstable_show,
left, right, diff, stat_threshold,
if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
query_metric_stats.test test, query_metric_stats.query_index query_index,
query_display_name
query_display_names.query_display_name query_display_name
from query_metric_stats
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') thresholds
on query_metric_stats.test = thresholds.test
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
left join report_thresholds
on query_display_names.test = report_thresholds.test
and query_display_names.query_index = report_thresholds.query_index
and query_display_names.query_display_name = report_thresholds.query_display_name
-- 'server_time' is rounded down to ms, which might be bad for very short queries.
-- Use 'client_time' instead.
where metric_name = 'client_time'
@ -889,7 +966,6 @@ create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.ts
order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2)
# Prepare source data for metrics and flamegraphs for queries that were profiled
# by perf.py.
for version in {right,left}

View File

@ -453,7 +453,10 @@ if args.report == 'main':
text += tableRow(r, attrs, anchor)
text += tableEnd()
tables.append(text)
# Don't add an empty table.
if very_unstable_queries:
tables.append(text)
add_unstable_queries()
@ -552,13 +555,13 @@ if args.report == 'main':
message_array.append(str(slower_queries) + ' slower')
if unstable_partial_queries:
unstable_queries += unstable_partial_queries
error_tests += unstable_partial_queries
very_unstable_queries += unstable_partial_queries
status = 'failure'
# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
error_tests += very_unstable_queries
status = 'failure'
message_array.append(str(very_unstable_queries) + ' unstable')

View File

@ -58,7 +58,9 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context
structure = args[2]->as<ASTLiteral &>().value.safeGet<String>();
if (structure.empty())
throw Exception("Table structure is empty", ErrorCodes::BAD_ARGUMENTS);
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Table structure is empty for table function '{}'",
ast_function.formatForErrorMessage());
if (args.size() == 4)
compression_method = args[3]->as<ASTLiteral &>().value.safeGet<String>();

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<allow_experimental_map_type>1</allow_experimental_map_type>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
<preconditions>
<table_exists>hits_100m_single</table_exists>
</preconditions>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.7">
<test>
<settings>
<max_memory_usage>30000000000</max_memory_usage>
</settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<query>SELECT avg(ifNotFinite(arrayAUC(arrayMap(x -> rand(x) / 0x100000000, range(2 + rand() % 100)), arrayMap(x -> rand(x) % 2, range(2 + rand() % 100))), 0)) FROM numbers(100000)</query>
</test>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<tags>
<tag>search</tag>
</tags>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<max_memory_usage>35G</max_memory_usage>
</settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<max_memory_usage>15G</max_memory_usage>
</settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.4">
<test>
<create_query>
CREATE TABLE simple_key_direct_dictionary_source_table
(

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableSize(number))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableQuantity(number))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableTimeDelta(number))</query>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<substitutions>
<substitution>
<name>gp_hash_func</name>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.6">
<test>
<substitutions>
<substitution>
<name>hash_func</name>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<max_memory_usage>30000000000</max_memory_usage>
</settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.4">
<test>
<settings>
<max_insert_threads>8</max_insert_threads>
</settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<create_query>
CREATE TABLE simple_key_hashed_dictionary_source_table
(

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c'])</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : ['a', 'b', 'c'])</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c']))</query>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<create_query>CREATE TABLE lot_of_string_arrays_src (`id` UInt64, `col00` Array(String), `col01` Array(String), `col02` Array(String), `col03` Array(String), `col04` Array(String), `col05` Array(String), `col06` Array(String), `col07` Array(String), `col08` Array(String), `col09` Array(String), `col10` Array(String), `col11` Array(String), `col12` Array(String), `col13` Array(String), `col14` Array(String), `col15` Array(String), `col16` Array(String), `col17` Array(String), `col18` Array(String), `col19` Array(String), `col20` Array(String), `col21` Array(String), `col22` Array(String), `col23` Array(String), `col24` Array(String), `col25` Array(String), `col26` Array(String), `col27` Array(String), `col28` Array(String), `col29` Array(String), `col30` Array(String), `col31` Array(String), `col32` Array(String), `col33` Array(String), `col34` Array(String), `col35` Array(String), `col36` Array(String), `col37` Array(String), `col38` Array(String), `col39` Array(String), `col40` Array(String), `col41` Array(String), `col42` Array(String), `col43` Array(String), `col44` Array(String), `col45` Array(String), `col46` Array(String), `col47` Array(String), `col48` Array(String), `col49` Array(String)) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192;</create_query>
<create_query>CREATE TABLE lot_of_string_arrays_dst_lowcardinality (`id` UInt64, `col00` Array(LowCardinality(String)), `col01` Array(LowCardinality(String)), `col02` Array(LowCardinality(String)), `col03` Array(LowCardinality(String)), `col04` Array(LowCardinality(String)), `col05` Array(LowCardinality(String)), `col06` Array(LowCardinality(String)), `col07` Array(LowCardinality(String)), `col08` Array(LowCardinality(String)), `col09` Array(LowCardinality(String)), `col10` Array(LowCardinality(String)), `col11` Array(LowCardinality(String)), `col12` Array(LowCardinality(String)), `col13` Array(LowCardinality(String)), `col14` Array(LowCardinality(String)), `col15` Array(LowCardinality(String)), `col16` Array(LowCardinality(String)), `col17` Array(LowCardinality(String)), `col18` Array(LowCardinality(String)), `col19` Array(LowCardinality(String)), `col20` Array(LowCardinality(String)), `col21` Array(LowCardinality(String)), `col22` Array(LowCardinality(String)), `col23` Array(LowCardinality(String)), `col24` Array(LowCardinality(String)), `col25` Array(LowCardinality(String)), `col26` Array(LowCardinality(String)), `col27` Array(LowCardinality(String)), `col28` Array(LowCardinality(String)), `col29` Array(LowCardinality(String)), `col30` Array(LowCardinality(String)), `col31` Array(LowCardinality(String)), `col32` Array(LowCardinality(String)), `col33` Array(LowCardinality(String)), `col34` Array(LowCardinality(String)), `col35` Array(LowCardinality(String)), `col36` Array(LowCardinality(String)), `col37` Array(LowCardinality(String)), `col38` Array(LowCardinality(String)), `col39` Array(LowCardinality(String)), `col40` Array(LowCardinality(String)), `col41` Array(LowCardinality(String)), `col42` Array(LowCardinality(String)), `col43` Array(LowCardinality(String)), `col44` Array(LowCardinality(String)), `col45` Array(LowCardinality(String)), `col46` Array(LowCardinality(String)), `col47` Array(LowCardinality(String)), `col48` Array(LowCardinality(String)), `col49` 
Array(LowCardinality(String))) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192;</create_query>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<create_query>CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory</create_query>
<fill_query>INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000)</fill_query>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="2">
<test>
<create_query>CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory</create_query>
<settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<substitutions>
<substitution>
<name>json</name>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<max_threads>1</max_threads>
</settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<max_threads>1</max_threads>
</settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.6">
<test>
<substitutions>
<substitution>
<name>func_slow</name>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
<substitutions>
<substitution>
<name>format</name>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<create_query>create table test_parallel_index (x UInt64, y UInt64, z UInt64, INDEX a (y) TYPE minmax GRANULARITY 2,
INDEX b (z) TYPE set(8) GRANULARITY 2) engine = MergeTree order by x partition by bitAnd(x, 63 * 64) settings index_granularity = 4;</create_query>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<preconditions>
<table_exists>test.hits</table_exists>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<!--
Not sure why it's needed. Maybe it has something to do with the

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<max_threads>4</max_threads>
<max_memory_usage>20G</max_memory_usage>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomPrintableASCII(10))</query>
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomPrintableASCII(100))</query>
<query>SELECT count() FROM zeros(100000) WHERE NOT ignore(randomPrintableASCII(1000))</query>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<allow_experimental_bigint_types>1</allow_experimental_bigint_types>
<max_memory_usage>15G</max_memory_usage>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
<settings>
<output_format_pretty_max_rows>1000000</output_format_pretty_max_rows>
<max_threads>1</max_threads>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="2">
<test>
<create_query>CREATE TABLE test_in (`a` UInt32) ENGINE = MergeTree() ORDER BY a</create_query>
<fill_query>INSERT INTO test_in SELECT number FROM numbers(500000000)</fill_query>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<max_threads>1</max_threads>
</settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<settings>
<max_memory_usage>30000000000</max_memory_usage>
</settings>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<substitutions>
<substitution>
<name>param</name>

View File

@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
<preconditions>
<table_exists>hits_10m_single</table_exists>