mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 23:21:59 +00:00)

calculate perf test precision thresholds from historical data

This commit is contained in:
parent 67044d69f3
commit 5da54c2745
@@ -552,6 +552,63 @@ create table query_metric_stats_denorm engine File(TSVWithNamesAndTypes,
order by test, query_index, metric_name
;
" 2> >(tee -a analyze/errors.log 1>&2)

# Fetch historical query variability thresholds from the CI database
clickhouse-local --query "
left join file('analyze/report-thresholds.tsv', TSV,
    'test text, report_threshold float') thresholds
    on query_metric_stats.test = thresholds.test
"

if [ -v CHPC_DATABASE_URL ]
then
    set +x # Don't show password in the log
    client=(clickhouse-client
        # Surprisingly, clickhouse-client doesn't understand --host 127.0.0.1:9000,
        # so I have to extract host and port with clickhouse-local. I tried to use
        # the Poco URI parser to support this in the client, but it's broken and
        # can't parse host:port.
        $(clickhouse-local --query "with '${CHPC_DATABASE_URL}' as url select '--host ' || domain(url) || ' --port ' || toString(port(url)) format TSV")
        --secure
        --user "${CHPC_DATABASE_USER}"
        --password "${CHPC_DATABASE_PASSWORD}"
        --config "right/config/client_config.xml"
        --database perftest
        --date_time_input_format=best_effort)

    # Precision is going to be 1.5 times worse for PRs. How do I know? I ran this:
    # SELECT quantilesExact(0., 0.1, 0.5, 0.75, 0.95, 1.)(p / m)
    # FROM
    # (
    #     SELECT
    #         quantileIf(0.95)(stat_threshold, pr_number = 0) AS m,
    #         quantileIf(0.95)(stat_threshold, (pr_number != 0) AND (abs(diff) < stat_threshold)) AS p
    #     FROM query_metrics_v2
    #     WHERE (event_date > (today() - toIntervalMonth(1))) AND (metric = 'client_time')
    #     GROUP BY
    #         test,
    #         query_index,
    #         query_display_name
    #     HAVING count(*) > 100
    # )
    # The file can be empty if the server is inaccessible, so we can't use TSVWithNamesAndTypes.
    "${client[@]}" --query "
        select test, query_index,
            quantileExact(0.99)(abs(diff)) max_diff,
            quantileExactIf(0.99)(stat_threshold, abs(diff) < stat_threshold) * 1.5 max_stat_threshold,
            query_display_name
        from query_metrics_v2
        where event_date > now() - interval 1 month
            and metric = 'client_time'
            and pr_number = 0
        group by test, query_index, query_display_name
        having count(*) > 100
        " > analyze/historical-thresholds.tsv
else
    touch analyze/historical-thresholds.tsv
fi
}

# Analyze results
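Aside: a minimal standalone sketch of the host/port extraction trick used above. clickhouse-client does not accept --host 127.0.0.1:9000, so the script asks clickhouse-local to split the URL with its domain() and port() functions; the URL below is a made-up placeholder standing in for the real CHPC_DATABASE_URL.

    # Hypothetical URL, stands in for ${CHPC_DATABASE_URL}.
    url='https://perftest-db.example.com:9440'
    read -r host port <<< "$(clickhouse-local --query "
        with '${url}' as url
        select domain(url), toString(port(url))
        format TSV")"
    # Prints: --host perftest-db.example.com --port 9440
    echo "--host ${host} --port ${port}"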
@@ -596,6 +653,26 @@ create view query_metric_stats as
    diff float, stat_threshold float')
;

create table report_thresholds engine File(TSVWithNamesAndTypes, 'report/thresholds.tsv')
    as select
        query_display_names.test test, query_display_names.query_index query_index,
        ceil(greatest(0.1, historical_thresholds.max_diff,
            test_thresholds.report_threshold), 2) changed_threshold,
        ceil(greatest(0.2, historical_thresholds.max_stat_threshold,
            test_thresholds.report_threshold + 0.1), 2) unstable_threshold,
        query_display_names.query_display_name query_display_name
    from query_display_names
    left join file('analyze/historical-thresholds.tsv', TSV,
        'test text, query_index int, max_diff float, max_stat_threshold float,
        query_display_name text') historical_thresholds
        on query_display_names.test = historical_thresholds.test
            and query_display_names.query_index = historical_thresholds.query_index
            and query_display_names.query_display_name = historical_thresholds.query_display_name
    left join file('analyze/report-thresholds.tsv', TSV,
        'test text, report_threshold float') test_thresholds
        on query_display_names.test = test_thresholds.test
;

-- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
    as select
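For illustration only (not part of the commit), here is how the two greatest(...) expressions above combine their sources: the changed threshold is floored at 0.1 and the unstable one at 0.2, the historical max_diff/max_stat_threshold can raise both, the per-test report_threshold still applies, and ceil(x, 2) rounds up to two decimals. The sample rows are invented; a query with no row in the historical TSV contributes 0 through the left join, so the floor or the per-test value takes over.

    clickhouse-local --query "
        select
            ceil(greatest(0.1, max_diff, report_threshold), 2) changed_threshold,
            ceil(greatest(0.2, max_stat_threshold, report_threshold + 0.1), 2) unstable_threshold
        from values(
            'max_diff float, max_stat_threshold float, report_threshold float',
            (0.07, 0.15, 0.2),  -- quiet query: the per-test threshold wins
            (0.34, 0.41, 0.2))  -- noisy query: history raises both thresholds
        format TSVWithNames"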
@@ -610,23 +687,23 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
        -- uncaught regressions, because for the default 7 runs we do for PRs,
        -- the randomization distribution has only 16 values, so the max quantile
        -- is actually 0.9375.
        abs(diff) > report_threshold and abs(diff) >= stat_threshold as changed_fail,
        abs(diff) > report_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,
        abs(diff) > changed_threshold and abs(diff) >= stat_threshold as changed_fail,
        abs(diff) > changed_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,

        not changed_fail and stat_threshold > report_threshold + 0.10 as unstable_fail,
        not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show,
        not changed_fail and stat_threshold > unstable_threshold as unstable_fail,
        not changed_show and stat_threshold > unstable_threshold - 0.05 as unstable_show,

        left, right, diff, stat_threshold,
        if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
        query_metric_stats.test test, query_metric_stats.query_index query_index,
        query_display_name
        query_display_names.query_display_name query_display_name
    from query_metric_stats
    left join file('analyze/report-thresholds.tsv', TSV,
        'test text, report_threshold float') thresholds
        on query_metric_stats.test = thresholds.test
    left join query_display_names
        on query_metric_stats.test = query_display_names.test
            and query_metric_stats.query_index = query_display_names.query_index
    left join report_thresholds
        on query_display_names.test = report_thresholds.test
            and query_display_names.query_index = report_thresholds.query_index
            and query_display_names.query_display_name = report_thresholds.query_display_name
    -- 'server_time' is rounded down to ms, which might be bad for very short queries.
    -- Use 'client_time' instead.
    where metric_name = 'client_time'
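Again as an invented illustration (not from the commit), the pass/fail rule above reduces to: a query only fails as changed when |diff| clears both its per-query changed_threshold and the statistical threshold, and only counts as unstable when the statistical threshold itself exceeds unstable_threshold.

    clickhouse-local --query "
        select
            abs(diff) > changed_threshold and abs(diff) >= stat_threshold as changed_fail,
            not changed_fail and stat_threshold > unstable_threshold as unstable_fail
        from values(
            'diff float, stat_threshold float, changed_threshold float, unstable_threshold float',
            (0.25, 0.08, 0.15, 0.30),  -- genuine slowdown: changed_fail = 1
            (0.02, 0.45, 0.15, 0.30),  -- noisy query: unstable_fail = 1
            (0.05, 0.10, 0.15, 0.30))  -- healthy query: neither flag fires
        format TSVWithNames"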
@@ -889,7 +966,6 @@ create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.ts
    order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2)


# Prepare source data for metrics and flamegraphs for queries that were profiled
# by perf.py.
for version in {right,left}
@@ -453,6 +453,9 @@ if args.report == 'main':
            text += tableRow(r, attrs, anchor)

        text += tableEnd()

        # Don't add an empty table.
        if very_unstable_queries:
            tables.append(text)

    add_unstable_queries()

@@ -552,13 +555,13 @@ if args.report == 'main':
        message_array.append(str(slower_queries) + ' slower')

    if unstable_partial_queries:
        unstable_queries += unstable_partial_queries
        error_tests += unstable_partial_queries
        very_unstable_queries += unstable_partial_queries
        status = 'failure'

    # Don't show mildly unstable queries, only the very unstable ones we
    # treat as errors.
    if very_unstable_queries:
        error_tests += very_unstable_queries
        status = 'failure'
        message_array.append(str(very_unstable_queries) + ' unstable')
@@ -58,7 +58,9 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context

    structure = args[2]->as<ASTLiteral &>().value.safeGet<String>();
    if (structure.empty())
        throw Exception("Table structure is empty", ErrorCodes::BAD_ARGUMENTS);
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "Table structure is empty for table function '{}'",
            ast_function.formatForErrorMessage());

    if (args.size() == 4)
        compression_method = args[3]->as<ASTLiteral &>().value.safeGet<String>();
@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

    <settings>
        <allow_experimental_map_type>1</allow_experimental_map_type>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
    <preconditions>
        <table_exists>hits_100m_single</table_exists>
    </preconditions>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.7">
<test>
    <settings>
        <max_memory_usage>30000000000</max_memory_usage>
    </settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

    <query>SELECT avg(ifNotFinite(arrayAUC(arrayMap(x -> rand(x) / 0x100000000, range(2 + rand() % 100)), arrayMap(x -> rand(x) % 2, range(2 + rand() % 100))), 0)) FROM numbers(100000)</query>
</test>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <tags>
        <tag>search</tag>
    </tags>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <max_memory_usage>35G</max_memory_usage>
    </settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <max_memory_usage>15G</max_memory_usage>
    </settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.4">
<test>
    <create_query>
        CREATE TABLE simple_key_direct_dictionary_source_table
        (

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableSize(number))</query>
    <query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableQuantity(number))</query>
    <query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(formatReadableTimeDelta(number))</query>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <substitutions>
        <substitution>
            <name>gp_hash_func</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.6">
<test>
    <substitutions>
        <substitution>
            <name>hash_func</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <max_memory_usage>30000000000</max_memory_usage>
    </settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.4">
<test>
    <settings>
        <max_insert_threads>8</max_insert_threads>
    </settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <create_query>
        CREATE TABLE simple_key_hashed_dictionary_source_table
        (

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : ['a', 'b', 'c'])</query>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? materialize(['Hello', 'World']) : ['a', 'b', 'c'])</query>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(rand() % 2 ? ['Hello', 'World'] : materialize(['a', 'b', 'c']))</query>
@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

<create_query>CREATE TABLE lot_of_string_arrays_src (`id` UInt64, `col00` Array(String), `col01` Array(String), `col02` Array(String), `col03` Array(String), `col04` Array(String), `col05` Array(String), `col06` Array(String), `col07` Array(String), `col08` Array(String), `col09` Array(String), `col10` Array(String), `col11` Array(String), `col12` Array(String), `col13` Array(String), `col14` Array(String), `col15` Array(String), `col16` Array(String), `col17` Array(String), `col18` Array(String), `col19` Array(String), `col20` Array(String), `col21` Array(String), `col22` Array(String), `col23` Array(String), `col24` Array(String), `col25` Array(String), `col26` Array(String), `col27` Array(String), `col28` Array(String), `col29` Array(String), `col30` Array(String), `col31` Array(String), `col32` Array(String), `col33` Array(String), `col34` Array(String), `col35` Array(String), `col36` Array(String), `col37` Array(String), `col38` Array(String), `col39` Array(String), `col40` Array(String), `col41` Array(String), `col42` Array(String), `col43` Array(String), `col44` Array(String), `col45` Array(String), `col46` Array(String), `col47` Array(String), `col48` Array(String), `col49` Array(String)) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192;</create_query>
<create_query>CREATE TABLE lot_of_string_arrays_dst_lowcardinality (`id` UInt64, `col00` Array(LowCardinality(String)), `col01` Array(LowCardinality(String)), `col02` Array(LowCardinality(String)), `col03` Array(LowCardinality(String)), `col04` Array(LowCardinality(String)), `col05` Array(LowCardinality(String)), `col06` Array(LowCardinality(String)), `col07` Array(LowCardinality(String)), `col08` Array(LowCardinality(String)), `col09` Array(LowCardinality(String)), `col10` Array(LowCardinality(String)), `col11` Array(LowCardinality(String)), `col12` Array(LowCardinality(String)), `col13` Array(LowCardinality(String)), `col14` Array(LowCardinality(String)), `col15` Array(LowCardinality(String)), `col16` Array(LowCardinality(String)), `col17` Array(LowCardinality(String)), `col18` Array(LowCardinality(String)), `col19` Array(LowCardinality(String)), `col20` Array(LowCardinality(String)), `col21` Array(LowCardinality(String)), `col22` Array(LowCardinality(String)), `col23` Array(LowCardinality(String)), `col24` Array(LowCardinality(String)), `col25` Array(LowCardinality(String)), `col26` Array(LowCardinality(String)), `col27` Array(LowCardinality(String)), `col28` Array(LowCardinality(String)), `col29` Array(LowCardinality(String)), `col30` Array(LowCardinality(String)), `col31` Array(LowCardinality(String)), `col32` Array(LowCardinality(String)), `col33` Array(LowCardinality(String)), `col34` Array(LowCardinality(String)), `col35` Array(LowCardinality(String)), `col36` Array(LowCardinality(String)), `col37` Array(LowCardinality(String)), `col38` Array(LowCardinality(String)), `col39` Array(LowCardinality(String)), `col40` Array(LowCardinality(String)), `col41` Array(LowCardinality(String)), `col42` Array(LowCardinality(String)), `col43` Array(LowCardinality(String)), `col44` Array(LowCardinality(String)), `col45` Array(LowCardinality(String)), `col46` Array(LowCardinality(String)), `col47` Array(LowCardinality(String)), `col48` Array(LowCardinality(String)), `col49` Array(LowCardinality(String))) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192;</create_query>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <create_query>CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory</create_query>

    <fill_query>INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000)</fill_query>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="2">
<test>
    <create_query>CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory</create_query>

    <settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <substitutions>
        <substitution>
            <name>json</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <max_threads>1</max_threads>
    </settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <max_threads>1</max_threads>
    </settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.6">
<test>
    <substitutions>
        <substitution>
            <name>func_slow</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
    <substitutions>
        <substitution>
            <name>format</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <create_query>create table test_parallel_index (x UInt64, y UInt64, z UInt64, INDEX a (y) TYPE minmax GRANULARITY 2,
        INDEX b (z) TYPE set(8) GRANULARITY 2) engine = MergeTree order by x partition by bitAnd(x, 63 * 64) settings index_granularity = 4;</create_query>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

    <preconditions>
        <table_exists>test.hits</table_exists>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <!--
        Not sure why it's needed. Maybe it has something to do with the

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <max_threads>4</max_threads>
        <max_memory_usage>20G</max_memory_usage>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomPrintableASCII(10))</query>
    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomPrintableASCII(100))</query>
    <query>SELECT count() FROM zeros(100000) WHERE NOT ignore(randomPrintableASCII(1000))</query>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <allow_experimental_bigint_types>1</allow_experimental_bigint_types>
        <max_memory_usage>15G</max_memory_usage>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.3">
<test>
    <settings>
        <output_format_pretty_max_rows>1000000</output_format_pretty_max_rows>
        <max_threads>1</max_threads>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="2">
<test>
    <create_query>CREATE TABLE test_in (`a` UInt32) ENGINE = MergeTree() ORDER BY a</create_query>
    <fill_query>INSERT INTO test_in SELECT number FROM numbers(500000000)</fill_query>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <max_threads>1</max_threads>
    </settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <settings>
        <max_memory_usage>30000000000</max_memory_usage>
    </settings>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>
    <substitutions>
        <substitution>
            <name>param</name>

@@ -1,4 +1,4 @@
<test max_ignored_relative_change="0.2">
<test>

    <preconditions>
        <table_exists>hits_10m_single</table_exists>