mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge pull request #21381 from ClickHouse/aku/perf-negative
fix a rare false negative in perf tests
This commit is contained in:
commit
f169be740a
@ -358,6 +358,8 @@ mkdir analyze analyze/tmp ||:
|
||||
build_log_column_definitions
|
||||
|
||||
# Split the raw test output into files suitable for analysis.
|
||||
# To debug calculations only for a particular test, substitute a suitable
|
||||
# wildcard here, e.g. `for test_file in modulo-raw.tsv`.
|
||||
for test_file in *-raw.tsv
|
||||
do
|
||||
test_name=$(basename "$test_file" "-raw.tsv")
|
||||
@ -467,7 +469,13 @@ create view broken_queries as
|
||||
create table query_run_metrics_for_stats engine File(
|
||||
TSV, -- do not add header -- will parse with grep
|
||||
'analyze/query-run-metrics-for-stats.tsv')
|
||||
as select test, query_index, 0 run, version, metric_values
|
||||
as select test, query_index, 0 run, version,
|
||||
-- For debugging, add a filter for a particular metric like this:
|
||||
-- arrayFilter(m, n -> n = 'client_time', metric_values, metric_names)
|
||||
-- metric_values
|
||||
-- Note that further reporting may break, because the metric names are
|
||||
-- not filtered.
|
||||
metric_values
|
||||
from query_run_metric_arrays
|
||||
where (test, query_index) not in broken_queries
|
||||
order by test, query_index, run, version
|
||||
@ -585,8 +593,19 @@ create view query_metric_stats as
|
||||
-- Main statistics for queries -- query time as reported in query log.
|
||||
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
|
||||
as select
|
||||
abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
|
||||
abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show,
|
||||
-- It is important to have a non-strict inequality with stat_threshold
|
||||
-- here. The randomization distribution is actually discrete, and when
|
||||
-- the number of runs is small, the quantile we need (e.g. 0.99) turns
|
||||
-- out to be the maximum value of the distribution. We can also hit this
|
||||
-- maximum possible value with our test run, and this obviously means
|
||||
-- that we have observed the difference to the best precision possible
|
||||
-- for the given number of runs. If we use a strict equality here, we
|
||||
-- will miss such cases. This happened in the wild and lead to some
|
||||
-- uncaught regressions, because for the default 7 runs we do for PRs,
|
||||
-- the randomization distribution has only 16 values, so the max quantile
|
||||
-- is actually 0.9375.
|
||||
abs(diff) > report_threshold and abs(diff) >= stat_threshold as changed_fail,
|
||||
abs(diff) > report_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,
|
||||
|
||||
not changed_fail and stat_threshold > report_threshold + 0.10 as unstable_fail,
|
||||
not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show,
|
||||
|
@ -1,4 +1,6 @@
|
||||
-- input is table(test text, query text, run UInt32, version int, metrics Array(float))
|
||||
-- The input is table(test text, query text, run UInt32, version UInt8, metrics Array(float)).
|
||||
-- Run like this:
|
||||
-- clickhouse-local --queries-file eqmed.sql -S 'test text, query text, run UInt32, version UInt8, metrics Array(float)' --file analyze/tmp/modulo_0.tsv
|
||||
select
|
||||
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[1] as l) l_rounded,
|
||||
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[2] as r) r_rounded,
|
||||
@ -8,14 +10,19 @@ select
|
||||
from
|
||||
(
|
||||
-- quantiles of randomization distributions
|
||||
-- note that for small number of runs, the exact quantile might not make
|
||||
-- sense, because the last possible value of randomization distribution
|
||||
-- might take a larger percentage of distirbution (i.e. the distribution
|
||||
-- actually has discrete values, and the last step can be large).
|
||||
select quantileExactForEach(0.99)(
|
||||
arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d
|
||||
) threshold
|
||||
---- uncomment to see what the distribution is really like
|
||||
--, uniqExact(d.1) u
|
||||
---- Uncomment to see what the distribution is really like. This debug
|
||||
---- code only works for single (the first) metric.
|
||||
--, uniqExact(d[1]) u
|
||||
--, arraySort(x->x.1,
|
||||
-- arrayZip(
|
||||
-- (sumMap([d.1], [1]) as f).1,
|
||||
-- (sumMap([d[1]], [1]) as f).1,
|
||||
-- f.2)) full_histogram
|
||||
from
|
||||
(
|
||||
|
@ -1,7 +1,4 @@
|
||||
<test>
|
||||
|
||||
|
||||
|
||||
<query>SELECT number % 128 FROM numbers(300000000) FORMAT Null</query>
|
||||
<query>SELECT number % 255 FROM numbers(300000000) FORMAT Null</query>
|
||||
<query>SELECT number % 256 FROM numbers(300000000) FORMAT Null</query>
|
||||
|
Loading…
Reference in New Issue
Block a user