2019-12-26 17:35:41 +00:00
|
|
|
-- input is table(query text, run UInt32, version int, time float)
|
|
|
|
select
|
2020-01-27 12:35:56 +00:00
|
|
|
floor(original_medians_array.time_by_version[1], 4) l,
|
|
|
|
floor(original_medians_array.time_by_version[2], 4) r,
|
|
|
|
floor((r - l) / l, 3) diff_percent,
|
2020-04-27 12:47:59 +00:00
|
|
|
floor(threshold / l, 3) threshold_percent,
|
|
|
|
query
|
2019-12-26 17:35:41 +00:00
|
|
|
from
|
|
|
|
(
|
2020-04-27 12:47:59 +00:00
|
|
|
-- quantiles of randomization distributions
|
|
|
|
select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold
|
2020-04-28 07:45:35 +00:00
|
|
|
---- uncomment to see what the distribution is really like
|
|
|
|
--, uniqExact(d) u
|
|
|
|
--, arraySort(x->x.1,
|
|
|
|
-- arrayZip(
|
|
|
|
-- (sumMap([d], [1]) as f).1,
|
|
|
|
-- f.2)) full_histogram
|
2019-12-26 17:35:41 +00:00
|
|
|
from
|
|
|
|
(
|
2020-04-27 12:47:59 +00:00
|
|
|
select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'
|
2019-12-26 17:35:41 +00:00
|
|
|
from (
|
2020-04-27 12:47:59 +00:00
|
|
|
select medianExact(time) median_time, virtual_run, random_label -- get median times, grouping by random label
|
2019-12-26 17:35:41 +00:00
|
|
|
from (
|
2020-04-27 12:47:59 +00:00
|
|
|
select *, toUInt32(rowNumberInAllBlocks() % 2) random_label -- randomly relabel measurements
|
2019-12-26 17:35:41 +00:00
|
|
|
from (
|
2020-04-27 12:47:59 +00:00
|
|
|
select time, number virtual_run
|
|
|
|
from
|
|
|
|
-- strip the query away before the join -- it might be several kB long;
|
|
|
|
(select time, run, version from table) no_query,
|
|
|
|
-- duplicate input measurements into many virtual runs
|
|
|
|
numbers(1, 100000) nn
|
|
|
|
-- for each virtual run, randomly reorder measurements
|
|
|
|
order by virtual_run, rand()
|
2019-12-26 17:35:41 +00:00
|
|
|
) virtual_runs
|
|
|
|
) relabeled
|
2020-04-27 12:47:59 +00:00
|
|
|
group by virtual_run, random_label
|
2019-12-26 17:35:41 +00:00
|
|
|
) virtual_medians
|
2020-04-27 12:47:59 +00:00
|
|
|
group by virtual_run -- aggregate by random_label
|
2019-12-26 17:35:41 +00:00
|
|
|
) virtual_medians_array
|
2020-04-27 12:47:59 +00:00
|
|
|
-- this select aggregates by virtual_run
|
2019-12-26 17:35:41 +00:00
|
|
|
) rd,
|
|
|
|
(
|
2020-04-27 12:47:59 +00:00
|
|
|
select groupArrayInsertAt(median_time, version) time_by_version
|
2019-12-26 17:35:41 +00:00
|
|
|
from
|
2020-04-27 12:47:59 +00:00
|
|
|
(
|
|
|
|
select medianExact(time) median_time, version
|
|
|
|
from table
|
|
|
|
group by version
|
|
|
|
) original_medians
|
|
|
|
) original_medians_array,
|
|
|
|
(
|
|
|
|
select any(query) query from table
|
|
|
|
) any_query,
|
|
|
|
(
|
|
|
|
select throwIf(uniq(query) != 1) from table
|
|
|
|
) check_single_query -- this subselect checks that there is only one query in the input table;
|
|
|
|
-- written this way so that it is not optimized away (#10523)
|
|
|
|
;
|