
#!/bin/bash
# Script preamble: strict mode, signal/exit traps, the script's own location,
# and allocator settings inherited by every process started below.
set -exu
set -o pipefail

# Turn INT/TERM into a regular exit so that the EXIT trap below still runs.
trap "exit" INT TERM

# The watchdog is in the separate process group, so we have to kill it separately
# if the script terminates earlier. `||:` keeps the trap from failing when there
# is nothing left to kill.
trap 'kill $(jobs -pr) ${watchdog_pid:-} ||:' EXIT

# Absolute path of the directory that contains this script.
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# NOTE(review): the two headings below have no statements under them in this
# copy of the file, while LEFT_SERVER_* / RIGHT_SERVER_* variables are used by
# the functions further down and are not assigned anywhere visible. Presumably
# the per-server port definitions belong here -- confirm against the canonical
# file before running (with `set -u` an unset port aborts the script).
# upstream/master
# patched version

# abort_conf -- abort if some options is not recognized
# abort -- abort if something is not right in the env (i.e. per-cpu arenas does not work)
# narenas -- set them explicitly to avoid disabling per-cpu arena in env
# that returns different number of CPUs for some of the following
#
# Assign first and export afterwards, so that a failing `nproc` is caught by
# `set -e` instead of being masked by the exit status of `export`.
MALLOC_CONF="abort_conf:true,abort:true,narenas:$(nproc --all)"
export MALLOC_CONF

# Wait until a ClickHouse server answers queries, polling once per second for
# at most 60 attempts.
# Arguments:
#   $1 - TCP port the server is expected to listen on
#   $2 - pid of the server process
# Outputs:
#   whatever clickhouse-client prints, plus a diagnostic line on failure
# Returns:
#   0 if the server answers `select 1` and the pid is alive, 1 otherwise
function wait_for_server # port, pid
{
    local port=$1
    local pid=$2

    for _ in {1..60}
    do
        # Stop polling as soon as the server answers, or as soon as its
        # process is gone -- there is no point in waiting for a dead server.
        if clickhouse-client --port "$port" --query "select 1" || ! kill -0 "$pid"
        then
            break
        fi
        sleep 1
    done

    # The loop may have ended for any of three reasons (answered, died, timed
    # out), so re-check both conditions explicitly to report the right one.
    if ! clickhouse-client --port "$port" --query "select 1"
    then
        echo "Cannot connect to ClickHouse server at $port"
        return 1
    fi

    if ! kill -0 "$pid"
    then
        echo "Server pid '$pid' is not running"
        return 1
    fi
}

# Print the path of a file that may exist in only one of the two server
# directories: prefer the requested side, fall back to the other one.
# Arguments:
#   $1 - preferred side, `left` or `right`
#   $2 - file name relative to the side directory
# Outputs:
#   "<side>/<name>" on stdout; nothing if $1 is neither `left` nor `right`
#   and the preferred path does not exist
function left_or_right()
{
    local from=$1 && shift
    local basename=$1 && shift

    if [ -e "$from/$basename" ]; then
        echo "$from/$basename"
        # Without this early return the fallback below would print a second path.
        return
    fi

    # The preferred side does not have the file: point at the opposite side
    # (its existence is not checked here).
    case "$from" in
        left) echo "right/$basename" ;;
        right) echo "left/$basename" ;;
    esac
}

# Prepare configs and data directories for both servers.
#
# Copies the `right` config over to `left`, starts a temporary server on top of
# db0 to create the `test` database and rename datasets.hits_v1 into it, stops
# it, and then hardlink-copies db0 into left/db and right/db.
# Globals:
#   LEFT_SERVER_PORT (read), left_pid (written)
# Returns:
#   non-zero (or aborts under `set -e`) if the temporary server cannot be
#   started or does not become reachable
function configure
{
    # Use the new config for both servers, so that we can change it in a PR.
    rm right/config/config.d/text_log.xml ||:
    cp -rv right/config left ||:

    # Start a temporary server to rename the tables
    while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
    echo all killed

    set -m # Spawn temporary in its own process groups

    local setup_left_server_opts=(
        # server options
        # NOTE(review): no options are listed under this heading in the copy
        # under review; presumably the config file selection (and the `--`
        # separator before the overrides) belongs here -- confirm.

        # server *config* directives overrides
        --path db0
        --user_files_path db0/user_files
        --top_level_domains_path "$(left_or_right right top_level_domains)"
        --tcp_port "$LEFT_SERVER_PORT"
    )
    left/clickhouse-server "${setup_left_server_opts[@]}" &> setup-server-log.log &
    # Remember the pid right away: it is needed below, and `set -u` makes an
    # unassigned $left_pid fatal.
    left_pid=$!
    kill -0 "$left_pid"
    disown "$left_pid"
    set +m

    wait_for_server "$LEFT_SERVER_PORT" "$left_pid"
    echo "Server for setup started"

    # Best effort: both statements fail harmlessly if this was already done.
    clickhouse-client --port "$LEFT_SERVER_PORT" --query "create database test" ||:
    clickhouse-client --port "$LEFT_SERVER_PORT" --query "rename table datasets.hits_v1 to test.hits" ||:

    while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
    echo all killed

    # Make copies of the original db for both servers. Use hardlinks instead
    # of copying to save space. Before that, remove preprocessed configs and
    # system tables, because sharing them between servers with hardlinks may
    # lead to weird effects.
    rm -r left/db ||:
    rm -r right/db ||:
    rm -r db0/preprocessed_configs ||:
    rm -r db0/{data,metadata}/system ||:
    rm db0/status ||:
    cp -al db0/ left/db/
    cp -al db0/ right/db/
}

# Kill any running servers and start the `left` and `right` servers again,
# each in its own process group, then wait until both answer queries.
# Globals:
#   LEFT_SERVER_* / RIGHT_SERVER_* ports (read); left_pid, right_pid (written)
# Returns:
#   non-zero (or aborts under `set -e`) if either server fails to start,
#   does not become reachable, or is gone by the end of the function
function restart
{
    while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
    echo all killed

    set -m # Spawn servers in their own process groups

    local left_server_opts=(
        # server options
        # NOTE(review): no options are listed under this heading in the copy
        # under review; presumably the config file selection (and the `--`
        # separator before the overrides) belongs here -- confirm.

        # server *config* directives overrides
        --path left/db
        --user_files_path left/db/user_files
        --top_level_domains_path "$(left_or_right left top_level_domains)"
        --tcp_port "$LEFT_SERVER_PORT"
        --keeper_server.tcp_port "$LEFT_SERVER_KEEPER_PORT"
        --keeper_server.raft_configuration.server.port "$LEFT_SERVER_KEEPER_RAFT_PORT"
        --zookeeper.node.port "$LEFT_SERVER_KEEPER_PORT"
        --interserver_http_port "$LEFT_SERVER_INTERSERVER_PORT"
    )
    left/clickhouse-server "${left_server_opts[@]}" &>> left-server-log.log &
    left_pid=$!
    kill -0 "$left_pid"
    disown "$left_pid"

    local right_server_opts=(
        # server options
        # NOTE(review): see the note in left_server_opts above.

        # server *config* directives overrides
        --path right/db
        --user_files_path right/db/user_files
        --top_level_domains_path "$(left_or_right right top_level_domains)"
        # The right server must listen on the port we wait on and query below;
        # without this override it would come up on its configured default.
        --tcp_port "$RIGHT_SERVER_PORT"
        --keeper_server.tcp_port "$RIGHT_SERVER_KEEPER_PORT"
        --keeper_server.raft_configuration.server.port "$RIGHT_SERVER_KEEPER_RAFT_PORT"
        --zookeeper.node.port "$RIGHT_SERVER_KEEPER_PORT"
        --interserver_http_port "$RIGHT_SERVER_INTERSERVER_PORT"
    )
    right/clickhouse-server "${right_server_opts[@]}" &>> right-server-log.log &
    right_pid=$!
    kill -0 "$right_pid"
    disown "$right_pid"

    set +m

    wait_for_server "$LEFT_SERVER_PORT" "$left_pid"
    echo left ok

    wait_for_server "$RIGHT_SERVER_PORT" "$right_pid"
    echo right ok

    # Dump the visible tables and build options of both servers into the log.
    local port
    for port in "$LEFT_SERVER_PORT" "$RIGHT_SERVER_PORT"
    do
        clickhouse-client --port "$port" --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')"
        clickhouse-client --port "$port" --query "select * from system.build_options"
    done

    # Check again that both servers we started are running -- this is important
    # for running locally, when there might be some other servers started and we
    # will connect to them instead.
    kill -0 "$left_pid"
    kill -0 "$right_pid"
}

# Run the performance tests against both servers.
#
# From what is visible here, the function:
#   * picks the test directory (CHPC_TEST_PATH, or depending on PR_TO_TEST);
#   * picks the test files (CHPC_TEST_GREP, or only the changed test
#     definitions when changed-test-definitions.txt is non-empty and
#     other-changed-files.txt is empty, or every *.xml in the test dir),
#     optionally filtered by CHPC_TEST_GREP_EXCLUDE;
#   * writes benchmarks-to-run.txt, recreates test-times.tsv and
#     wall-clock-times.tsv, and shuffles the test order;
#   * for every test: records progress in status.txt, checks that both servers
#     answer (calling `restart` and logging to {left,right}-server-died.log
#     otherwise), runs the test program with stdout in <test>-raw.tsv and
#     stderr in <test>-err.log, and updates the remaining profiling budget.
#
# NOTE(review): this copy of the function is not runnable as-is. Commit
# timestamps are interleaved with the code, and several statements are absent:
# the braces and then/else/fi/do/done keywords, the assignments of test_prefix,
# argv, max_queries, current_test, profile_seconds_left and
# run_only_changed_tests, and the name of the program after "$script_dir/".
# Restore them from the canonical file before relying on this code.
function run_tests
# Just check that the script runs at all
# NOTE(review): the program name after "$script_dir/" is absent in this copy.
"$script_dir/" --help > /dev/null
2020-04-30 08:36:33 +00:00
# Find the directory with test files.
if [ -v CHPC_TEST_PATH ]
2020-02-14 12:55:47 +00:00
2020-04-30 08:36:33 +00:00
# Use the explicitly set path to directory with test files.
elif [ "$PR_TO_TEST" == "0" ]
2020-04-30 08:36:33 +00:00
# When testing commits from master, use the older test files. This
# allows the tests to pass even when we add new functions and tests for
# them, that are not supported in the old revision.
2020-06-05 14:30:26 +00:00
2020-04-30 08:36:33 +00:00
# For PRs, use newer test files so we can test these changes.
2020-02-14 12:55:47 +00:00
2020-04-30 08:36:33 +00:00
# Determine which tests to run.
2020-04-23 20:18:46 +00:00
if [ -v CHPC_TEST_GREP ]
2020-02-14 12:55:47 +00:00
2020-04-30 08:36:33 +00:00
# Run only explicitly specified tests, if any.
2020-08-19 15:31:13 +00:00
# shellcheck disable=SC2010
2023-01-01 19:53:06 +00:00
test_files=($(ls "$test_prefix" | rg "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}"))
elif [ "$PR_TO_TEST" -ne 0 ] \
&& [ "$(wc -l < changed-test-definitions.txt)" -gt 0 ] \
&& [ "$(wc -l < other-changed-files.txt)" -eq 0 ]
2020-04-02 18:44:58 +00:00
# If only the perf tests were changed in the PR, we will run only these
# tests. The lists of changed files are prepared outside of this function,
# by a script that has access to the repository.
# NOTE(review): the name of that script is absent in this copy.
2021-12-13 09:58:34 +00:00
test_files=($(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-test-definitions.txt))
2020-04-30 08:36:33 +00:00
# The default -- run all tests found in the test dir.
2021-12-13 09:58:34 +00:00
test_files=($(ls "$test_prefix"/*.xml))
2020-04-02 18:44:58 +00:00
# We can filter out certain tests
if [ -v CHPC_TEST_GREP_EXCLUDE ]; then
# filter tests array in bash
2023-01-01 19:53:06 +00:00
filtered_test_files=( $( for i in ${test_files[@]} ; do echo $i ; done | rg -v ${CHPC_TEST_GREP_EXCLUDE} ) )
2021-12-13 08:56:54 +00:00
# We split perf tests into multiple checks to make them faster
# filter tests array in bash
# NOTE(review): the condition that decides which indexes are dropped is absent
# in this copy; as written, every element would be unset.
for index in "${!test_files[@]}"; do
2021-12-13 08:56:54 +00:00
unset -v 'test_files[$index]'
# to have sequential indexes...
if [ "$run_only_changed_tests" -ne 0 ]; then
if [ ${#test_files[@]} -eq 0 ]; then
# Nothing to run: produce a report saying so and stop successfully.
time "$script_dir/" --no-tests-run > report.html
exit 0
# For PRs w/o changes in test definitions, test only a subset of queries,
# and run them less times. If the corresponding environment variables are
# already set, keep those values.
# NOTE: too high CHPC_RUNS/CHPC_MAX_QUERIES may hit internal CI timeout.
2021-11-24 20:05:27 +00:00
# NOTE: Currently we disabled complete run even for master branch
#if [ "$PR_TO_TEST" -ne 0 ] && [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ]
2020-09-15 10:44:21 +00:00
export CHPC_RUNS
2020-06-23 12:30:45 +00:00
# Determine which concurrent benchmarks to run. For now, the only test
# we run as a concurrent benchmark is 'website'. Run it as benchmark if we
# are also going to run it as a normal test.
2021-12-13 09:58:34 +00:00
for test in ${test_files[@]}; do echo "$test"; done | sed -n '/website/p' > benchmarks-to-run.txt
2020-06-23 12:30:45 +00:00
2020-04-30 08:36:33 +00:00
# Delete old report files.
for x in {test-times,wall-clock-times}.tsv
2020-04-30 08:36:33 +00:00
rm -v "$x" ||:
touch "$x"
2021-12-13 09:58:34 +00:00
# Randomize test order. Note that from here on test_files is no longer an
# array but a newline-separated string.
test_files=$(for f in ${test_files[@]}; do echo "$f"; done | sort -R)
2020-06-25 20:19:27 +00:00
2020-09-25 09:52:09 +00:00
# Limit profiling time to 10 minutes, not to run for too long.
2020-02-25 19:51:09 +00:00
# Run the tests.
2021-04-23 17:32:30 +00:00
total_tests=$(echo "$test_files" | wc -w)
2021-04-23 13:47:13 +00:00
2020-02-25 19:51:09 +00:00
2020-02-14 12:55:47 +00:00
for test in $test_files
2021-04-23 19:01:24 +00:00
echo "$current_test of $total_tests tests complete" > status.txt
2020-06-25 20:19:27 +00:00
# Check that both servers are alive, and restart them if they die.
# NOTE(review): $test_name logged here is the name of the previous test,
# because it is only reassigned further down in the loop body.
clickhouse-client --port $LEFT_SERVER_PORT --query "select 1 format Null" \
2020-06-25 20:19:27 +00:00
|| { echo $test_name >> left-server-died.log ; restart ; }
clickhouse-client --port $RIGHT_SERVER_PORT --query "select 1 format Null" \
2020-06-25 20:19:27 +00:00
|| { echo $test_name >> right-server-died.log ; restart ; }
2020-02-25 19:51:09 +00:00
2020-02-27 20:02:50 +00:00
test_name=$(basename "$test" ".xml")
echo test "$test_name"
2020-02-14 19:11:46 +00:00
2020-09-25 09:52:09 +00:00
# Don't profile if we're past the time limit.
2020-10-27 11:04:03 +00:00
# Use awk because bash doesn't support floating point arithmetic.
2020-09-29 11:07:31 +00:00
profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }")
2020-09-25 09:52:09 +00:00
if rg --quiet "$(basename $test)" changed-test-definitions.txt
# Run all queries from changed test files to ensure that all new queries will be tested.
set +x
--host localhost localhost
--runs "$CHPC_RUNS"
--max-queries "$max_queries"
--profile-seconds "$profile_seconds"
TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n")
# one more subshell to suppress trace output for "set +x"
time "$script_dir/" "${argv[@]}" > "$test_name-raw.tsv" 2> "$test_name-err.log"
) 2>>wall-clock-times.tsv >/dev/null \
|| echo "Test $test_name failed with error code $?" >> "$test_name-err.log"
) 2>/dev/null
2020-09-25 09:52:09 +00:00
profile_seconds_left=$(awk -F' ' \
2020-09-28 11:44:49 +00:00
'BEGIN { s = '$profile_seconds_left'; } /^profile-total/ { s -= $2 } END { print s }' \
2020-09-25 09:52:09 +00:00
2021-04-23 13:47:13 +00:00
current_test=$((current_test + 1))
2020-02-14 12:55:47 +00:00
2020-02-14 19:11:46 +00:00
# Watchdog for the profile collection: if it is still running after 10
# minutes, save stack traces of every `clickhouse` process and kill them.
#
# Meant to be started in the background and killed by the caller once the
# collection finishes in time.
# Outputs:
#   appends to profile-errors.log; writes one <pid>.gdb.log per traced process
function get_profiles_watchdog
{
    sleep 600

    echo "The trace collection did not finish in time." >> profile-errors.log

    # Trace all matching processes in parallel, remembering the gdb pids.
    local gdb_pids=()
    local pid
    for pid in $(pgrep -f clickhouse)
    do
        sudo gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" &
        gdb_pids+=("$!")
    done

    # Let the dumps finish before the traced processes are killed, otherwise
    # the backtraces are cut short. A failed gdb must not abort the watchdog.
    if [ "${#gdb_pids[@]}" -gt 0 ]
    then
        wait "${gdb_pids[@]}" ||:
    fi

    # Keep killing for up to 10 seconds, until nothing matches anymore.
    for _ in {1..10}
    do
        if ! pkill -f clickhouse
        then
            break
        fi
        sleep 1
    done
}

# Dump the system log tables of both servers into <side>-*.tsv files.
# Globals:
#   LEFT_SERVER_PORT, RIGHT_SERVER_PORT (read)
# Outputs:
#   {left,right}-{query-log,trace-log,addresses,metric-log,async-metric-log}.tsv
# Returns:
#   the status of the final liveness checks; failures of individual dumps are
#   deliberately ignored
function get_profiles
{
    # Collect the profiles
    local flush_pids=()
    clickhouse-client --port "$LEFT_SERVER_PORT" --query "system flush logs" &
    flush_pids+=("$!")
    clickhouse-client --port "$RIGHT_SERVER_PORT" --query "system flush logs" &
    flush_pids+=("$!")
    # The logs must be flushed before they are read. Wait for these two jobs
    # only (not a bare `wait`), so that unrelated background jobs of the
    # caller, such as a watchdog, are not waited for.
    wait "${flush_pids[@]}" ||:

    local dump_pids=()
    local side port
    for side in left right
    do
        case "$side" in
            left) port=$LEFT_SERVER_PORT ;;
            right) port=$RIGHT_SERVER_PORT ;;
        esac

        clickhouse-client --port "$port" --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > "$side-query-log.tsv" ||: &
        dump_pids+=("$!")
        clickhouse-client --port "$port" --query "select * from system.trace_log format TSVWithNamesAndTypes" > "$side-trace-log.tsv" ||: &
        dump_pids+=("$!")
        clickhouse-client --port "$port" --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > "$side-addresses.tsv" ||: &
        dump_pids+=("$!")
        clickhouse-client --port "$port" --query "select * from system.metric_log format TSVWithNamesAndTypes" > "$side-metric-log.tsv" ||: &
        dump_pids+=("$!")
        clickhouse-client --port "$port" --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > "$side-async-metric-log.tsv" ||: &
        dump_pids+=("$!")
    done

    # Do not return while the dump files are still being written.
    wait "${dump_pids[@]}" ||:

    # Just check that the servers are alive so that we return a proper exit code.
    # We don't consistently check the return codes of the above background jobs.
    clickhouse-client --port "$LEFT_SERVER_PORT" --query "select 1"
    clickhouse-client --port "$RIGHT_SERVER_PORT" --query "select 1"
}

# Build and analyze randomization distribution for all queries.
#
# From what is visible here, the function:
#   * removes the results of a previous analysis and recreates analyze/ and
#     analyze/tmp;
#   * splits every *-raw.tsv into per-kind files under analyze/ (query runs,
#     profiles, client times, thresholds, skipped tests, display names,
#     partial queries), prefixing each row with the test name;
#   * uses clickhouse-local to join the query runs with the left/right query
#     logs into per-run metric arrays and auxiliary tables;
#   * writes one clickhouse-local command per (test, query) prefix into
#     analyze/commands.txt and runs them with GNU parallel, appending to
#     analyze/query-metric-stats.tsv;
#   * joins the metric names back to those stats;
#   * queries the CI database for historical thresholds into
#     analyze/historical-thresholds.tsv.
# Errors of the clickhouse-local and parallel steps are appended to
# analyze/errors.log.
#
# NOTE(review): this copy of the function is not runnable as-is. Commit
# timestamps are interleaved with the code (including inside the SQL strings),
# and several lines are absent: the braces and do/done keywords, the `as`
# clauses / file names of some `engine File(...)` tables, the `client=(` array
# header, and parts of the SQL. Restore them from the canonical file before
# relying on this code.
function analyze_queries
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||:
2020-05-25 01:42:56 +00:00
rm -rf analyze ||:
mkdir analyze analyze/tmp ||:
2020-05-21 23:03:41 +00:00
2020-04-29 17:26:28 +00:00
# Split the raw test output into files suitable for analysis.
# To debug calculations only for a particular test, substitute a suitable
# wildcard here, e.g. `for test_file in modulo-raw.tsv`.
2020-08-19 15:31:13 +00:00
for test_file in *-raw.tsv
2020-04-29 17:26:28 +00:00
test_name=$(basename "$test_file" "-raw.tsv")
sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv"
2020-09-25 09:52:09 +00:00
sed -n "s/^profile\t/$test_name\t/p" < "$test_file" >> "analyze/query-profiles.tsv"
sed -n "s/^client-time\t/$test_name\t/p" < "$test_file" >> "analyze/client-times.tsv"
sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv"
sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv"
sed -n "s/^display-name\t/$test_name\t/p" < "$test_file" >> "analyze/query-display-names.tsv"
sed -n "s/^partial\t/$test_name\t/p" < "$test_file" >> "analyze/partial-queries.tsv"
2020-04-29 17:26:28 +00:00
2020-04-27 12:47:59 +00:00
# for each query run, prepare array of metrics from query log
2023-09-14 03:12:44 +00:00
clickhouse-local --multiquery --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
-- Separately process backward-incompatible ('partial') queries which we could only run on the new server
2020-07-03 08:39:43 +00:00
-- because they use new functions. We can't make normal stats for them, but still
-- have to show some stats so that the PR author can tweak them.
create view partial_queries as select test, query_index
from file('analyze/partial-queries.tsv', TSV,
'test text, query_index int, servers Array(int)');
create table partial_query_times engine File(TSVWithNamesAndTypes,
as select test, query_index, stddevPop(time) time_stddev, median(time) time_median
from query_runs
where (test, query_index) in partial_queries
group by test, query_index
2020-07-03 08:39:43 +00:00
-- Process queries that were run normally, on both servers.
create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes);
create view right_query_log as select *
from file('right-query-log.tsv', TSVWithNamesAndTypes);
2020-06-25 20:19:27 +00:00
create view query_logs as
select 0 version, query_id, ProfileEvents,
2020-08-11 11:53:25 +00:00
query_duration_ms, memory_usage from left_query_log
2020-06-25 20:19:27 +00:00
union all
select 1 version, query_id, ProfileEvents,
2020-08-11 11:53:25 +00:00
query_duration_ms, memory_usage from right_query_log
2020-06-25 20:19:27 +00:00
-- This is a single source of truth on all metrics we have for query runs. The
-- metrics include ProfileEvents from system.query_log, and query run times
-- reported by the test runner.
create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-arrays.tsv')
2020-06-25 20:19:27 +00:00
with (
2024-01-29 12:07:30 +00:00
-- sumMapState with the list of all keys with nullable '0' values because sumMap removes keys with default values
-- and 0::Nullable != NULL
with (select groupUniqArrayArray(mapKeys(ProfileEvents)) from query_logs) as all_names
select arrayReduce('sumMapState', [(all_names, arrayMap(x->0::Nullable(Float64), all_names))])
2020-06-25 20:19:27 +00:00
) as all_metrics
select test, query_index, version, query_id,
arrayMap(x->toNullable(toFloat64(x)), mapValues(ProfileEvents)))]
2020-06-25 20:19:27 +00:00
arrayReduce('sumMapState', [(
2020-08-11 11:53:25 +00:00
['client_time', 'server_time', 'memory_usage'],
[toNullable(toFloat64(query_runs.time)), toNullable(toFloat64(query_duration_ms / 1000.)), toNullable(toFloat64(memory_usage))]
2020-06-25 20:19:27 +00:00
)) as metrics_tuple).1 metric_names,
arrayMap(x->if(isNaN(x),0,x), metrics_tuple.2) metric_values
2020-06-25 20:19:27 +00:00
from query_logs
right join query_runs
2020-06-25 20:19:27 +00:00
on query_logs.query_id = query_runs.query_id
and query_logs.version = query_runs.version
where (test, query_index) not in partial_queries
2020-06-25 20:19:27 +00:00
-- This is just for convenience -- human-readable + easy to make plots.
create table query_run_metrics_denorm engine File(TSV, 'analyze/query-run-metrics-denorm.tsv')
as select test, query_index, metric_names, version, query_id, metric_values
from query_run_metric_arrays
array join metric_names, metric_values
order by test, query_index, metric_names, version, query_id
-- Filter out tests that don't have an even number of runs, to avoid breaking
-- the further calculations. This may happen if there was an error during the
-- test runs, e.g. the server died. It will be reported in test errors, so we
-- don't have to report it again.
create view broken_queries as
select test, query_index
from query_runs
group by test, query_index
having count(*) % 2 != 0
-- This is for statistical processing with eqmed.sql
create table query_run_metrics_for_stats engine File(
2020-06-25 20:19:27 +00:00
TSV, -- do not add header -- will parse with grep
as select test, query_index, 0 run, version,
-- For debugging, add a filter for a particular metric like this:
-- arrayFilter(m, n -> n = 'client_time', metric_values, metric_names)
-- metric_values
-- Note that further reporting may break, because the metric names are
-- not filtered.
from query_run_metric_arrays
where (test, query_index) not in broken_queries
2020-06-25 20:19:27 +00:00
order by test, query_index, run, version
-- This is the list of metric names, so that we can join them back after
-- statistical processing.
2020-06-25 20:19:27 +00:00
create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-names.tsv')
as select metric_names from query_run_metric_arrays limit 1
" 2> >(tee -a analyze/errors.log 1>&2)
2020-04-27 12:47:59 +00:00
# This is a lateral join in bash... please forgive me.
# We don't have arrayPermute(), so I have to make random permutations with
2020-04-27 12:47:59 +00:00
# `order by rand`, and it becomes really slow if I do it for more than one
# query. We also don't have lateral joins. So I just put all runs of each
# query into a separate file, and then compute randomization distribution
# for each file. I do this in parallel using GNU parallel.
# NOTE(review): the `unset IFS` below implies IFS is set somewhere before the
# loop; that assignment is absent in this copy.
( set +x # do not bloat the log
2020-04-27 12:47:59 +00:00
for prefix in $(cut -f1,2 "analyze/query-run-metrics-for-stats.tsv" | sort | uniq)
2020-04-27 12:47:59 +00:00
2020-08-19 15:31:13 +00:00
file="analyze/tmp/${prefix// /_}.tsv"
2023-01-01 19:53:06 +00:00
rg "^$prefix " "analyze/query-run-metrics-for-stats.tsv" > "$file" &
printf "%s\0\n" \
"clickhouse-local \
--file \"$file\" \
--structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"analyze/query-metric-stats.tsv\"" \
2>> analyze/errors.log \
>> analyze/commands.txt
2020-04-27 12:47:59 +00:00
unset IFS
2020-04-27 12:47:59 +00:00
2020-10-01 10:56:56 +00:00
# The comparison script might be bound to one NUMA node for better test
# stability, and the calculation runs out of memory because of this. Use
# all nodes.
2020-10-16 14:44:59 +00:00
numactl --show
2020-10-19 14:31:02 +00:00
numactl --cpunodebind=all --membind=all numactl --show
# Notes for parallel:
# Some queries can consume 8+ GB of memory, so it worth to limit amount of jobs
# that can be run in parallel.
# --memfree:
# will kill jobs, which is not good (and retried until --retries exceeded)
# --memsuspend:
# If the available memory falls below 2 * size, GNU parallel will suspend some of the running jobs.
numactl --cpunodebind=all --membind=all parallel -v --joblog analyze/parallel-log.txt --memsuspend 15G --null < analyze/commands.txt 2>> analyze/errors.log
2023-09-14 03:12:44 +00:00
clickhouse-local --multiquery --query "
-- Join the metric names back to the metric statistics we've calculated, and make
-- a denormalized table of them -- statistics for all metrics for all queries.
-- The WITH, ARRAY JOIN and CROSS JOIN do not like each other:
-- Because of this, we make a view with arrays first, and then apply all the
-- array joins.
create view query_metric_stat_arrays as
with (select * from file('analyze/query-run-metric-names.tsv',
TSV, 'n Array(String)')) as metric_name
select test, query_index, metric_name, left, right, diff, stat_threshold
from file('analyze/query-metric-stats.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
order by test, query_index, metric_name
create table query_metric_stats_denorm engine File(TSVWithNamesAndTypes,
as select test, query_index, metric_name, left, right, diff, stat_threshold
from query_metric_stat_arrays
left array join metric_name, left, right, diff, stat_threshold
order by test, query_index, metric_name
" 2> >(tee -a analyze/errors.log 1>&2)
# Fetch historical query variability thresholds from the CI database
2022-06-07 10:00:31 +00:00
set +x # Don't show password in the log
# Surprisingly, clickhouse-client doesn't understand --host
# so I have to extract host and port with clickhouse-local. I tried to use
# Poco URI parser to support this in the client, but it's broken and can't
# parse host:port.
# NOTE(review): the next lines look like elements of the `client` array that
# is expanded further down; its opening `client=(` line is absent in this copy.
2022-06-07 10:00:31 +00:00
$(clickhouse-local --query "with '${CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_URL}' as url select '--host ' || domain(url) || ' --port ' || toString(port(url)) format TSV")
2022-06-07 10:00:31 +00:00
--config "right/config/client_config.xml"
2021-06-03 17:40:05 +00:00
# Precision is going to be 1.5 times worse for PRs, because we run the queries
# less times. How do I know it? I ran this:
# SELECT quantilesExact(0., 0.1, 0.5, 0.75, 0.95, 1.)(p / m)
# (
# quantileIf(0.95)(stat_threshold, pr_number = 0) AS m,
# quantileIf(0.95)(stat_threshold, (pr_number != 0) AND (abs(diff) < stat_threshold)) AS p
# FROM query_metrics_v2
# WHERE (event_date > (today() - toIntervalMonth(1))) AND (metric = 'client_time')
# test,
# query_index,
# query_display_name
# HAVING count(*) > 100
# )
2021-06-03 17:40:05 +00:00
# The file can be empty if the server is inaccessible, so we can't use
# TSVWithNamesAndTypes.
"${client[@]}" --query "
select test, query_index,
2021-06-06 09:57:18 +00:00
quantileExact(0.99)(abs(diff)) * 1.5 AS max_diff,
quantileExactIf(0.99)(stat_threshold, abs(diff) < stat_threshold) * 1.5 AS max_stat_threshold,
from query_metrics_v2
2021-06-03 17:40:05 +00:00
-- We use results at least one week in the past, so that the current
-- changes do not immediately influence the statistics, and we have
-- some time to notice that something is wrong.
where event_date between now() - interval 1 month - interval 1 week
and now() - interval 1 week
and metric = 'client_time'
and pr_number = 0
group by test, query_index, query_display_name
having count(*) > 100
" > analyze/historical-thresholds.tsv
2021-06-04 15:27:21 +00:00
# Re-enable command tracing now that the credentials are no longer on a
# command line.
set -x
# Make sure the file exists even if the query above produced nothing.
touch analyze/historical-thresholds.tsv
2020-03-02 18:47:37 +00:00
# Analyze results
2020-02-07 18:34:24 +00:00
function report
2020-04-28 07:45:35 +00:00
rm -r report ||:
2020-05-25 01:42:56 +00:00
mkdir report report/tmp ||:
2020-04-28 07:45:35 +00:00
2023-08-22 13:41:15 +00:00
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv all-queries.tsv run-errors.tsv ||:
2020-02-17 19:32:40 +00:00
cat analyze/errors.log >> report/errors.log ||:
2020-04-28 07:45:35 +00:00
cat profile-errors.log >> report/errors.log ||:
2020-04-22 19:41:40 +00:00
2023-09-14 03:12:44 +00:00
clickhouse-local --multiquery --query "
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
2020-07-03 08:39:43 +00:00
create view partial_query_times as select * from
file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes,
2021-11-01 09:13:09 +00:00
'test text, query_index int, time_stddev float, time_median double')
2020-07-03 08:39:43 +00:00
-- Report for backward-incompatible ('partial') queries that we could only run on the new server (e.g.
-- queries with new functions added in the tested PR).
create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv')
as select round(time_median, 3) time,
round(time_stddev / time_median, 3) relative_time_stddev,
test, query_index, query_display_name
2020-07-03 08:39:43 +00:00
from partial_query_times
join query_display_names using (test, query_index)
order by test, query_index
create view query_metric_stats as
select * from file('analyze/query-metric-stats-denorm.tsv',
'test text, query_index int, metric_name text, left float, right float,
diff float, stat_threshold float')
2020-06-25 20:19:27 +00:00
create table report_thresholds engine File(TSVWithNamesAndTypes, 'report/thresholds.tsv')
as select
query_display_names.test test, query_display_names.query_index query_index,
ceil(greatest(0.1, historical_thresholds.max_diff,
test_thresholds.report_threshold), 2) changed_threshold,
ceil(greatest(0.2, historical_thresholds.max_stat_threshold,
test_thresholds.report_threshold + 0.1), 2) unstable_threshold,
query_display_names.query_display_name query_display_name
from query_display_names
left join file('analyze/historical-thresholds.tsv', TSV,
'test text, query_index int, max_diff float, max_stat_threshold float,
query_display_name text') historical_thresholds
on query_display_names.test = historical_thresholds.test
and query_display_names.query_index = historical_thresholds.query_index
and query_display_names.query_display_name = historical_thresholds.query_display_name
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') test_thresholds
on query_display_names.test = test_thresholds.test
-- Main statistics for queries -- query time as reported in query log.
2020-04-28 07:45:35 +00:00
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
2020-03-23 09:39:12 +00:00
as select
-- It is important to have a non-strict inequality with stat_threshold
-- here. The randomization distribution is actually discrete, and when
-- the number of runs is small, the quantile we need (e.g. 0.99) turns
-- out to be the maximum value of the distribution. We can also hit this
-- maximum possible value with our test run, and this obviously means
-- that we have observed the difference to the best precision possible
-- for the given number of runs. If we use a strict equality here, we
-- will miss such cases. This happened in the wild and lead to some
-- uncaught regressions, because for the default 7 runs we do for PRs,
-- the randomization distribution has only 16 values, so the max quantile
-- is actually 0.9375.
abs(diff) > changed_threshold and abs(diff) >= stat_threshold as changed_fail,
abs(diff) > changed_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,
2020-10-27 11:04:03 +00:00
not changed_fail and stat_threshold > unstable_threshold as unstable_fail,
not changed_show and stat_threshold > unstable_threshold - 0.05 as unstable_show,
2020-10-27 11:04:03 +00:00
2020-04-28 07:45:35 +00:00
left, right, diff, stat_threshold,
2020-07-15 13:29:00 +00:00
query_metric_stats.test test, query_metric_stats.query_index query_index,
query_display_names.query_display_name query_display_name
from query_metric_stats
2020-07-03 08:39:43 +00:00
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
left join report_thresholds
on query_display_names.test = report_thresholds.test
and query_display_names.query_index = report_thresholds.query_index
and query_display_names.query_display_name = report_thresholds.query_display_name
2020-09-01 19:05:57 +00:00
-- 'server_time' is rounded down to ms, which might be bad for very short queries.
-- Use 'client_time' instead.
where metric_name = 'client_time'
order by test, query_index, metric_name
2020-04-28 07:45:35 +00:00
create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv')
as with
2020-07-31 19:58:18 +00:00
-- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion
-- of NaN to decimal.
2020-08-05 23:02:18 +00:00
(left > right ? left / right : right / left) as times_change_float,
isFinite(times_change_float) as times_change_finite,
round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal,
2020-08-05 23:02:18 +00:00
? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x'
: '--' as times_change_str
round(left, 3), round(right, 3), times_change_str,
round(diff, 3), round(stat_threshold, 3),
2020-08-05 23:02:18 +00:00
changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc;
2020-02-11 20:00:53 +00:00
create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv')
as select
round(left, 3), round(right, 3), round(diff, 3),
round(stat_threshold, 3), unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
2020-02-11 20:00:53 +00:00
2020-06-09 13:29:07 +00:00
2020-09-03 00:57:25 +00:00
create view test_speedup as
exp2(avg(log2(left / right))) times_speedup,
count(*) queries,
unstable + changed bad,
sum(changed_show) changed,
sum(unstable_show) unstable
2020-06-09 13:29:07 +00:00
from queries
group by test
2020-09-03 00:57:25 +00:00
order by times_speedup desc
create view total_speedup as
'Total' test,
exp2(avg(log2(times_speedup))) times_speedup,
sum(queries) queries,
unstable + changed bad,
sum(changed) changed,
sum(unstable) unstable
from test_speedup
2020-06-09 13:29:07 +00:00
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv')
as with
2020-09-03 00:57:25 +00:00
(times_speedup >= 1
? '-' || toString(round(times_speedup, 3)) || 'x'
: '+' || toString(round(1 / times_speedup, 3)) || 'x')
2020-09-03 00:57:25 +00:00
as times_speedup_str
select test, times_speedup_str, queries, bad, changed, unstable
-- Not sure what's the precedence of UNION ALL vs WHERE & ORDER BY, hence all
-- the braces.
from (
select * from total_speedup
) union all (
select * from test_speedup
(times_speedup >= 1 ? times_speedup : (1 / times_speedup)) >= 1.005
or bad
2020-09-06 23:49:07 +00:00
order by test = 'Total' desc, times_speedup desc
2020-06-09 13:29:07 +00:00
2020-02-07 18:34:24 +00:00
2020-09-03 00:57:25 +00:00
2020-07-03 08:39:43 +00:00
create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');
2020-02-07 18:34:24 +00:00
2020-07-03 08:39:43 +00:00
create table wall_clock_time_per_test engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
2020-02-10 16:34:07 +00:00
create table test_time engine Memory as
2020-02-11 15:01:16 +00:00
select test, sum(client) total_client_time,
2020-09-03 01:42:25 +00:00
max(client) query_max,
min(client) query_min,
count(*) queries
2020-07-03 08:39:43 +00:00
from total_client_time_per_query full join queries using (test, query_index)
2020-02-11 15:01:16 +00:00
group by test;
2020-02-07 18:34:24 +00:00
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
-- Guess the number of query runs used for this test. The number is required to
-- calculate and check the average query run time in the report.
-- We have to be careful, because we will encounter:
-- 1) backward-incompatible ('partial') queries which run only on one server
-- 3) some errors that make query run for a different number of times on a
-- particular server.
create view test_runs as
select test,
-- Default to 7 runs if there are only 'short' queries in the test, and
-- we can't determine the number of runs.
if((ceil(median(t.runs), 0) as r) != 0, r, 7) runs
from (
-- The query id is the same for both servers, so no need to divide here.
uniqExact(query_id) runs,
test, query_index
from query_runs
group by test, query_index
) t
group by test
2020-09-24 11:46:03 +00:00
create view test_times_view as
wall_clock_time_per_test.test test,
2020-02-11 15:01:16 +00:00
2020-09-24 11:46:03 +00:00
2021-03-18 14:59:49 +00:00
real / if(queries > 0, queries, 1) avg_real_per_query,
2020-09-24 11:46:03 +00:00
2020-04-29 16:27:40 +00:00
from test_time
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock_time_per_test
on wall_clock_time_per_test.test = test_time.test
full join test_runs
on test_runs.test = test_time.test
2020-09-24 11:46:03 +00:00
-- WITH TOTALS doesn't work with INSERT SELECT, so we have to jump through these
-- hoops:
create view test_times_view_total as
'Total' test,
2021-03-18 14:59:49 +00:00
sum(real) / if(sum(queries) > 0, sum(queries), 1) avg_real_per_query,
2020-09-24 11:46:03 +00:00
-- Totaling the number of runs doesn't make sense, but use the max so
-- that the reporting script doesn't complain about queries being too
-- long.
from test_times_view
create table test_times_report engine File(TSV, 'report/test-times.tsv')
as select
2020-09-24 11:46:03 +00:00
round(real, 3),
round(total_client_time, 3),
2020-09-24 11:46:03 +00:00
round(query_max, 3),
round(avg_real_per_query, 3),
round(query_min, 3),
2020-09-24 11:46:03 +00:00
from (
select * from test_times_view
union all
select * from test_times_view_total
order by test = 'Total' desc, avg_real_per_query desc
2020-02-10 16:34:07 +00:00
-- report for all queries page, only main metric
create table all_tests_report engine File(TSV, 'report/all-queries.tsv')
as with
2020-08-05 23:02:18 +00:00
-- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion
-- of NaN to decimal.
(left > right ? left / right : right / left) as times_change_float,
isFinite(times_change_float) as times_change_finite,
round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal,
2020-08-05 23:02:18 +00:00
? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x'
: '--' as times_change_str
2020-04-28 07:45:35 +00:00
select changed_fail, unstable_fail,
round(left, 3), round(right, 3), times_change_str,
round(isFinite(diff) ? diff : 0, 3),
round(isFinite(stat_threshold) ? stat_threshold : 0, 3),
2020-07-31 19:58:18 +00:00
test, query_index, query_display_name
2020-06-08 13:57:33 +00:00
from queries order by test, query_index;
2020-04-28 07:45:35 +00:00
2020-07-03 08:39:43 +00:00
-- various compatibility data formats follow, not related to the main report
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
2020-09-03 01:42:25 +00:00
as select 0 short, changed_fail, unstable_fail, left, right, diff,
2020-07-03 08:39:43 +00:00
stat_threshold, test, query_display_name query
from queries
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
select metric_name, left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name
from query_metric_stats
2020-07-03 08:39:43 +00:00
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2)
2020-03-24 17:33:18 +00:00
2020-09-25 09:52:09 +00:00
# Prepare source data for metrics and flamegraphs for the queries that were
# profiled.
2020-03-24 17:33:18 +00:00
for version in {right,left}
2020-05-25 01:03:21 +00:00
rm -rf data
2023-09-14 03:12:44 +00:00
clickhouse-local --multiquery --query "
2020-09-25 09:52:09 +00:00
create view query_profiles as
with 0 as left, 1 as right
2020-09-25 09:52:09 +00:00
select * from file('analyze/query-profiles.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float')
where version = $version
2020-02-17 19:32:40 +00:00
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
2020-02-20 16:28:21 +00:00
2020-03-24 17:33:18 +00:00
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
2020-09-25 09:52:09 +00:00
select query_profiles.test test, query_profiles.query_index query_index,
2020-07-15 13:29:00 +00:00
query_display_name, query_id
2020-09-25 09:52:09 +00:00
from query_profiles
left join query_display_names on
2020-09-25 09:52:09 +00:00
query_profiles.test = query_display_names.test
and query_profiles.query_index = query_display_names.query_index
2020-02-17 19:32:40 +00:00
create view query_log as select *
from file('$version-query-log.tsv', TSVWithNamesAndTypes);
2020-02-20 16:28:21 +00:00
2020-03-24 17:33:18 +00:00
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select test, query_index, query_id, value, metric
from query_log
array join
mapValues(ProfileEvents) as value,
mapKeys(ProfileEvents) as metric
join unstable_query_runs using (query_id)
2020-02-17 19:32:40 +00:00
2020-03-24 17:33:18 +00:00
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
'unstable-run-metrics-2.$version.rep') as
test, query_index, query_id,
v, n
from (
test, query_index, query_id,
2020-03-17 02:41:47 +00:00
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
[memory_usage, read_bytes, written_bytes, query_duration_ms] v
2020-03-24 17:33:18 +00:00
from query_log
join unstable_query_runs using (query_id)
array join v, n;
create view trace_log as select *
from file('$version-trace-log.tsv', TSVWithNamesAndTypes);
2020-05-25 01:03:21 +00:00
create view addresses_src as select addr,
-- Some functions change name between builds, e.g. '__clone' or 'clone' or
-- even '__GI__clone@@GLIBC_2.32'. This breaks differential flame graphs, so
-- filter them out here.
[name, 'clone.S (filtered by script)', 'pthread_cond_timedwait (filtered by script)']
-- this line is a subscript operator of the above array
[1 + multiSearchFirstIndex(name, ['clone.S', 'pthread_cond_timedwait'])] name
from file('$version-addresses.tsv', TSVWithNamesAndTypes);
create table addresses_join_$version engine Join(any, left, address) as
select addr address, name from addresses_src;
2020-02-20 16:28:21 +00:00
2020-03-24 17:33:18 +00:00
create table unstable_run_traces engine File(TSVWithNamesAndTypes,
'unstable-run-traces.$version.rep') as
test, query_index, query_id,
2020-03-24 17:33:18 +00:00
count() value,
2020-05-25 01:03:21 +00:00
joinGet(addresses_join_$version, 'name', arrayJoin(trace))
|| '(' || toString(trace_type) || ')' metric
from trace_log
join unstable_query_runs using query_id
group by test, query_index, query_id, metric
2020-02-17 19:32:40 +00:00
order by count() desc
2020-05-25 01:03:21 +00:00
create table stacks engine File(TSV, 'report/stacks.$version.tsv') as
2020-02-20 16:28:21 +00:00
-- first goes the key used to split the file with grep
2020-05-25 01:03:21 +00:00
test, query_index, trace_type, any(query_display_name),
2020-05-21 23:03:41 +00:00
-- next go the stacks in flamegraph format: 'func1;...;funcN count'
2020-02-20 16:28:21 +00:00
2020-05-25 01:03:21 +00:00
addr -> joinGet(addresses_join_$version, 'name', addr),
2020-02-20 16:28:21 +00:00
) readable_trace,
count() c
2020-03-24 17:33:18 +00:00
from trace_log
2020-02-20 16:28:21 +00:00
join unstable_query_runs using query_id
2020-05-25 01:03:21 +00:00
group by test, query_index, trace_type, trace
order by test, query_index, trace_type, trace
2020-02-20 16:28:21 +00:00
2020-07-31 19:58:18 +00:00
" 2> >(tee -a report/errors.log 1>&2) &
2020-03-24 17:33:18 +00:00
2020-02-11 15:01:16 +00:00
2020-05-25 01:42:56 +00:00
# Create per-query flamegraphs
2021-04-26 09:39:54 +00:00
touch report/query-files.txt
2020-02-20 16:28:21 +00:00
2020-03-24 17:33:18 +00:00
for version in {right,left}
2020-02-20 16:28:21 +00:00
2020-05-25 01:03:21 +00:00
for query in $(cut -d' ' -f1-4 "report/stacks.$version.tsv" | sort | uniq)
2020-03-24 17:33:18 +00:00
query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g')
2020-05-21 23:03:41 +00:00
echo "$query_file" >> report/query-files.txt
2020-04-21 18:46:45 +00:00
# Build separate .svg flamegraph for each query.
2020-05-21 23:03:41 +00:00
# -F is somewhat unsafe because it might match not the beginning of the
# string, but this is unlikely and escaping the query for grep is a pain.
2023-01-01 19:53:06 +00:00
rg -F "$query " "report/stacks.$version.tsv" \
2020-05-25 01:03:21 +00:00
| cut -f 5- \
2020-03-24 17:33:18 +00:00
| sed 's/\t/ /g' \
2020-05-25 01:42:56 +00:00
| tee "report/tmp/$query_file.stacks.$version.tsv" \
2020-05-21 23:03:41 +00:00
| ~/fg/ --hash > "$query_file.$version.svg" &
2020-03-24 17:33:18 +00:00
2020-02-20 16:28:21 +00:00
unset IFS
2020-05-21 23:03:41 +00:00
# Create differential flamegraphs.
2020-08-19 15:31:13 +00:00
while IFS= read -r query_file
2020-05-21 23:03:41 +00:00
2020-05-25 01:42:56 +00:00
~/fg/ "report/tmp/$query_file.stacks.left.tsv" \
"report/tmp/$query_file.stacks.right.tsv" \
| tee "report/tmp/$query_file.stacks.diff.tsv" \
2020-05-21 23:03:41 +00:00
| ~/fg/ > "$query_file.diff.svg" &
2020-08-19 15:31:13 +00:00
done < report/query-files.txt
2020-05-21 23:03:41 +00:00
2020-05-25 01:42:56 +00:00
# Create per-query files with metrics. Note that the key is different from flamegraphs.
for version in {right,left}
for query in $(cut -d' ' -f1-3 "report/metric-deviation.$version.tsv" | sort | uniq)
query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g')
# Ditto the above comment about -F.
2023-01-01 19:53:06 +00:00
rg -F "$query " "report/metric-deviation.$version.tsv" \
2020-05-25 01:42:56 +00:00
| cut -f4- > "$query_file.$version.metrics.rep" &
unset IFS
2020-07-02 07:13:02 +00:00
# Prefer to grep for clickhouse_driver exception messages, but if there are none,
# just show a couple of lines from the log.
for log in *-err.log
test=$(basename "$log" "-err.log")
2020-09-01 19:05:57 +00:00
# The second grep is a heuristic for error messages like
# "socket.timeout: timed out".
2023-01-06 21:57:47 +00:00
rg --no-filename --max-count=2 -i '\(Exception\|Error\):[^:]' "$log" \
|| rg --no-filename --max-count=2 -i '^[^ ]\+: ' "$log" \
2020-07-02 07:13:02 +00:00
|| head -2 "$log"
} | sed "s/^/$test\t/" >> run-errors.tsv ||:
2020-02-07 18:34:24 +00:00
2020-02-10 18:37:46 +00:00
2020-06-25 20:19:27 +00:00
# Build the asynchronous-metrics part of the report.
#
# Reads (relative to the current directory):
#   right-async-metric-log.tsv, left-async-metric-log.tsv (TSVWithNamesAndTypes)
# Writes (the 'metrics' directory is recreated from scratch):
#   metrics/metrics.tsv      -- metric, time offset, left value, right value
#   metrics/changes.tsv      -- metrics whose median differs by more than 5%
#   metrics/errors.log       -- stderr of clickhouse-local
#   metrics/<metric>.tsv     -- per-metric slice of metrics.tsv
#   metrics/<metric>.png     -- per-metric left/right plot
# Side effects: waits for all background jobs of the current shell and leaves
# IFS unset on return.
function report_metrics
{
rm -rf metrics ||:
mkdir metrics

# Every statement must be terminated with ';' because of --multiquery.
clickhouse-local --multiquery --query "
create view right_async_metric_log as
    select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes)
    ;

-- Use the right log as time reference because it may have higher precision.
create table metrics engine File(TSV, 'metrics/metrics.tsv') as
    with (select min(event_time) from right_async_metric_log) as min_time
    select metric, r.event_time - min_time event_time, l.value as left, r.value as right
    from right_async_metric_log r
    asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes) l
    on l.metric = r.metric and r.event_time <= l.event_time
    order by metric, event_time
    ;

-- Show metrics that have changed
create table changes engine File(TSV, 'metrics/changes.tsv')
    as select metric, left, right,
        round(diff, 3), round(times_diff, 3)
    from (
        select metric, median(left) as left, median(right) as right,
            (right - left) / left diff,
            if(left > right, left / right, right / left) times_diff
        from metrics
        group by metric
        having abs(diff) > 0.05 and isFinite(diff) and isFinite(times_diff)
    )
    order by diff desc
    ;
" 2> >(tee -a metrics/errors.log 1>&2)

# Split the list of metric names on newlines only, like the other per-query
# loops in this script do.
IFS=$'\n'
for prefix in $(cut -f1 "metrics/metrics.tsv" | sort | uniq)
do
    # Per-metric data file consumed by gnuplot below.
    file="metrics/$prefix.tsv"

    # The metric name is the first tab-separated column; anchor the match on
    # the line start and the following tab so that a name which is a prefix of
    # another one does not match it.
    rg "^$prefix"$'\t' "metrics/metrics.tsv" | cut -f2- > "$file"

    # Columns of \$file: 1 -- time offset, 2 -- left value, 3 -- right value.
    # The image is rendered in the background; all jobs are awaited below.
    gnuplot -e "
        set datafile separator '\t';
        set terminal png size 960,540;
        set xtics time format '%tH:%tM';
        set title '$prefix' noenhanced offset 0,-3;
        set key left top;
        plot '$file' using 1:2 with lines title 'Left', '$file' using 1:3 with lines title 'Right';
    " \
        | convert - -filter point -resize "200%" "metrics/$prefix.png" &
done

# Do not return before the plots are written.
wait
unset IFS
}
function upload_results
2021-06-03 17:40:05 +00:00
# Prepare info for the CI checks table.
2022-06-15 20:25:36 +00:00
rm -f ci-checks.tsv
2023-09-14 03:12:44 +00:00
clickhouse-local --multiquery --query "
2022-06-15 20:25:36 +00:00
create view queries as select * from file('report/queries.tsv', TSVWithNamesAndTypes);
2021-06-03 17:40:05 +00:00
create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
as select
2022-06-15 20:25:36 +00:00
$PR_TO_TEST :: UInt32 AS pull_request_number,
'$SHA_TO_TEST' :: LowCardinality(String) AS commit_sha,
'${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME:-Performance}' :: LowCardinality(String) AS check_name,
'$(sed -n 's/.*<!--status: \(.*\)-->/\1/p' report.html)' :: LowCardinality(String) AS check_status,
(($(date +%s) - $CHPC_CHECK_START_TIMESTAMP) * 1000) :: UInt64 AS check_duration_ms,
2021-06-08 14:12:47 +00:00
fromUnixTimestamp($CHPC_CHECK_START_TIMESTAMP) check_start_time,
test_name :: LowCardinality(String) AS test_name ,
test_status :: LowCardinality(String) AS test_status,
test_duration_ms :: UInt64 AS test_duration_ms,
2021-06-04 15:27:21 +00:00
2021-06-03 17:40:05 +00:00
: '$PR_TO_TEST' pull_request_url,
'' commit_url,
'' task_url,
'' base_ref,
'' base_repo,
'' head_ref,
'' head_repo
from (
select '' test_name,
'$(sed -n 's/.*<!--message: \(.*\)-->/\1/p' report.html)' test_status,
2021-06-04 15:27:21 +00:00
0 test_duration_ms,
2021-06-03 17:40:05 +00:00
union all
Upload time of the perf tests into artifacts as test_duration_ms Now perf test changes/failures will have two rows, row for new and row for old server. I thought about uploading only the time of the test on the new server, but because not all perf tests uploaded, you cannot always get the time of the test without the changes (i.e. from run on the upstream/master repo/branch). <details> Before: ```sql SELECT concat(test, ' #', toString(query_index)), 'slower' AS test_status, 0 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url FROM queries WHERE (changed_fail != 0) AND (diff > 0) UNION ALL SELECT concat(test, ' #', toString(query_index)), 'unstable' AS test_status, 0 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url FROM queries WHERE unstable_fail != 0 Query id: 49dfdc9a-f549-4499-9a1a-410e5053f6c1 ┌─concat(test, ' #', toString(query_index))─┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ hashed_array_dictionary #16 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ ngram_distance #2 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #3 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #4 │ slower │ 0 
│$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ └───────────────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` After: ```sql SELECT concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name, 'slower' AS test_status, test_desc_.2 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url FROM queries ARRAY JOIN map('old', left, 'new', right) AS test_desc_ WHERE (changed_fail != 0) AND (diff > 0) UNION ALL SELECT concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name, 'unstable' AS test_status, test_desc_.2 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url FROM queries ARRAY JOIN map('old', left, 'new', right) AS test_desc_ WHERE unstable_fail != 0 Query id: 20475bfd-754b-4159-aa16-7798f4720bf8 ┌─test_name────────────────────────┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ hashed_array_dictionary #16::old │ slower │ 0.2149 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ hashed_array_dictionary #16::new │ slower │ 0.2519 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ ngram_distance #2::old │ 
slower │ 0.3598 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #2::new │ slower │ 0.4425 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #3::old │ slower │ 0.3644 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #3::new │ slower │ 0.4716 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #4::old │ slower │ 0.3577 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ │ ngram_distance #4::new │ slower │ 0.4577 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ └──────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` </details> Signed-off-by: Azat Khuzhin <>
2023-12-29 12:33:32 +00:00
test || ' #' || toString(query_index) || '::' || test_desc_.1 test_name,
'slower' test_status,
test_desc_.2*1e3 test_duration_ms,
Upload time of the perf tests into artifacts as test_duration_ms Now perf test changes/failures will have two rows, row for new and row for old server. I thought about uploading only the time of the test on the new server, but because not all perf tests uploaded, you cannot always get the time of the test without the changes (i.e. from run on the upstream/master repo/branch). <details> Before: ```sql SELECT concat(test, ' #', toString(query_index)), 'slower' AS test_status, 0 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url FROM queries WHERE (changed_fail != 0) AND (diff > 0) UNION ALL SELECT concat(test, ' #', toString(query_index)), 'unstable' AS test_status, 0 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url FROM queries WHERE unstable_fail != 0 Query id: 49dfdc9a-f549-4499-9a1a-410e5053f6c1 ┌─concat(test, ' #', toString(query_index))─┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ hashed_array_dictionary #16 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ ngram_distance #2 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #3 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #4 │ slower │ 0 
│$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ └───────────────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` After: ```sql SELECT concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name, 'slower' AS test_status, test_desc_.2 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url FROM queries ARRAY JOIN map('old', left, 'new', right) AS test_desc_ WHERE (changed_fail != 0) AND (diff > 0) UNION ALL SELECT concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name, 'unstable' AS test_status, test_desc_.2 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url FROM queries ARRAY JOIN map('old', left, 'new', right) AS test_desc_ WHERE unstable_fail != 0 Query id: 20475bfd-754b-4159-aa16-7798f4720bf8 ┌─test_name────────────────────────┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ hashed_array_dictionary #16::old │ slower │ 0.2149 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ hashed_array_dictionary #16::new │ slower │ 0.2519 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ ngram_distance #2::old │ 
slower │ 0.3598 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #2::new │ slower │ 0.4425 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #3::old │ slower │ 0.3644 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #3::new │ slower │ 0.4716 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #4::old │ slower │ 0.3577 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ │ ngram_distance #4::new │ slower │ 0.4577 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ └──────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` </details> Signed-off-by: Azat Khuzhin <>
2023-12-29 12:33:32 +00:00
'$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.' || test || '.' || toString(query_index) report_url
from queries
array join map('old', left, 'new', right) as test_desc_
where changed_fail != 0 and diff > 0
2021-06-03 17:40:05 +00:00
union all
Upload time of the perf tests into artifacts as test_duration_ms Now perf test changes/failures will have two rows, row for new and row for old server. I thought about uploading only the time of the test on the new server, but because not all perf tests uploaded, you cannot always get the time of the test without the changes (i.e. from run on the upstream/master repo/branch). <details> Before: ```sql SELECT concat(test, ' #', toString(query_index)), 'slower' AS test_status, 0 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url FROM queries WHERE (changed_fail != 0) AND (diff > 0) UNION ALL SELECT concat(test, ' #', toString(query_index)), 'unstable' AS test_status, 0 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url FROM queries WHERE unstable_fail != 0 Query id: 49dfdc9a-f549-4499-9a1a-410e5053f6c1 ┌─concat(test, ' #', toString(query_index))─┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ hashed_array_dictionary #16 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ ngram_distance #2 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #3 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #4 │ slower │ 0 
│$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ └───────────────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` After: ```sql SELECT concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name, 'slower' AS test_status, test_desc_.2 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url FROM queries ARRAY JOIN map('old', left, 'new', right) AS test_desc_ WHERE (changed_fail != 0) AND (diff > 0) UNION ALL SELECT concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name, 'unstable' AS test_status, test_desc_.2 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url FROM queries ARRAY JOIN map('old', left, 'new', right) AS test_desc_ WHERE unstable_fail != 0 Query id: 20475bfd-754b-4159-aa16-7798f4720bf8 ┌─test_name────────────────────────┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ hashed_array_dictionary #16::old │ slower │ 0.2149 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ hashed_array_dictionary #16::new │ slower │ 0.2519 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ ngram_distance #2::old │ 
slower │ 0.3598 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #2::new │ slower │ 0.4425 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #3::old │ slower │ 0.3644 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #3::new │ slower │ 0.4716 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #4::old │ slower │ 0.3577 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ │ ngram_distance #4::new │ slower │ 0.4577 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ └──────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` </details> Signed-off-by: Azat Khuzhin <>
2023-12-29 12:33:32 +00:00
test || ' #' || toString(query_index) || '::' || test_desc_.1 test_name,
'unstable' test_status,
test_desc_.2*1e3 test_duration_ms,
Upload time of the perf tests into artifacts as test_duration_ms Now perf test changes/failures will have two rows, row for new and row for old server. I thought about uploading only the time of the test on the new server, but because not all perf tests uploaded, you cannot always get the time of the test without the changes (i.e. from run on the upstream/master repo/branch). <details> Before: ```sql SELECT concat(test, ' #', toString(query_index)), 'slower' AS test_status, 0 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url FROM queries WHERE (changed_fail != 0) AND (diff > 0) UNION ALL SELECT concat(test, ' #', toString(query_index)), 'unstable' AS test_status, 0 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url FROM queries WHERE unstable_fail != 0 Query id: 49dfdc9a-f549-4499-9a1a-410e5053f6c1 ┌─concat(test, ' #', toString(query_index))─┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ hashed_array_dictionary #16 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ ngram_distance #2 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #3 │ slower │ 0 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #4 │ slower │ 0 
│$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ └───────────────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` After: ```sql SELECT concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name, 'slower' AS test_status, test_desc_.2 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url FROM queries ARRAY JOIN map('old', left, 'new', right) AS test_desc_ WHERE (changed_fail != 0) AND (diff > 0) UNION ALL SELECT concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name, 'unstable' AS test_status, test_desc_.2 AS test_duration_ms, concat('$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url FROM queries ARRAY JOIN map('old', left, 'new', right) AS test_desc_ WHERE unstable_fail != 0 Query id: 20475bfd-754b-4159-aa16-7798f4720bf8 ┌─test_name────────────────────────┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ hashed_array_dictionary #16::old │ slower │ 0.2149 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ hashed_array_dictionary #16::new │ slower │ 0.2519 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │ │ ngram_distance #2::old │ 
slower │ 0.3598 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #2::new │ slower │ 0.4425 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2 │ │ ngram_distance #3::old │ slower │ 0.3644 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #3::new │ slower │ 0.4716 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3 │ │ ngram_distance #4::old │ slower │ 0.3577 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ │ ngram_distance #4::new │ slower │ 0.4577 │$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4 │ └──────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` </details> Signed-off-by: Azat Khuzhin <>
2023-12-29 12:33:32 +00:00
'$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.' || test || '.' || toString(query_index) report_url
from queries
array join map('old', left, 'new', right) as test_desc_
where unstable_fail != 0
2021-06-03 17:40:05 +00:00
2022-06-07 10:00:31 +00:00
echo Database for test results is not specified, will not upload them.
return 0
2020-11-04 01:19:53 +00:00
set +x # Don't show password in the log
2021-04-22 16:46:54 +00:00
# Surprisingly, clickhouse-client doesn't understand a combined --host host:port,
# so I have to extract host and port with clickhouse-local. I tried to use
2021-04-22 16:46:54 +00:00
# Poco URI parser to support this in the client, but it's broken and can't
# parse host:port.
2022-06-07 10:00:31 +00:00
$(clickhouse-local --query "with '${CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_URL}' as url select '--host ' || domain(url) || ' --port ' || toString(port(url)) format TSV")
2021-04-22 16:46:54 +00:00
2022-06-07 10:00:31 +00:00
2021-04-22 16:46:54 +00:00
--config "right/config/client_config.xml"
2022-06-14 17:39:16 +00:00
# CREATE TABLE IF NOT EXISTS query_metrics_v2 (
# `event_date` Date,
# `event_time` DateTime,
# `pr_number` UInt32,
# `old_sha` String,
# `new_sha` String,
# `test` LowCardinality(String),
# `query_index` UInt32,
# `query_display_name` String,
2022-06-15 20:25:36 +00:00
# `metric` LowCardinality(String),
2022-06-14 17:39:16 +00:00
# `old_value` Float64,
# `new_value` Float64,
# `diff` Float64,
# `stat_threshold` Float64
# ) ENGINE = ReplicatedMergeTree
# ORDER BY event_date
# CREATE TABLE IF NOT EXISTS run_attributes_v1 (
# `old_sha` String,
# `new_sha` String,
2022-06-15 20:25:36 +00:00
# `metric` LowCardinality(String),
2022-06-14 17:39:16 +00:00
# `metric_value` String
# ) ENGINE = ReplicatedMergeTree
# ORDER BY (old_sha, new_sha)
2022-06-10 14:04:31 +00:00
2021-04-22 16:46:54 +00:00
"${client[@]}" --query "
2020-11-06 10:45:42 +00:00
insert into query_metrics_v2
2020-11-04 01:19:53 +00:00
toDate(event_time) event_date,
toDateTime('$(cd right/ch && git show -s --format=%ci "$SHA_TO_TEST" | cut -d' ' -f-2)') event_time,
$PR_TO_TEST pr_number,
'$REF_SHA' old_sha,
'$SHA_TO_TEST' new_sha,
2022-06-15 20:25:36 +00:00
metric_name as metric,
2020-11-04 01:19:53 +00:00
from input('metric_name text, old_value float, new_value float, diff float,
ratio_display_text text, stat_threshold float,
test text, query_index int, query_display_name text')
format TSV
" < report/all-query-metrics.tsv # Don't leave whitespace after INSERT:
2021-04-22 16:46:54 +00:00
# Upload some run attributes. I use this weird form because it is the same
# form that can be used for historical data when you only have compare.log.
cat compare.log \
| sed -n '
2021-04-22 17:18:21 +00:00
s/.*Model name:[[:space:]]\+\(.*\)$/metric lscpu-model-name \1/p;
s/.*L1d cache:[[:space:]]\+\(.*\)$/metric lscpu-l1d-cache \1/p;
s/.*L1i cache:[[:space:]]\+\(.*\)$/metric lscpu-l1i-cache \1/p;
s/.*L2 cache:[[:space:]]\+\(.*\)$/metric lscpu-l2-cache \1/p;
s/.*L3 cache:[[:space:]]\+\(.*\)$/metric lscpu-l3-cache \1/p;
2021-04-22 16:46:54 +00:00
s/.*left_sha=\(.*\)$/old-sha \1/p;
s/.*right_sha=\(.*\)/new-sha \1/p' \
| awk '
BEGIN { FS = "\t"; OFS = "\t" }
/^old-sha/ { old_sha=$2 }
/^new-sha/ { new_sha=$2 }
/^metric/ { print old_sha, new_sha, $2, $3 }' \
2022-06-10 14:04:31 +00:00
| "${client[@]}" --query "INSERT INTO run_attributes_v1 FORMAT TSV"
2021-04-22 16:46:54 +00:00
# Grepping numactl results from log is too crazy, I'll just call it again.
2022-06-10 14:04:31 +00:00
"${client[@]}" --query "INSERT INTO run_attributes_v1 FORMAT TSV" <<EOF
$REF_SHA $SHA_TO_TEST $(numactl --show | sed -n 's/^cpubind:[[:space:]]\+/numactl-cpubind /p')
$REF_SHA $SHA_TO_TEST $(numactl --hardware | sed -n 's/^available:[[:space:]]\+/numactl-available /p')
2021-06-03 17:40:05 +00:00
# Also insert some data about the check into the CI checks table.
2022-03-29 19:06:50 +00:00
"${client[@]}" --query "INSERT INTO "'"'"default"'"'".checks FORMAT TSVWithNamesAndTypes" \
2021-06-03 17:40:05 +00:00
< ci-checks.tsv
set -x
2020-04-17 15:47:01 +00:00
# Check that clickhouse-local and clickhouse-client are in PATH
clickhouse-local --version > /dev/null
2020-04-17 15:47:01 +00:00
clickhouse-client --version > /dev/null
2020-02-10 18:37:46 +00:00
case "$stage" in
2020-02-14 19:11:46 +00:00
time configure
2020-10-22 12:52:43 +00:00
numactl --show ||:
numactl --hardware ||:
lscpu ||:
2020-10-21 15:06:42 +00:00
dmidecode -t 4 ||:
2020-02-14 19:11:46 +00:00
time restart
2020-03-17 16:37:09 +00:00
# Ignore the errors to collect the log and build at least some report, anyway
2020-02-25 19:51:09 +00:00
time run_tests ||:
2020-02-14 19:11:46 +00:00
2020-06-25 20:19:27 +00:00
# Check for huge pages.
cat /sys/kernel/mm/transparent_hugepage/enabled > thp-enabled.txt ||:
cat /proc/meminfo > meminfo.txt ||:
for pid in $(pgrep -f clickhouse-server)
cat "/proc/$pid/smaps" > "$pid-smaps.txt" ||:
# We had a bug where getting profiles froze sometimes, so try to save some
# logs if this happens again. Give the servers some time to collect all info,
# then trace and kill. Start in a subshell, so that both functions don't
# interfere with each other's jobs through `wait`. Also make the subshell
# have its own process group, so that we can then kill it with all its child
# processes. Somehow it doesn't kill the children by itself when dying.
2020-04-21 21:53:13 +00:00
set -m
( get_profiles_watchdog ) &
set +m
# Check that the watchdog started OK.
kill -0 $watchdog_pid
2020-02-28 16:22:07 +00:00
# If the tests fail with OOM or something, still try to restart the servers
# to collect the logs. Prefer not to restart, because addresses might change
2020-04-21 21:53:13 +00:00
# and we won't be able to process trace_log data. Start in a subshell, so that
# it doesn't interfere with the watchdog through `wait`.
2020-09-23 08:21:55 +00:00
( get_profiles || { restart && get_profiles ; } ) ||:
2020-04-21 21:53:13 +00:00
# Kill the whole process group, because somehow when the subshell is killed,
# the sleep inside remains alive and orphaned.
while env kill -- -$watchdog_pid ; do sleep 1; done
2020-03-03 10:47:32 +00:00
# Stop the servers to free memory for the subsequent query analysis.
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
2020-03-03 10:47:32 +00:00
echo Servers stopped.
2020-02-10 18:37:46 +00:00
2020-02-27 19:43:43 +00:00
2020-04-28 14:26:49 +00:00
time analyze_queries ||:
2020-02-27 19:43:43 +00:00
2020-02-10 18:37:46 +00:00
2020-04-30 08:36:33 +00:00
time report ||:
2020-06-25 20:19:27 +00:00
time report_metrics ||:
2020-07-31 19:58:18 +00:00
cat metrics/errors.log >> report/errors.log ||:
2020-06-25 20:19:27 +00:00
2020-04-28 07:45:35 +00:00
time "$script_dir/" --report=all-queries > all-queries.html 2> >(tee -a report/errors.log 1>&2) ||:
2020-03-17 16:37:09 +00:00
time "$script_dir/" > report.html
2020-02-10 18:37:46 +00:00
time upload_results ||:
2020-02-10 18:37:46 +00:00
2020-04-21 21:53:13 +00:00
# Print some final debug info to help debug Weirdness, of which there is plenty.
pstree -apgT