ClickHouse/tests/performance/scripts/compare.sh
Azat Khuzhin 0d71bf1411 Upload time of the perf tests into artifacts as test_duration_ms
Now perf test changes/failures will have two rows, row for new and row
for old server.

I thought about uploading only the time of the test on the new server,
but because not all perf tests uploaded, you cannot always get the time
of the test without the changes (i.e. from run on the upstream/master
repo/branch).

<details>

Before:

```sql
SELECT
    concat(test, ' #', toString(query_index)),
    'slower' AS test_status,
    0 AS test_duration_ms,
    concat('https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url
FROM queries
WHERE (changed_fail != 0) AND (diff > 0)
UNION ALL
SELECT
    concat(test, ' #', toString(query_index)),
    'unstable' AS test_status,
    0 AS test_duration_ms,
    concat('https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url
FROM queries
WHERE unstable_fail != 0

Query id: 49dfdc9a-f549-4499-9a1a-410e5053f6c1

┌─concat(test, ' #', toString(query_index))─┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ hashed_array_dictionary #16               │ slower      │                0 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │
│ ngram_distance #2                         │ slower      │                0 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2           │
│ ngram_distance #3                         │ slower      │                0 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3           │
│ ngram_distance #4                         │ slower      │                0 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4           │
└───────────────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

After:

```sql
SELECT
    concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name,
    'slower' AS test_status,
    test_desc_.2 AS test_duration_ms,
    concat('https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.', test, '.', toString(query_index)) AS report_url
FROM queries
ARRAY JOIN map('old', left, 'new', right) AS test_desc_
WHERE (changed_fail != 0) AND (diff > 0)
UNION ALL
SELECT
    concat(test, ' #', toString(query_index), '::', test_desc_.1) AS test_name,
    'unstable' AS test_status,
    test_desc_.2 AS test_duration_ms,
    concat('https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.', test, '.', toString(query_index)) AS report_url
FROM queries
ARRAY JOIN map('old', left, 'new', right) AS test_desc_
WHERE unstable_fail != 0

Query id: 20475bfd-754b-4159-aa16-7798f4720bf8

┌─test_name────────────────────────┬─test_status─┬─test_duration_ms─┬─report_url─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ hashed_array_dictionary #16::old │ slower      │           0.2149 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │
│ hashed_array_dictionary #16::new │ slower      │           0.2519 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.hashed_array_dictionary.16 │
│ ngram_distance #2::old           │ slower      │           0.3598 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2           │
│ ngram_distance #2::new           │ slower      │           0.4425 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.2           │
│ ngram_distance #3::old           │ slower      │           0.3644 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3           │
│ ngram_distance #3::new           │ slower      │           0.4716 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.3           │
│ ngram_distance #4::old           │ slower      │           0.3577 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4           │
│ ngram_distance #4::new           │ slower      │           0.4577 │ https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.ngram_distance.4           │
└──────────────────────────────────┴─────────────┴──────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

</details>

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-12-29 13:48:41 +01:00

1420 lines
56 KiB
Bash
Executable File

#!/bin/bash
set -exu
set -o pipefail
trap "exit" INT TERM
# The watchdog is in the separate process group, so we have to kill it separately
# if the script terminates earlier.
trap 'kill $(jobs -pr) ${watchdog_pid:-} ||:' EXIT
stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# upstream/master
LEFT_SERVER_PORT=9001
# patched version
RIGHT_SERVER_PORT=9002
# abort_conf -- abort if some options is not recognized
# abort -- abort if something is not right in the env (i.e. per-cpu arenas does not work)
# narenas -- set them explicitly to avoid disabling per-cpu arena in env
# that returns different number of CPUs for some of the following
# _SC_NPROCESSORS_ONLN/_SC_NPROCESSORS_CONF/sched_getaffinity
export MALLOC_CONF="abort_conf:true,abort:true,narenas:$(nproc --all)"
function wait_for_server # port, pid
{
for _ in {1..60}
do
if clickhouse-client --port "$1" --query "select 1" || ! kill -0 "$2"
then
break
fi
sleep 1
done
if ! clickhouse-client --port "$1" --query "select 1"
then
echo "Cannot connect to ClickHouse server at $1"
return 1
fi
if ! kill -0 "$2"
then
echo "Server pid '$2' is not running"
return 1
fi
}
function left_or_right()
{
local from=$1 && shift
local basename=$1 && shift
if [ -e "$from/$basename" ]; then
echo "$from/$basename"
return
fi
case "$from" in
left) echo "right/$basename" ;;
right) echo "left/$basename" ;;
esac
}
function configure
{
# Use the new config for both servers, so that we can change it in a PR.
rm right/config/config.d/text_log.xml ||:
cp -rv right/config left ||:
# Start a temporary server to rename the tables
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
set -m # Spawn temporary in its own process groups
local setup_left_server_opts=(
# server options
--config-file=left/config/config.xml
--
# server *config* directives overrides
--path db0
--user_files_path db0/user_files
--top_level_domains_path "$(left_or_right right top_level_domains)"
--tcp_port $LEFT_SERVER_PORT
)
left/clickhouse-server "${setup_left_server_opts[@]}" &> setup-server-log.log &
left_pid=$!
kill -0 $left_pid
disown $left_pid
set +m
wait_for_server $LEFT_SERVER_PORT $left_pid
echo "Server for setup started"
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
# Make copies of the original db for both servers. Use hardlinks instead
# of copying to save space. Before that, remove preprocessed configs and
# system tables, because sharing them between servers with hardlinks may
# lead to weird effects.
rm -r left/db ||:
rm -r right/db ||:
rm -r db0/preprocessed_configs ||:
rm -r db0/{data,metadata}/system ||:
rm db0/status ||:
cp -al db0/ left/db/
cp -al db0/ right/db/
}
function restart
{
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
set -m # Spawn servers in their own process groups
local left_server_opts=(
# server options
--config-file=left/config/config.xml
--
# server *config* directives overrides
--path left/db
--user_files_path left/db/user_files
--top_level_domains_path "$(left_or_right left top_level_domains)"
--tcp_port $LEFT_SERVER_PORT
)
left/clickhouse-server "${left_server_opts[@]}" &>> left-server-log.log &
left_pid=$!
kill -0 $left_pid
disown $left_pid
local right_server_opts=(
# server options
--config-file=right/config/config.xml
--
# server *config* directives overrides
--path right/db
--user_files_path right/db/user_files
--top_level_domains_path "$(left_or_right right top_level_domains)"
--tcp_port $RIGHT_SERVER_PORT
)
right/clickhouse-server "${right_server_opts[@]}" &>> right-server-log.log &
right_pid=$!
kill -0 $right_pid
disown $right_pid
set +m
wait_for_server $LEFT_SERVER_PORT $left_pid
echo left ok
wait_for_server $RIGHT_SERVER_PORT $right_pid
echo right ok
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')"
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.build_options"
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')"
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.build_options"
# Check again that both servers we started are running -- this is important
# for running locally, when there might be some other servers started and we
# will connect to them instead.
kill -0 $left_pid
kill -0 $right_pid
}
function run_tests
{
# Just check that the script runs at all
"$script_dir/perf.py" --help > /dev/null
# Find the directory with test files.
if [ -v CHPC_TEST_PATH ]
then
# Use the explicitly set path to directory with test files.
test_prefix="$CHPC_TEST_PATH"
elif [ "$PR_TO_TEST" == "0" ]
then
# When testing commits from master, use the older test files. This
# allows the tests to pass even when we add new functions and tests for
# them, that are not supported in the old revision.
test_prefix=left/performance
else
# For PRs, use newer test files so we can test these changes.
test_prefix=right/performance
fi
run_only_changed_tests=0
# Determine which tests to run.
if [ -v CHPC_TEST_GREP ]
then
# Run only explicitly specified tests, if any.
# shellcheck disable=SC2010
test_files=($(ls "$test_prefix" | rg "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}"))
elif [ "$PR_TO_TEST" -ne 0 ] \
&& [ "$(wc -l < changed-test-definitions.txt)" -gt 0 ] \
&& [ "$(wc -l < other-changed-files.txt)" -eq 0 ]
then
# If only the perf tests were changed in the PR, we will run only these
# tests. The lists of changed files are prepared in entrypoint.sh because
# it has the repository.
test_files=($(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-test-definitions.txt))
run_only_changed_tests=1
else
# The default -- run all tests found in the test dir.
test_files=($(ls "$test_prefix"/*.xml))
fi
# We can filter out certain tests
if [ -v CHPC_TEST_GREP_EXCLUDE ]; then
# filter tests array in bash https://stackoverflow.com/a/40375567
filtered_test_files=( $( for i in ${test_files[@]} ; do echo $i ; done | rg -v ${CHPC_TEST_GREP_EXCLUDE} ) )
test_files=("${filtered_test_files[@]}")
fi
# We split perf tests into multiple checks to make them faster
if [ -v CHPC_TEST_RUN_BY_HASH_TOTAL ]; then
# filter tests array in bash https://stackoverflow.com/a/40375567
for index in "${!test_files[@]}"; do
[ $(( index % CHPC_TEST_RUN_BY_HASH_TOTAL )) != "$CHPC_TEST_RUN_BY_HASH_NUM" ] && \
unset -v 'test_files[$index]'
done
# to have sequential indexes...
test_files=("${test_files[@]}")
fi
if [ "$run_only_changed_tests" -ne 0 ]; then
if [ ${#test_files[@]} -eq 0 ]; then
time "$script_dir/report.py" --no-tests-run > report.html
exit 0
fi
fi
# For PRs w/o changes in test definitions, test only a subset of queries,
# and run them less times. If the corresponding environment variables are
# already set, keep those values.
#
# NOTE: too high CHPC_RUNS/CHPC_MAX_QUERIES may hit internal CI timeout.
# NOTE: Currently we disabled complete run even for master branch
#if [ "$PR_TO_TEST" -ne 0 ] && [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ]
#then
# CHPC_RUNS=${CHPC_RUNS:-7}
# CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-10}
#else
# CHPC_RUNS=${CHPC_RUNS:-13}
# CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-0}
#fi
CHPC_RUNS=${CHPC_RUNS:-7}
CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-10}
export CHPC_RUNS
export CHPC_MAX_QUERIES
# Determine which concurrent benchmarks to run. For now, the only test
# we run as a concurrent benchmark is 'website'. Run it as benchmark if we
# are also going to run it as a normal test.
for test in ${test_files[@]}; do echo "$test"; done | sed -n '/website/p' > benchmarks-to-run.txt
# Delete old report files.
for x in {test-times,wall-clock-times}.tsv
do
rm -v "$x" ||:
touch "$x"
done
# Randomize test order. BTW, it's not an array no more.
test_files=$(for f in ${test_files[@]}; do echo "$f"; done | sort -R)
# Limit profiling time to 10 minutes, not to run for too long.
profile_seconds_left=600
# Run the tests.
total_tests=$(echo "$test_files" | wc -w)
current_test=0
test_name="<none>"
for test in $test_files
do
echo "$current_test of $total_tests tests complete" > status.txt
# Check that both servers are alive, and restart them if they die.
clickhouse-client --port $LEFT_SERVER_PORT --query "select 1 format Null" \
|| { echo $test_name >> left-server-died.log ; restart ; }
clickhouse-client --port $RIGHT_SERVER_PORT --query "select 1 format Null" \
|| { echo $test_name >> right-server-died.log ; restart ; }
test_name=$(basename "$test" ".xml")
echo test "$test_name"
# Don't profile if we're past the time limit.
# Use awk because bash doesn't support floating point arithmetic.
profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }")
if rg --quiet "$(basename $test)" changed-test-definitions.txt
then
# Run all queries from changed test files to ensure that all new queries will be tested.
max_queries=0
else
max_queries=$CHPC_MAX_QUERIES
fi
(
set +x
argv=(
--host localhost localhost
--port "$LEFT_SERVER_PORT" "$RIGHT_SERVER_PORT"
--runs "$CHPC_RUNS"
--max-queries "$max_queries"
--profile-seconds "$profile_seconds"
"$test"
)
TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n")
# one more subshell to suppress trace output for "set +x"
(
time "$script_dir/perf.py" "${argv[@]}" > "$test_name-raw.tsv" 2> "$test_name-err.log"
) 2>>wall-clock-times.tsv >/dev/null \
|| echo "Test $test_name failed with error code $?" >> "$test_name-err.log"
) 2>/dev/null
profile_seconds_left=$(awk -F' ' \
'BEGIN { s = '$profile_seconds_left'; } /^profile-total/ { s -= $2 } END { print s }' \
"$test_name-raw.tsv")
current_test=$((current_test + 1))
done
wait
}
function get_profiles_watchdog
{
sleep 600
echo "The trace collection did not finish in time." >> profile-errors.log
for pid in $(pgrep -f clickhouse)
do
sudo gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" &
done
wait
for _ in {1..10}
do
if ! pkill -f clickhouse
then
break
fi
sleep 1
done
}
function get_profiles
{
# Collect the profiles
clickhouse-client --port $LEFT_SERVER_PORT --query "system flush logs" &
clickhouse-client --port $RIGHT_SERVER_PORT --query "system flush logs" &
wait
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > right-async-metric-log.tsv ||: &
wait
# Just check that the servers are alive so that we return a proper exit code.
# We don't consistently check the return codes of the above background jobs.
clickhouse-client --port $LEFT_SERVER_PORT --query "select 1"
clickhouse-client --port $RIGHT_SERVER_PORT --query "select 1"
}
# Build and analyze randomization distribution for all queries.
function analyze_queries
{
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||:
rm -rf analyze ||:
mkdir analyze analyze/tmp ||:
# Split the raw test output into files suitable for analysis.
# To debug calculations only for a particular test, substitute a suitable
# wildcard here, e.g. `for test_file in modulo-raw.tsv`.
for test_file in *-raw.tsv
do
test_name=$(basename "$test_file" "-raw.tsv")
sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv"
sed -n "s/^profile\t/$test_name\t/p" < "$test_file" >> "analyze/query-profiles.tsv"
sed -n "s/^client-time\t/$test_name\t/p" < "$test_file" >> "analyze/client-times.tsv"
sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv"
sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv"
sed -n "s/^display-name\t/$test_name\t/p" < "$test_file" >> "analyze/query-display-names.tsv"
sed -n "s/^partial\t/$test_name\t/p" < "$test_file" >> "analyze/partial-queries.tsv"
done
# for each query run, prepare array of metrics from query log
clickhouse-local --multiquery --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
-- Separately process backward-incompatible ('partial') queries which we could only run on the new server
-- because they use new functions. We can't make normal stats for them, but still
-- have to show some stats so that the PR author can tweak them.
create view partial_queries as select test, query_index
from file('analyze/partial-queries.tsv', TSV,
'test text, query_index int, servers Array(int)');
create table partial_query_times engine File(TSVWithNamesAndTypes,
'analyze/partial-query-times.tsv')
as select test, query_index, stddevPop(time) time_stddev, median(time) time_median
from query_runs
where (test, query_index) in partial_queries
group by test, query_index
;
-- Process queries that were run normally, on both servers.
create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes);
create view right_query_log as select *
from file('right-query-log.tsv', TSVWithNamesAndTypes);
create view query_logs as
select 0 version, query_id, ProfileEvents,
query_duration_ms, memory_usage from left_query_log
union all
select 1 version, query_id, ProfileEvents,
query_duration_ms, memory_usage from right_query_log
;
-- This is a single source of truth on all metrics we have for query runs. The
-- metrics include ProfileEvents from system.query_log, and query run times
-- reported by the perf.py test runner.
create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-arrays.tsv')
as
with (
-- sumMapState with the list of all keys with '-0.' values. Negative zero is because
-- sumMap removes keys with positive zeros.
with (select groupUniqArrayArray(mapKeys(ProfileEvents)) from query_logs) as all_names
select arrayReduce('sumMapState', [(all_names, arrayMap(x->-0., all_names))])
) as all_metrics
select test, query_index, version, query_id,
(finalizeAggregation(
arrayReduce('sumMapMergeState',
[
all_metrics,
arrayReduce('sumMapState',
[(mapKeys(ProfileEvents),
arrayMap(x->toFloat64(x), mapValues(ProfileEvents)))]
),
arrayReduce('sumMapState', [(
['client_time', 'server_time', 'memory_usage'],
arrayMap(x->if(x != 0., x, -0.), [
toFloat64(query_runs.time),
toFloat64(query_duration_ms / 1000.),
toFloat64(memory_usage)]))])
]
)) as metrics_tuple).1 metric_names,
metrics_tuple.2 metric_values
from query_logs
right join query_runs
on query_logs.query_id = query_runs.query_id
and query_logs.version = query_runs.version
where (test, query_index) not in partial_queries
;
-- This is just for convenience -- human-readable + easy to make plots.
create table query_run_metrics_denorm engine File(TSV, 'analyze/query-run-metrics-denorm.tsv')
as select test, query_index, metric_names, version, query_id, metric_values
from query_run_metric_arrays
array join metric_names, metric_values
order by test, query_index, metric_names, version, query_id
;
-- Filter out tests that don't have an even number of runs, to avoid breaking
-- the further calculations. This may happen if there was an error during the
-- test runs, e.g. the server died. It will be reported in test errors, so we
-- don't have to report it again.
create view broken_queries as
select test, query_index
from query_runs
group by test, query_index
having count(*) % 2 != 0
;
-- This is for statistical processing with eqmed.sql
create table query_run_metrics_for_stats engine File(
TSV, -- do not add header -- will parse with grep
'analyze/query-run-metrics-for-stats.tsv')
as select test, query_index, 0 run, version,
-- For debugging, add a filter for a particular metric like this:
-- arrayFilter(m, n -> n = 'client_time', metric_values, metric_names)
-- metric_values
-- Note that further reporting may break, because the metric names are
-- not filtered.
metric_values
from query_run_metric_arrays
where (test, query_index) not in broken_queries
order by test, query_index, run, version
;
-- This is the list of metric names, so that we can join them back after
-- statistical processing.
create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-names.tsv')
as select metric_names from query_run_metric_arrays limit 1
;
" 2> >(tee -a analyze/errors.log 1>&2)
# This is a lateral join in bash... please forgive me.
# We don't have arrayPermute(), so I have to make random permutations with
# `order by rand`, and it becomes really slow if I do it for more than one
# query. We also don't have lateral joins. So I just put all runs of each
# query into a separate file, and then compute randomization distribution
# for each file. I do this in parallel using GNU parallel.
( set +x # do not bloat the log
IFS=$'\n'
for prefix in $(cut -f1,2 "analyze/query-run-metrics-for-stats.tsv" | sort | uniq)
do
file="analyze/tmp/${prefix// /_}.tsv"
rg "^$prefix " "analyze/query-run-metrics-for-stats.tsv" > "$file" &
printf "%s\0\n" \
"clickhouse-local \
--file \"$file\" \
--structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"analyze/query-metric-stats.tsv\"" \
2>> analyze/errors.log \
>> analyze/commands.txt
done
wait
unset IFS
)
# The comparison script might be bound to one NUMA node for better test
# stability, and the calculation runs out of memory because of this. Use
# all nodes.
numactl --show
numactl --cpunodebind=all --membind=all numactl --show
# Notes for parallel:
#
# Some queries can consume 8+ GB of memory, so it worth to limit amount of jobs
# that can be run in parallel.
#
# --memfree:
#
# will kill jobs, which is not good (and retried until --retries exceeded)
#
# --memsuspend:
#
# If the available memory falls below 2 * size, GNU parallel will suspend some of the running jobs.
numactl --cpunodebind=all --membind=all parallel -v --joblog analyze/parallel-log.txt --memsuspend 15G --null < analyze/commands.txt 2>> analyze/errors.log
clickhouse-local --multiquery --query "
-- Join the metric names back to the metric statistics we've calculated, and make
-- a denormalized table of them -- statistics for all metrics for all queries.
-- The WITH, ARRAY JOIN and CROSS JOIN do not like each other:
-- https://github.com/ClickHouse/ClickHouse/issues/11868
-- https://github.com/ClickHouse/ClickHouse/issues/11757
-- Because of this, we make a view with arrays first, and then apply all the
-- array joins.
create view query_metric_stat_arrays as
with (select * from file('analyze/query-run-metric-names.tsv',
TSV, 'n Array(String)')) as metric_name
select test, query_index, metric_name, left, right, diff, stat_threshold
from file('analyze/query-metric-stats.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
order by test, query_index, metric_name
;
create table query_metric_stats_denorm engine File(TSVWithNamesAndTypes,
'analyze/query-metric-stats-denorm.tsv')
as select test, query_index, metric_name, left, right, diff, stat_threshold
from query_metric_stat_arrays
left array join metric_name, left, right, diff, stat_threshold
order by test, query_index, metric_name
;
" 2> >(tee -a analyze/errors.log 1>&2)
# Fetch historical query variability thresholds from the CI database
if [ -v CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_URL ]
then
set +x # Don't show password in the log
client=(clickhouse-client
# Surprisingly, clickhouse-client doesn't understand --host 127.0.0.1:9000
# so I have to extract host and port with clickhouse-local. I tried to use
# Poco URI parser to support this in the client, but it's broken and can't
# parse host:port.
$(clickhouse-local --query "with '${CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_URL}' as url select '--host ' || domain(url) || ' --port ' || toString(port(url)) format TSV")
--secure
--user "${CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_USER}"
--password "${CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_USER_PASSWORD}"
--config "right/config/client_config.xml"
--date_time_input_format=best_effort)
# Precision is going to be 1.5 times worse for PRs, because we run the queries
# less times. How do I know it? I ran this:
# SELECT quantilesExact(0., 0.1, 0.5, 0.75, 0.95, 1.)(p / m)
# FROM
# (
# SELECT
# quantileIf(0.95)(stat_threshold, pr_number = 0) AS m,
# quantileIf(0.95)(stat_threshold, (pr_number != 0) AND (abs(diff) < stat_threshold)) AS p
# FROM query_metrics_v2
# WHERE (event_date > (today() - toIntervalMonth(1))) AND (metric = 'client_time')
# GROUP BY
# test,
# query_index,
# query_display_name
# HAVING count(*) > 100
# )
#
# The file can be empty if the server is inaccessible, so we can't use
# TSVWithNamesAndTypes.
#
"${client[@]}" --query "
select test, query_index,
quantileExact(0.99)(abs(diff)) * 1.5 AS max_diff,
quantileExactIf(0.99)(stat_threshold, abs(diff) < stat_threshold) * 1.5 AS max_stat_threshold,
query_display_name
from query_metrics_v2
-- We use results at least one week in the past, so that the current
-- changes do not immediately influence the statistics, and we have
-- some time to notice that something is wrong.
where event_date between now() - interval 1 month - interval 1 week
and now() - interval 1 week
and metric = 'client_time'
and pr_number = 0
group by test, query_index, query_display_name
having count(*) > 100
" > analyze/historical-thresholds.tsv
set -x
else
touch analyze/historical-thresholds.tsv
fi
}
# Analyze results
function report
{
rm -r report ||:
mkdir report report/tmp ||:
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv all-queries.tsv run-errors.tsv ||:
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
clickhouse-local --multiquery --query "
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create view partial_query_times as select * from
file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes,
'test text, query_index int, time_stddev float, time_median double')
;
-- Report for backward-incompatible ('partial') queries that we could only run on the new server (e.g.
-- queries with new functions added in the tested PR).
create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv')
as select round(time_median, 3) time,
round(time_stddev / time_median, 3) relative_time_stddev,
test, query_index, query_display_name
from partial_query_times
join query_display_names using (test, query_index)
order by test, query_index
;
create view query_metric_stats as
select * from file('analyze/query-metric-stats-denorm.tsv',
TSVWithNamesAndTypes,
'test text, query_index int, metric_name text, left float, right float,
diff float, stat_threshold float')
;
create table report_thresholds engine File(TSVWithNamesAndTypes, 'report/thresholds.tsv')
as select
query_display_names.test test, query_display_names.query_index query_index,
ceil(greatest(0.1, historical_thresholds.max_diff,
test_thresholds.report_threshold), 2) changed_threshold,
ceil(greatest(0.2, historical_thresholds.max_stat_threshold,
test_thresholds.report_threshold + 0.1), 2) unstable_threshold,
query_display_names.query_display_name query_display_name
from query_display_names
left join file('analyze/historical-thresholds.tsv', TSV,
'test text, query_index int, max_diff float, max_stat_threshold float,
query_display_name text') historical_thresholds
on query_display_names.test = historical_thresholds.test
and query_display_names.query_index = historical_thresholds.query_index
and query_display_names.query_display_name = historical_thresholds.query_display_name
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') test_thresholds
on query_display_names.test = test_thresholds.test
;
-- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- It is important to have a non-strict inequality with stat_threshold
-- here. The randomization distribution is actually discrete, and when
-- the number of runs is small, the quantile we need (e.g. 0.99) turns
-- out to be the maximum value of the distribution. We can also hit this
-- maximum possible value with our test run, and this obviously means
-- that we have observed the difference to the best precision possible
-- for the given number of runs. If we use a strict equality here, we
-- will miss such cases. This happened in the wild and lead to some
-- uncaught regressions, because for the default 7 runs we do for PRs,
-- the randomization distribution has only 16 values, so the max quantile
-- is actually 0.9375.
abs(diff) > changed_threshold and abs(diff) >= stat_threshold as changed_fail,
abs(diff) > changed_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,
not changed_fail and stat_threshold > unstable_threshold as unstable_fail,
not changed_show and stat_threshold > unstable_threshold - 0.05 as unstable_show,
left, right, diff, stat_threshold,
query_metric_stats.test test, query_metric_stats.query_index query_index,
query_display_names.query_display_name query_display_name
from query_metric_stats
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
left join report_thresholds
on query_display_names.test = report_thresholds.test
and query_display_names.query_index = report_thresholds.query_index
and query_display_names.query_display_name = report_thresholds.query_display_name
-- 'server_time' is rounded down to ms, which might be bad for very short queries.
-- Use 'client_time' instead.
where metric_name = 'client_time'
order by test, query_index, metric_name
;
create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv')
as with
-- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion
-- of NaN to decimal.
(left > right ? left / right : right / left) as times_change_float,
isFinite(times_change_float) as times_change_finite,
round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal,
times_change_finite
? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x'
: '--' as times_change_str
select
round(left, 3), round(right, 3), times_change_str,
round(diff, 3), round(stat_threshold, 3),
changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv')
as select
round(left, 3), round(right, 3), round(diff, 3),
round(stat_threshold, 3), unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
create view test_speedup as
select
test,
exp2(avg(log2(left / right))) times_speedup,
count(*) queries,
unstable + changed bad,
sum(changed_show) changed,
sum(unstable_show) unstable
from queries
group by test
order by times_speedup desc
;
create view total_speedup as
select
'Total' test,
exp2(avg(log2(times_speedup))) times_speedup,
sum(queries) queries,
unstable + changed bad,
sum(changed) changed,
sum(unstable) unstable
from test_speedup
;
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv')
as with
(times_speedup >= 1
? '-' || toString(round(times_speedup, 3)) || 'x'
: '+' || toString(round(1 / times_speedup, 3)) || 'x')
as times_speedup_str
select test, times_speedup_str, queries, bad, changed, unstable
-- Not sure what's the precedence of UNION ALL vs WHERE & ORDER BY, hence all
-- the braces.
from (
(
select * from total_speedup
) union all (
select * from test_speedup
where
(times_speedup >= 1 ? times_speedup : (1 / times_speedup)) >= 1.005
or bad
)
)
order by test = 'Total' desc, times_speedup desc
;
create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');
create table wall_clock_time_per_test engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table test_time engine Memory as
select test, sum(client) total_client_time,
max(client) query_max,
min(client) query_min,
count(*) queries
from total_client_time_per_query full join queries using (test, query_index)
group by test;
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
--
-- Guess the number of query runs used for this test. The number is required to
-- calculate and check the average query run time in the report.
-- We have to be careful, because we will encounter:
-- 1) backward-incompatible ('partial') queries which run only on one server
-- 3) some errors that make query run for a different number of times on a
-- particular server.
--
create view test_runs as
select test,
-- Default to 7 runs if there are only 'short' queries in the test, and
-- we can't determine the number of runs.
if((ceil(median(t.runs), 0) as r) != 0, r, 7) runs
from (
select
-- The query id is the same for both servers, so no need to divide here.
uniqExact(query_id) runs,
test, query_index
from query_runs
group by test, query_index
) t
group by test
;
create view test_times_view as
select
wall_clock_time_per_test.test test,
real,
total_client_time,
queries,
query_max,
real / if(queries > 0, queries, 1) avg_real_per_query,
query_min,
runs
from test_time
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock_time_per_test
on wall_clock_time_per_test.test = test_time.test
full join test_runs
on test_runs.test = test_time.test
;
-- WITH TOTALS doesn't work with INSERT SELECT, so we have to jump through these
-- hoops: https://github.com/ClickHouse/ClickHouse/issues/15227
create view test_times_view_total as
select
'Total' test,
sum(real),
sum(total_client_time),
sum(queries),
max(query_max),
sum(real) / if(sum(queries) > 0, sum(queries), 1) avg_real_per_query,
min(query_min),
-- Totaling the number of runs doesn't make sense, but use the max so
-- that the reporting script doesn't complain about queries being too
-- long.
max(runs)
from test_times_view
;
create table test_times_report engine File(TSV, 'report/test-times.tsv')
as select
test,
round(real, 3),
round(total_client_time, 3),
queries,
round(query_max, 3),
round(avg_real_per_query, 3),
round(query_min, 3),
runs
from (
select * from test_times_view
union all
select * from test_times_view_total
)
order by test = 'Total' desc, avg_real_per_query desc
;
-- report for all queries page, only main metric
create table all_tests_report engine File(TSV, 'report/all-queries.tsv')
as with
-- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion
-- of NaN to decimal.
(left > right ? left / right : right / left) as times_change_float,
isFinite(times_change_float) as times_change_finite,
round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal,
times_change_finite
? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x'
: '--' as times_change_str
select changed_fail, unstable_fail,
round(left, 3), round(right, 3), times_change_str,
round(isFinite(diff) ? diff : 0, 3),
round(isFinite(stat_threshold) ? stat_threshold : 0, 3),
test, query_index, query_display_name
from queries order by test, query_index;
--------------------------------------------------------------------------------
-- various compatibility data formats follow, not related to the main report
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select 0 short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
select metric_name, left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name
from query_metric_stats
left join query_display_names
on query_metric_stats.test = query_display_names.test
and query_metric_stats.query_index = query_display_names.query_index
order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2)
# Prepare source data for metrics and flamegraphs for queries that were profiled
# by perf.py.
for version in {right,left}
do
rm -rf data
clickhouse-local --multiquery --query "
create view query_profiles as
with 0 as left, 1 as right
select * from file('analyze/query-profiles.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float')
where version = $version
;
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
select query_profiles.test test, query_profiles.query_index query_index,
query_display_name, query_id
from query_profiles
left join query_display_names on
query_profiles.test = query_display_names.test
and query_profiles.query_index = query_display_names.query_index
;
create view query_log as select *
from file('$version-query-log.tsv', TSVWithNamesAndTypes);
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select test, query_index, query_id, value, metric
from query_log
array join
mapValues(ProfileEvents) as value,
mapKeys(ProfileEvents) as metric
join unstable_query_runs using (query_id)
;
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
'unstable-run-metrics-2.$version.rep') as
select
test, query_index, query_id,
v, n
from (
select
test, query_index, query_id,
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
[memory_usage, read_bytes, written_bytes, query_duration_ms] v
from query_log
join unstable_query_runs using (query_id)
)
array join v, n;
create view trace_log as select *
from file('$version-trace-log.tsv', TSVWithNamesAndTypes);
create view addresses_src as select addr,
-- Some functions change name between builds, e.g. '__clone' or 'clone' or
-- even '__GI__clone@@GLIBC_2.32'. This breaks differential flame graphs, so
-- filter them out here.
[name, 'clone.S (filtered by script)', 'pthread_cond_timedwait (filtered by script)']
-- this line is a subscript operator of the above array
[1 + multiSearchFirstIndex(name, ['clone.S', 'pthread_cond_timedwait'])] name
from file('$version-addresses.tsv', TSVWithNamesAndTypes);
create table addresses_join_$version engine Join(any, left, address) as
select addr address, name from addresses_src;
create table unstable_run_traces engine File(TSVWithNamesAndTypes,
'unstable-run-traces.$version.rep') as
select
test, query_index, query_id,
count() value,
joinGet(addresses_join_$version, 'name', arrayJoin(trace))
|| '(' || toString(trace_type) || ')' metric
from trace_log
join unstable_query_runs using query_id
group by test, query_index, query_id, metric
order by count() desc
;
create table stacks engine File(TSV, 'report/stacks.$version.tsv') as
select
-- first goes the key used to split the file with grep
test, query_index, trace_type, any(query_display_name),
-- next go the stacks in flamegraph format: 'func1;...;funcN count'
arrayStringConcat(
arrayMap(
addr -> joinGet(addresses_join_$version, 'name', addr),
arrayReverse(trace)
),
';'
) readable_trace,
count() c
from trace_log
join unstable_query_runs using query_id
group by test, query_index, trace_type, trace
order by test, query_index, trace_type, trace
;
" 2> >(tee -a report/errors.log 1>&2) &
done
wait
# Create per-query flamegraphs
touch report/query-files.txt
IFS=$'\n'
for version in {right,left}
do
for query in $(cut -d' ' -f1-4 "report/stacks.$version.tsv" | sort | uniq)
do
query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g')
echo "$query_file" >> report/query-files.txt
# Build separate .svg flamegraph for each query.
# -F is somewhat unsafe because it might match not the beginning of the
# string, but this is unlikely and escaping the query for grep is a pain.
rg -F "$query " "report/stacks.$version.tsv" \
| cut -f 5- \
| sed 's/\t/ /g' \
| tee "report/tmp/$query_file.stacks.$version.tsv" \
| ~/fg/flamegraph.pl --hash > "$query_file.$version.svg" &
done
done
wait
unset IFS
# Create differential flamegraphs.
while IFS= read -r query_file
do
~/fg/difffolded.pl "report/tmp/$query_file.stacks.left.tsv" \
"report/tmp/$query_file.stacks.right.tsv" \
| tee "report/tmp/$query_file.stacks.diff.tsv" \
| ~/fg/flamegraph.pl > "$query_file.diff.svg" &
done < report/query-files.txt
wait
# Create per-query files with metrics. Note that the key is different from flamegraphs.
IFS=$'\n'
for version in {right,left}
do
for query in $(cut -d' ' -f1-3 "report/metric-deviation.$version.tsv" | sort | uniq)
do
query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g')
# Ditto the above comment about -F.
rg -F "$query " "report/metric-deviation.$version.tsv" \
| cut -f4- > "$query_file.$version.metrics.rep" &
done
done
wait
unset IFS
# Prefer to grep for clickhouse_driver exception messages, but if there are none,
# just show a couple of lines from the log.
for log in *-err.log
do
test=$(basename "$log" "-err.log")
{
# The second grep is a heuristic for error messages like
# "socket.timeout: timed out".
rg --no-filename --max-count=2 -i '\(Exception\|Error\):[^:]' "$log" \
|| rg --no-filename --max-count=2 -i '^[^ ]\+: ' "$log" \
|| head -2 "$log"
} | sed "s/^/$test\t/" >> run-errors.tsv ||:
done
}
function report_metrics
{
rm -rf metrics ||:
mkdir metrics
clickhouse-local --multiquery --query "
create view right_async_metric_log as
select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes)
;
-- Use the right log as time reference because it may have higher precision.
create table metrics engine File(TSV, 'metrics/metrics.tsv') as
with (select min(event_time) from right_async_metric_log) as min_time
select metric, r.event_time - min_time event_time, l.value as left, r.value as right
from right_async_metric_log r
asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes) l
on l.metric = r.metric and r.event_time <= l.event_time
order by metric, event_time
;
-- Show metrics that have changed
create table changes engine File(TSV, 'metrics/changes.tsv')
as select metric, left, right,
round(diff, 3), round(times_diff, 3)
from (
select metric, median(left) as left, median(right) as right,
(right - left) / left diff,
if(left > right, left / right, right / left) times_diff
from metrics
group by metric
having abs(diff) > 0.05 and isFinite(diff) and isFinite(times_diff)
)
order by diff desc
;
" 2> >(tee -a metrics/errors.log 1>&2)
IFS=$'\n'
for prefix in $(cut -f1 "metrics/metrics.tsv" | sort | uniq)
do
file="metrics/$prefix.tsv"
rg "^$prefix " "metrics/metrics.tsv" | cut -f2- > "$file"
gnuplot -e "
set datafile separator '\t';
set terminal png size 960,540;
set xtics time format '%tH:%tM';
set title '$prefix' noenhanced offset 0,-3;
set key left top;
plot
'$file' using 1:2 with lines title 'Left'
, '$file' using 1:3 with lines title 'Right'
;
" \
| convert - -filter point -resize "200%" "metrics/$prefix.png" &
done
wait
unset IFS
}
function upload_results
{
# Prepare info for the CI checks table.
rm -f ci-checks.tsv
clickhouse-local --multiquery --query "
create view queries as select * from file('report/queries.tsv', TSVWithNamesAndTypes);
create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
as select
$PR_TO_TEST :: UInt32 AS pull_request_number,
'$SHA_TO_TEST' :: LowCardinality(String) AS commit_sha,
'${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME:-Performance}' :: LowCardinality(String) AS check_name,
'$(sed -n 's/.*<!--status: \(.*\)-->/\1/p' report.html)' :: LowCardinality(String) AS check_status,
(($(date +%s) - $CHPC_CHECK_START_TIMESTAMP) * 1000) :: UInt64 AS check_duration_ms,
fromUnixTimestamp($CHPC_CHECK_START_TIMESTAMP) check_start_time,
test_name :: LowCardinality(String) AS test_name ,
test_status :: LowCardinality(String) AS test_status,
test_duration_ms :: UInt64 AS test_duration_ms,
report_url,
$PR_TO_TEST = 0
? 'https://github.com/ClickHouse/ClickHouse/commit/$SHA_TO_TEST'
: 'https://github.com/ClickHouse/ClickHouse/pull/$PR_TO_TEST' pull_request_url,
'' commit_url,
'' task_url,
'' base_ref,
'' base_repo,
'' head_ref,
'' head_repo
from (
select '' test_name,
'$(sed -n 's/.*<!--message: \(.*\)-->/\1/p' report.html)' test_status,
0 test_duration_ms,
'https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#fail1' report_url
union all
select
test || ' #' || toString(query_index) || '::' || test_desc_.1 test_name,
'slower' test_status,
test_desc_.2 test_duration_ms,
'https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#changes-in-performance.' || test || '.' || toString(query_index) report_url
from queries
array join map('old', left, 'new', right) as test_desc_
where changed_fail != 0 and diff > 0
union all
select
test || ' #' || toString(query_index) || '::' || test_desc_.1 test_name,
'unstable' test_status,
test_desc_.2 test_duration_ms,
'https://s3.amazonaws.com/clickhouse-test-reports/$PR_TO_TEST/$SHA_TO_TEST/${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX}/report.html#unstable-queries.' || test || '.' || toString(query_index) report_url
from queries
array join map('old', left, 'new', right) as test_desc_
where unstable_fail != 0
)
;
"
if ! [ -v CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_URL ]
then
echo Database for test results is not specified, will not upload them.
return 0
fi
set +x # Don't show password in the log
client=(clickhouse-client
# Surprisingly, clickhouse-client doesn't understand --host 127.0.0.1:9000
# so I have to extract host and port with clickhouse-local. I tried to use
# Poco URI parser to support this in the client, but it's broken and can't
# parse host:port.
$(clickhouse-local --query "with '${CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_URL}' as url select '--host ' || domain(url) || ' --port ' || toString(port(url)) format TSV")
--secure
--user "${CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_USER}"
--password "${CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_USER_PASSWORD}"
--config "right/config/client_config.xml"
--date_time_input_format=best_effort)
# CREATE TABLE IF NOT EXISTS query_metrics_v2 (
# `event_date` Date,
# `event_time` DateTime,
# `pr_number` UInt32,
# `old_sha` String,
# `new_sha` String,
# `test` LowCardinality(String),
# `query_index` UInt32,
# `query_display_name` String,
# `metric` LowCardinality(String),
# `old_value` Float64,
# `new_value` Float64,
# `diff` Float64,
# `stat_threshold` Float64
# ) ENGINE = ReplicatedMergeTree
# ORDER BY event_date
# CREATE TABLE IF NOT EXISTS run_attributes_v1 (
# `old_sha` String,
# `new_sha` String,
# `metric` LowCardinality(String),
# `metric_value` String
# ) ENGINE = ReplicatedMergeTree
# ORDER BY (old_sha, new_sha)
"${client[@]}" --query "
insert into query_metrics_v2
select
toDate(event_time) event_date,
toDateTime('$(cd right/ch && git show -s --format=%ci "$SHA_TO_TEST" | cut -d' ' -f-2)') event_time,
$PR_TO_TEST pr_number,
'$REF_SHA' old_sha,
'$SHA_TO_TEST' new_sha,
test,
query_index,
query_display_name,
metric_name as metric,
old_value,
new_value,
diff,
stat_threshold
from input('metric_name text, old_value float, new_value float, diff float,
ratio_display_text text, stat_threshold float,
test text, query_index int, query_display_name text')
format TSV
" < report/all-query-metrics.tsv # Don't leave whitespace after INSERT: https://github.com/ClickHouse/ClickHouse/issues/16652
# Upload some run attributes. I use this weird form because it is the same
# form that can be used for historical data when you only have compare.log.
cat compare.log \
| sed -n '
s/.*Model name:[[:space:]]\+\(.*\)$/metric lscpu-model-name \1/p;
s/.*L1d cache:[[:space:]]\+\(.*\)$/metric lscpu-l1d-cache \1/p;
s/.*L1i cache:[[:space:]]\+\(.*\)$/metric lscpu-l1i-cache \1/p;
s/.*L2 cache:[[:space:]]\+\(.*\)$/metric lscpu-l2-cache \1/p;
s/.*L3 cache:[[:space:]]\+\(.*\)$/metric lscpu-l3-cache \1/p;
s/.*left_sha=\(.*\)$/old-sha \1/p;
s/.*right_sha=\(.*\)/new-sha \1/p' \
| awk '
BEGIN { FS = "\t"; OFS = "\t" }
/^old-sha/ { old_sha=$2 }
/^new-sha/ { new_sha=$2 }
/^metric/ { print old_sha, new_sha, $2, $3 }' \
| "${client[@]}" --query "INSERT INTO run_attributes_v1 FORMAT TSV"
# Grepping numactl results from log is too crazy, I'll just call it again.
"${client[@]}" --query "INSERT INTO run_attributes_v1 FORMAT TSV" <<EOF
$REF_SHA $SHA_TO_TEST $(numactl --show | sed -n 's/^cpubind:[[:space:]]\+/numactl-cpubind /p')
$REF_SHA $SHA_TO_TEST $(numactl --hardware | sed -n 's/^available:[[:space:]]\+/numactl-available /p')
EOF
# Also insert some data about the check into the CI checks table.
"${client[@]}" --query "INSERT INTO "'"'"default"'"'".checks FORMAT TSVWithNamesAndTypes" \
< ci-checks.tsv
set -x
}
# Check that local and client are in PATH
clickhouse-local --version > /dev/null
clickhouse-client --version > /dev/null
case "$stage" in
"")
;&
"configure")
time configure
;&
"restart")
numactl --show ||:
numactl --hardware ||:
lscpu ||:
dmidecode -t 4 ||:
time restart
;&
"run_tests")
# Ignore the errors to collect the log and build at least some report, anyway
time run_tests ||:
;&
"get_profiles")
# Check for huge pages.
cat /sys/kernel/mm/transparent_hugepage/enabled > thp-enabled.txt ||:
cat /proc/meminfo > meminfo.txt ||:
for pid in $(pgrep -f clickhouse-server)
do
cat "/proc/$pid/smaps" > "$pid-smaps.txt" ||:
done
# We had a bug where getting profiles froze sometimes, so try to save some
# logs if this happens again. Give the servers some time to collect all info,
# then trace and kill. Start in a subshell, so that both function don't
# interfere with each other's jobs through `wait`. Also make the subshell
# have its own process group, so that we can then kill it with all its child
# processes. Somehow it doesn't kill the children by itself when dying.
set -m
( get_profiles_watchdog ) &
watchdog_pid=$!
set +m
# Check that the watchdog started OK.
kill -0 $watchdog_pid
# If the tests fail with OOM or something, still try to restart the servers
# to collect the logs. Prefer not to restart, because addresses might change
# and we won't be able to process trace_log data. Start in a subshell, so that
# it doesn't interfere with the watchdog through `wait`.
( get_profiles || { restart && get_profiles ; } ) ||:
# Kill the whole process group, because somehow when the subshell is killed,
# the sleep inside remains alive and orphaned.
while env kill -- -$watchdog_pid ; do sleep 1; done
# Stop the servers to free memory for the subsequent query analysis.
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo Servers stopped.
;&
"analyze_queries")
time analyze_queries ||:
;&
"report")
time report ||:
;&
"report_metrics")
time report_metrics ||:
cat metrics/errors.log >> report/errors.log ||:
;&
"report_html")
time "$script_dir/report.py" --report=all-queries > all-queries.html 2> >(tee -a report/errors.log 1>&2) ||:
time "$script_dir/report.py" > report.html
;&
"upload_results")
time upload_results ||:
;&
esac
# Print some final debug info to help debug Weirdness, of which there is plenty.
jobs
pstree -apgT