performance comparison

This commit is contained in:
Alexander Kuzmenkov 2020-03-18 04:28:57 +03:00
parent a800fa7986
commit 56c4b49844
3 changed files with 65 additions and 61 deletions

View File

@ -2,55 +2,9 @@
set -ex
set -o pipefail
trap "exit" INT TERM
trap "kill 0" EXIT
trap "kill $(jobs -pr) ||:" EXIT
stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
mkdir db0 ||:
left_pr=$1
left_sha=$2
right_pr=$3
right_sha=$4
datasets=${CHPC_DATASETS:-"hits1 hits10 hits100 values"}
declare -A dataset_paths
dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"
dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar"
dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar"
dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar"
function download
{
rm -r left ||:
mkdir left ||:
rm -r right ||:
mkdir right ||:
# might have the same version on left and right
if ! [ "$left_sha" = "$right_sha" ]
then
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv &
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv &
else
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left right &
fi
for dataset_name in $datasets
do
dataset_path="${dataset_paths[$dataset_name]}"
[ "$dataset_path" != "" ]
cd db0 && wget -nv -nd -c "$dataset_path" -O- | tar -xv &
done
mkdir ~/fg ||:
cd ~/fg && wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" && chmod +x ~/fg/flamegraph.pl &
wait
}
function configure
{
@ -295,14 +249,14 @@ create table queries engine Memory as select
-- immediately, so for now we pretend they don't exist. We don't want to
-- remove them altogether because we want to be able to detect regressions,
-- but the right way to do this is not yet clear.
left + right < 0.02 as short,
left + right < 0.05 as short,
not short and abs(diff) < 0.10 and rd[3] > 0.10 as unstable,
-- Do not consider changed the queries with 5% RD below 1% -- e.g., we're
-- likely to observe a difference > 1% in less than 5% cases.
-- Do not consider changed the queries with 5% RD below 5% -- e.g., we're
-- likely to observe a difference > 5% in less than 5% cases.
-- Not sure it is correct, but empirically it filters out a lot of noise.
not short and abs(diff) > 0.15 and abs(diff) > rd[3] and rd[1] > 0.01 as changed,
not short and abs(diff) > 0.15 and abs(diff) > rd[3] and rd[1] > 0.05 as changed,
*
from file('*-report.tsv', TSV, 'left float, right float, diff float, rd Array(float), query text');
@ -411,7 +365,7 @@ create table metric_devation engine File(TSVWithNamesAndTypes, 'metric-deviation
join queries using query
group by query, metric
having d > 0.5
order by any(rd[3]) desc, d desc
order by any(rd[3]) desc, query desc, d desc
;
create table stacks engine File(TSV, 'stacks.rep') as
@ -451,9 +405,6 @@ grep -H -m2 'Exception:[^:]' ./*-err.log | sed 's/:/\t/' > run-errors.tsv ||:
case "$stage" in
"")
;&
"download")
time download
;&
"configure")
time configure
;&

View File

@ -0,0 +1,52 @@
#!/bin/bash
set -ex
set -o pipefail
trap "exit" INT TERM
trap "kill $(jobs -pr) ||:" EXIT
mkdir db0 ||:
left_pr=$1
left_sha=$2
right_pr=$3
right_sha=$4
datasets=${CHPC_DATASETS:-"hits1 hits10 hits100 values"}
declare -A dataset_paths
dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"
dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar"
dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar"
dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar"
function download
{
rm -r left ||:
mkdir left ||:
rm -r right ||:
mkdir right ||:
# might have the same version on left and right
if ! [ "$left_sha" = "$right_sha" ]
then
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv &
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv &
else
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left right &
fi
for dataset_name in $datasets
do
dataset_path="${dataset_paths[$dataset_name]}"
[ "$dataset_path" != "" ]
cd db0 && wget -nv -nd -c "$dataset_path" -O- | tar -xv &
done
mkdir ~/fg ||:
cd ~/fg && wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" && chmod +x ~/fg/flamegraph.pl &
wait
}
download

View File

@ -92,12 +92,13 @@ export CHPC_RUNS=${CHPC_RUNS:-7}
# Even if we have some errors, try our best to save the logs.
set +e
# compare.sh kills its process group, so put it into a separate one.
# It's probably at fault for using `kill 0` as an error handling mechanism,
# but I can't be bothered to change this now.
set -m
time ../compare.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log
set +m
# Use main comparison script from the tested package, so that we can change it
# in PRs.
{ \
time ../download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \
time stage=configure right/scripts/compare.sh ; \
} 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log
# Stop the servers to free memory. Normally they are restarted before getting
# the profile info, so they shouldn't use much, but if the comparison script