ClickHouse/docker/test/performance-comparison/download.sh

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

85 lines
2.7 KiB
Bash
Raw Normal View History

2020-03-18 01:28:57 +00:00
#!/bin/bash
# Top-level setup for the download step.
#
# Positional arguments:
#   $1  left_pr    - PR number of the 'left' build
#   $2  left_sha   - commit SHA of the 'left' build
#   $3  right_pr   - currently unused
#   $4  right_sha  - commit SHA of the 'right' build
#
# Environment (all optional):
#   S3_URL         - base URL of the builds bucket
#   BUILD_NAME     - build flavour directory inside the bucket
#   CHPC_DATASETS  - space-separated dataset names; an explicitly empty value
#                    is honoured and selects no datasets
set -ex
set -o pipefail

# Turn INT/TERM into a normal exit so that the EXIT trap below always runs.
trap "exit" INT TERM
# On exit, stop any background jobs that are still running. '||:' keeps the
# trap from failing when there is nothing to kill.
trap 'kill $(jobs -pr) ||:' EXIT

S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
BUILD_NAME=${BUILD_NAME:-package_release}
export S3_URL BUILD_NAME

# The directories may already exist from a previous run; that is not an error.
mkdir db0 ||:
mkdir left ||:

left_pr=$1
left_sha=$2

# right_pr=$3 not used for now
right_sha=$4

# '-' rather than ':-' on purpose: the default list applies only when
# CHPC_DATASETS is unset, not when it is set to an empty string.
datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"}

# Dataset name -> URL of the tar archive with its partitions.
declare -A dataset_paths
dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"
2021-11-19 13:10:49 +00:00
2020-03-18 01:28:57 +00:00
# Fetch everything the comparison needs, relative to the current directory:
#   * the 'left' build, unpacked into ./left (downloaded, or copied from
#     ./right when both sides are the same commit);
#   * every dataset named in $datasets, unpacked into ./db0;
#   * the FlameGraph helper scripts, saved into ~/fg.
# The transfers run as parallel background jobs. Once they have finished, the
# metadata files for the 'default' and 'datasets' databases are written to
# db0/metadata.
#
# Globals read:    S3_URL BUILD_NAME left_pr left_sha right_sha datasets
#                  dataset_paths
# Globals written: left_path - URL the 'left' build is downloaded from
# Exits with status 1 when $datasets names an unknown dataset, or when a
# 'left' build has to be downloaded and no candidate URL is reachable.
function download
{
    local path dataset_name dataset_path

    # Validate the dataset list up front, so that a typo is reported before
    # any background transfer has been started.
    for dataset_name in $datasets
    do
        if [ "${dataset_paths[$dataset_name]}" = "" ]
        then
            >&2 echo "Unknown dataset '$dataset_name'"
            exit 1
        fi
    done

    # Might have the same version on left and right (for testing) -- in this
    # case we just copy the already downloaded 'right' to 'left'. There is a
    # third case when we don't have to download anything, for example in some
    # manual runs. In this case, SHAs are not set.
    if ! [ "$left_sha" = "$right_sha" ]
    then
        # Historically there were various paths for the performance test
        # package. Probe them only when a download is really needed. The
        # archive is unpacked with 'tar --zstd' below, so the .tar.zst
        # locations come first and the first reachable one wins.
        declare -a urls_to_try=(
            "$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
            "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
            "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
        )

        for path in "${urls_to_try[@]}"
        do
            if curl --fail --head "$path"
            then
                left_path="$path"
                break
            fi
        done

        # Without a URL the background wget could only fail, and that failure
        # would go unnoticed by the bare 'wait' below. Report it here instead.
        if [ "${left_path:-}" = "" ]
        then
            >&2 echo "Cannot find the 'left' build for PR '$left_pr', commit '$left_sha'"
            exit 1
        fi

        mkdir -p left
        wget -nv -nd -c "$left_path" -O- | tar -C left --no-same-owner --strip-components=1 --zstd --extract --verbose &
    elif [ "$right_sha" != "" ]
    then
        mkdir -p left
        cp -an right/* left &
    fi

    for dataset_name in $datasets
    do
        dataset_path="${dataset_paths[$dataset_name]}"
        # Explicit subshell: the 'cd' must not leak into the rest of the function.
        ( cd db0 && wget -nv -nd -c "$dataset_path" -O- | tar --extract --verbose ) &
    done

    mkdir -p ~/fg
    (
        cd ~/fg
        wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl"
        wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/difffolded.pl"
        chmod +x ~/fg/difffolded.pl
        chmod +x ~/fg/flamegraph.pl
    ) &

    # NOTE(review): a bare 'wait' does not propagate the exit status of the
    # background jobs, so failed transfers are not detected here.
    wait

    # The metadata directory normally comes from the dataset archives; create
    # it explicitly so that an empty dataset list still works.
    mkdir -p db0/metadata
    echo "ATTACH DATABASE default ENGINE=Ordinary" > db0/metadata/default.sql
    echo "ATTACH DATABASE datasets ENGINE=Ordinary" > db0/metadata/datasets.sql
    ls db0/metadata
}
# Entry point: run the download step defined above.
download