ClickHouse/tests/performance/scripts/entrypoint.sh

#!/bin/bash
# Abort on the first failing command and trace every command that is executed.
set -o errexit -o xtrace

# Moment the check started, in seconds since the epoch.
CHPC_CHECK_START_TIMESTAMP="$(date +%s)"

# Base URL of the build storage and the name of the build to use; both can be
# overridden from the environment, the values below apply when they are unset
# or empty.
: "${S3_URL:=https://clickhouse-builds.s3.amazonaws.com}"
: "${BUILD_NAME:=package_release}"

# Make all of the above visible to child processes.
export CHPC_CHECK_START_TIMESTAMP S3_URL BUILD_NAME

# Directory that holds this script, with symlinks resolved.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# AWS occasionally answers with a DNS error that the retry options of the
# installed curl cannot handle, so the retrying is done by hand here.
# Arguments: $1 - URL to probe with a HEAD request.
# Returns:   0 as soon as one request succeeds, 1 after ten failed attempts
#            (with a one second pause after every failure).
function curl_with_retry
{
    local attempts_left=10
    while (( attempts_left > 0 ))
    do
        if ! curl --fail --head "$1"
        then
            sleep 1
            attempts_left=$(( attempts_left - 1 ))
            continue
        fi
        return 0
    done
    return 1
}

# Use the packaged repository to find the revision we will compare to.
# Reads:   PR_TO_TEST, S3_URL, BUILD_NAME and the git repository in right/ch
#          (refs 'origin/master' and 'pr').
# Writes:  REF_SHA - the commit for which a performance package was found;
#          REF_PR  - always set to 0.
# The loop ends when a package is found; otherwise it keeps stepping back one
# commit at a time until 'git rev-parse' fails.
function find_reference_sha
{
    # Scratch variables stay local: only REF_SHA and REF_PR are results.
    local start_ref ref_tag found url
    local -a urls_to_try

    git -C right/ch log -1 origin/master
    git -C right/ch log -1 pr
    # Go back from the revision to be tested, trying to find the closest published
    # testing release. The PR branch may be either pull/*/head which is the
    # author's branch, or pull/*/merge, which is head merged with some master
    # automatically by Github. We will use a merge base with master as a reference
    # for testing (or some older commit). A caveat is that if we're testing the
    # master, the merge base is the tested commit itself, so we have to step back
    # once.
    start_ref=$(git -C right/ch merge-base origin/master pr)
    if [ "$PR_TO_TEST" == "0" ]
    then
        start_ref=$start_ref~
    fi

    # Loop back to find a commit that actually has a published perf test package.
    while :
    do
        # FIXME the original idea was to compare to a closest testing tag, which
        # is a version that is verified to work correctly. However, we're having
        # some test stability issues now, and the testing release can't roll out
        # for more than a week already because of that. Temporarily switch to
        # using just closest master, so that we can go on.
        #ref_tag=$(git -C ch describe --match='v*-testing' --abbrev=0 --first-parent "$start_ref")
        ref_tag="$start_ref"

        echo Reference tag is "$ref_tag"
        # We use annotated tags which have their own shas, so we have to further
        # dereference the tag to get the commit it points to, hence the '~0' thing.
        REF_SHA=$(git -C right/ch rev-parse "$ref_tag~0")

        # FIXME sometimes we have testing tags on commits without published builds.
        # Normally these are documentation commits. Loop to skip them.
        # Historically there were various paths for the performance test package,
        # test all of them.
        found=
        urls_to_try=(
            "$S3_URL/PRs/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
            "$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
            "$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tgz"
        )
        for url in "${urls_to_try[@]}"
        do
            if curl_with_retry "$url"
            then
                found="$url"
                break
            fi
        done
        if [ -n "$found" ] ; then break; fi

        # Nothing is published for this commit, try its parent.
        start_ref="$REF_SHA~"
    done

    REF_PR=0
}

# Hand the workspace and output directories over to nobody:nogroup and open
# them up to everyone.
chown nobody workspace output
chgrp nogroup workspace output
chmod 777 workspace output

cd workspace

# The package to be tested must already be present in /artifacts; it is
# unpacked into ./right with its top-level directory stripped.
if [ ! -e "/artifacts/performance.tar.zst" ]
then
    echo "ERROR: performance.tar.zst not found"
    exit 1
fi
mkdir -p right
tar --extract --verbose --zstd --no-same-owner --strip-components=1 \
    --file "/artifacts/performance.tar.zst" --directory right

# Find reference revision if not specified explicitly
if [ -z "$REF_SHA" ]
then
    find_reference_sha
fi
if [ -z "$REF_SHA" ]
then
    echo Reference SHA is not specified
    exit 1
fi
if [ -z "$REF_PR" ]
then
    echo Reference PR is not specified
    exit 1
fi

# Show what we're testing. The reference commit is written to left-commit.txt;
# a failure of 'git log' here is tolerated ('||:').
(
    git -C right/ch log -1 --decorate "$REF_SHA" ||:
) | tee left-commit.txt

# The tested commit is written to right-commit.txt: first the commit named by
# SHA_TO_TEST (failure tolerated), then the commit the 'pr' ref points to.
(
    git -C right/ch log -1 --decorate "$SHA_TO_TEST" ||:
    echo
    echo Real tested commit is:
    git -C right/ch log -1 --decorate "pr"
) | tee right-commit.txt

if [ "$PR_TO_TEST" != "0" ]
then
    # If the PR only changes the tests and nothing else, prepare a list of these
    # tests for use by compare.sh. Compare to merge base, because master might be
    # far in the future and have unrelated test changes.
    base=$(git -C right/ch merge-base pr origin/master)
    git -C right/ch diff --name-only "$base" pr -- . | tee all-changed-files.txt
    # The pathspecs are quoted so that git receives them verbatim: unquoted,
    # the shell would try to expand the '*' against the current directory first.
    # --diff-filter=d leaves deleted files out of the list of test definitions.
    git -C right/ch diff --name-only --diff-filter=d "$base" pr -- 'tests/performance/*.xml' | tee changed-test-definitions.txt
    git -C right/ch diff --name-only "$base" pr -- ':!tests/performance/*.xml' ':!docker/test/performance-comparison' | tee other-changed-files.txt
fi

# Set python output encoding so that we can print queries with non-ASCII letters.
export PYTHONIOENCODING=utf-8

# By default, use the main comparison script from the tested package, so that we
# can change it in PRs. If CHPC_LOCAL_SCRIPT is set (to any value, even empty),
# the scripts are taken from the parent directory instead.
script_path="right/scripts"
if [ -v CHPC_LOCAL_SCRIPT ]
then
    script_path=".."
fi

# Even if we have some errors, try our best to save the logs.
set +e

# Use clickhouse-client and clickhouse-local from the right server.
PATH="$(readlink -f right/)":"$PATH"
export PATH

# The scripts started below receive the reference revision through the
# environment as well.
export REF_PR
export REF_SHA

# Try to collect some core dumps.
# At least we remove the ulimit and then try to pack some common file names into output.
ulimit -c unlimited
cat /proc/sys/kernel/core_pattern

# Start the main comparison script: download first and, only if that succeeds,
# run compare.sh starting from the 'configure' stage. Both stdout and stderr
# are timestamped by 'ts' and appended to compare.log.
# SCRIPT_DIR is quoted so that a script location containing whitespace or glob
# characters is still passed as a single word.
{
    time "$SCRIPT_DIR"/download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \
    time stage=configure "$script_path"/compare.sh ; \
} 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee -a compare.log

# Stop the servers to free memory. Normally they are restarted before getting
# the profile info, so they shouldn't use much, but if the comparison script
# fails in the middle, this might not be the case.
# killall is repeated once per second, at most 30 times, and the loop ends as
# soon as killall reports a failure.
for (( kill_round = 1; kill_round <= 30; kill_round++ ))
do
    if ! killall clickhouse
    then
        break
    fi
    sleep 1
done

# Save the kernel log next to the other logs, then list what we have got.
dmesg -T > dmesg.log

ls -lath

# Everything that goes into the output archive: logs and reports from the
# current directory, the tested scripts and preprocessed configs, the report
# directories and core dumps under some common file names.
archive_inputs=(
    ./*.{log,tsv,html,txt,rep,svg,columns}
    {right,left}/{performance,scripts}
    {{right,left}/db,db0}/preprocessed_configs
    report analyze benchmark metrics
    ./*.core.dmp ./*.core
)
7z a '-x!*/tmp' /output/output.7z "${archive_inputs[@]}"

# If the files aren't same, copy it
if ! cmp --silent compare.log /output/compare.log
then
    cp compare.log /output
fi
[wip] performance comparison fixes 2019-12-26 21:33:10 +00:00			`#!/bin/bash`
Performance comparison improvements. 2020-01-10 14:06:07 +00:00			`set -ex`
[wip] performance comparison fixes 2019-12-26 21:33:10 +00:00
fix shellcheck 2021-06-09 11:40:38 +00:00			`CHPC_CHECK_START_TIMESTAMP="$(date +%s)"`
			`export CHPC_CHECK_START_TIMESTAMP`
fix 2021-06-08 14:12:47 +00:00
Remove outdated links from CI 2022-03-28 13:53:22 +00:00			`S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}`
Build performance output in release builds 2022-05-13 11:36:08 +00:00			`BUILD_NAME=${BUILD_NAME:-package_release}`
Use variables in URLs, migrate to RPs prefix 2023-03-23 15:44:10 +00:00			`export S3_URL BUILD_NAME`
Reapply "improve CI with digest for docker, build and test jobs" (#57904) * Revert "Revert "improve CI with digest for docker, build and test jobs"" * fix: docker manifest merge for missing images only 2023-12-18 08:07:22 +00:00			`SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"`
Add performance comparison check 2021-11-16 11:16:10 +00:00
Reapply "improve CI with digest for docker, build and test jobs" (#57904) * Revert "Revert "improve CI with digest for docker, build and test jobs"" * fix: docker manifest merge for missing images only 2023-12-18 08:07:22 +00:00			`# Sometimes AWS responds with DNS error and it's impossible to retry it with`
Followup 2021-12-17 13:19:44 +00:00			`# current curl version options.`
More agressive retries in perf tests 2021-12-17 13:16:35 +00:00			`function curl_with_retry`
			`{`
Update entrypoint.sh 2022-04-20 18:38:49 +00:00			`for _ in 1 2 3 4 5 6 7 8 9 10; do`
Fix supply chain attack in performance tests 2024-01-14 07:25:12 +00:00			`if curl --fail --head "$1"`
			`then`
Fix style 2021-12-17 13:43:58 +00:00			`return 0`
			`else`
Update entrypoint.sh 2022-04-20 18:38:49 +00:00			`sleep 1`
Fix style 2021-12-17 13:43:58 +00:00			`fi`
			`done`
More agressive retries in perf tests 2021-12-17 13:16:35 +00:00			`return 1`
			`}`

Choose proper old version for perf comparison 2020-07-23 15:16:40 +00:00			`# Use the packaged repository to find the revision we will compare to.`
performance comparison 2020-03-12 12:51:09 +00:00			`function find_reference_sha`
			`{`
Update entrypoint.sh 2020-07-28 13:15:41 +00:00			`git -C right/ch log -1 origin/master`
			`git -C right/ch log -1 pr`
performance comparison 2020-03-12 12:51:09 +00:00			`# Go back from the revision to be tested, trying to find the closest published`
Choose proper old version for perf comparison 2020-07-23 15:16:40 +00:00			`# testing release. The PR branch may be either pull/*/head which is the`
			`# author's branch, or pull/*/merge, which is head merged with some master`
			`# automatically by Github. We will use a merge base with master as a reference`
			`# for tesing (or some older commit). A caveat is that if we're testing the`
			`# master, the merge base is the tested commit itself, so we have to step back`
			`# once.`
performance comparison 2020-07-27 11:20:45 +00:00			`start_ref=$(git -C right/ch merge-base origin/master pr)`
performance comparison 2020-08-05 23:33:32 +00:00			`if [ "$PR_TO_TEST" == "0" ]`
performance comparison 2020-03-12 12:51:09 +00:00			`then`
Choose proper old version for perf comparison 2020-07-23 15:16:40 +00:00			`start_ref=$start_ref~`
performance comparison 2020-03-12 12:51:09 +00:00			`fi`

Choose proper old version for perf comparison 2020-07-23 15:16:40 +00:00			`# Loop back to find a commit that actually has a published perf test package.`
performance comparison 2020-03-12 12:51:09 +00:00			`while :`
			`do`
performance comparison 2020-03-16 14:54:17 +00:00			`# FIXME the original idea was to compare to a closest testing tag, which`
			`# is a version that is verified to work correctly. However, we're having`
			`# some test stability issues now, and the testing release can't roll out`
			`# for more that a weak already because of that. Temporarily switch to`
			`# using just closest master, so that we can go on.`
			`#ref_tag=$(git -C ch describe --match='v*-testing' --abbrev=0 --first-parent "$start_ref")`
			`ref_tag="$start_ref"`

performance comparison 2020-03-12 12:51:09 +00:00			`echo Reference tag is "$ref_tag"`
			`# We use annotated tags which have their own shas, so we have to further`
			`# dereference the tag to get the commit it points to, hence the '~0' thing.`
performance comparison 2020-07-27 11:20:45 +00:00			`REF_SHA=$(git -C right/ch rev-parse "$ref_tag~0")`
performance comparison 2020-03-12 12:51:09 +00:00
Choose proper old version for perf comparison 2020-07-23 15:16:40 +00:00			`# FIXME sometimes we have testing tags on commits without published builds.`
			`# Normally these are documentation commits. Loop to skip them.`
			`# Historically there were various path for the performance test package,`
			`# test all of them.`
perf test: report queries with new functions separately 2020-06-27 00:45:00 +00:00			`unset found`
Build performance output in release builds 2022-05-13 11:36:08 +00:00			`declare -a urls_to_try=(`
Use variables in URLs, migrate to RPs prefix 2023-03-23 15:44:10 +00:00			`"$S3_URL/PRs/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"`
			`"$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"`
			`"$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tgz"`
Build performance output in release builds 2022-05-13 11:36:08 +00:00			`)`
Fix search for old version 2021-11-18 10:38:59 +00:00			`for path in "${urls_to_try[@]}"`
perf test: report queries with new functions separately 2020-06-27 00:45:00 +00:00			`do`
More agressive retries in perf tests 2021-12-17 13:16:35 +00:00			`if curl_with_retry "$path"`
perf test: report queries with new functions separately 2020-06-27 00:45:00 +00:00			`then`
			`found="$path"`
			`break`
			`fi`
			`done`
			`if [ -n "$found" ] ; then break; fi`
performance comparison 2020-03-12 12:51:09 +00:00
			`start_ref="$REF_SHA~"`
			`done`

			`REF_PR=0`
			`}`

Choose proper old version for perf comparison 2020-07-23 15:16:40 +00:00			`chown nobody workspace output`
			`chgrp nogroup workspace output`
			`chmod 777 workspace output`

			`cd workspace`

Reapply "improve CI with digest for docker, build and test jobs" (#57904) * Revert "Revert "improve CI with digest for docker, build and test jobs"" * fix: docker manifest merge for missing images only 2023-12-18 08:07:22 +00:00			`[ ! -e "/artifacts/performance.tar.zst" ] && echo "ERROR: performance.tar.zst not found" && exit 1`
			`mkdir -p right`
			`tar -xf "/artifacts/performance.tar.zst" -C right --no-same-owner --strip-components=1 --zstd --extract --verbose`
Choose proper old version for perf comparison 2020-07-23 15:16:40 +00:00
performance comparison 2020-03-12 12:51:09 +00:00			`# Find reference revision if not specified explicitly`
			`if [ "$REF_SHA" == "" ]; then find_reference_sha; fi`
			`if [ "$REF_SHA" == "" ]; then echo Reference SHA is not specified ; exit 1 ; fi`
			`if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi`
[wip] performance comparison 2020-01-27 12:35:56 +00:00
			`# Show what we're testing`
performance comparison 2020-02-03 17:06:17 +00:00			`(`
performance comparison 2020-07-27 11:20:45 +00:00			`git -C right/ch log -1 --decorate "$REF_SHA" \|\|:`
performance comparison 2020-02-03 17:06:17 +00:00			`) \| tee left-commit.txt`
performance comparison 2020-03-12 12:51:09 +00:00
performance comparison 2020-02-03 17:06:17 +00:00			`(`
performance comparison 2020-07-27 11:20:45 +00:00			`git -C right/ch log -1 --decorate "$SHA_TO_TEST" \|\|:`
			`echo`
			`echo Real tested commit is:`
			`git -C right/ch log -1 --decorate "pr"`
performance comparison 2020-02-03 17:06:17 +00:00			`) \| tee right-commit.txt`
Performance comparison improvements. 2020-01-10 14:06:07 +00:00
performance comparison 2020-05-22 08:54:35 +00:00			`if [ "$PR_TO_TEST" != "0" ]`
			`then`
performance comparison 2020-06-05 14:30:26 +00:00			`# If the PR only changes the tests and nothing else, prepare a list of these`
			`# tests for use by compare.sh. Compare to merge base, because master might be`
			`# far in the future and have unrelated test changes.`
performance comparison 2020-07-27 11:20:45 +00:00			`base=$(git -C right/ch merge-base pr origin/master)`
If perf test definition changed, run everything + longer (as in master) Also some other perf test fixes 2020-09-18 13:27:50 +00:00			`git -C right/ch diff --name-only "$base" pr -- . \| tee all-changed-files.txt`
don't run removed tests 2024-07-30 14:03:09 +00:00			`git -C right/ch diff --name-only --diff-filter=d "$base" pr -- tests/performance/*.xml \| tee changed-test-definitions.txt`
Perf: Only consider XML files 2024-01-29 16:47:50 +00:00			`git -C right/ch diff --name-only "$base" pr -- :!tests/performance/*.xml :!docker/test/performance-comparison \| tee other-changed-files.txt`
performance comparison 2020-05-22 08:54:35 +00:00			`fi`
performance comparison 2020-02-25 19:51:09 +00:00
Export logs from CI in performance (preparation) 2023-08-12 22:15:22 +00:00			`# Set python output encoding so that we can print queries with non-ASCII letters.`
[wip] Performance comparison test. 2020-01-14 19:05:58 +00:00			`export PYTHONIOENCODING=utf-8`

Performance comparison fixes 2020-04-02 18:44:58 +00:00			`# By default, use the main comparison script from the tested package, so that we`
			`# can change it in PRs.`
			`script_path="right/scripts"`
			`if [ -v CHPC_LOCAL_SCRIPT ]`
			`then`
			`script_path=".."`
			`fi`

[wip] performance comparison test 2020-01-16 19:39:07 +00:00			`# Even if we have some errors, try our best to save the logs.`
			`set +e`
performance comparison 2020-03-18 01:28:57 +00:00
performance comparison 2020-04-17 15:47:01 +00:00			`# Use clickhouse-client and clickhouse-local from the right server.`
			`PATH="$(readlink -f right/)":"$PATH"`
			`export PATH`

fixes 2020-11-03 14:37:54 +00:00			`export REF_PR`
			`export REF_SHA`

Export logs from CI in performance (preparation) 2023-08-12 22:15:22 +00:00			`# Try to collect some core dumps.`
Update entrypoint.sh 2021-08-18 22:21:23 +00:00			`# At least we remove the ulimit and then try to pack some common file names into output.`
Update entrypoint.sh 2021-08-18 04:00:51 +00:00			`ulimit -c unlimited`
Update entrypoint.sh 2021-08-18 22:21:23 +00:00			`cat /proc/sys/kernel/core_pattern`
Update entrypoint.sh 2021-08-18 07:58:21 +00:00
performance comparison 2020-04-17 15:47:01 +00:00			`# Start the main comparison script.`
Build performance output in release builds 2022-05-13 11:36:08 +00:00			`{`
Reapply "improve CI with digest for docker, build and test jobs" (#57904) * Revert "Revert "improve CI with digest for docker, build and test jobs"" * fix: docker manifest merge for missing images only 2023-12-18 08:07:22 +00:00			`time $SCRIPT_DIR/download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \`
Performance comparison fixes 2020-04-02 18:44:58 +00:00			`time stage=configure "$script_path"/compare.sh ; \`
perf tests set cgroups_memory_usage_observer_wait_time to zero 2024-08-27 13:26:20 +00:00			`} 2>&1 \| ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" \| tee -a compare.log`
[wip] performance comparison fixes 2019-12-26 21:33:10 +00:00
performance comparison 2020-02-27 17:57:08 +00:00			`# Stop the servers to free memory. Normally they are restarted before getting`
			`# the profile info, so they shouldn't use much, but if the comparison script`
			`# fails in the middle, this might not be the case.`
Fix shellcheck warnings. 2020-02-27 20:02:50 +00:00			`for _ in {1..30}`
performance comparison 2020-02-27 17:57:08 +00:00			`do`
Fix shellcheck warnings. 2020-02-27 20:02:50 +00:00			`killall clickhouse \|\| break`
			`sleep 1`
performance comparison 2020-02-27 17:57:08 +00:00			`done`

performance comparison 2020-02-25 19:51:09 +00:00			`dmesg -T > dmesg.log`
performance comparison 2020-02-18 17:30:10 +00:00
Update entrypoint.sh 2021-08-17 10:30:51 +00:00			`ls -lath`

performance comparison 2020-06-18 03:14:18 +00:00			`7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \`
			`{right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \`
try to collect some core dumps in perf tests 2021-08-16 17:05:50 +00:00			`report analyze benchmark metrics \`
Update entrypoint.sh 2021-08-18 22:21:23 +00:00			`./*.core.dmp ./*.core`
performance comparison 2020-06-18 03:14:18 +00:00
Do not fail if output and workspace are the same 2022-05-17 21:56:22 +00:00			`# If the files aren't same, copy it`
			`cmp --silent compare.log /output/compare.log \|\| \`
			`cp compare.log /output`