ClickHouse/docker/test/upgrade/run.sh

#!/bin/bash
# shellcheck disable=SC2094
# shellcheck disable=SC2086
# shellcheck disable=SC2024

# Clear the kernel ring buffer so that messages from earlier runs do not
# overlap with this one.
dmesg --clear

# Trace every command from here on.
set -x

# The repository's tests folder is mounted under /usr/share/clickhouse-test.
# Expose the helper scripts this check needs as commands in /usr/bin.
# Each entry is "<path below the tests folder>:<command name>".
for helper in \
    ci/stress.py:stress \
    clickhouse-test:clickhouse-test \
    ci/download_release_packages.py:download_release_packages \
    ci/get_previous_release_tag.py:get_previous_release_tag
do
    ln -s "/usr/share/clickhouse-test/${helper%%:*}" "/usr/bin/${helper##*:}"
done

# The stress tests and the upgrade check share code that lives in a separate
# bash library (tests/ci/stress_tests.lib); load it into this shell.
. /usr/share/clickhouse-test/ci/stress_tests.lib

# Background Azure blob storage emulator, then the MinIO setup, so the server
# gets a proper environment.
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless

# Derive the previous release tag from the version of the client package under
# test: take the "Version:" field, drop everything from the first '+', and
# feed the result to get_previous_release_tag.
echo "Get previous release tag"
previous_release_tag=$(
    dpkg --info package_folder/clickhouse-client*.deb \
        | grep "Version: " \
        | awk '{print $2}' \
        | cut -f1 -d'+' \
        | get_previous_release_tag
)
echo $previous_release_tag

# Shallow clone of that tag only; the tests of the previous release are what
# the stress run below executes.
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git \
    --no-tags --progress \
    --branch=$previous_release_tag \
    --no-recurse-submodules --depth=1 \
    previous_release_repository

echo "Download clickhouse-server from the previous release"
mkdir previous_release_package_folder

# Record the outcome of the download script as a row in test_results.tsv.
echo $previous_release_tag | download_release_packages \
    && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
    || echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv

# Bail out early (writing the overall status ourselves) when the clone did not
# produce any test queries ...
if [ -z "$(ls -A previous_release_repository/tests/queries)" ]
then
    printf 'failure\tFailed to clone previous release tests\n' > /test_output/check_status.tsv
    exit
fi

# ... or when either of the two required packages is missing.
if [ -z "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
    printf 'failure\tFailed to download previous release packages\n' > /test_output/check_status.tsv
    exit
fi

echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv

# Force the Ordinary engine for the system database to make the upgrade check
# more interesting: pre-create its metadata file before the first start.
mkdir -p /var/lib/clickhouse/metadata
printf '%s\n' "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql

# Put the previous release in place.
install_packages previous_release_package_folder

# First start happens without S3 so that the system.*_log tables are created
# on the local file system and can later be dumped via clickhouse-local.
configure

# These two files contain settings that are new; they can safely be removed.
rm /etc/clickhouse-server/config.d/merge_tree.xml \
   /etc/clickhouse-server/users.d/nonconst_timezone.xml

# Start and immediately stop the server, then keep the log of this initial
# run under its own name.
start
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log

# Patch two server config files in place for the previous release. Each patch
# reads the file with sudo, rewrites it through sed into a sibling ".tmp" file
# and then renames the ".tmp" file over the original.
# NOTE(review): `configure` is invoked again right after this block; confirm it
# does not regenerate these files and discard the edits made here.

# force_sync=false doesn't work correctly on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
  | sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \
  > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml

# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
  | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
  > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
# The rename has to be a command of its own: written on the same line as the
# redirection, its words are handed to sed as input file names, sed ignores the
# piped-in config, and the original file is never replaced.
sudo mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
# The rewritten file must belong to the clickhouse user and group.
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml

# Second start of the previous release, this time for the real workload:
#   - S3 storage is enabled by default for MergeTree tables;
#   - ZooKeeper fault injection stays off, because the previous version may
#     not be ready for it.
export USE_S3_STORAGE_FOR_MERGE_TREE=1 ZOOKEEPER_FAULT_INJECTION=0
configure

# These two files contain settings that are new; they can safely be removed.
rm /etc/clickhouse-server/config.d/merge_tree.xml \
   /etc/clickhouse-server/users.d/nonconst_timezone.xml

start

# Print the version of the running server.
clickhouse-client --query="SELECT 'Server version: ', version()"

# Run the stress script against the previous release, using the test queries
# from the previous release's repository, and record whether it succeeded.
# The output folder is scratch space and is thrown away afterwards.
mkdir tmp_stress_output

stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \
    --upgrade-check \
    --output-folder tmp_stress_output \
    --global-time-limit=1200 \
    && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
    || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv

rm -rf tmp_stress_output

# Counting tables has deadlocked in very rare cases. If the query fails or
# does not finish within 10 minutes, dump the backtraces of all server threads
# into gdb.log and force-stop the server.
if ! timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
then
    echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log
    timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
    clickhouse stop --force
fi

# Stop the previous version with a bigger timeout and with the additional hang
# check disabled, then keep the log of the stress run under its own name.
stop 300 false
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log

# Install and start new server
# The packages under test replace the previous release; the data directory
# written by the previous release is what the new server starts on.
install_packages package_folder
# Fault injection was disabled for the previous release; turn it on for the
# new server before regenerating the configuration.
export ZOOKEEPER_FAULT_INJECTION=1
configure

# Just in case previous version left some garbage in zk
# Every ">1<" in lost_forever_check.xml is rewritten to ">0<" via a temporary
# file that is then renamed over the original.
sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \
  | sed "s|>1<|>0<|g" \
  > /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp
sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml

# NOTE(review): the meaning of the 500 argument is defined by `start` in
# stress_tests.lib — presumably a larger startup allowance; confirm there.
start 500
# If the server answers, the query itself emits the "Server successfully
# started / OK" row into test_results.tsv. Otherwise the "<Error> ... Application"
# lines of the server log are saved to application_errors.txt and, only when at
# least one such line was found, a failure row is appended instead.
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
    || (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
    && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
    >> /test_output/test_results.tsv)

# Remove file application_errors.txt if it's empty
# (this also covers the case where the file was never created).
[ -s /test_output/application_errors.txt ] || rm /test_output/application_errors.txt

# Print the version of the server that is running now.
clickhouse-client --query="SELECT 'Server version: ', version()"

# Let the server run for a while before checking log.
sleep 60

# Stop the new server and keep its log under its own name; the error-message
# check that follows inspects this file.
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log

# Error messages (we should ignore some errors)
#
# Scan the log written by the upgraded server for <Error> lines that are not
# on the ignore list below:
#   1. rg -Fav drops every line containing one of the fixed strings;
#   2. grep -av drops the expected errors of test 01111;
#   3. zgrep -Fa keeps only the remaining "<Error>" lines.
# If anything is left, a failure row (with the escaped head of the file) is
# appended to test_results.tsv; otherwise an OK row is appended.
#
# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")
# FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server.
#       Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
#
# The log is addressed by its absolute path, i.e. the location it was moved to
# after the upgraded server was stopped. A bare file name would be resolved
# against the current directory; if the file is not found there, the pipeline
# produces no output and the check reports "No Error messages" unconditionally.
echo "Check for Error messages in server log:"
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
           -e "Code: 236. DB::Exception: Cancelled mutating parts" \
           -e "REPLICA_IS_ALREADY_ACTIVE" \
           -e "REPLICA_ALREADY_EXISTS" \
           -e "ALL_REPLICAS_LOST" \
           -e "DDLWorker: Cannot parse DDL task query" \
           -e "RaftInstance: failed to accept a rpc connection due to error 125" \
           -e "UNKNOWN_DATABASE" \
           -e "NETWORK_ERROR" \
           -e "UNKNOWN_TABLE" \
           -e "ZooKeeperClient" \
           -e "KEEPER_EXCEPTION" \
           -e "DirectoryMonitor" \
           -e "TABLE_IS_READ_ONLY" \
           -e "Code: 1000, e.code() = 111, Connection refused" \
           -e "UNFINISHED" \
           -e "NETLINK_ERROR" \
           -e "Renaming unexpected part" \
           -e "PART_IS_TEMPORARILY_LOCKED" \
           -e "and a merge is impossible: we didn't find" \
           -e "found in queue and some source parts for it was lost" \
           -e "is lost forever." \
           -e "Unknown index: idx." \
           -e "Cannot parse string 'Hello' as UInt64" \
           -e "} <Error> TCPHandler: Code:" \
           -e "} <Error> executeQuery: Code:" \
           -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
           -e "The set of parts restored in place of" \
           -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
           -e "Code: 269. DB::Exception: Destination table is myself" \
           -e "Coordination::Exception: Connection loss" \
           -e "MutateFromLogEntryTask" \
           -e "No connection to ZooKeeper, cannot get shared table ID" \
           -e "Session expired" \
           -e "TOO_MANY_PARTS" \
           -e "Authentication failed" \
           -e "Cannot flush" \
           -e "Container already exists" \
           -e "doesn't have metadata version on disk" \
    /var/log/clickhouse-server/clickhouse-server.upgrade.log \
    | grep -av -e "_repl_01111_.*Mapping for table with UUID" \
    | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
    && echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
        >> /test_output/test_results.tsv \
    || echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv

# Remove file upgrade_error_messages.txt if it's empty
[ -s /test_output/upgrade_error_messages.txt ] || rm /test_output/upgrade_error_messages.txt

# Look through the logs for sanitizer asserts, crashes and other critical
# errors (implemented in the shared library).
check_logs_for_critical_errors

# Archive the coordination directory, following symlinks (-h); a failure to
# create the archive is deliberately ignored.
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination || true

collect_query_and_trace_logs

# Ship the server's stderr log with the other artifacts.
mv /var/log/clickhouse-server/stderr.log /test_output/

# Write check result into check_status.tsv
# Try to choose most specific error for the whole check status
#
# test_results.tsv is read as (test, res, time, desc). Among the rows whose
# result is not 'OK', the ORDER BY ranks the more specific kinds of failure
# first (ties are broken by row order) and the single top row becomes the
# overall status line "failure<TAB><test name>".
# If clickhouse-local itself fails, a generic failure line is written instead;
# `echo -e` is required there so that "\t" is emitted as a real TAB and the
# file stays a valid two-column TSV.
clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
(test like '%Sanitizer%') DESC,
(test like '%Killed by signal%') DESC,
(test like '%gdb.log%') DESC,
(test ilike '%possible deadlock%') DESC,
(test like '%start%') DESC,
(test like '%dmesg%') DESC,
(test like '%OOM%') DESC,
(test like '%Signal 9%') DESC,
(test like '%Fatal message%') DESC,
(test like '%Error message%') DESC,
(test like '%previous release%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
# An empty status file means no failing row was selected: report success.
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv

collect_core_dumps
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00			`#!/bin/bash`
			`# shellcheck disable=SC2094`
			`# shellcheck disable=SC2086`
			`# shellcheck disable=SC2024`

			`# Avoid overlaps with previous runs`
			`dmesg --clear`

			`set -x`

Fix 2023-01-24 16:45:07 +00:00			`# we mount tests folder from repo to /usr/share`
			`ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress`
			`ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test`
			`ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages`
			`ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag`

Fix style 2023-02-24 16:58:36 +00:00			`# Stress tests and upgrade check uses similar code that was placed`
			`# in a separate bash library. See tests/ci/stress_tests.lib`
Make better 2023-02-24 16:43:28 +00:00			`source /usr/share/clickhouse-test/ci/stress_tests.lib`

			`azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &`
			`./setup_minio.sh stateless # to have a proper environment`

Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00			`echo "Get previous release tag"`
Update run.sh 2023-01-25 17:59:50 +00:00			`previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb \| grep "Version: " \| awk '{print $2}' \| cut -f1 -d'+' \| get_previous_release_tag)`
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00			`echo $previous_release_tag`

			`echo "Clone previous release repository"`
			`git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository`

			`echo "Download clickhouse-server from the previous release"`
			`mkdir previous_release_package_folder`

Make better 2023-02-24 16:43:28 +00:00			`echo $previous_release_tag \| download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \`
			`\|\| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv`
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00
			`# Check if we cloned previous release repository successfully`
			`if ! [ "$(ls -A previous_release_repository/tests/queries)" ]`
			`then`
Make better 2023-02-24 16:43:28 +00:00			`echo -e 'failure\tFailed to clone previous release tests' > /test_output/check_status.tsv`
			`exit`
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00			`elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]`
			`then`
Make better 2023-02-24 16:43:28 +00:00			`echo -e 'failure\tFailed to download previous release packages' > /test_output/check_status.tsv`
			`exit`
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00			`fi`

Make better 2023-02-24 16:43:28 +00:00			`echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv`
			`echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv`
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00
Make better 2023-02-24 16:43:28 +00:00			`# Make upgrade check more funny by forcing Ordinary engine for system database`
ci/upgrade: fix creating system database with Ordinary engine Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> 2023-03-14 13:07:09 +00:00			`mkdir -p /var/lib/clickhouse/metadata`
Make better 2023-02-24 16:43:28 +00:00			`echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql`
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00
Make better 2023-02-24 16:43:28 +00:00			`# Install previous release packages`
			`install_packages previous_release_package_folder`
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00
ci/upgrade: store system.*_log tables on local filesystem (and fix artifacts) Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> 2023-03-14 13:11:59 +00:00			`# Initial run without S3 to create system.*_log on local file system to make it`
			`# available for dump via clickhouse-local`
			`configure`

fix 2023-06-19 10:50:13 +00:00			`# it contains some new settings, but we can safely remove it`
			`rm /etc/clickhouse-server/config.d/merge_tree.xml`
fix 'Illegal column timezone' in stress tests 2023-06-13 14:02:54 +00:00			`rm /etc/clickhouse-server/users.d/nonconst_timezone.xml`
Add zookeeper name in endpoint id (#49780) * Add zookeeper name in endpoint id When we migrate a replicated table from one zookeeper cluster to another (the reason why we migration is that zookeeper's load is too high), we will create a new table with the same zpath, but it will fail and the old table will be in trouble. Here is some infomation: 1.old table: CREATE TABLE a1 (`id` UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/default/a1/{shard}', '{replica}') ORDER BY (id); 2.new table: CREATE TABLE a2 (`id` UInt64) ENGINE = ReplicatedMergeTree('aux1:/clickhouse/tables/default/a1/{shard}', '{replica}') ORDER BY (id); 3.error info: <Error> executeQuery: Code: 220. DB::Exception: Duplicate interserver IO endpoint: DataPartsExchange:/clickhouse/tables/default/a1/01/replicas/02. (DUPLICATE_INTERSERVER_IO_ENDPOINT) <Error> InterserverIOHTTPHandler: Code: 221. DB::Exception: No interserver IO endpoint named DataPartsExchange:/clickhouse/tables/default/a1/01/replicas/02. (NO_SUCH_INTERSERVER_IO_ENDPOINT) * Revert "Add zookeeper name in endpoint id" This reverts commit 9deb75b249619b7abdd38e3949ca8b3a76c9df8e. * Add zookeeper name in endpoint id When we migrate a replicated table from one zookeeper cluster to another (the reason why we migration is that zookeeper's load is too high), we will create a new table with the same zpath, but it will fail and the old table will be in trouble. * Fix incompatible with a new setting * add a test, fix other issues * Update 02442_auxiliary_zookeeper_endpoint_id.sql * Update 02735_system_zookeeper_connection.reference * Update 02735_system_zookeeper_connection.sql * Update run.sh * Remove the 'no-fasttest' tag * Update 02442_auxiliary_zookeeper_endpoint_id.sql --------- Co-authored-by: Alexander Tokmakov <tavplubix@clickhouse.com> Co-authored-by: Alexander Tokmakov <tavplubix@gmail.com> 2023-05-25 09:50:14 +00:00
ci/upgrade: store system.*_log tables on local filesystem (and fix artifacts) Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> 2023-03-14 13:11:59 +00:00			`start`
			`stop`
			`mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log`
Use force_sync=true in upgrade check 2023-03-09 14:45:26 +00:00
			`# force_sync=false doesn't work correctly on some older versions`
			`sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \`
			`\| sed "s\|<force_sync>false</force_sync>\|<force_sync>true</force_sync>\|" \`
			`> /etc/clickhouse-server/config.d/keeper_port.xml.tmp`
			`sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml`

Make better 2023-02-24 16:43:28 +00:00			`# But we still need default disk because some tables loaded only into it`
			`sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \`
			`\| sed "s\|<main><disk>s3</disk></main>\|<main><disk>s3</disk></main><default><disk>default</disk></default>\|" \`
			`> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml`
			`sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml`
			`sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml`

ci/upgrade: store system.*_log tables on local filesystem (and fix artifacts) Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> 2023-03-14 13:11:59 +00:00			`# Start server from previous release`
			`# Let's enable S3 storage by default`
			`export USE_S3_STORAGE_FOR_MERGE_TREE=1`
			`# Previous version may not be ready for fault injections`
			`export ZOOKEEPER_FAULT_INJECTION=0`
			`configure`

fix 2023-06-19 10:50:13 +00:00			`# it contains some new settings, but we can safely remove it`
			`rm /etc/clickhouse-server/config.d/merge_tree.xml`
fix 'Illegal column timezone' in stress tests 2023-06-13 14:02:54 +00:00			`rm /etc/clickhouse-server/users.d/nonconst_timezone.xml`
Add zookeeper name in endpoint id (#49780) * Add zookeeper name in endpoint id When we migrate a replicated table from one zookeeper cluster to another (the reason why we migration is that zookeeper's load is too high), we will create a new table with the same zpath, but it will fail and the old table will be in trouble. Here is some infomation: 1.old table: CREATE TABLE a1 (`id` UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/default/a1/{shard}', '{replica}') ORDER BY (id); 2.new table: CREATE TABLE a2 (`id` UInt64) ENGINE = ReplicatedMergeTree('aux1:/clickhouse/tables/default/a1/{shard}', '{replica}') ORDER BY (id); 3.error info: <Error> executeQuery: Code: 220. DB::Exception: Duplicate interserver IO endpoint: DataPartsExchange:/clickhouse/tables/default/a1/01/replicas/02. (DUPLICATE_INTERSERVER_IO_ENDPOINT) <Error> InterserverIOHTTPHandler: Code: 221. DB::Exception: No interserver IO endpoint named DataPartsExchange:/clickhouse/tables/default/a1/01/replicas/02. (NO_SUCH_INTERSERVER_IO_ENDPOINT) * Revert "Add zookeeper name in endpoint id" This reverts commit 9deb75b249619b7abdd38e3949ca8b3a76c9df8e. * Add zookeeper name in endpoint id When we migrate a replicated table from one zookeeper cluster to another (the reason why we migration is that zookeeper's load is too high), we will create a new table with the same zpath, but it will fail and the old table will be in trouble. * Fix incompatible with a new setting * add a test, fix other issues * Update 02442_auxiliary_zookeeper_endpoint_id.sql * Update 02735_system_zookeeper_connection.reference * Update 02735_system_zookeeper_connection.sql * Update run.sh * Remove the 'no-fasttest' tag * Update 02442_auxiliary_zookeeper_endpoint_id.sql --------- Co-authored-by: Alexander Tokmakov <tavplubix@clickhouse.com> Co-authored-by: Alexander Tokmakov <tavplubix@gmail.com> 2023-05-25 09:50:14 +00:00
Make better 2023-02-24 16:43:28 +00:00			`start`

			`clickhouse-client --query="SELECT 'Server version: ', version()"`

			`mkdir tmp_stress_output`

			`stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --upgrade-check --output-folder tmp_stress_output --global-time-limit=1200 \`
			`&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \`
			`\|\| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv`

			`rm -rf tmp_stress_output`

			`# We experienced deadlocks in this command in very rare cases. Let's debug it:`
			`timeout 10m clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" \|\|`
			`(`
			`echo "thread apply all backtrace (on select tables count)" >> /test_output/gdb.log`
			`timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" \| ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log`
			`clickhouse stop --force`
			`)`

			`# Use bigger timeout for previous version and disable additional hang check`
			`stop 300 false`
			`mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log`

			`# Install and start new server`
			`install_packages package_folder`
Enable ZK fault injection on new servers (but not during startup) 2023-03-27 13:07:59 +00:00			`export ZOOKEEPER_FAULT_INJECTION=1`
Make better 2023-02-24 16:43:28 +00:00			`configure`
suppress check for covered parts in zk 2023-06-20 21:53:32 +00:00
			`# Just in case previous version left some garbage in zk`
			`sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \`
			`\| sed "s\|>1<\|>0<\|g" \`
			`> /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp`
			`sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml`

Make better 2023-02-24 16:43:28 +00:00			`start 500`
			`clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \`
			`\|\| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \`
			`&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \`
			`>> /test_output/test_results.tsv)`

			`# Remove file application_errors.txt if it's empty`
			`[ -s /test_output/application_errors.txt ] \|\| rm /test_output/application_errors.txt`

			`clickhouse-client --query="SELECT 'Server version: ', version()"`

			`# Let the server run for a while before checking log.`
			`sleep 60`

			`stop`
			`mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.upgrade.log`

			`# Error messages (we should ignore some errors)`
			`# FIXME https://github.com/ClickHouse/ClickHouse/issues/38643 ("Unknown index: idx.")`
			`# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 ("Cannot parse string 'Hello' as UInt64")`
			`# FIXME Not sure if it's expected, but some tests from stress test may not be finished yet when we restarting server.`
			`# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")`
			`# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")`
			`# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility`
			`echo "Check for Error messages in server log:"`
			`rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \`
			`-e "Code: 236. DB::Exception: Cancelled mutating parts" \`
			`-e "REPLICA_IS_ALREADY_ACTIVE" \`
			`-e "REPLICA_ALREADY_EXISTS" \`
			`-e "ALL_REPLICAS_LOST" \`
			`-e "DDLWorker: Cannot parse DDL task query" \`
			`-e "RaftInstance: failed to accept a rpc connection due to error 125" \`
			`-e "UNKNOWN_DATABASE" \`
			`-e "NETWORK_ERROR" \`
			`-e "UNKNOWN_TABLE" \`
			`-e "ZooKeeperClient" \`
			`-e "KEEPER_EXCEPTION" \`
			`-e "DirectoryMonitor" \`
			`-e "TABLE_IS_READ_ONLY" \`
			`-e "Code: 1000, e.code() = 111, Connection refused" \`
			`-e "UNFINISHED" \`
			`-e "NETLINK_ERROR" \`
			`-e "Renaming unexpected part" \`
			`-e "PART_IS_TEMPORARILY_LOCKED" \`
			`-e "and a merge is impossible: we didn't find" \`
			`-e "found in queue and some source parts for it was lost" \`
			`-e "is lost forever." \`
			`-e "Unknown index: idx." \`
			`-e "Cannot parse string 'Hello' as UInt64" \`
			`-e "} <Error> TCPHandler: Code:" \`
			`-e "} <Error> executeQuery: Code:" \`
			`-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \`
			`-e "The set of parts restored in place of" \`
			`-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \`
			`-e "Code: 269. DB::Exception: Destination table is myself" \`
			`-e "Coordination::Exception: Connection loss" \`
			`-e "MutateFromLogEntryTask" \`
			`-e "No connection to ZooKeeper, cannot get shared table ID" \`
			`-e "Session expired" \`
			`-e "TOO_MANY_PARTS" \`
			`-e "Authentication failed" \`
Suppress "Cannot flush" for Distributed tables in upgrade check CI reports [1]: 2023.03.01 04:39:00.185189 [ 358842 ] {} <Error> StorageDistributed (dist_01555): Cannot flush: Code: 279. DB::NetException: All connection tries failed. Log: 2023.03.01 04:39:00.242066 [ 358842 ] {} <Error> StorageDistributed (dist_01555): Cannot flush: Code: 279. DB::NetException: All connection tries failed. Log: [1]: https://s3.amazonaws.com/clickhouse-test-reports/47042/0e0a24234aa3409bb7799cc9a4b7a72bb006a923/upgrade_check__asan_.html Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com> 2023-03-01 16:36:01 +00:00			`-e "Cannot flush" \`
Make better 2023-02-24 16:43:28 +00:00			`-e "Container already exists" \`
fix 2023-06-28 21:20:02 +00:00			`-e "doesn't have metadata version on disk" \`
suppress expected errors from test 01111 in Upgrade check 2023-03-08 14:27:43 +00:00			`clickhouse-server.upgrade.log \`
			`\| grep -av -e "_repl_01111_.*Mapping for table with UUID" \`
			`\| zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \`
Update run.sh 2023-03-03 12:02:05 +00:00			`&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \`
Make better 2023-02-24 16:43:28 +00:00			`>> /test_output/test_results.tsv \`
			`\|\| echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv`

			`# Remove file upgrade_error_messages.txt if it's empty`
			`[ -s /test_output/upgrade_error_messages.txt ] \|\| rm /test_output/upgrade_error_messages.txt`

			`# Grep logs for sanitizer asserts, crashes and other critical errors`
			`check_logs_for_critical_errors`

			`tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination \|\|:`

			`collect_query_and_trace_logs`

			`mv /var/log/clickhouse-server/stderr.log /test_output/`
Fix 2023-01-24 16:45:07 +00:00
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00			`# Write check result into check_status.tsv`
Merge branch 'master' of github.com:ClickHouse/ClickHouse into split-bc-check 2023-02-09 13:24:19 +00:00			`# Try to choose most specific error for the whole check status`
Make better 2023-02-24 16:43:28 +00:00			`clickhouse-local --structure "test String, res String, time Nullable(Float32), desc String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by`
Merge branch 'master' of github.com:ClickHouse/ClickHouse into split-bc-check 2023-02-09 13:24:19 +00:00			`(test like '%Sanitizer%') DESC,`
			`(test like '%Killed by signal%') DESC,`
			`(test like '%gdb.log%') DESC,`
			`(test ilike '%possible deadlock%') DESC,`
			`(test like '%start%') DESC,`
			`(test like '%dmesg%') DESC,`
			`(test like '%OOM%') DESC,`
			`(test like '%Signal 9%') DESC,`
			`(test like '%Fatal message%') DESC,`
			`(test like '%Error message%') DESC,`
			`(test like '%previous release%') DESC,`
			`rowNumberInAllBlocks()`
Make better 2023-02-24 16:43:28 +00:00			`LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv \|\| echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv`
Split stress test and backward compatibility check 2023-01-03 19:22:31 +00:00			`[ -s /test_output/check_status.tsv ] \|\| echo -e "success\tNo errors found" > /test_output/check_status.tsv`

Update 2023-02-27 15:42:29 +00:00			`collect_core_dumps`