ClickHouse/tests/docker_scripts/stateless_runner.sh

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

488 lines
22 KiB
Bash
Raw Normal View History

2020-07-08 08:41:39 +00:00
#!/bin/bash
2024-08-09 14:22:33 +00:00
# fail on errors, verbose and export all env variables
set -e -x -a
2023-08-16 20:53:51 +00:00
# shellcheck disable=SC1091
source /setup_export_logs.sh
# shellcheck source=../stateless/stress_tests.lib
2024-08-09 14:22:33 +00:00
source /repo/tests/docker_scripts/stress_tests.lib
# Avoid overlaps with previous runs
dmesg --clear
2020-10-21 20:11:35 +00:00
# fail on errors, verbose and export all env variables
set -e -x -a
2020-07-08 08:41:39 +00:00
USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}
# Choose random timezone for this test run.
#
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
# (it will choose between default server timezone and something specific).
2023-01-01 19:53:06 +00:00
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
2024-07-05 09:57:54 +00:00
echo "Chosen random timezone $TZ"
2021-01-06 04:20:33 +00:00
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
2020-07-08 08:41:39 +00:00
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
2024-07-05 09:57:54 +00:00
dpkg -i package_folder/clickhouse-odbc-bridge_*.deb
dpkg -i package_folder/clickhouse-library-bridge_*.deb
2020-07-08 08:41:39 +00:00
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
2022-02-15 12:03:51 +00:00
echo "$BUGFIX_VALIDATE_CHECK"
2023-11-12 01:45:25 +00:00
# Check that the tools are available under short names
if [[ -z "$BUGFIX_VALIDATE_CHECK" ]]; then
ch --query "SELECT 1" || exit 1
chl --query "SELECT 1" || exit 1
chc --version || exit 1
fi
2023-11-12 01:45:25 +00:00
2024-08-09 14:22:33 +00:00
ln -sf /repo/tests/clickhouse-test /usr/bin/clickhouse-test
export CLICKHOUSE_GRPC_CLIENT="/repo/utils/grpc-client/clickhouse-grpc-client.py"
2020-07-08 08:41:39 +00:00
2023-06-06 10:23:00 +00:00
# shellcheck disable=SC1091
2024-08-09 14:22:33 +00:00
source /repo/tests/docker_scripts/attach_gdb.lib
2023-06-05 18:21:40 +00:00
2023-06-30 14:41:27 +00:00
# shellcheck disable=SC1091
2024-08-09 14:22:33 +00:00
source /repo/tests/docker_scripts/utils.lib
2023-06-30 14:41:27 +00:00
2020-09-24 08:18:36 +00:00
# install test configs
2024-08-09 14:22:33 +00:00
/repo/tests/config/install.sh
2020-07-08 08:41:39 +00:00
2024-08-09 14:22:33 +00:00
/repo/tests/docker_scripts/setup_minio.sh stateless
2024-07-10 18:41:48 +00:00
2024-08-09 14:22:33 +00:00
/repo/tests/docker_scripts/setup_hdfs_minicluster.sh
2021-08-26 13:12:42 +00:00
2023-08-16 20:53:51 +00:00
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
2023-08-06 02:38:04 +00:00
if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; then
function remove_keeper_config()
{
2024-03-19 16:53:32 +00:00
sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml
}
2024-09-20 07:50:45 +00:00
remove_keeper_config "remove_recursive" "[[:digit:]]\+"
fi
2024-08-04 21:59:01 +00:00
export IS_FLAKY_CHECK=0
# Export NUM_TRIES so python scripts will see its value as env variable
export NUM_TRIES
# For flaky check we also enable thread fuzzer
if [ "$NUM_TRIES" -gt "1" ]; then
2024-08-04 21:59:01 +00:00
export IS_FLAKY_CHECK=1
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
2024-03-14 22:02:09 +00:00
export THREAD_FUZZER_SLEEP_TIME_US_MAX=100000
2020-12-21 20:24:16 +00:00
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
2024-03-14 22:02:09 +00:00
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US_MAX=10000
2020-12-21 20:24:16 +00:00
mkdir -p /var/run/clickhouse-server
fi
2020-07-08 08:41:39 +00:00
# simplest way to forward env variables to server
sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
2024-03-19 16:53:32 +00:00
sudo sed -i "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
sudo sed -i "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_2/</filesystem_caches_path>|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
sudo sed -i "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
sudo sed -i "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_2/</custom_cached_disks_base_directory>|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
mkdir -p /var/run/clickhouse-server1
sudo chown clickhouse:clickhouse /var/run/clickhouse-server1
2021-03-18 12:49:31 +00:00
sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \
--pid-file /var/run/clickhouse-server1/clickhouse-server.pid \
2021-03-18 12:49:31 +00:00
-- --path /var/lib/clickhouse1/ --logger.stderr /var/log/clickhouse-server/stderr1.log \
--logger.log /var/log/clickhouse-server/clickhouse-server1.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server1.err.log \
--tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \
2021-04-12 18:40:34 +00:00
--mysql_port 19004 --postgresql_port 19005 \
2021-03-29 20:04:50 +00:00
--keeper_server.tcp_port 19181 --keeper_server.server_id 2 \
--prometheus.port 19988 \
--macros.replica r2 # It doesn't work :(
2021-03-13 10:22:48 +00:00
mkdir -p /var/run/clickhouse-server2
sudo chown clickhouse:clickhouse /var/run/clickhouse-server2
2021-03-18 12:49:31 +00:00
sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server2/config.xml --daemon \
--pid-file /var/run/clickhouse-server2/clickhouse-server.pid \
2021-03-18 12:49:31 +00:00
-- --path /var/lib/clickhouse2/ --logger.stderr /var/log/clickhouse-server/stderr2.log \
--logger.log /var/log/clickhouse-server/clickhouse-server2.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server2.err.log \
2021-03-13 10:22:48 +00:00
--tcp_port 29000 --tcp_port_secure 29440 --http_port 28123 --https_port 28443 --interserver_http_port 29009 --tcp_with_proxy_port 29010 \
2021-04-12 18:40:34 +00:00
--mysql_port 29004 --postgresql_port 29005 \
2021-03-29 20:04:50 +00:00
--keeper_server.tcp_port 29181 --keeper_server.server_id 3 \
--prometheus.port 29988 \
2021-03-13 10:22:48 +00:00
--macros.shard s2 # It doesn't work :(
2020-07-08 08:41:39 +00:00
fi
if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
| sed "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" \
> /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
| sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" \
> /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
mkdir -p /var/run/clickhouse-server1
sudo chown clickhouse:clickhouse /var/run/clickhouse-server1
sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \
--pid-file /var/run/clickhouse-server1/clickhouse-server.pid \
-- --path /var/lib/clickhouse1/ --logger.stderr /var/log/clickhouse-server/stderr1.log \
--logger.log /var/log/clickhouse-server/clickhouse-server1.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server1.err.log \
--tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \
--mysql_port 19004 --postgresql_port 19005 \
--keeper_server.tcp_port 19181 --keeper_server.server_id 2 \
--prometheus.port 19988 \
--macros.replica r2 # It doesn't work :(
fi
2023-08-06 02:38:04 +00:00
2023-08-10 02:32:39 +00:00
# Wait for the server to start, but not for too long.
for _ in {1..100}
2023-08-06 02:38:04 +00:00
do
clickhouse-client --query "SELECT 1" && break
sleep 1
done
2023-08-16 20:53:51 +00:00
setup_logs_replication
2024-08-01 17:50:13 +00:00
attach_gdb_to_clickhouse
2023-06-02 15:00:24 +00:00
2024-08-07 18:13:50 +00:00
# create tables for minio log webhooks
2024-08-07 15:00:41 +00:00
clickhouse-client --query "CREATE TABLE minio_audit_logs
(
log String,
2024-08-09 10:56:53 +00:00
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
2024-08-07 15:00:41 +00:00
)
ENGINE = MergeTree
ORDER BY tuple()"
clickhouse-client --query "CREATE TABLE minio_server_logs
(
log String,
2024-08-09 10:56:53 +00:00
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
2024-08-07 15:00:41 +00:00
)
ENGINE = MergeTree
ORDER BY tuple()"
2024-08-07 18:13:50 +00:00
# create minio log webhooks for both audit and server logs
2024-08-08 15:12:27 +00:00
# use async inserts to avoid creating too many parts
2024-08-10 16:58:28 +00:00
./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500
./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500
2024-08-12 06:32:14 +00:00
2024-08-07 18:13:50 +00:00
max_retries=100
retry=1
while [ $retry -le $max_retries ]; do
echo "clickminio restart attempt $retry:"
2024-08-12 06:32:14 +00:00
output=$(./mc admin service restart clickminio --wait --json 2>&1 | jq -r .status)
echo "Output of restart status: $output"
2024-08-07 18:13:50 +00:00
2024-08-12 06:32:14 +00:00
expected_output="success
success"
if [ "$output" = "$expected_output" ]; then
2024-08-07 18:13:50 +00:00
echo "Restarted clickminio successfully."
break
fi
sleep 1
retry=$((retry + 1))
done
if [ $retry -gt $max_retries ]; then
echo "Failed to restart clickminio after $max_retries attempts."
fi
./mc admin trace clickminio > /test_output/minio.log &
MC_ADMIN_PID=$!
2024-08-07 15:00:41 +00:00
2023-07-02 09:52:50 +00:00
function fn_exists() {
declare -F "$1" > /dev/null;
}
# FIXME: to not break old builds, clean on 2023-09-01
function try_run_with_retry() {
local total_retries="$1"
shift
if fn_exists run_with_retry; then
run_with_retry "$total_retries" "$@"
else
"$@"
fi
}
function run_tests()
{
2021-03-16 16:39:31 +00:00
set -x
# We can have several additional options so we pass them as array because it is more ideologically correct.
2020-10-22 14:33:23 +00:00
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
HIGH_LEVEL_COVERAGE=YES
# Use random order in flaky check
if [ "$NUM_TRIES" -gt "1" ]; then
2021-03-29 18:24:29 +00:00
ADDITIONAL_OPTIONS+=('--order=random')
HIGH_LEVEL_COVERAGE=NO
fi
if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--s3-storage')
fi
2024-04-15 16:52:17 +00:00
if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
# to disable the same tests
2024-07-02 12:15:59 +00:00
ADDITIONAL_OPTIONS+=('--azure-blob-storage')
2024-04-15 16:52:17 +00:00
# azurite is slow, but with these two settings it can be super slow
ADDITIONAL_OPTIONS+=('--no-random-settings')
ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings')
fi
if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--shared-catalog')
fi
2024-07-29 10:57:36 +00:00
if [[ "$USE_DISTRIBUTED_CACHE" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--distributed-cache')
fi
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
2021-02-15 10:26:34 +00:00
ADDITIONAL_OPTIONS+=('--replicated-database')
2024-02-09 02:55:39 +00:00
# Too many tests fail for DatabaseReplicated in parallel.
ADDITIONAL_OPTIONS+=('--jobs')
ADDITIONAL_OPTIONS+=('3')
2024-02-10 01:27:55 +00:00
elif [[ 1 == $(clickhouse-client --query "SELECT value LIKE '%SANITIZE_COVERAGE%' FROM system.build_options WHERE name = 'CXX_FLAGS'") ]]; then
2024-02-09 02:55:39 +00:00
# Coverage on a per-test basis could only be collected sequentially.
# Do not set the --jobs parameter.
echo "Running tests with coverage collection."
else
2024-02-09 02:55:39 +00:00
# All other configurations are OK.
ADDITIONAL_OPTIONS+=('--jobs')
ADDITIONAL_OPTIONS+=('8')
2021-02-15 10:26:34 +00:00
fi
if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then
ADDITIONAL_OPTIONS+=('--run-by-hash-num')
ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM")
ADDITIONAL_OPTIONS+=('--run-by-hash-total')
ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL")
HIGH_LEVEL_COVERAGE=NO
fi
2022-06-23 07:59:13 +00:00
if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--db-engine=Ordinary')
fi
if [[ "${HIGH_LEVEL_COVERAGE}" = "YES" ]]; then
ADDITIONAL_OPTIONS+=('--report-coverage')
fi
2023-01-17 19:04:25 +00:00
ADDITIONAL_OPTIONS+=('--report-logs-stats')
2023-07-02 09:52:50 +00:00
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
Add zookeeper name in endpoint id (#49780) * Add zookeeper name in endpoint id When we migrate a replicated table from one zookeeper cluster to another (the reason why we migration is that zookeeper's load is too high), we will create a new table with the same zpath, but it will fail and the old table will be in trouble. Here is some infomation: 1.old table: CREATE TABLE a1 (`id` UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/default/a1/{shard}', '{replica}') ORDER BY (id); 2.new table: CREATE TABLE a2 (`id` UInt64) ENGINE = ReplicatedMergeTree('aux1:/clickhouse/tables/default/a1/{shard}', '{replica}') ORDER BY (id); 3.error info: <Error> executeQuery: Code: 220. DB::Exception: Duplicate interserver IO endpoint: DataPartsExchange:/clickhouse/tables/default/a1/01/replicas/02. (DUPLICATE_INTERSERVER_IO_ENDPOINT) <Error> InterserverIOHTTPHandler: Code: 221. DB::Exception: No interserver IO endpoint named DataPartsExchange:/clickhouse/tables/default/a1/01/replicas/02. (NO_SUCH_INTERSERVER_IO_ENDPOINT) * Revert "Add zookeeper name in endpoint id" This reverts commit 9deb75b249619b7abdd38e3949ca8b3a76c9df8e. * Add zookeeper name in endpoint id When we migrate a replicated table from one zookeeper cluster to another (the reason why we migration is that zookeeper's load is too high), we will create a new table with the same zpath, but it will fail and the old table will be in trouble. * Fix incompatible with a new setting * add a test, fix other issues * Update 02442_auxiliary_zookeeper_endpoint_id.sql * Update 02735_system_zookeeper_connection.reference * Update 02735_system_zookeeper_connection.sql * Update run.sh * Remove the 'no-fasttest' tag * Update 02442_auxiliary_zookeeper_endpoint_id.sql --------- Co-authored-by: Alexander Tokmakov <tavplubix@clickhouse.com> Co-authored-by: Alexander Tokmakov <tavplubix@gmail.com>
2023-05-25 09:50:14 +00:00
set +e
TEST_ARGS=(
--testname
--shard
--zookeeper
--check-zookeeper-session
--hung-check
--print-time
--no-drop-if-fail
--capture-client-stacktrace
2024-08-09 14:22:33 +00:00
--queries "/repo/tests/queries"
--test-runs "$NUM_TRIES"
"${ADDITIONAL_OPTIONS[@]}"
)
clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
set -e
}
export -f run_tests
if [ "$NUM_TRIES" -gt "1" ]; then
# We don't run tests with Ordinary database in PRs, only in master.
# So run new/changed tests with Ordinary at least once in flaky check.
2024-08-22 13:50:59 +00:00
NUM_TRIES=1 USE_DATABASE_ORDINARY=1 run_tests \
2024-07-19 22:04:52 +00:00
| sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
fi
run_tests ||:
2021-01-14 10:42:51 +00:00
2021-12-10 16:38:20 +00:00
echo "Files in current directory"
ls -la ./
echo "Files in root directory"
ls -la /
2024-08-09 14:22:33 +00:00
/repo/tests/docker_scripts/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
2021-02-20 20:04:24 +00:00
2021-03-30 10:49:43 +00:00
clickhouse-client -q "system flush logs" ||:
2021-03-29 11:47:57 +00:00
# stop logs replication to make it possible to dump logs tables via clickhouse-local
stop_logs_replication
# Try to get logs while server is running
2024-01-26 20:22:39 +00:00
failed_to_save_logs=0
for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log
do
if ! clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.zst' format TSVWithNamesAndTypes"; then
failed_to_save_logs=1
fi
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
if ! clickhouse-client --port 19000 -q "select * from system.$table into outfile '/test_output/$table.1.tsv.zst' format TSVWithNamesAndTypes"; then
failed_to_save_logs=1
fi
if ! clickhouse-client --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
failed_to_save_logs=1
fi
fi
if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
if ! clickhouse-client --port 29000 -q "select * from system.$table into outfile '/test_output/$table.2.tsv.zst' format TSVWithNamesAndTypes"; then
failed_to_save_logs=1
fi
fi
done
2024-08-07 15:00:41 +00:00
# collect minio audit and server logs
2024-08-10 16:58:28 +00:00
# wait for minio to flush its batch if it has any
sleep 1
2024-08-09 07:44:30 +00:00
clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
2024-08-14 02:54:33 +00:00
clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"
2024-08-07 15:00:41 +00:00
# Stop server so we can safely read data with clickhouse-local.
# Why do we read data with clickhouse-local?
# Because it's the simplest way to read it when server has crashed.
sudo clickhouse stop ||:
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||:
sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||:
2022-05-03 22:10:40 +00:00
fi
if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||:
fi
2024-07-10 18:41:48 +00:00
# Kill minio admin client to stop collecting logs
kill $MC_ADMIN_PID
2023-02-14 12:12:53 +00:00
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||:
2023-03-23 14:36:17 +00:00
rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||:
2023-01-01 20:17:43 +00:00
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
data_path_config="--path=/var/lib/clickhouse/"
if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
# We need s3 storage configuration (but it's more likely that clickhouse-local will fail for some reason)
data_path_config="--config-file=/etc/clickhouse-server/config.xml"
fi
# If server crashed dump system logs with clickhouse-local
2024-01-26 20:22:39 +00:00
if [ $failed_to_save_logs -ne 0 ]; then
# Compress tables.
#
# NOTE:
# - that due to tests with s3 storage we cannot use /var/lib/clickhouse/data
# directly
# - even though ci auto-compress some files (but not *.tsv) it does this only
# for files >64MB, we want this files to be compressed explicitly
for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log
do
2024-01-31 12:44:16 +00:00
clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
2024-01-31 12:44:16 +00:00
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
fi
if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
fi
done
fi
# Also export trace log in flamegraph-friendly format.
for trace_type in CPU Memory Real
do
clickhouse-local "$data_path_config" --only-system-tables -q "
select
arrayStringConcat((arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack,
count(*) AS samples
from system.trace_log
2021-04-23 20:03:53 +00:00
where trace_type = '$trace_type'
group by trace
order by samples desc
settings allow_introspection_functions = 1
format TabSeparated" \
2023-01-01 20:17:43 +00:00
| zstd --threads=0 > "/test_output/trace-log-$trace_type-flamegraph.tsv.zst" ||:
done
# Grep logs for sanitizer asserts, crashes and other critical errors
check_logs_for_critical_errors
2021-03-29 11:47:57 +00:00
# Compressed (FIXME: remove once only github actions will be left)
rm /var/log/clickhouse-server/clickhouse-server.log
2021-02-22 13:53:43 +00:00
mv /var/log/clickhouse-server/stderr.log /test_output/ ||:
if [[ -n "$WITH_COVERAGE" ]] && [[ "$WITH_COVERAGE" -eq 1 ]]; then
2023-01-01 20:17:43 +00:00
tar --zstd -chf /test_output/clickhouse_coverage.tar.zst /profraw ||:
2021-02-22 13:53:43 +00:00
fi
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
rm -rf /var/lib/clickhouse/data/system/*/
tar -chf /test_output/store.tar /var/lib/clickhouse/store ||:
tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||:
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
2023-02-14 12:12:53 +00:00
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server1.log ||:
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server2.log ||:
2023-01-01 20:17:43 +00:00
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||:
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.zst ||:
2021-03-18 12:49:31 +00:00
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
mv /var/log/clickhouse-server/stderr2.log /test_output/ ||:
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||:
fi
if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server1.log ||:
zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||:
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
fi
2024-07-09 15:06:37 +00:00
collect_core_dumps