#!/bin/bash
# shellcheck disable=SC2094
# shellcheck disable=SC2086

# Trace every executed command to stderr.
set -x

# Thread Fuzzer makes it possible to check more permutations of thread
# scheduling and to find more potential issues.
# The settings are exported so that every process started from this script
# inherits them. Note: there must be no whitespace around `=` in an assignment.
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000

export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1

export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000

# Install the ClickHouse packages found in package_folder/, one at a time,
# in the order listed here.
for package in common-static common-static-dbg server client test
do
    dpkg -i package_folder/clickhouse-"${package}"_*.deb
done
# Prepare the server configuration for the stress run:
#   - install the test configs;
#   - lower the keeper snapshot_distance;
#   - set ASAN_OPTIONS for the server (via /etc/environment) and the client;
#   - set a 1 second asynchronous metrics update period;
#   - write memory limits derived from MemTotal in /proc/meminfo.
# Takes no arguments. Exports ASAN_OPTIONS into the current shell.
function configure()
{
    # install test configs
    /usr/share/clickhouse-test/config/install.sh

    # avoid too slow startup
    sudo cat /etc/clickhouse-server/config.d/keeper_port.xml | sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
    sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
    sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
    sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml

    # for clickhouse-server (via service)
    echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
    # for clickhouse-client
    export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'

    # since we run clickhouse from root
    sudo chown root: /var/lib/clickhouse

    # Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
    echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
        > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml

    # Declaration is separate from assignment so that `local` does not mask
    # the exit status of the command substitution.
    local total_mem
    total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB
    total_mem=$(( total_mem*1024 )) # bytes

    # Set maximum memory usage as half of total memory (less chance of OOM).
    #
    # But not via max_server_memory_usage but via max_memory_usage_for_user,
    # so that we can override this setting and execute service queries, like:
    # - hung check
    # - show/drop database
    # - ...
    #
    # So max_memory_usage_for_user will be a soft limit, and
    # max_server_memory_usage will be hard limit, and queries that should be
    # executed regardless memory limits will use max_memory_usage_for_user=0,
    # instead of relying on max_untracked_memory

    local max_server_mem
    max_server_mem=$(( total_mem*75/100 )) # 75%
    echo "Setting max_server_memory_usage=$max_server_mem"
    # The heredoc delimiter is unquoted, so ${max_server_mem} is expanded.
    cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml <<EOL
<clickhouse>
    <max_server_memory_usage>${max_server_mem}</max_server_memory_usage>
</clickhouse>
EOL

    local max_users_mem
    max_users_mem=$(( total_mem*50/100 )) # 50%
    echo "Setting max_memory_usage_for_user=$max_users_mem"
    cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml <<EOL
<clickhouse>
    <profiles>
        <default>
            <max_memory_usage_for_user>${max_users_mem}</max_memory_usage_for_user>
        </default>
    </profiles>
</clickhouse>
EOL
}

# Stop the server by running `clickhouse stop`.
# Takes no arguments; returns the exit status of `clickhouse stop`.
function stop()
{
    clickhouse stop
}

# Start the server and attach gdb to it.
#
# Steps:
#   1. If a server log already exists, rename it to the first free
#      clickhouse-server.log.N name.
#   2. Run `clickhouse start --user root` until `clickhouse-client` can execute
#      "SELECT 1", sleeping 0.5 s between attempts. Once the attempt counter
#      exceeds 240, print the logs and stop retrying.
#   3. Write script.gdb into the current directory and run gdb in the
#      background against the pid from the server pid file, appending its
#      output to /test_output/gdb.log.
#
# Takes no arguments. Giving up in step 2 only breaks out of the loop: step 3
# still runs and the function does not report the failure through its status.
function start()
{
    local log_file_counter
    local counter

    # Rename existing log file - it will be more convenient to read separate files for separate server runs.
    if [ -f '/var/log/clickhouse-server/clickhouse-server.log' ]
    then
        log_file_counter=1
        while [ -f "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}" ]
        do
            log_file_counter=$((log_file_counter + 1))
        done
        mv '/var/log/clickhouse-server/clickhouse-server.log' "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}"
    fi

    counter=0
    until clickhouse-client --query "SELECT 1"
    do
        if [ "$counter" -gt 240 ]
        then
            echo "Cannot start clickhouse-server"
            cat /var/log/clickhouse-server/stdout.log
            tail -n1000 /var/log/clickhouse-server/stderr.log
            # RaftInstance warnings/information lines are filtered out of the server log excerpt.
            tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | grep -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n1000
            break
        fi
        # use root to match with current uid
        clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
        sleep 0.5
        counter=$((counter + 1))
    done

    # gdb commands: stop only on SIGSEGV/SIGBUS/SIGABRT, then dump the
    # backtraces of all threads, detach and quit.
    echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
handle SIGABRT stop print
continue
thread apply all backtrace
detach
quit
" > script.gdb

    # FIXME Hung check may work incorrectly because of attached gdb
    # 1. False positives are possible
    # 2. We cannot attach another gdb to get stacktraces if some queries hung
    gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log &
}

# Configure and start the server, download the datasets, then restart the
# server and move the dataset tables into the `test` database.
configure

start

# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"

stop
start

clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "SHOW TABLES FROM test"
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
clickhouse-client --query "SHOW TABLES FROM test"

# Run the stress test and record in test_results.tsv whether the test script
# itself succeeded. An explicit if/else is used instead of `A && B || C`, so
# that the FAIL row is written only when ./stress fails (and not additionally
# when writing the OK row fails).
if ./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION"
then
    echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv
else
    echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv
fi

# Restart the server after the stress run and check that it answers queries.
stop
start

# On success the query result itself is appended to the results file as the
# "OK" row; on failure a FAIL row is appended instead.
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
    || echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv

# NOTE(review): these two messages are printed to stdout only and are not
# appended to /test_output/test_results.tsv - confirm that this is intended.
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
# Print Fatal log messages to stdout
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*

# Grep logs for sanitizer asserts, crashes and other critical errors.
# Every check appends one "<description>\t<OK|FAIL>" row to
# /test_output/test_results.tsv. Explicit if/else is used instead of
# `A && B || C`, so that exactly one branch is taken per check.

# Sanitizer asserts
# Candidate lines are collected into a temporary file; the check fails if at
# least one of them is not the known makecontext/swapcontext ASan message.
grep -Fa "==================" /var/log/clickhouse-server/stderr.log | grep -v "in query:" >> /test_output/tmp
grep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
if zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null
then
    echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv
else
    echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv
fi
rm -f /test_output/tmp

# OOM
if zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null
then
    echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv
else
    echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
fi

# Logical errors
if zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null
then
    echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv
else
    echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
fi

# Crash
if zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null
then
    echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv
else
    echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
fi

# It also checks for crash without stacktrace (printed by watchdog)
if zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null
then
    echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv
else
    echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
fi

# Same crash marker, but searched in the files under /test_output/.
# There is deliberately no OK row for this check.
if zgrep -Fa "########################################" /test_output/* > /dev/null
then
    echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
fi

# Put logs into /test_output/
for log_file in /var/log/clickhouse-server/clickhouse-server.log*
do
    # If the glob matched nothing, bash leaves the pattern itself: skip it.
    [ -e "${log_file}" ] || continue
    pigz < "${log_file}" > /test_output/"$(basename "${log_file}")".gz
    # FIXME: remove once only github actions will be left
    rm "${log_file}"
done

# `|| :` - a failure to archive the coordination directory is deliberately ignored.
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination || :
mv /var/log/clickhouse-server/stderr.log /test_output/

# Replace the engine with Ordinary to avoid extra symlinks stuff in artifacts.
# (so that clickhouse-local --path can read it w/o extra care).
sed -i -e "s/ATTACH DATABASE _ UUID '[^']*'/ATTACH DATABASE system/" -e "s/Atomic/Ordinary/" /var/lib/clickhouse/metadata/system.sql
for table in query_log trace_log; do
    sed -i "s/ATTACH TABLE _ UUID '[^']*'/ATTACH TABLE $table/" "/var/lib/clickhouse/metadata/system/${table}.sql"
    # Archive the table metadata together with its data; failures are ignored.
    tar -chf "/test_output/${table}_dump.tar" /var/lib/clickhouse/metadata/system.sql "/var/lib/clickhouse/metadata/system/${table}.sql" "/var/lib/clickhouse/data/system/${table}" || :
done

# Write check result into check_status.tsv
# The query picks one row of test_results.tsv whose result is not 'OK'.
# Ordering by (lower(test) like '%hung%') puts rows that do not mention
# "hung" first, so a hung-check failure is reported only when there is no
# other failure.
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
# An empty status file means that no failing row was found.
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv