ClickHouse/docker/test/fuzzer/run-fuzzer.sh

268 lines
8.7 KiB
Bash
Raw Normal View History

2020-07-09 11:21:23 +00:00
#!/bin/bash
# shellcheck disable=SC2086

# Script preamble: strict mode, signal handling and the globals shared by the
# stage functions defined further down.
#
# Environment:
#   stage               - optional name of the stage to start from (empty means
#                         "run everything").
#   BINARY_TO_DOWNLOAD  - optional name of the CI build whose binary is fetched;
#                         a default is assigned when unset or empty.

# Abort on errors and on unset variables, trace every command, and make a
# pipeline fail when any of its stages fails.
set -eux
set -o pipefail

# Turn INT/TERM into a regular exit so that the EXIT trap below still runs.
trap "exit" INT TERM

# The watchdog is in the separate process group, so we have to kill it separately
# if the script terminates earlier.
trap 'kill $(jobs -pr) ${watchdog_pid:-} ||:' EXIT

stage=${stage:-}

# Absolute path of the directory that contains this script.
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"

# Directory (relative to the working directory) holding the repository checkout.
repo_dir=ch

BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-12_debug_none_bundled_unsplitted_disable_False_binary"}
2020-07-09 11:21:23 +00:00
# Fetch the repository snapshot published by the CI for the tested commit and
# unpack it into ./ch, replacing any previous checkout.
#
# Globals read: PR_TO_TEST, SHA_TO_TEST (both must be set; the script runs
#               with `set -u`).
# Side effects: recreates ./ch and leaves clickhouse_no_subs.tar.gz in the
#               current directory.
function clone
{
    # The download() function is dependent on CI binaries anyway, so we can take
    # the repo from the CI as well. For local runs, start directly from the "fuzz"
    # stage.
    rm -rf ch ||:
    mkdir ch ||:

    wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"

    # Drop the top-level directory of the archive so that the sources land
    # directly in ./ch.
    tar -C ch --strip-components=1 -xf clickhouse_no_subs.tar.gz

    # Debug listing of the working directory; failures here are not important.
    ls -lath ||:
}
# Download the ClickHouse binary built by the CI for the tested commit, plus
# the list of files changed by the PR, and expose the binary on PATH as
# clickhouse-server / clickhouse-client.
#
# Globals read: PR_TO_TEST, SHA_TO_TEST, BINARY_TO_DOWNLOAD.
# Side effects: creates ./clickhouse, ./clickhouse-server, ./clickhouse-client
#               and (when available) ./ci-changed-files.txt; prepends the
#               current directory to PATH.
# Returns:      non-zero when the binary cannot be downloaded.
function download
{
    local binary_pid changed_files_pid

    # Run both downloads in parallel, remembering each PID: a bare `wait`
    # always returns 0 and would hide a failed download.
    wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse" &
    binary_pid=$!
    wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/ci-changed-files.txt" &
    changed_files_pid=$!

    # Nothing can run without the binary, so this failure is fatal.
    wait "$binary_pid" || return 1
    # The list of changed files is only used to pick extra tests to fuzz, so
    # keep this download best-effort but make the failure visible.
    wait "$changed_files_pid" \
        || echo "Failed to download ci-changed-files.txt" >&2

    chmod +x clickhouse
    # -f keeps the stage re-runnable when the links are left from a previous run.
    ln -sf ./clickhouse ./clickhouse-server
    ln -sf ./clickhouse ./clickhouse-client

    # clickhouse-server is in the current dir
    export PATH="$PWD:$PATH"
}
# Create a fresh server configuration directory ./db from the configs shipped
# in the repository checkout and the fuzzer-specific user tweaks stored next
# to this script.
#
# Globals read: repo_dir, script_dir.
# Side effects: removes and recreates ./db.
function configure
{
    rm -rf db ||:
    mkdir db ||:

    # The globs pick up both the main files and the accompanying *.d
    # directories; --dereference copies symlink targets instead of the links.
    cp -av --dereference "$repo_dir"/programs/server/config* db
    cp -av --dereference "$repo_dir"/programs/server/user* db

    # TODO figure out which ones are needed
    cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
    cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
}
2020-07-10 11:11:31 +00:00
# Sleep for the fuzzing time budget and then stop the fuzzer
# (clickhouse-client): first politely, then with SIGKILL if it is still alive
# after about ten seconds.
#
# Arguments: $1 - optional timeout in seconds (default: 3600).
# Outputs:   a "timed out" message, plus whatever pgrep prints while polling.
function watchdog
{
    local timeout_seconds="${1:-3600}"

    sleep "$timeout_seconds"

    echo "Fuzzing run has timed out"
    killall clickhouse-client ||:

    # Give the client up to ten seconds to exit on its own.
    for _ in {1..10}
    do
        if ! pgrep -f clickhouse-client
        then
            break
        fi
        sleep 1
    done

    killall -9 clickhouse-client ||:
}
# Echo, one per line, every argument that names an existing filesystem entry.
# Arguments that do not exist are reported on stderr and skipped.
#
# Arguments: any number of paths.
# Outputs:   existing paths on stdout, diagnostics on stderr.
function filter_exists
{
    local candidate
    for candidate in "$@"
    do
        if [ ! -e "$candidate" ]
        then
            echo "'$candidate' does not exists" >&2
            continue
        fi
        echo "$candidate"
    done
}
2020-07-09 11:21:23 +00:00
# Start the server, attach gdb to it, run the query fuzzer over the stateless
# tests and finally stop the server.
#
# Globals read:    NEW_TESTS_OPT (optional override from the environment).
# Globals written: NEW_TESTS, NEW_TESTS_OPT, server_pid, fuzzer_exit_code.
# Files written:   server.log, fuzzer.log, script.gdb.
#
# NOTE: the caller runs this function as `fuzz || ...`, and bash ignores
# `set -e` inside a function invoked in such a context. The bare checks below
# (`kill -0`, `select 1`) therefore only show up in the trace; they do not
# abort the run.
function fuzz
{
    # Obtain the list of newly added tests. They will be fuzzed in more extreme way than other tests.
    # Don't overwrite the NEW_TESTS_OPT so that it can be set from the environment.
    NEW_TESTS="$(grep -P 'tests/queries/0_stateless/.*\.sql' ci-changed-files.txt | sed -r -e 's!^!ch/!' | sort -R)"
    # ci-changed-files.txt also contains files that have been deleted/renamed, filter them out.
    NEW_TESTS="$(filter_exists $NEW_TESTS)"
    if [[ -n "$NEW_TESTS" ]]
    then
        NEW_TESTS_OPT="${NEW_TESTS_OPT:---interleave-queries-file ${NEW_TESTS}}"
    else
        NEW_TESTS_OPT="${NEW_TESTS_OPT:-}"
    fi

    # Keep only the tail of the server output. The log is attached through a
    # process substitution rather than a pipe: after `server | tail &` the
    # value of $! is the PID of tail, not of the server.
    clickhouse-server --config-file db/config.xml -- --path db \
        > >(tail -100000 > server.log) \
        2>&1 &
    server_pid=$!

    kill -0 $server_pid
    # Poll until the server answers a trivial query or its process is gone.
    while ! clickhouse-client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
    clickhouse-client --query "select 1"
    kill -0 $server_pid
    echo Server started

    # Make gdb stop only on SIGSEGV/SIGBUS and dump the backtraces of all
    # threads when that happens.
    echo "
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
continue
thread apply all backtrace
continue
" > script.gdb

    gdb -batch -command script.gdb -p "$(pidof clickhouse-server)" &

    fuzzer_exit_code=0
    # SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric.
    # SC2046: Quote this to prevent word splitting. Actually I need word splitting.
    # shellcheck disable=SC2012,SC2046
    clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) $NEW_TESTS_OPT \
        > >(tail -n 100000 > fuzzer.log) \
        2>&1 \
        || fuzzer_exit_code=$?

    echo "Fuzzer exit code is $fuzzer_exit_code"

    # Show what the server is still executing, if it is alive at all.
    clickhouse-client --query "select elapsed, query from system.processes" ||:

    # Stop the server: ask nicely, wait up to ten seconds, then force it.
    killall clickhouse-server ||:
    for _ in {1..10}
    do
        if ! pgrep -f clickhouse-server
        then
            break
        fi
        sleep 1
    done
    killall -9 clickhouse-server ||:
}
# Stage dispatch. Execution starts at the stage named by $stage (an empty
# value means "from the very beginning") and falls through every later stage:
# clone -> download -> configure -> fuzz -> report.
# The script exits with the fuzzing task's exit code, or with 0 when only the
# report stage was run and no exit code was computed.
case "$stage" in
"")
    ;&  # Did you know? This is "fallthrough" in bash. https://stackoverflow.com/questions/12010686/case-statement-fallthrough
"clone")
    time clone
    if [ -v FUZZ_LOCAL_SCRIPT ]
    then
        # just fall through
        echo Using the testing script from docker container
        :
    else
        # Run the testing script from the repository
        echo Using the testing script from the repository
        export stage=download
        time ch/docker/test/fuzzer/run-fuzzer.sh
        # Keep the error code
        exit $?
    fi
    ;&
"download")
    time download
    ;&
"configure")
    time configure
    ;&
"fuzz")
    # Start a watchdog that should kill the fuzzer on timeout.
    # The shell won't kill the child sleep when we kill it, so we have to put it
    # into a separate process group so that we can kill them all.
    set -m
    watchdog &
    watchdog_pid=$!
    set +m
    # Check that the watchdog has started
    kill -0 $watchdog_pid

    fuzzer_exit_code=0
    time fuzz || fuzzer_exit_code=$?
    # A negative PID addresses the whole process group of the watchdog.
    kill -- -$watchdog_pid ||:

    # Debug
    date
    sleep 10
    jobs
    pstree -aspgT

    # Make files with status and description we'll show for this check on Github
    task_exit_code=$fuzzer_exit_code
    if [ "$fuzzer_exit_code" == 143 ]
    then
        # SIGTERM -- the fuzzer was killed by timeout, which means a normal run.
        echo "success" > status.txt
        echo "OK" > description.txt
        task_exit_code=0
    elif [ "$fuzzer_exit_code" == 210 ]
    then
        # Lost connection to the server. This probably means that the server died
        # with abort.
        echo "failure" > status.txt
        if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
        then
            echo "Lost connection to server. See the logs." > description.txt
        fi
    else
        # Something different -- maybe the fuzzer itself died? Don't grep the
        # server log in this case, because we will find a message about normal
        # server termination (Received signal 15), which is confusing.
        echo "failure" > status.txt
        echo "Fuzzer failed ($fuzzer_exit_code). See the logs." > description.txt
    fi
    ;&
"report")
    # The heredoc delimiter is unquoted on purpose: the PR/commit identifiers
    # and the contents of status.txt / description.txt are expanded into the page.
    cat > report.html <<EOF ||:
<!DOCTYPE html>
<html lang="en">
<link rel="preload" as="font" href="https://yastatic.net/adv-www/_/sUYVCPUAQE7ExrvMS7FoISoO83s.woff2" type="font/woff2" crossorigin="anonymous"/>
<style>
@font-face {
font-family:'Yandex Sans Display Web';
src:url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot);
src:url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/sUYVCPUAQE7ExrvMS7FoISoO83s.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/v2Sve_obH3rKm6rKrtSQpf-eB7U.woff) format('woff'),
url(https://yastatic.net/adv-www/_/PzD8hWLMunow5i3RfJ6WQJAL7aI.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/lF_KG5g4tpQNlYIgA0e77fBSZ5s.svg#YandexSansDisplayWeb-Regular) format('svg');
font-weight:400;
font-style:normal;
font-stretch:normal
}

body { font-family: "Yandex Sans Display Web", Arial, sans-serif; background: #EEE; }
h1 { margin-left: 10px; }
th, td { border: 0; padding: 5px 10px 5px 10px; text-align: left; vertical-align: top; line-height: 1.5; background-color: #FFF;
border: 0; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }
td { white-space: pre; font-family: Monospace, Courier New; }
a { color: #06F; text-decoration: none; }
a:hover, a:active { color: #F40; text-decoration: underline; }
table { border: 0; }
.main { margin-left: 10%; }
p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-space: nowrap; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }
th { cursor: pointer; }
</style>
<title>AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST}</title>
</head>
<body>
<div class="main">
<h1>AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST}</h1>
<p class="links">
<a href="fuzzer.log">fuzzer.log</a>
<a href="server.log">server.log</a>
<a href="main.log">main.log</a>
</p>
<table>
<tr><th>Test name</th><th>Test status</th><th>Description</th></tr>
<tr><td>AST Fuzzer</td><td>$(cat status.txt)</td><td>$(cat description.txt)</td></tr>
</table>
</div>
</body>
</html>
EOF
    ;&
esac

# task_exit_code is only assigned by the "fuzz" stage; default to 0 so that a
# report-only run does not trip `set -u`.
exit "${task_exit_code:-0}"