Merge pull request #40775 from azat/ci/core-dumps-rework

Rework core collecting on CI (eliminate gcore usage)
This commit is contained in:
Alexander Tokmakov 2022-09-09 20:20:10 +03:00 committed by GitHub
commit e77b9e4d0c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 44 additions and 7 deletions

View File

@ -1,8 +1,15 @@
#!/bin/bash #!/bin/bash
# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031 # shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031
set -eux set -x
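# core.COMM.PID-INITIAL_NS_PID (%e: comm, %p: PID in the process's own PID namespace, %P: PID in the initial PID namespace)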
sysctl kernel.core_pattern='core.%e.%p-%P'
set -e
set -u
set -o pipefail set -o pipefail
trap "exit" INT TERM trap "exit" INT TERM
# The watchdog is in a separate process group, so we have to kill it separately # The watchdog is in a separate process group, so we have to kill it separately
# if the script terminates early. # if the script terminates early.
@ -87,6 +94,19 @@ function configure
# TODO figure out which ones are needed # TODO figure out which ones are needed
cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
cat > db/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: strictly speaking there is no need to configure core_path,
since clickhouse is not started as a daemon (via clickhouse start),
but it is set explicitly to the current directory below anyway
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
} }
function watchdog function watchdog
@ -180,7 +200,6 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass handle SIG$RTMIN nostop noprint pass
info signals info signals
continue continue
gcore
backtrace full backtrace full
thread apply all backtrace full thread apply all backtrace full
info registers info registers

View File

@ -8,6 +8,9 @@ dmesg --clear
set -x set -x
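# core.COMM.PID-INITIAL_NS_PID (%e: comm, %p: PID in the process's own PID namespace, %P: PID in the initial PID namespace)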
sysctl kernel.core_pattern='core.%e.%p-%P'
# Thread Fuzzer makes it possible to check more permutations of possible thread scheduling # Thread Fuzzer makes it possible to check more permutations of possible thread scheduling
# and find more potential issues. # and find more potential issues.
@ -104,6 +107,19 @@ EOL
</default> </default>
</profiles> </profiles>
</clickhouse> </clickhouse>
EOL
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: strictly speaking there is no need to configure core_path,
since clickhouse is not started as a daemon (via clickhouse start),
but it is set explicitly to the current directory below anyway
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL EOL
} }
@ -160,7 +176,6 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass handle SIG$RTMIN nostop noprint pass
info signals info signals
continue continue
gcore
backtrace full backtrace full
thread apply all backtrace full thread apply all backtrace full
info registers info registers
@ -504,8 +519,7 @@ done
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps (see gcore) # Core dumps
# Default filename is 'core.PROCESS_ID'
for core in core.*; do for core in core.*; do
pigz $core pigz $core
mv $core.gz /test_output/ mv $core.gz /test_output/

View File

@ -29,7 +29,11 @@ IMAGE_NAME = "clickhouse/fuzzer"
def get_run_command(pr_number, sha, download_url, workspace_path, image): def get_run_command(pr_number, sha, download_url, workspace_path, image):
return ( return (
f"docker run --network=host --volume={workspace_path}:/workspace " f"docker run "
# For sysctl
"--privileged "
"--network=host "
f"--volume={workspace_path}:/workspace "
"--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE " "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE "
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" ' f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '
f"{image}" f"{image}"

View File

@ -33,7 +33,7 @@ def get_run_command(
"docker run --cap-add=SYS_PTRACE " "docker run --cap-add=SYS_PTRACE "
# a static link, don't use S3_URL or S3_DOWNLOAD # a static link, don't use S3_URL or S3_DOWNLOAD
"-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' " "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' "
# For dmesg # For dmesg and sysctl
"--privileged " "--privileged "
f"--volume={build_path}:/package_folder " f"--volume={build_path}:/package_folder "
f"--volume={result_folder}:/test_output " f"--volume={result_folder}:/test_output "