mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Rework core collecting on CI (eliminate gcore usage)
gcore is a gdb command, that internally uses gdb to dump the core. However with proper configuration of limits (core_dump.size_limit) it should not be required, althought some issues is possible: - non standard kernel.core_pattern - sanitizers So yes, gcore is more "universal" (you don't need to configure any `kernel_pattern`), but it is ad-hoc, and it has drawbacks - **it does not work when gdb fails**. For example gdb may fail with `Dwarf Error: DW_FORM_strx1 found in non-DWO CU` in case of DWARF-5 [1]. [1]: https://github.com/ClickHouse/ClickHouse/pull/40772#issuecomment-1236331323. Let's try to switch to more native way. Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
parent
efc74e33e5
commit
25e3bebd9d
@ -1,8 +1,15 @@
|
||||
#!/bin/bash
|
||||
# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031
|
||||
|
||||
set -eux
|
||||
set -x
|
||||
|
||||
# core.COMM.PID-TID
|
||||
sysctl kernel.core_pattern='core.%e.%p-%P'
|
||||
|
||||
set -e
|
||||
set -u
|
||||
set -o pipefail
|
||||
|
||||
trap "exit" INT TERM
|
||||
# The watchdog is in the separate process group, so we have to kill it separately
|
||||
# if the script terminates earlier.
|
||||
@ -87,6 +94,19 @@ function configure
|
||||
# TODO figure out which ones are needed
|
||||
cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
|
||||
cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
|
||||
|
||||
cat > db/config.d/core.xml <<EOL
|
||||
<clickhouse>
|
||||
<core_dump>
|
||||
<!-- 100GiB -->
|
||||
<size_limit>107374182400</size_limit>
|
||||
</core_dump>
|
||||
<!-- NOTE: no need to configure core_path,
|
||||
since clickhouse is not started as daemon (via clickhouse start)
|
||||
-->
|
||||
<core_path>$PWD</core_path>
|
||||
</clickhouse>
|
||||
EOL
|
||||
}
|
||||
|
||||
function watchdog
|
||||
@ -180,7 +200,6 @@ handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
gcore
|
||||
backtrace full
|
||||
thread apply all backtrace full
|
||||
info registers
|
||||
|
@ -5,6 +5,9 @@
|
||||
|
||||
set -x
|
||||
|
||||
# core.COMM.PID-TID
|
||||
sysctl kernel.core_pattern='core.%e.%p-%P'
|
||||
|
||||
# Thread Fuzzer allows to check more permutations of possible thread scheduling
|
||||
# and find more potential issues.
|
||||
|
||||
@ -99,6 +102,19 @@ EOL
|
||||
</default>
|
||||
</profiles>
|
||||
</clickhouse>
|
||||
EOL
|
||||
|
||||
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
|
||||
<clickhouse>
|
||||
<core_dump>
|
||||
<!-- 100GiB -->
|
||||
<size_limit>107374182400</size_limit>
|
||||
</core_dump>
|
||||
<!-- NOTE: no need to configure core_path,
|
||||
since clickhouse is not started as daemon (via clickhouse start)
|
||||
-->
|
||||
<core_path>$PWD</core_path>
|
||||
</clickhouse>
|
||||
EOL
|
||||
}
|
||||
|
||||
@ -155,7 +171,6 @@ handle SIGUSR2 nostop noprint pass
|
||||
handle SIG$RTMIN nostop noprint pass
|
||||
info signals
|
||||
continue
|
||||
gcore
|
||||
backtrace full
|
||||
thread apply all backtrace full
|
||||
info registers
|
||||
@ -467,8 +482,7 @@ done
|
||||
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
|
||||
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
|
||||
|
||||
# Core dumps (see gcore)
|
||||
# Default filename is 'core.PROCESS_ID'
|
||||
# Core dumps
|
||||
for core in core.*; do
|
||||
pigz $core
|
||||
mv $core.gz /test_output/
|
||||
|
@ -29,7 +29,11 @@ IMAGE_NAME = "clickhouse/fuzzer"
|
||||
|
||||
def get_run_command(pr_number, sha, download_url, workspace_path, image):
|
||||
return (
|
||||
f"docker run --network=host --volume={workspace_path}:/workspace "
|
||||
f"docker run "
|
||||
# For sysctl
|
||||
"--privileged "
|
||||
"--network=host "
|
||||
f"--volume={workspace_path}:/workspace "
|
||||
"--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE "
|
||||
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '
|
||||
f"{image}"
|
||||
|
@ -33,7 +33,7 @@ def get_run_command(
|
||||
"docker run --cap-add=SYS_PTRACE "
|
||||
# a static link, don't use S3_URL or S3_DOWNLOAD
|
||||
"-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' "
|
||||
# For dmesg
|
||||
# For dmesg and sysctl
|
||||
"--privileged "
|
||||
f"--volume={build_path}:/package_folder "
|
||||
f"--volume={result_folder}:/test_output "
|
||||
|
Loading…
Reference in New Issue
Block a user