Rework core collecting on CI (eliminate gcore usage)

gcore is a gdb command that internally uses gdb to dump the core. However, with proper configuration of limits (core_dump.size_limit) it should not be required, although some issues are possible:
- non-standard kernel.core_pattern
- sanitizers

So yes, gcore is more "universal" (you don't need to configure any `kernel.core_pattern`), but it is ad hoc, and it has a drawback - **it does not work when gdb fails**. For example, gdb may fail with `Dwarf Error: DW_FORM_strx1 found in non-DWO CU` in case of DWARF-5 [1].

[1]: https://github.com/ClickHouse/ClickHouse/pull/40772#issuecomment-1236331323

Let's try to switch to a more native way.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2024-11-21 15:12:02 +00:00 · 2022-08-29 20:36:13 +02:00 · 2022-08-29 20:36:13 +02:00 · 25e3bebd9d
commit 25e3bebd9d
parent efc74e33e5
4 changed files with 44 additions and 7 deletions
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@ -1,8 +1,15 @@
 #!/bin/bash
 # shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031

-set -eux
+set -x
+
+# core.COMM.PID-INITPID (%e: comm, %p: PID in the process's own PID namespace, %P: PID in the initial PID namespace)
+sysctl kernel.core_pattern='core.%e.%p-%P'
+
+set -e
+set -u
 set -o pipefail
+
 trap "exit" INT TERM
 # The watchdog is in the separate process group, so we have to kill it separately
 # if the script terminates earlier.
@ -87,6 +94,19 @@ function configure
    # TODO figure out which ones are needed
    cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
    cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
+
+    cat > db/config.d/core.xml <<EOL
+<clickhouse>
+    <core_dump>
+        <!-- 100GiB -->
+        <size_limit>107374182400</size_limit>
+    </core_dump>
+    <!-- NOTE: no need to configure core_path,
+         since clickhouse is not started as daemon (via clickhouse start)
+    -->
+    <core_path>$PWD</core_path>
+</clickhouse>
+EOL
 }

 function watchdog
@ -180,7 +200,6 @@ handle SIGUSR2 nostop noprint pass
 handle SIG$RTMIN nostop noprint pass
 info signals
 continue
-gcore
 backtrace full
 thread apply all backtrace full
 info registers
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@ -5,6 +5,9 @@

 set -x

+# core.COMM.PID-INITPID (%e: comm, %p: PID in the process's own PID namespace, %P: PID in the initial PID namespace)
+sysctl kernel.core_pattern='core.%e.%p-%P'
+
 # Thread Fuzzer allows to check more permutations of possible thread scheduling
 # and find more potential issues.

@ -99,6 +102,19 @@ EOL
        </default>
    </profiles>
 </clickhouse>
+EOL
+
+    cat > /etc/clickhouse-server/config.d/core.xml <<EOL
+<clickhouse>
+    <core_dump>
+        <!-- 100GiB -->
+        <size_limit>107374182400</size_limit>
+    </core_dump>
+    <!-- NOTE: no need to configure core_path,
+         since clickhouse is not started as daemon (via clickhouse start)
+    -->
+    <core_path>$PWD</core_path>
+</clickhouse>
 EOL
 }

@ -155,7 +171,6 @@ handle SIGUSR2 nostop noprint pass
 handle SIG$RTMIN nostop noprint pass
 info signals
 continue
-gcore
 backtrace full
 thread apply all backtrace full
 info registers
@ -467,8 +482,7 @@ done
 clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
 [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv

-# Core dumps (see gcore)
-# Default filename is 'core.PROCESS_ID'
+# Core dumps
 for core in core.*; do
    pigz $core
    mv $core.gz /test_output/
--- a/tests/ci/ast_fuzzer_check.py
+++ b/tests/ci/ast_fuzzer_check.py
@ -29,7 +29,11 @@ IMAGE_NAME = "clickhouse/fuzzer"

 def get_run_command(pr_number, sha, download_url, workspace_path, image):
    return (
-        f"docker run --network=host --volume={workspace_path}:/workspace "
+        f"docker run "
+        # For sysctl
+        "--privileged "
+        "--network=host "
+        f"--volume={workspace_path}:/workspace "
        "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE "
        f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '
        f"{image}"
--- a/tests/ci/stress_check.py
+++ b/tests/ci/stress_check.py
@ -33,7 +33,7 @@ def get_run_command(
        "docker run --cap-add=SYS_PTRACE "
        # a static link, don't use S3_URL or S3_DOWNLOAD
        "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' "
-        # For dmesg
+        # For dmesg and sysctl
        "--privileged "
        f"--volume={build_path}:/package_folder "
        f"--volume={result_folder}:/test_output "