Merge pull request #32511 from ClickHouse/fix_attaching_gdb2

Try to fix attaching gdb 2
This commit is contained in:
tavplubix 2021-12-14 21:10:00 +03:00 committed by GitHub
commit 2cf5456423
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 57 additions and 33 deletions

View File

@ -111,19 +111,6 @@ function start_server
fi
echo "ClickHouse server pid '$server_pid' started and responded"
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
handle SIGABRT stop print
continue
thread apply all backtrace
continue
" > script.gdb
gdb -batch -command script.gdb -p "$server_pid" &
}
function clone_root

View File

@ -155,17 +155,34 @@ function fuzz
kill -0 $server_pid
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
# and clickhouse-server can do fork-exec, for example, to run some bridge.
# Do not set nostop noprint for all signals, because some it may cause gdb to hang,
# explicitly ignore non-fatal signals that are used by server.
# Number of SIGRTMIN can be determined only in runtime.
RTMIN=$(kill -l SIGRTMIN)
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
continue
thread apply all backtrace
set follow-fork-mode parent
handle SIGHUP nostop noprint pass
handle SIGINT nostop noprint pass
handle SIGQUIT nostop noprint pass
handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
backtrace full
info locals
detach
quit
" > script.gdb
sudo gdb -batch -command script.gdb -p $server_pid &
gdb -batch -command script.gdb -p $server_pid &
sleep 5
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
# Check connectivity after we attach gdb, because it might cause the server
# to freeze and the fuzzer will fail.

View File

@ -128,14 +128,26 @@ function start()
counter=$((counter + 1))
done
# Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog
# and clickhouse-server can do fork-exec, for example, to run some bridge.
# Do not set nostop noprint for all signals, because some it may cause gdb to hang,
# explicitly ignore non-fatal signals that are used by server.
# Number of SIGRTMIN can be determined only in runtime.
RTMIN=$(kill -l SIGRTMIN)
echo "
set follow-fork-mode child
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
handle SIGABRT stop print
set follow-fork-mode parent
handle SIGHUP nostop noprint pass
handle SIGINT nostop noprint pass
handle SIGQUIT nostop noprint pass
handle SIGPIPE nostop noprint pass
handle SIGTERM nostop noprint pass
handle SIGUSR1 nostop noprint pass
handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
thread apply all backtrace
backtrace full
info locals
detach
quit
" > script.gdb
@ -143,7 +155,10 @@ quit
# FIXME Hung check may work incorrectly because of attached gdb
# 1. False positives are possible
# 2. We cannot attach another gdb to get stacktraces if some queries hung
sudo gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log &
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log &
sleep 5
# gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s)
time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||:
}
configure
@ -214,6 +229,9 @@ zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/n
zgrep -Fa "########################################" /test_output/* > /dev/null \
&& echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \
&& echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv
# Put logs into /test_output/
for log_file in /var/log/clickhouse-server/clickhouse-server.log*
do

View File

@ -54,3 +54,11 @@ def test_system_logs_non_empty_queue():
'log_queries_min_type': 'QUERY_START',
})
node.query('SYSTEM FLUSH LOGS')
def test_system_suspend():
node.query("CREATE TABLE t (x DateTime) ENGINE=Memory;")
node.query("INSERT INTO t VALUES (now());")
node.query("SYSTEM SUSPEND FOR 1 SECOND;")
node.query("INSERT INTO t VALUES (now());")
assert "1\n" == node.query("SELECT max(x) - min(x) >= 1 FROM t;")

View File

@ -1,5 +0,0 @@
CREATE TEMPORARY TABLE t (x DateTime);
INSERT INTO t VALUES (now());
SYSTEM SUSPEND FOR 1 SECOND;
INSERT INTO t VALUES (now());
SELECT max(x) - min(x) >= 1 FROM t;