mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Merge pull request #19558 from ClickHouse/better_hung_check
Check for hung queries or a hung server in the fast test
This commit is contained in:
commit
b3726c1c8e
@ -337,7 +337,7 @@ function run_tests
|
||||
01666_blns
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr drops the last character of the third field (the separator that follows the test name)
|
||||
readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
@ -360,7 +360,7 @@ function run_tests
|
||||
|
||||
echo "Going to run again: ${FAILED_TESTS[*]}"
|
||||
|
||||
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
|
||||
clickhouse-test --hung-check --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
|
||||
else
|
||||
echo "No failed tests"
|
||||
fi
|
||||
|
@ -197,11 +197,14 @@ def need_retry(stderr):
|
||||
return any(msg in stderr for msg in MESSAGES_TO_RETRY)
|
||||
|
||||
|
||||
def get_processlist(client_cmd):
|
||||
def get_processlist(args):
|
||||
try:
|
||||
return subprocess.check_output("{} --query 'SHOW PROCESSLIST FORMAT Vertical'".format(client_cmd), shell=True).decode('utf-8')
|
||||
except:
|
||||
return "" # server seems dead
|
||||
clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
|
||||
(stdout, _) = clickhouse_proc.communicate((b"SHOW PROCESSLIST FORMAT Vertical"), timeout=10)
|
||||
return False, stdout.decode('utf-8')
|
||||
except Exception as ex:
|
||||
print("Exception", ex)
|
||||
return True, ""
|
||||
|
||||
|
||||
# collect server stacktraces using gdb
|
||||
@ -334,9 +337,13 @@ def run_tests_array(all_tests_with_params):
|
||||
|
||||
if args.testname:
|
||||
clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
|
||||
clickhouse_proc.communicate(("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite)), timeout=10)
|
||||
failed_to_check = False
|
||||
try:
|
||||
clickhouse_proc.communicate(("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite)), timeout=10)
|
||||
except:
|
||||
failed_to_check = True
|
||||
|
||||
if clickhouse_proc.returncode != 0:
|
||||
if failed_to_check or clickhouse_proc.returncode != 0:
|
||||
failures += 1
|
||||
print("Server does not respond to health check")
|
||||
SERVER_DIED = True
|
||||
@ -760,14 +767,17 @@ def main(args):
|
||||
|
||||
# Some queries may execute in background for some time after test was finished. This is normal.
|
||||
for n in range(1, 60):
|
||||
processlist = get_processlist(args.client)
|
||||
if not processlist:
|
||||
timeout, processlist = get_processlist(args)
|
||||
if timeout or not processlist:
|
||||
break
|
||||
sleep(1)
|
||||
|
||||
if processlist:
|
||||
print(colored("\nFound hung queries in processlist:", args, "red", attrs=["bold"]))
|
||||
print(processlist)
|
||||
if timeout or processlist:
|
||||
if processlist:
|
||||
print(colored("\nFound hung queries in processlist:", args, "red", attrs=["bold"]))
|
||||
print(processlist)
|
||||
else:
|
||||
print(colored("Seems like server hung and cannot respond to queries", "red", attrs=["bold"]))
|
||||
|
||||
clickhouse_tcp_port = os.getenv("CLICKHOUSE_PORT_TCP", '9000')
|
||||
server_pid = get_server_pid(clickhouse_tcp_port)
|
||||
|
Loading…
Reference in New Issue
Block a user