mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Backport #69557 to 24.3: Kill runner when integration tests fail to pre-pull
This commit is contained in:
parent
d74c7ddca8
commit
e733e4ff74
@ -1,8 +1,13 @@
|
||||
from contextlib import contextmanager
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
from typing import Any, List, Union, Iterator
|
||||
import subprocess
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterator, List, Tuple, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WithIter(type):
|
||||
@ -51,6 +56,34 @@ class GHActions:
|
||||
print("::endgroup::")
|
||||
|
||||
|
||||
def kill_ci_runner(message: str) -> None:
    """Kill the current process together with all of its ancestors.

    Does nothing (apart from an info log record) unless the `CI` environment
    variable is set to a non-empty value.

    Otherwise `message` is printed to stdout as an `::error::` line, the
    chain of parent processes is collected from `/proc/<pid>/stat` starting
    at the current process, the chain is logged, and after a 5 second pause
    every collected PID receives SIGKILL through a single `kill -9` command.

    Args:
        message: Text to report in the `::error::` line before killing.
    """
    if not os.getenv("CI", ""):  # cycle import env_helper
        logger.info("Running outside the CI, won't kill the runner")
        return
    print(f"::error::{message}")

    def get_ppid_name(pid: int) -> Tuple[int, str]:
        """Return `(parent pid, "(comm)")` for `pid` read from procfs."""
        # Avoid using psutil, it's not in stdlib
        stat = Path(f"/proc/{pid}/stat").read_text(encoding="utf-8")
        # The layout is `pid (comm) state ppid ...`. `comm` may itself contain
        # spaces and parentheses (e.g. `(tmux: server)`), so a plain
        # whitespace split shifts the fields. Split on the LAST ')' instead.
        head, _, tail = stat.rpartition(")")
        name = head.partition(" ")[2] + ")"
        return int(tail.split()[1]), name

    pid = os.getpid()
    pids: Dict[str, str] = {}
    while pid:
        try:
            ppid, name = get_ppid_name(pid)
        except (OSError, ValueError, IndexError) as err:
            # An unreadable or malformed stat file must not prevent killing
            # the processes found so far; the PID itself is still a valid
            # target, only its name and its ancestors are unknown.
            logger.warning("Failed to read the parent of PID %s: %s", pid, err)
            pids[str(pid)] = "<unknown>"
            break
        pids[str(pid)] = name
        pid = ppid
    logger.error(
        "Sleeping 5 seconds and killing all possible processes from following:\n %s",
        "\n ".join(f"{p}: {n}" for p, n in pids.items()),
    )
    time.sleep(5)
    # The current process will be killed too. That is why the signals are sent
    # by a separate process: an in-process os.kill() loop would stop at the
    # first PID, which is our own. The command line is built only from integer
    # PIDs, so the shell string cannot be injected into.
    subprocess.run(f"kill -9 {' '.join(pids)}", check=False, shell=True)
|
||||
|
||||
|
||||
def set_job_timeout():
|
||||
def timeout_handler(_signum, _frame):
|
||||
print("Timeout expired")
|
||||
|
@ -18,6 +18,7 @@ from collections import defaultdict
|
||||
from itertools import chain
|
||||
from typing import Any, Dict
|
||||
|
||||
from ci_utils import kill_ci_runner
|
||||
from env_helper import CI
|
||||
from integration_test_images import IMAGES
|
||||
|
||||
@ -325,7 +326,9 @@ class ClickhouseIntegrationTestsRunner:
|
||||
except subprocess.CalledProcessError as err:
|
||||
logging.info("docker-compose pull failed: %s", str(err))
|
||||
continue
|
||||
logging.error("Pulling images failed for 5 attempts. Will fail the worker.")
|
||||
message = "Pulling images failed for 5 attempts. Will fail the worker."
|
||||
logging.error(message)
|
||||
kill_ci_runner(message)
|
||||
# We pass a specific retcode to ci/integration_test_check.py to skip status reporting and restart the job
|
||||
sys.exit(13)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user