Backport #69557 to 24.6: Kill runner when integration tests fail to pre-pull

This commit is contained in:
robot-clickhouse 2024-09-18 21:08:44 +00:00
parent d6d5449890
commit 201ba182f4
2 changed files with 38 additions and 2 deletions

View File

@ -1,7 +1,12 @@
import logging
import os
import subprocess
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator, List, Union
from typing import Any, Dict, Iterator, List, Tuple, Union
logger = logging.getLogger(__name__)
class WithIter(type):
@ -27,6 +32,34 @@ def is_hex(s):
return False
def kill_ci_runner(message: str) -> None:
    """Kill the current process together with all of its ancestors.

    Works only when the `CI` environment variable is set to a non-empty
    value; otherwise it logs an info message and returns.

    In CI it prints `::error::<message>` to stdout, walks up the parent chain
    through `/proc/<pid>/stat`, logs the collected processes, sleeps 5 seconds
    and then runs `kill -9` against all of them, the current process included.

    Args:
        message: text printed after the `::error::` prefix before killing.
    """
    # The variable is read directly because importing env_helper here would
    # create a cyclic import
    if not os.getenv("CI", ""):  # cycle import env_helper
        logger.info("Running outside the CI, won't kill the runner")
        return

    # The process is going to be SIGKILLed, so anything left in the stdout
    # buffer would be lost; flush explicitly
    print(f"::error::{message}", flush=True)

    def get_ppid_name(pid: int) -> Tuple[int, str]:
        """Return (parent pid, process name in parentheses) for `pid`."""
        # Avoid using psutil, it's not in stdlib
        stat = Path(f"/proc/{pid}/stat").read_text(encoding="utf-8")
        # The layout is `pid (comm) state ppid ...`. The comm field may itself
        # contain spaces and parentheses, so split around the LAST ")" instead
        # of relying on whitespace-separated positions
        head, _, tail = stat.rpartition(")")
        name = head[head.index("(") :] + ")"
        return int(tail.split()[1]), name

    pid = os.getpid()
    pids: Dict[str, str] = {}
    # The parent of the topmost process is reported as 0, which ends the loop
    while pid:
        try:
            ppid, name = get_ppid_name(pid)
        except (OSError, ValueError, IndexError) as e:
            # Can't go further up; still kill everything collected so far
            logger.warning("Failed to get the parent of the process %s: %s", pid, e)
            pids[str(pid)] = "<unknown>"
            break
        pids[str(pid)] = name
        pid = ppid

    logger.error(
        "Sleeping 5 seconds and killing all possible processes from following:\n %s",
        "\n ".join(f"{p}: {n}" for p, n in pids.items()),
    )
    time.sleep(5)
    # The current process will be killed too, that's why a single external
    # command is used rather than a loop over os.kill.
    # The command is built from numeric pids only, so the shell string is safe
    subprocess.run(f"kill -9 {' '.join(pids)}", check=False, shell=True)
def normalize_string(string: str) -> str:
res = string.lower()
for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_"), ("-", "_")):

View File

@ -18,6 +18,7 @@ from collections import defaultdict
from itertools import chain
from typing import Any, Dict
from ci_utils import kill_ci_runner
from env_helper import IS_CI
from integration_test_images import IMAGES
@ -327,7 +328,9 @@ class ClickhouseIntegrationTestsRunner:
except subprocess.CalledProcessError as err:
logging.info("docker-compose pull failed: %s", str(err))
continue
logging.error("Pulling images failed for 5 attempts. Will fail the worker.")
message = "Pulling images failed for 5 attempts. Will fail the worker."
logging.error(message)
kill_ci_runner(message)
# We pass specific retcode to ci/integration_test_check.py to skip status reporting and restart job
sys.exit(13)