Merge pull request #69557 from ClickHouse/integration-prepull-kill-runner

Kill runner when integration tests fail to pre-pull
This commit is contained in:
Mikhail f. Shiryaev 2024-09-17 17:27:20 +00:00 committed by GitHub
commit fdee35cccc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 3 deletions

View File

@ -1,4 +1,5 @@
import json import json
import logging
import os import os
import re import re
import subprocess import subprocess
@ -6,10 +7,12 @@ import sys
import time import time
from contextlib import contextmanager from contextlib import contextmanager
from pathlib import Path from pathlib import Path
from typing import Any, Iterator, List, Union, Optional, Sequence from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
import requests import requests
logger = logging.getLogger(__name__)
class Envs: class Envs:
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
@ -36,6 +39,34 @@ def cd(path: Union[Path, str]) -> Iterator[None]:
os.chdir(oldpwd) os.chdir(oldpwd)
def _parse_proc_stat(stat: str) -> Tuple[int, str]:
    """Extract ``(ppid, name)`` from the content of ``/proc/<pid>/stat``.

    The second field (the command name) is wrapped in parentheses and may
    itself contain spaces or parentheses, e.g. ``(tmux: server)``, so a plain
    whitespace split shifts the following fields. The name is therefore
    delimited by the first ``(`` and the *last* ``)``, and the remaining
    fields are split only after it.

    Returns:
        The parent PID and the command name, parentheses included.

    Raises:
        ValueError: if the parentheses are missing or the ppid is not a number.
        IndexError: if there are too few fields after the name.
    """
    name_start = stat.index("(")
    name_end = stat.rindex(")")
    # After the name come: state, ppid, ...
    fields = stat[name_end + 1 :].split()
    return int(fields[1]), stat[name_start : name_end + 1]


def kill_ci_runner(message: str) -> None:
    """Kill the current process together with all its ancestors.

    Does nothing except logging when the `CI` environment variable is unset
    or empty. Otherwise prints `message` prefixed with `::error::`, walks up
    the process tree through `/proc/<pid>/stat` until the parent PID is 0,
    sleeps for 5 seconds and sends SIGKILL to every collected PID. The current
    process is among them, so normally the function does not return in CI.
    """
    # The env is read directly: importing env_helper here is a cycle import
    if not os.getenv("CI", ""):
        logger.info("Running outside the CI, won't kill the runner")
        return
    print(f"::error::{message}")

    pid = os.getpid()
    pids = {}  # type: Dict[str, str]
    while pid:
        try:
            # Avoid using psutil, it's not in stdlib
            stat = Path(f"/proc/{pid}/stat").read_text(encoding="utf-8")
            ppid, name = _parse_proc_stat(stat)
        except (OSError, ValueError, IndexError) as e:
            # The PID is known to exist, so it is still worth killing, but
            # there is no way to continue to its parent
            logger.warning(
                "Cannot get the parent of PID %s, stop walking up: %s", pid, e
            )
            pids[str(pid)] = "<unknown>"
            break
        pids[str(pid)] = name
        pid = ppid

    logger.error(
        "Sleeping 5 seconds and killing all possible processes from following:\n %s",
        "\n ".join(f"{p}: {n}" for p, n in pids.items()),
    )
    time.sleep(5)
    # The current process will be killed too.
    # The command is built only from numeric PIDs, so the shell string is safe.
    subprocess.run(f"kill -9 {' '.join(pids.keys())}", check=False, shell=True)
class GH: class GH:
class ActionsNames: class ActionsNames:
RunConfig = "RunConfig" RunConfig = "RunConfig"

View File

@ -19,11 +19,12 @@ from collections import defaultdict
from itertools import chain from itertools import chain
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
from ci_utils import kill_ci_runner
from env_helper import IS_CI from env_helper import IS_CI
from integration_test_images import IMAGES from integration_test_images import IMAGES
from tee_popen import TeePopen
from report import JOB_TIMEOUT_TEST_NAME from report import JOB_TIMEOUT_TEST_NAME
from stopwatch import Stopwatch from stopwatch import Stopwatch
from tee_popen import TeePopen
MAX_RETRY = 1 MAX_RETRY = 1
NUM_WORKERS = 5 NUM_WORKERS = 5
@ -332,7 +333,9 @@ class ClickhouseIntegrationTestsRunner:
except subprocess.CalledProcessError as err: except subprocess.CalledProcessError as err:
logging.info("docker-compose pull failed: %s", str(err)) logging.info("docker-compose pull failed: %s", str(err))
continue continue
logging.error("Pulling images failed for 5 attempts. Will fail the worker.") message = "Pulling images failed for 5 attempts. Will fail the worker."
logging.error(message)
kill_ci_runner(message)
# We pass specific retcode to ci/integration_test_check.py to skip status reporting and restart job # We pass specific retcode to ci/integration_test_check.py to skip status reporting and restart job
sys.exit(13) sys.exit(13)