mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-09 17:14:47 +00:00
Merge pull request #69557 from ClickHouse/integration-prepull-kill-runner
Kill runner when integration tests fail to pre-pull
This commit is contained in:
commit
fdee35cccc
@ -1,4 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
@ -6,10 +7,12 @@ import sys
|
|||||||
import time
|
import time
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Iterator, List, Union, Optional, Sequence
|
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Envs:
|
class Envs:
|
||||||
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
|
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
|
||||||
@ -36,6 +39,34 @@ def cd(path: Union[Path, str]) -> Iterator[None]:
|
|||||||
os.chdir(oldpwd)
|
os.chdir(oldpwd)
|
||||||
|
|
||||||
|
|
||||||
|
def kill_ci_runner(message: str) -> None:
    """Kill the current process together with all of its ancestors.

    When the `CI` environment variable is unset or empty, only an info
    message is logged and the function returns. Otherwise *message* is
    printed with the `::error::` prefix, the chain of parent processes is
    collected from `/proc`, the list is logged, and after a 5 second pause
    `kill -9` is sent to every collected PID (including the current one).

    Args:
        message: text printed after the `::error::` prefix before killing.
    """
    if not os.getenv("CI", ""):  # cycle import env_helper
        logger.info("Running outside the CI, won't kill the runner")
        return
    print(f"::error::{message}")

    def get_ppid_name(pid: int) -> Tuple[int, str]:
        """Return the parent PID and the parenthesized name of process *pid*."""
        # Avoid using psutil, it's not in stdlib
        stat = Path(f"/proc/{pid}/stat").read_text(encoding="utf-8")
        # The second field (comm) is wrapped in parentheses and may itself
        # contain spaces or parentheses, e.g. "(tmux: server)". A plain
        # whitespace split would shift the following fields, so locate the
        # name between the first "(" and the LAST ")" instead.
        name_start = stat.index("(")
        name_end = stat.rindex(")")
        # After the name come the state and then the parent PID.
        fields_after_name = stat[name_end + 1 :].split()
        return int(fields_after_name[1]), stat[name_start : name_end + 1]

    pid = os.getpid()
    pids: Dict[str, str] = {}
    # Walk up the process tree until a process reports parent PID 0.
    while pid:
        ppid, name = get_ppid_name(pid)
        pids[str(pid)] = name
        pid = ppid
    logger.error(
        "Sleeping 5 seconds and killing all possible processes from following:\n %s",
        "\n ".join(f"{p}: {n}" for p, n in pids.items()),
    )
    time.sleep(5)
    # The current process will be killed too. The signal is sent from a child
    # shell rather than with os.kill so that the remaining ancestors still get
    # killed after this process is gone.
    subprocess.run(f"kill -9 {' '.join(pids.keys())}", check=False, shell=True)
|
||||||
|
|
||||||
|
|
||||||
class GH:
|
class GH:
|
||||||
class ActionsNames:
|
class ActionsNames:
|
||||||
RunConfig = "RunConfig"
|
RunConfig = "RunConfig"
|
||||||
|
@ -19,11 +19,12 @@ from collections import defaultdict
|
|||||||
from itertools import chain
|
from itertools import chain
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from ci_utils import kill_ci_runner
|
||||||
from env_helper import IS_CI
|
from env_helper import IS_CI
|
||||||
from integration_test_images import IMAGES
|
from integration_test_images import IMAGES
|
||||||
from tee_popen import TeePopen
|
|
||||||
from report import JOB_TIMEOUT_TEST_NAME
|
from report import JOB_TIMEOUT_TEST_NAME
|
||||||
from stopwatch import Stopwatch
|
from stopwatch import Stopwatch
|
||||||
|
from tee_popen import TeePopen
|
||||||
|
|
||||||
MAX_RETRY = 1
|
MAX_RETRY = 1
|
||||||
NUM_WORKERS = 5
|
NUM_WORKERS = 5
|
||||||
@ -332,7 +333,9 @@ class ClickhouseIntegrationTestsRunner:
|
|||||||
except subprocess.CalledProcessError as err:
|
except subprocess.CalledProcessError as err:
|
||||||
logging.info("docker-compose pull failed: %s", str(err))
|
logging.info("docker-compose pull failed: %s", str(err))
|
||||||
continue
|
continue
|
||||||
logging.error("Pulling images failed for 5 attempts. Will fail the worker.")
|
message = "Pulling images failed for 5 attempts. Will fail the worker."
|
||||||
|
logging.error(message)
|
||||||
|
kill_ci_runner(message)
|
||||||
# We pass specific retcode to ci/integration_test_check.py to skip status reporting and restart job
|
# We pass specific retcode to ci/integration_test_check.py to skip status reporting and restart job
|
||||||
sys.exit(13)
|
sys.exit(13)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user