Implement global timeout check in _run_test

This commit is contained in:
Mikhail f. Shiryaev 2024-05-16 18:23:35 +02:00
parent 87138301c4
commit 92ee671310
No known key found for this signature in database
GPG Key ID: 4B02ED204C7D93F4
6 changed files with 37 additions and 73 deletions

View File

@ -18,6 +18,7 @@ import docker_images_helper
import upload_result_helper
from build_check import get_release_or_pr
from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames, StatusNames
from ci_metadata import CiMetadata
from ci_utils import GHActions, is_hex, normalize_string
from clickhouse_helper import (
CiLogsCredentials,
@ -39,22 +40,23 @@ from digest_helper import DockerDigester, JobDigester
from env_helper import (
CI,
GITHUB_JOB_API_URL,
GITHUB_REPOSITORY,
GITHUB_RUN_ID,
GITHUB_RUN_URL,
REPO_COPY,
REPORT_PATH,
S3_BUILDS_BUCKET,
TEMP_PATH,
GITHUB_RUN_ID,
GITHUB_REPOSITORY,
)
from get_robot_token import get_best_robot_token
from git_helper import GIT_PREFIX, Git
from git_helper import Runner as GitRunner
from github_helper import GitHub
from pr_info import PRInfo
from report import ERROR, SUCCESS, BuildResult, JobReport, PENDING
from report import ERROR, FAILURE, PENDING, SUCCESS, BuildResult, JobReport, TestResult
from s3_helper import S3Helper
from ci_metadata import CiMetadata
from stopwatch import Stopwatch
from tee_popen import TeePopen
from version_helper import get_version_from_repo
# pylint: disable=too-many-lines
@ -1868,8 +1870,7 @@ def _run_test(job_name: str, run_command: str) -> int:
), "Run command must be provided as input argument or be configured in job config"
env = os.environ.copy()
if CI_CONFIG.get_job_config(job_name).timeout:
env["KILL_TIMEOUT"] = str(CI_CONFIG.get_job_config(job_name).timeout)
timeout = CI_CONFIG.get_job_config(job_name).timeout or None
if not run_command:
run_command = "/".join(
@ -1882,25 +1883,25 @@ def _run_test(job_name: str, run_command: str) -> int:
print("Use run command from the workflow")
env["CHECK_NAME"] = job_name
print(f"Going to start run command [{run_command}]")
process = subprocess.run(
run_command,
stdout=sys.stdout,
stderr=sys.stderr,
env=env,
text=True,
check=False,
shell=True,
)
stopwatch = Stopwatch()
job_log = Path(TEMP_PATH) / "job_log.txt"
with TeePopen(run_command, job_log, env, timeout) as process:
retcode = process.wait()
if retcode != 0:
print(f"Run action failed for: [{job_name}] with exit code [{retcode}]")
if timeout and process.timeout_exceeded:
print(f"Timeout {timeout} exceeded, dumping the job report")
JobReport(
status=FAILURE,
description=f"Timeout {timeout} exceeded",
test_results=[TestResult.create_check_timeout_expired(timeout)],
start_time=stopwatch.start_time_str,
duration=stopwatch.duration_seconds,
additional_files=[job_log],
).dump()
if process.returncode == 0:
print(f"Run action done for: [{job_name}]")
exit_code = 0
else:
print(
f"Run action failed for: [{job_name}] with exit code [{process.returncode}]"
)
exit_code = process.returncode
return exit_code
print(f"Run action done for: [{job_name}]")
return retcode
def _get_ext_check_name(check_name: str) -> str:

View File

@ -472,12 +472,12 @@ compatibility_test_common_params = {
}
stateless_test_common_params = {
"digest": stateless_check_digest,
"run_command": 'functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT',
"run_command": 'functional_test_check.py "$CHECK_NAME"',
"timeout": 10800,
}
stateful_test_common_params = {
"digest": stateful_check_digest,
"run_command": 'functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT',
"run_command": 'functional_test_check.py "$CHECK_NAME"',
"timeout": 3600,
}
stress_test_common_params = {

View File

@ -1,8 +1,7 @@
from contextlib import contextmanager
import os
import signal
from typing import Any, List, Union, Iterator
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator, List, Union
class WithIter(type):
@ -49,14 +48,3 @@ class GHActions:
for line in lines:
print(line)
print("::endgroup::")
def set_job_timeout():
    """Arm a SIGALRM-based timeout for the current process.

    The timeout, in seconds, is read from the ``KILL_TIMEOUT`` environment
    variable. A handler is installed for ``SIGALRM`` and an alarm is scheduled
    for that many seconds; when the alarm fires, the handler prints a message
    and raises ``TimeoutError``.

    Raises:
        AssertionError: if ``KILL_TIMEOUT`` is unset or not greater than zero.
        ValueError: if ``KILL_TIMEOUT`` cannot be parsed as an integer.
    """

    def timeout_handler(_signum, _frame):
        print("Timeout expired")
        raise TimeoutError("Job's KILL_TIMEOUT expired")

    kill_timeout = int(os.getenv("KILL_TIMEOUT", "0"))
    # Raise explicitly instead of using an `assert` statement: asserts are
    # stripped under `python -O`, and `signal.alarm(0)` would then silently
    # schedule no alarm at all. The exception type is kept for callers.
    if kill_timeout <= 0:
        raise AssertionError("kill timeout must be provided in KILL_TIMEOUT env")
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(kill_timeout)

View File

@ -68,7 +68,6 @@ def get_run_command(
repo_path: Path,
result_path: Path,
server_log_path: Path,
kill_timeout: int,
additional_envs: List[str],
ci_logs_args: str,
image: DockerImage,
@ -86,7 +85,6 @@ def get_run_command(
)
envs = [
f"-e MAX_RUN_TIME={int(0.9 * kill_timeout)}",
# a static link, don't use S3_URL or S3_DOWNLOAD
'-e S3_URL="https://s3.amazonaws.com/clickhouse-datasets"',
]
@ -192,7 +190,6 @@ def process_results(
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("check_name")
parser.add_argument("kill_timeout", type=int)
parser.add_argument(
"--validate-bugfix",
action="store_true",
@ -224,12 +221,7 @@ def main():
assert (
check_name
), "Check name must be provided as an input arg or in CHECK_NAME env"
kill_timeout = args.kill_timeout or int(os.getenv("KILL_TIMEOUT", "0"))
assert (
kill_timeout > 0
), "kill timeout must be provided as an input arg or in KILL_TIMEOUT env"
validate_bugfix_check = args.validate_bugfix
print(f"Runnin check [{check_name}] with timeout [{kill_timeout}]")
flaky_check = "flaky" in check_name.lower()
@ -288,7 +280,6 @@ def main():
repo_path,
result_path,
server_log_path,
kill_timeout,
additional_envs,
ci_logs_args,
docker_image,

View File

@ -1,25 +1,21 @@
#!/usr/bin/env python3
import argparse
import logging
import sys
import subprocess
import sys
from pathlib import Path
from shutil import copy2
from typing import Dict
from build_download_helper import download_builds_filter
from compress_files import compress_fast
from docker_images_helper import DockerImage, pull_image, get_docker_image
from env_helper import CI, REPORT_PATH, TEMP_PATH as TEMP
from report import JobReport, TestResults, TestResult, FAILURE, FAIL, OK, SUCCESS
from docker_images_helper import DockerImage, get_docker_image, pull_image
from env_helper import REPORT_PATH
from env_helper import TEMP_PATH as TEMP
from report import FAIL, FAILURE, OK, SUCCESS, JobReport, TestResult, TestResults
from stopwatch import Stopwatch
from tee_popen import TeePopen
from ci_utils import set_job_timeout
RPM_IMAGE = "clickhouse/install-rpm-test"
DEB_IMAGE = "clickhouse/install-deb-test"
@ -256,9 +252,6 @@ def main():
args = parse_args()
if CI:
set_job_timeout()
TEMP_PATH.mkdir(parents=True, exist_ok=True)
LOGS_PATH.mkdir(parents=True, exist_ok=True)

View File

@ -9,8 +9,8 @@ from pathlib import Path
from typing import Tuple
from build_download_helper import download_all_deb_packages
from docker_images_helper import DockerImage, pull_image, get_docker_image
from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY
from docker_images_helper import DockerImage, get_docker_image, pull_image
from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH
from report import (
ERROR,
FAIL,
@ -72,11 +72,6 @@ def parse_args() -> argparse.Namespace:
required=False,
default="",
)
parser.add_argument(
"--kill-timeout",
required=False,
default=0,
)
return parser.parse_args()
@ -96,10 +91,6 @@ def main():
assert (
check_name
), "Check name must be provided as an input arg or in CHECK_NAME env"
kill_timeout = args.kill_timeout or int(os.getenv("KILL_TIMEOUT", "0"))
assert (
kill_timeout > 0
), "kill timeout must be provided as an input arg or in KILL_TIMEOUT env"
docker_image = pull_image(get_docker_image(IMAGE_NAME))
@ -127,7 +118,7 @@ def main():
)
logging.info("Going to run func tests: %s", run_command)
with TeePopen(run_command, run_log_path, timeout=kill_timeout) as process:
with TeePopen(run_command, run_log_path) as process:
retcode = process.wait()
if retcode == 0:
logging.info("Run successfully")