Merge pull request #66440 from ClickHouse/ci_not_block_on_few_test_failures

CI: Do not block CI on few number of test failures
This commit is contained in:
Max K 2024-07-13 12:13:28 +00:00 committed by GitHub
commit 2ff09bdf18
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 88 additions and 16 deletions

View File

@ -13,6 +13,9 @@ class CI:
each config item in the below dicts should be an instance of JobConfig class or inherited from it
"""
MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5
MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2
# re-import types into the CI class so that they are visible as CI.* and mypy is happy
# pylint:disable=useless-import-alias,reimported,import-outside-toplevel
from ci_definitions import BuildConfig as BuildConfig

View File

@ -1,8 +1,9 @@
import os
import re
import subprocess
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator, List, Union
from typing import Any, Iterator, List, Union, Optional
class WithIter(type):
@ -83,3 +84,15 @@ class Shell:
check=False,
)
return result.returncode == 0
class Utils:
    """Namespace for small static helper functions."""

    @staticmethod
    def get_failed_tests_number(description: Optional[str]) -> Optional[int]:
        """Extract the failed-test count from a status description.

        The description is matched case-insensitively against
        ``fail: <number>``, where the number must be followed (after optional
        whitespace) by a comma or by the end of the string.

        Args:
            description: Free-form status description text, e.g.
                ``"fail: 5, passed: 3"``. May be ``None`` or empty.

        Returns:
            The parsed number of failures, or ``None`` when the description
            is missing/empty or does not contain a recognizable
            ``fail: <number>`` fragment.
        """
        if not description:
            # A status may carry no description at all; treat that the same
            # as "no failure count found" instead of raising AttributeError.
            return None

        # The lookahead keeps the match strict: "fail: 5 tests" is rejected,
        # while "fail: 5" and "fail: 5, ok: 3" are accepted.
        match = re.search(r"fail:\s*(\d+)\s*(?=,|$)", description.lower())
        if match:
            return int(match.group(1))
        return None

View File

@ -26,6 +26,8 @@ from pr_info import PRInfo
from report import SUCCESS, FAILURE
from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY
from synchronizer_utils import SYNC_BRANCH_PREFIX
from ci_config import CI
from ci_utils import Utils
# The team name for accepted approvals
TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core")
@ -251,23 +253,77 @@ def main():
# set mergeable check status and exit
commit = get_commit(gh, args.pr_info.sha)
statuses = get_commit_filtered_statuses(commit)
state = trigger_mergeable_check(
commit,
statuses,
workflow_failed=(args.wf_status != "success"),
)
# Process upstream StatusNames.SYNC
pr_info = PRInfo()
if (
pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/")
and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY
):
print("Updating upstream statuses")
update_upstream_sync_status(pr_info, state)
max_failed_tests_per_job = 0
job_name_with_max_failures = None
total_failed_tests = 0
failed_to_get_info = False
has_failed_statuses = False
for status in statuses:
if not CI.is_required(status.context):
continue
if status.state == FAILURE:
has_failed_statuses = True
failed_cnt = Utils.get_failed_tests_number(status.description)
if failed_cnt is None:
failed_to_get_info = True
else:
if failed_cnt > max_failed_tests_per_job:
job_name_with_max_failures = status.context
max_failed_tests_per_job = failed_cnt
total_failed_tests += failed_cnt
elif status.state != SUCCESS and status.context not in (
CI.StatusNames.SYNC,
CI.StatusNames.PR_CHECK,
):
# do not block CI on failures in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK)
has_failed_statuses = True
print(
f"Unexpected status for [{status.context}]: [{status.state}] - block further testing"
)
failed_to_get_info = True
if args.wf_status != "success":
# exit with 1 to rerun on workflow failed job restart
can_continue = True
if total_failed_tests > CI.MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI:
print(
f"Required check has [{total_failed_tests}] failed - block further testing"
)
can_continue = False
if max_failed_tests_per_job > CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI:
print(
f"Job [{job_name_with_max_failures}] has [{max_failed_tests_per_job}] failures - block further testing"
)
can_continue = False
if failed_to_get_info:
print("Unexpected commit status state - block further testing")
can_continue = False
if args.wf_status != SUCCESS and not has_failed_statuses:
# workflow failed but reason is unknown as no failed statuses present
can_continue = False
print(
"WARNING: Either the runner is faulty or the operating status is unknown. The first is self-healing, the second requires investigation."
)
if args.wf_status == SUCCESS or has_failed_statuses:
# do not set the mergeable check status if args.wf_status is a failure and no failed statuses are present: apparently the runners died and the workflow is to be restarted
state = trigger_mergeable_check(
commit,
statuses,
)
# Process upstream StatusNames.SYNC
pr_info = PRInfo()
if (
pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/")
and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY
):
print("Updating upstream statuses")
update_upstream_sync_status(pr_info, state)
else:
print(
"Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status"
)
if not can_continue:
sys.exit(1)
sys.exit(0)