From a3ab1ab5ca707c0e40b4ad738413dd868f2db606 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 12 Jul 2024 13:10:13 +0000 Subject: [PATCH 1/4] CI: Do not block on few number of test failures --- tests/ci/ci_config.py | 3 ++ tests/ci/ci_utils.py | 15 ++++++++- tests/ci/merge_pr.py | 78 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 80 insertions(+), 16 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 8eda6e6b96f..9a9aa553e1b 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -13,6 +13,9 @@ class CI: each config item in the below dicts should be an instance of JobConfig class or inherited from it """ + MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 2 + MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 1 + # reimport types to CI class so that they visible as CI.* and mypy is happy # pylint:disable=useless-import-alias,reimported,import-outside-toplevel from ci_definitions import BuildConfig as BuildConfig diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 629f37289a9..abc4a88989d 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,8 +1,9 @@ import os +import re import subprocess from contextlib import contextmanager from pathlib import Path -from typing import Any, Iterator, List, Union +from typing import Any, Iterator, List, Union, Optional class WithIter(type): @@ -83,3 +84,15 @@ class Shell: check=False, ) return result.returncode == 0 + + +class Utils: + @staticmethod + def get_failed_tests_number(description: str) -> Optional[int]: + description = description.lower() + + pattern = r"fail:\s*(\d+)\s*(?=,|$)" + match = re.search(pattern, description) + if match: + return int(match.group(1)) + return None diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 37c08fc4efe..6b437731561 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -26,6 +26,8 @@ from pr_info import PRInfo from report import SUCCESS, FAILURE from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY from synchronizer_utils import SYNC_BRANCH_PREFIX +from ci_config import CI +from ci_utils import Utils # The team name for accepted approvals TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core") @@ -251,23 +253,69 @@ def main(): # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - state = trigger_mergeable_check( - commit, - statuses, - workflow_failed=(args.wf_status != "success"), - ) - # Process upstream StatusNames.SYNC - pr_info = PRInfo() - if ( - pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") - and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY - ): - print("Updating upstream statuses") - update_upstream_sync_status(pr_info, state) + max_failed_tests_per_job = 0 + job_name_with_max_failures = None + total_failed_tests = 0 + failed_to_get_info = False + has_failed_statuses = False + for status in statuses: + if not CI.is_required(status.context): + continue + if status.state == FAILURE: + has_failed_statuses = True + failed_cnt = Utils.get_failed_tests_number(status.description) + if failed_cnt is None: + failed_to_get_info = True + else: + if failed_cnt > max_failed_tests_per_job: + job_name_with_max_failures = status.context + max_failed_tests_per_job = failed_cnt + total_failed_tests += failed_cnt + elif status.state != SUCCESS: + has_failed_statuses = True + print( + f"Unexpected status for [{status.context}]: [{status.state}] - block further testing" + ) + failed_to_get_info = True - if args.wf_status != "success": - # exit with 1 to rerun on workflow failed job restart + can_continue = True + if total_failed_tests > CI.MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI: + print( + f"Required check has [{total_failed_tests}] failed - block further testing" + ) + can_continue = False + if max_failed_tests_per_job > CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI: + print( + f"Job [{job_name_with_max_failures}] has [{max_failed_tests_per_job}] failures - block further testing" + ) + can_continue = False + if failed_to_get_info: + print(f"Unexpected commit status state - block further testing") + can_continue = False + if args.wf_status != SUCCESS: + can_continue = False + print("Workflow has failures - block further testing") + + if args.wf_status == "success" or has_failed_statuses: + state = trigger_mergeable_check( + commit, + statuses, + ) + # Process upstream StatusNames.SYNC + pr_info = PRInfo() + if ( + pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + print("Updating upstream statuses") + update_upstream_sync_status(pr_info, state) + else: + print( + "Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status" + ) + + if not can_continue: sys.exit(1) sys.exit(0) From 5b1e9bebe47e6b6971e6432f788e9ad9ce1c5f2b Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 18:19:30 +0200 Subject: [PATCH 2/4] change thresholds --- tests/ci/ci_config.py | 4 ++-- tests/ci/merge_pr.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 9a9aa553e1b..d9f8e7d3afd 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -13,8 +13,8 @@ class CI: each config item in the below dicts should be an instance of JobConfig class or inherited from it """ - MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 2 - MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 1 + MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5 + MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2 # reimport types to CI class so that they visible as CI.* and mypy is happy # pylint:disable=useless-import-alias,reimported,import-outside-toplevel diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 6b437731561..061376fc856 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -291,13 +291,14 @@ def main(): ) can_continue = False if failed_to_get_info: - print(f"Unexpected commit status state - block further testing") + print("Unexpected commit status state - block further testing") can_continue = False if args.wf_status != SUCCESS: can_continue = False print("Workflow has failures - block further testing") if args.wf_status == "success" or has_failed_statuses: + # do not set mergeable check status if args.wf_status == failure, apparently it has died runners and is to be restarted state = trigger_mergeable_check( commit, statuses, From 04525888f5db6f2c0e61e170cab5ad57626fbf17 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 11:55:25 +0200 Subject: [PATCH 3/4] fix for failed workflow status --- tests/ci/merge_pr.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 061376fc856..6fb6821ede4 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -293,11 +293,14 @@ def main(): if failed_to_get_info: print("Unexpected commit status state - block further testing") can_continue = False - if args.wf_status != SUCCESS: + if args.wf_status != SUCCESS and not has_failed_statuses: + # workflow failed but reason is unknown as no failed statuses present can_continue = False - print("Workflow has failures - block further testing") + print( + "WARNING: Either the runner is faulty or the operating status is unknown. The first is self-healing, the second requires investigation." + ) - if args.wf_status == "success" or has_failed_statuses: + if args.wf_status == SUCCESS or has_failed_statuses: # do not set mergeable check status if args.wf_status == failure, apparently it has died runners and is to be restarted state = trigger_mergeable_check( commit, From 8706145c467852e7d4b84e5a9823050b8de3e085 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 12:17:03 +0200 Subject: [PATCH 4/4] fix for not success status in Sync --- tests/ci/merge_pr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 6fb6821ede4..59749abb4fa 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -272,7 +272,11 @@ def main(): job_name_with_max_failures = status.context max_failed_tests_per_job = failed_cnt total_failed_tests += failed_cnt - elif status.state != SUCCESS: + elif status.state != SUCCESS and status.context not in ( + CI.StatusNames.SYNC, + CI.StatusNames.PR_CHECK, + ): + # do not block CI on failures in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK) has_failed_statuses = True print( f"Unexpected status for [{status.context}]: [{status.state}] - block further testing"