From fa0f760fd9afd652dee50b89fd8d176d4e031174 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 22 Jul 2024 15:46:27 +0200 Subject: [PATCH] CI: CI Buddy to notify about fatal workflow failures --- .github/workflows/backport_branches.yml | 14 ++++++++- .github/workflows/master.yml | 38 +++++++------------------ .github/workflows/merge_queue.yml | 13 ++++++++- .github/workflows/nightly.yml | 17 +++++++++++ .github/workflows/pull_request.yml | 13 ++++++++- .github/workflows/release_branches.yml | 14 ++++++++- .yamllint | 6 ++++ tests/ci/ci_buddy.py | 37 +++++++++++++++++++++--- tests/ci/ci_utils.py | 27 ++++++++++++++++++ 9 files changed, 143 insertions(+), 36 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 9645d0e46de..50f4f503f5d 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -241,8 +241,9 @@ jobs: runner_type: stress-tester data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: + - RunConfig - Builds_Report - FunctionalStatelessTestAsan - FunctionalStatefulTestDebug @@ -257,6 +258,7 @@ jobs: with: clear-repository: true - name: Finish label + if: ${{ !failure() }} run: | cd "$GITHUB_WORKSPACE/tests/ci" # update mergeable check @@ -264,3 +266,13 @@ jobs: # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} python3 merge_pr.py + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 09acef5eb8b..b28d87ee31f 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -121,34 +121,6 @@ jobs: runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} - MarkReleaseReady: - if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, Builds_1, Builds_2] - runs-on: [self-hosted, style-checker-aarch64] - steps: - - name: Debug - run: | - echo need with different filters - cat << 'EOF' - ${{ toJSON(needs) }} - ${{ toJSON(needs.*.result) }} - no failures ${{ !contains(needs.*.result, 'failure') }} - no skips ${{ !contains(needs.*.result, 'skipped') }} - no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - EOF - - name: Not ready - # fail the job to be able to restart it - if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} - run: exit 1 - - name: Check out repository code - if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - uses: ClickHouse/checkout@v1 - - name: Mark Commit Release Ready - if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 mark_release_ready.py - FinishCheck: if: ${{ !cancelled() }} needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] @@ -160,3 +132,13 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 31a65ac3d15..db89825a99a 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -93,7 +93,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + if: ${{ !cancelled() }} # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1] runs-on: [self-hosted, style-checker-aarch64] @@ -101,6 +101,17 @@ jobs: - name: Check out repository code uses: ClickHouse/checkout@v1 - name: Check and set merge status + if: ${{ needs.StyleCheck.result == 'success' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index bffe5b4c1bf..fd5b5eefcc4 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -44,3 +44,20 @@ jobs: with: data: "${{ needs.RunConfig.outputs.data }}" set_latest: true + CheckWorkflow: + if: ${{ !cancelled() }} + needs: [RunConfig, BuildDockers] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 5124e4dba2c..9930cf6dde4 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -151,7 +151,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + if: ${{ !cancelled() }} # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1] runs-on: [self-hosted, style-checker-aarch64] @@ -161,9 +161,20 @@ jobs: with: filter: tree:0 - name: Check and set merge status + if: ${{ needs.StyleCheck.result == 'success' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status ################################# Stage Final ################################# # diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 6a18999d74e..50565112825 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -441,8 +441,9 @@ jobs: runner_type: stress-tester data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: + - RunConfig - DockerServerImage - DockerKeeperImage - Builds_Report @@ -478,9 +479,20 @@ jobs: with: clear-repository: true - name: Finish label + if: ${{ !failure() }} run: | cd "$GITHUB_WORKSPACE/tests/ci" # update mergeable check python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.yamllint b/.yamllint index f144e2d47b1..7fb741ec9f4 100644 --- a/.yamllint +++ b/.yamllint @@ -14,3 +14,9 @@ rules: comments: min-spaces-from-content: 1 document-start: disable + colons: disable + indentation: disable + line-length: disable + trailing-spaces: disable + truthy: disable + new-line-at-end-of-file: disable diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index 3eba5532e66..688c7d59988 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -1,3 +1,4 @@ +import argparse import json import os from typing import Union, Dict @@ -7,7 +8,7 @@ import requests from botocore.exceptions import ClientError from pr_info import PRInfo -from ci_utils import Shell +from ci_utils import Shell, GHActions class CIBuddy: @@ -29,6 +30,11 @@ class CIBuddy: self.commit_url = pr_info.commit_html_url self.sha = pr_info.sha[:10] + def check_workflow(self): + res = GHActions.get_workflow_job_result(GHActions.ActionsNames.RunConfig) + if res != GHActions.ActionStatuses.SUCCESS: + self.post_job_error("Workflow Configuration Failed", critical=True) + @staticmethod def _get_webhooks(): name = "ci_buddy_web_hooks" @@ -139,7 +145,30 @@ class CIBuddy: self.post(message) +def parse_args(): + parser = argparse.ArgumentParser("CI Buddy bot notifies about CI events") + parser.add_argument( + "--check-wf-status", + action="store_true", + help="Checks workflow status", + ) + parser.add_argument( + "--test", + action="store_true", + help="for test and debug", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="dry run mode", + ) + return parser.parse_args(), parser + + if __name__ == "__main__": - # test - buddy = CIBuddy(dry_run=True) - buddy.post_job_error("TEst") + args, parser = parse_args() + + if args.test: + CIBuddy(dry_run=True).post_job_error("TEst") + elif args.check_wf_status: + CIBuddy(dry_run=args.dry_run).check_workflow() diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 9a1b12af310..1963e3f39d0 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,3 +1,4 @@ +import json import os import re import subprocess @@ -11,6 +12,9 @@ import requests class Envs: GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") + WORKFLOW_RESULT_FILE = os.getenv( + "WORKFLOW_RESULT_FILE", "/tmp/workflow_results.json" + ) LABEL_CATEGORIES = { @@ -79,6 +83,29 @@ def normalize_string(string: str) -> str: class GHActions: + class ActionsNames: + RunConfig = "RunConfig" + + class ActionStatuses: + ERROR = "error" + FAILURE = "failure" + PENDING = "pending" + SUCCESS = "success" + + @staticmethod + def get_workflow_job_result(wf_job_name: str) -> Optional[str]: + if not Path(Envs.WORKFLOW_RESULT_FILE).exists(): + print( + f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]" + ) + return None + with open(Envs.WORKFLOW_RESULT_FILE, "r", encoding="utf-8") as json_file: + res = json.load(json_file) + if wf_job_name in res: + return res[wf_job_name]["result"] # type: ignore + else: + return None + @staticmethod def print_in_group(group_name: str, lines: Union[Any, List[Any]]) -> None: lines = list(lines)