CI Buddy bot to notify about CI events

This commit is contained in:
Max Kainov 2024-07-11 11:37:26 +00:00
parent e1ad5c7652
commit 6868708a58
5 changed files with 141 additions and 3 deletions

View File

@ -35,10 +35,9 @@ disable = '''
broad-except,
bare-except,
no-else-return,
global-statement
global-statement,
'''
[tool.pylint.SIMILARITIES]
# due to SQL
min-similarity-lines=1000

View File

@ -15,3 +15,4 @@ warn_return_any = True
no_implicit_reexport = True
strict_equality = True
extra_checks = True
ignore_missing_imports = True

View File

@ -15,7 +15,7 @@ import upload_result_helper
from build_check import get_release_or_pr
from ci_config import CI
from ci_metadata import CiMetadata
from ci_utils import GHActions, normalize_string
from ci_utils import GHActions, normalize_string, Shell
from clickhouse_helper import (
CiLogsCredentials,
ClickHouseHelper,
@ -53,6 +53,7 @@ from stopwatch import Stopwatch
from tee_popen import TeePopen
from ci_cache import CiCache
from ci_settings import CiSettings
from ci_buddy import CIBuddy
from version_helper import get_version_from_repo
# pylint: disable=too-many-lines
@ -262,6 +263,8 @@ def check_missing_images_on_dockerhub(
def _pre_action(s3, indata, pr_info):
print("Clear dmesg")
Shell.run("sudo dmesg --clear ||:")
CommitStatusData.cleanup()
JobReport.cleanup()
BuildResult.cleanup()
@ -1118,6 +1121,12 @@ def main() -> int:
### POST action: start
elif args.post:
if Shell.check(
"sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'"
):
print("WARNING: OOM while job execution")
CIBuddy().post_error("Out Of Memory")
job_report = JobReport.load() if JobReport.exist() else None
if job_report:
ch_helper = ClickHouseHelper()

88
tests/ci/ci_buddy.py Normal file
View File

@ -0,0 +1,88 @@
import json
import os
import boto3
import requests
from botocore.exceptions import ClientError
from pr_info import PRInfo
from ci_utils import Shell
class CIBuddy:
    """Post CI event notifications (e.g. job errors) to Slack webhooks.

    The webhook URLs are read from the ``ci_buddy_web_hooks`` SSM parameter,
    which is expected to hold a JSON object with the keys ``test_channel``
    and ``ci_channel``. When the parameter cannot be fetched or parsed, both
    channels stay empty and posting becomes a logged no-op.
    """

    _HEADERS = {"Content-Type": "application/json"}

    def __init__(self, dry_run=False):
        """Load the webhooks and collect job/PR context for messages.

        Args:
            dry_run: default for ``post``; when true, messages go to the
                test channel instead of the CI channel.
        """
        self.repo = os.getenv("GITHUB_REPOSITORY", "")
        self.dry_run = dry_run
        self.test_channel, self.dev_ci_channel = self._parse_webhooks(
            self._get_webhooks()
        )
        self.job_name = os.getenv("CHECK_NAME", "unknown")
        pr_info = PRInfo()
        self.pr_number = pr_info.number
        self.head_ref = pr_info.head_ref
        self.commit_url = pr_info.commit_html_url

    @staticmethod
    def _get_webhooks():
        """Return the raw JSON string stored in SSM, or None on a client error."""
        name = "ci_buddy_web_hooks"
        session = boto3.Session(region_name="us-east-1")
        ssm_client = session.client("ssm")
        json_string = None
        try:
            response = ssm_client.get_parameter(
                Name=name,
                WithDecryption=True,  # needed if the parameter is a SecureString
            )
            json_string = response["Parameter"]["Value"]
        except ClientError as e:
            print(f"An error occurred: {e}")
        return json_string

    @staticmethod
    def _parse_webhooks(json_string):
        """Return ``(test_channel, ci_channel)`` parsed from *json_string*.

        Returns a pair of empty strings when *json_string* is empty or is
        not a JSON object containing both keys, so that a misconfigured
        parameter does not raise out of the constructor.
        """
        if not json_string:
            return "", ""
        try:
            webhooks = json.loads(json_string)
            return webhooks["test_channel"], webhooks["ci_channel"]
        except (ValueError, KeyError, TypeError) as e:
            # json.JSONDecodeError is a ValueError; TypeError covers a JSON
            # value that is not an object (e.g. a list).
            print(f"ERROR: Failed to parse webhooks, ex {e}")
            return "", ""

    def post(self, message, dry_run=None):
        """Send *message* to Slack; failures are printed, never raised.

        Args:
            message: text of the Slack message.
            dry_run: overrides the instance-level ``dry_run`` when not None.
        """
        if dry_run is None:
            dry_run = self.dry_run
        print(f"Posting slack message, dry_run [{dry_run}]")
        url = self.test_channel if dry_run else self.dev_ci_channel
        if not url:
            print("ERROR: Failed to post message, webhook URL is not configured")
            return
        try:
            response = requests.post(
                url,
                headers=self._HEADERS,
                data=json.dumps({"text": message}),
                timeout=10,
            )
            if not response.ok:
                # Report only the status code: the webhook URL is fetched
                # with decryption and should not end up in the job log.
                print(
                    "ERROR: Failed to post message, "
                    f"status code {response.status_code}"
                )
        except Exception as e:
            # Deliberately broad: a notification failure is only reported.
            print(f"ERROR: Failed to post message, ex {e}")

    def post_error(self, error_description, job_name="", with_instance_info=True):
        """Compose an error notification and post it.

        Args:
            error_description: short text shown in the message headline.
            job_name: job to report; defaults to the ``CHECK_NAME`` value
                captured by the constructor.
            with_instance_info: when true, query ``ec2metadata`` and add a
                line with the runner's instance type and id.
        """
        job_name = job_name or self.job_name
        message = f":red_circle: {error_description} :red_circle:\n\n"
        message += f" *Job:* `{job_name}`\n"
        if with_instance_info:
            # Shell.run returns "" when the command fails.
            instance_id = Shell.run("ec2metadata --instance-id") or "unknown"
            instance_type = Shell.run("ec2metadata --instance-type") or "unknown"
            message += f" *Runner:* `{instance_type}`, `{instance_id}`\n"
        if self.pr_number > 0:
            message += f" *PR:* <https://github.com/{self.repo}/pull/{self.pr_number}|#{self.pr_number}>\n"
        else:
            # No PR number: point at the branch and commit instead.
            message += f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n"
        self.post(message)
if __name__ == "__main__":
    # Manual smoke test: with dry_run the message goes to the test channel.
    CIBuddy(dry_run=True).post_error("Out of memory")

View File

@ -1,4 +1,5 @@
import os
import subprocess
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator, List, Union
@ -42,3 +43,43 @@ class GHActions:
for line in lines:
print(line)
print("::endgroup::")
class Shell:
    """Small helpers for running shell command lines via ``subprocess.run``."""

    @classmethod
    def run_strict(cls, command):
        """Run *command* and raise ``CalledProcessError`` on a non-zero exit.

        The appended ``2>&1`` folds stderr into stdout; the captured output
        is not returned.
        """
        subprocess.run(
            f"{command} 2>&1",
            shell=True,
            capture_output=True,
            text=True,
            check=True,
        )

    @classmethod
    def run(cls, command):
        """Return the stripped stdout of *command*, or "" if it exits non-zero."""
        completed = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            check=False,
        )
        if completed.returncode != 0:
            return ""
        return completed.stdout.strip()

    @classmethod
    def check(cls, command):
        """Return True when *command* exits with status 0, False otherwise."""
        completed = subprocess.run(
            f"{command} 2>&1",
            shell=True,
            capture_output=True,
            text=True,
            check=False,
        )
        return completed.returncode == 0