ClickHouse/tests/ci/ci_runners_metrics_lambda/app.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

174 lines
4.8 KiB
Python
Raw Normal View History

2021-09-30 09:00:45 +00:00
#!/usr/bin/env python3
"""
Lambda function to:
- calculate number of running runners
- cleaning dead runners from GitHub
- terminating stale lost runners in EC2
"""
2021-09-30 09:00:45 +00:00
import argparse
import sys
from datetime import datetime
from typing import Dict, List
import requests # type: ignore
import boto3 # type: ignore
from botocore.exceptions import ClientError # type: ignore
2021-12-02 16:38:18 +00:00
from lambda_shared import (
RUNNER_TYPE_LABELS,
RunnerDescription,
RunnerDescriptions,
list_runners,
)
from lambda_shared.token import (
get_cached_access_token,
get_key_and_app_from_aws,
get_access_token_by_key_app,
)
# Runners carrying this tag are grouped under it alone by
# group_runners_by_tag(); their remaining tags are not inspected.
UNIVERSAL_LABEL = "universal"
2022-01-07 09:11:46 +00:00
2021-09-30 09:00:45 +00:00
def handler(event, context):
    """Entry point taking the ``(event, context)`` pair; both are ignored.

    Runs ``main`` with the cached access token and with pushing to
    CloudWatch enabled.
    """
    del event, context  # neither argument is used
    access_token = get_cached_access_token()
    main(access_token, True)
2021-09-30 09:00:45 +00:00
def group_runners_by_tag(
    listed_runners: RunnerDescriptions,
) -> Dict[str, RunnerDescriptions]:
    """Bucket runners by the label that identifies their type.

    Each runner lands in exactly one bucket:

    - ``UNIVERSAL_LABEL`` if that label is among its tags (other tags are
      then not considered);
    - otherwise the first of its tags that is in ``RUNNER_TYPE_LABELS``;
    - otherwise ``"unlabeled"``.

    Returns a dict mapping the bucket name to the runners in it; buckets
    appear in the order in which they were first encountered.
    """
    grouped: Dict[str, RunnerDescriptions] = {}

    for runner in listed_runners:
        if UNIVERSAL_LABEL in runner.tags:
            # The universal label wins over any other label on the runner
            bucket = UNIVERSAL_LABEL
        else:
            # First known type label, or the fallback bucket if there is none
            bucket = next(
                (tag for tag in runner.tags if tag in RUNNER_TYPE_LABELS),
                "unlabeled",
            )
        grouped.setdefault(bucket, []).append(runner)

    return grouped
def push_metrics_to_cloudwatch(
    listed_runners: RunnerDescriptions, group_name: str
) -> None:
    """Send the runner counters of one group to CloudWatch.

    Four metrics are published in a single ``put_metric_data`` call to the
    ``RunnersMetrics`` namespace, each with a ``group`` dimension set to
    *group_name*:

    - ``BusyRunners``: runners that are busy and not offline;
    - ``ActiveRunners``: runners that are not offline;
    - ``TotalRunners``: all runners passed in;
    - ``BusyRunnersRatio``: busy / active in percent, reported as 100.0
      when there are no active runners.
    """
    cloudwatch = boto3.client("cloudwatch")

    online = [runner for runner in listed_runners if not runner.offline]
    active_count = len(online)
    busy_count = len([runner for runner in online if runner.busy])
    total_count = len(listed_runners)

    # With no active runners the ratio is reported as fully busy
    busy_percent = busy_count / active_count * 100 if active_count else 100.0

    group_dimensions = [{"Name": "group", "Value": group_name}]
    samples = (
        ("BusyRunners", busy_count, "Count"),
        ("ActiveRunners", active_count, "Count"),
        ("TotalRunners", total_count, "Count"),
        ("BusyRunnersRatio", busy_percent, "Percent"),
    )
    metric_data = [
        {
            "MetricName": metric_name,
            "Value": value,
            "Unit": unit,
            "Dimensions": group_dimensions,
        }
        for metric_name, value, unit in samples
    ]

    cloudwatch.put_metric_data(Namespace="RunnersMetrics", MetricData=metric_data)
2021-09-30 09:00:45 +00:00
2022-01-07 09:11:46 +00:00
def main(
    access_token: str,
    push_to_cloudwatch: bool,
) -> None:
    """List the runners, group them by tag and report every group.

    With *push_to_cloudwatch* set, the metrics of each group are pushed to
    CloudWatch; otherwise the group name, its size and every runner in it
    are printed to stdout.
    """
    grouped_runners = group_runners_by_tag(list_runners(access_token))

    for group, group_runners in grouped_runners.items():
        if not push_to_cloudwatch:
            # Dry mode: only show what would be reported
            print(group, f"({len(group_runners)})")
            for runner in group_runners:
                print("\t", runner)
            continue

        print(f"Pushing metrics for group '{group}'")
        push_metrics_to_cloudwatch(group_runners, group)
2022-01-07 09:11:46 +00:00
2021-09-30 09:00:45 +00:00
if __name__ == "__main__":
    # Command-line entry point: obtain a GitHub access token from a private
    # key (given inline, read from a file, or fetched from AWS) and run main().
    parser = argparse.ArgumentParser(description="Get list of runners and their states")
    parser.add_argument(
        "-p", "--private-key-path", help="Path to file with private key"
    )
    parser.add_argument("-k", "--private-key", help="Private key")
    parser.add_argument(
        "-a", "--app-id", type=int, help="GitHub application ID", required=True
    )
    parser.add_argument(
        "--push-to-cloudwatch",
        action="store_true",
        help="Push metrics for active and busy runners to cloudwatch",
    )

    args = parser.parse_args()

    if args.private_key_path and args.private_key:
        # Both sources were given. The run continues as before, with the
        # inline key taking precedence, but the message now says so instead
        # of repeating the "must be specified" text of the opposite case.
        print(
            "Both --private-key-path and --private-key are specified, "
            "--private-key is used",
            file=sys.stderr,
        )

    if args.private_key:
        private_key = args.private_key
    elif args.private_key_path:
        # Explicit encoding so that reading the key does not depend on the
        # locale of the machine the script runs on
        with open(args.private_key_path, "r", encoding="utf-8") as key_file:
            private_key = key_file.read()
    else:
        # No key on the command line is not fatal: fall back to the key and
        # application ID returned by get_key_and_app_from_aws(). The ID
        # passed via --app-id is overwritten in this case.
        print(
            "Neither --private-key-path nor --private-key is specified",
            file=sys.stderr,
        )
        print("Attempt to get key and id from AWS secret manager")
        private_key, args.app_id = get_key_and_app_from_aws()

    token = get_access_token_by_key_app(private_key, args.app_id)
    main(token, args.push_to_cloudwatch)