2021-09-30 09:00:45 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import requests
|
|
|
|
import argparse
|
|
|
|
import jwt
|
|
|
|
import sys
|
|
|
|
import json
|
|
|
|
import time
|
|
|
|
from collections import namedtuple
|
2021-12-02 16:38:18 +00:00
|
|
|
import boto3
|
|
|
|
|
|
|
|
def get_dead_runners_in_ec2(runners):
    """Return the offline, idle runners whose EC2 instance is not alive.

    Every runner that is offline and not busy is looked up in EC2, using the
    runner name as the instance ID. A runner is reported as dead when EC2
    does not list its instance in the ``pending`` or ``running`` state.

    Args:
        runners: iterable of objects exposing ``name``, ``offline`` and
            ``busy`` attributes (e.g. ``RunnerDescription``).

    Returns:
        list: the runners whose instance was not reported as alive. Empty
        when there are no offline idle runners; no EC2 call is made then.
    """
    candidates = {
        runner.name: runner
        for runner in runners
        if runner.offline and not runner.busy
    }
    if not candidates:
        return []

    client = boto3.client('ec2')

    instance_ids = list(candidates)
    print("Checking ids", instance_ids)

    # DescribeInstanceStatus accepts at most 100 explicitly listed instance
    # IDs per request, so the candidates are queried in batches.
    batch_size = 100
    found_instances = set()
    for start in range(0, len(instance_ids), batch_size):
        instances_statuses = client.describe_instance_status(
            InstanceIds=instance_ids[start:start + batch_size]
        )
        print("Response", instances_statuses)
        for instance_status in instances_statuses['InstanceStatuses']:
            if instance_status['InstanceState']['Name'] in ('pending', 'running'):
                found_instances.add(instance_status['InstanceId'])

    print("Found instances", found_instances)

    result_to_delete = []
    for instance_id, runner in candidates.items():
        if instance_id not in found_instances:
            print("Instance", instance_id, "is not alive, going to remove it")
            result_to_delete.append(runner)
    return result_to_delete
|
2021-09-30 09:00:45 +00:00
|
|
|
|
|
|
|
def get_key_and_app_from_aws():
    """Load the GitHub App credentials from AWS Secrets Manager.

    Reads the secret named ``clickhouse_github_secret_key`` and parses its
    ``SecretString`` as JSON.

    Returns:
        tuple: ``(key, app_id)`` where ``key`` is the value stored under
        ``clickhouse-app-key`` and ``app_id`` is the value stored under
        ``clickhouse-app-id`` converted to ``int``.
    """
    import boto3

    secrets_client = boto3.session.Session().client(service_name='secretsmanager')
    secret = secrets_client.get_secret_value(SecretId="clickhouse_github_secret_key")
    payload = json.loads(secret['SecretString'])

    app_key = payload['clickhouse-app-key']
    app_id = int(payload['clickhouse-app-id'])
    return app_key, app_id
|
|
|
|
|
|
|
|
def handler(event, context):
    """Run the full job with credentials taken from AWS Secrets Manager.

    Presumably the AWS Lambda entry point (confirm against the deployment);
    neither ``event`` nor ``context`` is read. Metrics are pushed to
    CloudWatch and dead offline runners are deleted.
    """
    key, app = get_key_and_app_from_aws()
    main(key, app, push_to_cloudwatch=True, delete_offline_runners=True)
|
2021-09-30 09:00:45 +00:00
|
|
|
|
|
|
|
def get_installation_id(jwt_token):
    """Return the ID of the first installation of the GitHub App.

    Args:
        jwt_token: JWT sent as a ``Bearer`` token to authenticate as the app.

    Returns:
        The ``id`` field of the first entry returned by
        ``GET /app/installations``.

    Raises:
        requests.HTTPError: if GitHub answers with an error status.
    """
    reply = requests.get(
        "https://api.github.com/app/installations",
        headers={
            "Authorization": f"Bearer {jwt_token}",
            "Accept": "application/vnd.github.v3+json",
        },
    )
    reply.raise_for_status()

    installations = reply.json()
    first_installation = installations[0]
    return first_installation['id']
|
|
|
|
|
|
|
|
def get_access_token(jwt_token, installation_id):
    """Request an access token for one installation of the GitHub App.

    Args:
        jwt_token: JWT sent as a ``Bearer`` token to authenticate as the app.
        installation_id: installation to create the token for.

    Returns:
        The ``token`` field of GitHub's JSON response.

    Raises:
        requests.HTTPError: if GitHub answers with an error status.
    """
    endpoint = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
    reply = requests.post(
        endpoint,
        headers={
            "Authorization": f"Bearer {jwt_token}",
            "Accept": "application/vnd.github.v3+json",
        },
    )
    reply.raise_for_status()
    return reply.json()['token']
|
|
|
|
|
|
|
|
|
|
|
|
# Immutable record describing one runner as produced by list_runners():
#   id      - runner ID reported by GitHub
#   name    - runner name reported by GitHub
#   tags    - list of the runner's label names
#   offline - True when the reported status is 'offline'
#   busy    - busy flag reported by GitHub
RunnerDescription = namedtuple(
    'RunnerDescription',
    ('id', 'name', 'tags', 'offline', 'busy'),
)
|
|
|
|
|
|
|
|
def list_runners(access_token):
    """Fetch all Actions runners registered for the ClickHouse organization.

    The first page is requested to learn ``total_count``; the remaining
    pages (100 runners each) are then fetched one by one.

    Args:
        access_token: GitHub token sent in the ``Authorization`` header.

    Returns:
        list[RunnerDescription]: one entry per runner, where ``tags`` holds
        the runner's label names and ``offline`` is True when the reported
        status equals ``'offline'``.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    per_page = 100
    base_url = "https://api.github.com/orgs/ClickHouse/actions/runners"

    response = requests.get(f"{base_url}?per_page={per_page}", headers=headers)
    response.raise_for_status()
    data = response.json()
    total_runners = data['total_count']
    runners = data['runners']

    # Ceiling division: an exact multiple of per_page must not yield an extra
    # (empty) page. At least one page is always reported because the first
    # page has already been fetched above.
    total_pages = max(1, (total_runners + per_page - 1) // per_page)
    print("Total pages", total_pages)
    for page in range(2, total_pages + 1):
        response = requests.get(f"{base_url}?page={page}&per_page={per_page}", headers=headers)
        response.raise_for_status()
        data = response.json()
        runners += data['runners']

    print("Total runners", len(runners))
    result = []
    for runner in runners:
        tags = [tag['name'] for tag in runner['labels']]
        desc = RunnerDescription(
            id=runner['id'],
            name=runner['name'],
            tags=tags,
            offline=runner['status'] == 'offline',
            busy=runner['busy'],
        )
        result.append(desc)

    return result
|
|
|
|
|
2021-10-21 11:09:15 +00:00
|
|
|
def group_runners_by_tag(listed_runners):
    """Bucket runners by the first known runner-type label they carry.

    Each runner lands in exactly one group: the first of its tags that is a
    known runner type, or ``'unlabeled'`` when none of its tags match.

    Args:
        listed_runners: iterable of objects with a ``tags`` sequence.

    Returns:
        dict: group name -> list of runners, in order of first appearance.
    """
    known_types = ('style-checker', 'builder', 'func-tester', 'stress-tester')

    groups = {}
    for runner in listed_runners:
        group_name = next(
            (label for label in runner.tags if label in known_types),
            'unlabeled',
        )
        groups.setdefault(group_name, []).append(runner)
    return groups
|
|
|
|
|
|
|
|
|
2021-09-30 09:00:45 +00:00
|
|
|
def push_metrics_to_cloudwatch(listed_runners, namespace):
    """Publish runner counters for one group to CloudWatch.

    Sends four metrics in a single ``put_metric_data`` call: ``BusyRunners``,
    ``ActiveRunners`` (runners that are not offline), ``TotalRunners`` and
    ``BusyRunnersRatio`` (busy / active, in percent).

    Args:
        listed_runners: sized collection of objects with ``busy`` and
            ``offline`` attributes.
        namespace: CloudWatch namespace the metrics are written to.
    """
    client = boto3.client('cloudwatch')

    def count_metric(name, value):
        # All counters share the same shape and the 'Count' unit.
        return {'MetricName': name, 'Value': value, 'Unit': 'Count'}

    busy_count = len([r for r in listed_runners if r.busy])
    active_count = len([r for r in listed_runners if not r.offline])

    # With no active runners the ratio is reported as fully busy (100)
    # instead of dividing by zero.
    ratio = 100 if active_count == 0 else busy_count / active_count * 100

    metrics = [
        count_metric('BusyRunners', busy_count),
        count_metric('ActiveRunners', active_count),
        count_metric('TotalRunners', len(listed_runners)),
        {'MetricName': 'BusyRunnersRatio', 'Value': ratio, 'Unit': 'Percent'},
    ]

    client.put_metric_data(Namespace=namespace, MetricData=metrics)
|
2021-09-30 09:00:45 +00:00
|
|
|
|
2021-11-02 19:29:58 +00:00
|
|
|
def delete_runner(access_token, runner):
    """Remove one runner from the ClickHouse organization via the GitHub API.

    Args:
        access_token: GitHub token sent in the ``Authorization`` header.
        runner: object with ``id`` (used in the URL) and ``name`` (logged).

    Returns:
        bool: True when GitHub replied with status 204.

    Raises:
        requests.HTTPError: if GitHub answers with an error status.
    """
    endpoint = f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}"
    reply = requests.delete(
        endpoint,
        headers={
            "Authorization": f"token {access_token}",
            "Accept": "application/vnd.github.v3+json",
        },
    )
    reply.raise_for_status()

    status = reply.status_code
    print(f"Response code deleting {runner.name} is {status}")
    return status == 204
|
|
|
|
|
|
|
|
def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_runners):
    """Collect runner state from GitHub, then report it and optionally clean up.

    Args:
        github_secret_key: private key used to sign the app JWT (RS256).
        github_app_id: GitHub App ID, used as the JWT issuer.
        push_to_cloudwatch: when true, push per-group metrics to CloudWatch;
            otherwise print every group and its runners.
        delete_offline_runners: when true, delete the runners that
            ``get_dead_runners_in_ec2`` reports as dead.
    """
    # The token is issued 60 seconds in the past and expires in 10 minutes.
    issued_at = int(time.time()) - 60
    expires_at = int(time.time()) + (10 * 60)
    claims = {
        "iat": issued_at,
        "exp": expires_at,
        "iss": github_app_id,
    }

    encoded_jwt = jwt.encode(claims, github_secret_key, algorithm="RS256")
    access_token = get_access_token(encoded_jwt, get_installation_id(encoded_jwt))
    runners = list_runners(access_token)

    for group, members in group_runners_by_tag(runners).items():
        if not push_to_cloudwatch:
            print(group, f"({len(members)})")
            for member in members:
                print('\t', member)
            continue
        push_metrics_to_cloudwatch(members, 'RunnersMetrics/' + group)

    if not delete_offline_runners:
        return

    print("Going to delete offline runners")
    for dead_runner in get_dead_runners_in_ec2(runners):
        print("Deleting runner", dead_runner)
        delete_runner(access_token, dead_runner)
|
2021-09-30 09:00:45 +00:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: the private key comes either inline or from
    # a file, and the remaining flags are passed straight through to main().
    parser = argparse.ArgumentParser(description='Get list of runners and their states')
    parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
    parser.add_argument('-k', '--private-key', help='Private key')
    parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
    parser.add_argument('--push-to-cloudwatch', action='store_true', help='Push runner metrics to CloudWatch')
    parser.add_argument('--delete-offline', action='store_true', help='Remove offline runners')

    args = parser.parse_args()

    # parser.error() prints the usage and the message to stderr and exits
    # with status 2, so invalid combinations stop here instead of falling
    # through to open(None) or silently preferring one of the two keys.
    if not args.private_key_path and not args.private_key:
        parser.error("Either --private-key-path or --private-key must be specified")

    if args.private_key_path and args.private_key:
        parser.error("Only one of --private-key-path and --private-key may be specified")

    if args.private_key:
        private_key = args.private_key
    else:
        with open(args.private_key_path, 'r') as key_file:
            private_key = key_file.read()

    main(private_key, args.app_id, args.push_to_cloudwatch, args.delete_offline)
|