2021-09-30 10:12:58 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import requests
|
|
|
|
import argparse
|
|
|
|
import jwt
|
|
|
|
import sys
|
|
|
|
import json
|
|
|
|
import time
|
|
|
|
from collections import namedtuple
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-09-30 10:12:58 +00:00
|
|
|
def get_key_and_app_from_aws():
    """Load the GitHub App key and app ID from AWS Secrets Manager.

    Reads the secret named "clickhouse_github_secret_key", parses its
    "SecretString" as JSON and extracts two fields from it.

    Returns:
        A ``(key, app_id)`` tuple: the "clickhouse-app-key" value as stored
        and the "clickhouse-app-id" value converted to ``int``.
    """
    # boto3 is imported lazily, inside the functions that talk to AWS.
    import boto3

    secrets_client = boto3.session.Session().client(
        service_name="secretsmanager",
    )
    secret = secrets_client.get_secret_value(SecretId="clickhouse_github_secret_key")
    payload = json.loads(secret["SecretString"])
    app_key = payload["clickhouse-app-key"]
    app_id = int(payload["clickhouse-app-id"])
    return app_key, app_id
|
|
|
|
|
2021-09-30 10:12:58 +00:00
|
|
|
|
|
|
|
def get_installation_id(jwt_token):
    """Return the ID of the app installation owned by the "ClickHouse" account.

    Args:
        jwt_token: JWT sent as a Bearer token to the GitHub API.

    Returns:
        The "id" field of the installation whose account login is
        "ClickHouse". If several entries match, the last one wins.

    Raises:
        RuntimeError: if the response contains no installation for the
            "ClickHouse" account.
        Exception: whatever ``response.raise_for_status()`` raises on an
            HTTP error status.
    """
    headers = {
        "Authorization": f"Bearer {jwt_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    response = requests.get("https://api.github.com/app/installations", headers=headers)
    response.raise_for_status()
    data = response.json()

    installation_id = None
    for installation in data:
        if installation["account"]["login"] == "ClickHouse":
            installation_id = installation["id"]

    # Fail with a clear message instead of an UnboundLocalError when the app
    # has no installation for the expected account.
    if installation_id is None:
        raise RuntimeError(
            "No GitHub App installation found for account 'ClickHouse'"
        )
    return installation_id
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-09-30 10:12:58 +00:00
|
|
|
|
|
|
|
def get_access_token(jwt_token, installation_id):
    """Request an access token for the given app installation.

    Args:
        jwt_token: JWT sent as a Bearer token to the GitHub API.
        installation_id: installation identifier placed into the request URL.

    Returns:
        The "token" field of the JSON response.
    """
    auth_headers = {
        "Authorization": f"Bearer {jwt_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
    reply = requests.post(url, headers=auth_headers)
    # Surface HTTP failures before trying to read the body.
    reply.raise_for_status()
    return reply.json()["token"]
|
|
|
|
|
2021-09-30 10:12:58 +00:00
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
# Immutable record describing one runner as built by list_runners():
# its id, name, list of label names, and the offline/busy flags.
RunnerDescription = namedtuple("RunnerDescription", "id name tags offline busy")
|
2021-09-30 10:12:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
def list_runners(access_token):
    """Fetch all runners of the ClickHouse organization from the GitHub API.

    The first request reads page one together with "total_count"; the
    remaining pages (100 runners each) are fetched one by one.

    Args:
        access_token: token sent in the ``Authorization: token ...`` header.

    Returns:
        A list of ``RunnerDescription`` tuples, one per runner returned by
        the API, in response order.

    Raises:
        Exception: whatever ``response.raise_for_status()`` raises when a
            page request returns an HTTP error status.
    """
    headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    response = requests.get(
        "https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100",
        headers=headers,
    )
    response.raise_for_status()
    data = response.json()
    total_runners = data["total_count"]
    runners = data["runners"]

    # Ceiling division: an exact multiple of 100 must not trigger one more
    # request for a page that is known to be empty.
    total_pages = -(-total_runners // 100)
    for i in range(2, total_pages + 1):
        response = requests.get(
            f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100",
            headers=headers,
        )
        response.raise_for_status()
        data = response.json()
        runners += data["runners"]

    print("Total runners", len(runners))

    return [
        RunnerDescription(
            id=runner["id"],
            name=runner["name"],
            tags=[tag["name"] for tag in runner["labels"]],
            offline=runner["status"] == "offline",
            busy=runner["busy"],
        )
        for runner in runners
    ]
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-09-30 10:39:15 +00:00
|
|
|
def push_metrics_to_cloudwatch(listed_runners, namespace):
    """Publish runner counters to CloudWatch.

    Four metrics are sent in a single ``put_metric_data`` call:
    "BusyRunners", "ActiveRunners", "TotalRunners" (unit "Count") and
    "BusyRunnersRatio" (unit "Percent").

    Args:
        listed_runners: sized collection of objects with ``busy`` and
            ``offline`` attributes (e.g. the result of ``list_runners``).
        namespace: CloudWatch namespace the metrics are written to.
    """
    import boto3

    client = boto3.client("cloudwatch")

    busy_runners = sum(1 for runner in listed_runners if runner.busy)
    total_active_runners = sum(1 for runner in listed_runners if not runner.offline)
    total_runners = len(listed_runners)

    # With no active runners the ratio is reported as 100 percent rather
    # than dividing by zero.
    if total_active_runners == 0:
        busy_ratio = 100
    else:
        busy_ratio = busy_runners / total_active_runners * 100

    metrics_data = [
        {
            "MetricName": "BusyRunners",
            "Value": busy_runners,
            "Unit": "Count",
        },
        {
            "MetricName": "ActiveRunners",
            "Value": total_active_runners,
            "Unit": "Count",
        },
        {
            "MetricName": "TotalRunners",
            "Value": total_runners,
            "Unit": "Count",
        },
        {
            "MetricName": "BusyRunnersRatio",
            "Value": busy_ratio,
            "Unit": "Percent",
        },
    ]

    # Honour the caller-supplied namespace instead of a hard-coded one.
    client.put_metric_data(Namespace=namespace, MetricData=metrics_data)
|
2021-09-30 10:39:15 +00:00
|
|
|
|
|
|
|
|
2021-09-30 10:12:58 +00:00
|
|
|
def how_many_instances_to_kill(event_data):
    """Sum the requested termination capacity per availability zone.

    Args:
        event_data: mapping with a "CapacityToTerminate" list; every entry
            has "AvailabilityZone" and "Capacity" keys.

    Returns:
        A dict mapping each zone name to the total "Capacity" of its entries,
        in order of first appearance.
    """
    capacity_per_zone = {}
    for entry in event_data["CapacityToTerminate"]:
        zone = entry["AvailabilityZone"]
        capacity = entry["Capacity"]
        # The same zone may appear in several entries, so accumulate.
        capacity_per_zone[zone] = capacity_per_zone.get(zone, 0) + capacity
    return capacity_per_zone
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-09-30 10:12:58 +00:00
|
|
|
def get_candidates_to_be_killed(event_data):
    """Group the instance IDs from the event by availability zone.

    Args:
        event_data: mapping with an "Instances" list; every entry has
            "AvailabilityZone" and "InstanceId" keys.

    Returns:
        A dict mapping each zone name to the list of its instance IDs, in
        event order (duplicates are kept).
    """
    ids_per_zone = {}
    for entry in event_data["Instances"]:
        zone = entry["AvailabilityZone"]
        instance_id = entry["InstanceId"]
        ids_per_zone.setdefault(zone, []).append(instance_id)
    return ids_per_zone
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-09-30 10:12:58 +00:00
|
|
|
def delete_runner(access_token, runner):
    """Delete a runner from the ClickHouse organization on GitHub.

    Args:
        access_token: token sent in the ``Authorization: token ...`` header.
        runner: object with ``id`` and ``name`` attributes.

    Returns:
        True if the API answered with status code 204, False otherwise.
    """
    auth_headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    url = f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}"

    reply = requests.delete(url, headers=auth_headers)
    # Error statuses raise here and never reach the 204 check below.
    reply.raise_for_status()
    status = reply.status_code
    print(f"Response code deleting {runner.name} with id {runner.id} is {status}")
    return status == 204
|
|
|
|
|
|
|
|
|
|
|
|
def main(github_secret_key, github_app_id, event):
    """Pick instances to terminate for a scale-in event and deregister their runners.

    For every availability zone in the event, candidates that are not known
    to GitHub as runners are selected directly. Idle runners among the
    remaining candidates are deleted from GitHub first and selected only if
    that deletion succeeds. Busy runners are never selected.

    Args:
        github_secret_key: private key used to sign the app JWT (RS256).
        github_app_id: GitHub application ID, used as the JWT issuer.
        event: dict with "CapacityToTerminate" and "Instances" lists (see the
            sample event in the ``__main__`` section of this file).

    Returns:
        A dict ``{"InstanceIDs": [...]}`` with the selected instance names.

    Raises:
        Exception: if a zone asks to terminate more instances than it has
            candidates.
    """
    print("Got event", json.dumps(event, sort_keys=True, indent=4))
    to_kill_by_zone = how_many_instances_to_kill(event)
    instances_by_zone = get_candidates_to_be_killed(event)

    # JWT backdated by 60 seconds and valid for 10 minutes from now.
    payload = {
        "iat": int(time.time()) - 60,
        "exp": int(time.time()) + (10 * 60),
        "iss": github_app_id,
    }

    encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256")
    installation_id = get_installation_id(encoded_jwt)
    access_token = get_access_token(encoded_jwt, installation_id)

    runners = list_runners(access_token)
    # Built once: the runner list does not change while candidates are checked.
    runner_names = {runner.name for runner in runners}

    to_delete_runners = []
    instances_to_kill = []
    for zone in to_kill_by_zone:
        num_to_kill = to_kill_by_zone[zone]
        candidates = instances_by_zone[zone]
        if num_to_kill > len(candidates):
            raise Exception(
                f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}"
            )

        delete_for_av = []
        # Candidates unknown to GitHub need no deregistration.
        for candidate in candidates:
            if candidate not in runner_names:
                print(
                    f"Candidate {candidate} was not in runners list, simply delete it"
                )
                instances_to_kill.append(candidate)

        for candidate in candidates:
            # NOTE(review): instances_to_kill accumulates across zones, so this
            # equality compares a cross-zone total with a per-zone quota —
            # confirm this is intended.
            if len(delete_for_av) + len(instances_to_kill) == num_to_kill:
                break
            if candidate in instances_to_kill:
                continue

            for runner in runners:
                if runner.name == candidate:
                    if not runner.busy:
                        print(
                            f"Runner {runner.name} is not busy and can be deleted from AV {zone}"
                        )
                        delete_for_av.append(runner)
                    else:
                        print(f"Runner {runner.name} is busy, not going to delete it")
                    # Only the first runner with a matching name is considered.
                    break

        if len(delete_for_av) < num_to_kill:
            print(
                f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}"
            )
        to_delete_runners += delete_for_av

    print("Got instances to kill: ", ", ".join(instances_to_kill))
    print(
        "Going to delete runners:",
        ", ".join([runner.name for runner in to_delete_runners]),
    )
    for runner in to_delete_runners:
        # An instance is reported for termination only after its runner has
        # been removed from GitHub.
        if delete_runner(access_token, runner):
            print(
                f"Runner with name {runner.name} and id {runner.id} successfuly deleted from github"
            )
            instances_to_kill.append(runner.name)
        else:
            print(f"Cannot delete {runner.name} from github")

    ## push metrics
    # runners = list_runners(access_token)
    # push_metrics_to_cloudwatch(runners, 'RunnersMetrics')

    response = {"InstanceIDs": instances_to_kill}
    print(response)
    return response
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-09-30 10:12:58 +00:00
|
|
|
def handler(event, context):
    """Load the app credentials from AWS and run ``main`` on ``event``.

    Args:
        event: event dict forwarded unchanged to ``main``.
        context: unused.

    Returns:
        Whatever ``main`` returns.
    """
    credentials = get_key_and_app_from_aws()
    return main(*credentials, event)
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-09-30 10:12:58 +00:00
|
|
|
# Command-line entry point: runs main() against a hard-coded sample event,
# using a private key given inline or read from a file.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Get list of runners and their states")
    parser.add_argument(
        "-p", "--private-key-path", help="Path to file with private key"
    )
    parser.add_argument("-k", "--private-key", help="Private key")
    parser.add_argument(
        "-a", "--app-id", type=int, help="GitHub application ID", required=True
    )

    args = parser.parse_args()

    # parser.error() prints usage plus the message to stderr and exits, so an
    # invalid combination no longer falls through to open(None) below.
    if not args.private_key_path and not args.private_key:
        parser.error("Either --private-key-path or --private-key must be specified")

    if args.private_key_path and args.private_key:
        parser.error(
            "Only one of --private-key-path and --private-key may be specified"
        )

    if args.private_key:
        private_key = args.private_key
    else:
        with open(args.private_key_path, "r") as key_file:
            private_key = key_file.read()

    sample_event = {
        "AutoScalingGroupARN": "arn:aws:autoscaling:us-east-1:<account-id>:autoScalingGroup:d4738357-2d40-4038-ae7e-b00ae0227003:autoScalingGroupName/my-asg",
        "AutoScalingGroupName": "my-asg",
        "CapacityToTerminate": [
            {
                "AvailabilityZone": "us-east-1b",
                "Capacity": 1,
                "InstanceMarketOption": "OnDemand",
            },
            {
                "AvailabilityZone": "us-east-1c",
                "Capacity": 2,
                "InstanceMarketOption": "OnDemand",
            },
        ],
        "Instances": [
            {
                "AvailabilityZone": "us-east-1b",
                "InstanceId": "i-08d0b3c1a137e02a5",
                "InstanceType": "t2.nano",
                "InstanceMarketOption": "OnDemand",
            },
            {
                "AvailabilityZone": "us-east-1c",
                "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal",
                "InstanceType": "t2.nano",
                "InstanceMarketOption": "OnDemand",
            },
            {
                "AvailabilityZone": "us-east-1c",
                "InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal",
                "InstanceType": "t2.nano",
                "InstanceMarketOption": "OnDemand",
            },
            {
                "AvailabilityZone": "us-east-1c",
                "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal",
                "InstanceType": "t2.nano",
                "InstanceMarketOption": "OnDemand",
            },
        ],
        "Cause": "SCALE_IN",
    }

    main(private_key, args.app_id, sample_event)
|