Terminate long-running offline non-busy runners in EC2

This commit is contained in:
Mikhail f. Shiryaev 2023-03-01 12:37:34 +01:00
parent 05142af0c0
commit 43dbd7bc25
No known key found for this signature in database
GPG Key ID: 4B02ED204C7D93F4

View File

@ -37,15 +37,17 @@ RunnerDescriptions = List[RunnerDescription]
def get_dead_runners_in_ec2(runners: RunnerDescriptions) -> RunnerDescriptions:
"""Returns instances that are offline/dead in EC2, or not found in EC2"""
ids = {
runner.name: runner
for runner in runners
# Only names of the form `i-deadbead123` are valid instance IDs
if runner.offline and not runner.busy and runner.name.startswith("i-")
if runner.name.startswith("i-") and runner.offline and not runner.busy
}
if not ids:
return []
# Delete all offline runners with a wrong name
result_to_delete = [
runner
for runner in runners
@ -57,7 +59,7 @@ def get_dead_runners_in_ec2(runners: RunnerDescriptions) -> RunnerDescriptions:
i = 0
inc = 100
print("Checking ids", ids.keys())
print("Checking ids: ", " ".join(ids.keys()))
instances_statuses = []
while i < len(ids.keys()):
try:
@ -106,6 +108,9 @@ def get_lost_ec2_instances(runners: RunnerDescriptions) -> List[dict]:
Filters=[{"Name": "tag-key", "Values": ["github:runner-type"]}]
)["Reservations"]
lost_instances = []
offline_runners = [
runner.name for runner in runners if runner.offline and not runner.busy
]
# Here we refresh the runners to get the most recent state
now = datetime.now().timestamp()
@ -126,6 +131,10 @@ def get_lost_ec2_instances(runners: RunnerDescriptions) -> List[dict]:
):
continue
if instance["InstanceId"] in offline_runners:
lost_instances.append(instance)
continue
if instance["State"]["Name"] == "running" and (
not [
runner