mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Get instances for the region to not kill a fresh one
This commit is contained in:
parent
bf9b563e0b
commit
855afb56f9
@ -198,11 +198,37 @@ def get_candidates_to_be_killed(event_data: dict) -> Dict[str, List[str]]:
|
|||||||
|
|
||||||
|
|
||||||
def main(access_token: str, event: dict) -> Dict[str, List[str]]:
|
def main(access_token: str, event: dict) -> Dict[str, List[str]]:
|
||||||
|
start = time.time()
|
||||||
print("Got event", json.dumps(event, sort_keys=True).replace("\n", ""))
|
print("Got event", json.dumps(event, sort_keys=True).replace("\n", ""))
|
||||||
to_kill_by_zone = how_many_instances_to_kill(event)
|
to_kill_by_zone = how_many_instances_to_kill(event)
|
||||||
instances_by_zone = get_candidates_to_be_killed(event)
|
instances_by_zone = get_candidates_to_be_killed(event)
|
||||||
|
# Getting ASG and instances' descriptions from the API
|
||||||
|
# We don't kill instances that alive for less than 10 minutes, since they
|
||||||
|
# could be not in the GH active runners yet
|
||||||
|
print(f"Check other hosts from the same ASG {event['AutoScalingGroupName']}")
|
||||||
|
asg_client = boto3.client("autoscaling")
|
||||||
|
as_groups_response = asg_client.describe_auto_scaling_groups(
|
||||||
|
AutoScalingGroupNames=[event["AutoScalingGroupName"]]
|
||||||
|
)
|
||||||
|
assert len(as_groups_response["AutoScalingGroups"]) == 1
|
||||||
|
asg = as_groups_response["AutoScalingGroups"][0]
|
||||||
|
asg_instance_ids = [instance["InstanceId"] for instance in asg["Instances"]]
|
||||||
|
instance_descriptions = get_cached_instances()
|
||||||
|
# The instances launched less than 10 minutes ago
|
||||||
|
immune_ids = [
|
||||||
|
instance["InstanceId"]
|
||||||
|
for instance in instance_descriptions.values()
|
||||||
|
if start - instance["LaunchTime"].timestamp() < 600
|
||||||
|
]
|
||||||
|
# if the ASG's instance ID not in instance_descriptions, it's most probably
|
||||||
|
# is not cached yet, so we must mark it as immuned
|
||||||
|
immune_ids.extend(
|
||||||
|
iid for iid in asg_instance_ids if iid not in instance_descriptions
|
||||||
|
)
|
||||||
|
print("Time spent on the requests to AWS: ", time.time() - start)
|
||||||
|
|
||||||
runners = list_runners(access_token)
|
runners = list_runners(access_token)
|
||||||
|
runner_ids = set(runner.name for runner in runners)
|
||||||
# We used to delete potential hosts to terminate from GitHub runners pool,
|
# We used to delete potential hosts to terminate from GitHub runners pool,
|
||||||
# but the documentation states:
|
# but the documentation states:
|
||||||
# --- Returning an instance first in the response data does not guarantee its termination
|
# --- Returning an instance first in the response data does not guarantee its termination
|
||||||
@ -221,13 +247,17 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]:
|
|||||||
|
|
||||||
delete_for_av = [] # type: RunnerDescriptions
|
delete_for_av = [] # type: RunnerDescriptions
|
||||||
for candidate in candidates:
|
for candidate in candidates:
|
||||||
if candidate not in set(runner.name for runner in runners):
|
if candidate in immune_ids:
|
||||||
|
print(
|
||||||
|
f"Candidate {candidate} started less than 10 minutes ago, won't touch a child"
|
||||||
|
)
|
||||||
|
break
|
||||||
|
if candidate not in runner_ids:
|
||||||
print(
|
print(
|
||||||
f"Candidate {candidate} was not in runners list, simply delete it"
|
f"Candidate {candidate} was not in runners list, simply delete it"
|
||||||
)
|
)
|
||||||
instances_to_kill.append(candidate)
|
instances_to_kill.append(candidate)
|
||||||
|
break
|
||||||
for candidate in candidates:
|
|
||||||
if len(delete_for_av) + len(instances_to_kill) == num_to_kill:
|
if len(delete_for_av) + len(instances_to_kill) == num_to_kill:
|
||||||
break
|
break
|
||||||
if candidate in instances_to_kill:
|
if candidate in instances_to_kill:
|
||||||
@ -253,16 +283,11 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]:
|
|||||||
instances_to_kill += [runner.name for runner in delete_for_av]
|
instances_to_kill += [runner.name for runner in delete_for_av]
|
||||||
|
|
||||||
if len(instances_to_kill) < total_to_kill:
|
if len(instances_to_kill) < total_to_kill:
|
||||||
print(f"Check other hosts from the same ASG {event['AutoScalingGroupName']}")
|
for instance in asg_instance_ids:
|
||||||
client = boto3.client("autoscaling")
|
if instance in immune_ids:
|
||||||
as_groups = client.describe_auto_scaling_groups(
|
continue
|
||||||
AutoScalingGroupNames=[event["AutoScalingGroupName"]]
|
|
||||||
)
|
|
||||||
assert len(as_groups["AutoScalingGroups"]) == 1
|
|
||||||
asg = as_groups["AutoScalingGroups"][0]
|
|
||||||
for instance in asg["Instances"]:
|
|
||||||
for runner in runners:
|
for runner in runners:
|
||||||
if runner.name == instance["InstanceId"] and not runner.busy:
|
if runner.name == instance and not runner.busy:
|
||||||
print(f"Runner {runner.name} is not busy and can be deleted")
|
print(f"Runner {runner.name} is not busy and can be deleted")
|
||||||
instances_to_kill.append(runner.name)
|
instances_to_kill.append(runner.name)
|
||||||
|
|
||||||
@ -270,9 +295,9 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]:
|
|||||||
print("Got enough instances to kill")
|
print("Got enough instances to kill")
|
||||||
break
|
break
|
||||||
|
|
||||||
print("Got instances to kill: ", ", ".join(instances_to_kill))
|
|
||||||
response = {"InstanceIDs": instances_to_kill}
|
response = {"InstanceIDs": instances_to_kill}
|
||||||
print(response)
|
print("Got instances to kill: ", response)
|
||||||
|
print("Time spent on the request: ", time.time() - start)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user