Process / Steps :
- Set scale-in protection on the currently running instances.
- Double the Auto Scaling group's desired capacity; for example, if you currently have 2 instances, it will increase to 4.
- Set the current container instances to DRAINING, so that once the new instances are registered with ECS the tasks (pods) are moved there.
- The ecs_drain_check step then actively polls the draining status at a 60-second interval.
- Once all the pods have safely moved to the new instances, scale-in protection is swapped between the new and old instances: it is removed from the old instances and applied to the new instances.
- Next, the Auto Scaling group is set back to its previous desired capacity (the value it had before it was doubled).
- The final step is to remove scale-in protection from all instances.
Installation :
sudo apt-get install python3 python3-venv python3-pip
python3 -m venv venv
source venv/bin/activate
pip3 install click boto3
import boto3
import time
import click
from datetime import datetime
import sys
# Deployment-specific settings; replace the masked placeholders before running.
# Example pr-blends01-dev pr-blends01-dev ap-northeast-1
# Name of the Auto Scaling group; passed as AutoScalingGroupName /
# AutoScalingGroupNames to the autoscaling client calls below.
AutoScalingGroupNames = "pr-*****"
# Name of the ECS cluster; passed as `cluster` to the ECS client calls below.
ClusterName = "pr-*****"
# AWS region used for the default boto3 session.
region_name = "ap-*****"
# When True, the `traffic` command asks for a yes/no confirmation before
# each subsequent step; when False it runs every step without prompting.
DEBUG = True
# Set up default session with profile and region
boto3.setup_default_session(profile_name='default', region_name=region_name)
# Clients are created at import time and shared by all commands.
client = boto3.client('autoscaling')
ecs_client = boto3.client('ecs')
@click.group(help="This is the main CLI group. Use it to manage your commands.")
def cli():
    """Root click group that every sub-command of this script is attached to."""
def all_instance_ids():
    """Split the Auto Scaling group's instances by scale-in protection.

    Only instances whose lifecycle state allows their protection flag to be
    changed are returned, so the result can be passed straight to
    ``set_instance_protection``.

    Returns:
        tuple[list[str], list[str]]: ``(protected_ids, unprotected_ids)``.

    Raises:
        click.ClickException: if the configured Auto Scaling group is not
            returned by the API (for example, the name is wrong).
    """
    # SetInstanceProtection rejects instances that are still launching or
    # already terminating, which would fail the whole request.
    protectable_states = ('InService', 'EnteringStandby', 'Standby')

    response = client.describe_auto_scaling_groups(
        AutoScalingGroupNames=[AutoScalingGroupNames],
    )
    groups = response['AutoScalingGroups']
    if not groups:
        raise click.ClickException(
            f"Auto Scaling group not found: {AutoScalingGroupNames}"
        )

    protected_ids = []
    unprotected_ids = []
    # Single pass instead of scanning the instance list once per category.
    for instance in groups[0]['Instances']:
        if instance.get('LifecycleState') not in protectable_states:
            continue
        if instance['ProtectedFromScaleIn']:
            protected_ids.append(instance['InstanceId'])
        else:
            unprotected_ids.append(instance['InstanceId'])
    return protected_ids, unprotected_ids
def _set_scale_in_protection(instance_ids, protected):
    """Apply one scale-in protection value to ``instance_ids`` in API-sized batches."""
    # SetInstanceProtection accepts at most 50 instance IDs per request.
    batch_size = 50
    for start in range(0, len(instance_ids), batch_size):
        client.set_instance_protection(
            InstanceIds=instance_ids[start:start + batch_size],
            AutoScalingGroupName=AutoScalingGroupNames,
            ProtectedFromScaleIn=protected,
        )


@cli.command("protectedFromScaleIn", help="This command fetch instance scaleinprotection status and toss them")
def protectedFromScaleIn():
    """Flip scale-in protection on every instance of the Auto Scaling group.

    Instances that are currently protected become unprotected and vice
    versa. Both lists are read once, before any change is made, so an
    instance is never flipped twice within a single invocation.
    """
    protected_instances, unprotected_instances = all_instance_ids()

    if protected_instances:
        _set_scale_in_protection(protected_instances, False)
        print("Instance protection removed:", protected_instances)
    else:
        print("No instances to remove protection.")

    if unprotected_instances:
        _set_scale_in_protection(unprotected_instances, True)
        print("Instance protection added :", unprotected_instances)
    else:
        print("No instances to added protection.")
@cli.command("scale_adjustment", help="This command scaleup and scale down")
@click.option('--expand', is_flag=True, default=False, help="expand the scale")
def scale_adjustment(expand):
    """Double or halve the Auto Scaling group's desired capacity.

    Args:
        expand: when true the desired capacity is doubled; otherwise it is
            halved, rounding up.

    Raises:
        click.ClickException: if the configured Auto Scaling group is not
            returned by the API.
    """
    response = client.describe_auto_scaling_groups(
        AutoScalingGroupNames=[AutoScalingGroupNames],
    )
    groups = response['AutoScalingGroups']
    if not groups:
        raise click.ClickException(
            f"Auto Scaling group not found: {AutoScalingGroupNames}"
        )

    current = groups[0]['DesiredCapacity']
    if expand:
        target = current * 2
    else:
        # Round up so that halving an odd capacity never removes more than
        # half of the fleet and a capacity of 1 is never reduced to 0.
        target = (current + 1) // 2

    # NOTE(review): HonorCooldown=True is kept from the original; confirm it
    # is intended for the scale-down that follows shortly after a scale-up.
    client.set_desired_capacity(
        AutoScalingGroupName=AutoScalingGroupNames,
        DesiredCapacity=target,
        HonorCooldown=True,
    )
    print(f"Desired capacity changed: {current} -> {target}")
@cli.command("ecs_drain_container_instances", help="This command fetch ACTIVE container and set DRAINING")
def ecs_drain_container_instances():
    """Set every ACTIVE container instance of the cluster to DRAINING.

    NOTE(review): this drains *all* instances that are ACTIVE at call time.
    If replacement instances have already registered with the cluster they
    will be drained as well — confirm the timing against the workflow.
    """
    # The listing is paginated; read every page, not just the first one.
    paginator = ecs_client.get_paginator('list_container_instances')
    active_arns = []
    for page in paginator.paginate(cluster=ClusterName, status='ACTIVE'):
        active_arns.extend(page['containerInstanceArns'])

    if not active_arns:
        # Avoid sending an empty containerInstances list to the API.
        print("No ACTIVE container instances to drain.")
        return

    # UpdateContainerInstancesState accepts at most 10 instances per request.
    batch_size = 10
    for start in range(0, len(active_arns), batch_size):
        response = ecs_client.update_container_instances_state(
            cluster=ClusterName,
            containerInstances=active_arns[start:start + batch_size],
            status='DRAINING',
        )
        # Surface per-instance failures instead of silently ignoring them.
        for failure in response.get('failures', []):
            print("Failed to drain container instance:", failure)
@cli.command("ecs_drain_check", help="This command check DRAINING status")
def ecs_drain_check():
    """Block until the DRAINING container instances run no more tasks.

    The set of DRAINING instances is captured once at start-up; their
    running-task counts are then polled every 60 seconds until the total
    reaches zero.

    Returns:
        tuple[str, int]: always ``("Done", 0)`` once draining has finished
        (or immediately when no instance is DRAINING).
    """
    print("Function : ecs_drain_check Trigger ")

    # Read every page of the listing. Each page holds at most 100 ARNs, which
    # is also the most DescribeContainerInstances accepts per request, so the
    # pages double as describe batches.
    paginator = ecs_client.get_paginator('list_container_instances')
    arn_batches = [
        page['containerInstanceArns']
        for page in paginator.paginate(cluster=ClusterName, status='DRAINING')
        if page['containerInstanceArns']
    ]

    # Nothing is draining: report completion right away.
    if not arn_batches:
        return "Done", 0

    while True:
        running_tasks = 0
        for batch in arn_batches:
            described = ecs_client.describe_container_instances(
                cluster=ClusterName,
                containerInstances=batch,
            )
            running_tasks += sum(
                instance['runningTasksCount']
                for instance in described['containerInstances']
            )

        if running_tasks == 0:
            return "Done", 0

        print(f"Current status: Running, Total tasks pending: {running_tasks}")
        time.sleep(60)
def _operator_approves(step_label):
    """Return True when the next step may run.

    With DEBUG off every step is approved automatically; otherwise the
    operator must type "yes" (case-insensitive) at the prompt.
    """
    if not DEBUG:
        return True
    answer = input(f"Do you want to proceed with {step_label}? (yes/no): ")
    if answer.lower() == 'yes':
        return True
    print("Operation aborted by user.")
    return False


def _announce(step_name):
    """Print the timestamped start banner for a step."""
    print(f"Function Started : {step_name} at {datetime.now()}")


@cli.command("traffic", help="complete automated operation")
@click.pass_context
def traffic(ctm):
    """Run the whole rotation: protect, expand, drain, wait, swap, shrink, unprotect.

    Args:
        ctm: the click context, used to invoke the sibling commands.
    """
    print("************** Operation Started **************")

    # Step 1: flip scale-in protection on the instances that exist now.
    _announce("protectedFromScaleIn")
    ctm.invoke(protectedFromScaleIn)

    # Step 2: double the desired capacity.
    if not _operator_approves("scale_adjustment"):
        return
    _announce("scale_adjustment")
    ctm.invoke(scale_adjustment, expand=True)

    # Step 3: put the ACTIVE container instances into DRAINING.
    if not _operator_approves("ecs_drain_container_instances"):
        return
    _announce("ecs_drain_container_instances")
    ctm.invoke(ecs_drain_container_instances)

    # Step 4: wait until the draining instances run no tasks.
    # (The trailing space in the label reproduces the original prompt text.)
    if not _operator_approves("ecs_drain_check "):
        return
    _announce("ecs_drain_check")
    ctm.invoke(ecs_drain_check)

    # Step 5: flip scale-in protection again.
    if not _operator_approves("protectedFromScaleIn"):
        return
    _announce("protectedFromScaleIn")
    ctm.invoke(protectedFromScaleIn)

    # Step 6: halve the desired capacity.
    if not _operator_approves("scale_adjustment"):
        return
    _announce("scale_adjustment")
    ctm.invoke(scale_adjustment)

    # Step 7: pause five minutes, then flip protection one last time.
    time.sleep(300)
    ctm.invoke(protectedFromScaleIn)
# Script entry point: hand control to the click command group.
if __name__ == "__main__":
    cli()
Update values in Script
AutoScalingGroupNames = "pr-*****"
ClusterName = "pr-*****"
region_name = "ap-*****"
DEBUG = True  (set to False for unattended automation)
python main.py traffic
For advanced users: run each step manually
python main.py protectedFromScaleIn
python main.py scale_adjustment --expand
python main.py ecs_drain_container_instances
python main.py ecs_drain_check
python main.py protectedFromScaleIn
python main.py scale_adjustment
python main.py protectedFromScaleIn