Initial commit
This commit is contained in:
281
scripts/spot_recommendations.py
Executable file
281
scripts/spot_recommendations.py
Executable file
@@ -0,0 +1,281 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Analyze EC2 workloads and recommend Spot instance opportunities.
|
||||
|
||||
This script identifies:
|
||||
- Fault-tolerant workloads suitable for Spot instances
|
||||
- Potential savings from Spot vs On-Demand
|
||||
- Instances in Auto Scaling Groups (good Spot candidates)
|
||||
- Non-critical workloads based on tags
|
||||
|
||||
Usage:
|
||||
python3 spot_recommendations.py [--region REGION] [--profile PROFILE]
|
||||
|
||||
Requirements:
|
||||
pip install boto3 tabulate
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import boto3
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Dict, Any
|
||||
from tabulate import tabulate
|
||||
import sys
|
||||
|
||||
|
||||
class SpotRecommendationAnalyzer:
    """Rate running EC2 instances as candidates for migration to Spot.

    For every region in scope the analyzer lists running, non-Spot
    instances, scores each one with simple heuristics (Auto Scaling Group
    membership, tag values, uptime, instance size), estimates the savings
    from a flat Spot discount, and collects one report row per instance in
    ``self.recommendations``.
    """

    # Hard-coded rough hourly price estimates used for the savings figure.
    _HOURLY_COST = {
        't3.micro': 0.0104, 't3.small': 0.0208, 't3.medium': 0.0416,
        't3.large': 0.0832, 't3.xlarge': 0.1664, 't3.2xlarge': 0.3328,
        'm5.large': 0.096, 'm5.xlarge': 0.192, 'm5.2xlarge': 0.384,
        'm5.4xlarge': 0.768, 'm5.8xlarge': 1.536,
        'c5.large': 0.085, 'c5.xlarge': 0.17, 'c5.2xlarge': 0.34,
        'c5.4xlarge': 0.68, 'c5.9xlarge': 1.53,
        'r5.large': 0.126, 'r5.xlarge': 0.252, 'r5.2xlarge': 0.504,
        'r5.4xlarge': 1.008, 'r5.8xlarge': 2.016,
    }

    # Fallback price per family for types missing from _HOURLY_COST.
    _FAMILY_DEFAULT_COST = {'t3': 0.04, 'm5': 0.10, 'c5': 0.09, 'r5': 0.13}

    # Fallback price when even the family is unknown.
    _UNKNOWN_COST = 0.10

    # Size suffixes (the part after the dot) that earn the size bonus.
    _STANDARD_SIZES = frozenset({'micro', 'small', 'medium', 'large'})

    # Hours per month used when turning an hourly price into monthly savings.
    _HOURS_PER_MONTH = 730

    def __init__(self, profile: str = None, region: str = None):
        """Create the boto3 session and decide which regions to scan.

        Args:
            profile: Named AWS profile; the default session is used when
                omitted.
            region: Single region to analyze; when omitted every region
                returned by ``_get_all_regions`` is scanned.
        """
        self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
        self.regions = [region] if region else self._get_all_regions()
        self.recommendations = []
        self.total_savings = 0.0

        # Average Spot savings (typically 60-90% discount)
        self.spot_discount = 0.70  # Conservative 70% discount

        # Tags that indicate Spot suitability (values compared lower-cased)
        self.spot_friendly_tags = {
            'Environment': ['dev', 'development', 'test', 'testing', 'staging', 'qa'],
            'Workload': ['batch', 'processing', 'worker', 'ci', 'build'],
            'CriticalLevel': ['low', 'non-critical', 'noncritical']
        }

    def _get_all_regions(self) -> List[str]:
        """Get all enabled AWS regions."""
        ec2 = self.session.client('ec2', region_name='us-east-1')
        regions = ec2.describe_regions(AllRegions=False)
        return [region['RegionName'] for region in regions['Regions']]

    def _estimate_hourly_cost(self, instance_type: str) -> float:
        """Return a rough hourly price estimate for ``instance_type``.

        Known types come from the price table; unknown types fall back to a
        per-family default and finally to a flat default.
        """
        known = self._HOURLY_COST.get(instance_type)
        if known is not None:
            return known

        family = instance_type.split('.')[0]
        return self._FAMILY_DEFAULT_COST.get(family, self._UNKNOWN_COST)

    def _calculate_suitability_score(self, instance: Dict, asg_member: bool) -> tuple:
        """Calculate Spot suitability score (0-100) and reasons.

        Args:
            instance: Instance description as returned by
                ``describe_instances`` (reads ``Tags``, ``LaunchTime`` and
                ``InstanceType``).
            asg_member: Whether the instance belongs to an Auto Scaling
                Group.

        Returns:
            ``(score, reasons)`` where ``reasons`` lists every heuristic
            that contributed to the score.
        """
        score = 0
        reasons = []

        # Check if in Auto Scaling Group (high suitability)
        if asg_member:
            score += 40
            reasons.append("Part of Auto Scaling Group")

        # Check tags for environment/workload type
        tags = {tag['Key']: tag['Value'].lower() for tag in instance.get('Tags', [])}
        for key, spot_values in self.spot_friendly_tags.items():
            if tags.get(key) in spot_values:
                score += 20
                reasons.append(f"{key}={tags[key]}")

        # Check instance age (older instances might be more stable)
        launch_time = instance['LaunchTime']
        days_running = (datetime.now(launch_time.tzinfo) - launch_time).days
        if days_running > 30:
            score += 10
            reasons.append(f"Running {days_running} days (stable)")

        # Check instance size (smaller instances have better Spot availability).
        # Compare the size suffix exactly: a substring test would treat
        # "xlarge", "24xlarge", ... as "large" and reward nearly every type.
        size = instance['InstanceType'].rpartition('.')[2]
        if size in self._STANDARD_SIZES:
            score += 15
            reasons.append("Standard size (good Spot availability)")

        # Default baseline when no heuristic matched at all
        if not reasons:
            score = 30
            reasons.append("General compute workload")

        return min(score, 100), reasons

    def _collect_asg_instance_ids(self, autoscaling, region: str) -> set:
        """Return the IDs of all instances attached to ASGs in ``region``.

        Best effort: a failure is reported and whatever was collected so far
        is returned, so the instance scan can still proceed.
        """
        instance_ids = set()
        try:
            paginator = autoscaling.get_paginator('describe_auto_scaling_groups')
            for page in paginator.paginate():
                for asg in page['AutoScalingGroups']:
                    instance_ids.update(member['InstanceId'] for member in asg['Instances'])
        except Exception as e:
            print(f" Warning: could not list Auto Scaling Groups in {region}: {str(e)}")
        return instance_ids

    def _iter_candidate_instances(self, ec2):
        """Yield running instances that are not already Spot.

        The lifecycle is checked client-side because On-Demand instances have
        no ``InstanceLifecycle`` attribute, so an ``instance-lifecycle``
        filter cannot select them.
        """
        paginator = ec2.get_paginator('describe_instances')
        pages = paginator.paginate(
            Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]
        )
        for page in pages:
            for reservation in page['Reservations']:
                for instance in reservation['Instances']:
                    if instance.get('InstanceLifecycle') in (None, 'scheduled'):
                        yield instance

    @staticmethod
    def _recommendation_label(score: int) -> str:
        """Map a suitability score to the label shown in the report."""
        if score >= 70:
            return "Highly Recommended"
        if score >= 50:
            return "Recommended"
        if score >= 30:
            return "Consider (with caution)"
        return "Not Recommended"

    def _record_instance(self, region: str, instance: Dict, asg_member: bool) -> None:
        """Score one instance, add its savings to the total and store its row."""
        instance_type = instance['InstanceType']
        score, reasons = self._calculate_suitability_score(instance, asg_member)

        hourly_cost = self._estimate_hourly_cost(instance_type)
        monthly_savings = hourly_cost * self._HOURS_PER_MONTH * self.spot_discount
        self.total_savings += monthly_savings * 12

        name_tag = next((tag['Value'] for tag in instance.get('Tags', [])
                         if tag['Key'] == 'Name'), 'N/A')

        self.recommendations.append({
            'Region': region,
            'Instance ID': instance['InstanceId'],
            'Name': name_tag,
            'Type': instance_type,
            'In ASG': 'Yes' if asg_member else 'No',
            'Suitability Score': f"{score}/100",
            'Monthly Savings': f"${monthly_savings:.2f}",
            'Recommendation': self._recommendation_label(score),
            'Reasons': ', '.join(reasons[:2])  # Show top 2 reasons
        })

    def analyze_instances(self):
        """Analyze EC2 instances for Spot opportunities.

        Appends one row per candidate instance to ``self.recommendations``
        and accumulates ``self.total_savings``. An error in one region is
        printed and does not stop the remaining regions from being scanned.
        """
        print(f"\nAnalyzing EC2 instances across {len(self.regions)} region(s)...")

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)
                autoscaling = self.session.client('autoscaling', region_name=region)

                asg_instances = self._collect_asg_instance_ids(autoscaling, region)
                for instance in self._iter_candidate_instances(ec2):
                    asg_member = instance['InstanceId'] in asg_instances
                    self._record_instance(region, instance, asg_member)
            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        print(f" Analyzed {len(self.recommendations)} instances")

    def print_report(self):
        """Print Spot recommendations report."""
        print("\n" + "="*120)
        print("SPOT INSTANCE RECOMMENDATIONS")
        print("="*120)

        # Sort by suitability score (descending); the score is stored as
        # "<score>/100", so the numeric part is parsed back out.
        sorted_recs = sorted(self.recommendations,
                             key=lambda x: int(x['Suitability Score'].split('/')[0]),
                             reverse=True)

        if sorted_recs:
            print(tabulate(sorted_recs, headers='keys', tablefmt='grid'))

        print("\n" + "="*120)
        print(f"TOTAL ANNUAL SAVINGS POTENTIAL: ${self.total_savings:.2f}")
        print(f"(Assumes {int(self.spot_discount*100)}% average Spot discount)")
        print("="*120)

        print("\n\nSPOT INSTANCE BEST PRACTICES:")
        print("\n1. Use Spot Instances for:")
        print(" - Stateless applications")
        print(" - Batch processing jobs")
        print(" - CI/CD and build servers")
        print(" - Data analysis and processing")
        print(" - Dev/test/staging environments")
        print(" - Auto Scaling Groups with mixed instance types")

        print("\n2. Do NOT use Spot Instances for:")
        print(" - Databases without replicas")
        print(" - Stateful applications without checkpointing")
        print(" - Real-time, latency-sensitive services")
        print(" - Applications that can't handle interruptions")

        print("\n3. Spot Best Practices:")
        print(" - Use Spot Fleet or Auto Scaling Groups with Spot")
        print(" - Diversify across multiple instance types")
        print(" - Implement graceful shutdown handlers (2-minute warning)")
        print(" - Use Spot Instance interruption notices")
        print(" - Consider Spot + On-Demand mix (e.g., 70/30)")
        print(" - Set appropriate max price (typically On-Demand price)")

        print("\n4. Implementation Steps:")
        print(" - Test Spot behavior in non-production first")
        print(" - Implement interruption handling in your application")
        print(" - Use EC2 Fleet or Auto Scaling with mixed instances policy")
        print(" - Monitor Spot interruption rates")
        print(" - Set up CloudWatch alarms for Spot terminations")

        print("\n5. Tools to Use:")
        print(" - EC2 Spot Instance Advisor (check interruption rates)")
        print(" - Auto Scaling Groups with mixed instances policy")
        print(" - Spot Fleet for diverse instance type selection")
        print(" - AWS Spot Instances best practices guide")

    def run(self):
        """Run Spot analysis."""
        print("="*80)
        print("AWS SPOT INSTANCE OPPORTUNITY ANALYZER")
        print("="*80)

        self.analyze_instances()
        self.print_report()
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse options, run the analyzer, report failures.

    Any exception raised while building or running the analyzer is printed
    to stderr and the process exits with status 1.
    """
    usage_examples = """
Examples:
# Analyze all regions with default profile
python3 spot_recommendations.py

# Analyze specific region
python3 spot_recommendations.py --region us-east-1

# Use named profile
python3 spot_recommendations.py --profile production
"""

    cli = argparse.ArgumentParser(
        description='Analyze EC2 workloads for Spot instance opportunities',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument('--region', help='AWS region (default: all regions)')
    cli.add_argument('--profile', help='AWS profile name (default: default profile)')
    options = cli.parse_args()

    try:
        # Construction stays inside the try: it may already call AWS to
        # discover regions.
        SpotRecommendationAnalyzer(profile=options.profile, region=options.region).run()
    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user