Initial commit
scripts/analyze_ri_recommendations.py (new executable file, 347 lines)
#!/usr/bin/env python3
"""
Analyze EC2 and RDS usage patterns to recommend Reserved Instances.

This script:
- Identifies consistently running EC2 instances
- Calculates potential savings with Reserved Instances
- Recommends RI types (Standard vs Convertible) and commitment levels (1yr vs 3yr)
- Analyzes RDS instances for RI opportunities

Usage:
    python3 analyze_ri_recommendations.py [--region REGION] [--profile PROFILE] [--days DAYS]

Requirements:
    pip install boto3 tabulate
"""

import argparse
import boto3
from datetime import datetime, timedelta
from typing import List, Dict
from collections import defaultdict
from tabulate import tabulate
import sys


class RIAnalyzer:
    def __init__(self, profile: str = None, region: str = None, days: int = 30):
        self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
        self.regions = [region] if region else self._get_all_regions()
        self.days = days
        self.recommendations = {
            'ec2': [],
            'rds': []
        }
        self.total_potential_savings = 0.0

        # Simplified RI discount rates (actual rates vary by region/instance type)
        self.ri_discounts = {
            '1yr_no_upfront': 0.40,       # ~40% savings
            '1yr_partial_upfront': 0.42,  # ~42% savings
            '1yr_all_upfront': 0.43,      # ~43% savings
            '3yr_no_upfront': 0.60,       # ~60% savings
            '3yr_partial_upfront': 0.62,  # ~62% savings
            '3yr_all_upfront': 0.63       # ~63% savings
        }
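        # NOTE: the rates above are coarse approximations. If exact
        # region/instance-type pricing is needed, live offerings can be
        # pulled from the EC2 API instead, e.g. via
        # ec2.describe_reserved_instances_offerings(...).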
    def _get_all_regions(self) -> List[str]:
        """Get all enabled AWS regions."""
        ec2 = self.session.client('ec2', region_name='us-east-1')
        regions = ec2.describe_regions(AllRegions=False)
        return [region['RegionName'] for region in regions['Regions']]

    def _estimate_hourly_cost(self, instance_type: str) -> float:
        """Rough estimate of On-Demand hourly cost."""
        cost_map = {
            't2.micro': 0.0116, 't2.small': 0.023, 't2.medium': 0.0464,
            't3.micro': 0.0104, 't3.small': 0.0208, 't3.medium': 0.0416,
            't3.large': 0.0832, 't3.xlarge': 0.1664, 't3.2xlarge': 0.3328,
            'm5.large': 0.096, 'm5.xlarge': 0.192, 'm5.2xlarge': 0.384,
            'm5.4xlarge': 0.768, 'm5.8xlarge': 1.536, 'm5.12xlarge': 2.304,
            'c5.large': 0.085, 'c5.xlarge': 0.17, 'c5.2xlarge': 0.34,
            'c5.4xlarge': 0.68, 'c5.9xlarge': 1.53, 'c5.18xlarge': 3.06,
            'r5.large': 0.126, 'r5.xlarge': 0.252, 'r5.2xlarge': 0.504,
            'r5.4xlarge': 1.008, 'r5.8xlarge': 2.016, 'r5.12xlarge': 3.024,
        }

        # Default fallback based on instance family
        if instance_type not in cost_map:
            family = instance_type.split('.')[0]
            family_defaults = {'t2': 0.02, 't3': 0.02, 'm5': 0.10, 'c5': 0.09, 'r5': 0.13}
            return family_defaults.get(family, 0.10)

        return cost_map[instance_type]

    def _calculate_savings(self, hourly_cost: float, hours_running: float) -> Dict[str, Dict[str, float]]:
        """Calculate potential savings with different RI options."""
        monthly_od_cost = hourly_cost * hours_running

        savings = {}
        for ri_type, discount in self.ri_discounts.items():
            monthly_ri_cost = monthly_od_cost * (1 - discount)
            monthly_savings = monthly_od_cost - monthly_ri_cost
            savings[ri_type] = {
                'monthly_cost': monthly_ri_cost,
                'monthly_savings': monthly_savings,
                'annual_savings': monthly_savings * 12
            }

        return savings
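    # Worked example with the simplified rates above: an m5.large at
    # $0.096/hr costs ~$70.08 for a 730-hour month On-Demand; at the
    # 3yr all-upfront rate (63% off) the effective cost is ~$25.93,
    # i.e. ~$44.15/month or ~$529.80/year in savings per instance.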
    def analyze_ec2_instances(self):
        """Analyze EC2 instances for RI opportunities."""
        print(f"\n[1/2] Analyzing EC2 instances (last {self.days} days)...")

        # Group instances by type and platform
        instance_groups = defaultdict(lambda: {'count': 0, 'instances': []})

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)

                instances = ec2.describe_instances(
                    Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]
                )

                for reservation in instances['Reservations']:
                    for instance in reservation['Instances']:
                        instance_id = instance['InstanceId']
                        instance_type = instance['InstanceType']
                        platform = instance.get('Platform', 'Linux/UNIX')

                        # Check if instance has been running consistently.
                        # A running instance whose launch predates the analysis
                        # window has been up for the whole period, so LaunchTime
                        # alone decides eligibility; a CloudWatch status-check
                        # probe adds no extra signal for this grouping.
                        launch_time = instance['LaunchTime']
                        days_running = (datetime.now(launch_time.tzinfo) - launch_time).days

                        if days_running >= self.days:
                            key = f"{instance_type}_{platform}_{region}"
                            instance_groups[key]['count'] += 1
                            instance_groups[key]['instances'].append({
                                'id': instance_id,
                                'type': instance_type,
                                'platform': platform,
                                'region': region,
                                'days_running': days_running
                            })

            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        # Generate recommendations
        for key, data in instance_groups.items():
            if data['count'] > 0:
                sample = data['instances'][0]
                instance_type = sample['type']
                platform = sample['platform']
                region = sample['region']
                count = data['count']

                hourly_cost = self._estimate_hourly_cost(instance_type)
                hours_per_month = 730  # Average hours in a month
                savings = self._calculate_savings(hourly_cost, hours_per_month * count)

                # Recommend best option (3yr all upfront for max savings)
                best_option = savings['3yr_all_upfront']
                self.total_potential_savings += best_option['annual_savings']

                self.recommendations['ec2'].append({
                    'Region': region,
                    'Instance Type': instance_type,
                    'Platform': platform,
                    'Count': count,
                    'Current Monthly Cost': f"${hourly_cost * hours_per_month * count:.2f}",
                    '1yr Savings (monthly)': f"${savings['1yr_all_upfront']['monthly_savings']:.2f}",
                    '3yr Savings (monthly)': f"${savings['3yr_all_upfront']['monthly_savings']:.2f}",
                    'Annual Savings (3yr)': f"${best_option['annual_savings']:.2f}",
                    'Recommendation': '3yr Standard RI (All Upfront)'
                })

        print(f" Found {len(self.recommendations['ec2'])} RI opportunities")
    def analyze_rds_instances(self):
        """Analyze RDS instances for RI opportunities."""
        print(f"\n[2/2] Analyzing RDS instances (last {self.days} days)...")

        instance_groups = defaultdict(lambda: {'count': 0, 'instances': []})

        for region in self.regions:
            try:
                rds = self.session.client('rds', region_name=region)
                instances = rds.describe_db_instances()

                for instance in instances['DBInstances']:
                    instance_id = instance['DBInstanceIdentifier']
                    instance_class = instance['DBInstanceClass']
                    engine = instance['Engine']
                    multi_az = instance['MultiAZ']

                    # Check if instance has been running for the analysis period
                    create_time = instance['InstanceCreateTime']
                    days_running = (datetime.now(create_time.tzinfo) - create_time).days

                    if days_running >= self.days:
                        key = f"{instance_class}_{engine}_{multi_az}_{region}"
                        instance_groups[key]['count'] += 1
                        instance_groups[key]['instances'].append({
                            'id': instance_id,
                            'class': instance_class,
                            'engine': engine,
                            'multi_az': multi_az,
                            'region': region,
                            'days_running': days_running
                        })

            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        # Generate recommendations
        for key, data in instance_groups.items():
            if data['count'] > 0:
                sample = data['instances'][0]
                instance_class = sample['class']
                engine = sample['engine']
                multi_az = sample['multi_az']
                region = sample['region']
                count = data['count']

                # RDS pricing is roughly 2x EC2 for the same instance size;
                # this is a rough approximation
                base_hourly = self._estimate_hourly_cost(instance_class.replace('db.', ''))
                hourly_cost = base_hourly * 2
                if multi_az:
                    hourly_cost *= 2  # Multi-AZ doubles the cost

                hours_per_month = 730
                savings = self._calculate_savings(hourly_cost, hours_per_month * count)

                best_option = savings['3yr_all_upfront']
                self.total_potential_savings += best_option['annual_savings']

                self.recommendations['rds'].append({
                    'Region': region,
                    'Instance Class': instance_class,
                    'Engine': engine,
                    'Multi-AZ': 'Yes' if multi_az else 'No',
                    'Count': count,
                    'Current Monthly Cost': f"${hourly_cost * hours_per_month * count:.2f}",
                    '1yr Savings (monthly)': f"${savings['1yr_all_upfront']['monthly_savings']:.2f}",
                    '3yr Savings (monthly)': f"${savings['3yr_all_upfront']['monthly_savings']:.2f}",
                    'Annual Savings (3yr)': f"${best_option['annual_savings']:.2f}",
                    'Recommendation': '3yr Standard RI (All Upfront)'
                })

        print(f" Found {len(self.recommendations['rds'])} RI opportunities")
    def print_report(self):
        """Print RI recommendations report."""
        print("\n" + "="*100)
        print("RESERVED INSTANCE RECOMMENDATIONS")
        print("="*100)

        if self.recommendations['ec2']:
            print("\nEC2 RESERVED INSTANCE OPPORTUNITIES")
            print("-" * 100)
            print(tabulate(self.recommendations['ec2'], headers='keys', tablefmt='grid'))

        if self.recommendations['rds']:
            print("\nRDS RESERVED INSTANCE OPPORTUNITIES")
            print("-" * 100)
            print(tabulate(self.recommendations['rds'], headers='keys', tablefmt='grid'))

        print("\n" + "="*100)
        print(f"TOTAL ANNUAL SAVINGS POTENTIAL: ${self.total_potential_savings:.2f}")
        print("="*100)

        print("\n\nRECOMMENDATIONS:")
        print("- Standard RIs offer the highest discount but no flexibility to change instance type")
        print("- Consider Convertible RIs if you need flexibility (slightly lower discount)")
        print("- All Upfront payment offers maximum savings")
        print("- Partial Upfront balances savings with cash flow")
        print("- No Upfront minimizes initial cost but reduces savings")
        print("\nNEXT STEPS:")
        print("1. Review workload stability and growth projections")
        print("2. Compare RI costs with Savings Plans for additional flexibility")
        print("3. Purchase RIs through AWS Console or CLI")
        print("4. Monitor RI utilization to ensure maximum benefit")

    def run(self):
        """Run RI analysis."""
        print("Analyzing AWS resources for RI opportunities...")
        print(f"Looking at instances running for at least {self.days} days")
        print(f"Scanning {len(self.regions)} region(s)...\n")

        self.analyze_ec2_instances()
        self.analyze_rds_instances()

        self.print_report()


def main():
    parser = argparse.ArgumentParser(
        description='Analyze AWS resources for Reserved Instance opportunities',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Analyze all regions with default profile
  python3 analyze_ri_recommendations.py

  # Analyze specific region for instances running 60+ days
  python3 analyze_ri_recommendations.py --region us-east-1 --days 60

  # Use named profile
  python3 analyze_ri_recommendations.py --profile production
"""
    )

    parser.add_argument('--region', help='AWS region (default: all regions)')
    parser.add_argument('--profile', help='AWS profile name (default: default profile)')
    parser.add_argument('--days', type=int, default=30,
                        help='Minimum days instance must be running (default: 30)')

    args = parser.parse_args()

    try:
        analyzer = RIAnalyzer(
            profile=args.profile,
            region=args.region,
            days=args.days
        )
        analyzer.run()
    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
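# Illustrative usage sketch (profile/region values below are placeholders):
# the analyzer can also be driven programmatically instead of via the CLI.
#
#   from analyze_ri_recommendations import RIAnalyzer
#   RIAnalyzer(profile="ops", region="us-east-1", days=60).run()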
scripts/cost_anomaly_detector.py (new executable file, 382 lines)
#!/usr/bin/env python3
"""
Detect cost anomalies and unusual spending patterns in AWS.

This script:
- Analyzes Cost Explorer data for spending trends
- Detects anomalies and unexpected cost increases
- Identifies top cost drivers
- Compares period-over-period spending

Usage:
    python3 cost_anomaly_detector.py [--profile PROFILE] [--days DAYS]

Requirements:
    pip install boto3 tabulate
"""

import argparse
import boto3
from datetime import datetime, timedelta
from collections import defaultdict
from tabulate import tabulate
import sys


class CostAnomalyDetector:
    def __init__(self, profile: str = None, days: int = 30):
        self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
        self.days = days
        self.ce = self.session.client('ce', region_name='us-east-1')  # Cost Explorer is a global API served from us-east-1

        self.findings = {
            'anomalies': [],
            'top_services': [],
            'trend_analysis': []
        }

        # Anomaly detection threshold
        self.anomaly_threshold = 1.5  # a 50% increase over baseline triggers an alert
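        # Worked example: with a first-week baseline of $100/day and the
        # 1.5x threshold above, any later day over $150 is flagged; a day
        # over $200 (a >100% increase) is reported with High severity.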
    def _get_date_range(self, days: int) -> tuple:
        """Get start and end dates for analysis."""
        end = datetime.now().date()
        start = end - timedelta(days=days)
        return start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')

    def analyze_daily_costs(self):
        """Analyze daily cost trends."""
        print(f"\n[1/4] Analyzing daily costs (last {self.days} days)...")

        start_date, end_date = self._get_date_range(self.days)

        try:
            response = self.ce.get_cost_and_usage(
                TimePeriod={'Start': start_date, 'End': end_date},
                Granularity='DAILY',
                Metrics=['UnblendedCost'],
                GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}]
            )

            # Aggregate daily costs
            daily_totals = defaultdict(float)
            service_costs = defaultdict(lambda: defaultdict(float))

            for result in response['ResultsByTime']:
                date = result['TimePeriod']['Start']
                for group in result['Groups']:
                    service = group['Keys'][0]
                    cost = float(group['Metrics']['UnblendedCost']['Amount'])

                    daily_totals[date] += cost
                    service_costs[service][date] = cost

            # Detect daily anomalies
            dates = sorted(daily_totals.keys())
            if len(dates) > 7:
                # Calculate baseline (average of first week); a zero baseline
                # is skipped to avoid division by zero below
                baseline = sum(daily_totals[d] for d in dates[:7]) / 7

                for date in dates[7:]:
                    daily_cost = daily_totals[date]
                    if baseline > 0 and daily_cost > baseline * self.anomaly_threshold:
                        increase_pct = ((daily_cost - baseline) / baseline) * 100

                        # Find which service caused the spike
                        top_service = max(
                            ((svc, service_costs[svc][date]) for svc in service_costs),
                            key=lambda x: x[1]
                        )

                        self.findings['anomalies'].append({
                            'Date': date,
                            'Daily Cost': f"${daily_cost:.2f}",
                            'Baseline': f"${baseline:.2f}",
                            'Increase': f"+{increase_pct:.1f}%",
                            'Top Service': top_service[0],
                            'Service Cost': f"${top_service[1]:.2f}",
                            'Severity': 'High' if increase_pct > 100 else 'Medium'
                        })

            print(f" Detected {len(self.findings['anomalies'])} cost anomalies")

        except Exception as e:
            print(f" Error analyzing daily costs: {str(e)}")
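    # Note: get_cost_and_usage can paginate on large accounts; additional
    # pages are signalled via NextPageToken in the response. The analyses
    # here assume the window fits in a single page.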
    def analyze_top_services(self):
        """Identify top cost drivers."""
        print("\n[2/4] Analyzing top cost drivers...")

        start_date, end_date = self._get_date_range(self.days)

        try:
            response = self.ce.get_cost_and_usage(
                TimePeriod={'Start': start_date, 'End': end_date},
                Granularity='MONTHLY',
                Metrics=['UnblendedCost'],
                GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}]
            )

            service_totals = {}
            for result in response['ResultsByTime']:
                for group in result['Groups']:
                    service = group['Keys'][0]
                    cost = float(group['Metrics']['UnblendedCost']['Amount'])
                    service_totals[service] = service_totals.get(service, 0) + cost

            # Get top 10 services
            sorted_services = sorted(service_totals.items(), key=lambda x: x[1], reverse=True)[:10]

            total_cost = sum(service_totals.values())

            for service, cost in sorted_services:
                percentage = (cost / total_cost * 100) if total_cost > 0 else 0

                self.findings['top_services'].append({
                    'Service': service,
                    'Cost': f"${cost:.2f}",
                    'Percentage': f"{percentage:.1f}%",
                    'Daily Average': f"${cost/self.days:.2f}"
                })

            print(f" Identified top {len(self.findings['top_services'])} cost drivers")

        except Exception as e:
            print(f" Error analyzing top services: {str(e)}")
    def compare_periods(self):
        """Compare current period with previous period."""
        print("\n[3/4] Comparing cost trends...")

        # Current period
        current_end = datetime.now().date()
        current_start = current_end - timedelta(days=self.days)

        # Previous period. Cost Explorer treats 'End' as exclusive, so the
        # previous period must end exactly where the current one starts;
        # subtracting an extra day here would leave a one-day gap.
        previous_end = current_start
        previous_start = previous_end - timedelta(days=self.days)

        try:
            # Get current period costs
            current_response = self.ce.get_cost_and_usage(
                TimePeriod={
                    'Start': current_start.strftime('%Y-%m-%d'),
                    'End': current_end.strftime('%Y-%m-%d')
                },
                Granularity='MONTHLY',
                Metrics=['UnblendedCost'],
                GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}]
            )

            # Get previous period costs
            previous_response = self.ce.get_cost_and_usage(
                TimePeriod={
                    'Start': previous_start.strftime('%Y-%m-%d'),
                    'End': previous_end.strftime('%Y-%m-%d')
                },
                Granularity='MONTHLY',
                Metrics=['UnblendedCost'],
                GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}]
            )

            # Aggregate by service
            current_costs = {}
            for result in current_response['ResultsByTime']:
                for group in result['Groups']:
                    service = group['Keys'][0]
                    cost = float(group['Metrics']['UnblendedCost']['Amount'])
                    current_costs[service] = current_costs.get(service, 0) + cost

            previous_costs = {}
            for result in previous_response['ResultsByTime']:
                for group in result['Groups']:
                    service = group['Keys'][0]
                    cost = float(group['Metrics']['UnblendedCost']['Amount'])
                    previous_costs[service] = previous_costs.get(service, 0) + cost

            # Compare services
            all_services = set(current_costs.keys()) | set(previous_costs.keys())

            for service in all_services:
                current = current_costs.get(service, 0)
                previous = previous_costs.get(service, 0)

                if previous > 0:
                    change_pct = ((current - previous) / previous) * 100
                    change_amount = current - previous
                elif current > 0:
                    change_pct = 100
                    change_amount = current
                else:
                    continue

                # Only report significant changes (> 10% or > $10)
                if abs(change_pct) > 10 or abs(change_amount) > 10:
                    trend = "↑ Increase" if change_amount > 0 else "↓ Decrease"

                    self.findings['trend_analysis'].append({
                        'Service': service,
                        'Previous Period': f"${previous:.2f}",
                        'Current Period': f"${current:.2f}",
                        'Change': f"${change_amount:+.2f}",
                        'Change %': f"{change_pct:+.1f}%",
                        'Trend': trend
                    })

            # Sort by absolute change. 'Change' is formatted as "$+x.xx" or
            # "$-x.xx", so everything after the '$' parses as a signed float.
            self.findings['trend_analysis'].sort(
                key=lambda x: abs(float(x['Change'][1:])),
                reverse=True
            )

            print(f" Compared {len(self.findings['trend_analysis'])} services")

        except Exception as e:
            print(f" Error comparing periods: {str(e)}")
    def get_forecast(self):
        """Get AWS cost forecast."""
        print("\n[4/4] Getting cost forecast...")

        try:
            # Get 30-day forecast
            start_date = datetime.now().date()
            end_date = start_date + timedelta(days=30)

            response = self.ce.get_cost_forecast(
                TimePeriod={
                    'Start': start_date.strftime('%Y-%m-%d'),
                    'End': end_date.strftime('%Y-%m-%d')
                },
                Metric='UNBLENDED_COST',
                Granularity='MONTHLY'
            )

            forecast_amount = float(response['Total']['Amount'])
            print(f" 30-day forecast: ${forecast_amount:.2f}")

            return forecast_amount

        except Exception as e:
            print(f" Error getting forecast: {str(e)}")
            return None
    def print_report(self, forecast_amount: float = None):
        """Print cost anomaly report."""
        print("\n" + "="*110)
        print("AWS COST ANOMALY DETECTION REPORT")
        print("="*110)

        # Anomalies
        if self.findings['anomalies']:
            print("\nCOST ANOMALIES DETECTED")
            print("-" * 110)
            print(tabulate(self.findings['anomalies'], headers='keys', tablefmt='grid'))
            print("\n⚠️ These dates show unusual cost spikes. Investigate immediately.")

        # Top Services
        if self.findings['top_services']:
            print("\nTOP COST DRIVERS")
            print("-" * 110)
            print(tabulate(self.findings['top_services'], headers='keys', tablefmt='grid'))

        # Trend Analysis
        if self.findings['trend_analysis']:
            print("\nPERIOD-OVER-PERIOD COMPARISON")
            print(f"(Current {self.days} days vs Previous {self.days} days)")
            print("-" * 110)
            # Show top 15 changes
            print(tabulate(self.findings['trend_analysis'][:15], headers='keys', tablefmt='grid'))

        # Forecast ('is not None' so a legitimate $0.00 forecast still prints)
        if forecast_amount is not None:
            print("\nCOST FORECAST")
            print("-" * 110)
            print(f"Projected cost for the next 30 days: ${forecast_amount:.2f}")

        print("\n" + "="*110)

        print("\n\nRECOMMENDED ACTIONS:")
        print("\n1. For Cost Anomalies:")
        print(" - Review CloudWatch Logs for the affected service on anomaly dates")
        print(" - Check for configuration changes or deployments")
        print(" - Verify no unauthorized resource creation")
        print(" - Set up billing alerts to catch future anomalies")

        print("\n2. For Top Cost Drivers:")
        print(" - Review each service for optimization opportunities")
        print(" - Consider Reserved Instances for consistent workloads")
        print(" - Implement auto-scaling to match demand")
        print(" - Archive or delete unused resources")

        print("\n3. Cost Monitoring Best Practices:")
        print(" - Set up AWS Budgets with email/SNS alerts")
        print(" - Enable Cost Anomaly Detection in AWS Console")
        print(" - Tag resources for cost allocation and tracking")
        print(" - Run this script weekly to track trends")
        print(" - Review Cost Explorer monthly for detailed analysis")

        print("\n4. Immediate Actions:")
        print(" - aws budgets create-budget (set spending alerts)")
        print(" - aws ce get-anomaly-subscriptions (review existing anomaly-detection subscriptions)")
        print(" - Review IAM policies to prevent unauthorized spending")
        print(" - Implement cost allocation tags across all resources")

    def run(self):
        """Run cost anomaly detection."""
        print("="*80)
        print("AWS COST ANOMALY DETECTOR")
        print("="*80)
        print(f"Analysis period: {self.days} days")

        self.analyze_daily_costs()
        self.analyze_top_services()
        self.compare_periods()
        forecast = self.get_forecast()

        self.print_report(forecast)


def main():
    parser = argparse.ArgumentParser(
        description='Detect AWS cost anomalies and analyze spending trends',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Analyze last 30 days (default)
  python3 cost_anomaly_detector.py

  # Analyze last 60 days
  python3 cost_anomaly_detector.py --days 60

  # Use named profile
  python3 cost_anomaly_detector.py --profile production

Note: This script requires Cost Explorer API access, which may incur small charges.
"""
    )

    parser.add_argument('--profile', help='AWS profile name (default: default profile)')
    parser.add_argument('--days', type=int, default=30,
                        help='Days of cost data to analyze (default: 30)')

    args = parser.parse_args()

    try:
        detector = CostAnomalyDetector(
            profile=args.profile,
            days=args.days
        )
        detector.run()
    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        print("\nNote: Cost Explorer API access is required. Ensure:", file=sys.stderr)
        print("1. Cost Explorer is enabled in AWS Console", file=sys.stderr)
        print("2. IAM user has 'ce:GetCostAndUsage' and 'ce:GetCostForecast' permissions", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
scripts/detect_old_generations.py (new executable file, 334 lines)
#!/usr/bin/env python3
"""
Detect old generation EC2 and RDS instances that should be migrated to newer generations.

This script identifies:
- Old generation EC2 instances (t2 → t3, m4 → m5, etc.)
- ARM/Graviton migration opportunities
- Old generation RDS instances
- Estimated cost savings from each migration

Usage:
    python3 detect_old_generations.py [--region REGION] [--profile PROFILE]

Requirements:
    pip install boto3 tabulate
"""

import argparse
import boto3
from typing import List
from tabulate import tabulate
import sys
class OldGenerationDetector:
    def __init__(self, profile: str = None, region: str = None):
        self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
        self.regions = [region] if region else self._get_all_regions()
        self.findings = {
            'ec2_migrations': [],
            'graviton_opportunities': [],
            'rds_migrations': []
        }
        self.total_savings = 0.0

        # Migration mapping: old → new generation
        self.ec2_migrations = {
            # General Purpose
            't2.micro': ('t3.micro', 0.10),  # ~10% savings
            't2.small': ('t3.small', 0.10),
            't2.medium': ('t3.medium', 0.10),
            't2.large': ('t3.large', 0.10),
            't2.xlarge': ('t3.xlarge', 0.10),
            't2.2xlarge': ('t3.2xlarge', 0.10),
            'm4.large': ('m5.large', 0.04),  # ~4% savings
            'm4.xlarge': ('m5.xlarge', 0.04),
            'm4.2xlarge': ('m5.2xlarge', 0.04),
            'm4.4xlarge': ('m5.4xlarge', 0.04),
            'm4.10xlarge': ('m5.12xlarge', 0.10),
            'm4.16xlarge': ('m5.24xlarge', 0.10),
            'm5.large': ('m6i.large', 0.04),  # M5 → M6i
            'm5.xlarge': ('m6i.xlarge', 0.04),
            # Compute Optimized
            'c4.large': ('c5.large', 0.10),  # ~10% savings
            'c4.xlarge': ('c5.xlarge', 0.10),
            'c4.2xlarge': ('c5.2xlarge', 0.10),
            'c4.4xlarge': ('c5.4xlarge', 0.10),
            'c4.8xlarge': ('c5.9xlarge', 0.10),
            'c5.large': ('c6i.large', 0.05),  # C5 → C6i
            'c5.xlarge': ('c6i.xlarge', 0.05),
            # Memory Optimized
            'r4.large': ('r5.large', 0.08),  # ~8% savings
            'r4.xlarge': ('r5.xlarge', 0.08),
            'r4.2xlarge': ('r5.2xlarge', 0.08),
            'r4.4xlarge': ('r5.4xlarge', 0.08),
            'r4.8xlarge': ('r5.8xlarge', 0.08),
            'r5.large': ('r6i.large', 0.03),  # R5 → R6i
            'r5.xlarge': ('r6i.xlarge', 0.03),
        }

        # Graviton migration opportunities (even better savings). Types that
        # also appear in ec2_migrations (e.g. m5.large) are matched there
        # first, because detect_ec2_migrations checks that map before this one.
        self.graviton_migrations = {
            't3.micro': ('t4g.micro', 0.20),  # ~20% savings
            't3.small': ('t4g.small', 0.20),
            't3.medium': ('t4g.medium', 0.20),
            't3.large': ('t4g.large', 0.20),
            't3.xlarge': ('t4g.xlarge', 0.20),
            't3.2xlarge': ('t4g.2xlarge', 0.20),
            'm5.large': ('m6g.large', 0.20),
            'm5.xlarge': ('m6g.xlarge', 0.20),
            'm5.2xlarge': ('m6g.2xlarge', 0.20),
            'm5.4xlarge': ('m6g.4xlarge', 0.20),
            'm6i.large': ('m6g.large', 0.20),
            'm6i.xlarge': ('m6g.xlarge', 0.20),
            'c5.large': ('c6g.large', 0.20),
            'c5.xlarge': ('c6g.xlarge', 0.20),
            'c5.2xlarge': ('c6g.2xlarge', 0.20),
            'r5.large': ('r6g.large', 0.20),
            'r5.xlarge': ('r6g.xlarge', 0.20),
            'r5.2xlarge': ('r6g.2xlarge', 0.20),
        }
        # RDS instance migrations
        self.rds_migrations = {
            'db.t2.micro': ('db.t3.micro', 0.10),
            'db.t2.small': ('db.t3.small', 0.10),
            'db.t2.medium': ('db.t3.medium', 0.10),
            'db.m4.large': ('db.m5.large', 0.05),
            'db.m4.xlarge': ('db.m5.xlarge', 0.05),
            'db.m4.2xlarge': ('db.m5.2xlarge', 0.05),
            'db.r4.large': ('db.r5.large', 0.08),
            'db.r4.xlarge': ('db.r5.xlarge', 0.08),
            'db.r4.2xlarge': ('db.r5.2xlarge', 0.08),
        }

    def _get_all_regions(self) -> List[str]:
        """Get all enabled AWS regions."""
        ec2 = self.session.client('ec2', region_name='us-east-1')
        regions = ec2.describe_regions(AllRegions=False)
        return [region['RegionName'] for region in regions['Regions']]

    def _estimate_hourly_cost(self, instance_type: str) -> float:
        """Rough estimate of hourly cost."""
        cost_map = {
            't2.micro': 0.0116, 't2.small': 0.023, 't2.medium': 0.0464,
            't3.micro': 0.0104, 't3.small': 0.0208, 't3.medium': 0.0416,
            't3.large': 0.0832, 't3.xlarge': 0.1664, 't3.2xlarge': 0.3328,
            't4g.micro': 0.0084, 't4g.small': 0.0168, 't4g.medium': 0.0336,
            'm4.large': 0.10, 'm4.xlarge': 0.20, 'm4.2xlarge': 0.40,
            'm5.large': 0.096, 'm5.xlarge': 0.192, 'm5.2xlarge': 0.384,
            'm6i.large': 0.096, 'm6i.xlarge': 0.192,
            'm6g.large': 0.077, 'm6g.xlarge': 0.154, 'm6g.2xlarge': 0.308,
            'c4.large': 0.10, 'c4.xlarge': 0.199, 'c4.2xlarge': 0.398,
            'c5.large': 0.085, 'c5.xlarge': 0.17, 'c5.2xlarge': 0.34,
            'c6i.large': 0.085, 'c6i.xlarge': 0.17,
            'c6g.large': 0.068, 'c6g.xlarge': 0.136, 'c6g.2xlarge': 0.272,
            'r4.large': 0.133, 'r4.xlarge': 0.266, 'r4.2xlarge': 0.532,
            'r5.large': 0.126, 'r5.xlarge': 0.252, 'r5.2xlarge': 0.504,
            'r6i.large': 0.126, 'r6i.xlarge': 0.252,
            'r6g.large': 0.101, 'r6g.xlarge': 0.202, 'r6g.2xlarge': 0.403,
        }
        return cost_map.get(instance_type, 0.10)
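    # Worked example from the map above: t2.medium ($0.0464/hr) → t3.medium
    # ($0.0416/hr) saves $0.0048/hr, i.e. about $3.50 per 730-hour month.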
    def detect_ec2_migrations(self):
        """Detect old generation EC2 instances."""
        print("\n[1/3] Scanning for old generation EC2 instances...")

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)
                instances = ec2.describe_instances(
                    Filters=[{'Name': 'instance-state-name', 'Values': ['running', 'stopped']}]
                )

                for reservation in instances['Reservations']:
                    for instance in reservation['Instances']:
                        instance_id = instance['InstanceId']
                        instance_type = instance['InstanceType']
                        state = instance['State']['Name']

                        name_tag = next((tag['Value'] for tag in instance.get('Tags', [])
                                         if tag['Key'] == 'Name'), 'N/A')

                        # Check for standard migration
                        if instance_type in self.ec2_migrations:
                            new_type, savings_pct = self.ec2_migrations[instance_type]
                            current_cost = self._estimate_hourly_cost(instance_type)
                            new_cost = self._estimate_hourly_cost(new_type)
                            if new_cost >= current_cost:
                                # The static cost map does not cover every
                                # target type; fall back to the advertised
                                # savings rate rather than reporting zero or
                                # negative savings.
                                new_cost = current_cost * (1 - savings_pct)
                            monthly_savings = (current_cost - new_cost) * 730

                            if state == 'running':
                                self.total_savings += monthly_savings * 12

                            self.findings['ec2_migrations'].append({
                                'Region': region,
                                'Instance ID': instance_id,
                                'Name': name_tag,
                                'Current Type': instance_type,
                                'Recommended Type': new_type,
                                'State': state,
                                'Savings %': f"{savings_pct*100:.0f}%",
                                'Monthly Savings': f"${monthly_savings:.2f}",
                                'Migration Type': 'Standard Upgrade'
                            })

                        # Check for Graviton migration
                        elif instance_type in self.graviton_migrations:
                            new_type, savings_pct = self.graviton_migrations[instance_type]
                            current_cost = self._estimate_hourly_cost(instance_type)
                            new_cost = self._estimate_hourly_cost(new_type)
                            if new_cost >= current_cost:
                                # Same fallback as above for types missing
                                # from the static cost map (e.g. t4g.large).
                                new_cost = current_cost * (1 - savings_pct)
                            monthly_savings = (current_cost - new_cost) * 730

                            if state == 'running':
                                self.total_savings += monthly_savings * 12

                            self.findings['graviton_opportunities'].append({
                                'Region': region,
                                'Instance ID': instance_id,
                                'Name': name_tag,
                                'Current Type': instance_type,
                                'Graviton Type': new_type,
                                'State': state,
                                'Savings %': f"{savings_pct*100:.0f}%",
                                'Monthly Savings': f"${monthly_savings:.2f}",
                                'Note': 'Requires ARM64 compatibility'
                            })

            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        print(f" Found {len(self.findings['ec2_migrations'])} standard migrations")
        print(f" Found {len(self.findings['graviton_opportunities'])} Graviton opportunities")
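    # The resize itself is a stop/modify/start cycle. A hedged sketch with a
    # placeholder instance ID (verify the target type exists in your region):
    #
    #   aws ec2 stop-instances --instance-ids i-0123456789abcdef0
    #   aws ec2 modify-instance-attribute --instance-id i-0123456789abcdef0 \
    #       --instance-type Value=t3.medium
    #   aws ec2 start-instances --instance-ids i-0123456789abcdef0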
    def detect_rds_migrations(self):
        """Detect old generation RDS instances."""
        print("\n[2/3] Scanning for old generation RDS instances...")

        for region in self.regions:
            try:
                rds = self.session.client('rds', region_name=region)
                instances = rds.describe_db_instances()

                for instance in instances['DBInstances']:
                    instance_id = instance['DBInstanceIdentifier']
                    instance_class = instance['DBInstanceClass']
                    engine = instance['Engine']
                    status = instance['DBInstanceStatus']

                    if instance_class in self.rds_migrations:
                        new_class, savings_pct = self.rds_migrations[instance_class]

                        # RDS pricing is roughly 2x EC2 for the same size;
                        # this is a rough approximation
                        base_type = instance_class.replace('db.', '')
                        current_cost = self._estimate_hourly_cost(base_type) * 2
                        new_cost = current_cost * (1 - savings_pct)
                        monthly_savings = (current_cost - new_cost) * 730

                        if status == 'available':
                            self.total_savings += monthly_savings * 12

                        self.findings['rds_migrations'].append({
                            'Region': region,
                            'Instance ID': instance_id,
                            'Engine': engine,
                            'Current Class': instance_class,
                            'Recommended Class': new_class,
                            'Status': status,
                            'Savings %': f"{savings_pct*100:.0f}%",
                            'Monthly Savings': f"${monthly_savings:.2f}"
                        })

            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        print(f" Found {len(self.findings['rds_migrations'])} RDS migrations")
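    # Hedged sketch of the corresponding RDS change (placeholder identifier;
    # without --apply-immediately the change waits for the maintenance window):
    #
    #   aws rds modify-db-instance --db-instance-identifier mydb \
    #       --db-instance-class db.t3.medium --apply-immediately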
    def print_report(self):
        """Print migration recommendations report."""
        print("\n" + "="*100)
        print("OLD GENERATION INSTANCE MIGRATION REPORT")
        print("="*100)

        if self.findings['ec2_migrations']:
            print("\nEC2 STANDARD MIGRATION OPPORTUNITIES")
            print("-" * 100)
            print(tabulate(self.findings['ec2_migrations'], headers='keys', tablefmt='grid'))

        if self.findings['graviton_opportunities']:
            print("\nEC2 GRAVITON (ARM64) MIGRATION OPPORTUNITIES")
            print("-" * 100)
            print(tabulate(self.findings['graviton_opportunities'], headers='keys', tablefmt='grid'))
            print("\nNOTE: Graviton instances offer significant savings but require ARM64-compatible workloads.")
            print("Test thoroughly before migrating production workloads.")

        if self.findings['rds_migrations']:
            print("\nRDS MIGRATION OPPORTUNITIES")
            print("-" * 100)
            print(tabulate(self.findings['rds_migrations'], headers='keys', tablefmt='grid'))

        print("\n" + "="*100)
        print(f"ESTIMATED ANNUAL SAVINGS: ${self.total_savings:.2f}")
        print("="*100)

        print("\n\nMIGRATION RECOMMENDATIONS:")
        print("\nEC2 Standard Migrations (x86):")
        print("- Generally drop-in replacements with better performance")
        print("- Can be done with an instance type change (stop/start required)")
        print("- Minimal to no application changes needed")
        print("\nGraviton Migrations (ARM64):")
        print("- Require ARM64-compatible applications and dependencies")
        print("- Test in non-production first")
        print("- Most modern languages/frameworks support ARM64")
        print("- Offer the best price/performance ratio")
        print("\nRDS Migrations:")
        print("- Require a database instance modification")
        print("- Trigger brief downtime during modification")
        print("- Schedule during a maintenance window")
        print("- Use Multi-AZ for minimal downtime")

    def run(self):
        """Run old generation detection."""
        print(f"Scanning for old generation instances across {len(self.regions)} region(s)...")

        self.detect_ec2_migrations()
        self.detect_rds_migrations()

        self.print_report()


def main():
    parser = argparse.ArgumentParser(
        description='Detect old generation AWS instances and recommend migrations',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Scan all regions with default profile
  python3 detect_old_generations.py

  # Scan specific region
  python3 detect_old_generations.py --region us-east-1

  # Use named profile
  python3 detect_old_generations.py --profile production
"""
    )

    parser.add_argument('--region', help='AWS region (default: all regions)')
    parser.add_argument('--profile', help='AWS profile name (default: default profile)')

    args = parser.parse_args()

    try:
        detector = OldGenerationDetector(
            profile=args.profile,
            region=args.region
        )
        detector.run()
    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
scripts/find_unused_resources.py (new executable file, 409 lines)
#!/usr/bin/env python3
"""
Find unused AWS resources that are costing money.

This script identifies:
- Unattached EBS volumes
- Old EBS snapshots
- Unused Elastic IPs
- Idle NAT Gateways
- Idle EC2 instances (low utilization)
- Load balancers with no healthy targets

Usage:
    python3 find_unused_resources.py [--region REGION] [--profile PROFILE] [--snapshot-age-days DAYS]

Requirements:
    pip install boto3 tabulate
"""

import argparse
import boto3
from datetime import datetime, timedelta, timezone
from typing import List
from tabulate import tabulate
import sys
class UnusedResourceFinder:
    def __init__(self, profile: str = None, region: str = None, snapshot_age_days: int = 90):
        self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
        self.regions = [region] if region else self._get_all_regions()
        self.snapshot_age_days = snapshot_age_days
        self.findings = {
            'ebs_volumes': [],
            'snapshots': [],
            'elastic_ips': [],
            'nat_gateways': [],
            'idle_instances': [],
            'load_balancers': []
        }
        self.total_cost_estimate = 0.0

    def _get_all_regions(self) -> List[str]:
        """Get all enabled AWS regions."""
        ec2 = self.session.client('ec2', region_name='us-east-1')
        regions = ec2.describe_regions(AllRegions=False)
        return [region['RegionName'] for region in regions['Regions']]
    def find_unattached_volumes(self):
        """Find unattached EBS volumes."""
        print("\n[1/6] Scanning for unattached EBS volumes...")

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)
                volumes = ec2.describe_volumes(
                    Filters=[{'Name': 'status', 'Values': ['available']}]
                )

                for volume in volumes['Volumes']:
                    # Rough cost estimate: gp3 is $0.08/GB-month
                    monthly_cost = volume['Size'] * 0.08
                    self.total_cost_estimate += monthly_cost

                    self.findings['ebs_volumes'].append({
                        'Region': region,
                        'Volume ID': volume['VolumeId'],
                        'Size (GB)': volume['Size'],
                        'Type': volume['VolumeType'],
                        'Created': volume['CreateTime'].strftime('%Y-%m-%d'),
                        'Est. Monthly Cost': f"${monthly_cost:.2f}"
                    })
            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        print(f" Found {len(self.findings['ebs_volumes'])} unattached volumes")
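    # Cleanup sketch (hedged; the volume ID is a placeholder). Snapshotting
    # before deletion keeps a recovery path:
    #
    #   aws ec2 create-snapshot --volume-id vol-0123456789abcdef0 \
    #       --description "backup before cleanup"
    #   aws ec2 delete-volume --volume-id vol-0123456789abcdef0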
    def find_old_snapshots(self):
        """Find old EBS snapshots."""
        print(f"\n[2/6] Scanning for snapshots older than {self.snapshot_age_days} days...")

        # Snapshot timestamps are timezone-aware, so compare against a
        # timezone-aware "now"
        now = datetime.now(timezone.utc)
        cutoff_date = now - timedelta(days=self.snapshot_age_days)

        # Resolve the account ID once; STS identity does not vary per region
        sts = self.session.client('sts')
        account_id = sts.get_caller_identity()['Account']

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)
                snapshots = ec2.describe_snapshots(OwnerIds=[account_id])

                for snapshot in snapshots['Snapshots']:
                    if snapshot['StartTime'] < cutoff_date:
                        # Snapshot storage: $0.05/GB-month
                        monthly_cost = snapshot['VolumeSize'] * 0.05
                        self.total_cost_estimate += monthly_cost

                        age_days = (now - snapshot['StartTime']).days

                        self.findings['snapshots'].append({
                            'Region': region,
                            'Snapshot ID': snapshot['SnapshotId'],
                            'Size (GB)': snapshot['VolumeSize'],
                            'Age (days)': age_days,
                            'Created': snapshot['StartTime'].strftime('%Y-%m-%d'),
                            'Est. Monthly Cost': f"${monthly_cost:.2f}"
                        })
            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        print(f" Found {len(self.findings['snapshots'])} old snapshots")
    def find_unused_elastic_ips(self):
        """Find unassociated Elastic IPs."""
        print("\n[3/6] Scanning for unused Elastic IPs...")

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)
                addresses = ec2.describe_addresses()

                for address in addresses['Addresses']:
                    if 'AssociationId' not in address:
                        # Unassociated EIP: ~$3.65/month
                        monthly_cost = 3.65
                        self.total_cost_estimate += monthly_cost

                        self.findings['elastic_ips'].append({
                            'Region': region,
                            # .get() guards against legacy addresses without
                            # an allocation ID
                            'Allocation ID': address.get('AllocationId', 'N/A'),
                            'Public IP': address.get('PublicIp', 'N/A'),
                            'Status': 'Unassociated',
                            'Est. Monthly Cost': f"${monthly_cost:.2f}"
                        })
            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        print(f" Found {len(self.findings['elastic_ips'])} unused Elastic IPs")
    def find_idle_nat_gateways(self):
        """Find NAT Gateways with low traffic."""
        print("\n[4/6] Scanning for idle NAT Gateways...")

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)
                nat_gateways = ec2.describe_nat_gateways(
                    Filters=[{'Name': 'state', 'Values': ['available']}]
                )

                cloudwatch = self.session.client('cloudwatch', region_name=region)

                for nat in nat_gateways['NatGateways']:
                    nat_id = nat['NatGatewayId']

                    # NAT Gateway: ~$32.85/month + data processing. Defined
                    # before the metrics call so the fallback branch below can
                    # reference it even when CloudWatch fails.
                    monthly_cost = 32.85

                    # Check CloudWatch metrics for the last 7 days
                    end_time = datetime.now()
                    start_time = end_time - timedelta(days=7)

                    try:
                        metrics = cloudwatch.get_metric_statistics(
                            Namespace='AWS/NATGateway',
                            MetricName='BytesOutToSource',
                            Dimensions=[{'Name': 'NatGatewayId', 'Value': nat_id}],
                            StartTime=start_time,
                            EndTime=end_time,
                            Period=86400,  # 1 day
                            Statistics=['Sum']
                        )

                        total_bytes = sum(point['Sum'] for point in metrics['Datapoints'])
                        avg_gb_per_day = (total_bytes / (1024**3)) / 7

                        # Flag as idle if less than 1 GB/day on average; only
                        # flagged gateways count toward the savings estimate
                        if avg_gb_per_day < 1:
                            self.total_cost_estimate += monthly_cost
                            self.findings['nat_gateways'].append({
                                'Region': region,
                                'NAT Gateway ID': nat_id,
                                'VPC': nat.get('VpcId', 'N/A'),
                                'Avg Traffic (GB/day)': f"{avg_gb_per_day:.2f}",
                                'Status': 'Low Traffic',
                                'Est. Monthly Cost': f"${monthly_cost:.2f}"
                            })
                    except Exception:
                        # If we can't get metrics, still report the NAT Gateway
                        self.findings['nat_gateways'].append({
                            'Region': region,
                            'NAT Gateway ID': nat_id,
                            'VPC': nat.get('VpcId', 'N/A'),
                            'Avg Traffic (GB/day)': 'N/A',
                            'Status': 'Metrics Unavailable',
                            'Est. Monthly Cost': f"${monthly_cost:.2f}"
                        })

            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        print(f" Found {len(self.findings['nat_gateways'])} idle NAT Gateways")
    def find_idle_instances(self):
        """Find EC2 instances with low CPU utilization."""
        print("\n[5/6] Scanning for idle EC2 instances...")

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)
                cloudwatch = self.session.client('cloudwatch', region_name=region)

                instances = ec2.describe_instances(
                    Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]
                )

                for reservation in instances['Reservations']:
                    for instance in reservation['Instances']:
                        instance_id = instance['InstanceId']
                        instance_type = instance['InstanceType']

                        # Check CPU utilization for the last 7 days
                        end_time = datetime.now()
                        start_time = end_time - timedelta(days=7)

                        try:
                            metrics = cloudwatch.get_metric_statistics(
                                Namespace='AWS/EC2',
                                MetricName='CPUUtilization',
                                Dimensions=[{'Name': 'InstanceId', 'Value': instance_id}],
                                StartTime=start_time,
                                EndTime=end_time,
                                Period=3600,  # 1 hour
                                Statistics=['Average']
                            )

                            if metrics['Datapoints']:
                                avg_cpu = sum(point['Average'] for point in metrics['Datapoints']) / len(metrics['Datapoints'])
                                # "Max" here is the highest hourly average,
                                # which smooths out sub-hour spikes
                                max_cpu = max(point['Average'] for point in metrics['Datapoints'])

                                # Flag instances with avg CPU < 5% and max < 15%
                                if avg_cpu < 5 and max_cpu < 15:
                                    # Rough cost estimate (varies by instance type);
                                    # the Pricing API would be needed for accuracy
                                    monthly_cost = self._estimate_instance_cost(instance_type)
                                    self.total_cost_estimate += monthly_cost

                                    name_tag = next((tag['Value'] for tag in instance.get('Tags', []) if tag['Key'] == 'Name'), 'N/A')

                                    self.findings['idle_instances'].append({
                                        'Region': region,
                                        'Instance ID': instance_id,
                                        'Name': name_tag,
                                        'Type': instance_type,
                                        'Avg CPU (%)': f"{avg_cpu:.2f}",
                                        'Max CPU (%)': f"{max_cpu:.2f}",
                                        'Est. Monthly Cost': f"${monthly_cost:.2f}"
                                    })
                        except Exception:
                            # Instances without retrievable metrics are skipped
                            pass

            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        print(f" Found {len(self.findings['idle_instances'])} idle instances")
    def find_unused_load_balancers(self):
        """Find load balancers with no healthy targets."""
        print("\n[6/6] Scanning for unused load balancers...")

        for region in self.regions:
            try:
                # Check Application/Network Load Balancers
                elbv2 = self.session.client('elbv2', region_name=region)
                load_balancers = elbv2.describe_load_balancers()

                for lb in load_balancers['LoadBalancers']:
                    lb_arn = lb['LoadBalancerArn']
                    lb_name = lb['LoadBalancerName']
                    lb_type = lb['Type']

                    # Check target groups
                    target_groups = elbv2.describe_target_groups(LoadBalancerArn=lb_arn)

                    has_healthy_targets = False
                    for tg in target_groups['TargetGroups']:
                        health = elbv2.describe_target_health(TargetGroupArn=tg['TargetGroupArn'])
                        if any(target['TargetHealth']['State'] == 'healthy' for target in health['TargetHealthDescriptions']):
                            has_healthy_targets = True
                            break

                    if not has_healthy_targets:
                        # ALB: ~$16.20/month, NLB: ~$22.35/month
                        monthly_cost = 22.35 if lb_type == 'network' else 16.20
                        self.total_cost_estimate += monthly_cost

                        self.findings['load_balancers'].append({
                            'Region': region,
                            'Name': lb_name,
                            'Type': lb_type.upper(),
                            'DNS': lb['DNSName'],
                            'Status': 'No Healthy Targets',
                            'Est. Monthly Cost': f"${monthly_cost:.2f}"
                        })

            except Exception as e:
                print(f" Error scanning {region}: {str(e)}")

        print(f" Found {len(self.findings['load_balancers'])} unused load balancers")
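    # Note: describe_load_balancers returns a single page of results;
    # accounts with many load balancers can use the built-in paginator, e.g.
    #
    #   for page in elbv2.get_paginator('describe_load_balancers').paginate():
    #       for lb in page['LoadBalancers']:
    #           ...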
    def _estimate_instance_cost(self, instance_type: str) -> float:
        """Rough estimate of monthly instance cost (On-Demand, us-east-1)."""
        # Simplified approximation: a flat hourly rate per family that
        # ignores the size suffix, so larger sizes are underestimated
        cost_map = {
            't2': 0.0116, 't3': 0.0104, 't3a': 0.0094,
            'm5': 0.096, 'm5a': 0.086, 'm6i': 0.096,
            'c5': 0.085, 'c5a': 0.077, 'c6i': 0.085,
            'r5': 0.126, 'r5a': 0.113, 'r6i': 0.126,
        }

        # Extract family (e.g., 't3' from 't3.micro')
        family = instance_type.split('.')[0]
        hourly_cost = cost_map.get(family, 0.10)  # Default to $0.10/hour

        return hourly_cost * 730  # Hours per month
    def print_report(self):
        """Print findings report."""
        print("\n" + "="*80)
        print("AWS UNUSED RESOURCES REPORT")
        print("="*80)

        sections = [
            ('UNATTACHED EBS VOLUMES', 'ebs_volumes'),
            ('OLD SNAPSHOTS', 'snapshots'),
            ('UNUSED ELASTIC IPs', 'elastic_ips'),
            ('IDLE NAT GATEWAYS', 'nat_gateways'),
            ('IDLE EC2 INSTANCES', 'idle_instances'),
            ('UNUSED LOAD BALANCERS', 'load_balancers')
        ]

        for title, key in sections:
            findings = self.findings[key]
            if findings:
                print(f"\n{title} ({len(findings)} found)")
                print("-" * 80)
                print(tabulate(findings, headers='keys', tablefmt='grid'))

        print("\n" + "="*80)
        print(f"ESTIMATED MONTHLY SAVINGS: ${self.total_cost_estimate:.2f}")
        print("="*80)
        print("\nNOTE: Cost estimates are approximate. Actual savings may vary.")
        print("Review each resource before deletion to avoid disrupting services.")

    def run(self):
        """Run all scans."""
        print(f"Scanning AWS account across {len(self.regions)} region(s)...")
        print("This may take several minutes...\n")

        self.find_unattached_volumes()
        self.find_old_snapshots()
        self.find_unused_elastic_ips()
        self.find_idle_nat_gateways()
        self.find_idle_instances()
        self.find_unused_load_balancers()

        self.print_report()


def main():
    parser = argparse.ArgumentParser(
        description='Find unused AWS resources that are costing money',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Scan all regions with default profile
  python3 find_unused_resources.py

  # Scan specific region with named profile
  python3 find_unused_resources.py --region us-east-1 --profile production

  # Find snapshots older than 180 days
  python3 find_unused_resources.py --snapshot-age-days 180
"""
    )

    parser.add_argument('--region', help='AWS region (default: all regions)')
    parser.add_argument('--profile', help='AWS profile name (default: default profile)')
    parser.add_argument('--snapshot-age-days', type=int, default=90,
                        help='Snapshots older than this are flagged (default: 90)')

    args = parser.parse_args()

    try:
        finder = UnusedResourceFinder(
            profile=args.profile,
            region=args.region,
            snapshot_age_days=args.snapshot_age_days
        )
        finder.run()
    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
scripts/rightsizing_analyzer.py (new executable file, 387 lines)
#!/usr/bin/env python3
"""
Analyze EC2 and RDS instances for rightsizing opportunities.

This script identifies:
- Oversized EC2 instances (low CPU/memory utilization)
- Oversized RDS instances (low CPU/connection utilization)
- Recommended smaller instance types
- Potential cost savings

Usage:
    python3 rightsizing_analyzer.py [--region REGION] [--profile PROFILE] [--days DAYS]

Requirements:
    pip install boto3 tabulate
"""

import argparse
import boto3
from datetime import datetime, timedelta
from typing import List, Optional
from tabulate import tabulate
import sys


class RightsizingAnalyzer:
    def __init__(self, profile: str = None, region: str = None, days: int = 14):
        self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
        self.regions = [region] if region else self._get_all_regions()
        self.days = days
        self.findings = {
            'ec2': [],
            'rds': []
        }
        self.total_savings = 0.0

        # CPU thresholds for rightsizing
        self.cpu_thresholds = {
            'underutilized': 15,  # < 15% avg CPU
            'low': 30,            # < 30% avg CPU
        }
def _get_all_regions(self) -> List[str]:
|
||||
"""Get all enabled AWS regions."""
|
||||
ec2 = self.session.client('ec2', region_name='us-east-1')
|
||||
regions = ec2.describe_regions(AllRegions=False)
|
||||
return [region['RegionName'] for region in regions['Regions']]
|
||||
|
||||
def _estimate_hourly_cost(self, instance_type: str) -> float:
|
||||
"""Rough estimate of hourly cost."""
|
||||
cost_map = {
|
||||
't3.micro': 0.0104, 't3.small': 0.0208, 't3.medium': 0.0416,
|
||||
't3.large': 0.0832, 't3.xlarge': 0.1664, 't3.2xlarge': 0.3328,
|
||||
'm5.large': 0.096, 'm5.xlarge': 0.192, 'm5.2xlarge': 0.384,
|
||||
'm5.4xlarge': 0.768, 'm5.8xlarge': 1.536, 'm5.12xlarge': 2.304,
|
||||
'm5.16xlarge': 3.072, 'm5.24xlarge': 4.608,
|
||||
'c5.large': 0.085, 'c5.xlarge': 0.17, 'c5.2xlarge': 0.34,
|
||||
'c5.4xlarge': 0.68, 'c5.9xlarge': 1.53, 'c5.12xlarge': 2.04,
|
||||
'c5.18xlarge': 3.06, 'c5.24xlarge': 4.08,
|
||||
'r5.large': 0.126, 'r5.xlarge': 0.252, 'r5.2xlarge': 0.504,
|
||||
'r5.4xlarge': 1.008, 'r5.8xlarge': 2.016, 'r5.12xlarge': 3.024,
|
||||
'r5.16xlarge': 4.032, 'r5.24xlarge': 6.048,
|
||||
}
|
||||
|
||||
if instance_type not in cost_map:
|
||||
family = instance_type.split('.')[0]
|
||||
family_defaults = {'t3': 0.04, 'm5': 0.20, 'c5': 0.17, 'r5': 0.25}
|
||||
return family_defaults.get(family, 0.10)
|
||||
|
||||
return cost_map[instance_type]
|
||||
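
    # The cost table above is a static approximation. A possible refinement
    # (a sketch only, not called anywhere in this script) is to look prices
    # up from the AWS Price List API. 'location' must be a long region name
    # such as 'US East (N. Virginia)', and the JSON parsing assumes the usual
    # shape of an On-Demand price entry.
    def _lookup_ondemand_price(self, instance_type: str,
                               location: str = 'US East (N. Virginia)') -> Optional[float]:
        """Sketch: fetch the Linux On-Demand hourly price for one instance type."""
        import json

        # The Pricing API is only hosted in a few regions; us-east-1 works.
        pricing = self.session.client('pricing', region_name='us-east-1')
        resp = pricing.get_products(
            ServiceCode='AmazonEC2',
            Filters=[
                {'Type': 'TERM_MATCH', 'Field': 'instanceType', 'Value': instance_type},
                {'Type': 'TERM_MATCH', 'Field': 'location', 'Value': location},
                {'Type': 'TERM_MATCH', 'Field': 'operatingSystem', 'Value': 'Linux'},
                {'Type': 'TERM_MATCH', 'Field': 'tenancy', 'Value': 'Shared'},
                {'Type': 'TERM_MATCH', 'Field': 'preInstalledSw', 'Value': 'NA'},
                {'Type': 'TERM_MATCH', 'Field': 'capacitystatus', 'Value': 'Used'},
            ],
            MaxResults=1
        )
        # Each PriceList entry is a JSON string; walk to the USD rate.
        for item in resp['PriceList']:
            product = json.loads(item)
            for term in product['terms']['OnDemand'].values():
                for dim in term['priceDimensions'].values():
                    return float(dim['pricePerUnit']['USD'])
        return None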

    def _get_smaller_instance_type(self, current_type: str) -> Optional[str]:
        """Suggest a smaller instance type."""
        # Size progression within families
        sizes = ['nano', 'micro', 'small', 'medium', 'large', 'xlarge', '2xlarge',
                 '3xlarge', '4xlarge', '8xlarge', '9xlarge', '12xlarge', '16xlarge',
                 '18xlarge', '24xlarge', '32xlarge']

        parts = current_type.split('.')
        if len(parts) != 2:
            return None

        family, size = parts

        if size not in sizes:
            return None

        current_idx = sizes.index(size)
        if current_idx <= 0:
            return None  # Already at smallest

        # Go down one size (e.g. 'm5.2xlarge' -> 'm5.xlarge'). Note that not
        # every family offers every size, so verify the suggestion exists.
        new_size = sizes[current_idx - 1]
        return f"{family}.{new_size}"

    def analyze_ec2_instances(self):
        """Analyze EC2 instances for rightsizing."""
        print(f"\n[1/2] Analyzing EC2 instances (last {self.days} days)...")

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)
                cloudwatch = self.session.client('cloudwatch', region_name=region)

                instances = ec2.describe_instances(
                    Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]
                )

                for reservation in instances['Reservations']:
                    for instance in reservation['Instances']:
                        instance_id = instance['InstanceId']
                        instance_type = instance['InstanceType']

                        # Skip smallest instances (already optimized)
                        if any(size in instance_type for size in ['nano', 'micro', 'small']):
                            continue

                        name_tag = next((tag['Value'] for tag in instance.get('Tags', [])
                                         if tag['Key'] == 'Name'), 'N/A')

                        # Get CloudWatch metrics
                        end_time = datetime.now()
                        start_time = end_time - timedelta(days=self.days)

                        try:
                            # CPU Utilization
                            cpu_metrics = cloudwatch.get_metric_statistics(
                                Namespace='AWS/EC2',
                                MetricName='CPUUtilization',
                                Dimensions=[{'Name': 'InstanceId', 'Value': instance_id}],
                                StartTime=start_time,
                                EndTime=end_time,
                                Period=3600,
                                Statistics=['Average', 'Maximum']
                            )

                            if not cpu_metrics['Datapoints']:
                                continue

                            avg_cpu = sum(p['Average'] for p in cpu_metrics['Datapoints']) / len(cpu_metrics['Datapoints'])
                            max_cpu = max(p['Maximum'] for p in cpu_metrics['Datapoints'])

                            # Check if underutilized
                            if avg_cpu < self.cpu_thresholds['low'] and max_cpu < 60:
                                smaller_type = self._get_smaller_instance_type(instance_type)

                                if smaller_type:
                                    current_cost = self._estimate_hourly_cost(instance_type)
                                    new_cost = self._estimate_hourly_cost(smaller_type)
                                    monthly_savings = (current_cost - new_cost) * 730
                                    annual_savings = monthly_savings * 12

                                    self.total_savings += annual_savings

                                    # Determine severity
                                    if avg_cpu < self.cpu_thresholds['underutilized']:
                                        severity = "High"
                                    else:
                                        severity = "Medium"

                                    self.findings['ec2'].append({
                                        'Region': region,
                                        'Instance ID': instance_id,
                                        'Name': name_tag,
                                        'Current Type': instance_type,
                                        'Recommended Type': smaller_type,
                                        'Avg CPU (%)': f"{avg_cpu:.1f}",
                                        'Max CPU (%)': f"{max_cpu:.1f}",
                                        'Monthly Savings': f"${monthly_savings:.2f}",
                                        'Severity': severity
                                    })

                        except Exception:
                            pass  # Skip instances without metrics

            except Exception as e:
                print(f"  Error scanning {region}: {str(e)}")

        print(f"  Found {len(self.findings['ec2'])} rightsizing opportunities")
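
    # Scaling note (a sketch under assumptions, not wired into the scan
    # above): for large fleets, one batched CloudWatch GetMetricData call
    # (up to 500 queries per request) can replace many per-instance
    # get_metric_statistics calls. The query ids and result handling here
    # are illustrative only.
    def _batch_avg_cpu(self, cloudwatch, instance_ids: List[str]) -> Dict[str, float]:
        """Sketch: average CPU per instance via one GetMetricData request."""
        end_time = datetime.now()
        start_time = end_time - timedelta(days=self.days)
        queries = [{
            'Id': f'cpu{i}',  # ids must start with a lowercase letter
            'MetricStat': {
                'Metric': {
                    'Namespace': 'AWS/EC2',
                    'MetricName': 'CPUUtilization',
                    'Dimensions': [{'Name': 'InstanceId', 'Value': instance_id}],
                },
                'Period': 3600,
                'Stat': 'Average',
            },
        } for i, instance_id in enumerate(instance_ids)]
        resp = cloudwatch.get_metric_data(
            MetricDataQueries=queries, StartTime=start_time, EndTime=end_time
        )
        # Map each result back to its instance via the index in the query id.
        return {
            instance_ids[int(r['Id'][3:])]:
                (sum(r['Values']) / len(r['Values']) if r['Values'] else 0.0)
            for r in resp['MetricDataResults']
        }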

    def analyze_rds_instances(self):
        """Analyze RDS instances for rightsizing."""
        print(f"\n[2/2] Analyzing RDS instances (last {self.days} days)...")

        for region in self.regions:
            try:
                rds = self.session.client('rds', region_name=region)
                cloudwatch = self.session.client('cloudwatch', region_name=region)

                instances = rds.describe_db_instances()

                for instance in instances['DBInstances']:
                    instance_id = instance['DBInstanceIdentifier']
                    instance_class = instance['DBInstanceClass']
                    engine = instance['Engine']

                    # Skip smallest instances
                    if any(size in instance_class for size in ['micro', 'small']):
                        continue

                    # Get CloudWatch metrics
                    end_time = datetime.now()
                    start_time = end_time - timedelta(days=self.days)

                    try:
                        # CPU Utilization
                        cpu_metrics = cloudwatch.get_metric_statistics(
                            Namespace='AWS/RDS',
                            MetricName='CPUUtilization',
                            Dimensions=[{'Name': 'DBInstanceIdentifier', 'Value': instance_id}],
                            StartTime=start_time,
                            EndTime=end_time,
                            Period=3600,
                            Statistics=['Average', 'Maximum']
                        )

                        # Database Connections
                        conn_metrics = cloudwatch.get_metric_statistics(
                            Namespace='AWS/RDS',
                            MetricName='DatabaseConnections',
                            Dimensions=[{'Name': 'DBInstanceIdentifier', 'Value': instance_id}],
                            StartTime=start_time,
                            EndTime=end_time,
                            Period=3600,
                            Statistics=['Average', 'Maximum']
                        )

                        if not cpu_metrics['Datapoints']:
                            continue

                        avg_cpu = sum(p['Average'] for p in cpu_metrics['Datapoints']) / len(cpu_metrics['Datapoints'])
                        max_cpu = max(p['Maximum'] for p in cpu_metrics['Datapoints'])

                        avg_conns = 0
                        max_conns = 0
                        if conn_metrics['Datapoints']:
                            avg_conns = sum(p['Average'] for p in conn_metrics['Datapoints']) / len(conn_metrics['Datapoints'])
                            max_conns = max(p['Maximum'] for p in conn_metrics['Datapoints'])

                        # Check if underutilized
                        if avg_cpu < self.cpu_thresholds['low'] and max_cpu < 60:
                            # DBInstanceClass looks like 'db.r5.large'; strip
                            # the 'db.' prefix so the size-stepping helper
                            # (which expects 'family.size') can handle it.
                            base = instance_class.replace('db.', '', 1)
                            smaller_base = self._get_smaller_instance_type(base)
                            smaller_class = f"db.{smaller_base}" if smaller_base else None

                            if smaller_class:
                                # RDS pricing is roughly 2x EC2 for the same size
                                current_cost = self._estimate_hourly_cost(base) * 2
                                new_cost = self._estimate_hourly_cost(smaller_base) * 2

                                monthly_savings = (current_cost - new_cost) * 730
                                annual_savings = monthly_savings * 12

                                self.total_savings += annual_savings

                                # Determine severity
                                if avg_cpu < self.cpu_thresholds['underutilized']:
                                    severity = "High"
                                else:
                                    severity = "Medium"

                                self.findings['rds'].append({
                                    'Region': region,
                                    'Instance ID': instance_id,
                                    'Engine': engine,
                                    'Current Class': instance_class,
                                    'Recommended Class': smaller_class,
                                    'Avg CPU (%)': f"{avg_cpu:.1f}",
                                    'Max CPU (%)': f"{max_cpu:.1f}",
                                    'Avg Connections': f"{avg_conns:.0f}",
                                    'Monthly Savings': f"${monthly_savings:.2f}",
                                    'Severity': severity
                                })

                    except Exception:
                        pass  # Skip instances without metrics

            except Exception as e:
                print(f"  Error scanning {region}: {str(e)}")

        print(f"  Found {len(self.findings['rds'])} rightsizing opportunities")

    def print_report(self):
        """Print rightsizing report."""
        print("\n" + "="*110)
        print("RIGHTSIZING RECOMMENDATIONS")
        print("="*110)

        if self.findings['ec2']:
            print("\nEC2 RIGHTSIZING OPPORTUNITIES")
            print("-" * 110)
            sorted_ec2 = sorted(self.findings['ec2'],
                                key=lambda x: float(x['Monthly Savings'].replace('$', '')),
                                reverse=True)
            print(tabulate(sorted_ec2, headers='keys', tablefmt='grid'))

        if self.findings['rds']:
            print("\nRDS RIGHTSIZING OPPORTUNITIES")
            print("-" * 110)
            sorted_rds = sorted(self.findings['rds'],
                                key=lambda x: float(x['Monthly Savings'].replace('$', '')),
                                reverse=True)
            print(tabulate(sorted_rds, headers='keys', tablefmt='grid'))

        print("\n" + "="*110)
        print(f"TOTAL ANNUAL SAVINGS: ${self.total_savings:.2f}")
        print("="*110)

        print("\n\nRIGHTSIZING BEST PRACTICES:")
        print("\n1. Before Rightsizing:")
        print("   - Review metrics over a longer period (30+ days recommended)")
        print("   - Check for seasonal patterns or cyclical workloads")
        print("   - Verify that the current size isn't required for burst capacity")
        print("   - Review application performance requirements")

        print("\n2. Rightsizing Process:")
        print("   - Test in a non-production environment first")
        print("   - Schedule during a maintenance window")
        print("   - EC2: Stop instance → Change type → Start")
        print("   - RDS: Modify instance (causes brief downtime)")
        print("   - Monitor performance after the change")

        print("\n3. Important Considerations:")
        print("   - Changing to an incompatible type (different architecture or")
        print("     virtualization) requires a new instance")
        print("   - EBS-optimized settings may change with instance type")
        print("   - Network performance varies by instance size")
        print("   - Consider vertical scaling limits vs horizontal scaling")

        print("\n4. Alternative Approaches:")
        print("   - Consider serverless options (Lambda, Fargate, Aurora Serverless)")
        print("   - Use Auto Scaling to match capacity to demand")
        print("   - Implement horizontal scaling instead of larger instances")
        print("   - Evaluate containerization for better resource utilization")
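
    # The stop → modify → start flow from section 2 above, as a minimal
    # boto3 sketch. Assumptions: this method is not called by the script;
    # instance_id and new_type are supplied by the operator after reviewing
    # the report.
    def _example_resize_instance(self, region: str, instance_id: str, new_type: str):
        """Sketch: resize a stopped EC2 instance in place."""
        ec2 = self.session.client('ec2', region_name=region)
        ec2.stop_instances(InstanceIds=[instance_id])
        ec2.get_waiter('instance_stopped').wait(InstanceIds=[instance_id])
        ec2.modify_instance_attribute(InstanceId=instance_id,
                                      InstanceType={'Value': new_type})
        ec2.start_instances(InstanceIds=[instance_id])
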
    def run(self):
        """Run rightsizing analysis."""
        print("Analyzing AWS resources for rightsizing opportunities...")
        print(f"Metrics period: {self.days} days")
        print(f"Scanning {len(self.regions)} region(s)...\n")

        self.analyze_ec2_instances()
        self.analyze_rds_instances()

        self.print_report()


def main():
    parser = argparse.ArgumentParser(
        description='Analyze AWS resources for rightsizing opportunities',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Analyze all regions (14 days of metrics)
  python3 rightsizing_analyzer.py

  # Analyze with 30 days of metrics for better accuracy
  python3 rightsizing_analyzer.py --days 30

  # Analyze specific region
  python3 rightsizing_analyzer.py --region us-east-1

  # Use named profile
  python3 rightsizing_analyzer.py --profile production
        """
    )

    parser.add_argument('--region', help='AWS region (default: all regions)')
    parser.add_argument('--profile', help='AWS profile name (default: default profile)')
    parser.add_argument('--days', type=int, default=14,
                        help='Days of metrics to analyze (default: 14)')

    args = parser.parse_args()

    try:
        analyzer = RightsizingAnalyzer(
            profile=args.profile,
            region=args.region,
            days=args.days
        )
        analyzer.run()
    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
281
scripts/spot_recommendations.py
Executable file
@@ -0,0 +1,281 @@
#!/usr/bin/env python3
"""
Analyze EC2 workloads and recommend Spot instance opportunities.

This script identifies:
- Fault-tolerant workloads suitable for Spot instances
- Potential savings from Spot vs On-Demand
- Instances in Auto Scaling Groups (good Spot candidates)
- Non-critical workloads based on tags

Usage:
    python3 spot_recommendations.py [--region REGION] [--profile PROFILE]

Requirements:
    pip install boto3 tabulate
"""

import argparse
import boto3
from datetime import datetime, timedelta
from typing import List, Dict, Any
from tabulate import tabulate
import sys


class SpotRecommendationAnalyzer:
    def __init__(self, profile: str = None, region: str = None):
        self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
        self.regions = [region] if region else self._get_all_regions()
        self.recommendations = []
        self.total_savings = 0.0

        # Average Spot savings (typically 60-90% discount)
        self.spot_discount = 0.70  # Conservative 70% discount

        # Tags that indicate Spot suitability
        self.spot_friendly_tags = {
            'Environment': ['dev', 'development', 'test', 'testing', 'staging', 'qa'],
            'Workload': ['batch', 'processing', 'worker', 'ci', 'build'],
            'CriticalLevel': ['low', 'non-critical', 'noncritical']
        }
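
        # These tag names and values are this script's own conventions, not
        # AWS ones; a team could mark an instance as Spot-friendly with e.g.:
        #   ec2.create_tags(Resources=[instance_id],
        #                   Tags=[{'Key': 'Workload', 'Value': 'batch'}])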

    def _get_all_regions(self) -> List[str]:
        """Get all enabled AWS regions."""
        ec2 = self.session.client('ec2', region_name='us-east-1')
        regions = ec2.describe_regions(AllRegions=False)
        return [region['RegionName'] for region in regions['Regions']]

    def _estimate_hourly_cost(self, instance_type: str) -> float:
        """Rough estimate of hourly cost."""
        cost_map = {
            't3.micro': 0.0104, 't3.small': 0.0208, 't3.medium': 0.0416,
            't3.large': 0.0832, 't3.xlarge': 0.1664, 't3.2xlarge': 0.3328,
            'm5.large': 0.096, 'm5.xlarge': 0.192, 'm5.2xlarge': 0.384,
            'm5.4xlarge': 0.768, 'm5.8xlarge': 1.536,
            'c5.large': 0.085, 'c5.xlarge': 0.17, 'c5.2xlarge': 0.34,
            'c5.4xlarge': 0.68, 'c5.9xlarge': 1.53,
            'r5.large': 0.126, 'r5.xlarge': 0.252, 'r5.2xlarge': 0.504,
            'r5.4xlarge': 1.008, 'r5.8xlarge': 2.016,
        }

        # Default fallback
        if instance_type not in cost_map:
            family = instance_type.split('.')[0]
            family_defaults = {'t3': 0.04, 'm5': 0.10, 'c5': 0.09, 'r5': 0.13}
            return family_defaults.get(family, 0.10)

        return cost_map[instance_type]

    def _calculate_suitability_score(self, instance: Dict, asg_member: bool) -> tuple:
        """Calculate Spot suitability score (0-100) and reasons."""
        score = 0
        reasons = []

        # Check if in Auto Scaling Group (high suitability)
        if asg_member:
            score += 40
            reasons.append("Part of Auto Scaling Group")

        # Check tags for environment/workload type
        tags = {tag['Key']: tag['Value'].lower() for tag in instance.get('Tags', [])}

        for key, spot_values in self.spot_friendly_tags.items():
            if key in tags and tags[key] in spot_values:
                score += 20
                reasons.append(f"{key}={tags[key]}")

        # Check instance age (older instances might be more stable)
        launch_time = instance['LaunchTime']
        days_running = (datetime.now(launch_time.tzinfo) - launch_time).days
        if days_running > 30:
            score += 10
            reasons.append(f"Running {days_running} days (stable)")

        # Check instance size (smaller instances have better Spot availability).
        # Compare the size suffix exactly so 'xlarge' doesn't match 'large'.
        instance_type = instance['InstanceType']
        size_suffix = instance_type.split('.')[-1]
        if size_suffix in ('micro', 'small', 'medium', 'large'):
            score += 15
            reasons.append("Standard size (good Spot availability)")

        # Default baseline
        if not reasons:
            score = 30
            reasons.append("General compute workload")

        return min(score, 100), reasons
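
    # Worked example (hypothetical instance): an ASG member (+40) tagged
    # Environment=staging (+20), running 45 days (+10), on a t3.large (+15)
    # scores 85/100, which analyze_instances() below reports as
    # "Highly Recommended".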

    def analyze_instances(self):
        """Analyze EC2 instances for Spot opportunities."""
        print(f"\nAnalyzing EC2 instances across {len(self.regions)} region(s)...")

        for region in self.regions:
            try:
                ec2 = self.session.client('ec2', region_name=region)
                autoscaling = self.session.client('autoscaling', region_name=region)

                # Get all Auto Scaling Groups
                asg_instances = set()
                try:
                    asgs = autoscaling.describe_auto_scaling_groups()
                    for asg in asgs['AutoScalingGroups']:
                        for instance in asg['Instances']:
                            asg_instances.add(instance['InstanceId'])
                except Exception:
                    pass

                # Get all running instances. Note: the 'instance-lifecycle'
                # filter only accepts 'spot' and 'scheduled'; On-Demand
                # instances simply lack the attribute, so Spot instances are
                # skipped inside the loop instead.
                instances = ec2.describe_instances(
                    Filters=[
                        {'Name': 'instance-state-name', 'Values': ['running']}
                    ]
                )

                for reservation in instances['Reservations']:
                    for instance in reservation['Instances']:
                        # Skip instances that are already running as Spot
                        if instance.get('InstanceLifecycle') == 'spot':
                            continue

                        instance_id = instance['InstanceId']
                        instance_type = instance['InstanceType']
                        asg_member = instance_id in asg_instances

                        # Calculate suitability
                        score, reasons = self._calculate_suitability_score(instance, asg_member)

                        # Calculate savings
                        hourly_cost = self._estimate_hourly_cost(instance_type)
                        monthly_savings = hourly_cost * 730 * self.spot_discount
                        annual_savings = monthly_savings * 12

                        self.total_savings += annual_savings

                        # Get instance name
                        name_tag = next((tag['Value'] for tag in instance.get('Tags', [])
                                         if tag['Key'] == 'Name'), 'N/A')

                        # Determine recommendation
                        if score >= 70:
                            recommendation = "Highly Recommended"
                        elif score >= 50:
                            recommendation = "Recommended"
                        elif score >= 30:
                            recommendation = "Consider (with caution)"
                        else:
                            recommendation = "Not Recommended"

                        self.recommendations.append({
                            'Region': region,
                            'Instance ID': instance_id,
                            'Name': name_tag,
                            'Type': instance_type,
                            'In ASG': 'Yes' if asg_member else 'No',
                            'Suitability Score': f"{score}/100",
                            'Monthly Savings': f"${monthly_savings:.2f}",
                            'Recommendation': recommendation,
                            'Reasons': ', '.join(reasons[:2])  # Show top 2 reasons
                        })

            except Exception as e:
                print(f"  Error scanning {region}: {str(e)}")

        print(f"  Analyzed {len(self.recommendations)} instances")

    def print_report(self):
        """Print Spot recommendations report."""
        print("\n" + "="*120)
        print("SPOT INSTANCE RECOMMENDATIONS")
        print("="*120)

        # Sort by suitability score (descending)
        sorted_recs = sorted(self.recommendations,
                             key=lambda x: int(x['Suitability Score'].split('/')[0]),
                             reverse=True)

        if sorted_recs:
            print(tabulate(sorted_recs, headers='keys', tablefmt='grid'))

        print("\n" + "="*120)
        print(f"TOTAL ANNUAL SAVINGS POTENTIAL: ${self.total_savings:.2f}")
        print(f"(Assumes {int(self.spot_discount*100)}% average Spot discount)")
        print("="*120)

        print("\n\nSPOT INSTANCE BEST PRACTICES:")
        print("\n1. Use Spot Instances for:")
        print("   - Stateless applications")
        print("   - Batch processing jobs")
        print("   - CI/CD and build servers")
        print("   - Data analysis and processing")
        print("   - Dev/test/staging environments")
        print("   - Auto Scaling Groups with mixed instance types")

        print("\n2. Do NOT use Spot Instances for:")
        print("   - Databases without replicas")
        print("   - Stateful applications without checkpointing")
        print("   - Real-time, latency-sensitive services")
        print("   - Applications that can't handle interruptions")

        print("\n3. Spot Best Practices:")
        print("   - Use Spot Fleet or Auto Scaling Groups with Spot")
        print("   - Diversify across multiple instance types")
        print("   - Implement graceful shutdown handlers (2-minute warning)")
        print("   - Use Spot Instance interruption notices")
        print("   - Consider a Spot + On-Demand mix (e.g., 70/30)")
        print("   - Set an appropriate max price (typically the On-Demand price)")

        print("\n4. Implementation Steps:")
        print("   - Test Spot behavior in non-production first")
        print("   - Implement interruption handling in your application")
        print("   - Use EC2 Fleet or Auto Scaling with a mixed instances policy")
        print("   - Monitor Spot interruption rates")
        print("   - Set up CloudWatch alarms for Spot terminations")

        print("\n5. Tools to Use:")
        print("   - EC2 Spot Instance Advisor (check interruption rates)")
        print("   - Auto Scaling Groups with mixed instances policy")
        print("   - Spot Fleet for diverse instance type selection")
        print("   - AWS Spot Instances best practices guide")

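    # A mixed-instances Auto Scaling Group is the pattern sections 3-4 above
    # point at. A minimal sketch, assuming a launch template named
    # 'app-template' already exists; the group name, subnets, and instance
    # types are placeholders, and this method is never called by the script.
    def _example_mixed_asg(self, region: str):
        """Sketch: ASG mixing On-Demand and Spot (roughly 30/70)."""
        autoscaling = self.session.client('autoscaling', region_name=region)
        autoscaling.create_auto_scaling_group(
            AutoScalingGroupName='app-asg-mixed',
            MinSize=2,
            MaxSize=10,
            VPCZoneIdentifier='subnet-aaa,subnet-bbb',
            MixedInstancesPolicy={
                'LaunchTemplate': {
                    'LaunchTemplateSpecification': {
                        'LaunchTemplateName': 'app-template',
                        'Version': '$Latest'
                    },
                    # Diversify across several types for better Spot depth
                    'Overrides': [{'InstanceType': t} for t in
                                  ('m5.large', 'm5a.large', 'c5.large', 'r5.large')]
                },
                'InstancesDistribution': {
                    'OnDemandPercentageAboveBaseCapacity': 30,
                    'SpotAllocationStrategy': 'capacity-optimized'
                }
            }
        )
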
    def run(self):
        """Run Spot analysis."""
        print("="*80)
        print("AWS SPOT INSTANCE OPPORTUNITY ANALYZER")
        print("="*80)

        self.analyze_instances()
        self.print_report()


def main():
    parser = argparse.ArgumentParser(
        description='Analyze EC2 workloads for Spot instance opportunities',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Analyze all regions with default profile
  python3 spot_recommendations.py

  # Analyze specific region
  python3 spot_recommendations.py --region us-east-1

  # Use named profile
  python3 spot_recommendations.py --profile production
        """
    )

    parser.add_argument('--region', help='AWS region (default: all regions)')
    parser.add_argument('--profile', help='AWS profile name (default: default profile)')

    args = parser.parse_args()

    try:
        analyzer = SpotRecommendationAnalyzer(
            profile=args.profile,
            region=args.region
        )
        analyzer.run()
    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()