Initial commit

Zhongwei Li
2025-11-29 17:51:09 +08:00
commit 9d4643f587
14 changed files with 4713 additions and 0 deletions

scripts/analyze_ri_recommendations.py Executable file

@@ -0,0 +1,347 @@
#!/usr/bin/env python3
"""
Analyze EC2 and RDS usage patterns to recommend Reserved Instances.
This script:
- Identifies consistently running EC2 instances
- Calculates potential savings with Reserved Instances
- Recommends RI types (Standard vs Convertible) and commitment levels (1yr vs 3yr)
- Analyzes RDS instances for RI opportunities
Usage:
python3 analyze_ri_recommendations.py [--region REGION] [--profile PROFILE] [--days DAYS]
Requirements:
pip install boto3 tabulate
"""
import argparse
import boto3
from datetime import datetime, timedelta
from typing import List, Dict, Any
from collections import defaultdict
from tabulate import tabulate
import sys
class RIAnalyzer:
def __init__(self, profile: str = None, region: str = None, days: int = 30):
self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
self.regions = [region] if region else self._get_all_regions()
self.days = days
self.recommendations = {
'ec2': [],
'rds': []
}
self.total_potential_savings = 0.0
# Simplified RI discount rates (actual rates vary by region/instance type)
self.ri_discounts = {
'1yr_no_upfront': 0.40, # ~40% savings
'1yr_partial_upfront': 0.42, # ~42% savings
'1yr_all_upfront': 0.43, # ~43% savings
'3yr_no_upfront': 0.60, # ~60% savings
'3yr_partial_upfront': 0.62, # ~62% savings
'3yr_all_upfront': 0.63 # ~63% savings
}
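        # Illustrative comparison using the simplified rates above: on $100/month
        # of On-Demand spend, a 1yr No Upfront RI saves ~$40/month while a
        # 3yr All Upfront RI saves ~$63/month.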
def _get_all_regions(self) -> List[str]:
"""Get all enabled AWS regions."""
ec2 = self.session.client('ec2', region_name='us-east-1')
regions = ec2.describe_regions(AllRegions=False)
return [region['RegionName'] for region in regions['Regions']]
def _estimate_hourly_cost(self, instance_type: str) -> float:
"""Rough estimate of On-Demand hourly cost."""
cost_map = {
't2.micro': 0.0116, 't2.small': 0.023, 't2.medium': 0.0464,
't3.micro': 0.0104, 't3.small': 0.0208, 't3.medium': 0.0416,
't3.large': 0.0832, 't3.xlarge': 0.1664, 't3.2xlarge': 0.3328,
'm5.large': 0.096, 'm5.xlarge': 0.192, 'm5.2xlarge': 0.384,
'm5.4xlarge': 0.768, 'm5.8xlarge': 1.536, 'm5.12xlarge': 2.304,
'c5.large': 0.085, 'c5.xlarge': 0.17, 'c5.2xlarge': 0.34,
'c5.4xlarge': 0.68, 'c5.9xlarge': 1.53, 'c5.18xlarge': 3.06,
'r5.large': 0.126, 'r5.xlarge': 0.252, 'r5.2xlarge': 0.504,
'r5.4xlarge': 1.008, 'r5.8xlarge': 2.016, 'r5.12xlarge': 3.024,
}
# Default fallback based on instance family
if instance_type not in cost_map:
family = instance_type.split('.')[0]
family_defaults = {'t2': 0.02, 't3': 0.02, 'm5': 0.10, 'c5': 0.09, 'r5': 0.13}
return family_defaults.get(family, 0.10)
return cost_map[instance_type]
def _calculate_savings(self, hourly_cost: float, hours_running: float) -> Dict[str, float]:
"""Calculate potential savings with different RI options."""
monthly_od_cost = hourly_cost * hours_running
savings = {}
for ri_type, discount in self.ri_discounts.items():
monthly_ri_cost = monthly_od_cost * (1 - discount)
monthly_savings = monthly_od_cost - monthly_ri_cost
savings[ri_type] = {
'monthly_cost': monthly_ri_cost,
'monthly_savings': monthly_savings,
'annual_savings': monthly_savings * 12
}
return savings
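        # Example (illustrative): _calculate_savings(0.096, 730) yields, among
        # other keys, savings['3yr_all_upfront'] with monthly_cost ~25.93,
        # monthly_savings ~44.15, and annual_savings ~529.80.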
def analyze_ec2_instances(self):
"""Analyze EC2 instances for RI opportunities."""
print(f"\n[1/2] Analyzing EC2 instances (last {self.days} days)...")
# Group instances by type and platform
instance_groups = defaultdict(lambda: {'count': 0, 'instances': []})
for region in self.regions:
try:
ec2 = self.session.client('ec2', region_name=region)
cloudwatch = self.session.client('cloudwatch', region_name=region)
instances = ec2.describe_instances(
Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]
)
for reservation in instances['Reservations']:
for instance in reservation['Instances']:
instance_id = instance['InstanceId']
instance_type = instance['InstanceType']
platform = instance.get('Platform', 'Linux/UNIX')
# Check if instance has been running consistently
launch_time = instance['LaunchTime']
days_running = (datetime.now(launch_time.tzinfo) - launch_time).days
if days_running >= self.days:
# Check uptime via CloudWatch
end_time = datetime.now()
start_time = end_time - timedelta(days=self.days)
try:
metrics = cloudwatch.get_metric_statistics(
Namespace='AWS/EC2',
MetricName='StatusCheckFailed',
Dimensions=[{'Name': 'InstanceId', 'Value': instance_id}],
StartTime=start_time,
EndTime=end_time,
Period=3600,
Statistics=['Sum']
)
                                # Any datapoints confirm uptime; the age check above already
                                # qualifies the instance, so this is a best-effort sanity check
                                if metrics['Datapoints'] or days_running >= self.days:
key = f"{instance_type}_{platform}_{region}"
instance_groups[key]['count'] += 1
instance_groups[key]['instances'].append({
'id': instance_id,
'type': instance_type,
'platform': platform,
'region': region,
'days_running': days_running
})
except Exception:
# If CloudWatch fails, still count long-running instances
if days_running >= self.days:
key = f"{instance_type}_{platform}_{region}"
instance_groups[key]['count'] += 1
instance_groups[key]['instances'].append({
'id': instance_id,
'type': instance_type,
'platform': platform,
'region': region,
'days_running': days_running
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
# Generate recommendations
for key, data in instance_groups.items():
if data['count'] > 0:
sample = data['instances'][0]
instance_type = sample['type']
platform = sample['platform']
region = sample['region']
count = data['count']
hourly_cost = self._estimate_hourly_cost(instance_type)
hours_per_month = 730 # Average hours in a month
savings = self._calculate_savings(hourly_cost, hours_per_month * count)
# Recommend best option (3yr all upfront for max savings)
best_option = savings['3yr_all_upfront']
self.total_potential_savings += best_option['annual_savings']
self.recommendations['ec2'].append({
'Region': region,
'Instance Type': instance_type,
'Platform': platform,
'Count': count,
'Current Monthly Cost': f"${hourly_cost * hours_per_month * count:.2f}",
'1yr Savings (monthly)': f"${savings['1yr_all_upfront']['monthly_savings']:.2f}",
'3yr Savings (monthly)': f"${savings['3yr_all_upfront']['monthly_savings']:.2f}",
'Annual Savings (3yr)': f"${best_option['annual_savings']:.2f}",
'Recommendation': '3yr Standard RI (All Upfront)'
})
print(f" Found {len(self.recommendations['ec2'])} RI opportunities")
def analyze_rds_instances(self):
"""Analyze RDS instances for RI opportunities."""
print(f"\n[2/2] Analyzing RDS instances (last {self.days} days)...")
instance_groups = defaultdict(lambda: {'count': 0, 'instances': []})
for region in self.regions:
try:
rds = self.session.client('rds', region_name=region)
instances = rds.describe_db_instances()
for instance in instances['DBInstances']:
instance_id = instance['DBInstanceIdentifier']
instance_class = instance['DBInstanceClass']
engine = instance['Engine']
multi_az = instance['MultiAZ']
# Check if instance has been running for the analysis period
create_time = instance['InstanceCreateTime']
days_running = (datetime.now(create_time.tzinfo) - create_time).days
if days_running >= self.days:
key = f"{instance_class}_{engine}_{multi_az}_{region}"
instance_groups[key]['count'] += 1
instance_groups[key]['instances'].append({
'id': instance_id,
'class': instance_class,
'engine': engine,
'multi_az': multi_az,
'region': region,
'days_running': days_running
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
# Generate recommendations
for key, data in instance_groups.items():
if data['count'] > 0:
sample = data['instances'][0]
instance_class = sample['class']
engine = sample['engine']
multi_az = sample['multi_az']
region = sample['region']
count = data['count']
# RDS pricing is roughly 2x EC2 for same instance type
# This is a rough approximation
base_hourly = self._estimate_hourly_cost(instance_class.replace('db.', ''))
hourly_cost = base_hourly * 2
if multi_az:
hourly_cost *= 2 # Multi-AZ doubles the cost
hours_per_month = 730
savings = self._calculate_savings(hourly_cost, hours_per_month * count)
best_option = savings['3yr_all_upfront']
self.total_potential_savings += best_option['annual_savings']
self.recommendations['rds'].append({
'Region': region,
'Instance Class': instance_class,
'Engine': engine,
'Multi-AZ': 'Yes' if multi_az else 'No',
'Count': count,
'Current Monthly Cost': f"${hourly_cost * hours_per_month * count:.2f}",
'1yr Savings (monthly)': f"${savings['1yr_all_upfront']['monthly_savings']:.2f}",
'3yr Savings (monthly)': f"${savings['3yr_all_upfront']['monthly_savings']:.2f}",
'Annual Savings (3yr)': f"${best_option['annual_savings']:.2f}",
'Recommendation': '3yr Standard RI (All Upfront)'
})
print(f" Found {len(self.recommendations['rds'])} RI opportunities")
def print_report(self):
"""Print RI recommendations report."""
print("\n" + "="*100)
print("RESERVED INSTANCE RECOMMENDATIONS")
print("="*100)
if self.recommendations['ec2']:
print("\nEC2 RESERVED INSTANCE OPPORTUNITIES")
print("-" * 100)
print(tabulate(self.recommendations['ec2'], headers='keys', tablefmt='grid'))
if self.recommendations['rds']:
print("\nRDS RESERVED INSTANCE OPPORTUNITIES")
print("-" * 100)
print(tabulate(self.recommendations['rds'], headers='keys', tablefmt='grid'))
print("\n" + "="*100)
print(f"TOTAL ANNUAL SAVINGS POTENTIAL: ${self.total_potential_savings:.2f}")
print("="*100)
print("\n\nRECOMMENDATIONS:")
print("- Standard RIs offer the highest discount but no flexibility to change instance type")
print("- Consider Convertible RIs if you need flexibility (slightly lower discount)")
print("- All Upfront payment offers maximum savings")
print("- Partial Upfront balances savings with cash flow")
print("- No Upfront minimizes initial cost but reduces savings")
print("\nNEXT STEPS:")
print("1. Review workload stability and growth projections")
print("2. Compare RI costs with Savings Plans for additional flexibility")
print("3. Purchase RIs through AWS Console or CLI")
print("4. Monitor RI utilization to ensure maximum benefit")
def run(self):
"""Run RI analysis."""
print(f"Analyzing AWS resources for RI opportunities...")
print(f"Looking at instances running for at least {self.days} days")
print(f"Scanning {len(self.regions)} region(s)...\n")
self.analyze_ec2_instances()
self.analyze_rds_instances()
self.print_report()
def main():
parser = argparse.ArgumentParser(
description='Analyze AWS resources for Reserved Instance opportunities',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Analyze all regions with default profile
python3 analyze_ri_recommendations.py
# Analyze specific region for instances running 60+ days
python3 analyze_ri_recommendations.py --region us-east-1 --days 60
# Use named profile
python3 analyze_ri_recommendations.py --profile production
"""
)
parser.add_argument('--region', help='AWS region (default: all regions)')
parser.add_argument('--profile', help='AWS profile name (default: default profile)')
parser.add_argument('--days', type=int, default=30,
help='Minimum days instance must be running (default: 30)')
args = parser.parse_args()
try:
analyzer = RIAnalyzer(
profile=args.profile,
region=args.region,
days=args.days
)
analyzer.run()
except Exception as e:
print(f"Error: {str(e)}", file=sys.stderr)
sys.exit(1)
if __name__ == '__main__':
main()

scripts/cost_anomaly_detector.py Executable file

@@ -0,0 +1,382 @@
#!/usr/bin/env python3
"""
Detect cost anomalies and unusual spending patterns in AWS.
This script:
- Analyzes Cost Explorer data for spending trends
- Detects anomalies and unexpected cost increases
- Identifies top cost drivers
- Compares period-over-period spending
Usage:
python3 cost_anomaly_detector.py [--profile PROFILE] [--days DAYS]
Requirements:
pip install boto3 tabulate
"""
import argparse
import boto3
from datetime import datetime, timedelta
from typing import List, Dict, Any
from collections import defaultdict
from tabulate import tabulate
import sys
class CostAnomalyDetector:
def __init__(self, profile: str = None, days: int = 30):
self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
self.days = days
self.ce = self.session.client('ce', region_name='us-east-1') # Cost Explorer is global
self.findings = {
'anomalies': [],
'top_services': [],
'trend_analysis': []
}
# Anomaly detection threshold
self.anomaly_threshold = 1.5 # 50% increase triggers alert
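        # Example: with a $100/day baseline, any day above $150 is flagged as an
        # anomaly, and a day above $200 (more than a 100% increase) is reported
        # as High severity.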
def _get_date_range(self, days: int) -> tuple:
"""Get start and end dates for analysis."""
end = datetime.now().date()
start = end - timedelta(days=days)
return start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')
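        # Example (illustrative): if run on 2025-11-29, _get_date_range(30)
        # returns ('2025-10-30', '2025-11-29').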
def analyze_daily_costs(self):
"""Analyze daily cost trends."""
print(f"\n[1/4] Analyzing daily costs (last {self.days} days)...")
start_date, end_date = self._get_date_range(self.days)
try:
response = self.ce.get_cost_and_usage(
TimePeriod={'Start': start_date, 'End': end_date},
Granularity='DAILY',
Metrics=['UnblendedCost'],
GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}]
)
# Aggregate daily costs
daily_totals = defaultdict(float)
service_costs = defaultdict(lambda: defaultdict(float))
for result in response['ResultsByTime']:
date = result['TimePeriod']['Start']
for group in result['Groups']:
service = group['Keys'][0]
cost = float(group['Metrics']['UnblendedCost']['Amount'])
daily_totals[date] += cost
service_costs[service][date] = cost
# Detect daily anomalies
dates = sorted(daily_totals.keys())
if len(dates) > 7:
# Calculate baseline (average of first week)
baseline = sum(daily_totals[d] for d in dates[:7]) / 7
for date in dates[7:]:
daily_cost = daily_totals[date]
if daily_cost > baseline * self.anomaly_threshold:
increase_pct = ((daily_cost - baseline) / baseline) * 100
# Find which service caused the spike
top_service = max(
((svc, service_costs[svc][date]) for svc in service_costs),
key=lambda x: x[1]
)
self.findings['anomalies'].append({
'Date': date,
'Daily Cost': f"${daily_cost:.2f}",
'Baseline': f"${baseline:.2f}",
'Increase': f"+{increase_pct:.1f}%",
'Top Service': top_service[0],
'Service Cost': f"${top_service[1]:.2f}",
'Severity': 'High' if increase_pct > 100 else 'Medium'
})
print(f" Detected {len(self.findings['anomalies'])} cost anomalies")
except Exception as e:
print(f" Error analyzing daily costs: {str(e)}")
def analyze_top_services(self):
"""Identify top cost drivers."""
print(f"\n[2/4] Analyzing top cost drivers...")
start_date, end_date = self._get_date_range(self.days)
try:
response = self.ce.get_cost_and_usage(
TimePeriod={'Start': start_date, 'End': end_date},
Granularity='MONTHLY',
Metrics=['UnblendedCost'],
GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}]
)
service_totals = {}
for result in response['ResultsByTime']:
for group in result['Groups']:
service = group['Keys'][0]
cost = float(group['Metrics']['UnblendedCost']['Amount'])
service_totals[service] = service_totals.get(service, 0) + cost
# Get top 10 services
sorted_services = sorted(service_totals.items(), key=lambda x: x[1], reverse=True)[:10]
total_cost = sum(service_totals.values())
for service, cost in sorted_services:
percentage = (cost / total_cost * 100) if total_cost > 0 else 0
self.findings['top_services'].append({
'Service': service,
'Cost': f"${cost:.2f}",
'Percentage': f"{percentage:.1f}%",
'Daily Average': f"${cost/self.days:.2f}"
})
print(f" Identified top {len(self.findings['top_services'])} cost drivers")
except Exception as e:
print(f" Error analyzing top services: {str(e)}")
def compare_periods(self):
"""Compare current period with previous period."""
print(f"\n[3/4] Comparing cost trends...")
# Current period
current_end = datetime.now().date()
current_start = current_end - timedelta(days=self.days)
# Previous period
previous_end = current_start - timedelta(days=1)
previous_start = previous_end - timedelta(days=self.days)
try:
# Get current period costs
current_response = self.ce.get_cost_and_usage(
TimePeriod={
'Start': current_start.strftime('%Y-%m-%d'),
'End': current_end.strftime('%Y-%m-%d')
},
Granularity='MONTHLY',
Metrics=['UnblendedCost'],
GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}]
)
# Get previous period costs
previous_response = self.ce.get_cost_and_usage(
TimePeriod={
'Start': previous_start.strftime('%Y-%m-%d'),
'End': previous_end.strftime('%Y-%m-%d')
},
Granularity='MONTHLY',
Metrics=['UnblendedCost'],
GroupBy=[{'Type': 'DIMENSION', 'Key': 'SERVICE'}]
)
# Aggregate by service
current_costs = {}
for result in current_response['ResultsByTime']:
for group in result['Groups']:
service = group['Keys'][0]
cost = float(group['Metrics']['UnblendedCost']['Amount'])
current_costs[service] = current_costs.get(service, 0) + cost
previous_costs = {}
for result in previous_response['ResultsByTime']:
for group in result['Groups']:
service = group['Keys'][0]
cost = float(group['Metrics']['UnblendedCost']['Amount'])
previous_costs[service] = previous_costs.get(service, 0) + cost
# Compare services
all_services = set(current_costs.keys()) | set(previous_costs.keys())
for service in all_services:
current = current_costs.get(service, 0)
previous = previous_costs.get(service, 0)
if previous > 0:
change_pct = ((current - previous) / previous) * 100
change_amount = current - previous
                elif current > 0:
                    # Service is new this period; report it as a 100% increase
                    change_pct = 100.0
                    change_amount = current
else:
continue
# Only report significant changes (> 10% or > $10)
if abs(change_pct) > 10 or abs(change_amount) > 10:
trend = "↑ Increase" if change_amount > 0 else "↓ Decrease"
self.findings['trend_analysis'].append({
'Service': service,
'Previous Period': f"${previous:.2f}",
'Current Period': f"${current:.2f}",
'Change': f"${change_amount:+.2f}",
'Change %': f"{change_pct:+.1f}%",
'Trend': trend
})
# Sort by absolute change
self.findings['trend_analysis'].sort(
key=lambda x: abs(float(x['Change'].replace('$', '').replace('+', '').replace('-', ''))),
reverse=True
)
print(f" Compared {len(self.findings['trend_analysis'])} services")
except Exception as e:
print(f" Error comparing periods: {str(e)}")
def get_forecast(self):
"""Get AWS cost forecast."""
print(f"\n[4/4] Getting cost forecast...")
try:
# Get 30-day forecast
start_date = datetime.now().date()
end_date = start_date + timedelta(days=30)
response = self.ce.get_cost_forecast(
TimePeriod={
'Start': start_date.strftime('%Y-%m-%d'),
'End': end_date.strftime('%Y-%m-%d')
},
Metric='UNBLENDED_COST',
Granularity='MONTHLY'
)
forecast_amount = float(response['Total']['Amount'])
print(f" 30-day forecast: ${forecast_amount:.2f}")
return forecast_amount
except Exception as e:
print(f" Error getting forecast: {str(e)}")
return None
def print_report(self, forecast_amount: float = None):
"""Print cost anomaly report."""
print("\n" + "="*110)
print("AWS COST ANOMALY DETECTION REPORT")
print("="*110)
# Anomalies
if self.findings['anomalies']:
print("\nCOST ANOMALIES DETECTED")
print("-" * 110)
print(tabulate(self.findings['anomalies'], headers='keys', tablefmt='grid'))
print("\n⚠️ These dates show unusual cost spikes. Investigate immediately.")
# Top Services
if self.findings['top_services']:
print("\nTOP COST DRIVERS")
print("-" * 110)
print(tabulate(self.findings['top_services'], headers='keys', tablefmt='grid'))
# Trend Analysis
if self.findings['trend_analysis']:
print("\nPERIOD-OVER-PERIOD COMPARISON")
print(f"(Current {self.days} days vs Previous {self.days} days)")
print("-" * 110)
# Show top 15 changes
print(tabulate(self.findings['trend_analysis'][:15], headers='keys', tablefmt='grid'))
# Forecast
if forecast_amount:
print("\nCOST FORECAST")
print("-" * 110)
print(f"Projected 30-day cost: ${forecast_amount:.2f}")
print(f"Projected monthly run rate: ${forecast_amount:.2f}")
print("\n" + "="*110)
print("\n\nRECOMMENDED ACTIONS:")
print("\n1. For Cost Anomalies:")
print(" - Review CloudWatch Logs for the affected service on anomaly dates")
print(" - Check for configuration changes or deployments")
print(" - Verify no unauthorized resource creation")
print(" - Set up billing alerts to catch future anomalies")
print("\n2. For Top Cost Drivers:")
print(" - Review each service for optimization opportunities")
print(" - Consider Reserved Instances for consistent workloads")
print(" - Implement auto-scaling to match demand")
print(" - Archive or delete unused resources")
print("\n3. Cost Monitoring Best Practices:")
print(" - Set up AWS Budgets with email/SNS alerts")
print(" - Enable Cost Anomaly Detection in AWS Console")
print(" - Tag resources for cost allocation and tracking")
print(" - Run this script weekly to track trends")
print(" - Review Cost Explorer monthly for detailed analysis")
print("\n4. Immediate Actions:")
print(" - aws budgets create-budget (set spending alerts)")
print(" - aws ce get-anomaly-subscriptions (enable anomaly detection)")
print(" - Review IAM policies to prevent unauthorized spending")
print(" - Implement cost allocation tags across all resources")
def run(self):
"""Run cost anomaly detection."""
print("="*80)
print("AWS COST ANOMALY DETECTOR")
print("="*80)
print(f"Analysis period: {self.days} days")
self.analyze_daily_costs()
self.analyze_top_services()
self.compare_periods()
forecast = self.get_forecast()
self.print_report(forecast)
def main():
parser = argparse.ArgumentParser(
description='Detect AWS cost anomalies and analyze spending trends',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Analyze last 30 days (default)
python3 cost_anomaly_detector.py
# Analyze last 60 days
python3 cost_anomaly_detector.py --days 60
# Use named profile
python3 cost_anomaly_detector.py --profile production
Note: This script requires Cost Explorer API access, which may incur small charges.
"""
)
parser.add_argument('--profile', help='AWS profile name (default: default profile)')
parser.add_argument('--days', type=int, default=30,
help='Days of cost data to analyze (default: 30)')
args = parser.parse_args()
try:
detector = CostAnomalyDetector(
profile=args.profile,
days=args.days
)
detector.run()
except Exception as e:
print(f"Error: {str(e)}", file=sys.stderr)
print("\nNote: Cost Explorer API access is required. Ensure:", file=sys.stderr)
print("1. Cost Explorer is enabled in AWS Console", file=sys.stderr)
print("2. IAM user has 'ce:GetCostAndUsage' and 'ce:GetCostForecast' permissions", file=sys.stderr)
sys.exit(1)
if __name__ == '__main__':
main()

scripts/detect_old_generations.py Executable file

@@ -0,0 +1,334 @@
#!/usr/bin/env python3
"""
Detect old generation EC2 and RDS instances that should be migrated to newer generations.
This script identifies:
- Old generation EC2 instances (t2 → t3, m4 → m5, etc.)
- ARM/Graviton migration opportunities
- Old generation RDS instances
- Calculates cost savings from migration
Usage:
python3 detect_old_generations.py [--region REGION] [--profile PROFILE]
Requirements:
pip install boto3 tabulate
"""
import argparse
import boto3
from typing import List, Dict, Any
from tabulate import tabulate
import sys
class OldGenerationDetector:
def __init__(self, profile: str = None, region: str = None):
self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
self.regions = [region] if region else self._get_all_regions()
self.findings = {
'ec2_migrations': [],
'graviton_opportunities': [],
'rds_migrations': []
}
self.total_savings = 0.0
# Migration mapping: old → new generation
self.ec2_migrations = {
# General Purpose
't2.micro': ('t3.micro', 0.10), # ~10% savings
't2.small': ('t3.small', 0.10),
't2.medium': ('t3.medium', 0.10),
't2.large': ('t3.large', 0.10),
't2.xlarge': ('t3.xlarge', 0.10),
't2.2xlarge': ('t3.2xlarge', 0.10),
'm4.large': ('m5.large', 0.04), # ~4% savings
'm4.xlarge': ('m5.xlarge', 0.04),
'm4.2xlarge': ('m5.2xlarge', 0.04),
'm4.4xlarge': ('m5.4xlarge', 0.04),
'm4.10xlarge': ('m5.12xlarge', 0.10),
'm4.16xlarge': ('m5.24xlarge', 0.10),
'm5.large': ('m6i.large', 0.04), # M5 → M6i
'm5.xlarge': ('m6i.xlarge', 0.04),
# Compute Optimized
'c4.large': ('c5.large', 0.10), # ~10% savings
'c4.xlarge': ('c5.xlarge', 0.10),
'c4.2xlarge': ('c5.2xlarge', 0.10),
'c4.4xlarge': ('c5.4xlarge', 0.10),
'c4.8xlarge': ('c5.9xlarge', 0.10),
'c5.large': ('c6i.large', 0.05), # C5 → C6i
'c5.xlarge': ('c6i.xlarge', 0.05),
# Memory Optimized
'r4.large': ('r5.large', 0.08), # ~8% savings
'r4.xlarge': ('r5.xlarge', 0.08),
'r4.2xlarge': ('r5.2xlarge', 0.08),
'r4.4xlarge': ('r5.4xlarge', 0.08),
'r4.8xlarge': ('r5.8xlarge', 0.08),
'r5.large': ('r6i.large', 0.03), # R5 → R6i
'r5.xlarge': ('r6i.xlarge', 0.03),
}
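        # Worked example using the cost map below: m4.large (~$0.10/hr) to
        # m5.large (~$0.096/hr) saves (0.10 - 0.096) * 730 = ~$2.92/month
        # per running instance.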
# Graviton migration opportunities (even better savings)
self.graviton_migrations = {
't3.micro': ('t4g.micro', 0.20), # ~20% savings
't3.small': ('t4g.small', 0.20),
't3.medium': ('t4g.medium', 0.20),
't3.large': ('t4g.large', 0.20),
't3.xlarge': ('t4g.xlarge', 0.20),
't3.2xlarge': ('t4g.2xlarge', 0.20),
'm5.large': ('m6g.large', 0.20),
'm5.xlarge': ('m6g.xlarge', 0.20),
'm5.2xlarge': ('m6g.2xlarge', 0.20),
'm5.4xlarge': ('m6g.4xlarge', 0.20),
'm6i.large': ('m6g.large', 0.20),
'm6i.xlarge': ('m6g.xlarge', 0.20),
'c5.large': ('c6g.large', 0.20),
'c5.xlarge': ('c6g.xlarge', 0.20),
'c5.2xlarge': ('c6g.2xlarge', 0.20),
'r5.large': ('r6g.large', 0.20),
'r5.xlarge': ('r6g.xlarge', 0.20),
'r5.2xlarge': ('r6g.2xlarge', 0.20),
}
# RDS instance migrations
self.rds_migrations = {
'db.t2.micro': ('db.t3.micro', 0.10),
'db.t2.small': ('db.t3.small', 0.10),
'db.t2.medium': ('db.t3.medium', 0.10),
'db.m4.large': ('db.m5.large', 0.05),
'db.m4.xlarge': ('db.m5.xlarge', 0.05),
'db.m4.2xlarge': ('db.m5.2xlarge', 0.05),
'db.r4.large': ('db.r5.large', 0.08),
'db.r4.xlarge': ('db.r5.xlarge', 0.08),
'db.r4.2xlarge': ('db.r5.2xlarge', 0.08),
}
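        # Worked example for RDS (using the rough 2x-EC2 pricing applied below):
        # db.t2.micro costs ~0.0116 * 2 = $0.0232/hr; a ~10% cheaper db.t3.micro
        # is ~$0.0209/hr, saving ~(0.0232 - 0.0209) * 730 = ~$1.69/month.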
def _get_all_regions(self) -> List[str]:
"""Get all enabled AWS regions."""
ec2 = self.session.client('ec2', region_name='us-east-1')
regions = ec2.describe_regions(AllRegions=False)
return [region['RegionName'] for region in regions['Regions']]
def _estimate_hourly_cost(self, instance_type: str) -> float:
"""Rough estimate of hourly cost."""
cost_map = {
't2.micro': 0.0116, 't2.small': 0.023, 't2.medium': 0.0464,
't3.micro': 0.0104, 't3.small': 0.0208, 't3.medium': 0.0416,
't3.large': 0.0832, 't3.xlarge': 0.1664, 't3.2xlarge': 0.3328,
't4g.micro': 0.0084, 't4g.small': 0.0168, 't4g.medium': 0.0336,
'm4.large': 0.10, 'm4.xlarge': 0.20, 'm4.2xlarge': 0.40,
'm5.large': 0.096, 'm5.xlarge': 0.192, 'm5.2xlarge': 0.384,
'm6i.large': 0.096, 'm6i.xlarge': 0.192,
'm6g.large': 0.077, 'm6g.xlarge': 0.154, 'm6g.2xlarge': 0.308,
'c4.large': 0.10, 'c4.xlarge': 0.199, 'c4.2xlarge': 0.398,
'c5.large': 0.085, 'c5.xlarge': 0.17, 'c5.2xlarge': 0.34,
'c6i.large': 0.085, 'c6i.xlarge': 0.17,
'c6g.large': 0.068, 'c6g.xlarge': 0.136, 'c6g.2xlarge': 0.272,
'r4.large': 0.133, 'r4.xlarge': 0.266, 'r4.2xlarge': 0.532,
'r5.large': 0.126, 'r5.xlarge': 0.252, 'r5.2xlarge': 0.504,
'r6i.large': 0.126, 'r6i.xlarge': 0.252,
'r6g.large': 0.101, 'r6g.xlarge': 0.202, 'r6g.2xlarge': 0.403,
}
return cost_map.get(instance_type, 0.10)
def detect_ec2_migrations(self):
"""Detect old generation EC2 instances."""
print("\n[1/3] Scanning for old generation EC2 instances...")
for region in self.regions:
try:
ec2 = self.session.client('ec2', region_name=region)
instances = ec2.describe_instances(
Filters=[{'Name': 'instance-state-name', 'Values': ['running', 'stopped']}]
)
for reservation in instances['Reservations']:
for instance in reservation['Instances']:
instance_id = instance['InstanceId']
instance_type = instance['InstanceType']
state = instance['State']['Name']
name_tag = next((tag['Value'] for tag in instance.get('Tags', [])
if tag['Key'] == 'Name'), 'N/A')
# Check for standard migration
if instance_type in self.ec2_migrations:
new_type, savings_pct = self.ec2_migrations[instance_type]
current_cost = self._estimate_hourly_cost(instance_type)
new_cost = self._estimate_hourly_cost(new_type)
monthly_savings = (current_cost - new_cost) * 730
if state == 'running':
self.total_savings += monthly_savings * 12
self.findings['ec2_migrations'].append({
'Region': region,
'Instance ID': instance_id,
'Name': name_tag,
'Current Type': instance_type,
'Recommended Type': new_type,
'State': state,
'Savings %': f"{savings_pct*100:.0f}%",
'Monthly Savings': f"${monthly_savings:.2f}",
'Migration Type': 'Standard Upgrade'
})
# Check for Graviton migration
elif instance_type in self.graviton_migrations:
new_type, savings_pct = self.graviton_migrations[instance_type]
current_cost = self._estimate_hourly_cost(instance_type)
new_cost = self._estimate_hourly_cost(new_type)
monthly_savings = (current_cost - new_cost) * 730
if state == 'running':
self.total_savings += monthly_savings * 12
self.findings['graviton_opportunities'].append({
'Region': region,
'Instance ID': instance_id,
'Name': name_tag,
'Current Type': instance_type,
'Graviton Type': new_type,
'State': state,
'Savings %': f"{savings_pct*100:.0f}%",
'Monthly Savings': f"${monthly_savings:.2f}",
'Note': 'Requires ARM64 compatibility'
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['ec2_migrations'])} standard migrations")
print(f" Found {len(self.findings['graviton_opportunities'])} Graviton opportunities")
def detect_rds_migrations(self):
"""Detect old generation RDS instances."""
print("\n[2/3] Scanning for old generation RDS instances...")
for region in self.regions:
try:
rds = self.session.client('rds', region_name=region)
instances = rds.describe_db_instances()
for instance in instances['DBInstances']:
instance_id = instance['DBInstanceIdentifier']
instance_class = instance['DBInstanceClass']
engine = instance['Engine']
status = instance['DBInstanceStatus']
if instance_class in self.rds_migrations:
new_class, savings_pct = self.rds_migrations[instance_class]
# RDS pricing is roughly 2x EC2
base_type = instance_class.replace('db.', '')
current_cost = self._estimate_hourly_cost(base_type) * 2
new_cost = current_cost * (1 - savings_pct)
monthly_savings = (current_cost - new_cost) * 730
if status == 'available':
self.total_savings += monthly_savings * 12
self.findings['rds_migrations'].append({
'Region': region,
'Instance ID': instance_id,
'Engine': engine,
'Current Class': instance_class,
'Recommended Class': new_class,
'Status': status,
'Savings %': f"{savings_pct*100:.0f}%",
'Monthly Savings': f"${monthly_savings:.2f}"
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['rds_migrations'])} RDS migrations")
def print_report(self):
"""Print migration recommendations report."""
print("\n" + "="*100)
print("OLD GENERATION INSTANCE MIGRATION REPORT")
print("="*100)
if self.findings['ec2_migrations']:
print("\nEC2 STANDARD MIGRATION OPPORTUNITIES")
print("-" * 100)
print(tabulate(self.findings['ec2_migrations'], headers='keys', tablefmt='grid'))
if self.findings['graviton_opportunities']:
print("\nEC2 GRAVITON (ARM64) MIGRATION OPPORTUNITIES")
print("-" * 100)
print(tabulate(self.findings['graviton_opportunities'], headers='keys', tablefmt='grid'))
print("\nNOTE: Graviton instances offer significant savings but require ARM64-compatible workloads")
print("Test thoroughly before migrating production workloads")
if self.findings['rds_migrations']:
print("\nRDS MIGRATION OPPORTUNITIES")
print("-" * 100)
print(tabulate(self.findings['rds_migrations'], headers='keys', tablefmt='grid'))
print("\n" + "="*100)
print(f"ESTIMATED ANNUAL SAVINGS: ${self.total_savings:.2f}")
print("="*100)
print("\n\nMIGRATION RECOMMENDATIONS:")
print("\nEC2 Standard Migrations (x86):")
print("- Generally drop-in replacements with better performance")
print("- Can be done with instance type change (stop/start required)")
print("- Minimal to no application changes needed")
print("\nGraviton Migrations (ARM64):")
print("- Requires ARM64-compatible applications and dependencies")
print("- Test in non-production first")
print("- Most modern languages/frameworks support ARM64")
print("- Offers best price/performance ratio")
print("\nRDS Migrations:")
print("- Requires database instance modification")
print("- Triggers brief downtime during modification")
print("- Schedule during maintenance window")
print("- Test with Multi-AZ for minimal downtime")
def run(self):
"""Run old generation detection."""
print(f"Scanning for old generation instances across {len(self.regions)} region(s)...")
self.detect_ec2_migrations()
self.detect_rds_migrations()
self.print_report()
def main():
parser = argparse.ArgumentParser(
description='Detect old generation AWS instances and recommend migrations',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Scan all regions with default profile
python3 detect_old_generations.py
# Scan specific region
python3 detect_old_generations.py --region us-east-1
# Use named profile
python3 detect_old_generations.py --profile production
"""
)
parser.add_argument('--region', help='AWS region (default: all regions)')
parser.add_argument('--profile', help='AWS profile name (default: default profile)')
args = parser.parse_args()
try:
detector = OldGenerationDetector(
profile=args.profile,
region=args.region
)
detector.run()
except Exception as e:
print(f"Error: {str(e)}", file=sys.stderr)
sys.exit(1)
if __name__ == '__main__':
main()

scripts/find_unused_resources.py Executable file

@@ -0,0 +1,409 @@
#!/usr/bin/env python3
"""
Find unused AWS resources that are costing money.
This script identifies:
- Unattached EBS volumes
- Old EBS snapshots
- Unused Elastic IPs
- Idle NAT Gateways
- Idle EC2 instances (low utilization)
- Unattached load balancers
Usage:
python3 find_unused_resources.py [--region REGION] [--profile PROFILE] [--snapshot-age-days DAYS]
Requirements:
pip install boto3 tabulate
"""
import argparse
import boto3
from datetime import datetime, timedelta, timezone
from typing import List, Dict, Any
from tabulate import tabulate
import sys
class UnusedResourceFinder:
def __init__(self, profile: str = None, region: str = None, snapshot_age_days: int = 90):
self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
self.regions = [region] if region else self._get_all_regions()
self.snapshot_age_days = snapshot_age_days
self.findings = {
'ebs_volumes': [],
'snapshots': [],
'elastic_ips': [],
'nat_gateways': [],
'idle_instances': [],
'load_balancers': []
}
self.total_cost_estimate = 0.0
def _get_all_regions(self) -> List[str]:
"""Get all enabled AWS regions."""
ec2 = self.session.client('ec2', region_name='us-east-1')
regions = ec2.describe_regions(AllRegions=False)
return [region['RegionName'] for region in regions['Regions']]
def find_unattached_volumes(self):
"""Find unattached EBS volumes."""
print("\n[1/6] Scanning for unattached EBS volumes...")
for region in self.regions:
try:
ec2 = self.session.client('ec2', region_name=region)
volumes = ec2.describe_volumes(
Filters=[{'Name': 'status', 'Values': ['available']}]
)
for volume in volumes['Volumes']:
# Rough cost estimate: gp3 $0.08/GB/month
monthly_cost = volume['Size'] * 0.08
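                    # e.g., a 100 GB volume is ~100 * $0.08 = $8.00/month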
self.total_cost_estimate += monthly_cost
self.findings['ebs_volumes'].append({
'Region': region,
'Volume ID': volume['VolumeId'],
'Size (GB)': volume['Size'],
'Type': volume['VolumeType'],
'Created': volume['CreateTime'].strftime('%Y-%m-%d'),
'Est. Monthly Cost': f"${monthly_cost:.2f}"
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['ebs_volumes'])} unattached volumes")
def find_old_snapshots(self):
"""Find old EBS snapshots."""
print(f"\n[2/6] Scanning for snapshots older than {self.snapshot_age_days} days...")
        cutoff_date = datetime.now(timezone.utc) - timedelta(days=self.snapshot_age_days)
for region in self.regions:
try:
ec2 = self.session.client('ec2', region_name=region)
# Get account ID
sts = self.session.client('sts')
account_id = sts.get_caller_identity()['Account']
snapshots = ec2.describe_snapshots(OwnerIds=[account_id])
for snapshot in snapshots['Snapshots']:
if snapshot['StartTime'] < cutoff_date:
# Snapshot cost: $0.05/GB/month
monthly_cost = snapshot['VolumeSize'] * 0.05
self.total_cost_estimate += monthly_cost
                        age_days = (datetime.now(timezone.utc) - snapshot['StartTime']).days
self.findings['snapshots'].append({
'Region': region,
'Snapshot ID': snapshot['SnapshotId'],
'Size (GB)': snapshot['VolumeSize'],
'Age (days)': age_days,
'Created': snapshot['StartTime'].strftime('%Y-%m-%d'),
'Est. Monthly Cost': f"${monthly_cost:.2f}"
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['snapshots'])} old snapshots")
def find_unused_elastic_ips(self):
"""Find unassociated Elastic IPs."""
print("\n[3/6] Scanning for unused Elastic IPs...")
for region in self.regions:
try:
ec2 = self.session.client('ec2', region_name=region)
addresses = ec2.describe_addresses()
for address in addresses['Addresses']:
if 'AssociationId' not in address:
# Unassociated EIP: ~$3.65/month
monthly_cost = 3.65
self.total_cost_estimate += monthly_cost
self.findings['elastic_ips'].append({
'Region': region,
'Allocation ID': address['AllocationId'],
'Public IP': address.get('PublicIp', 'N/A'),
'Status': 'Unassociated',
'Est. Monthly Cost': f"${monthly_cost:.2f}"
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['elastic_ips'])} unused Elastic IPs")
def find_idle_nat_gateways(self):
"""Find NAT Gateways with low traffic."""
print("\n[4/6] Scanning for idle NAT Gateways...")
for region in self.regions:
try:
ec2 = self.session.client('ec2', region_name=region)
nat_gateways = ec2.describe_nat_gateways(
Filters=[{'Name': 'state', 'Values': ['available']}]
)
cloudwatch = self.session.client('cloudwatch', region_name=region)
for nat in nat_gateways['NatGateways']:
                    nat_id = nat['NatGatewayId']
                    # NAT Gateway base charge: ~$32.85/month (data processing not counted);
                    # defined before the try below so the except handler can reference it
                    monthly_cost = 32.85
                    # Check CloudWatch metrics for the last 7 days
                    end_time = datetime.now()
                    start_time = end_time - timedelta(days=7)
try:
metrics = cloudwatch.get_metric_statistics(
Namespace='AWS/NATGateway',
MetricName='BytesOutToSource',
Dimensions=[{'Name': 'NatGatewayId', 'Value': nat_id}],
StartTime=start_time,
EndTime=end_time,
Period=86400, # 1 day
Statistics=['Sum']
)
total_bytes = sum([point['Sum'] for point in metrics['Datapoints']])
avg_gb_per_day = (total_bytes / (1024**3)) / 7
                        # Flag as idle if averaging less than 1 GB/day; only idle
                        # gateways count toward the savings estimate
                        if avg_gb_per_day < 1:
                            self.total_cost_estimate += monthly_cost
self.findings['nat_gateways'].append({
'Region': region,
'NAT Gateway ID': nat_id,
'VPC': nat.get('VpcId', 'N/A'),
'Avg Traffic (GB/day)': f"{avg_gb_per_day:.2f}",
'Status': 'Low Traffic',
'Est. Monthly Cost': f"${monthly_cost:.2f}"
})
                    except Exception:
                        # If we can't get metrics, still report the NAT Gateway
                        self.total_cost_estimate += monthly_cost
self.findings['nat_gateways'].append({
'Region': region,
'NAT Gateway ID': nat_id,
'VPC': nat.get('VpcId', 'N/A'),
'Avg Traffic (GB/day)': 'N/A',
'Status': 'Metrics Unavailable',
'Est. Monthly Cost': f"${monthly_cost:.2f}"
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['nat_gateways'])} idle NAT Gateways")
def find_idle_instances(self):
"""Find EC2 instances with low CPU utilization."""
print("\n[5/6] Scanning for idle EC2 instances...")
for region in self.regions:
try:
ec2 = self.session.client('ec2', region_name=region)
cloudwatch = self.session.client('cloudwatch', region_name=region)
instances = ec2.describe_instances(
Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]
)
for reservation in instances['Reservations']:
for instance in reservation['Instances']:
instance_id = instance['InstanceId']
instance_type = instance['InstanceType']
# Check CPU utilization for the last 7 days
end_time = datetime.now()
start_time = end_time - timedelta(days=7)
try:
metrics = cloudwatch.get_metric_statistics(
Namespace='AWS/EC2',
MetricName='CPUUtilization',
Dimensions=[{'Name': 'InstanceId', 'Value': instance_id}],
StartTime=start_time,
EndTime=end_time,
Period=3600, # 1 hour
Statistics=['Average']
)
                            if metrics['Datapoints']:
                                avg_cpu = sum([point['Average'] for point in metrics['Datapoints']]) / len(metrics['Datapoints'])
                                # Peak of the hourly averages (a smoothed proxy for peak CPU)
                                max_cpu = max([point['Average'] for point in metrics['Datapoints']])
# Flag instances with avg CPU < 5% and max < 15%
if avg_cpu < 5 and max_cpu < 15:
# Rough cost estimate (varies by instance type)
# This is approximate - you'd need pricing API for accuracy
monthly_cost = self._estimate_instance_cost(instance_type)
self.total_cost_estimate += monthly_cost
name_tag = next((tag['Value'] for tag in instance.get('Tags', []) if tag['Key'] == 'Name'), 'N/A')
self.findings['idle_instances'].append({
'Region': region,
'Instance ID': instance_id,
'Name': name_tag,
'Type': instance_type,
'Avg CPU (%)': f"{avg_cpu:.2f}",
'Max CPU (%)': f"{max_cpu:.2f}",
'Est. Monthly Cost': f"${monthly_cost:.2f}"
})
                        except Exception:
                            pass  # Skip instances without usable metrics
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['idle_instances'])} idle instances")
def find_unused_load_balancers(self):
"""Find load balancers with no targets."""
print("\n[6/6] Scanning for unused load balancers...")
for region in self.regions:
try:
# Check Application/Network Load Balancers
elbv2 = self.session.client('elbv2', region_name=region)
load_balancers = elbv2.describe_load_balancers()
for lb in load_balancers['LoadBalancers']:
lb_arn = lb['LoadBalancerArn']
lb_name = lb['LoadBalancerName']
lb_type = lb['Type']
# Check target groups
target_groups = elbv2.describe_target_groups(LoadBalancerArn=lb_arn)
has_healthy_targets = False
for tg in target_groups['TargetGroups']:
health = elbv2.describe_target_health(TargetGroupArn=tg['TargetGroupArn'])
if any(target['TargetHealth']['State'] == 'healthy' for target in health['TargetHealthDescriptions']):
has_healthy_targets = True
break
if not has_healthy_targets:
# ALB: ~$16.20/month, NLB: ~$22.35/month
monthly_cost = 22.35 if lb_type == 'network' else 16.20
self.total_cost_estimate += monthly_cost
self.findings['load_balancers'].append({
'Region': region,
'Name': lb_name,
'Type': lb_type.upper(),
'DNS': lb['DNSName'],
'Status': 'No Healthy Targets',
'Est. Monthly Cost': f"${monthly_cost:.2f}"
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['load_balancers'])} unused load balancers")
def _estimate_instance_cost(self, instance_type: str) -> float:
"""Rough estimate of monthly instance cost (On-Demand, us-east-1)."""
# This is a simplified approximation
cost_map = {
't2': 0.0116, 't3': 0.0104, 't3a': 0.0094,
'm5': 0.096, 'm5a': 0.086, 'm6i': 0.096,
'c5': 0.085, 'c5a': 0.077, 'c6i': 0.085,
'r5': 0.126, 'r5a': 0.113, 'r6i': 0.126,
}
# Extract family (e.g., 't3' from 't3.micro')
family = instance_type.split('.')[0]
hourly_cost = cost_map.get(family, 0.10) # Default to $0.10/hour
return hourly_cost * 730 # Hours per month
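        # Example (illustrative): a t3-family instance is ~$0.0104/hr * 730 =
        # ~$7.59/month; unknown families fall back to $0.10/hr = ~$73/month.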
def print_report(self):
"""Print findings report."""
print("\n" + "="*80)
print("AWS UNUSED RESOURCES REPORT")
print("="*80)
sections = [
('UNATTACHED EBS VOLUMES', 'ebs_volumes'),
('OLD SNAPSHOTS', 'snapshots'),
('UNUSED ELASTIC IPs', 'elastic_ips'),
('IDLE NAT GATEWAYS', 'nat_gateways'),
('IDLE EC2 INSTANCES', 'idle_instances'),
('UNUSED LOAD BALANCERS', 'load_balancers')
]
for title, key in sections:
findings = self.findings[key]
if findings:
print(f"\n{title} ({len(findings)} found)")
print("-" * 80)
print(tabulate(findings, headers='keys', tablefmt='grid'))
print("\n" + "="*80)
print(f"ESTIMATED MONTHLY SAVINGS: ${self.total_cost_estimate:.2f}")
print("="*80)
print("\nNOTE: Cost estimates are approximate. Actual savings may vary.")
print("Review each resource before deletion to avoid disrupting services.")
def run(self):
"""Run all scans."""
print(f"Scanning AWS account across {len(self.regions)} region(s)...")
print("This may take several minutes...\n")
self.find_unattached_volumes()
self.find_old_snapshots()
self.find_unused_elastic_ips()
self.find_idle_nat_gateways()
self.find_idle_instances()
self.find_unused_load_balancers()
self.print_report()
def main():
parser = argparse.ArgumentParser(
description='Find unused AWS resources that are costing money',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Scan all regions with default profile
python3 find_unused_resources.py
# Scan specific region with named profile
python3 find_unused_resources.py --region us-east-1 --profile production
# Find snapshots older than 180 days
python3 find_unused_resources.py --snapshot-age-days 180
"""
)
parser.add_argument('--region', help='AWS region (default: all regions)')
parser.add_argument('--profile', help='AWS profile name (default: default profile)')
parser.add_argument('--snapshot-age-days', type=int, default=90,
help='Snapshots older than this are flagged (default: 90)')
args = parser.parse_args()
try:
finder = UnusedResourceFinder(
profile=args.profile,
region=args.region,
snapshot_age_days=args.snapshot_age_days
)
finder.run()
except Exception as e:
print(f"Error: {str(e)}", file=sys.stderr)
sys.exit(1)
if __name__ == '__main__':
main()

scripts/rightsizing_analyzer.py Executable file

@@ -0,0 +1,387 @@
#!/usr/bin/env python3
"""
Analyze EC2 and RDS instances for rightsizing opportunities.
This script identifies:
- Oversized EC2 instances (low CPU/memory utilization)
- Oversized RDS instances (low CPU/connection utilization)
- Recommended smaller instance types
- Potential cost savings
Usage:
python3 rightsizing_analyzer.py [--region REGION] [--profile PROFILE] [--days DAYS]
Requirements:
pip install boto3 tabulate
"""
import argparse
import boto3
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
from tabulate import tabulate
import sys
class RightsizingAnalyzer:
def __init__(self, profile: str = None, region: str = None, days: int = 14):
self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
self.regions = [region] if region else self._get_all_regions()
self.days = days
self.findings = {
'ec2': [],
'rds': []
}
self.total_savings = 0.0
# CPU thresholds for rightsizing
self.cpu_thresholds = {
'underutilized': 15, # < 15% avg CPU
'low': 30, # < 30% avg CPU
}
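        # Example: an instance averaging 12% CPU is "underutilized" (High
        # severity); one averaging 22% falls under "low" (Medium severity).
        # Either is only flagged if its peak CPU also stayed under 60%.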
def _get_all_regions(self) -> List[str]:
"""Get all enabled AWS regions."""
ec2 = self.session.client('ec2', region_name='us-east-1')
regions = ec2.describe_regions(AllRegions=False)
return [region['RegionName'] for region in regions['Regions']]
def _estimate_hourly_cost(self, instance_type: str) -> float:
"""Rough estimate of hourly cost."""
cost_map = {
't3.micro': 0.0104, 't3.small': 0.0208, 't3.medium': 0.0416,
't3.large': 0.0832, 't3.xlarge': 0.1664, 't3.2xlarge': 0.3328,
'm5.large': 0.096, 'm5.xlarge': 0.192, 'm5.2xlarge': 0.384,
'm5.4xlarge': 0.768, 'm5.8xlarge': 1.536, 'm5.12xlarge': 2.304,
'm5.16xlarge': 3.072, 'm5.24xlarge': 4.608,
'c5.large': 0.085, 'c5.xlarge': 0.17, 'c5.2xlarge': 0.34,
'c5.4xlarge': 0.68, 'c5.9xlarge': 1.53, 'c5.12xlarge': 2.04,
'c5.18xlarge': 3.06, 'c5.24xlarge': 4.08,
'r5.large': 0.126, 'r5.xlarge': 0.252, 'r5.2xlarge': 0.504,
'r5.4xlarge': 1.008, 'r5.8xlarge': 2.016, 'r5.12xlarge': 3.024,
'r5.16xlarge': 4.032, 'r5.24xlarge': 6.048,
}
if instance_type not in cost_map:
family = instance_type.split('.')[0]
family_defaults = {'t3': 0.04, 'm5': 0.20, 'c5': 0.17, 'r5': 0.25}
return family_defaults.get(family, 0.10)
return cost_map[instance_type]
def _get_smaller_instance_type(self, current_type: str) -> Optional[str]:
"""Suggest a smaller instance type."""
        # Size progression within families. Note: this ladder is approximate;
        # not every family offers every size (for example, there is no
        # m5.3xlarge), so validate that a suggested type exists before migrating.
        sizes = ['nano', 'micro', 'small', 'medium', 'large', 'xlarge', '2xlarge',
                 '3xlarge', '4xlarge', '8xlarge', '9xlarge', '12xlarge', '16xlarge',
                 '18xlarge', '24xlarge', '32xlarge']
parts = current_type.split('.')
if len(parts) != 2:
return None
family, size = parts
if size not in sizes:
return None
current_idx = sizes.index(size)
if current_idx <= 0:
return None # Already at smallest
# Go down one size
new_size = sizes[current_idx - 1]
return f"{family}.{new_size}"
def analyze_ec2_instances(self):
"""Analyze EC2 instances for rightsizing."""
print(f"\n[1/2] Analyzing EC2 instances (last {self.days} days)...")
for region in self.regions:
try:
ec2 = self.session.client('ec2', region_name=region)
cloudwatch = self.session.client('cloudwatch', region_name=region)
instances = ec2.describe_instances(
Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]
)
for reservation in instances['Reservations']:
for instance in reservation['Instances']:
instance_id = instance['InstanceId']
instance_type = instance['InstanceType']
# Skip smallest instances (already optimized)
if any(size in instance_type for size in ['nano', 'micro', 'small']):
continue
name_tag = next((tag['Value'] for tag in instance.get('Tags', [])
if tag['Key'] == 'Name'), 'N/A')
# Get CloudWatch metrics
end_time = datetime.now()
start_time = end_time - timedelta(days=self.days)
try:
# CPU Utilization
cpu_metrics = cloudwatch.get_metric_statistics(
Namespace='AWS/EC2',
MetricName='CPUUtilization',
Dimensions=[{'Name': 'InstanceId', 'Value': instance_id}],
StartTime=start_time,
EndTime=end_time,
Period=3600,
Statistics=['Average', 'Maximum']
)
if not cpu_metrics['Datapoints']:
continue
avg_cpu = sum([p['Average'] for p in cpu_metrics['Datapoints']]) / len(cpu_metrics['Datapoints'])
max_cpu = max([p['Maximum'] for p in cpu_metrics['Datapoints']])
# Check if underutilized
if avg_cpu < self.cpu_thresholds['low'] and max_cpu < 60:
smaller_type = self._get_smaller_instance_type(instance_type)
if smaller_type:
current_cost = self._estimate_hourly_cost(instance_type)
new_cost = self._estimate_hourly_cost(smaller_type)
monthly_savings = (current_cost - new_cost) * 730
annual_savings = monthly_savings * 12
self.total_savings += annual_savings
# Determine severity
if avg_cpu < self.cpu_thresholds['underutilized']:
severity = "High"
else:
severity = "Medium"
self.findings['ec2'].append({
'Region': region,
'Instance ID': instance_id,
'Name': name_tag,
'Current Type': instance_type,
'Recommended Type': smaller_type,
'Avg CPU (%)': f"{avg_cpu:.1f}",
'Max CPU (%)': f"{max_cpu:.1f}",
'Monthly Savings': f"${monthly_savings:.2f}",
'Severity': severity
})
                        except Exception:
                            pass  # Skip instances without metrics
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['ec2'])} rightsizing opportunities")
def analyze_rds_instances(self):
"""Analyze RDS instances for rightsizing."""
print(f"\n[2/2] Analyzing RDS instances (last {self.days} days)...")
for region in self.regions:
try:
rds = self.session.client('rds', region_name=region)
cloudwatch = self.session.client('cloudwatch', region_name=region)
instances = rds.describe_db_instances()
for instance in instances['DBInstances']:
instance_id = instance['DBInstanceIdentifier']
instance_class = instance['DBInstanceClass']
engine = instance['Engine']
# Skip smallest instances
if any(size in instance_class for size in ['micro', 'small']):
continue
# Get CloudWatch metrics
end_time = datetime.now()
start_time = end_time - timedelta(days=self.days)
try:
# CPU Utilization
cpu_metrics = cloudwatch.get_metric_statistics(
Namespace='AWS/RDS',
MetricName='CPUUtilization',
Dimensions=[{'Name': 'DBInstanceIdentifier', 'Value': instance_id}],
StartTime=start_time,
EndTime=end_time,
Period=3600,
Statistics=['Average', 'Maximum']
)
# Database Connections
conn_metrics = cloudwatch.get_metric_statistics(
Namespace='AWS/RDS',
MetricName='DatabaseConnections',
Dimensions=[{'Name': 'DBInstanceIdentifier', 'Value': instance_id}],
StartTime=start_time,
EndTime=end_time,
Period=3600,
Statistics=['Average', 'Maximum']
)
if not cpu_metrics['Datapoints']:
continue
avg_cpu = sum([p['Average'] for p in cpu_metrics['Datapoints']]) / len(cpu_metrics['Datapoints'])
max_cpu = max([p['Maximum'] for p in cpu_metrics['Datapoints']])
avg_conns = 0
max_conns = 0
if conn_metrics['Datapoints']:
avg_conns = sum([p['Average'] for p in conn_metrics['Datapoints']]) / len(conn_metrics['Datapoints'])
max_conns = max([p['Maximum'] for p in conn_metrics['Datapoints']])
# Check if underutilized
if avg_cpu < self.cpu_thresholds['low'] and max_cpu < 60:
smaller_class = self._get_smaller_instance_type(instance_class)
if smaller_class:
# RDS pricing is roughly 2x EC2
base_type = instance_class.replace('db.', '')
current_cost = self._estimate_hourly_cost(base_type) * 2
new_base = smaller_class.replace('db.', '')
new_cost = self._estimate_hourly_cost(new_base) * 2
monthly_savings = (current_cost - new_cost) * 730
annual_savings = monthly_savings * 12
self.total_savings += annual_savings
# Determine severity
if avg_cpu < self.cpu_thresholds['underutilized']:
severity = "High"
else:
severity = "Medium"
self.findings['rds'].append({
'Region': region,
'Instance ID': instance_id,
'Engine': engine,
'Current Class': instance_class,
'Recommended Class': smaller_class,
'Avg CPU (%)': f"{avg_cpu:.1f}",
'Max CPU (%)': f"{max_cpu:.1f}",
'Avg Connections': f"{avg_conns:.0f}",
'Monthly Savings': f"${monthly_savings:.2f}",
'Severity': severity
})
                    except Exception:
                        pass  # Skip instances without metrics
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Found {len(self.findings['rds'])} rightsizing opportunities")
def print_report(self):
"""Print rightsizing report."""
print("\n" + "="*110)
print("RIGHTSIZING RECOMMENDATIONS")
print("="*110)
if self.findings['ec2']:
print("\nEC2 RIGHTSIZING OPPORTUNITIES")
print("-" * 110)
sorted_ec2 = sorted(self.findings['ec2'],
key=lambda x: float(x['Monthly Savings'].replace('$', '')),
reverse=True)
print(tabulate(sorted_ec2, headers='keys', tablefmt='grid'))
if self.findings['rds']:
print("\nRDS RIGHTSIZING OPPORTUNITIES")
print("-" * 110)
sorted_rds = sorted(self.findings['rds'],
key=lambda x: float(x['Monthly Savings'].replace('$', '')),
reverse=True)
print(tabulate(sorted_rds, headers='keys', tablefmt='grid'))
print("\n" + "="*110)
print(f"TOTAL ANNUAL SAVINGS: ${self.total_savings:.2f}")
print("="*110)
print("\n\nRIGHTSIZING BEST PRACTICES:")
print("\n1. Before Rightsizing:")
print(" - Review metrics over longer period (30+ days recommended)")
print(" - Check for seasonal patterns or cyclical workloads")
print(" - Verify that current size isn't required for burst capacity")
print(" - Review application performance requirements")
print("\n2. Rightsizing Process:")
print(" - Test in non-production environment first")
print(" - Schedule during maintenance window")
print(" - EC2: Stop instance → Change type → Start")
print(" - RDS: Modify instance (causes brief downtime)")
print(" - Monitor performance after change")
print("\n3. Important Considerations:")
print(" - Some instance families can't be changed (requires new instance)")
print(" - EBS-optimized settings may change with instance type")
print(" - Network performance varies by instance size")
print(" - Consider vertical scaling limits vs horizontal scaling")
print("\n4. Alternative Approaches:")
print(" - Consider serverless options (Lambda, Fargate, Aurora Serverless)")
print(" - Use Auto Scaling to match capacity to demand")
print(" - Implement horizontal scaling instead of larger instances")
print(" - Evaluate containerization for better resource utilization")
def run(self):
"""Run rightsizing analysis."""
print(f"Analyzing AWS resources for rightsizing opportunities...")
print(f"Metrics period: {self.days} days")
print(f"Scanning {len(self.regions)} region(s)...\n")
self.analyze_ec2_instances()
self.analyze_rds_instances()
self.print_report()
def main():
parser = argparse.ArgumentParser(
description='Analyze AWS resources for rightsizing opportunities',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Analyze all regions (14 days of metrics)
python3 rightsizing_analyzer.py
# Analyze with 30 days of metrics for better accuracy
python3 rightsizing_analyzer.py --days 30
# Analyze specific region
python3 rightsizing_analyzer.py --region us-east-1
# Use named profile
python3 rightsizing_analyzer.py --profile production
"""
)
parser.add_argument('--region', help='AWS region (default: all regions)')
parser.add_argument('--profile', help='AWS profile name (default: default profile)')
parser.add_argument('--days', type=int, default=14,
help='Days of metrics to analyze (default: 14)')
args = parser.parse_args()
try:
analyzer = RightsizingAnalyzer(
profile=args.profile,
region=args.region,
days=args.days
)
analyzer.run()
except Exception as e:
print(f"Error: {str(e)}", file=sys.stderr)
sys.exit(1)
if __name__ == '__main__':
main()

scripts/spot_recommendations.py Executable file

@@ -0,0 +1,281 @@
#!/usr/bin/env python3
"""
Analyze EC2 workloads and recommend Spot instance opportunities.
This script identifies:
- Fault-tolerant workloads suitable for Spot instances
- Potential savings from Spot vs On-Demand
- Instances in Auto Scaling Groups (good Spot candidates)
- Non-critical workloads based on tags
Usage:
python3 spot_recommendations.py [--region REGION] [--profile PROFILE]
Requirements:
pip install boto3 tabulate
"""
import argparse
import boto3
from datetime import datetime
from typing import Dict, List
from tabulate import tabulate
import sys
class SpotRecommendationAnalyzer:
def __init__(self, profile: str = None, region: str = None):
self.session = boto3.Session(profile_name=profile) if profile else boto3.Session()
self.regions = [region] if region else self._get_all_regions()
self.recommendations = []
self.total_savings = 0.0
# Average Spot savings (typically 60-90% discount)
self.spot_discount = 0.70 # Conservative 70% discount
# Tags that indicate Spot suitability
self.spot_friendly_tags = {
'Environment': ['dev', 'development', 'test', 'testing', 'staging', 'qa'],
'Workload': ['batch', 'processing', 'worker', 'ci', 'build'],
'CriticalLevel': ['low', 'non-critical', 'noncritical']
}
def _get_all_regions(self) -> List[str]:
"""Get all enabled AWS regions."""
ec2 = self.session.client('ec2', region_name='us-east-1')
regions = ec2.describe_regions(AllRegions=False)
return [region['RegionName'] for region in regions['Regions']]
def _estimate_hourly_cost(self, instance_type: str) -> float:
"""Rough estimate of hourly cost."""
cost_map = {
't3.micro': 0.0104, 't3.small': 0.0208, 't3.medium': 0.0416,
't3.large': 0.0832, 't3.xlarge': 0.1664, 't3.2xlarge': 0.3328,
'm5.large': 0.096, 'm5.xlarge': 0.192, 'm5.2xlarge': 0.384,
'm5.4xlarge': 0.768, 'm5.8xlarge': 1.536,
'c5.large': 0.085, 'c5.xlarge': 0.17, 'c5.2xlarge': 0.34,
'c5.4xlarge': 0.68, 'c5.9xlarge': 1.53,
'r5.large': 0.126, 'r5.xlarge': 0.252, 'r5.2xlarge': 0.504,
'r5.4xlarge': 1.008, 'r5.8xlarge': 2.016,
}
# Default fallback
if instance_type not in cost_map:
family = instance_type.split('.')[0]
family_defaults = {'t3': 0.04, 'm5': 0.10, 'c5': 0.09, 'r5': 0.13}
return family_defaults.get(family, 0.10)
return cost_map[instance_type]
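    # The table above is a static snapshot of On-Demand rates. A sketch of
    # looking the price up live via the AWS Price List API instead (the API
    # lives in us-east-1; Linux, shared tenancy assumed, and the first
    # matching SKU is taken):
    def _lookup_od_price(self, instance_type: str,
                         location: str = 'US East (N. Virginia)') -> float:
        """Fetch the On-Demand USD hourly rate for one instance type (sketch)."""
        import json
        pricing = self.session.client('pricing', region_name='us-east-1')
        resp = pricing.get_products(
            ServiceCode='AmazonEC2',
            Filters=[{'Type': 'TERM_MATCH', 'Field': f, 'Value': v} for f, v in [
                ('instanceType', instance_type), ('location', location),
                ('operatingSystem', 'Linux'), ('tenancy', 'Shared'),
                ('preInstalledSw', 'NA'), ('capacitystatus', 'Used')]],
            MaxResults=1,
        )
        # Each PriceList entry is a JSON string: terms -> OnDemand -> SKU ->
        # priceDimensions -> rate -> pricePerUnit.USD
        product = json.loads(resp['PriceList'][0])
        term = next(iter(product['terms']['OnDemand'].values()))
        dimension = next(iter(term['priceDimensions'].values()))
        return float(dimension['pricePerUnit']['USD'])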
def _calculate_suitability_score(self, instance: Dict, asg_member: bool) -> tuple:
"""Calculate Spot suitability score (0-100) and reasons."""
score = 0
reasons = []
# Check if in Auto Scaling Group (high suitability)
if asg_member:
score += 40
reasons.append("Part of Auto Scaling Group")
# Check tags for environment/workload type
tags = {tag['Key']: tag['Value'].lower() for tag in instance.get('Tags', [])}
for key, spot_values in self.spot_friendly_tags.items():
if key in tags and tags[key] in spot_values:
score += 20
reasons.append(f"{key}={tags[key]}")
# Check instance age (older instances might be more stable)
launch_time = instance['LaunchTime']
days_running = (datetime.now(launch_time.tzinfo) - launch_time).days
if days_running > 30:
score += 10
reasons.append(f"Running {days_running} days (stable)")
            # Check instance size; compare the exact size suffix so 'large'
            # doesn't also match 'xlarge'/'2xlarge' (smaller sizes tend to have
            # better Spot availability)
            instance_type = instance['InstanceType']
            if instance_type.split('.')[-1] in ('micro', 'small', 'medium', 'large'):
score += 15
reasons.append("Standard size (good Spot availability)")
# Default baseline
if not reasons:
score = 30
reasons.append("General compute workload")
return min(score, 100), reasons
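    # Worked example for _calculate_suitability_score (hypothetical instance):
    # an ASG member tagged Environment=dev, running 45 days on an m5.large,
    # scores 40 (ASG) + 20 (tag) + 10 (age) + 15 (size) = 85/100,
    # which lands in the "Highly Recommended" band below.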
def analyze_instances(self):
"""Analyze EC2 instances for Spot opportunities."""
print(f"\nAnalyzing EC2 instances across {len(self.regions)} region(s)...")
for region in self.regions:
try:
ec2 = self.session.client('ec2', region_name=region)
autoscaling = self.session.client('autoscaling', region_name=region)
# Get all Auto Scaling Groups
asg_instances = set()
try:
asgs = autoscaling.describe_auto_scaling_groups()
for asg in asgs['AutoScalingGroups']:
for instance in asg['Instances']:
asg_instances.add(instance['InstanceId'])
                except Exception:
                    pass  # ASG lookup is best-effort; treat failure as "not in an ASG"
                # Get all running instances. The 'instance-lifecycle' filter only
                # accepts 'spot'/'scheduled' (On-Demand instances lack the attribute
                # entirely), so Spot instances are skipped inside the loop instead.
                instances = ec2.describe_instances(
                    Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]
                )
                for reservation in instances['Reservations']:
                    for instance in reservation['Instances']:
                        if instance.get('InstanceLifecycle') == 'spot':
                            continue  # already running as Spot
                        instance_id = instance['InstanceId']
                        instance_type = instance['InstanceType']
asg_member = instance_id in asg_instances
# Calculate suitability
score, reasons = self._calculate_suitability_score(instance, asg_member)
                        # Calculate savings (~730 instance-hours per month)
                        hourly_cost = self._estimate_hourly_cost(instance_type)
                        monthly_savings = hourly_cost * 730 * self.spot_discount
annual_savings = monthly_savings * 12
self.total_savings += annual_savings
# Get instance name
name_tag = next((tag['Value'] for tag in instance.get('Tags', [])
if tag['Key'] == 'Name'), 'N/A')
# Determine recommendation
if score >= 70:
recommendation = "Highly Recommended"
elif score >= 50:
recommendation = "Recommended"
elif score >= 30:
recommendation = "Consider (with caution)"
else:
recommendation = "Not Recommended"
self.recommendations.append({
'Region': region,
'Instance ID': instance_id,
'Name': name_tag,
'Type': instance_type,
'In ASG': 'Yes' if asg_member else 'No',
'Suitability Score': f"{score}/100",
'Monthly Savings': f"${monthly_savings:.2f}",
'Recommendation': recommendation,
'Reasons': ', '.join(reasons[:2]) # Show top 2 reasons
})
except Exception as e:
print(f" Error scanning {region}: {str(e)}")
print(f" Analyzed {len(self.recommendations)} instances")
def print_report(self):
"""Print Spot recommendations report."""
print("\n" + "="*120)
print("SPOT INSTANCE RECOMMENDATIONS")
print("="*120)
# Sort by suitability score (descending)
sorted_recs = sorted(self.recommendations,
key=lambda x: int(x['Suitability Score'].split('/')[0]),
reverse=True)
        if sorted_recs:
            print(tabulate(sorted_recs, headers='keys', tablefmt='grid'))
        else:
            print("No running On-Demand instances found.")
print("\n" + "="*120)
print(f"TOTAL ANNUAL SAVINGS POTENTIAL: ${self.total_savings:.2f}")
print(f"(Assumes {int(self.spot_discount*100)}% average Spot discount)")
print("="*120)
print("\n\nSPOT INSTANCE BEST PRACTICES:")
print("\n1. Use Spot Instances for:")
print(" - Stateless applications")
print(" - Batch processing jobs")
print(" - CI/CD and build servers")
print(" - Data analysis and processing")
print(" - Dev/test/staging environments")
print(" - Auto Scaling Groups with mixed instance types")
print("\n2. Do NOT use Spot Instances for:")
print(" - Databases without replicas")
print(" - Stateful applications without checkpointing")
print(" - Real-time, latency-sensitive services")
print(" - Applications that can't handle interruptions")
print("\n3. Spot Best Practices:")
print(" - Use Spot Fleet or Auto Scaling Groups with Spot")
print(" - Diversify across multiple instance types")
print(" - Implement graceful shutdown handlers (2-minute warning)")
print(" - Use Spot Instance interruption notices")
print(" - Consider Spot + On-Demand mix (e.g., 70/30)")
print(" - Set appropriate max price (typically On-Demand price)")
print("\n4. Implementation Steps:")
print(" - Test Spot behavior in non-production first")
print(" - Implement interruption handling in your application")
print(" - Use EC2 Fleet or Auto Scaling with mixed instances policy")
print(" - Monitor Spot interruption rates")
print(" - Set up CloudWatch alarms for Spot terminations")
print("\n5. Tools to Use:")
print(" - EC2 Spot Instance Advisor (check interruption rates)")
print(" - Auto Scaling Groups with mixed instances policy")
print(" - Spot Fleet for diverse instance type selection")
print(" - AWS Spot Instances best practices guide")
def run(self):
"""Run Spot analysis."""
print("="*80)
print("AWS SPOT INSTANCE OPPORTUNITY ANALYZER")
print("="*80)
self.analyze_instances()
self.print_report()
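# Illustrative, self-contained sketch of acting on the 2-minute interruption
# warning mentioned above. Must run on the instance itself; assumes IMDSv1 is
# enabled (IMDSv2 would additionally require a session token header).
def check_spot_interruption(timeout: float = 1.0) -> bool:
    """Return True if this instance has a pending Spot interruption notice."""
    import urllib.error
    import urllib.request
    url = 'http://169.254.169.254/latest/meta-data/spot/instance-action'
    try:
        with urllib.request.urlopen(url, timeout=timeout) as resp:
            return resp.status == 200  # body names the action and its time
    except (urllib.error.URLError, OSError):
        return False  # 404 or timeout: no interruption scheduled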
def main():
parser = argparse.ArgumentParser(
description='Analyze EC2 workloads for Spot instance opportunities',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Analyze all regions with default profile
python3 spot_recommendations.py
# Analyze specific region
python3 spot_recommendations.py --region us-east-1
# Use named profile
python3 spot_recommendations.py --profile production
"""
)
parser.add_argument('--region', help='AWS region (default: all regions)')
parser.add_argument('--profile', help='AWS profile name (default: default profile)')
args = parser.parse_args()
try:
analyzer = SpotRecommendationAnalyzer(
profile=args.profile,
region=args.region
)
analyzer.run()
except Exception as e:
print(f"Error: {str(e)}", file=sys.stderr)
sys.exit(1)
if __name__ == '__main__':
main()