#!/usr/bin/env python3
"""
Analyze metrics from Prometheus or CloudWatch and detect anomalies.

Supports: rate-of-change analysis, spike detection, trend analysis.
"""
import argparse
import sys
import json
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Any, Optional
import statistics

try:
    import requests
except ImportError:
    print("⚠️ Warning: 'requests' library not found. Install with: pip install requests")
    sys.exit(1)

try:
    import boto3
except ImportError:
    boto3 = None


class MetricAnalyzer:
    def __init__(self, source: str, endpoint: Optional[str] = None, region: str = "us-east-1"):
        self.source = source
        self.endpoint = endpoint
        self.region = region

        if source == "cloudwatch" and boto3:
            self.cloudwatch = boto3.client('cloudwatch', region_name=region)
        elif source == "cloudwatch" and not boto3:
            print("⚠️ boto3 not installed. Install with: pip install boto3")
            sys.exit(1)

    def query_prometheus(self, query: str, hours: int = 24) -> List[Dict]:
        """Query Prometheus for metric data."""
        if not self.endpoint:
            print("❌ Prometheus endpoint required")
            sys.exit(1)

        try:
            # Query range for the last N hours
            end_time = datetime.now(timezone.utc)
            start_time = end_time - timedelta(hours=hours)

            params = {
                'query': query,
                'start': start_time.timestamp(),
                'end': end_time.timestamp(),
                'step': '5m'  # 5-minute resolution
            }

            response = requests.get(f"{self.endpoint}/api/v1/query_range", params=params, timeout=30)
            response.raise_for_status()

            data = response.json()
            if data['status'] != 'success':
                print(f"❌ Prometheus query failed: {data}")
                return []

            return data['data']['result']
        except Exception as e:
            print(f"❌ Error querying Prometheus: {e}")
            return []

    def query_cloudwatch(self, namespace: str, metric_name: str, dimensions: Dict[str, str],
                         hours: int = 24, stat: str = "Average") -> List[Dict]:
        """Query CloudWatch for metric data."""
        try:
            # Use timezone-aware UTC timestamps; CloudWatch treats naive times as UTC
            end_time = datetime.now(timezone.utc)
            start_time = end_time - timedelta(hours=hours)

            dimensions_list = [{'Name': k, 'Value': v} for k, v in dimensions.items()]

            response = self.cloudwatch.get_metric_statistics(
                Namespace=namespace,
                MetricName=metric_name,
                Dimensions=dimensions_list,
                StartTime=start_time,
                EndTime=end_time,
                Period=300,  # 5-minute intervals
                Statistics=[stat]
            )

            return sorted(response['Datapoints'], key=lambda x: x['Timestamp'])
        except Exception as e:
            print(f"❌ Error querying CloudWatch: {e}")
            return []

    def detect_anomalies(self, values: List[float], sensitivity: float = 2.0) -> Dict[str, Any]:
        """Detect anomalies using the standard deviation (z-score) method."""
        if len(values) < 10:
            return {
                "anomalies_detected": False,
                "message": "Insufficient data points for anomaly detection"
            }

        mean = statistics.mean(values)
        stdev = statistics.stdev(values)
        threshold_upper = mean + (sensitivity * stdev)
        threshold_lower = mean - (sensitivity * stdev)

        anomalies = []
        for i, value in enumerate(values):
            if value > threshold_upper or value < threshold_lower:
                anomalies.append({
                    "index": i,
                    "value": value,
                    "deviation": abs(value - mean) / stdev if stdev > 0 else 0
                })

        return {
            "anomalies_detected": len(anomalies) > 0,
            "count": len(anomalies),
            "anomalies": anomalies,
            "stats": {
                "mean": mean,
                "stdev": stdev,
                "threshold_upper": threshold_upper,
                "threshold_lower": threshold_lower,
                "total_points": len(values)
            }
        }
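    # Worked example of the thresholding above (hypothetical numbers): with a
    # mean of 100, a standard deviation of 10, and the default sensitivity of
    # 2.0, the accepted band is [80, 120]; a sample of 135 would be flagged as
    # an anomaly with a deviation of 3.5σ.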
    def analyze_trend(self, values: List[float]) -> Dict[str, Any]:
        """Analyze trend using simple linear regression."""
        if len(values) < 2:
            return {"trend": "unknown", "message": "Insufficient data"}

        n = len(values)
        x = list(range(n))
        x_mean = sum(x) / n
        y_mean = sum(values) / n

        numerator = sum((x[i] - x_mean) * (values[i] - y_mean) for i in range(n))
        denominator = sum((x[i] - x_mean) ** 2 for i in range(n))

        if denominator == 0:
            return {"trend": "flat", "slope": 0}

        slope = numerator / denominator

        # Determine trend direction
        if abs(slope) < 0.01 * abs(y_mean):  # Less than 1% change per interval
            trend = "stable"
        elif slope > 0:
            trend = "increasing"
        else:
            trend = "decreasing"

        return {
            "trend": trend,
            "slope": slope,
            "rate_of_change": (slope / y_mean * 100) if y_mean != 0 else 0
        }
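    # Worked example of the regression above (hypothetical numbers): for the
    # series [10, 12, 14, 16], slope = 2.0 and y_mean = 13, so rate_of_change
    # is roughly 15.4% per interval and the trend is reported as "increasing".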
def print_results(results: Dict[str, Any]):
    """Pretty print analysis results."""
    print("\n" + "=" * 60)
    print("📊 METRIC ANALYSIS RESULTS")
    print("=" * 60)

    if "error" in results:
        print(f"\n❌ Error: {results['error']}")
        return

    print(f"\n📈 Data Points: {results.get('data_points', 0)}")

    # Trend analysis
    if "trend" in results:
        trend_emoji = {
            "increasing": "📈",
            "decreasing": "📉",
            "stable": "➡️"
        }.get(results["trend"]["trend"], "❓")
        print(f"\n{trend_emoji} Trend: {results['trend']['trend'].upper()}")
        if "rate_of_change" in results["trend"]:
            print(f"   Rate of Change: {results['trend']['rate_of_change']:.2f}% per interval")

    # Anomaly detection
    if "anomalies" in results:
        anomaly_data = results["anomalies"]
        if anomaly_data.get("message"):
            # Detection was skipped (e.g., fewer than 10 data points)
            print(f"\n⚠️ {anomaly_data['message']}")
        elif anomaly_data["anomalies_detected"]:
            print(f"\n⚠️ ANOMALIES DETECTED: {anomaly_data['count']}")
            print(f"   Mean: {anomaly_data['stats']['mean']:.2f}")
            print(f"   Std Dev: {anomaly_data['stats']['stdev']:.2f}")
            print(f"   Threshold: [{anomaly_data['stats']['threshold_lower']:.2f}, "
                  f"{anomaly_data['stats']['threshold_upper']:.2f}]")

            print("\n   Top Anomalies:")
            for anomaly in sorted(anomaly_data['anomalies'], key=lambda x: x['deviation'], reverse=True)[:5]:
                print(f"   • Index {anomaly['index']}: {anomaly['value']:.2f} ({anomaly['deviation']:.2f}σ)")
        else:
            print("\n✅ No anomalies detected")

    print("\n" + "=" * 60)


def main():
    parser = argparse.ArgumentParser(
        description="Analyze metrics from Prometheus or CloudWatch",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Prometheus: Analyze request rate
  python3 analyze_metrics.py prometheus \\
      --endpoint http://localhost:9090 \\
      --query 'rate(http_requests_total[5m])' \\
      --hours 24

  # CloudWatch: Analyze CPU utilization
  python3 analyze_metrics.py cloudwatch \\
      --namespace AWS/EC2 \\
      --metric CPUUtilization \\
      --dimensions InstanceId=i-1234567890abcdef0 \\
      --hours 48
"""
    )

    parser.add_argument('source', choices=['prometheus', 'cloudwatch'], help='Metric source')
    parser.add_argument('--endpoint', help='Prometheus endpoint URL')
    parser.add_argument('--query', help='PromQL query')
    parser.add_argument('--namespace', help='CloudWatch namespace')
    parser.add_argument('--metric', help='CloudWatch metric name')
    parser.add_argument('--dimensions', help='CloudWatch dimensions (key=value,key2=value2)')
    parser.add_argument('--hours', type=int, default=24, help='Hours of data to analyze (default: 24)')
    parser.add_argument('--sensitivity', type=float, default=2.0,
                        help='Anomaly detection sensitivity (std deviations, default: 2.0)')
    parser.add_argument('--region', default='us-east-1', help='AWS region (default: us-east-1)')

    args = parser.parse_args()

    analyzer = MetricAnalyzer(args.source, args.endpoint, args.region)

    # Query metrics
    if args.source == 'prometheus':
        if not args.query:
            print("❌ --query required for Prometheus")
            sys.exit(1)

        print(f"🔍 Querying Prometheus: {args.query}")
        results = analyzer.query_prometheus(args.query, args.hours)

        if not results:
            print("❌ No data returned")
            sys.exit(1)

        # Extract values from the first result series
        values = [float(v[1]) for v in results[0].get('values', [])]

    elif args.source == 'cloudwatch':
        if not all([args.namespace, args.metric, args.dimensions]):
            print("❌ --namespace, --metric, and --dimensions required for CloudWatch")
            sys.exit(1)

        # Parse "key=value,key2=value2"; split on the first '=' so values may contain '='
        dims = dict(item.split('=', 1) for item in args.dimensions.split(','))

        print(f"🔍 Querying CloudWatch: {args.namespace}/{args.metric}")
        results = analyzer.query_cloudwatch(args.namespace, args.metric, dims, args.hours)

        if not results:
            print("❌ No data returned")
            sys.exit(1)

        # Matches the default 'Average' statistic requested in query_cloudwatch
        values = [point['Average'] for point in results]

    # Analyze metrics
    analysis_results = {
        "data_points": len(values),
        "trend": analyzer.analyze_trend(values),
        "anomalies": analyzer.detect_anomalies(values, args.sensitivity)
    }

    print_results(analysis_results)


if __name__ == "__main__":
    main()
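# Programmatic use (a minimal sketch; the CLI above is the primary interface, and
# the module name "analyze_metrics" is assumed from the file name in the epilog):
#
#   from analyze_metrics import MetricAnalyzer
#
#   analyzer = MetricAnalyzer("prometheus", endpoint="http://localhost:9090")
#   series = [10.0] * 50 + [95.0]      # synthetic series with a single spike
#   analyzer.detect_anomalies(series)  # flags the spike at index 50
#   analyzer.analyze_trend(series)     # reports the overall direction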