Initial commit
This commit is contained in:
117
skills/must-gather-analyzer/scripts/analyze_prometheus.py
Executable file
117
skills/must-gather-analyzer/scripts/analyze_prometheus.py
Executable file
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Analyze Prometheus data from must-gather data.
|
||||
Shows Prometheus status, targets, and active alerts.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
def parse_json_file(file_path: Path) -> Optional[Dict[str, Any]]:
|
||||
"""Parse a JSON file."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
doc = json.load(f)
|
||||
return doc
|
||||
except (FileNotFoundError, json.JSONDecodeError, OSError) as e:
|
||||
print(f"Error: Failed to parse {file_path}: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
def print_alerts_table(alerts):
|
||||
"""Print alerts in a table format."""
|
||||
if not alerts:
|
||||
print("No alerts found.")
|
||||
return
|
||||
|
||||
print("ALERTS")
|
||||
print(f"{'STATE':<10} {'NAMESPACE':<50} {'NAME':<50} {'SEVERITY':<10} {'SINCE':<20} LABELS")
|
||||
|
||||
for alert in alerts:
|
||||
state = alert.get('state', '')
|
||||
since = alert.get('activeAt', '')[:19] + 'Z' # timestamps are always UTC.
|
||||
labels = alert.get('labels', {})
|
||||
namespace = labels.pop('namespace', '')[:50]
|
||||
name = labels.pop('alertname', '')[:50]
|
||||
severity = labels.pop('severity', '')[:10]
|
||||
|
||||
print(f"{state:<10} {namespace:<50} {name:<50} {severity:<10} {since:<20} {labels}")
|
||||
|
||||
|
||||
def analyze_prometheus(must_gather_path: str, namespace: Optional[str] = None):
|
||||
"""Analyze Prometheus data in a must-gather directory."""
|
||||
base_path = Path(must_gather_path)
|
||||
|
||||
# Retrieve active alerts.
|
||||
rules_path = base_path / "monitoring" / "prometheus" / "rules.json"
|
||||
rules = parse_json_file(rules_path)
|
||||
|
||||
if rules is None:
|
||||
return 1
|
||||
status = rules.get("status", "")
|
||||
if status != "success":
|
||||
print(f"{rules_path}: unexpected status {status}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if "data" not in rules or "groups" not in rules["data"]:
|
||||
print(f"Error: Unexpected JSON structure in {rules_path}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
alerts = []
|
||||
for group in rules["data"]["groups"]:
|
||||
for rule in group["rules"]:
|
||||
if rule["type"] == 'alerting' and rule["state"] != 'inactive':
|
||||
for alert in rule["alerts"]:
|
||||
if namespace is None or namespace == '':
|
||||
alerts.append(alert)
|
||||
elif alert.get('labels', {}).get('namespace', '') == namespace:
|
||||
alerts.append(alert)
|
||||
|
||||
# Sort alerts by namespace, alertname and severity.
|
||||
alerts.sort(key=lambda x: (x.get('labels', {}).get('namespace', ''), x.get('labels', {}).get('alertname', ''), x.get('labels', {}).get('severity', '')))
|
||||
|
||||
# Print results
|
||||
print_alerts_table(alerts)
|
||||
|
||||
# Summary
|
||||
total_alerts = len(alerts)
|
||||
pending = sum(1 for alert in alerts if alert.get('state') == 'pending')
|
||||
firing = sum(1 for alert in alerts if alert.get('state') == 'firing')
|
||||
|
||||
print(f"\n{'='*80}")
|
||||
print(f"SUMMARY")
|
||||
print(f"Active alerts: {total_alerts} total ({pending} pending, {firing} firing)")
|
||||
print(f"{'='*80}")
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Analyze Prometheus data from must-gather data',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
%(prog)s ./must-gather
|
||||
%(prog)s ./must-gather --namespace openshift-monitoring
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument('must_gather_path', help='Path to must-gather directory')
|
||||
parser.add_argument('-n', '--namespace', help='Filter information by namespace')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.isdir(args.must_gather_path):
|
||||
print(f"Error: Directory not found: {args.must_gather_path}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
return analyze_prometheus(args.must_gather_path, args.namespace)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
||||
Reference in New Issue
Block a user