#!/usr/bin/env python3
"""
Analyze Prometheus data from a must-gather archive.

Shows the active Prometheus alerts found in the gathered monitoring data.
"""

import sys
import os
import json
import argparse
from pathlib import Path
from typing import List, Dict, Any, Optional


def parse_json_file(file_path: Path) -> Optional[Dict[str, Any]]:
    """Parse a JSON file, returning None (and logging to stderr) on failure."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError, OSError) as e:
        print(f"Error: Failed to parse {file_path}: {e}", file=sys.stderr)
        return None
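

# One row is printed per alert: the namespace, alertname and severity labels
# get dedicated columns, and any remaining labels are printed verbatim in the
# LABELS column.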
def print_alerts_table(alerts: List[Dict[str, Any]]) -> None:
    """Print alerts in a table format."""
    if not alerts:
        print("No alerts found.")
        return

    print("ALERTS")
    print(f"{'STATE':<10} {'NAMESPACE':<50} {'NAME':<50} {'SEVERITY':<10} {'SINCE':<20} LABELS")

    for alert in alerts:
        state = alert.get('state', '')
        since = alert.get('activeAt', '')[:19] + 'Z'  # timestamps are always UTC.
        # Copy the labels so that popping the dedicated columns below does not
        # mutate the caller's alert objects.
        labels = dict(alert.get('labels', {}))
        namespace = labels.pop('namespace', '')[:50]
        name = labels.pop('alertname', '')[:50]
        severity = labels.pop('severity', '')[:10]

        print(f"{state:<10} {namespace:<50} {name:<50} {severity:<10} {since:<20} {labels}")
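

# analyze_prometheus() expects the must-gather archive to contain
# monitoring/prometheus/rules.json, shaped like a Prometheus /api/v1/rules
# response: {"status": "success", "data": {"groups": [...]}}.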
def analyze_prometheus(must_gather_path: str, namespace: Optional[str] = None) -> int:
    """Analyze Prometheus data in a must-gather directory."""
    base_path = Path(must_gather_path)

    # Retrieve active alerts.
    rules_path = base_path / "monitoring" / "prometheus" / "rules.json"
    rules = parse_json_file(rules_path)

    if rules is None:
        return 1

    status = rules.get("status", "")
    if status != "success":
        print(f"Error: {rules_path}: unexpected status '{status}'", file=sys.stderr)
        return 1

    if "data" not in rules or "groups" not in rules["data"]:
        print(f"Error: Unexpected JSON structure in {rules_path}", file=sys.stderr)
        return 1

    # Collect alerts from every alerting rule that is not inactive, optionally
    # filtered by namespace.
    alerts = []
    for group in rules["data"]["groups"]:
        for rule in group["rules"]:
            if rule["type"] == 'alerting' and rule["state"] != 'inactive':
                for alert in rule["alerts"]:
                    if not namespace or alert.get('labels', {}).get('namespace', '') == namespace:
                        alerts.append(alert)

    # Sort alerts by namespace, alertname and severity.
    alerts.sort(key=lambda x: (
        x.get('labels', {}).get('namespace', ''),
        x.get('labels', {}).get('alertname', ''),
        x.get('labels', {}).get('severity', ''),
    ))

    # Print results.
    print_alerts_table(alerts)

    # Summary.
    total_alerts = len(alerts)
    pending = sum(1 for alert in alerts if alert.get('state') == 'pending')
    firing = sum(1 for alert in alerts if alert.get('state') == 'firing')

    print(f"\n{'='*80}")
    print("SUMMARY")
    print(f"Active alerts: {total_alerts} total ({pending} pending, {firing} firing)")
    print(f"{'='*80}")

    return 0


def main() -> int:
    parser = argparse.ArgumentParser(
        description='Analyze Prometheus data from must-gather data',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s ./must-gather
  %(prog)s ./must-gather --namespace openshift-monitoring
"""
    )

    parser.add_argument('must_gather_path', help='Path to must-gather directory')
    parser.add_argument('-n', '--namespace', help='Filter information by namespace')

    args = parser.parse_args()

    if not os.path.isdir(args.must_gather_path):
        print(f"Error: Directory not found: {args.must_gather_path}", file=sys.stderr)
        return 1

    return analyze_prometheus(args.must_gather_path, args.namespace)


if __name__ == '__main__':
    sys.exit(main())