Files
gh-openshift-eng-ai-helpers…/skills/must-gather-analyzer/scripts/analyze_etcd.py
2025-11-30 08:46:06 +08:00

207 lines
6.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Analyze etcd information from must-gather data.
Shows etcd cluster health, member status, and diagnostics.
"""
import sys
import os
import json
from pathlib import Path
from typing import Dict, Any, List, Optional
def _read_etcd_json(json_file: Path) -> Any:
    """Return the decoded content of *json_file*, or None if it is absent or unreadable.

    A warning naming the file is written to stderr when the file exists but
    cannot be read or decoded; the failure is never propagated to the caller.
    """
    if not json_file.exists():
        return None
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        print(f"Warning: Failed to parse {json_file.name}: {e}", file=sys.stderr)
        return None


def _as_records(data: Any) -> List[Dict[str, Any]]:
    """Normalize decoded JSON into a list of dict records, dropping anything else."""
    if isinstance(data, dict):
        return [data]
    if isinstance(data, list):
        # Consumers call .get() on every record, so only dicts are usable.
        return [entry for entry in data if isinstance(entry, dict)]
    return []


def parse_etcd_info(must_gather_path: Path) -> Dict[str, Any]:
    """Parse the etcd_info directory for cluster health information.

    The directory is looked up directly under *must_gather_path* first and
    then one level below it; the first match wins (nested candidates are
    considered in sorted order so the choice is deterministic).

    Args:
        must_gather_path: Root of the must-gather directory tree.

    Returns:
        A dict with the keys ``member_health``, ``member_list``,
        ``endpoint_health`` and ``endpoint_status``, each mapping to a list
        of dict records. A list is empty when the corresponding file is
        missing, unreadable, or holds no usable records. ``member_health``
        and ``endpoint_health`` are both populated from
        ``endpoint_health.json``.
    """
    etcd_data: Dict[str, Any] = {
        'member_health': [],
        'member_list': [],
        'endpoint_health': [],
        'endpoint_status': [],
    }

    # Find etcd_info directory
    candidates = [must_gather_path / "etcd_info"]
    candidates.extend(sorted(must_gather_path.glob("*/etcd_info")))
    etcd_dirs = [candidate for candidate in candidates if candidate.is_dir()]
    if not etcd_dirs:
        return etcd_data
    etcd_info_dir = etcd_dirs[0]

    # Health: one file feeds both keys, so read it once and hand out
    # independent lists to avoid aliasing between the two entries.
    health_records = _as_records(_read_etcd_json(etcd_info_dir / "endpoint_health.json"))
    etcd_data['member_health'] = health_records
    etcd_data['endpoint_health'] = list(health_records)

    # Member list: either a bare list or an object wrapping it under 'members'.
    members = _read_etcd_json(etcd_info_dir / "member_list.json")
    if isinstance(members, dict):
        members = members.get('members')
    if isinstance(members, list):
        etcd_data['member_list'] = [m for m in members if isinstance(m, dict)]

    # Endpoint status
    etcd_data['endpoint_status'] = _as_records(
        _read_etcd_json(etcd_info_dir / "endpoint_status.json")
    )

    return etcd_data
def print_member_health(members: List[Dict[str, Any]]):
    """Write a table of per-member health records to stdout.

    Each record contributes one row with its endpoint (truncated to 60
    characters), a 'true'/'false' health flag, the 'took' value and any
    error text. A single notice is printed instead when *members* is empty.
    """
    if not members:
        print("No member health data found.")
        return

    row = "{:<60} {:<10} {:<10} {}"
    print("ETCD MEMBER HEALTH")
    print(row.format('ENDPOINT', 'HEALTH', 'TOOK', 'ERROR'))
    for record in members:
        address = record.get('endpoint', 'unknown')[:60]
        if record.get('health'):
            state = 'true'
        else:
            state = 'false'
        duration = record.get('took', '')
        problem = record.get('error', '')
        print(row.format(address, state, duration, problem))
def print_member_list(members: List[Dict[str, Any]]):
    """Write a table of etcd members to stdout.

    Columns are the member ID (read from 'ID', falling back to 'id'), the
    name, and the comma-joined peer and client URLs, each truncated to its
    column width. A single notice is printed instead when *members* is empty.
    """
    if not members:
        print("\nNo member list data found.")
        return

    row = "{:<20} {:<40} {:<60} {:<60}"
    print("\nETCD MEMBER LIST")
    print(row.format('ID', 'NAME', 'PEER URLS', 'CLIENT URLS'))
    for entry in members:
        # Prefer the upper-case key; only consult 'id' when 'ID' is absent.
        raw_id = entry['ID'] if 'ID' in entry else entry.get('id', 'unknown')
        ident = str(raw_id)[:20]
        label = entry.get('name', 'unknown')[:40]
        peers = ','.join(entry.get('peerURLs', []))
        clients = ','.join(entry.get('clientURLs', []))
        print(row.format(ident, label, peers[:60], clients[:60]))
def print_endpoint_status(endpoints: List[Dict[str, Any]]):
    """Write a table of etcd endpoint status records to stdout.

    For every record the 'Endpoint' value and selected fields of its
    'Status' mapping (leader, version, dbSize, isLearner) are shown. The
    database size is rendered in MB with one decimal, or '0MB' when it is
    missing or zero. A single notice is printed instead when *endpoints*
    is empty.
    """
    if not endpoints:
        print("\nNo endpoint status data found.")
        return

    row = "{:<60} {:<20} {:<10} {:<10} {}"
    print("\nETCD ENDPOINT STATUS")
    print(row.format('ENDPOINT', 'LEADER', 'VERSION', 'DB SIZE', 'IS LEARNER'))
    for entry in endpoints:
        address = entry.get('Endpoint', 'unknown')[:60]
        details = entry.get('Status', {})
        leader_id = str(details.get('leader', 'unknown'))[:20]
        release = details.get('version', 'unknown')[:10]
        size_bytes = details.get('dbSize', 0)
        if size_bytes:
            size_text = f"{size_bytes / (1024*1024):.1f}MB"
        else:
            size_text = '0MB'
        learner = 'true' if details.get('isLearner') else 'false'
        print(row.format(address, leader_id, release, size_text, learner))
def print_summary(etcd_data: Dict[str, Any]):
    """Print a summary of etcd cluster health to stdout.

    Args:
        etcd_data: Mapping that may contain 'member_health' and
            'member_list' lists of dict records.

    The member total comes from the member list, falling back to the number
    of health records when no member list is available. When there are no
    health records at all, health is reported as unknown and no healthy /
    quorum verdict is printed, since counting zero healthy members in that
    situation would raise a false "quorum lost" alarm.
    """
    member_health = etcd_data.get('member_health', [])
    member_list = etcd_data.get('member_list', [])

    # Without a member list, the health records are the best available
    # estimate of cluster size.
    total_members = len(member_list) or len(member_health)
    healthy_members = sum(1 for m in member_health if m.get('health'))

    separator = '=' * 80
    print(f"\n{separator}")
    print("ETCD CLUSTER SUMMARY")
    print(separator)
    print(f"Total Members: {total_members}")

    if not member_health:
        # Missing data is not the same as unhealthy members.
        print("Healthy Members: unknown (no health data found)")
        print(f"{separator}\n")
        return

    print(f"Healthy Members: {healthy_members}/{len(member_health)}")
    if healthy_members < total_members:
        print(" ⚠️ Warning: Not all members are healthy!")
    elif healthy_members == total_members:
        print(" ✅ All members healthy")

    # Check for quorum
    if total_members >= 3:
        quorum = (total_members // 2) + 1
        if healthy_members >= quorum:
            print(f" ✅ Quorum achieved ({healthy_members}/{quorum})")
        else:
            print(f" ❌ Quorum lost! ({healthy_members}/{quorum})")
    print(f"{separator}\n")
def analyze_etcd(must_gather_path: str):
    """Run the etcd analysis for one must-gather directory and print the report.

    Returns 1 when none of the parsed sections contains data, otherwise 0
    after printing the summary followed by the detailed tables.
    """
    parsed = parse_etcd_info(Path(must_gather_path))

    if all(not section for section in parsed.values()):
        print("No etcd_info data found in must-gather.")
        print("Expected location: etcd_info/ directory")
        return 1

    # Summary goes first, then each detail table in a fixed order.
    print_summary(parsed)
    detail_printers = (
        (print_member_health, 'member_health'),
        (print_member_list, 'member_list'),
        (print_endpoint_status, 'endpoint_status'),
    )
    for render, key in detail_printers:
        render(parsed.get(key, []))
    return 0
def main():
    """Command-line entry point; returns the process exit status.

    Expects the must-gather directory as the first argument. Prints usage
    to stderr and returns 1 when it is missing, and returns 1 with an error
    message when it is not an existing directory.
    """
    arguments = sys.argv[1:]
    if not arguments:
        usage_lines = (
            "Usage: analyze_etcd.py <must-gather-directory>",
            "\nExample:",
            " analyze_etcd.py ./must-gather.local.123456789",
        )
        for line in usage_lines:
            print(line, file=sys.stderr)
        return 1

    target = arguments[0]
    if os.path.isdir(target):
        return analyze_etcd(target)

    print(f"Error: Directory not found: {target}", file=sys.stderr)
    return 1
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)