From cf9da0685054e5cf8a052d6e870e0cb5fb7a67ad Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 08:46:06 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 14 + README.md | 3 + commands/analyze.md | 262 +++++++++++ commands/ovn-dbs.md | 266 +++++++++++ plugin.lock.json | 93 ++++ skills/must-gather-analyzer/SKILL.md | 285 +++++++++++ .../scripts/analyze_clusteroperators.py | 199 ++++++++ .../scripts/analyze_clusterversion.py | 261 ++++++++++ .../scripts/analyze_etcd.py | 206 ++++++++ .../scripts/analyze_events.py | 201 ++++++++ .../scripts/analyze_network.py | 281 +++++++++++ .../scripts/analyze_nodes.py | 224 +++++++++ .../scripts/analyze_ovn_dbs.py | 444 ++++++++++++++++++ .../scripts/analyze_pods.py | 224 +++++++++ .../scripts/analyze_prometheus.py | 117 +++++ .../scripts/analyze_pvs.py | 235 +++++++++ 16 files changed, 3315 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 commands/analyze.md create mode 100644 commands/ovn-dbs.md create mode 100644 plugin.lock.json create mode 100644 skills/must-gather-analyzer/SKILL.md create mode 100755 skills/must-gather-analyzer/scripts/analyze_clusteroperators.py create mode 100755 skills/must-gather-analyzer/scripts/analyze_clusterversion.py create mode 100755 skills/must-gather-analyzer/scripts/analyze_etcd.py create mode 100755 skills/must-gather-analyzer/scripts/analyze_events.py create mode 100755 skills/must-gather-analyzer/scripts/analyze_network.py create mode 100755 skills/must-gather-analyzer/scripts/analyze_nodes.py create mode 100755 skills/must-gather-analyzer/scripts/analyze_ovn_dbs.py create mode 100755 skills/must-gather-analyzer/scripts/analyze_pods.py create mode 100755 skills/must-gather-analyzer/scripts/analyze_prometheus.py create mode 100755 skills/must-gather-analyzer/scripts/analyze_pvs.py diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..b8e6f04 --- 
/dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,14 @@ +{ + "name": "must-gather", + "description": "A plugin to analyze and report on must-gather data", + "version": "0.0.1", + "author": { + "name": "openshift" + }, + "skills": [ + "./skills" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2b9f5a7 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# must-gather + +A plugin to analyze and report on must-gather data diff --git a/commands/analyze.md b/commands/analyze.md new file mode 100644 index 0000000..1a54cea --- /dev/null +++ b/commands/analyze.md @@ -0,0 +1,262 @@ +--- +description: Quick analysis of must-gather data - runs all analysis scripts and provides comprehensive cluster diagnostics +argument-hint: [must-gather-path] [component] +--- + +## Name +must-gather:analyze + +## Synopsis +``` +/must-gather:analyze [must-gather-path] [component] +``` + +## Description + +The `analyze` command performs comprehensive analysis of OpenShift must-gather diagnostic data. It runs specialized Python analysis scripts to extract and summarize cluster health information across multiple components. + +The command can analyze: +- Cluster version and update status +- Cluster operator health (degraded, progressing, unavailable) +- Node conditions and resource status +- Pod failures, restarts, and crash loops +- Network configuration and OVN health +- OVN databases - logical topology, ACLs, pods +- Kubernetes events (warnings and errors) +- etcd cluster health and quorum status +- Persistent volume and claim status +- Prometheus alerts + +You can request analysis of the entire cluster or focus on a specific component. + +## Prerequisites + +**Required Directory Structure:** + +Must-gather data typically has this structure: +``` +must-gather/ +└── registry-ci-openshift-org-origin-...-sha256-<hash>/ + ├── cluster-scoped-resources/ + ├── namespaces/ + └── ... 
+``` + +The actual must-gather directory is the subdirectory with the hash name, not the parent directory. + +**Required Scripts:** + +Analysis scripts are bundled with this plugin at: +``` +<plugin-root>/skills/must-gather-analyzer/scripts/ +├── analyze_clusterversion.py +├── analyze_clusteroperators.py +├── analyze_nodes.py +├── analyze_pods.py +├── analyze_network.py +├── analyze_ovn_dbs.py +├── analyze_events.py +├── analyze_etcd.py +└── analyze_pvs.py +``` + +Where `<plugin-root>` is the directory where this plugin is installed (typically `~/.cursor/commands/ai-helpers/plugins/must-gather/` or similar). + +## Error Handling + +**CRITICAL: Script-Only Analysis** + +- **NEVER** attempt to analyze must-gather data directly using bash commands, grep, or manual file reading +- **ONLY** use the provided Python scripts in `plugins/must-gather/skills/must-gather-analyzer/scripts/` +- If scripts are missing or not found: + 1. Stop immediately + 2. Inform the user that the analysis scripts are not available + 3. Ask the user to ensure the scripts are installed at the correct path + 4. Do NOT attempt alternative approaches + +**Script Availability Check:** + +Before running any analysis: + +1. Locate the scripts directory by searching for a known script: + ```bash + SCRIPT_PATH=$(find ~ -name "analyze_clusteroperators.py" -path "*/must-gather/skills/must-gather-analyzer/scripts/*" 2>/dev/null | head -1) + + if [ -z "$SCRIPT_PATH" ]; then + echo "ERROR: Must-gather analysis scripts not found." + echo "Please ensure the must-gather plugin from ai-helpers is properly installed." + exit 1 + fi + + # All scripts are in the same directory, so just get the directory + SCRIPTS_DIR=$(dirname "$SCRIPT_PATH") + ``` + +2. If scripts cannot be found, STOP and report to the user: + ``` + The must-gather analysis scripts could not be located. Please ensure the must-gather plugin from openshift-eng/ai-helpers is properly installed in your Claude Code plugins directory. 
+ ``` + +## Implementation + +The command performs the following steps: + +1. **Validate Must-Gather Path**: + - If path not provided as argument, ask the user + - Check if path contains `cluster-scoped-resources/` and `namespaces/` directories + - If user provides root directory, automatically find the correct subdirectory + - Verify the path exists and is readable + +2. **Determine Analysis Scope**: + + **STEP 1: Check for SPECIFIC component keywords** + + If the user mentions a specific component, run ONLY that script: + - "pods", "pod status", "containers", "crashloop", "failing pods" → `analyze_pods.py` ONLY + - "etcd", "etcd health", "quorum" → `analyze_etcd.py` ONLY + - "network", "networking", "ovn", "connectivity" → `analyze_network.py` ONLY + - "ovn databases", "ovn-dbs", "ovn db", "logical switches", "acls" → `analyze_ovn_dbs.py` ONLY + - "nodes", "node status", "node conditions" → `analyze_nodes.py` ONLY + - "operators", "cluster operators", "degraded" → `analyze_clusteroperators.py` ONLY + - "version", "cluster version", "update", "upgrade" → `analyze_clusterversion.py` ONLY + - "events", "warnings", "errors" → `analyze_events.py` ONLY + - "storage", "pv", "pvc", "volumes", "persistent" → `analyze_pvs.py` ONLY + - "alerts", "prometheus", "monitoring" → `analyze_prometheus.py` ONLY + + **STEP 2: No specific component mentioned** + + If generic request like "analyze must-gather", "/must-gather:analyze", or "check the cluster", run ALL scripts in this order: + 1. ClusterVersion (`analyze_clusterversion.py`) + 2. Cluster Operators (`analyze_clusteroperators.py`) + 3. Nodes (`analyze_nodes.py`) + 4. Pods - problems only (`analyze_pods.py --problems-only`) + 5. Network (`analyze_network.py`) + 6. Events - warnings only (`analyze_events.py --type Warning --count 50`) + 7. etcd (`analyze_etcd.py`) + 8. Storage (`analyze_pvs.py`) + 9. Monitoring (`analyze_prometheus.py`) + +3. 
**Locate Plugin Scripts**: + - Use the script availability check from the Error Handling section to find the plugin root + - Store the scripts directory path in `$SCRIPTS_DIR` + +4. **Execute Analysis Scripts**: + ```bash + python3 "$SCRIPTS_DIR/