commit 07f84861a9519f73fcb95ce8fd75aeb17b53eff3 Author: Zhongwei Li Date: Sun Nov 30 08:46:11 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..51a67da --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,11 @@ +{ + "name": "olm", + "description": "OLM (Operator Lifecycle Manager) plugin for operator management and debugging", + "version": "0.1.0", + "author": { + "name": "github.com/openshift-eng" + }, + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..4e7651e --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# olm + +OLM (Operator Lifecycle Manager) plugin for operator management and debugging diff --git a/commands/approve.md b/commands/approve.md new file mode 100644 index 0000000..3aa5437 --- /dev/null +++ b/commands/approve.md @@ -0,0 +1,305 @@ +--- +description: Approve pending InstallPlans for operator installations and upgrades +argument-hint: <operator-name> [namespace] [--all] +--- + +## Name +olm:approve + +## Synopsis +``` +/olm:approve <operator-name> [namespace] [--all] +``` + +## Description +The `olm:approve` command approves pending InstallPlans for operators with manual approval mode. This is required for operators that have `installPlanApproval: Manual` in their Subscription to proceed with installation or upgrades. + +This command helps you: +- Approve operator installations that are waiting for manual approval +- Approve operator upgrades +- Review what will be installed/upgraded before approval +- Batch approve multiple pending InstallPlans + +## Implementation + +The command performs the following steps: + +1. 
**Parse Arguments**: + - `$1`: Operator name (required) - Name of the operator + - `$2`: Namespace (optional) - Namespace where operator is installed + - If not provided, searches for the operator across all namespaces + - `$3`: Flag (optional): + - `--all`: Approve all pending InstallPlans in the namespace + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has sufficient privileges + +3. **Locate Operator**: + - If namespace provided, verify operator exists: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - If no namespace provided, search across all namespaces: + ```bash + oc get subscription --all-namespaces -o json | jq -r '.items[] | select(.spec.name=="{operator-name}") | .metadata.namespace' + ``` + - If not found, display error with suggestions + +4. **Check Subscription Approval Mode**: + - Get Subscription approval mode: + ```bash + oc get subscription {operator-name} -n {namespace} -o jsonpath='{.spec.installPlanApproval}' + ``` + - If mode is "Automatic", display informational message: + ``` + ℹ️ Operator '{operator-name}' has automatic approval enabled. + InstallPlans are approved automatically and don't require manual intervention. + + Current Subscription approval mode: Automatic + + To switch to manual approval mode: + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"installPlanApproval":"Manual"}}' + ``` + - Exit if automatic (no approval needed) + +5. 
**Find Pending InstallPlans**: + - Get all InstallPlans for the operator: + ```bash + oc get installplan -n {namespace} -o json + ``` + - Filter for unapproved plans related to this operator: + ```bash + oc get installplan -n {namespace} -o json | \ + jq '.items[] | select(.spec.approved==false and any(.spec.clusterServiceVersionNames[]; contains("{operator-name}")))' + ``` + - If no pending InstallPlans found: + ``` + ✓ No pending InstallPlans found for operator '{operator-name}' + + The operator is up to date or already approved. + + To check operator status: /olm:status {operator-name} {namespace} + ``` + - Exit with success + +6. **Display InstallPlan Details**: + For each pending InstallPlan, display: + ``` + ⏸️ Pending InstallPlan Found + + InstallPlan: {installplan-name} + Namespace: {namespace} + Phase: {phase} + Approved: false + + ClusterServiceVersions to be installed/upgraded: + - {csv-name-1} ({version-1}) + - {csv-name-2} ({version-2}) + + Resources to be created/updated: + - CustomResourceDefinitions: {crd-count} + - ServiceAccounts: {sa-count} + - ClusterRoles: {role-count} + - Deployments: {deployment-count} + + [If upgrade:] + Current Version: {current-version} + Target Version: {target-version} + ``` + +7. **Request User Confirmation** (unless `--all` flag): + - Display confirmation prompt: + ``` + Do you want to approve this InstallPlan? (yes/no) + ``` + - If user says no, skip this InstallPlan + - If user says yes, proceed to approval + +8. 
**Approve InstallPlan**: + - Patch the InstallPlan to approve it: + ```bash + oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + ``` + - Verify approval: + ```bash + oc get installplan {installplan-name} -n {namespace} -o jsonpath='{.spec.approved}' + ``` + - Display confirmation: + ``` + ✓ InstallPlan approved: {installplan-name} + ``` + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators + +9. **Monitor InstallPlan Execution** (optional): + - Watch InstallPlan phase change to "Complete": + ```bash + oc get installplan {installplan-name} -n {namespace} -w --timeout=120s + ``` + - Display progress: + ``` + 🔄 InstallPlan executing... + ⏳ Installing resources... + ``` + +10. **Verify Installation/Upgrade**: + - Wait for CSV to reach "Succeeded" phase: + ```bash + oc get csv -n {namespace} -o json | \ + jq -r '.items[] | select(.status.phase=="Succeeded") | .metadata.name' + ``` + - Display result: + ``` + ✓ Operator installation/upgrade complete + + CSV: {csv-name} + Version: {version} + Phase: Succeeded + + To check operator status: /olm:status {operator-name} {namespace} + ``` + +11. **Handle Multiple InstallPlans** (if `--all` flag): + - Process all pending InstallPlans for the operator + - Display summary: + ``` + ✓ Approved {count} InstallPlan(s) + + Approved: + - {installplan-1} + - {installplan-2} + + Monitoring installation progress... + ``` + +12. **Display Approval Summary**: + ``` + ✓ Approval Complete! + + Operator: {operator-name} + Namespace: {namespace} + Approved InstallPlans: {count} + + InstallPlan Status: + - {installplan-1}: Complete + - {installplan-2}: Installing... 
+ + Monitor progress: watch oc get csv,installplan -n {namespace} + ``` + +## Return Value +- **Success**: InstallPlan(s) approved successfully +- **No Pending Plans**: No InstallPlans require approval +- **Automatic Mode**: Operator has automatic approval (no action needed) +- **Error**: Approval failed with specific error message +- **Format**: Structured output showing: + - Approved InstallPlan names + - Installation/upgrade status + - Next steps or related commands + +## Examples + +1. **Approve pending InstallPlan for an operator**: + ``` + /olm:approve openshift-cert-manager-operator + ``` + +2. **Approve with specific namespace**: + ``` + /olm:approve external-secrets-operator eso-operator + ``` + +3. **Approve all pending InstallPlans**: + ``` + /olm:approve openshift-cert-manager-operator cert-manager-operator --all + ``` + This approves all pending InstallPlans for the operator in the namespace. + +4. **Check and approve after upgrade command**: + ``` + /olm:upgrade openshift-cert-manager-operator --channel=tech-preview + # Wait for InstallPlan to be created + /olm:approve openshift-cert-manager-operator + ``` + +## Arguments +- **$1** (operator-name): Name of the operator (required) + - Example: "openshift-cert-manager-operator" + - Must match the operator's Subscription name +- **$2** (namespace): Namespace where operator is installed (optional) + - If not provided, searches all namespaces + - Example: "cert-manager-operator" +- **$3** (flag): Optional flag + - `--all`: Approve all pending InstallPlans for this operator + - Useful when multiple upgrades are pending + - Skips individual confirmation prompts + +## Notes + +- **Manual Approval Mode**: This command only works for operators with `installPlanApproval: Manual` in their Subscription +- **Automatic Operators**: Operators with automatic approval don't need this command +- **Review Before Approval**: Always review what will be installed/upgraded before approving +- **Multiple InstallPlans**: An 
operator may have multiple pending InstallPlans if updates accumulated while waiting for approval +- **InstallPlan Retention**: Approved InstallPlans remain in the namespace for audit purposes + +## Troubleshooting + +- **No pending InstallPlans**: + ```bash + # List all InstallPlans + oc get installplan -n {namespace} + + # Check if operator is in automatic mode + oc get subscription {operator-name} -n {namespace} -o jsonpath='{.spec.installPlanApproval}' + ``` + +- **InstallPlan not executing after approval**: + ```bash + # Check InstallPlan status + oc describe installplan {installplan-name} -n {namespace} + + # Check for errors + oc get events -n {namespace} --sort-by='.lastTimestamp' | grep InstallPlan + ``` + +- **CSV not reaching Succeeded phase**: + ```bash + # Check CSV status + oc describe csv -n {namespace} + + # Check operator deployment + oc get deployments -n {namespace} + + # Check operator logs + oc logs -n {namespace} deployment/{operator-deployment} + ``` + +- **Permission denied**: + ```bash + # Check if you can patch InstallPlans + oc auth can-i patch installplan -n {namespace} + ``` + +- **Multiple namespaces found**: + - Specify the namespace explicitly in the command: + ``` + /olm:approve {operator-name} {specific-namespace} + ``` + +## Related Commands + +- `/olm:status <operator-name>` - Check if InstallPlans are pending approval +- `/olm:upgrade <operator-name>` - Trigger upgrade and approve in one command +- `/olm:install <operator-name>` - Install operator with approval mode +- `/olm:list` - List operators and their approval modes + +## Additional Resources + +- [Red Hat OpenShift: Approving Operator Upgrades](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators) +- [Red Hat OpenShift: Updating Installed Operators](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-updating-operators) +- [Operator Lifecycle 
Manager Documentation](https://olm.operatorframework.io/) + + diff --git a/commands/catalog.md b/commands/catalog.md new file mode 100644 index 0000000..cd43964 --- /dev/null +++ b/commands/catalog.md @@ -0,0 +1,433 @@ +--- +description: Manage catalog sources for discovering and installing operators +argument-hint: <subcommand> [arguments] +--- + +## Name +olm:catalog + +## Synopsis +``` +/olm:catalog list +/olm:catalog add <name> <image> [--namespace=openshift-marketplace] +/olm:catalog remove <name> [--namespace=openshift-marketplace] +/olm:catalog refresh <name> [--namespace=openshift-marketplace] +/olm:catalog status <name> [--namespace=openshift-marketplace] +``` + +## Description +The `olm:catalog` command manages catalog sources for operator discovery and installation. Catalog sources provide the list of operators available for installation in the cluster. + +This command helps you: +- List all available catalog sources and their health status +- Add custom or private catalog sources +- Remove catalog sources +- Refresh catalog sources to get latest operator updates + +## Implementation + +### Subcommand: list + +1. **Get All CatalogSources**: + ```bash + oc get catalogsource -n openshift-marketplace -o json + ``` + +2. **Parse CatalogSource Data**: + For each catalog, extract: + - Name: `.metadata.name` + - Display Name: `.spec.displayName` + - Publisher: `.spec.publisher` + - Source Type: `.spec.sourceType` (grpc, configmap, etc.) + - Image: `.spec.image` (for grpc type) + - Connection State: `.status.connectionState.lastObservedState` + - Last Updated: `.status.connectionState.lastUpdatedTime` + - Number of Operators: Count from PackageManifests with this catalog + +3. **Get Catalog Pod Status**: + ```bash + oc get pods -n openshift-marketplace -l olm.catalogSource={catalog-name} + ``` + +4. 
**Format Output**: + ``` + ═══════════════════════════════════════════════════════════ + CATALOG SOURCES + ═══════════════════════════════════════════════════════════ + + NAME STATUS OPERATORS LAST UPDATED SOURCE TYPE + redhat-operators READY 150 2h ago grpc + certified-operators READY 45 3h ago grpc + community-operators READY 200 1h ago grpc + redhat-marketplace READY 30 4h ago grpc + custom-catalog FAILED 0 - grpc + + ═══════════════════════════════════════════════════════════ + DETAILS + ═══════════════════════════════════════════════════════════ + + redhat-operators: + Display Name: Red Hat Operators + Publisher: Red Hat + Image: registry.redhat.io/redhat/redhat-operator-index:v4.20 + Pod: redhat-operators-abc123 (Running) + + custom-catalog (FAILED): + Display Name: Custom Catalog + Publisher: My Company + Image: registry.example.com/custom-catalog:latest + Pod: custom-catalog-xyz789 (CrashLoopBackOff) + Error: ImagePullBackOff + + To troubleshoot: + /olm:catalog status custom-catalog + ``` + +### Subcommand: add + +1. **Parse Arguments**: + - `name`: Catalog source name (required) + - `image`: Catalog image (required) + - `--namespace`: Target namespace (default: openshift-marketplace) + - `--display-name`: Display name (optional) + - `--publisher`: Publisher name (optional) + +2. **Validate Image**: + - Check if image format is valid + - Optionally test image accessibility (if possible) + +3. **Create CatalogSource Manifest**: + ```yaml + apiVersion: operators.coreos.com/v1alpha1 + kind: CatalogSource + metadata: + name: {name} + namespace: {namespace} + spec: + sourceType: grpc + image: {image} + displayName: {display-name} + publisher: {publisher} + updateStrategy: + registryPoll: + interval: 30m + ``` + +4. **Apply CatalogSource**: + ```bash + oc apply -f /tmp/catalogsource-{name}.yaml + ``` + +5. **Wait for CatalogSource to be Ready**: + ```bash + oc wait --for=condition=READY catalogsource/{name} -n {namespace} --timeout=300s + ``` + +6. 
**Verify Pod is Running**: + ```bash + oc get pods -n {namespace} -l olm.catalogSource={name} + ``` + +7. **Display Result**: + ``` + ✓ Catalog source added: {name} + + Name: {name} + Namespace: {namespace} + Image: {image} + Status: READY + Pod: {pod-name} (Running) + + To search operators: /olm:search --catalog {name} + ``` + +### Subcommand: remove + +1. **Parse Arguments**: + - `name`: Catalog source name (required) + - `--namespace`: Namespace (default: openshift-marketplace) + +2. **Check if CatalogSource Exists**: + ```bash + oc get catalogsource {name} -n {namespace} --ignore-not-found + ``` + +3. **Check for Operators Using This Catalog**: + ```bash + oc get subscription --all-namespaces -o json | \ + jq -r '.items[] | select(.spec.source=="{name}") | "\(.metadata.namespace)/\(.metadata.name)"' + ``` + +4. **Display Warning** (if operators found): + ``` + WARNING: The following operators are using this catalog: + - namespace-1/operator-1 + - namespace-2/operator-2 + + Removing this catalog will prevent these operators from receiving updates. + + Do you want to continue? (yes/no) + ``` + +5. **Delete CatalogSource**: + ```bash + oc delete catalogsource {name} -n {namespace} + ``` + +6. **Wait for Pod to be Deleted**: + ```bash + oc wait --for=delete pod -l olm.catalogSource={name} -n {namespace} --timeout=60s + ``` + +7. **Display Result**: + ``` + ✓ Catalog source removed: {name} + ``` + +### Subcommand: refresh + +1. **Parse Arguments**: + - `name`: Catalog source name (required) + - `--namespace`: Namespace (default: openshift-marketplace) + +2. **Get Current CatalogSource**: + ```bash + oc get catalogsource {name} -n {namespace} -o json + ``` + +3. **Trigger Refresh by Deleting Pod**: + ```bash + oc delete pod -n {namespace} -l olm.catalogSource={name} + ``` + - This forces OLM to recreate the pod and re-fetch catalog data + +4. 
**Wait for New Pod to be Ready**: + ```bash + oc wait --for=condition=Ready pod -l olm.catalogSource={name} -n {namespace} --timeout=300s + ``` + +5. **Verify Catalog is Updated**: + ```bash + oc get catalogsource {name} -n {namespace} -o json | \ + jq -r '.status.connectionState.lastUpdatedTime' + ``` + +6. **Display Result**: + ``` + ✓ Catalog source refreshed: {name} + + Last Updated: {timestamp} + Status: READY + Pod: {pod-name} (Running) + + New operators may now be available: /olm:search --catalog {name} + ``` + +### Subcommand: status + +1. **Parse Arguments**: + - `name`: Catalog source name (required) + - `--namespace`: Namespace (default: openshift-marketplace) + +2. **Get CatalogSource Details**: + ```bash + oc get catalogsource {name} -n {namespace} -o json + ``` + +3. **Get Pod Details**: + ```bash + oc get pods -n {namespace} -l olm.catalogSource={name} -o json + ``` + +4. **Get Recent Events**: + ```bash + oc get events -n {namespace} --field-selector involvedObject.name={name} --sort-by='.lastTimestamp' + ``` + +5. **Count Available Operators**: + ```bash + oc get packagemanifests -n openshift-marketplace -o json | \ + jq -r '.items[] | select(.status.catalogSource=="{name}") | .metadata.name' | wc -l + ``` + +6. **Verify Catalog Connectivity**: + - Check if catalog is serving content by verifying PackageManifest count > 0 + - If count is 0 but pod is Running, indicates connectivity or catalog index issues + - Review catalog pod logs for gRPC errors, image pull issues, or index corruption: + ```bash + oc logs -n {namespace} {catalog-pod-name} + ``` + +7. 
**Format Comprehensive Status Report**: + ``` + ═══════════════════════════════════════════════════════════ + CATALOG SOURCE STATUS: {name} + ═══════════════════════════════════════════════════════════ + + General Information: + Name: {name} + Namespace: {namespace} + Display Name: {display-name} + Publisher: {publisher} + Source Type: {source-type} + Image: {image} + + Connection Status: + State: {state} (READY | CONNECTING | CONNECTION_FAILED) + Last Updated: {timestamp} + Last Successful: {timestamp} + + Pod Status: + Name: {pod-name} + Status: {status} (Running | CrashLoopBackOff | ImagePullBackOff) + Ready: {ready-containers}/{total-containers} + Restarts: {restart-count} + Age: {age} + + Catalog Content: + Operators Available: {count} + + [If issues detected:] + ⚠️ Issues Detected: + - Pod in CrashLoopBackOff + - Last update: 24h ago (stale) + - Connection state: CONNECTION_FAILED + + Recent Events: + {timestamp} Warning: Failed to pull image + {timestamp} Warning: Back-off restarting failed container + + Troubleshooting Steps: + 1. Check pod logs: oc logs -n {namespace} {pod-name} + 2. Check image accessibility + 3. Refresh catalog: /olm:catalog refresh {name} + 4. Verify network connectivity (for disconnected environments) + + Related Commands: + - Refresh: /olm:catalog refresh {name} + - List operators: /olm:search --catalog {name} + ``` + +## Return Value +- **list**: Table of all catalog sources with status +- **add**: Confirmation of added catalog with details +- **remove**: Confirmation of removed catalog +- **refresh**: Confirmation of refresh with updated timestamp +- **status**: Comprehensive status report for specific catalog + +## Examples + +1. **List all catalog sources**: + ``` + /olm:catalog list + ``` + +2. **Add custom catalog**: + ``` + /olm:catalog add my-catalog registry.example.com/my-catalog:v1.0 + ``` + +3. 
**Add catalog with metadata**: + ``` + /olm:catalog add my-catalog registry.example.com/catalog:latest \ + --display-name="My Custom Catalog" \ + --publisher="My Company" + ``` + +4. **Remove catalog**: + ``` + /olm:catalog remove my-catalog + ``` + +5. **Refresh catalog to get latest operators**: + ``` + /olm:catalog refresh redhat-operators + ``` + +6. **Check catalog health**: + ``` + /olm:catalog status custom-catalog + ``` + +7. **Add catalog for disconnected environment**: + ``` + /olm:catalog add disconnected-operators \ + mirror-registry.local:5000/olm/redhat-operators:v4.20 \ + --namespace=openshift-marketplace + ``` + +## Arguments + +### list +No arguments required. + +### add +- **name** (required): Name for the catalog source +- **image** (required): Container image containing the catalog +- **--namespace**: Target namespace (default: openshift-marketplace) +- **--display-name**: Human-readable display name +- **--publisher**: Publisher/organization name + +### remove +- **name** (required): Name of the catalog source to remove +- **--namespace**: Namespace (default: openshift-marketplace) + +### refresh +- **name** (required): Name of the catalog source to refresh +- **--namespace**: Namespace (default: openshift-marketplace) + +### status +- **name** (required): Name of the catalog source to check +- **--namespace**: Namespace (default: openshift-marketplace) + +## Troubleshooting + +- **Catalog pod failing**: + ```bash + # Check pod logs + oc logs -n openshift-marketplace {catalog-pod-name} + + # Check image pull issues + oc describe pod -n openshift-marketplace {catalog-pod-name} + ``` + +- **No operators showing up**: + ```bash + # Verify catalog is ready + /olm:catalog status {catalog-name} + + # Check PackageManifests + oc get packagemanifests -n openshift-marketplace + ``` + +- **Image pull errors (disconnected environment)**: + - Verify image registry is accessible + - Check pull secrets are configured + - Ensure image has been mirrored 
correctly + +- **Stale catalog data**: + ```bash + # Force refresh + /olm:catalog refresh {catalog-name} + ``` + +- **Connection failures**: + ```bash + # Check catalog source definition + oc get catalogsource {catalog-name} -n openshift-marketplace -o yaml + + # Run cluster diagnostics + /olm:diagnose --cluster + ``` + +## Related Commands + +- `/olm:search` - Search for operators in catalogs +- `/olm:install` - Install operators from catalogs +- `/olm:diagnose` - Diagnose catalog health issues + +## Additional Resources +- [Building Catalog Images with opm](https://olm.operatorframework.io/docs/tasks/creating-catalog-from-index/) +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + + diff --git a/commands/debug.md b/commands/debug.md new file mode 100644 index 0000000..d158bbd --- /dev/null +++ b/commands/debug.md @@ -0,0 +1,217 @@ +--- +description: Debug OLM issues using must-gather logs and source code analysis +argument-hint: <issue-description> <must-gather-path> [olm-version] +--- + +## Name +olm:debug + +## Synopsis +``` +/olm:debug <issue-description> <must-gather-path> [olm-version] +``` + +## Description +The `olm:debug` command analyzes OLM (Operator Lifecycle Manager) issues by correlating must-gather logs with the appropriate OLM source code. It automatically determines the OCP version from the must-gather logs, checks out the corresponding branch from the relevant OLM repositories, queries Jira for known bugs in the OCPBUGS project (OLM component), and provides detailed analysis and debugging insights. 
+ +## Arguments +- **$1** (required): Issue description - A brief description of the OLM issue being investigated +- **$2** (required): Must-gather path - Absolute or relative path to the must-gather log directory +- **$3** (optional): OLM version - Either `olmv0` (default) or `olmv1` + - `olmv0`: Uses operator-framework-olm repository + - `olmv1`: Uses operator-framework-operator-controller and cluster-olm-operator repositories + +## Implementation + +### Phase 1: Environment Setup and Validation + +1. **Validate arguments** + - Check that issue description is provided + - Verify must-gather path exists and is accessible + - Set OLM version to `olmv0` if not specified + +2. **Parse must-gather logs to determine OCP version** + - Look for version information in must-gather logs + - Common locations: + - `cluster-scoped-resources/core/nodes/*.yaml` - check node annotations + - `cluster-scoped-resources/config.openshift.io/clusterversions/*.yaml` + - Extract OCP version (e.g., `4.14`, `4.15`, `4.16`) + - Determine corresponding branch name (e.g., `release-4.14`) + +3. **Create working directory** + - Use `.work/olm-debug/<run-id>/` for temporary files + - Create subdirectories: `repos/`, `analysis/`, `logs/` + +### Phase 2: Repository Setup + +4. **Clone appropriate repositories based on OLM version** + + **For olmv0:** + - Clone `https://github.com/openshift/operator-framework-olm.git` + - Checkout branch `release-<ocp-version>` (e.g., `release-4.14`) + - If branch doesn't exist, try `main` or `master` branch + + **For olmv1:** + - Clone `https://github.com/openshift/operator-framework-operator-controller.git` + - Clone `https://github.com/openshift/cluster-olm-operator.git` + - For each repo, checkout branch `release-<ocp-version>` + - If branch doesn't exist, try `main` or `master` branch + +5. **Verify repository setup** + - Confirm branches are checked out successfully + - List key directories to understand codebase structure + +### Phase 3: Log Analysis + +6. 
**Extract relevant OLM logs from must-gather** + - For olmv0, look for: + - `namespaces/openshift-operator-lifecycle-manager/` logs + - OLM operator logs: `pods/catalog-operator-*/`, `pods/olm-operator-*/` + - CSV (ClusterServiceVersion) resources + - Subscription resources + - InstallPlan resources + - For olmv1, look for: + - `namespaces/openshift-operator-controller/` logs + - Operator controller logs + - ClusterExtension resources + - Catalog resources + +7. **Identify error patterns and relevant logs** + - Search for ERROR, WARN, FATAL level logs + - Extract stack traces + - Identify failed reconciliations + - Note timestamps of issues + +### Phase 4: Known Bug Search in Jira + +8. **Query Jira for known OLM bugs** + - Search OCPBUGS project with component "olm" + - Use Jira REST API or web scraping to fetch bugs + - Query parameters: + - Project: `OCPBUGS` + - Component: `olm` + - Affects Version: Matches the OCP version (e.g., `4.14.0`, `4.15.0`) + - Status: Open, In Progress, or Recently Resolved + - API endpoint example: + ``` + https://issues.redhat.com/rest/api/2/search?jql=project=OCPBUGS AND component=olm AND affectedVersion~"4.14" + ``` + +9. **Match errors with known bugs** + - Extract error messages and keywords from logs + - Search for matching patterns in Jira bug summaries and descriptions + - Look for similar symptoms in bug reports + - Identify potential matches based on: + - Error message similarity + - Affected OCP version + - Component affected (catalog-operator, olm-operator, etc.) + - Symptom descriptions + +10. **Categorize and prioritize matches** + - High priority: Exact error message match with same OCP version + - Medium priority: Similar symptoms with same component + - Low priority: Related issues in same version range + - Note bugs that have patches or workarounds available + +### Phase 5: Code Correlation + +11. 
**Map errors to source code** + - Search cloned repositories for: + - Error messages found in logs + - Function names from stack traces + - Related controllers and reconcilers + - Use grep/ripgrep to find relevant code sections + +12. **Analyze relevant code sections** + - Read the source code around identified errors + - Understand the reconciliation logic + - Identify potential root causes + +### Phase 6: Analysis and Recommendations + +13. **Generate detailed analysis report** + - Summary of the issue + - OCP and OLM version information + - Timeline of events from logs + - Known bugs section with Jira links + - Relevant code sections with explanations + - Potential root causes + - Recommended debugging steps + - Suggested fixes or workarounds + +14. **Create output files** + - `analysis.md`: Detailed analysis report + - `relevant-logs.txt`: Extracted relevant log entries + - `code-references.md`: Links to relevant source code sections with line numbers + - `known-bugs.md`: List of potentially related Jira bugs with match confidence + +### Error Handling + +- **Must-gather path not found**: Provide clear error message with expected path format +- **Unable to determine OCP version**: Ask user to provide OCP version manually +- **Repository clone failures**: Check network connectivity, provide manual clone instructions +- **Branch not found**: Fall back to main/master branch and warn user about version mismatch +- **No relevant logs found**: Provide guidance on what logs to look for manually +- **Jira access failures**: Continue with analysis if Jira is unavailable; note in report that known bug search was skipped +- **Jira authentication required**: Provide instructions for setting up Jira credentials if needed + +## Return Value + +The command generates the following outputs in `.work/olm-debug/<run-id>/`: + +- **analysis.md**: Comprehensive analysis report including: + - Issue summary + - Version information (OCP, OLM) + - Log analysis with timeline + - Known bugs 
section with links to matching Jira issues + - Code correlation and root cause analysis + - Recommendations + +- **relevant-logs.txt**: Extracted relevant log entries from must-gather + +- **code-references.md**: Links to relevant source code files with line numbers + +- **known-bugs.md**: List of potentially related Jira bugs including: + - Bug ID and link (e.g., OCPBUGS-12345) + - Bug summary and status + - Match confidence (High/Medium/Low) + - Affected versions + - Available workarounds or patches + +- **repos/**: Cloned repository directories for further manual investigation + +## Examples + +1. **Basic usage with olmv0 (default)**: + ``` + /olm:debug "CSV stuck in pending state" /path/to/must-gather + ``` + +2. **Debug olmv1 issue**: + ``` + /olm:debug "ClusterExtension installation failing" /path/to/must-gather olmv1 + ``` + +3. **Debug with detailed issue description**: + ``` + /olm:debug "Operator upgrade from v1.0 to v2.0 fails with dependency resolution error" ~/Downloads/must-gather.local.123456 olmv0 + ``` + +## Notes + +- The command requires `git` to be installed for cloning repositories +- Network access is required to clone from GitHub and access Jira +- Large must-gather archives may take time to process +- The analysis is based on pattern matching and may require manual verification +- For private repositories, ensure GitHub credentials are configured +- Jira access to https://issues.redhat.com/ may require authentication for full access +- Known bug matching is based on text similarity and may produce false positives +- Always verify suggested bug matches by reading the full bug description + +## See Also + +- OLM Documentation: https://olm.operatorframework.io/ +- OpenShift OLM: https://docs.openshift.com/container-platform/latest/operators/understanding/olm/olm-understanding-olm.html +- Must-gather documentation: https://docs.openshift.com/container-platform/latest/support/gathering-cluster-data.html +- OCPBUGS Jira Project: 
https://issues.redhat.com/projects/OCPBUGS/ +- Jira REST API: https://docs.atlassian.com/jira-software/REST/latest/ diff --git a/commands/diagnose.md b/commands/diagnose.md new file mode 100644 index 0000000..83f6d27 --- /dev/null +++ b/commands/diagnose.md @@ -0,0 +1,410 @@ +--- +description: Diagnose and optionally fix common OLM and operator issues +argument-hint: [operator-name] [namespace] [--fix] [--cluster] +--- + +## Name +olm:diagnose + +## Synopsis +``` +/olm:diagnose [operator-name] [namespace] [--fix] [--cluster] +``` + +## Description +The `olm:diagnose` command diagnoses common OLM and operator issues, including orphaned CRDs, stuck namespaces, failed installations, and catalog source problems. It can optionally attempt to fix detected issues automatically. + +This command helps you: +- Detect and clean up orphaned CRDs from deleted operators +- Fix namespaces stuck in Terminating state +- Identify and resolve failed operator installations +- Detect conflicting OperatorGroups +- Check catalog source health +- Identify resources preventing clean uninstallation +- Generate comprehensive troubleshooting reports + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Operator name (optional) - Specific operator to diagnose + - `$2`: Namespace (optional) - Specific namespace to check + - `$3+`: Flags (optional): + - `--fix`: Automatically attempt to fix detected issues (requires confirmation) + - `--cluster`: Run cluster-wide diagnostics (catalog sources, global CRDs, etc.) + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has cluster-admin or sufficient privileges + - Warn if running without `--fix` flag (dry-run mode) + +3. 
**Determine Scope**: + - **Operator-specific**: If operator name provided, focus on that operator + - **Namespace-specific**: If namespace provided, check all operators in that namespace + - **Cluster-wide**: If `--cluster` flag or no arguments, check entire cluster + +4. **Scan for Orphaned CRDs**: + - Get all CRDs in the cluster: + ```bash + oc get crd -o json + ``` + - For each CRD, check if there's a corresponding operator: + - Look for CSVs that own this CRD + - Look for active Subscriptions related to this CRD + - Identify orphaned CRDs (no owning operator found): + ```bash + # Find CRDs without active operators + # This is a simplified check - actual implementation should verify operator ownership + oc get crd -o json | jq -r '.items[] | + select(.metadata.annotations["operators.coreos.com/owner"] // "" | length == 0) | + .metadata.name' + ``` + - Check if CRs exist for each orphaned CRD (use the full CRD name, e.g. `certificates.cert-manager.io`): + ```bash + oc get {crd-name} --all-namespaces --ignore-not-found + ``` + - Report findings: + ``` + ⚠️ Orphaned CRDs Detected + + The following CRDs have no active operator: + - certificates.cert-manager.io (3 CR instances in 2 namespaces) + - issuers.cert-manager.io (5 CR instances in 3 namespaces) + + These CRDs may be leftovers from uninstalled operators. + + [If --fix flag:] + Do you want to delete these CRDs and their CRs? (yes/no) + WARNING: This will delete all custom resources of these types! + ``` + +5. 
**Check for Stuck Namespaces**: + - Get all namespaces in Terminating state: + ```bash + oc get namespaces -o json | jq -r '.items[] | select(.status.phase=="Terminating") | .metadata.name' + ``` + - For each stuck namespace: + - Check remaining resources: + ```bash + oc api-resources --verbs=list --namespaced -o name | \ + xargs -n 1 oc get --show-kind --ignore-not-found -n {namespace} + ``` + - Check namespace finalizers: + ```bash + oc get namespace {namespace} -o jsonpath='{.metadata.finalizers}' + ``` + - Identify blocking resources + - Report findings: + ``` + ❌ Stuck Namespace Detected + + Namespace: {namespace} + State: Terminating (stuck for {duration}) + + Blocking resources: + - CustomResourceDefinition: {crd-name} (finalizer: {finalizer}) + - ServiceAccount: {sa-name} (token secret) + + Finalizers on namespace: + - kubernetes + + [If --fix flag:] + Attempted fixes: + 1. Delete remaining resources + 2. Remove finalizers from CRs + 3. Patch namespace to remove finalizers (CAUTION) + + WARNING: Force-deleting namespace can cause cluster instability. + ``` + +6. 
**Scan for Failed Operator Installations**: + - Get all CSVs not in "Succeeded" phase: + ```bash + oc get csv --all-namespaces -o json | \ + jq -r '.items[] | select(.status.phase != "Succeeded") | "\(.metadata.namespace)/\(.metadata.name): \(.status.phase)"' + ``` + - For each failed CSV: + - Get failure reason: `.status.reason` + - Get failure message: `.status.message` + - Check related InstallPlan status + - Check deployment status + - Check recent events + - Report findings: + ``` + ❌ Failed Operator Installation + + Operator: {operator-name} + Namespace: {namespace} + CSV: {csv-name} + Phase: Failed + Reason: {reason} + Message: {message} + + Related InstallPlan: {installplan-name} (Phase: {phase}) + + Recent Events: + - {timestamp} Warning: {event-message} + + Troubleshooting suggestions: + - Check operator logs: oc logs -n {namespace} deployment/{deployment} + - Check image pull issues: oc describe pod -n {namespace} + - Verify catalog source health + - Check RBAC permissions + ``` + +7. **Check for Conflicting OperatorGroups**: + - Get all OperatorGroups per namespace: + ```bash + oc get operatorgroup --all-namespaces -o json + ``` + - Identify namespaces with multiple OperatorGroups (conflict): + ```bash + oc get operatorgroup --all-namespaces -o json | \ + jq -r '.items | group_by(.metadata.namespace) | .[] | select(length > 1) | .[0].metadata.namespace' + ``` + - Check for OperatorGroups with overlapping target namespaces + - Report findings: + ``` + ⚠️ Conflicting OperatorGroups Detected + + Namespace: {namespace} + OperatorGroups: {count} + - {og-1} (targets: {target-namespaces-1}) + - {og-2} (targets: {target-namespaces-2}) + + Multiple OperatorGroups in a namespace can cause conflicts. + Only one OperatorGroup should exist per namespace. + + [If --fix flag:] + Keep which OperatorGroup? (1/2) + ``` + +8. 
**Verify Catalog Source Health** (if `--cluster` flag): + - Get all CatalogSources: + ```bash + oc get catalogsource -n openshift-marketplace -o json + ``` + - For each catalog: + - Check status: `.status.connectionState.lastObservedState` + - Check pod status + - Check last update time + - Verify grpc connection + - Report findings: + ``` + 🔍 Catalog Source Health Check + + ✓ redhat-operators: READY (last updated: 2h ago) + ✓ certified-operators: READY (last updated: 3h ago) + ✓ community-operators: READY (last updated: 1h ago) + ❌ custom-catalog: CONNECTION_FAILED (pod: CrashLoopBackOff) + + [If issues found:] + Unhealthy Catalog: custom-catalog + Pod: custom-catalog-abc123 (Status: CrashLoopBackOff) + + To troubleshoot: + oc logs -n openshift-marketplace custom-catalog-abc123 + oc describe catalogsource custom-catalog -n openshift-marketplace + ``` + +9. **Check for Subscription/CSV Mismatches**: + - Get all Subscriptions: + ```bash + oc get subscription --all-namespaces -o json + ``` + - For each Subscription: + - Compare `installedCSV` with `currentCSV` + - Check if CSV exists + - Verify CSV phase + - Report findings: + ``` + ⚠️ Subscription/CSV Mismatch + + Operator: {operator-name} + Namespace: {namespace} + Installed CSV: {installed-csv} + Current CSV: {current-csv} + + CSV {installed-csv} not found in namespace. + This may indicate a failed installation or upgrade. + + Suggested fix: + oc delete subscription {operator-name} -n {namespace} + /olm:install {operator-name} {namespace} + ``` + +10. 
**Check for Pending Manual Approvals**: + - Find all unapproved InstallPlans: + ```bash + oc get installplan --all-namespaces -o json | \ + jq -r '.items[] | select(.spec.approved==false)' + ``` + - Report findings: + ``` + ℹ️ Pending Manual Approvals + + The following operators have pending InstallPlans requiring approval: + + - Operator: openshift-cert-manager-operator + Namespace: cert-manager-operator + InstallPlan: install-abc123 + Target Version: v1.14.0 + To approve: /olm:approve openshift-cert-manager-operator cert-manager-operator + + - Operator: external-secrets-operator + Namespace: eso-operator + InstallPlan: install-def456 + Target Version: v0.11.0 + To approve: /olm:approve external-secrets-operator eso-operator + ``` + +11. **Generate Comprehensive Report**: + ``` + ═══════════════════════════════════════════════════════════ + OLM HEALTH CHECK REPORT + ═══════════════════════════════════════════════════════════ + + Scan Scope: [Operator-specific | Namespace | Cluster-wide] + Scan Time: {timestamp} + + ✓ HEALTHY CHECKS: {count} + - Catalog sources operational + - No conflicting OperatorGroups + - All CSVs in Succeeded phase + + ⚠️ WARNINGS: {count} + - {warning-count} orphaned CRDs detected + - {warning-count} pending manual approvals + + ❌ ERRORS: {count} + - {error-count} stuck namespaces + - {error-count} failed operator installations + - {error-count} unhealthy catalog sources + + ═══════════════════════════════════════════════════════════ + DETAILED FINDINGS + ═══════════════════════════════════════════════════════════ + + [Details for each finding...] + + ═══════════════════════════════════════════════════════════ + RECOMMENDATIONS + ═══════════════════════════════════════════════════════════ + + 1. Clean up orphaned CRDs: /olm:diagnose --fix + 2. Fix stuck namespace: /olm:diagnose {namespace} --fix + 3. 
Approve pending upgrades: /olm:approve {operator-name} + + For more details on troubleshooting, see: + https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-troubleshooting-operator-issues + ``` + +12. **Auto-Fix Issues** (if `--fix` flag): + - For each detected issue, ask for confirmation + - Attempt fixes based on issue type: + - **Orphaned CRDs**: Delete CRs first, then CRDs + - **Stuck namespaces**: Delete remaining resources, remove finalizers + - **Failed installations**: Restart by deleting and recreating + - **Conflicting OperatorGroups**: Remove unwanted OperatorGroup + - **Unhealthy catalogs**: Restart catalog pod + - Display results of each fix attempt + - Generate final summary + +## Return Value +- **Success**: Report generated with findings +- **Issues Found**: Detailed report with warnings and errors +- **Fixed**: Issues resolved (if `--fix` flag used) +- **Format**: Structured report showing: + - Summary of health checks + - Detailed findings for each issue + - Recommendations and next steps + - Links to documentation + +## Examples + +1. **Check specific operator**: + ``` + /olm:diagnose openshift-cert-manager-operator + ``` + +2. **Cluster-wide health check**: + ``` + /olm:diagnose --cluster + ``` + +3. **Diagnose and fix issues**: + ``` + /olm:diagnose openshift-cert-manager-operator cert-manager-operator --fix + ``` + +4. 
**Full cluster scan with auto-fix**: + ``` + /olm:diagnose --cluster --fix + ``` + +## Arguments +- **$1** (operator-name): Name of specific operator to diagnose (optional) + - If not provided, checks all operators (or cluster-wide with `--cluster`) + - Example: "openshift-cert-manager-operator" +- **$2** (namespace): Specific namespace to check (optional) + - If not provided with operator-name, searches all namespaces + - Example: "cert-manager-operator" +- **$3+** (flags): Optional flags + - `--fix`: Attempt to automatically fix detected issues + - Prompts for confirmation before each fix + - Use with caution in production environments + - `--cluster`: Run cluster-wide diagnostics + - Checks catalog sources + - Scans for orphaned CRDs across all namespaces + - Identifies global issues + +## Troubleshooting + +- **Permission denied**: + ```bash + # Check required permissions + oc auth can-i get crd + oc auth can-i get csv --all-namespaces + oc auth can-i patch namespace + ``` + +- **Unable to fix stuck namespace**: + - Some resources may require manual intervention + - Check API service availability: + ```bash + oc get apiservice + ``` + +- **CRDs won't delete**: + ```bash + # Check for remaining CRs + oc get {crd-name} --all-namespaces + + # Check for finalizers + oc get crd {crd-name} -o jsonpath='{.metadata.finalizers}' + ``` + +- **Catalog source issues persist**: + ```bash + # Restart catalog pod + oc delete pod {catalog-pod-name} -n openshift-marketplace + + # Check catalog source definition + oc get catalogsource {catalog-name} -n openshift-marketplace -o yaml + ``` + +## Related Commands + +- `/olm:status {operator-name}` - Check specific operator status +- `/olm:list` - List all operators +- `/olm:uninstall {operator-name}` - Clean uninstall with orphan cleanup +- `/olm:approve {operator-name}` - Approve pending InstallPlans + +## Additional Resources + +- [Troubleshooting Operator Issues](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-troubleshooting-operator-issues) +- [Operator 
Lifecycle Manager Documentation](https://olm.operatorframework.io/) + + diff --git a/commands/install.md b/commands/install.md new file mode 100644 index 0000000..ccc0bcf --- /dev/null +++ b/commands/install.md @@ -0,0 +1,272 @@ +--- +description: Install a day-2 operator using Operator Lifecycle Manager +argument-hint: [namespace] [channel] [source] [--approval=Automatic|Manual] +--- + +## Name +olm:install + +## Synopsis +``` +/olm:install {operator-name} [namespace] [channel] [source] [--approval=Automatic|Manual] +``` + +## Description +The `olm:install` command installs a day-2 operator in an OpenShift cluster using Operator Lifecycle Manager (OLM). It automates the creation of the required namespace, OperatorGroup, and Subscription resources needed to install an operator. + +This command handles the complete operator installation workflow: +- Creates or verifies the target namespace exists +- Creates an OperatorGroup if needed +- Creates a Subscription to install the operator +- Verifies the installation by checking the operator's CSV (ClusterServiceVersion) status +- Provides detailed feedback on the installation progress + +The command is designed to work with operators from the OperatorHub catalog, including Red Hat certified operators, community operators, and custom catalog sources. + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Operator name (required) - The name of the operator to install (e.g., "openshift-cert-manager-operator") + - `$2`: Namespace (optional) - Target namespace for the operator. If not provided, defaults to the operator name with any "openshift-" prefix removed (e.g., "openshift-cert-manager-operator" installs into "cert-manager-operator") + - `$3`: Channel (optional) - Subscription channel. If not provided, discovers the default channel from the operator's PackageManifest + - `$4`: Source (optional) - CatalogSource name. 
Defaults to "redhat-operators" for Red Hat operators + - `$5+`: Flags (optional): + - `--approval=Automatic|Manual`: InstallPlan approval mode (default: Automatic) + - Automatic: Operator upgrades are automatically installed + - Manual: Operator upgrades require manual approval via `/olm:approve` or `oc patch` + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has cluster-admin or sufficient privileges + - If not installed or not authenticated, provide clear instructions + +3. **Discover Operator Metadata** (if channel or source not provided): + - Search for the operator in available catalogs: + ```bash + oc get packagemanifests -n openshift-marketplace | grep {operator-name} + ``` + - Get the PackageManifest details: + ```bash + oc get packagemanifest {operator-name} -n openshift-marketplace -o json + ``` + - Extract: + - Default channel: `.status.defaultChannel` + - CatalogSource: `.status.catalogSource` + - CatalogSourceNamespace: `.status.catalogSourceNamespace` + - If operator not found, provide error with list of available operators + +4. **Create Namespace**: + - Check if namespace exists: `oc get namespace {namespace} --ignore-not-found` + - If not exists, create it: + ```bash + oc create namespace {namespace} + ``` + - If exists, inform user and continue + +5. **Create OperatorGroup**: + - Check if OperatorGroup exists in the namespace: + ```bash + oc get operatorgroup -n {namespace} --ignore-not-found + ``` + - If no OperatorGroup exists, create one: + ```yaml + apiVersion: operators.coreos.com/v1 + kind: OperatorGroup + metadata: + name: {namespace}-operatorgroup + namespace: {namespace} + spec: + targetNamespaces: + - {namespace} + ``` + - Save to temporary file and apply: + ```bash + oc apply -f /tmp/operatorgroup-{operator-name}.yaml + ``` + - If OperatorGroup already exists, inform user and continue + +6. 
**Create Subscription**: + - Parse approval mode from flags (default: Automatic) + - Create Subscription manifest: + ```yaml + apiVersion: operators.coreos.com/v1alpha1 + kind: Subscription + metadata: + name: {operator-name} + namespace: {namespace} + spec: + channel: {channel} + name: {operator-name} + source: {source} + sourceNamespace: openshift-marketplace + installPlanApproval: {Automatic|Manual} + ``` + - Save to temporary file and apply: + ```bash + oc apply -f /tmp/subscription-{operator-name}.yaml + ``` + - Display the created subscription details + - If approval mode is Manual, display informational message: + ``` + ℹ️ InstallPlan approval set to Manual + You will need to manually approve InstallPlans for this operator. + Use: /olm:approve {operator-name} {namespace} + + Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators + ``` + +7. **Verify Installation**: + - Wait for InstallPlan to be created: + ```bash + oc get installplan -n {namespace} -l operators.coreos.com/operator={operator-name} + ``` + - If approval mode is Manual, check if InstallPlan needs approval: + ```bash + oc get installplan -n {namespace} -o json | jq '.items[] | select(.spec.approved==false)' + ``` + - If Manual and not approved, display message: + ``` + ⏸️ InstallPlan created but requires manual approval + + InstallPlan: {installplan-name} + To approve: /olm:approve {operator-name} {namespace} + Or manually: oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + + Waiting for approval... + ``` + - Wait for CSV to be created and reach "Succeeded" phase: + ```bash + oc get csv -n {namespace} -w + ``` + - Use a timeout of 5 minutes for the installation to complete (10 minutes if Manual approval) + - Poll every 10 seconds to check CSV status + - Display progress updates to the user + +8. 
**Display Results**: + - Show the installed operator's CSV name and version + - Show the operator deployment status: + ```bash + oc get deployments -n {namespace} + ``` + - List any pods created by the operator: + ```bash + oc get pods -n {namespace} + ``` + - Display success message with next steps or usage instructions + +9. **Cleanup Temporary Files**: + - Remove temporary YAML files created during installation: + ```bash + rm -f /tmp/operatorgroup-{operator-name}.yaml /tmp/subscription-{operator-name}.yaml + ``` + +## Return Value +- **Success**: Operator installed successfully with details about the CSV, deployments, and pods +- **Error**: Installation failed with specific error message and troubleshooting suggestions +- **Format**: Structured output showing: + - Namespace created/used + - OperatorGroup status + - Subscription created + - CSV status and version + - Deployment and pod status + +## Examples + +1. **Install cert-manager-operator with defaults**: + ``` + /olm:install openshift-cert-manager-operator + ``` + This will: + - Create namespace `cert-manager-operator` + - Discover default channel from PackageManifest + - Use `redhat-operators` catalog source + - Install the operator + +2. **Install cert-manager-operator with custom namespace**: + ``` + /olm:install openshift-cert-manager-operator my-cert-manager + ``` + This will install the operator in the `my-cert-manager` namespace. + +3. **Install with specific channel**: + ``` + /olm:install openshift-cert-manager-operator cert-manager-operator stable-v1 + ``` + This will install from the `stable-v1` channel. + +4. **Install from community catalog**: + ``` + /olm:install prometheus community-operators stable community-operators + ``` + This will install Prometheus from the community-operators catalog. + +5. **Install Red Hat Advanced Cluster Security**: + ``` + /olm:install rhacs-operator rhacs-operator stable + ``` + +6. 
**Install with manual approval mode**: + ``` + /olm:install openshift-cert-manager-operator cert-manager-operator stable-v1 redhat-operators --approval=Manual + ``` + This will install the operator but require manual approval for all upgrades. + +7. **Install with all parameters specified**: + ``` + /olm:install external-secrets-operator eso-operator stable-v0.10 redhat-operators --approval=Automatic + ``` + +## Arguments +- **$1** (operator-name): The name of the operator to install (required) + - Example: "openshift-cert-manager-operator" + - Must match the name in the operator's PackageManifest +- **$2** (namespace): Target namespace for the operator installation (optional) + - Default: `{operator-name}` (operator name without "openshift-" prefix if present) + - Example: "cert-manager-operator" +- **$3** (channel): Subscription channel (optional) + - Default: Auto-discovered from PackageManifest's default channel + - Example: "stable-v1", "tech-preview", "stable" +- **$4** (source): CatalogSource name (optional) + - Default: "redhat-operators" + - Other options: "certified-operators", "community-operators", "redhat-marketplace" +- **$5+** (flags): Optional flags + - `--approval=Automatic|Manual`: InstallPlan approval mode + - **Automatic** (default): Operator upgrades are automatically installed without user intervention + - **Manual**: Operator upgrades require explicit approval. 
Useful for: + - Production environments requiring change control + - Testing upgrades before applying + - Preventing unexpected operator updates + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators + +## Notes + +- **Automatic Channel Discovery**: If no channel is specified, the command automatically discovers and uses the operator's default channel from its PackageManifest +- **Namespace Convention**: By default, operators are installed in a namespace named after the operator with any "openshift-" prefix removed (e.g., `openshift-cert-manager-operator` installs into `cert-manager-operator`) +- **OperatorGroup Scope**: The created OperatorGroup targets only the installation namespace for better isolation +- **InstallPlan Approval**: Set to "Automatic" by default for seamless installation. Can be changed to "Manual" using `--approval=Manual` flag +- **Manual Approval Mode**: When using `--approval=Manual`: + - Initial installation may require manual approval of the InstallPlan + - All future upgrades will require explicit approval via `/olm:approve` command + - Provides better control over operator updates in production environments +- **Verification Timeout**: The command waits up to 5 minutes for the operator to install successfully (10 minutes for manual approval mode) +- **Cleanup**: Temporary YAML files are automatically removed after installation + +## Troubleshooting + +- **Operator not found**: Run `oc get packagemanifests -n openshift-marketplace` to see available operators +- **Permission denied**: Ensure you have cluster-admin privileges or the necessary RBAC permissions +- **Installation timeout**: Check the InstallPlan and CSV status manually: + ```bash + oc get installplan -n {namespace} + oc get csv -n {namespace} + oc describe csv -n {namespace} + ``` +- **Operator pod not starting**: Check pod logs: + ```bash + oc logs -n {namespace} deployment/{operator-deployment} + ``` + diff --git a/commands/list.md 
b/commands/list.md new file mode 100644 index 0000000..f93abae --- /dev/null +++ b/commands/list.md @@ -0,0 +1,174 @@ +--- +description: List installed operators in the cluster +argument-hint: [namespace] [--all-namespaces] +--- + +## Name +olm:list + +## Synopsis +``` +/olm:list [namespace] [--all-namespaces] +``` + +## Description +The `olm:list` command lists all installed operators in an OpenShift cluster, showing their status, version, and namespace. This command provides a quick overview of the operator landscape in your cluster. + +This command helps you: +- Discover what operators are currently installed +- Check operator versions and status at a glance +- Identify operators that may need attention (failed, upgrading, etc.) +- Get a comprehensive view across namespaces + +The command presents information in an easy-to-read table format with key details about each operator's ClusterServiceVersion (CSV) and Subscription. + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Namespace (optional) - Target namespace to list operators from + - `$2`: Flag (optional): + - `--all-namespaces` or `-A`: List operators across all namespaces (default behavior if no namespace specified) + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - If not installed or not authenticated, provide clear instructions + +3. **Determine Scope**: + - If namespace is specified: List operators only in that namespace + - If `--all-namespaces` flag or no arguments: List operators cluster-wide + - Default behavior: Show all operators across all namespaces + +4. 
**Fetch Operator Data**: + - Get all ClusterServiceVersions (CSVs): + ```bash + # For specific namespace + oc get csv -n {namespace} -o json + + # For all namespaces + oc get csv --all-namespaces -o json + ``` + - Get all Subscriptions: + ```bash + # For specific namespace + oc get subscription -n {namespace} -o json + + # For all namespaces + oc get subscription --all-namespaces -o json + ``` + +5. **Parse and Correlate Data**: + - For each CSV, extract: + - Name: `.metadata.name` + - Namespace: `.metadata.namespace` + - Display Name: `.spec.displayName` + - Version: `.spec.version` + - Phase/Status: `.status.phase` (e.g., "Succeeded", "Installing", "Failed") + - Install Time: `.metadata.creationTimestamp` + - For each Subscription, extract: + - Operator Name: `.spec.name` + - Channel: `.spec.channel` + - Source: `.spec.source` + - Installed CSV: `.status.installedCSV` + - Current CSV: `.status.currentCSV` + - Correlate Subscriptions with CSVs to show complete operator information + +6. **Format Output as Table**: + Create a formatted table with columns: + ``` + NAMESPACE OPERATOR NAME VERSION STATUS CHANNEL SOURCE + cert-manager-operator cert-manager-operator v1.13.1 Succeeded stable-v1 redhat-operators + external-secrets-operator external-secrets-operator v0.10.5 Succeeded stable-v0.10 redhat-operators + openshift-pipelines openshift-pipelines-operator-rh v1.14.4 Succeeded latest redhat-operators + ``` + +7. **Add Summary Statistics**: + - Total operators installed: X + - By status: + - Succeeded: X + - Installing: X + - Upgrading: X + - Failed: X + - By catalog source: + - redhat-operators: X + - certified-operators: X + - community-operators: X + - custom catalogs: X + +8. **Highlight Issues** (if any): + - List operators with status other than "Succeeded": + ``` + ⚠️ Operators requiring attention: + - namespace/operator-name: Failed (reason: ...) + - namespace/operator-name: Installing (waiting for...) + ``` + +9. 
**Provide Actionable Suggestions**: + - If operators are in "Failed" state, suggest: `/olm:status {operator-name} {namespace}` for details + - If no operators found, suggest: `/olm:search {operator-name}` to find available operators + - If upgrades available, suggest: `/olm:status {operator-name}` to check upgrade options + +## Return Value +- **Success**: Formatted table of installed operators with summary statistics +- **Empty**: No operators found message with suggestion to install operators +- **Error**: Connection or permission error with troubleshooting guidance +- **Format**: + - Table with columns: NAMESPACE, OPERATOR NAME, VERSION, STATUS, CHANNEL, SOURCE + - Summary statistics + - Warnings for operators requiring attention + +## Examples + +1. **List all operators cluster-wide**: + ``` + /olm:list + ``` + +2. **List operators in a specific namespace**: + ``` + /olm:list cert-manager-operator + ``` + +## Arguments +- **$1** (namespace): Target namespace to list operators from (optional) + - If not provided, lists operators from all namespaces + - Example: "cert-manager-operator" +- **$2** (flag): Optional flag + - `--all-namespaces` or `-A`: Explicitly list all operators cluster-wide + - Default behavior if no namespace is provided + +## Notes + +- **Performance**: For large clusters with many operators, the command may take a few seconds to collect all data +- **Status Values**: Common CSV status values include: + - `Succeeded`: Operator is healthy and running + - `Installing`: Operator is being installed + - `Upgrading`: Operator is being upgraded + - `Failed`: Operator installation or operation failed + - `Replacing`: Old version being replaced + - `Deleting`: Operator is being removed +- **Correlation**: The command correlates Subscriptions with CSVs to provide complete operator information +- **Sorting**: Results are sorted by namespace, then by operator name + +## Troubleshooting + +- **Permission denied**: Ensure you have permissions to 
list CSVs and Subscriptions: + ```bash + oc auth can-i list csv --all-namespaces + oc auth can-i list subscription --all-namespaces + ``` +- **Slow response**: For large clusters, use namespace-specific queries to speed up results +- **Missing operators**: Some operators may not have Subscriptions if installed manually; these will still appear based on CSV presence +- **Version mismatch**: If Subscription's `installedCSV` differs from `currentCSV`, an upgrade may be in progress + +## Related Commands + +- `/olm:status {operator-name} [namespace]` - Get detailed status of a specific operator +- `/olm:install {operator-name}` - Install a new operator +- `/olm:search {operator-name}` - Search for available operators in catalogs + +## Additional Resources +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + diff --git a/commands/opm.md b/commands/opm.md new file mode 100644 index 0000000..bd30fe0 --- /dev/null +++ b/commands/opm.md @@ -0,0 +1,359 @@ +--- +description: Execute opm (Operator Package Manager) commands for building and managing operator catalogs +argument-hint: [arguments...] +--- + +## Name +olm:opm + +## Synopsis +```bash +/olm:opm build-index-image {catalog-path} {index-image-tag} [--cacheless] [--arch={arch}] [--base-image={image}] [--builder-image={image}] +/olm:opm build-semver-index-image {semver-template-file} {index-image-tag} [--cacheless] [--arch={arch}] [--base-image={image}] [--builder-image={image}] +/olm:opm generate-semver-template {bundle-list} [--output={file}] [--major=true|false] [--minor=true|false] +/olm:opm list packages {index-ref} +/olm:opm list channels {index-ref} [package-name] +/olm:opm list bundles {index-ref} [package-name] +``` + +## Description +The `olm:opm` command provides a unified interface to `opm` (Operator Package Manager) operations for building and managing operator catalog indexes. It supports building catalog indexes, generating semver templates, and querying catalog contents. 
+ +## Arguments +- `$1`: **action** - The action to perform: + - `build-index-image`: Build an index from an existing catalog directory + - `build-semver-index-image`: Build an index from a semver template + - `generate-semver-template`: Generate a semver template file + - `list`: List catalog contents (requires second argument: `packages`, `channels`, or `bundles`) +- `$2+`: Additional arguments specific to each action (see Actions section below) + +## Actions + +### build-index-image +Build an operator catalog index image from an existing catalog directory. + +**Synopsis:** +```bash +/olm:opm build-index-image [--cacheless] [--arch=] [--base-image=] [--builder-image=] +``` + +**Arguments:** +- `$2`: **catalog-path** - Path to the catalog directory containing the index configuration +- `$3`: **index-image-tag** - Full image tag for the resulting index image (e.g., `quay.io/myorg/mycatalog:v1.0.0`) +- `--cacheless`: Optional flag to build a cacheless image (uses `scratch` as base image; `--base-image` and `--builder-image` are ignored when this is set) +- `--arch=`: Optional architecture specification (default: `multi` for multi-arch build; can specify single arch like `amd64`, `arm64`, `ppc64le`, `s390x`) +- `--base-image=`: Optional base image for the index (default: `quay.io/operator-framework/opm:latest`; ignored if `--cacheless` is set) +- `--builder-image=`: Optional builder image (default: `quay.io/operator-framework/opm:latest`; ignored if `--cacheless` is set) + +**Examples:** +```bash +/olm:opm build-index-image catalog quay.io/myorg/mycatalog:v1.0.0 +/olm:opm build-index-image catalog quay.io/myorg/mycatalog:v1.0.0 --cacheless +/olm:opm build-index-image catalog quay.io/myorg/mycatalog:v1.0.0 --arch=amd64 +``` + +### build-semver-index-image +Build a multi-architecture operator catalog index image using the semver template format. 
+ +**Synopsis:** +```bash +/olm:opm build-semver-index-image [--cacheless] [--arch=] [--base-image=] [--builder-image=] +``` + +**Arguments:** +- `$2`: **semver-template-file** - Path to the semver template configuration file (e.g., `catalog-config.yaml`) +- `$3`: **index-image-tag** - Full image tag for the resulting index image (e.g., `quay.io/myorg/mycatalog:v1.0.0`) +- `--cacheless`: Optional flag to build a cacheless image (uses `scratch` as base image; `--base-image` and `--builder-image` are ignored when this is set) +- `--arch=`: Optional architecture specification (default: `multi` for multi-arch build; can specify single arch like `amd64`, `arm64`, `ppc64le`, `s390x`) +- `--base-image=`: Optional base image for the index (default: `quay.io/operator-framework/opm:latest`; ignored if `--cacheless` is set) +- `--builder-image=`: Optional builder image (default: `quay.io/operator-framework/opm:latest`; ignored if `--cacheless` is set) + +**Examples:** +```bash +/olm:opm build-semver-index-image catalog-config.yaml quay.io/myorg/mycatalog:v1.0.0 +/olm:opm build-semver-index-image catalog-config.yaml quay.io/myorg/mycatalog:v1.0.0 --cacheless +/olm:opm build-semver-index-image catalog-config.yaml quay.io/myorg/mycatalog:v1.0.0 --arch=amd64 +/olm:opm build-semver-index-image catalog-config.yaml quay.io/myorg/mycatalog:v1.0.0 --arch=multi +``` + +### generate-semver-template +Generate a semver template configuration file for building operator catalogs. 
+ +**Synopsis:** +```bash +/olm:opm generate-semver-template <bundle-list> [--output=<file>] [--major=true|false] [--minor=true|false] +``` + +**Arguments:** +- `$2`: **bundle-list** - Comma-separated list of bundle image references (e.g., `quay.io/org/bundle:v1.0.0,quay.io/org/bundle:v1.0.1`) +- `--output=<file>`: Optional output file path (default: `catalog-semver-config.yaml` in current directory) +- `--major=true|false`: Optional flag to generate major version channels (default: `true`) +- `--minor=true|false`: Optional flag to generate minor version channels (default: `false`) + +**Examples:** +```bash +/olm:opm generate-semver-template quay.io/org/bundle:v1.0.0,quay.io/org/bundle:v1.0.1 +/olm:opm generate-semver-template quay.io/org/bundle:v1.0.0,quay.io/org/bundle:v1.0.1 --output=my-catalog.yaml +/olm:opm generate-semver-template quay.io/org/bundle:v1.0.0,quay.io/org/bundle:v1.1.0 --minor=true +``` + +### list packages +List all operator packages available in a catalog index. + +**Synopsis:** +```bash +/olm:opm list packages <index-ref> +``` + +**Arguments:** +- `$1`: **list** - Must be "list" +- `$2`: **packages** - Must be "packages" +- `$3`: **index-ref** - Catalog index reference, either: + - Image tag: `quay.io/myorg/mycatalog:v1.0.0` + - Directory path: `./catalog` or `/path/to/catalog` + +**Examples:** +```bash +/olm:opm list packages quay.io/olmqe/nginx8518-index-test:v1 +/olm:opm list packages ./catalog +``` + +### list channels +List channels for operator packages in a catalog index. 
+ +**Synopsis:** +```bash +/olm:opm list channels <index-ref> [package-name] +``` + +**Arguments:** +- `$1`: **list** - Must be "list" +- `$2`: **channels** - Must be "channels" +- `$3`: **index-ref** - Catalog index reference (image tag or directory path) +- `$4`: **package-name** (Optional) - Name of a specific package to list channels for + +**Examples:** +```bash +/olm:opm list channels quay.io/olmqe/nginx8518-index-test:v1 +/olm:opm list channels quay.io/olmqe/nginx8518-index-test:v1 nginx85187 +/olm:opm list channels ./catalog +``` + +### list bundles +List bundles for operator packages in a catalog index. + +**Synopsis:** +```bash +/olm:opm list bundles <index-ref> [package-name] +``` + +**Arguments:** +- `$1`: **list** - Must be "list" +- `$2`: **bundles** - Must be "bundles" +- `$3`: **index-ref** - Catalog index reference (image tag or directory path) +- `$4`: **package-name** (Optional) - Name of a specific package to list bundles for + +**Examples:** +```bash +/olm:opm list bundles quay.io/olmqe/nginx8518-index-test:v1 +/olm:opm list bundles quay.io/olmqe/nginx8518-index-test:v1 nginx85187 +/olm:opm list bundles ./catalog +``` + +## Implementation + +### Step 1: Parse Action +- Extract the action from `$1` +- Validate the action is one of: `build-index-image`, `build-semver-index-image`, `generate-semver-template`, `list` +- If invalid action, display error with available actions + +### Step 2: Check Prerequisites +Verify required tools are installed: +- Check for `opm`: `which opm` + - If not found, provide installation instructions: <https://github.com/operator-framework/operator-registry/releases> +- For build actions, also check for `podman`: `which podman` + - If not found, provide installation instructions based on user's platform + +### Step 3: Route to Action Handler +Based on the action, call the appropriate implementation: + +#### For `build-index-image`: +1. 
**Parse Arguments and Set Defaults** + - Extract catalog path from `$2` + - Extract index image tag from `$3` + - Parse optional flags: `--cacheless`, `--arch`, `--base-image`, `--builder-image` + - Set defaults: arch=`multi`, base-image=`quay.io/operator-framework/opm:latest`, builder-image=`quay.io/operator-framework/opm:latest` + +2. **Verify Catalog Directory** + - Check catalog directory exists: `test -d <catalog-path>` + +3. **Validate Catalog** + ```bash + opm validate <catalog-path> + ``` + +4. **Generate Dockerfile** + - If cacheless: `opm generate dockerfile <catalog-path> --base-image=scratch` + - If normal: `opm generate dockerfile <catalog-path> -b <builder-image> -i <base-image>` + +5. **Determine Build Platform** + - If arch=`multi`: `linux/amd64,linux/arm64,linux/ppc64le,linux/s390x` + - Otherwise: `linux/<arch>` + +6. **Create Podman Manifest** + ```bash + podman manifest rm <index-image-tag> 2>/dev/null || true + podman manifest create <index-image-tag> + ``` + +7. **Build Image** + ```bash + podman build --platform <platforms> --manifest <index-image-tag> . -f catalog.Dockerfile + ``` + +8. **Push Manifest** + ```bash + podman manifest push <index-image-tag> + ``` + +9. **List Bundles in Index** + ```bash + opm alpha list bundles <index-image-tag> + ``` + +10. **Display Success Message** + +#### For `build-semver-index-image`: +1. **Parse Arguments and Set Defaults** + - Extract semver template file from `$2` + - Extract index image tag from `$3` + - Parse optional flags: `--cacheless`, `--arch`, `--base-image`, `--builder-image` + - Set defaults: arch=`multi`, base-image=`quay.io/operator-framework/opm:latest`, builder-image=`quay.io/operator-framework/opm:latest` + +2. **Verify Template File** + - Check file exists: `test -f <semver-template-file>` + +3. **Create Catalog and Render Template** + ```bash + mkdir -p catalog + opm alpha render-template semver <semver-template-file> -o yaml > catalog/index.yaml + ``` + +4. **Validate Catalog** + ```bash + opm validate catalog + ``` + +5. **Generate Dockerfile** + - If cacheless: `opm generate dockerfile catalog --base-image=scratch` + - If normal: `opm generate dockerfile catalog -b <builder-image> -i <base-image>` + +6. 
**Determine Build Platform** + - If arch=`multi`: `linux/amd64,linux/arm64,linux/ppc64le,linux/s390x` + - Otherwise: `linux/<arch>` + +7. **Create Podman Manifest** + ```bash + podman manifest rm <index-image-tag> 2>/dev/null || true + podman manifest create <index-image-tag> + ``` + +8. **Build Image** + ```bash + podman build --platform <platforms> --manifest <index-image-tag> . -f catalog.Dockerfile + ``` + +9. **Push Manifest** + ```bash + podman manifest push <index-image-tag> + ``` + +10. **List Bundles in Index** + ```bash + opm alpha list bundles <index-image-tag> + ``` + +11. **Display Success Message** + +#### For `generate-semver-template`: +1. **Parse Arguments and Set Defaults** + - Extract bundle list from `$2` + - Parse optional flags: `--output`, `--major`, `--minor` + - Set defaults: output=`catalog-semver-config.yaml`, major=`true`, minor=`false` + +2. **Validate Bundle List** + - Split by commas + - Validate each bundle is a valid image reference + +3. **Generate YAML Content** + ```yaml + Schema: olm.semver + GenerateMajorChannels: <major> + GenerateMinorChannels: <minor> + Candidate: + Bundles: + - Image: <bundle-image-1> + - Image: <bundle-image-2> + ``` + +4. **Write Template File** + - Check if file exists and confirm overwrite if needed + - Write YAML content + +5. **Validate Generated File** + - Read back and verify YAML is well-formed + +6. **Display Success Message** + - Show file path, bundles included, settings + - Suggest next step: `/olm:opm build-semver-index-image <output-file> <index-image-tag>` + +#### For `list`: +1. **Parse List Type** + - Extract list type from `$2` (must be `packages`, `channels`, or `bundles`) + - If invalid, display error with available types + +2. **Parse Index Reference and Optional Package** + - Extract index-ref from `$3` + - Extract optional package-name from `$4` (for channels and bundles) + +3. **Determine Reference Type** + - Check if directory: `test -d <index-ref>` + +4. **Execute List Command** + - For packages: `opm alpha list packages <index-ref>` + - For channels: `opm alpha list channels <index-ref> [package-name]` + - For bundles: `opm alpha list bundles <index-ref> [package-name]` + +5. 
**Display Results** + - Show the output with appropriate formatting + - Display count of items found + +## Return Value + +**Format**: Varies by action + +- **build-index-image / build-semver-index-image**: Success message with image tag, architectures, and bundle list +- **generate-semver-template**: Success message with file path and configuration details +- **list**: Table or list of catalog contents + +On failure, displays: +- Clear error message indicating which step/action failed +- Relevant tool output for debugging +- Suggestions for resolution + +## Notes + +- Ensure you are authenticated to container registries before building/pushing images (use `podman login`) +- For build operations, the `catalog.Dockerfile` is created in the current working directory +- Multi-architecture builds can be time-consuming +- Cacheless builds result in smaller images and use `scratch` as the base image +- When using `--cacheless`, the `--base-image` and `--builder-image` options are ignored (scratch is always used as base) +- Index references can be either image tags or local directory paths +- Bundle images must be accessible from where you build the catalog +- Image tags should include the full registry hostname (e.g., `quay.io/org/image:tag` not `quay/org/image:tag`) + +## Related Commands + +- `/olm:install` - Install an operator using OLM +- `/olm:catalog` - Manage catalog sources +- `/olm:debug` - Debug OLM issues diff --git a/commands/search.md b/commands/search.md new file mode 100644 index 0000000..72773c6 --- /dev/null +++ b/commands/search.md @@ -0,0 +1,247 @@ +--- +description: Search for available operators in catalog sources +argument-hint: [query] [--catalog ] +--- + +## Name +olm:search + +## Synopsis +``` +/olm:search [query] [--catalog ] +``` + +## Description +The `olm:search` command searches for available operators in the cluster's catalog sources (OperatorHub). 
It helps you discover operators that can be installed, showing their names, descriptions, versions, channels, and catalog sources. + +This command helps you: +- Find operators by name, description, or keywords +- Discover what operators are available for installation +- View operator details before installing +- Check available versions and channels +- Identify which catalog source contains a specific operator + +The command searches across all available catalog sources (redhat-operators, certified-operators, community-operators, redhat-marketplace, and custom catalogs) and presents results in an easy-to-read format. + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Query string (optional) - Search term for filtering operators + - If not provided, lists all available operators + - Can be partial name, keyword, or description + - `$2+`: Flags (optional): + - `--catalog `: Limit search to specific catalog source + - `--exact`: Only show exact name matches + - `--installed`: Show only installed operators (combination with /olm:list) + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - If not installed or not authenticated, provide clear instructions + +3. **Fetch Catalog Data**: + - Get all PackageManifests from openshift-marketplace: + ```bash + oc get packagemanifests -n openshift-marketplace -o json + ``` + - If `--catalog` flag is specified, filter by catalog source: + ```bash + oc get packagemanifests -n openshift-marketplace -o json | jq '.items[] | select(.status.catalogSource=="{catalog-name}")' + ``` + +4. 
**Parse PackageManifest Data**: + - For each PackageManifest, extract: + - Name: `.metadata.name` + - Display Name: `.status.channels[0].currentCSVDesc.displayName` + - Description: `.status.channels[0].currentCSVDesc.description` + - Provider: `.status.provider.name` + - Catalog Source: `.status.catalogSource` + - Catalog Namespace: `.status.catalogSourceNamespace` + - Default Channel: `.status.defaultChannel` + - All Channels: `.status.channels[].name` + - Latest Version: `.status.defaultChannel as $default | .status.channels[] | select(.name == $default) | .currentCSVDesc.version` (the default channel is bound to a variable first, because inside `.status.channels[]` the path `.status.defaultChannel` would be evaluated against the channel object and never match) + - Categories: `.status.channels[0].currentCSVDesc.annotations["categories"]` + - Capabilities: `.status.channels[0].currentCSVDesc.annotations["capabilities"]` + +5. **Apply Search Filter** (if query provided): + - Case-insensitive search across: + - Operator name (`.metadata.name`) + - Display name (`.status.channels[0].currentCSVDesc.displayName`) + - Description (`.status.channels[0].currentCSVDesc.description`) + - Provider name (`.status.provider.name`) + - Categories + - If `--exact` flag, only match exact operator names + +6. **Sort Results**: + - Primary sort: By catalog source (redhat-operators first, then certified, community, etc.) + - Secondary sort: By operator name alphabetically + +7. **Format Search Results**: + + **A. Summary Header** + ``` + Found X operators matching "{query}" + ``` + + **B. Results List** + For each operator: + ``` + ┌───────────────────────────────────────────────────────────── + │ cert-manager-operator for Red Hat OpenShift + ├───────────────────────────────────────────────────────────── + │ Name: openshift-cert-manager-operator + │ Provider: Red Hat + │ Catalog: redhat-operators + │ Default: stable-v1 + │ Channels: stable-v1, tech-preview-v1.13 + │ Version: v1.13.1 + │ Categories: Security + │ + │ Description: Manages the lifecycle of TLS certificates... 
+ │ + │ Install: /olm:install openshift-cert-manager-operator + └───────────────────────────────────────────────────────────── + ``` + +8. **Group by Catalog** (optional, for better readability): + ``` + ═════════════════════════════════════════════════════════════ + RED HAT OPERATORS (3) + ═════════════════════════════════════════════════════════════ + + [List of operators from redhat-operators] + + ═════════════════════════════════════════════════════════════ + CERTIFIED OPERATORS (1) + ═════════════════════════════════════════════════════════════ + + [List of operators from certified-operators] + + ═════════════════════════════════════════════════════════════ + COMMUNITY OPERATORS (2) + ═════════════════════════════════════════════════════════════ + + [List of operators from community-operators] + ``` + +9. **Provide Installation Guidance**: + - For each operator, show ready-to-use install command: + ``` + To install: /olm:install {operator-name} + ``` + - For operators with specific channel recommendations, note them + +10. **Handle No Results**: + - If no operators match the query: + ``` + No operators found matching "{query}" + + Suggestions: + - Try a broader search term + - List all available operators: /olm:search + - Check specific catalog: /olm:search {query} --catalog redhat-operators + ``` + +11. **Show Popular/Recommended Operators** (if no query provided): + - Highlight commonly used operators: + - cert-manager + - external-secrets-operator + - OpenShift Pipelines + - OpenShift GitOps + - Service Mesh + - etc. + +## Return Value +- **Success**: List of matching operators with detailed information +- **No Results**: Message indicating no matches with suggestions +- **Error**: Connection or permission error with troubleshooting guidance +- **Format**: + - Summary of search results + - Detailed operator information cards + - Installation commands for each operator + - Grouped by catalog source + +## Examples + +1. 
**Search for cert-manager operator**: + ``` + /olm:search cert-manager + ``` + +2. **Search for secrets-related operators**: + ``` + /olm:search secrets + ``` + Output listing multiple operators related to secrets management. + +3. **List all operators** (no query): + ``` + /olm:search + ``` + +4. **Search in specific catalog**: + ``` + /olm:search prometheus --catalog community-operators + ``` + Output showing only Prometheus-related operators from community-operators catalog. + +5. **Exact name match**: + ``` + /olm:search external-secrets-operator --exact + ``` + Output showing only the exact match for external-secrets-operator. + +6. **Search for operators by category** (e.g., security): + ``` + /olm:search security + ``` + Output listing all security-related operators. + +## Arguments +- **$1** (query): Search term to filter operators (optional) + - If not provided, lists all available operators (may be very long) + - Searches across name, display name, description, provider + - Case-insensitive partial matching + - Example: "cert", "secrets", "security", "monitoring" +- **$2+** (flags): Optional flags + - `--catalog `: Limit search to specific catalog + - Values: "redhat-operators", "certified-operators", "community-operators", "redhat-marketplace", or custom catalog name + - `--exact`: Only show exact name matches (no partial matching) + - `--installed`: Show only operators that are currently installed + + +## Troubleshooting + +- **No operators found**: + - Verify catalog sources are available: + ```bash + oc get catalogsources -n openshift-marketplace + ``` + - Check if catalog sources are healthy: + ```bash + oc get pods -n openshift-marketplace + ``` +- **Slow search**: + - Use more specific search terms + - Search in specific catalog: `--catalog redhat-operators` +- **Incomplete information**: + - Some operators may have limited metadata in their PackageManifest +- **Permission denied**: + - Ensure you can read PackageManifests: + ```bash + oc auth 
can-i list packagemanifests -n openshift-marketplace + ``` + +## Related Commands + +- `/olm:install ` - Install an operator found in search results +- `/olm:list` - List installed operators +- `/olm:status ` - Check status of an installed operator + +## Additional Resources + +- [OperatorHub.io](https://operatorhub.io/) - Browse operators online +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + diff --git a/commands/status.md b/commands/status.md new file mode 100644 index 0000000..603ceb0 --- /dev/null +++ b/commands/status.md @@ -0,0 +1,351 @@ +--- +description: Get detailed status and health information for an operator +argument-hint: [namespace] +--- + +## Name +olm:status + +## Synopsis +``` +/olm:status [namespace] +``` + +## Description +The `olm:status` command provides comprehensive health and status information for a specific operator in an OpenShift cluster. It displays detailed information about the operator's CSV, Subscription, InstallPlan, deployments, and pods to help diagnose issues and verify proper operation. + +This command helps you: +- Check if an operator is running correctly +- Diagnose installation or upgrade problems +- View operator version and available updates +- Inspect operator deployments and pods +- Review recent events and conditions +- Identify resource issues or configuration problems + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Operator name (required) - Name of the operator to inspect + - `$2`: Namespace (optional) - Namespace where operator is installed + - If not provided, searches for the operator across all namespaces + - If multiple instances found, prompts user to specify namespace + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - If not installed or not authenticated, provide clear instructions + +3. 
**Locate Operator**: + - If namespace provided, verify operator exists in that namespace: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - If no namespace provided, search across all namespaces: + ```bash + oc get subscription --all-namespaces -o json | jq -r '.items[] | select(.spec.name=="{operator-name}") | .metadata.namespace' + ``` + - If not found, display error with suggestions + - If multiple instances found, list them and ask user to specify namespace + +4. **Gather Subscription Information**: + - Get Subscription details: + ```bash + oc get subscription {operator-name} -n {namespace} -o json + ``` + - Extract: + - Channel: `.spec.channel` + - Install Plan Approval: `.spec.installPlanApproval` + - Source: `.spec.source` + - Source Namespace: `.spec.sourceNamespace` + - Installed CSV: `.status.installedCSV` + - Current CSV: `.status.currentCSV` + - State: `.status.state` + - Conditions: `.status.conditions[]` + +5. **Gather CSV Information**: + - Get CSV details: + ```bash + oc get csv {csv-name} -n {namespace} -o json + ``` + - Extract: + - Display Name: `.spec.displayName` + - Version: `.spec.version` + - Phase: `.status.phase` + - Message: `.status.message` + - Reason: `.status.reason` + - Creation Time: `.metadata.creationTimestamp` + - Conditions: `.status.conditions[]` + - Requirements: `.status.requirementStatus[]` + +6. **Gather InstallPlan Information**: + - Get related InstallPlans: + ```bash + oc get installplan -n {namespace} -o json + ``` + - Find InstallPlans related to this operator by checking `.spec.clusterServiceVersionNames` + - Extract: + - Name: `.metadata.name` + - Phase: `.status.phase` (e.g., "Complete", "Installing", "Failed") + - Approved: `.spec.approved` + - Bundle Resources: `.status.bundleLookups[]` + +7. 
**Gather Deployment Information**: + - Get deployments owned by the CSV: + ```bash + oc get deployments -n {namespace} -o json + ``` + - Filter deployments with owner reference to the CSV + - For each deployment, extract: + - Name: `.metadata.name` + - Ready Replicas: `.status.readyReplicas` / `.status.replicas` + - Available: `.status.availableReplicas` + - Conditions: `.status.conditions[]` + +8. **Gather Pod Information**: + - Get pods managed by operator deployments: + ```bash + oc get pods -n {namespace} -l app={operator-label} -o json + ``` + - For each pod, extract: + - Name: `.metadata.name` + - Status: `.status.phase` + - Ready: Count of ready containers vs total + - Restarts: Sum of `.status.containerStatuses[].restartCount` + - Age: Calculate from `.metadata.creationTimestamp` + +9. **Check for Recent Events**: + - Get events related to the operator: + ```bash + oc get events -n {namespace} --field-selector involvedObject.name={csv-name} --sort-by='.lastTimestamp' + ``` + - Show last 5-10 events, especially warnings and errors + +10. **Check for Available Updates**: + - Get PackageManifest to check for newer versions: + ```bash + oc get packagemanifest {operator-name} -n openshift-marketplace -o json + ``` + - Extract current channel information: + - Current channel from Subscription: `.spec.channel` + - Latest version in current channel + - Available channels + - Compare installed CSV version with latest available version + - Check for pending InstallPlans: + ```bash + oc get installplan -n {namespace} -o json | jq '.items[] | select(.spec.approved==false)' + ``` + - Determine if manual approval is required: + ```bash + oc get subscription {operator-name} -n {namespace} -o jsonpath='{.spec.installPlanApproval}' + ``` + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators + +11. 
**Format Comprehensive Report**: + Create a structured report with sections: + + **A. Overview** + ``` + Operator: {display-name} + Name: {operator-name} + Namespace: {namespace} + Version: {version} + Status: {phase} + ``` + + **B. Subscription** + ``` + Channel: {channel} + Source: {source} + Install Plan Approval: {approval-mode} (Automatic|Manual) + State: {state} + Installed CSV: {installed-csv-name} + Current CSV: {current-csv-name} + ``` + + **C. ClusterServiceVersion (CSV)** + ``` + Name: {csv-name} + Phase: {phase} + Message: {message} + Requirements: [list requirements status] + ``` + + **D. InstallPlan** + ``` + Name: {installplan-name} + Phase: {phase} (Complete|Installing|RequiresApproval|Failed) + Approved: {true/false} + + [If Phase=RequiresApproval and Approved=false:] + ⚠️ Manual approval required for installation/upgrade + To approve: /olm:approve {operator-name} {namespace} + Or manually: oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + ``` + + **E. Deployments** + ``` + NAME READY AVAILABLE AGE + cert-manager 1/1 1 5d + cert-manager-webhook 1/1 1 5d + ``` + + **F. Pods** + ``` + NAME STATUS READY RESTARTS AGE + cert-manager-7d4f8f8b4-abcde Running 1/1 0 5d + cert-manager-webhook-6b7c9d5f-fghij Running 1/1 0 5d + ``` + + **G. Recent Events** (if any warnings/errors) + ``` + 5m Warning InstallPlanFailed Failed to install... + 2m Normal InstallSucceeded Successfully installed + ``` + + **H. 
Update Information** + ``` + Current Version: {current-version} + Latest Available: {latest-version} (in channel: {channel}) + Update Status: [Up to date | Update available | Unknown] + + Available Channels: + - stable-v1 (latest: v1.13.1) + - tech-preview-v1.14 (latest: v1.14.0) + + [If update available in current channel:] + 📦 Update available: {current-version} → {latest-version} + To update: /olm:upgrade {operator-name} {namespace} + + [If newer version in different channel:] + 💡 Newer version available in channel '{new-channel}': {newer-version} + To switch channels: /olm:upgrade {operator-name} {namespace} --channel={new-channel} + ``` + + **I. Health Summary** + ``` + ✅ Operator is healthy and running + ⚠️ Operator has warnings (see events) + ❌ Operator is not healthy (see details) + 🔄 Operator is upgrading (Current: {old-version} → Target: {new-version}) + ⏸️ Operator upgrade pending manual approval + ``` + +12. **Provide Actionable Recommendations**: + - If operator is failed: + ``` + ❌ Operator failed: {reason} + + Troubleshooting steps: + 1. Check operator logs: oc logs -n {namespace} deployment/{operator-deployment} + 2. Check events: oc get events -n {namespace} --sort-by='.lastTimestamp' + 3. Check CSV conditions: oc describe csv {csv-name} -n {namespace} + 4. 
Run diagnostics: /olm:diagnose {operator-name} {namespace} + ``` + - If upgrade available: + ``` + 📦 Update available: {current} → {latest} + To upgrade: /olm:upgrade {operator-name} {namespace} + ``` + - If pods are crashing: + ``` + ⚠️ Pods are crashing (restarts: {count}) + Check logs: oc logs -n {namespace} {pod-name} + Previous logs: oc logs -n {namespace} {pod-name} --previous + ``` + - If InstallPlan requires approval: + ``` + ⏸️ InstallPlan requires manual approval + + InstallPlan: {installplan-name} + Version: {target-version} + + To approve: /olm:approve {operator-name} {namespace} + Or manually: oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + + To switch to automatic approvals: + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"installPlanApproval":"Automatic"}}' + ``` + - If operator is upgrading: + ``` + 🔄 Operator upgrade in progress: {old-version} → {new-version} + Monitor progress: watch oc get csv,installplan -n {namespace} + ``` + +## Return Value +- **Success**: Comprehensive status report with all operator details +- **Not Found**: Error message with suggestions to list operators or check spelling +- **Multiple Instances**: List of namespaces where operator is installed +- **Error**: Connection or permission error with troubleshooting guidance +- **Format**: Multi-section report with: + - Overview + - Subscription details + - CSV status + - InstallPlan status + - Deployment status + - Pod status + - Recent events + - Health summary + - Recommendations + +## Examples + +1. **Check status of cert-manager operator**: + ``` + /olm:status openshift-cert-manager-operator + ``` + +2. 
**Check status with specific namespace**: + ``` + /olm:status external-secrets-operator external-secrets-operator + ``` + +## Arguments +- **$1** (operator-name): Name of the operator to inspect (required) + - Example: "openshift-cert-manager-operator" + - Must match the operator's Subscription name +- **$2** (namespace): Namespace where operator is installed (optional) + - If not provided, searches all namespaces + - Example: "cert-manager-operator" + +## Notes + +- **Comprehensive View**: This command aggregates data from multiple resources (Subscription, CSV, InstallPlan, Deployments, Pods) for a complete picture +- **Permissions**: Requires read permissions for subscriptions, csvs, installplans, deployments, pods, and events in the target namespace +- **Performance**: May take a few seconds to gather all information for large operators with many resources +- **Auto-Discovery**: If namespace is not specified, the command automatically finds the operator across all namespaces +- **Health Checks**: The command evaluates multiple factors to determine overall operator health +- **Troubleshooting**: Provides context-aware recommendations based on detected issues + +## Troubleshooting + +- **Operator not found**: + - Verify operator name: `oc get subscriptions --all-namespaces | grep {operator-name}` + - List all operators: `/olm:list` +- **Multiple instances found**: + - Specify namespace explicitly: `/olm:status {operator-name} {namespace}` +- **Permission denied**: + - Ensure you have read permissions in the target namespace + - Check: `oc auth can-i get csv -n {namespace}` +- **Incomplete information**: + - Some operators may not have all resources (e.g., manually installed CSVs without Subscriptions) + +## Related Commands + +- `/olm:list` - List all installed operators +- `/olm:install ` - Install a new operator +- `/olm:uninstall ` - Uninstall an operator +- `/olm:upgrade ` - Upgrade an operator +- `/olm:approve ` - Approve pending InstallPlans +- 
`/olm:diagnose ` - Diagnose and fix operator issues + +## Additional Resources + +- [Viewing Operator Status](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-status-viewing-operator-status) +- [Updating Installed Operators](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-updating-operators) +- [Troubleshooting Operator Issues](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-troubleshooting-operator-issues) + diff --git a/commands/uninstall.md b/commands/uninstall.md new file mode 100644 index 0000000..36c3ec1 --- /dev/null +++ b/commands/uninstall.md @@ -0,0 +1,392 @@ +--- +description: Uninstall a day-2 operator and optionally remove its resources +argument-hint: [namespace] [--remove-crds] [--remove-namespace] +--- + +## Name +olm:uninstall + +## Synopsis +``` +/olm:uninstall [namespace] [--remove-crds] [--remove-namespace] +``` + +## Description +The `olm:uninstall` command uninstalls a day-2 operator from an OpenShift cluster by removing its Subscription, ClusterServiceVersion (CSV), and optionally its Custom Resource Definitions (CRDs) and namespace. + +This command provides a comprehensive uninstallation workflow: +- Removes the operator's Subscription +- Deletes the ClusterServiceVersion (CSV) +- Optionally removes operator-managed deployments +- Optionally deletes Custom Resource Definitions (CRDs) +- Optionally removes the operator's namespace +- Provides detailed feedback on each step + +The command is designed to safely clean up operators installed via OLM, with optional flags for thorough cleanup of all operator-related resources. + +## Implementation + +The command performs the following steps: + +1. 
**Parse Arguments**: + - `$1`: Operator name (required) - The name of the operator to uninstall + - `$2`: Namespace (optional) - The namespace where operator is installed. If not provided, defaults to `{operator-name}-operator` + - `$3+`: Flags (optional): + - `--remove-crds`: Remove Custom Resource Definitions after uninstalling + - `--remove-namespace`: Remove the operator's namespace after cleanup + - `--force`: Skip confirmation prompts + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has cluster-admin or sufficient privileges + +3. **Verify Operator Installation**: + - Check if namespace exists: + ```bash + oc get namespace {namespace} --ignore-not-found + ``` + - Check if subscription exists: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - If not found, display error: "Operator {operator-name} is not installed in namespace {namespace}" + - List what will be uninstalled + +4. **Display Uninstallation Plan**: + - Show operator details: + ```bash + oc get subscription {operator-name} -n {namespace} -o yaml + oc get csv -n {namespace} + ``` + - Display what will be removed: + - Subscription name and namespace + - CSV name and version + - Deployments (if any) + - CRDs (if `--remove-crds` flag is set) + - Namespace (if `--remove-namespace` flag is set) + +5. **Request User Confirmation** (unless `--force` flag is set): + - Display warning: + ``` + WARNING: You are about to uninstall {operator-name} from namespace {namespace}. + This will remove: + - Subscription: {subscription-name} + - ClusterServiceVersion: {csv-name} + - Operator deployments + [- Custom Resource Definitions (if --remove-crds is set)] + [- Namespace {namespace} (if --remove-namespace is set)] + + Are you sure you want to continue? (yes/no) + ``` + - Wait for user confirmation + - If user says no, abort operation + +6. 
**Delete Subscription**: + - Remove the operator's subscription: + ```bash + oc delete subscription {operator-name} -n {namespace} + ``` + - Verify deletion: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - Display result + +7. **Delete ClusterServiceVersion (CSV)**: + - Get the CSV name: + ```bash + oc get csv -n {namespace} -o jsonpath='{.items[?(@.spec.displayName contains "{operator-name}")].metadata.name}' + ``` + - Delete the CSV: + ```bash + oc delete csv {csv-name} -n {namespace} + ``` + - This will automatically remove operator deployments + - Verify CSV is deleted: + ```bash + oc get csv -n {namespace} --ignore-not-found + ``` + +8. **Remove Operator Deployments** (if still present): + - List deployments created by the operator: + ```bash + oc get deployments -n {namespace} + ``` + - For operators like cert-manager with labeled resources: + ```bash + oc delete deployment -n {namespace} -l app.kubernetes.io/instance={operator-base-name} + ``` + - Verify deployments are deleted: + ```bash + oc get deployments -n {namespace} + ``` + +8.5. **Check for Orphaned Custom Resources** (before removing CRDs): + - Get list of CRDs managed by the operator from CSV: + ```bash + oc get csv -n {namespace} -o jsonpath='{.items[0].spec.customresourcedefinitions.owned[*].name}' + ``` + - For each CRD, search for CR instances across all namespaces: + ```bash + oc get {crd-name} --all-namespaces --ignore-not-found + ``` + - If CRs exist, list them with details: + ``` + WARNING: Found custom resources that may prevent clean uninstallation: + - namespace-1/{cr-name} (kind: {crd-kind}) + - namespace-2/{cr-name} (kind: {crd-kind}) + + These resources should be deleted before uninstalling the operator. + Do you want to delete these custom resources? 
(yes/no) + ``` + - If user confirms, delete each CR: + ```bash + oc delete {crd-name} {cr-name} -n {cr-namespace} + ``` + - This prevents namespace from getting stuck in Terminating state + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-reinstalling-operators-after-failed-uninstallation_olm-troubleshooting-operator-issues + +9. **Remove Custom Resource Definitions** (if `--remove-crds` flag is set): + - **WARNING**: Display critical warning to user: + ``` + WARNING: Removing CRDs will delete ALL custom resources of these types across the entire cluster! + This action is irreversible and will affect all namespaces. + + Are you absolutely sure you want to remove CRDs? (yes/no) + ``` + - If user confirms, proceed with CRD removal + - Get list of CRDs owned by the operator: + ```bash + oc get csv {csv-name} -n {namespace} -o jsonpath='{.spec.customresourcedefinitions.owned[*].name}' + ``` + - For each CRD, check if custom resources exist: + ```bash + oc get {crd-name} --all-namespaces --ignore-not-found + ``` + - Display warning if custom resources exist + - Delete CRDs: + ```bash + oc delete crd {crd-name} + ``` + +10. **Remove Namespace** (if `--remove-namespace` flag is set): + - **WARNING**: Display warning: + ``` + WARNING: Removing namespace {namespace} will delete all resources in this namespace! + + Are you sure you want to remove namespace {namespace}? 
(yes/no) + ``` + - If user confirms: + ```bash + oc delete namespace {namespace} + ``` + - Monitor namespace deletion with timeout: + ```bash + oc wait --for=delete namespace/{namespace} --timeout=120s + ``` + - If namespace gets stuck in "Terminating" state after 120 seconds: + - Check for resources preventing deletion: + ```bash + oc api-resources --verbs=list --namespaced -o name | \ + xargs -n 1 oc get --show-kind --ignore-not-found -n {namespace} + ``` + - Check for finalizers on the namespace: + ```bash + oc get namespace {namespace} -o jsonpath='{.metadata.finalizers}' + ``` + - Display helpful error message: + ``` + ERROR: Namespace {namespace} is stuck in Terminating state. + + Possible causes: + - Resources with finalizers preventing deletion + - API services that are unavailable + - Custom resources that cannot be deleted + + To diagnose and fix, run: /olm:diagnose {operator-name} {namespace} + + Manual troubleshooting: + 1. Check remaining resources: + oc api-resources --verbs=list --namespaced -o name | \ + xargs -n 1 oc get --show-kind --ignore-not-found -n {namespace} + + 2. Check namespace finalizers: + oc get namespace {namespace} -o yaml | grep -A5 finalizers + + WARNING: Do NOT force-delete the namespace as it can lead to unstable cluster behavior. + See: https://access.redhat.com/solutions/4165791 + ``` + - Exit with error code + - Note: OperatorGroup will be automatically deleted with the namespace + +11. **Post-Uninstall Verification**: + - Verify all resources are cleaned up: + ```bash + oc get subscription,csv,installplan -n {namespace} --ignore-not-found + ``` + - Check if any CRDs remain (if they were supposed to be deleted): + ```bash + oc get crd | grep + ``` + - If uninstalling without `--remove-namespace`, check namespace is clean: + ```bash + oc get all -n {namespace} + ``` + - Display any remaining resources with suggestions for cleanup + +12. 
**Display Uninstallation Summary**: + - Show what was successfully removed: + ``` + ✓ Uninstallation Summary: + ✓ Subscription '{operator-name}' deleted + ✓ CSV '{csv-name}' deleted + ✓ Operator deployments removed + [✓ X custom resources deleted] + [✓ Y CRDs removed] + [✓ Namespace '{namespace}' deleted] + ``` + - If CRDs or namespace were NOT removed, provide instructions: + ``` + Note: The following resources were NOT removed: + - Custom Resource Definitions (use --remove-crds to remove) + - Namespace {namespace} (use --remove-namespace to remove) + + To completely remove all operator resources, run: + /olm:uninstall {operator-name} {namespace} --remove-crds --remove-namespace + ``` + - **Important warning about reinstallation**: + ``` + IMPORTANT: Before reinstalling this operator, verify all resources are cleaned: + + oc get subscription,csv,installplan -n {namespace} + oc get crd | grep + + Failure to completely uninstall may cause reinstallation issues. + See: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-reinstalling-operators-after-failed-uninstallation_olm-troubleshooting-operator-issues + ``` + +## Return Value +- **Success**: Operator uninstalled successfully with summary of removed resources +- **Partial Success**: Some resources removed with warnings about remaining resources +- **Error**: Uninstallation failed with specific error message +- **Format**: Structured output showing: + - Subscription deletion status + - CSV deletion status + - Deployment removal status + - CRD removal status (if applicable) + - Namespace deletion status (if applicable) + +## Examples + +1. **Uninstall cert-manager-operator (basic)**: + ``` + /olm:uninstall openshift-cert-manager-operator + ``` + +2. **Uninstall with custom namespace**: + ``` + /olm:uninstall openshift-cert-manager-operator my-cert-manager + ``` + +3. 
**Complete cleanup including namespace**: + ``` + /olm:uninstall openshift-cert-manager-operator cert-manager-operator --remove-crds --remove-namespace + ``` + This performs a complete cleanup of all operator-related resources. + +4. **Force uninstall without prompts**: + ``` + /olm:uninstall openshift-cert-manager-operator cert-manager-operator --force + ``` + Skips all confirmation prompts (use with caution!). + +## Arguments +- **$1** (operator-name): The name of the operator to uninstall (required) + - Example: "openshift-cert-manager-operator" + - Must match the Subscription name +- **$2** (namespace): The namespace where operator is installed (optional) + - Default: `{operator-name}` (operator name without "openshift-" prefix) + - Example: "cert-manager-operator" +- **$3+** (flags): Optional flags (can combine multiple): + - `--remove-crds`: Remove Custom Resource Definitions (WARNING: affects entire cluster) + - `--remove-namespace`: Remove the operator's namespace and all its resources + - `--force`: Skip all confirmation prompts (use with caution) + +## Safety Features + +1. **Multiple Confirmations**: Separate confirmations for CRD and namespace removal +2. **Detailed Warnings**: Clear warnings about the scope of deletions +3. **Verification Steps**: Checks that resources exist before attempting deletion +4. **Summary Report**: Detailed summary of what was and wasn't removed +5. **Graceful Failures**: Continues with remaining steps if individual deletions fail + +## Troubleshooting + +- **Subscription not found**: Verify the operator name and namespace: + ```bash + oc get subscriptions --all-namespaces | grep {operator-name} + ``` +- **CSV won't delete**: Check for finalizers: + ```bash + oc get csv {csv-name} -n {namespace} -o yaml | grep finalizers + ``` + If finalizers are present, they may be waiting for resources to be cleaned up. Check operator logs and events. 
+ +- **Namespace stuck in Terminating**: This is a common issue after operator uninstallation. + ```bash + # Find remaining resources + oc api-resources --verbs=list --namespaced -o name | \ + xargs -n 1 oc get --show-kind --ignore-not-found -n {namespace} + + # Check namespace finalizers + oc get namespace {namespace} -o yaml | grep -A5 finalizers + ``` + **IMPORTANT**: Do not force-delete the namespace. This can cause cluster instability. + Instead, use `/olm:diagnose {operator-name} {namespace}` to diagnose and fix the issue. + +- **CRDs won't delete**: Check for remaining custom resources: + ```bash + oc get {crd-name} --all-namespaces + ``` + CRD deletion can hang while CR instances with finalizers still exist. Delete all CRs first. + +- **Custom resources won't delete**: Some CRs may have finalizers preventing deletion: + ```bash + oc get {crd-name} {cr-name} -n {cr-namespace} -o yaml | grep finalizers + ``` + The operator controller (if still running) should remove finalizers. If the operator is already deleted, you may need to manually patch the CR to remove finalizers (use with extreme caution). + +- **Permission denied**: Ensure you have cluster-admin privileges for CRD deletion: + ```bash + oc auth can-i delete crd + ``` + +- **Reinstallation fails after uninstall**: This usually means cleanup was incomplete. 
+ Run these checks before reinstalling: + ```bash + # Check for remaining subscriptions/CSVs + oc get subscription,csv -n {namespace} + + # Check for remaining CRDs + oc get crd | grep + + # Check if namespace is clean or stuck + oc get namespace {namespace} + ``` + See: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-reinstalling-operators-after-failed-uninstallation_olm-troubleshooting-operator-issues + +## Related Commands + +- `/olm:install` - Install a day-2 operator +- `/olm:list` - List installed operators +- `/olm:status` - Check operator status before uninstalling +- `/olm:diagnose` - Diagnose and fix uninstallation issues +- `/olm:upgrade` - Upgrade an operator + +## Additional Resources + +- [Red Hat OpenShift: Deleting Operators from a cluster](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-deleting-operators-from-a-cluster) +- [Red Hat OpenShift: Reinstalling Operators after failed uninstallation](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-reinstalling-operators-after-failed-uninstallation_olm-troubleshooting-operator-issues) +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + diff --git a/commands/upgrade.md b/commands/upgrade.md new file mode 100644 index 0000000..75434f6 --- /dev/null +++ b/commands/upgrade.md @@ -0,0 +1,349 @@ +--- +description: Update an operator to the latest version or switch channels +argument-hint: [namespace] [--channel=] [--approve] +--- + +## Name +olm:upgrade + +## Synopsis +``` +/olm:upgrade [namespace] [--channel=] [--approve] +``` + +## Description +The `olm:upgrade` command updates an installed operator to the latest version in its current channel or switches to a different channel. It can also approve pending InstallPlans for operators with manual approval mode. 
+ +This command helps you: +- Update operators to the latest version in their channel +- Switch operators to different channels (e.g., stable to tech-preview) +- Approve pending upgrade InstallPlans for manual approval mode +- Monitor upgrade progress +- Rollback on failure (if possible via OLM) + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Operator name (required) - Name of the operator to upgrade + - `$2`: Namespace (optional) - Namespace where operator is installed + - If not provided, searches for the operator across all namespaces + - `$3+`: Flags (optional): + - `--channel=`: Switch to a different channel + - `--approve`: Automatically approve pending InstallPlan (for manual approval mode) + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has sufficient privileges + +3. **Locate Operator**: + - If namespace provided, verify operator exists: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - If no namespace provided, search across all namespaces: + ```bash + oc get subscription --all-namespaces -o json | jq -r '.items[] | select(.spec.name=="{operator-name}") | .metadata.namespace' + ``` + - If not found, display error with suggestions + - If multiple instances found, prompt user to specify namespace + +4. **Get Current State**: + - Get current Subscription: + ```bash + oc get subscription {operator-name} -n {namespace} -o json + ``` + - Extract: + - Current channel: `.spec.channel` + - Install plan approval: `.spec.installPlanApproval` + - Installed CSV: `.status.installedCSV` + - Current CSV: `.status.currentCSV` + - Get current CSV version: + ```bash + oc get csv {installed-csv} -n {namespace} -o jsonpath='{.spec.version}' + ``` + +5. 
**Check for Available Updates**: + - Get PackageManifest: + ```bash + oc get packagemanifest {operator-name} -n openshift-marketplace -o json + ``` + - Extract available channels and their latest versions + - If `--channel` flag is specified, verify channel exists + - If no channel flag, check for updates in current channel + - Compare current version with latest available version + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-updating-operators + +6. **Display Upgrade Plan**: + ``` + Operator Upgrade Plan: + + Operator: {display-name} + Namespace: {namespace} + Current Version: {current-version} + Current Channel: {current-channel} + + [If switching channels:] + Target Channel: {new-channel} + Target Version: {new-version} + + [If upgrading in same channel:] + Latest Version: {latest-version} (in channel: {current-channel}) + + Approval Mode: {Automatic|Manual} + ``` + +7. **Check for Pending InstallPlans** (for manual approval mode): + - Get pending InstallPlans: + ```bash + oc get installplan -n {namespace} -o json | jq '.items[] | select(.spec.approved==false)' + ``` + - If pending InstallPlan exists and `--approve` flag is set: + - Display InstallPlan details + - Approve the InstallPlan (skip to step 9) + - If pending InstallPlan exists and no `--approve` flag: + ``` + ⏸️ Pending InstallPlan found (requires manual approval) + + InstallPlan: {installplan-name} + Target Version: {target-version} + + To approve: /olm:upgrade {operator-name} {namespace} --approve + Or use: /olm:approve {operator-name} {namespace} + ``` + - Exit, waiting for user to approve + +8. **Perform Channel Switch** (if `--channel` flag provided): + - Confirm with user (unless `--force` flag): + ``` + WARNING: Switching channels may upgrade or downgrade the operator. + + Current: {current-channel} ({current-version}) + Target: {new-channel} ({target-version}) + + Continue? 
(yes/no) + ``` + - Update Subscription to new channel: + ```bash + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"channel":"{new-channel}"}}' + ``` + - Display confirmation: + ``` + ✓ Subscription updated to channel: {new-channel} + ``` + +9. **Approve Pending InstallPlan** (if `--approve` flag or automatic approval): + - If approval mode is Manual and `--approve` flag is set: + ```bash + oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + ``` + - Display approval confirmation: + ``` + ✓ InstallPlan approved: {installplan-name} + ``` + +10. **Monitor Upgrade Progress**: + - Wait for new InstallPlan to be created (if switching channels): + ```bash + oc get installplan -n {namespace} -w --timeout=60s + ``` + - Wait for new CSV to reach "Succeeded" phase: + ```bash + oc get csv -n {namespace} -w --timeout=300s + ``` + - Display progress updates: + ``` + 🔄 Upgrade in progress... + ⏳ Waiting for InstallPlan to complete... + ⏳ New CSV installing: {new-csv-name} + ⏳ Old CSV replacing: {old-csv-name} + ``` + - Poll every 10 seconds to check status + - Timeout: 10 minutes for upgrade to complete + +11. **Verify Upgrade Success**: + - Check new CSV status: + ```bash + oc get csv -n {namespace} -o json + ``` + - Verify new CSV phase is "Succeeded" + - Get new version: + ```bash + oc get csv {new-csv-name} -n {namespace} -o jsonpath='{.spec.version}' + ``` + - Check deployments are healthy: + ```bash + oc get deployments -n {namespace} + ``` + - Check pods are running: + ```bash + oc get pods -n {namespace} + ``` + +12. **Display Upgrade Summary**: + ``` + ✓ Operator Upgrade Complete! 
+ + Operator: {display-name} + Namespace: {namespace} + Previous Version: {old-version} + Current Version: {new-version} + Channel: {channel} + + Deployment Status: + - {deployment-1}: 1/1 replicas ready + - {deployment-2}: 1/1 replicas ready + + To check status: /olm:status {operator-name} {namespace} + ``` + +13. **Handle Upgrade Failures**: + - If upgrade fails or times out: + ``` + ❌ Operator upgrade failed + + Current State: + - CSV: {csv-name} (Phase: {phase}) + - Message: {error-message} + + Troubleshooting steps: + 1. Check CSV status: oc describe csv {csv-name} -n {namespace} + 2. Check events: oc get events -n {namespace} --sort-by='.lastTimestamp' + 3. Check InstallPlan: oc get installplan -n {namespace} + 4. Run diagnostics: /olm:diagnose {operator-name} {namespace} + + To rollback (if OLM supports): + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"channel":"{old-channel}"}}' + ``` + +## Return Value +- **Success**: Operator upgraded successfully with new version details +- **Pending Approval**: Upgrade waiting for manual approval with instructions +- **No Update Available**: Operator is already at the latest version +- **Error**: Upgrade failed with specific error message and troubleshooting guidance +- **Format**: Structured output showing: + - Previous and current versions + - Channel information + - Deployment and pod status + - Next steps or related commands + +## Examples + +1. **Check for and install updates in current channel**: + ``` + /olm:upgrade openshift-cert-manager-operator + ``` + +2. **Upgrade with specific namespace**: + ``` + /olm:upgrade external-secrets-operator eso-operator + ``` + +3. **Switch to a different channel**: + ``` + /olm:upgrade openshift-cert-manager-operator cert-manager-operator --channel=tech-preview-v1.14 + ``` + This switches from stable-v1 to tech-preview-v1.14 channel. + +4. 
**Approve pending upgrade (manual approval mode)**: + ``` + /olm:upgrade openshift-cert-manager-operator --approve + ``` + +5. **Switch channel and approve in one command**: + ``` + /olm:upgrade prometheus prometheus-operator --channel=beta --approve + ``` + +## Arguments +- **$1** (operator-name): Name of the operator to upgrade (required) + - Example: "openshift-cert-manager-operator" + - Must match the operator's Subscription name +- **$2** (namespace): Namespace where operator is installed (optional) + - If not provided, searches all namespaces + - Example: "cert-manager-operator" +- **$3+** (flags): Optional flags + - `--channel=<channel-name>`: Switch to specified channel + - Example: `--channel=stable-v1`, `--channel=tech-preview` + - Triggers upgrade/downgrade to the version in that channel + - `--approve`: Automatically approve pending InstallPlan + - Only needed for operators with Manual approval mode + - Equivalent to `/olm:approve` command + +## Notes + +- **Automatic Updates**: Operators with `installPlanApproval: Automatic` will upgrade automatically when new versions are available in their channel +- **Manual Approval**: Operators with `installPlanApproval: Manual` require explicit approval via `--approve` flag or `/olm:approve` command +- **Channel Switching**: Changing channels may result in upgrade or downgrade depending on the versions in each channel +- **Rollback**: OLM has limited rollback support. 
Switching back to the previous channel may work, but data migration issues may occur +- **Upgrade Timing**: Upgrades happen according to the operator's upgrade strategy (some may cause downtime) + +## Troubleshooting + +- **No updates available**: + ```bash + # Check current version + oc get csv -n {namespace} + + # Check available versions + oc get packagemanifest {operator-name} -n openshift-marketplace -o json + ``` + +- **Upgrade stuck or pending**: + ```bash + # Check InstallPlan status + oc get installplan -n {namespace} + + # Check for events + oc get events -n {namespace} --sort-by='.lastTimestamp' | tail -20 + ``` + +- **Manual approval required**: + ```bash + # List pending InstallPlans + oc get installplan -n {namespace} -o json | jq '.items[] | select(.spec.approved==false)' + + # Approve specific InstallPlan + /olm:approve {operator-name} {namespace} + ``` + +- **Upgrade failed**: + ```bash + # Check CSV status + oc describe csv -n {namespace} + + # Check operator logs + oc logs -n {namespace} deployment/{operator-deployment} + + # Run diagnostics + /olm:diagnose {operator-name} {namespace} + ``` + +- **Rollback needed**: + - OLM doesn't have built-in rollback + - Can try switching back to previous channel, but may have issues: + ```bash + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"channel":"{old-channel}"}}' + ``` + - Consider backup/restore of custom resources before upgrading + +## Related Commands + +- `/olm:status ` - Check current version and available updates +- `/olm:approve ` - Approve pending InstallPlans +- `/olm:install ` - Install an operator +- `/olm:diagnose ` - Diagnose upgrade issues + +## Additional Resources + +- [Red Hat OpenShift: Updating Installed Operators](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-updating-operators) +- [Red Hat OpenShift: Approving Operator 
Upgrades](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators) +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + + diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..163524e --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,85 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:openshift-eng/ai-helpers:plugins/olm", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "ca3a4be43d9d3f1c57e02c8f719552b255241fe4", + "treeHash": "dea6ca8dc8c867699dd577fdb3fd8456ad8c54775c9c219dfb4790d7a0164652", + "generatedAt": "2025-11-28T10:27:29.451529Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "olm", + "description": "OLM (Operator Lifecycle Manager) plugin for operator management and debugging", + "version": "0.1.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "400a3af553f3a727dc7e9492df2edda6e5fa7f539fe4b865136e3d22bd27ad48" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "418a80b7970ce4427c6ed9165caaeb96c661aff7217b7221b8cfdeaa2c70ba90" + }, + { + "path": "commands/uninstall.md", + "sha256": "0659fc3bb23d71b11c3a7abf09fe58215b3ac22d7e4d1e048fb7d38a77f1e7c9" + }, + { + "path": "commands/debug.md", + "sha256": "36afa5a3c1df621a7e3a5cdfe142b3381421aae47c87aa1175b0dc9d8b1ff60b" + }, + { + "path": "commands/search.md", + "sha256": "3a5044702885042e9961ef6397687c1ae27386feeac95da4ae8837b93895572c" + }, + { + "path": "commands/diagnose.md", + "sha256": "02f50a81f36864d7590e01e98cc91351b8347d8cf58f6cde97a36fb3e4e040c4" + }, + { + "path": "commands/status.md", + 
"sha256": "b1e80e62abc86db3ebb251cf95925b2f6961945e7d7e2d3674e8f6d100e3be5f" + }, + { + "path": "commands/install.md", + "sha256": "ade4e4729f202ff468b98ad1d0d2ce15ba52e441ae89938c27f3f749b0a07359" + }, + { + "path": "commands/list.md", + "sha256": "e9dc374d23f4b8f06ec8d469a5da63a39b42826a091e74d942440b470074029c" + }, + { + "path": "commands/catalog.md", + "sha256": "7cf44fd3fdb1d96b84b72f9e5ce3496e4b30544e926093ae5a82172cf7434621" + }, + { + "path": "commands/approve.md", + "sha256": "fb457e5e1960acb42dbf30663774d65f1c26bc9a802d67af02268e816eef7e0c" + }, + { + "path": "commands/upgrade.md", + "sha256": "fa0c02a2ea769f6172b4eda8099d07a86cb7d16f8adda1e80565698915784b0d" + }, + { + "path": "commands/opm.md", + "sha256": "289172093e9a3ab742bc971b4ae443f961ce06f5c01dfc4ffde095bd5ec70141" + } + ], + "dirSha256": "dea6ca8dc8c867699dd577fdb3fd8456ad8c54775c9c219dfb4790d7a0164652" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file