From 11bee70e53e58b4aae5a1e95e4a19464abd83312 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 09:00:26 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 12 + README.md | 3 + plugin.lock.json | 81 +++ skills/buildkite-status/SKILL.md | 592 ++++++++++++++++++ .../references/annotation-patterns.md | 196 ++++++ .../references/buildkite-states.md | 107 ++++ .../references/tool-capabilities.md | 291 +++++++++ .../references/troubleshooting.md | 365 +++++++++++ .../references/url-parsing.md | 272 ++++++++ .../scripts/find-commit-builds.js | 137 ++++ .../scripts/get-build-logs.js | 107 ++++ .../scripts/parse-buildkite-url.js | 84 +++ .../scripts/wait-for-build.js | 155 +++++ 13 files changed, 2402 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 plugin.lock.json create mode 100644 skills/buildkite-status/SKILL.md create mode 100644 skills/buildkite-status/references/annotation-patterns.md create mode 100644 skills/buildkite-status/references/buildkite-states.md create mode 100644 skills/buildkite-status/references/tool-capabilities.md create mode 100644 skills/buildkite-status/references/troubleshooting.md create mode 100644 skills/buildkite-status/references/url-parsing.md create mode 100755 skills/buildkite-status/scripts/find-commit-builds.js create mode 100755 skills/buildkite-status/scripts/get-build-logs.js create mode 100755 skills/buildkite-status/scripts/parse-buildkite-url.js create mode 100755 skills/buildkite-status/scripts/wait-for-build.js diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..201c7dd --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "ci-cd-tools", + "description": "CI/CD pipeline tools and integrations", + "version": "1.0.0", + "author": { + "name": "Josh Nichols", + "email": "josh@technicalpickles.com" + }, + "skills": [ + "./skills" + ] +} \ No newline at end of file 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..f2cf466 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# ci-cd-tools + +CI/CD pipeline tools and integrations diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..958fe60 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,81 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:technicalpickles/pickled-claude-plugins:plugins/ci-cd-tools", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "fd3fb228885b46b8133e352d21d3914dd06f9554", + "treeHash": "07ab8cafe621666a03bb940dae73f249d824114affbbb46c4c8b202f157bb1c7", + "generatedAt": "2025-11-28T10:28:36.223024Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "ci-cd-tools", + "description": "CI/CD pipeline tools and integrations", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "6b5fe70c92bc2ed5978a9989de443c5a1309844ec9be30c2b22f43ea6d3fe3a7" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "da8e209b5b52810ee061a6fb16af1a02a181c26e7f8bb4c1b499f0d94c1ac641" + }, + { + "path": "skills/buildkite-status/SKILL.md", + "sha256": "733066694d44fe9128fca348deb283bb69e249ec6efef1f36159fa70bbe5507a" + }, + { + "path": "skills/buildkite-status/references/url-parsing.md", + "sha256": "4e16daf6eae951a64673d27b2231d3334274aeeeaf7e51084d633bdd610ccd85" + }, + { + "path": "skills/buildkite-status/references/troubleshooting.md", + "sha256": "49a32ad4944b551020e0f985493656944004193423693e95c977a2787c8bc5f6" + }, + { + "path": "skills/buildkite-status/references/buildkite-states.md", + "sha256": "05c3bd32e5552301310cf86a7efc75f73d1a9f33f854d0d84f953bac820b2bd4" + }, + { + "path": 
"skills/buildkite-status/references/annotation-patterns.md", + "sha256": "a59d03b7fabfc3dc382547134604741f56e52edb8487b209ca8649b46ff02019" + }, + { + "path": "skills/buildkite-status/references/tool-capabilities.md", + "sha256": "c590a1763df6a86d71759a7b6b6bacde00468502a1db86b0496c7c27be88d86b" + }, + { + "path": "skills/buildkite-status/scripts/parse-buildkite-url.js", + "sha256": "11fba63d54ba7363cfe464bc6bfdd0fb9966f069a9f41aac6ec8c025b39a1fa4" + }, + { + "path": "skills/buildkite-status/scripts/find-commit-builds.js", + "sha256": "6a18b4a568e9c2b42b11130ebdc73af5b153772a91346aae44656a6e644ee6b4" + }, + { + "path": "skills/buildkite-status/scripts/wait-for-build.js", + "sha256": "8baae518c2aa73b3297e39f5330cbcae5208515285ee7627bf09178b94660d42" + }, + { + "path": "skills/buildkite-status/scripts/get-build-logs.js", + "sha256": "e1a96e3dba1eddb0ef982fba9bf3de09aab01d73c2030f3ac2e29e73531c9d78" + } + ], + "dirSha256": "07ab8cafe621666a03bb940dae73f249d824114affbbb46c4c8b202f157bb1c7" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/buildkite-status/SKILL.md b/skills/buildkite-status/SKILL.md new file mode 100644 index 0000000..df499af --- /dev/null +++ b/skills/buildkite-status/SKILL.md @@ -0,0 +1,592 @@ +--- +name: buildkite-status +description: Use when checking Buildkite CI status for PRs, branches, or builds - provides workflows for monitoring build status, investigating failures, and handling post-push scenarios with progressive detail disclosure. Use when tempted to use GitHub tools instead of Buildkite-native tools, or when a Buildkite tool fails and you want to fall back to familiar alternatives. +--- + +# Buildkite Status + +## Overview + +This skill provides workflows and tools for checking and monitoring Buildkite CI status. It focuses on **checking status and investigating failures** rather than creating or configuring pipelines. 
Use this skill when working with Buildkite builds, especially for PR workflows, post-push monitoring, and failure investigation. + +## When to Use This Skill + +Use this skill when: + +- Checking CI status for the current branch or PR +- Investigating why a build failed +- Monitoring builds after a git push +- Waiting for builds to complete +- Checking build status across multiple repos/PRs +- Understanding what "broken" or other Buildkite states mean + +## Tool Hierarchy and Selection + +**CRITICAL**: Always use Buildkite-native tools. Never fall back to GitHub tools (`gh pr view`, GitHub API, etc.) - they only show summaries and lose critical information (annotations, logs, real-time updates, state distinctions). + +Use tools in this priority order: + +### Primary: MCP Tools (Always Use These First) + +**Reliability**: Direct Buildkite API access, always available +**Capabilities**: All operations (list, get, wait, unblock) +**When**: Default choice for ALL workflows + +Available MCP tools: + +- `buildkite:get_build` - Get detailed build information +- `buildkite:list_builds` - List builds for a pipeline +- `buildkite:list_annotations` - Get annotations for a build +- `buildkite:get_pipeline` - Get pipeline configuration +- `buildkite:list_pipelines` - List all pipelines in an org +- **`buildkite:wait_for_build`** - Wait for a build to complete (PREFERRED for monitoring) +- **`buildkite:get_logs`** - Retrieve job logs (CRITICAL for debugging failures) +- `buildkite:get_logs_info` - Get log metadata +- `buildkite:list_artifacts` - List build artifacts + +### Secondary: bktide CLI (Convenience) + +**Purpose**: Human-readable terminal output +**Limitation**: External dependency, requires npm/npx +**When**: Interactive terminal work when MCP output is too verbose + +**Critical Limitation**: bktide CANNOT retrieve job logs. It only displays build summaries and job lists. For log retrieval, always use MCP tools. 
+ +Common commands: + +```bash +npx bktide pipelines # List pipelines +npx bktide builds {org}/{pipeline} # List builds +npx bktide build {org}/{pipeline}#{build} # Get build details +npx bktide annotations {org}/{pipeline}#{build} # Show annotations +``` + +### Tertiary: Bundled Scripts (Helper Wrappers) + +**Purpose**: Pre-built workflows combining multiple tool calls +**Limitation**: External dependencies (bktide, specific versions) +**When**: Convenience wrappers only - use MCP tools if scripts fail + +This skill includes scripts for common workflows: + +- **`scripts/wait-for-build.js`** - Background monitoring script that polls until build completion +- **`scripts/find-commit-builds.js`** - Find builds matching a specific commit SHA + +### Tool Capability Matrix + +Different tools have different capabilities. Understanding these limitations prevents wasted effort. + +**Key Capabilities:** + +| Capability | MCP Tools | bktide | Scripts | +| ----------------- | --------- | ------ | ------- | +| List builds | ✅ | ✅ | ✅ | +| Get build details | ✅ | ✅ | ✅ | +| Get annotations | ✅ | ✅ | ❌ | +| **Retrieve logs** | **✅** | **❌** | **✅** | +| Wait for build | ✅ | ❌ | ✅ | +| Unblock jobs | ✅ | ❌ | ❌ | + +**Most Important**: Only MCP tools and scripts can retrieve job logs. bktide cannot. + +For complete capability details and examples, see [references/tool-capabilities.md](references/tool-capabilities.md). + +### When Tools Fail: Fallback Hierarchy + +**If wait-for-build.js script fails:** + +1. ✅ Use `buildkite:wait_for_build` MCP tool instead (preferred) +2. ✅ Use `buildkite:get_build` MCP tool in a polling loop +3. ❌ Do NOT fall back to `gh pr view` or GitHub tools + +**If bktide fails:** + +1. ✅ Use equivalent MCP tool +2. ❌ Do NOT fall back to GitHub tools + +**If MCP tools fail:** + +1. ✅ Check MCP server connection status +2. ✅ Restart MCP connection +3. ✅ Report the MCP failure to your human partner +4. ❌ Do NOT fall back to GitHub tools + +**Critical**: One tool failing does NOT mean the entire skill is invalid. 
Move up the hierarchy, don't abandon Buildkite tools. + +## Core Workflows + +### 1. Investigating a Build from URL (Most Common) + +When a user provides a Buildkite URL for a failing build, follow this workflow to investigate. + +**Example URL formats:** + +- Build URL: `https://buildkite.com/org/pipeline/builds/12345` +- Step URL: `https://buildkite.com/org/pipeline/builds/12345/steps/canvas?sid=019a5f...` + +**Step 1: Extract build identifiers from URL** + +Parse the URL to extract: + +- Organization slug (e.g., "gusto") +- Pipeline slug (e.g., "payroll-building-blocks") +- Build number (e.g., "12345") + +Ignore the `sid` query parameter - it's a step ID, not needed for initial investigation. + +**Step 2: Get build overview** + +```javascript +mcp__MCPProxy__call_tool('buildkite:get_build', { + org_slug: '{org}', + pipeline_slug: '{pipeline}', + build_number: '{build}', + detail_level: 'summary', +}); +``` + +Check the overall build state: `passed`, `failed`, `running`, `blocked`, `canceled`. + +**Step 3: Identify failed jobs** + +If build state is `failed`, get detailed job information: + +```javascript +mcp__MCPProxy__call_tool('buildkite:get_build', { + org_slug: '{org}', + pipeline_slug: '{pipeline}', + build_number: '{build}', + detail_level: 'detailed', + job_state: 'failed', +}); +``` + +This returns only jobs with `state: "failed"` (not "broken" - see state reference). + +**Step 4: Retrieve logs for failed jobs** + +For each failed job, extract its `uuid` field and retrieve logs. See "Retrieving Job Logs" workflow below for detailed instructions. + +**Step 5: Analyze error output** + +Look for: + +- Stack traces +- Test failure messages +- Exit codes and error messages +- File paths and line numbers + +**Step 6: Help reproduce locally** + +Based on the error, suggest: + +- Which tests to run locally +- Environment setup needed +- Commands to reproduce the failure + +--- + +### 2. Retrieving Job Logs + +**CRITICAL**: This is the most important capability. Without logs, you cannot debug failures. 
+ +Once you've identified a failed job, retrieve its logs to see the actual error. + +**Prerequisites:** + +- Organization slug +- Pipeline slug +- Build number +- Job UUID (from build details) + +**Important**: Job UUIDs ≠ Step IDs. URLs contain step IDs (`sid=019a5f...`), but MCP tools need job UUIDs from the build details response. + +**Step 1: Get the job UUID** + +If you have a job label (e.g., "ste rspec"), use `get_build` with `detail_level: "detailed"`: + +```javascript +mcp__MCPProxy__call_tool('buildkite:get_build', { + org_slug: 'gusto', + pipeline_slug: 'payroll-building-blocks', + build_number: '29627', + detail_level: 'detailed', + job_state: 'failed', +}); +``` + +In the response, find the job by matching the `label` field. Extract its `uuid` field (format: `019a5f20-2d30-4c67-9edd-...`). + +**Step 2: Retrieve logs using the job UUID** + +Use the MCP tool to get logs: + +```javascript +mcp__MCPProxy__call_tool('buildkite:get_logs', { + org_slug: 'gusto', + pipeline_slug: 'payroll-building-blocks', + build_number: '29627', + job_id: '{job-uuid}', +}); +``` + +The response contains the log output from the job execution. + +**Common Issues:** + +- **"job not found" error**: You likely provided a step ID instead of a job UUID. Step IDs come from URLs (`sid=019a5f...`). Job UUIDs come from `get_build` API responses. Solution: Call `get_build` with `detail_level: "detailed"` to find the correct job UUID. + +- **Empty logs**: The job may not have started yet, or logs may not be available yet. Check the job's `state` field first - it should be in a terminal state (`passed`, `failed`, `canceled`). + +- **Multiple jobs with same label**: Some pipelines parallelize jobs with the same label (e.g., "rspec (1/10)", "rspec (2/10)"). Filter by the full label string to find the specific failed job. + +**Fallback Strategy:** + +If MCP tools fail (e.g., connection issues, permissions), you can: + +1. 
Construct the log URL manually and view in browser: + + ``` + https://buildkite.com/{org}/{pipeline}/builds/{build}/jobs/{job-uuid} + ``` + +2. Use the bundled script (if available): + ```bash + ~/.claude/skills/buildkite-status/scripts/get-build-logs.js + ``` + +**Why bktide Cannot Help:** + +The bktide CLI does NOT have a logs command. It can show build summaries and job lists, but cannot retrieve log content. Always use MCP tools for log retrieval. + +See [references/tool-capabilities.md](references/tool-capabilities.md) for the complete tool capability matrix. + +--- + +### 3. Checking Current Branch/PR Status + +This is the most common workflow when working on a branch: + +**Step 1: Identify the pipeline and branch** + +Determine which pipeline(s) run on PRs for this repository. Common patterns: + +- Repository name matches pipeline slug +- Monorepo may have pipeline named after the main repo + +**Step 2: Find builds for the current branch** + +Use MCP tools to list recent builds: + +```javascript +mcp__MCPProxy__call_tool('buildkite:list_builds', { + org_slug: '{org}', + pipeline_slug: '{pipeline}', + branch: '{branch}', + detail_level: 'summary', +}); +``` + +Or use bktide: + +```bash +npx bktide builds --format json {org}/{pipeline} +``` + +**Step 3: Progressive disclosure of status** + +Follow this pattern when examining builds: + +1. **Overall state** - Is it `passed`, `failed`, `running`, `blocked`, or `canceled`? +2. **Job summary** - How many jobs passed/failed/broken? +3. **Annotations** (if present) - Check for test failures, warnings, or errors +4. **Failed job details** - Get logs for actually failed jobs (not just "broken") + +### 4. Post-Push Monitoring Workflow + +After pushing code, follow this workflow to monitor the CI build: + +**Step 1: Find builds for the pushed commit** + +Use the find-commit-builds script: + +```bash +~/.claude/skills/buildkite-status/scripts/find-commit-builds.js +``` + +Or manually search using MCP tools with commit filter. 
+ +**Step 2: Monitor the build** + +**Option A (Preferred): Use MCP wait_for_build tool** + +```javascript +mcp__MCPProxy__call_tool('buildkite:wait_for_build', { + org_slug: '{org}', + pipeline_slug: '{pipeline}', + build_number: '{build}', + timeout: 1800, + poll_interval: 30, +}); +``` + +This will: + +- Poll every 30 seconds (configurable with `poll_interval`) +- Report status changes +- Complete when build reaches terminal state (passed/failed/canceled) +- Timeout after 30 minutes (configurable with `timeout`) + +**Option B (Fallback): Use wait-for-build.js script** + +If you prefer background execution: + +```bash +~/.claude/skills/buildkite-status/scripts/wait-for-build.js --timeout 1800 --interval 30 +``` + +**If the script fails** (e.g., bktide dependency error), use Option A - the MCP tool is more reliable. + +**Step 3: Check on progress** + +Periodically check the background job or wait for it to complete. When it finishes, check the exit code: + +- 0 = passed +- 1 = failed +- 2 = canceled +- 3 = timeout + +**Step 4: Investigate failures** + +If the build failed, follow the "### 1. Investigating a Build from URL" workflow above. + +### 5. Investigating Failures (Deprecated) + +**Note**: This workflow is deprecated. Use "### 1. Investigating a Build from URL" and "### 2. Retrieving Job Logs" instead for a more complete investigation process. 
+ +When a build has failed, use this systematic approach: + +**Step 1: Get build overview** + +```javascript +mcp__MCPProxy__call_tool('buildkite:get_build', { + org_slug: '{org}', + pipeline_slug: '{pipeline}', + build_number: '{build}', + detail_level: 'detailed', + job_state: 'failed', // Only show failed jobs +}); +``` + +This gives you: + +- Overall build state +- Job summary (how many failed vs broken) +- List of failed jobs only + +**Step 2: Check annotations** + +Some projects put test failures in annotations: + +```javascript +mcp__MCPProxy__call_tool('buildkite:list_annotations', { + org_slug: '{org}', + pipeline_slug: '{pipeline}', + build_number: '{build}', +}); +``` + +Look for annotations with `style: "error"` or `style: "warning"`. + +**Important**: Not all projects use annotations. See [references/annotation-patterns.md](references/annotation-patterns.md) for project-specific patterns. + +**Step 3: Examine failed jobs** + +For each failed job (not "broken" - see state reference below): + +1. Get the job details from the build data +2. Check the job's log output +3. Look for stack traces, error messages, or test failures + +**Step 4: Understand "broken" vs "failed"** + +**Critical**: A job showing as "broken" is often NOT a failure. It typically means: + +- The job was skipped because an earlier job failed +- The job's dependencies weren't met +- Conditional pipeline logic determined the job wasn't needed + +See [references/buildkite-states.md](references/buildkite-states.md) for complete state explanations. + +**Example**: In large monorepos, many jobs show "broken" because they were skipped due to file changes not affecting them. This is normal and expected. + +### 6. Checking Blocked Builds + +When a build is in `blocked` state, it's waiting for manual approval: + +**Step 1: Identify the block step** + +Get the build with `detail_level: "detailed"` and look for jobs with `state: "blocked"`. 
+ +**Step 2: Review what's being blocked** + +Block steps typically have a `label` describing what approval is needed (e.g., "Deploy to Production"). + +**Step 3: Unblock if appropriate** + +Use the MCP tool to unblock: + +```javascript +mcp__MCPProxy__call_tool('buildkite:unblock_job', { + org_slug: '{org}', + pipeline_slug: '{pipeline}', + build_number: '{build}', + job_id: '{job-uuid}', + fields: {}, // Optional form fields if the block step has inputs +}); +``` + +## Understanding Buildkite States + +Buildkite has several states that can be confusing. Here's a quick reference: + +### Build States + +- `passed` - All jobs completed successfully ✅ +- `failed` - One or more jobs failed ❌ +- `running` - Build is currently executing 🔄 +- `blocked` - Waiting for manual approval 🚫 +- `canceled` - Build was canceled ⛔ + +### Job States + +- `passed` - Job succeeded ✅ +- `failed` - Job failed with non-zero exit ❌ +- `broken` - **MISLEADING**: Usually means skipped due to pipeline logic, NOT a failure ⚠️ +- `soft_failed` - Failed but marked as non-blocking 〰️ +- `skipped` - Job was skipped ⏭️ + +**For complete state reference and project-specific patterns**, read [references/buildkite-states.md](references/buildkite-states.md). + +## Progressive Disclosure Pattern + +Always follow this pattern when checking build status: + +1. **Start broad**: Overall build state (passed/failed/running) +2. **Check summary**: Job counts (how many passed/failed/broken) +3. **Check annotations**: If present, they often contain key information +4. **Drill into failures**: Only examine failed jobs (not broken) +5. **Read logs**: Get actual error messages and stack traces + +Don't immediately jump to logs - the build state and annotations often tell you what you need to know. 
+ +## Project-Specific Patterns + +### Large Projects / Monorepos + +- **Use annotations heavily**: Test failures are usually summarized in annotations +- **Many "broken" jobs**: Normal due to conditional execution +- **Complex job graphs**: Jobs have dependencies and conditional logic +- **Check annotations first**: They save time vs reading all logs + +### Small Projects + +- **No annotations**: All information is in job logs +- **Simpler job structure**: Fewer dependencies and conditions +- **"Broken" is unusual**: May indicate an actual problem +- **Read logs directly**: No annotations to summarize failures + +## Anti-Patterns: What NOT to Do + +### ❌ Falling Back to GitHub Tools + +**Don't**: Use `gh pr view`, `gh pr checks`, or GitHub API to check Buildkite status + +**Why**: GitHub shows Buildkite check summary only. You lose: + +- Real-time build logs and output +- Annotations with test failure details +- Job-level breakdown and states +- Ability to distinguish "broken" (skipped) from "failed" +- Direct build monitoring and waiting +- Proper state information + +**Reality**: Always use Buildkite tools. GitHub summarizes; Buildkite is the source of truth. + +### ❌ Abandoning Skill on Tool Failure + +**Don't**: "The script failed, so I'll use GitHub tools instead" + +**Why**: The skill documents MULTIPLE tool tiers: + +- MCP tools (primary, always available) +- bktide CLI (secondary, convenience) +- Scripts (tertiary, helpers) + +**Reality**: One tool failing doesn't invalidate the skill. Follow the fallback hierarchy - move to MCP tools, don't abandon Buildkite entirely. + +### ❌ Emergency Override Rationalization + +**Don't**: "This is urgent, I don't have time to follow the skill" + +**Why**: Skills exist ESPECIALLY for high-pressure situations. Disciplined workflows prevent mistakes when you're rushed. Making wrong tool choices under pressure wastes MORE time debugging. + +**Reality**: Following the skill is FASTER than recovering from wrong decisions. 
Taking 2 minutes to use the right tool saves 20 minutes of confusion. + +### ❌ "I Already Know X" Rationalization + +**Don't**: "I already know `gh pr view` works, why learn Buildkite tools?" + +**Why**: Familiarity ≠ effectiveness. You'll spend more time working around GitHub's limitations than learning the proper tools. + +**Reality**: Invest 2 minutes learning Buildkite MCP tools once. Save hours across all future builds. + +## Red Flags - STOP + +If you catch yourself thinking ANY of these thoughts, you're about to violate this skill: + +- "The script failed, so the skill doesn't apply" +- "This is an emergency, no time for the skill" +- "I already know gh pr view works" +- "GitHub tools show the same information" +- "I'll just check GitHub quickly" +- "One tool failed, so I'll use what I know" +- "The skill is for normal situations, not emergencies" +- "I don't have time to learn new tools right now" + +**These are rationalizations. Stop. Follow the tool hierarchy. Use Buildkite MCP tools.** + +## Common Mistakes to Avoid + +1. **Treating "broken" as "failed"**: Broken usually means skipped, not failed +2. **Ignoring annotations**: They often contain the most actionable information +3. **Not filtering by state**: Use `job_state: "failed"` to focus on actual failures +4. **Missing blocked builds**: A blocked build won't progress without manual intervention +5. **Polling in foreground**: Use MCP `wait_for_build` tool or background scripts + +## Tips for Efficient Status Checking + +1. **Use detail levels**: Start with `detail_level: "summary"` to reduce data +2. **Filter by job state**: Request only failed jobs when investigating +3. **Background monitoring**: Run wait-for-build.js in background after pushing +4. **Check annotations first**: For projects that use them, they're faster than logs +5. 
**Trust the scripts**: The bundled scripts handle polling, timeouts, and edge cases + +## Resources + +### References + +- **[buildkite-states.md](references/buildkite-states.md)** - Complete guide to Buildkite states, including the misleading "broken" state and project-specific patterns +- **[annotation-patterns.md](references/annotation-patterns.md)** - How different projects use annotations and when to check them +- **[tool-capabilities.md](references/tool-capabilities.md)** - Comprehensive capability matrix for MCP tools, bktide, and scripts +- **[url-parsing.md](references/url-parsing.md)** - Understanding Buildkite URLs, step IDs vs job UUIDs +- **[troubleshooting.md](references/troubleshooting.md)** - Common errors, solutions, and decision tree for when stuck + +### Scripts + +- **[wait-for-build.js](scripts/wait-for-build.js)** - Background monitoring with timeout and polling +- **[find-commit-builds.js](scripts/find-commit-builds.js)** - Find builds for a specific commit +- **[get-build-logs.js](scripts/get-build-logs.js)** - Helper for log retrieval with UUID resolution (placeholder) +- **[parse-buildkite-url.js](scripts/parse-buildkite-url.js)** - Extract components from Buildkite URLs + +Run scripts with `--help` for usage information. diff --git a/skills/buildkite-status/references/annotation-patterns.md b/skills/buildkite-status/references/annotation-patterns.md new file mode 100644 index 0000000..da9898c --- /dev/null +++ b/skills/buildkite-status/references/annotation-patterns.md @@ -0,0 +1,196 @@ +# Buildkite Annotation Patterns + +Annotations are build-level messages that appear at the top of a build page. They can contain success messages, warnings, errors, or informational content. **Not all projects use annotations consistently.** + +## What Are Annotations? + +Annotations are created by build steps using the `buildkite-agent annotate` command. They appear prominently at the top of the build page and can be styled with different colors/icons. 
+ +### Annotation Styles + +- **`success`** - Green, checkmark icon, positive message +- **`info`** - Blue, info icon, informational message (most common) +- **`warning`** - Yellow, warning icon, something to be aware of +- **`error`** - Red, error icon, indicates problems + +## Project-Specific Patterns + +### Projects That Use Annotations Heavily (e.g., zen-payroll) + +These projects surface important information in annotations: + +1. **Test Failures**: RSpec, Jest, or other test failures may be summarized in annotations + + - Failed test count + - Links to failed test files + - Stack traces or error messages + +2. **Coverage Reports**: Code coverage changes or drops below thresholds + +3. **Linting Errors**: Rubocop, ESLint violations grouped by severity + +4. **Build Resources**: Links to documentation, help channels, or common issues + +5. **Security Scans**: Dependency vulnerabilities, security warnings + +6. **Performance Issues**: Slow tests, memory issues, or other performance concerns + +**When checking status**: Always look at annotations first for these projects. They often contain the most actionable information. + +### Projects Without Annotations (e.g., gusto-karafka) + +Smaller or simpler projects may not use annotations at all. 
For these projects: + +- **All failure information is in job logs**: Must read individual job output +- **No centralized summary**: Need to check each failed job separately +- **Simpler debugging path**: Less information to parse, but more manual work + +## Accessing Annotations + +### Via MCP Tools + +```javascript +// List all annotations for a build +mcp__MCPProxy__call_tool('buildkite:list_annotations', { + org_slug: 'gusto', + pipeline_slug: 'zenpayroll', + build_number: '1359675', +}); +``` + +Annotation response includes: + +- `context`: Unique identifier for the annotation +- `style`: success/info/warning/error +- `body_html`: HTML content of the annotation +- `created_at`: Timestamp + +### Via bktide + +```bash +npx bktide annotations gusto/zenpayroll#1359675 +``` + +## Interpreting Annotations + +### 1. Start with Error-Styled Annotations + +Check for `style: "error"` first - these indicate critical problems: + +- Test suite failures +- Build failures +- Security issues + +### 2. Check Warning Annotations + +`style: "warning"` may indicate: + +- Degraded performance +- Coverage drops +- Flaky tests +- Deprecated dependencies + +### 3. Info Annotations for Context + +`style: "info"` often contains: + +- Build metadata +- Links to resources +- Change summaries +- Help information + +### 4. Success Annotations + +`style: "success"` indicates: + +- All tests passed +- Coverage improved +- Performance metrics good + +## Common Annotation Patterns + +### Test Failure Annotations + +Typically include: + +``` +❌ 15 tests failed + +spec/models/user_spec.rb + - validates email format + - validates password strength + +spec/controllers/api_controller_spec.rb + - returns 401 when unauthorized +``` + +**Action**: Read the listed test failures, then examine the job logs for full details. + +### Build Resource Annotations + +``` +Having problems with your build? 
+- Check build documentation: [link] +- Ask in #build-stability Slack channel +``` + +**Action**: These are informational - reference them if you're stuck debugging. + +### Coverage Annotations + +``` +⚠️ Code coverage decreased by 2.5% +Current: 85.3% | Previous: 87.8% +``` + +**Action**: May or may not be actionable depending on project policy. + +## When Annotations Are Missing + +If a build has no annotations: + +1. **Don't assume success**: Check the overall build state +2. **Look at job logs**: All failure information will be in individual jobs +3. **Check job states**: Failed jobs will have `state: "failed"` +4. **Read failed job logs**: Use MCP tools or bktide to get logs + +## Inconsistencies Across Projects + +Be aware that annotation usage varies wildly: + +- **Some projects**: Every failure is annotated +- **Some projects**: Only critical failures annotated +- **Some projects**: No annotations at all +- **Some projects**: Annotations are informational only, not diagnostic + +**Never rely solely on annotations.** Always check: + +1. Overall build state +2. Job states +3. Annotations (if present) +4. Job logs for failed jobs + +## Example Workflows + +### Checking a Failed Build With Annotations + +1. Get build status → state is `failed` +2. List annotations → find error-styled annotation with test failures +3. Note which tests failed from annotation +4. Get detailed logs for failed job +5. Read stack traces and error messages + +### Checking a Failed Build Without Annotations + +1. Get build status → state is `failed` +2. Check job summary → identify which jobs failed +3. Get detailed information for each failed job +4. Read logs for each failed job +5. Identify root cause from logs + +### Checking a Passing Build + +1. Get build status → state is `passed` +2. Optionally check annotations for warnings or info +3. Note any "broken" jobs (may be expected) +4. 
No need to read logs unless investigating performance diff --git a/skills/buildkite-status/references/buildkite-states.md b/skills/buildkite-status/references/buildkite-states.md new file mode 100644 index 0000000..b9b796e --- /dev/null +++ b/skills/buildkite-status/references/buildkite-states.md @@ -0,0 +1,107 @@ +# Buildkite Build and Job States + +Understanding Buildkite states is critical for correctly interpreting build status. Some states are misleading or require additional context. + +## Build States + +### Terminal States + +- **`passed`** - All jobs completed successfully +- **`failed`** - One or more jobs failed +- **`canceled`** - Build was manually canceled +- **`skipped`** - Build was skipped (e.g., due to branch filters) +- **`blocked`** - Build is waiting for manual approval via block step + +### Active States + +- **`running`** - Build is currently executing +- **`scheduled`** - Build is queued and waiting to start +- **`creating`** - Build is being created + +## Job States + +### Terminal States + +- **`passed`** - Job completed successfully +- **`failed`** - Job failed with non-zero exit code +- **`canceled`** - Job was canceled +- **`skipped`** - Job was skipped +- **`timed_out`** - Job exceeded time limit + +### Special States (Often Misleading) + +- **`broken`** - This is the most misleading state. It can mean: + + - Job was skipped because an earlier job in the pipeline failed + - Job was skipped due to dependency conditions not being met + - Job was skipped due to conditional logic in the pipeline config + - **NOT necessarily a failure of this specific job** + + Example: In the zen-payroll pipeline, many jobs show as "broken" but are actually skipped because their dependencies indicated they weren't needed (e.g., no relevant file changes). 
+ +- **`soft_failed`** - Job failed but was marked as "soft fail" (doesn't block pipeline) + - Shows as failed but doesn't cause overall build failure + - Often used for optional checks or flaky tests + +### Active States + +- **`waiting`** - Job is waiting for dependencies +- **`waiting_failed`** - Job was waiting but its dependency failed +- **`assigned`** - Job has been assigned to an agent +- **`accepted`** - Agent has accepted the job +- **`running`** - Job is currently executing +- **`blocked`** - Job is a block step waiting for manual unblock + +## Interpreting Build Status + +### Progressive Disclosure Pattern + +When checking build status, follow this pattern: + +1. **Start with overall state**: `passed`, `failed`, `canceled`, `blocked` +2. **If failed, check job summary**: How many jobs failed vs broken vs passed? +3. **Examine failed jobs specifically**: Don't assume "broken" means the job itself failed +4. **Check annotations**: Some projects surface important failures in annotations +5. **Inspect logs**: For actual failures, read the job logs + +### Common Pitfalls + +1. **Treating "broken" as "failed"**: A "broken" job is often just skipped due to pipeline logic, not an actual failure. + +2. **Ignoring soft fails**: Jobs marked as `soft_failed` may contain important information even though they don't block the build. + +3. **Missing blocked builds**: A `blocked` build is waiting for approval and won't progress without manual intervention. + +4. **Overlooking job dependencies**: Jobs may be skipped (`broken`) because their dependencies weren't met, which is expected behavior. 
+ +## Project-Specific Patterns + +### zen-payroll Pipeline + +- **Heavy use of conditional execution**: Many jobs are conditionally skipped based on file changes +- **"broken" is normal**: A build with many "broken" jobs may still be perfectly healthy +- **Check annotations**: Important test failures are often surfaced in build annotations +- **Multiple test suites**: Different test types (unit, integration, system) have different failure patterns + +### Smaller Pipelines (e.g., gusto-karafka) + +- **Fewer conditional jobs**: Most jobs are expected to run +- **"broken" usually indicates a problem**: Less conditional logic means broken jobs are more likely to be actual issues +- **Simpler job graphs**: Easier to trace why a job didn't run +- **May not use annotations**: Failures are usually just in job logs + +## When to Investigate + +Investigate a build when: + +1. Overall build state is `failed` +2. Jobs show `failed` state (not just `broken`) +3. Build is `blocked` and you need to unblock it +4. Annotations contain error messages +5. Job logs show actual errors (red output, stack traces, test failures) + +Don't automatically investigate when: + +1. Build is `passed` (even if some jobs are `broken`) +2. Jobs are `soft_failed` unless specifically requested +3. Jobs are `broken` due to conditional execution (check pipeline config) diff --git a/skills/buildkite-status/references/tool-capabilities.md b/skills/buildkite-status/references/tool-capabilities.md new file mode 100644 index 0000000..2adc7dc --- /dev/null +++ b/skills/buildkite-status/references/tool-capabilities.md @@ -0,0 +1,291 @@ +# Tool Capabilities Reference + +This document provides complete capability information for all Buildkite status checking tools. + +## Overview + +Three tool categories exist with different strengths and limitations: + +1. **MCP Tools** - Direct Buildkite API access via Model Context Protocol +2. **bktide CLI** - Human-readable command-line tool (npm package) +3. 
**Bundled Scripts** - Helper wrappers in this skill's `scripts/` directory + +## Capability Matrix + +| Capability | MCP Tools | bktide | Scripts | Notes | +| --------------------- | ------------------------------- | ----------------------- | -------------------------- | -------------------------- | +| List organizations | ✅ `buildkite:list_orgs` | ❌ | ❌ | | +| List pipelines | ✅ `buildkite:list_pipelines` | ✅ `bktide pipelines` | ❌ | | +| List builds | ✅ `buildkite:list_builds` | ✅ `bktide builds` | ✅ `find-commit-builds.js` | Scripts are specialized | +| Get build details | ✅ `buildkite:get_build` | ✅ `bktide build` | ❌ | | +| Get annotations | ✅ `buildkite:list_annotations` | ✅ `bktide annotations` | ❌ | | +| **Retrieve job logs** | **✅ `buildkite:get_logs`** | **❌ NO** | **✅ `get-build-logs.js`** | **bktide cannot get logs** | +| Get log metadata | ✅ `buildkite:get_logs_info` | ❌ | ❌ | | +| List artifacts | ✅ `buildkite:list_artifacts` | ❌ | ❌ | | +| Wait for build | ✅ `buildkite:wait_for_build` | ❌ | ✅ `wait-for-build.js` | MCP preferred | +| Unblock jobs | ✅ `buildkite:unblock_job` | ❌ | ❌ | | +| Real-time updates | ✅ | ❌ | ✅ | Via polling | +| Human-readable output | ❌ (JSON) | ✅ | Varies | | +| Works offline | ❌ | ❌ | ❌ | All need network | +| Requires auth | ✅ (MCP config) | ✅ (BK_TOKEN) | ✅ (uses bktide) | | + +## Detailed Tool Information + +### MCP Tools (Primary) + +**Access Method:** `mcp__MCPProxy__call_tool("buildkite:<tool_name>", {...})` + +**Authentication:** Configured in MCP server settings (typically uses `BUILDKITE_API_TOKEN`) + +**Pros:** + +- Complete API coverage +- Always available (no external dependencies) +- Real-time data +- Structured JSON responses + +**Cons:** + +- Verbose JSON output +- Requires parsing for human reading + +**Key Tools:** + +#### `buildkite:get_build` + +Get detailed build information including job states, timing, and metadata. 
+ +Parameters: + +- `org_slug` (required): Organization slug +- `pipeline_slug` (required): Pipeline slug +- `build_number` (required): Build number +- `detail_level` (optional): "summary" | "detailed" | "complete" +- `job_state` (optional): Filter jobs by state ("failed", "passed", etc.) + +Returns: Build object with jobs array, state, timing, author, etc. + +#### `buildkite:get_logs` + +**THE CRITICAL TOOL** - Retrieve actual log output from a job. + +Parameters: + +- `org_slug` (required): Organization slug +- `pipeline_slug` (required): Pipeline slug +- `build_number` (required): Build number +- `job_id` (required): Job UUID (NOT step ID from URL) + +Returns: Log text content + +**Common Issues:** + +- "job not found" → Using step ID instead of job UUID +- Empty response → Job hasn't started yet or hasn't finished yet + +#### `buildkite:wait_for_build` + +Poll build until completion. + +Parameters: + +- `org_slug` (required): Organization slug +- `pipeline_slug` (required): Pipeline slug +- `build_number` (required): Build number +- `timeout` (optional): Seconds until timeout (default: 1800) +- `poll_interval` (optional): Seconds between checks (default: 30) + +Returns: Final build state when complete or timeout + +### bktide CLI (Secondary) + +**Access Method:** `npx bktide <command>` + +**Authentication:** `BK_TOKEN` environment variable or `~/.bktide/config` + +**Pros:** + +- Human-readable colored output +- Intuitive command structure +- Good for interactive terminal work + +**Cons:** + +- External npm dependency +- **CANNOT retrieve job logs** (most critical limitation) +- Limited compared to full API +- Requires npx/node installed + +**Key Commands:** + +```bash +npx bktide pipelines # List pipelines +npx bktide builds <org>/<pipeline> # List recent builds +npx bktide build <org>/<pipeline>/<number> # Build details +npx bktide build <org>/<pipeline>/<number> --jobs # Show job summary +npx bktide build <org>/<pipeline>/<number> --failed # Show failed jobs only +npx bktide annotations <org>/<pipeline>/<number> # Show annotations +``` + +**Critical**: bktide has NO command for 
retrieving logs. The `build` command shows job states and names, but NOT log content. + +### Bundled Scripts (Tertiary) + +**Access Method:** `~/.claude/skills/buildkite-status/scripts/