From 7e16d8a2a10d27a5fb9b2e2e6275bca351703bf1 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 08:39:24 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 15 + README.md | 3 + commands/analyze-quality.md | 481 ++++++++++++ commands/analyze.md | 192 +++++ commands/bugfix.md | 1166 +++++++++++++++++++++++++++++ commands/build.md | 441 +++++++++++ commands/checklist.md | 313 ++++++++ commands/clarify.md | 189 +++++ commands/complete.md | 778 +++++++++++++++++++ commands/constitution.md | 90 +++ commands/coordinate.md | 570 ++++++++++++++ commands/deprecate.md | 568 ++++++++++++++ commands/fix.md | 495 ++++++++++++ commands/hotfix.md | 477 ++++++++++++ commands/impact.md | 196 +++++ commands/implement.md | 993 ++++++++++++++++++++++++ commands/init.md | 570 ++++++++++++++ commands/metrics-export.md | 326 ++++++++ commands/metrics.md | 395 ++++++++++ commands/modify.md | 1032 +++++++++++++++++++++++++ commands/orchestrate-feature.md | 486 ++++++++++++ commands/orchestrate-validate.md | 413 ++++++++++ commands/orchestrate.md | 741 ++++++++++++++++++ commands/plan.md | 455 +++++++++++ commands/refactor.md | 427 +++++++++++ commands/release.md | 944 +++++++++++++++++++++++ commands/rollback.md | 576 ++++++++++++++ commands/security-audit.md | 940 +++++++++++++++++++++++ commands/ship.md | 244 ++++++ commands/specify.md | 431 +++++++++++ commands/suggest.md | 292 ++++++++ commands/tasks.md | 214 ++++++ commands/upgrade.md | 631 ++++++++++++++++ commands/validate.md | 314 ++++++++ plugin.lock.json | 189 +++++ skills/specswarm-build/SKILL.md | 73 ++ skills/specswarm-fix/SKILL.md | 73 ++ skills/specswarm-modify/SKILL.md | 100 +++ skills/specswarm-ship/SKILL.md | 74 ++ skills/specswarm-upgrade/SKILL.md | 74 ++ 40 files changed, 16981 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 commands/analyze-quality.md create mode 100644 commands/analyze.md create mode 100644 commands/bugfix.md create mode 100644 commands/build.md create mode 100644 commands/checklist.md create mode 100644 commands/clarify.md create mode 100644 commands/complete.md create mode 100644 commands/constitution.md create mode 100644 commands/coordinate.md create mode 100644 commands/deprecate.md create mode 100644 commands/fix.md create mode 100644 commands/hotfix.md create mode 100644 commands/impact.md create mode 100644 commands/implement.md create mode 100644 commands/init.md create mode 100644 commands/metrics-export.md create mode 100644 commands/metrics.md create mode 100644 commands/modify.md create mode 100644 commands/orchestrate-feature.md create mode 100644 commands/orchestrate-validate.md create mode 100644 commands/orchestrate.md create mode 100644 commands/plan.md create mode 100644 commands/refactor.md create mode 100644 commands/release.md create mode 100644 commands/rollback.md create mode 100644 commands/security-audit.md create mode 100644 commands/ship.md create mode 100644 commands/specify.md create mode 100644 commands/suggest.md create mode 100644 commands/tasks.md create mode 100644 commands/upgrade.md create mode 100644 commands/validate.md create mode 100644 plugin.lock.json create mode 100644 skills/specswarm-build/SKILL.md create mode 100644 skills/specswarm-fix/SKILL.md create mode 100644 skills/specswarm-modify/SKILL.md create mode 100644 skills/specswarm-ship/SKILL.md create mode 100644 skills/specswarm-upgrade/SKILL.md diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 
index 0000000..663a4b3 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,15 @@ +{ + "name": "specswarm", + "description": "Complete software development toolkit: spec-driven workflows, autonomous orchestration, lifecycle management (bugfix/modify/refactor), quality validation, and performance monitoring. v3.5.0: 5th core workflow (modify) + README simplification (670β†’400 lines) + comprehensive docs (COMMANDS.md, SETUP.md, FEATURES.md). v3.4.0: Confidence-based auto-execution. v3.3.9: MANDATORY/ALWAYS directive descriptions.", + "version": "3.5.0", + "author": { + "name": "Marty Bonacci", + "url": "https://github.com/MartyBonacci" + }, + "skills": [ + "./skills" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..8d9ae52 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# specswarm + +Complete software development toolkit: spec-driven workflows, autonomous orchestration, lifecycle management (bugfix/modify/refactor), quality validation, and performance monitoring. v3.5.0: 5th core workflow (modify) + README simplification (670β†’400 lines) + comprehensive docs (COMMANDS.md, SETUP.md, FEATURES.md). v3.4.0: Confidence-based auto-execution. v3.3.9: MANDATORY/ALWAYS directive descriptions. diff --git a/commands/analyze-quality.md b/commands/analyze-quality.md new file mode 100644 index 0000000..3e49844 --- /dev/null +++ b/commands/analyze-quality.md @@ -0,0 +1,481 @@ +--- +description: Comprehensive codebase quality analysis with prioritized recommendations +--- + + + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Goal + +Perform comprehensive codebase quality analysis to identify improvement opportunities and generate prioritized recommendations. + +**Key Capabilities**: +1. **Test Coverage Gaps**: Find files without tests +2. **Architecture Issues**: Detect anti-patterns and violations +3. **Missing Documentation**: Identify undocumented code +4. **Performance Issues**: Find optimization opportunities +5. **Security Issues**: Detect vulnerabilities +6. **Quality Scoring**: Calculate module-level quality scores + +**Coverage**: Provides holistic codebase health assessment + +--- + +## Execution Steps + +### 1. Initialize Analysis Context + +**YOU MUST NOW initialize the analysis using the Bash tool:** + +1. **Get repository root:** + ```bash + git rev-parse --show-toplevel 2>/dev/null || pwd + ``` + Store as REPO_ROOT. + +2. **Display analysis banner:** + ``` + πŸ“Š Codebase Quality Analysis + ============================ + + Analyzing: {REPO_ROOT} + Started: {TIMESTAMP} + ``` + +3. **Detect project type** by reading package.json: + - Framework: Vite, Next.js, Remix, CRA, or Generic + - Language: JavaScript, TypeScript, Python, Go, etc. + - Test framework: Vitest, Jest, Pytest, etc. + +--- + +### 2. Test Coverage Gap Analysis + +**YOU MUST NOW analyze test coverage gaps:** + +1. **Find all source files** using Bash: + ```bash + find ${REPO_ROOT} -type f \( -name "*.ts" -o -name "*.tsx" -o -name "*.js" -o -name "*.jsx" \) \ + -not -path "*/node_modules/*" \ + -not -path "*/dist/*" \ + -not -path "*/build/*" \ + -not -path "*/.next/*" \ + -not -path "*/test/*" \ + -not -path "*/tests/*" \ + -not -path "*/__tests__/*" + ``` + Store count as TOTAL_SOURCE_FILES. + +2. 
**Find all test files** using Bash: + ```bash + find ${REPO_ROOT} -type f \( \ + -name "*.test.ts" -o -name "*.test.tsx" -o -name "*.test.js" -o -name "*.test.jsx" -o \ + -name "*.spec.ts" -o -name "*.spec.tsx" -o -name "*.spec.js" -o -name "*.spec.jsx" \ + \) -not -path "*/node_modules/*" + ``` + Store count as TOTAL_TEST_FILES. + +3. **Calculate test coverage ratio:** + ``` + TEST_RATIO = (TOTAL_TEST_FILES / TOTAL_SOURCE_FILES) * 100 + ``` + +4. **Find files without corresponding tests:** + - For each source file in app/, src/, lib/ + - Check if corresponding test file exists + - Add to UNTESTED_FILES list if no test found + +5. **Display test coverage gaps:** + ``` + πŸ“‹ Test Coverage Gaps + ===================== + + Source Files: {TOTAL_SOURCE_FILES} + Test Files: {TOTAL_TEST_FILES} + Test Ratio: {TEST_RATIO}% + + Files Without Tests ({COUNT}): + {TOP_10_UNTESTED_FILES} + + Priority: {HIGH/MEDIUM/LOW} + Impact: Missing tests for {COUNT} files + ``` + +--- + +### 3. Architecture Pattern Analysis + +**YOU MUST NOW analyze architectural patterns:** + +1. **Run SSR pattern validator** using Bash: + ```bash + bash ~/.claude/plugins/marketplaces/specswarm-marketplace/plugins/specswarm/lib/ssr-validator.sh + ``` + Capture issues count and details. + +2. **Scan for common anti-patterns** using Grep: + + a. **useEffect with fetch** (should use loaders): + ```bash + grep -rn "useEffect.*fetch" app/ src/ --include="*.tsx" --include="*.ts" + ``` + + b. **Client-side state for server data** (should use loader data): + ```bash + grep -rn "useState.*fetch\|useState.*axios" app/ src/ --include="*.tsx" + ``` + + c. **Class components** (should use functional): + ```bash + grep -rn "class.*extends React.Component" app/ src/ --include="*.tsx" --include="*.jsx" + ``` + + d. **Inline styles** (should use Tailwind/CSS modules): + ```bash + grep -rn "style={{" app/ src/ --include="*.tsx" --include="*.jsx" + ``` + +3. **Display architecture issues:** + ``` + πŸ—οΈ Architecture Issues + ====================== + + SSR Patterns: {SSR_ISSUES} issues + - Hardcoded URLs: {COUNT} + - Relative URLs in SSR: {COUNT} + + React Anti-Patterns: {REACT_ISSUES} issues + - useEffect with fetch: {COUNT} + - Client-side state: {COUNT} + - Class components: {COUNT} + + Styling Issues: {STYLE_ISSUES} issues + - Inline styles: {COUNT} + + Priority: {CRITICAL/HIGH/MEDIUM/LOW} + Impact: Architectural debt in {TOTAL_ISSUES} locations + ``` + +--- + +### 4. Documentation Gap Analysis + +**YOU MUST NOW analyze documentation gaps:** + +1. **Find functions without JSDoc** using Grep: + ```bash + grep -rn "^function\|^export function\|^const.*= (" app/ src/ --include="*.ts" --include="*.tsx" | \ + while read line; do + # Check if previous line has /** comment + done + ``` + +2. **Find components without prop types:** + ```bash + grep -rn "export.*function.*Component\|export.*const.*Component" app/ src/ --include="*.tsx" + ``` + Check if they have TypeScript interface/type definitions. + +3. **Find API endpoints without OpenAPI/comments:** + ```bash + find app/routes/ app/api/ -name "*.ts" -o -name "*.tsx" + ``` + Check for route handler documentation. + +4. **Display documentation gaps:** + ``` + πŸ“š Documentation Gaps + ===================== + + Functions without JSDoc: {COUNT} + Components without prop types: {COUNT} + API endpoints without docs: {COUNT} + + Priority: {MEDIUM/LOW} + Impact: {COUNT} undocumented items + ``` + +--- + +### 5. Performance Issue Detection + +**YOU MUST NOW detect performance issues:** + +1. 
**Run bundle size analyzer** (Phase 3 Enhancement) using Bash: + ```bash + bash ~/.claude/plugins/marketplaces/specswarm-marketplace/plugins/speclabs/lib/bundle-size-monitor.sh ${REPO_ROOT} + ``` + Capture: + - Total bundle size + - List of large bundles (>500KB) + - List of critical bundles (>1MB) + - Bundle size score (0-20 points) + +2. **Find missing lazy loading:** + ```bash + grep -rn "import.*from" app/pages/ app/routes/ --include="*.tsx" | \ + grep -v "React.lazy\|lazy(" + ``` + +3. **Find unoptimized images:** + ```bash + find public/ static/ app/ -type f \( -name "*.jpg" -o -name "*.png" \) -size +100k + ``` + +4. **Display performance issues:** + ``` + ⚑ Performance Issues + ===================== + + Bundle Sizes (Phase 3): + - Total: {TOTAL_SIZE} + - Large bundles (>500KB): {COUNT} + - Critical bundles (>1MB): {COUNT} + - Top offenders: + * {LARGEST_BUNDLE_1} + * {LARGEST_BUNDLE_2} + - Score: {SCORE}/20 points + + Missing Lazy Loading: {COUNT} routes + Unoptimized Images: {COUNT} files (>{SIZE}) + + Priority: {HIGH/MEDIUM} + Impact: Page load performance degraded by {TOTAL_SIZE} + ``` + +--- + +### 6. Security Issue Detection + +**YOU MUST NOW detect security issues:** + +1. **Find exposed secrets** using Grep: + ```bash + grep -rn "API_KEY\|SECRET\|PASSWORD.*=" app/ src/ --include="*.ts" --include="*.tsx" | \ + grep -v "process.env\|import.meta.env" + ``` + +2. **Find missing input validation:** + ```bash + grep -rn "formData.get\|request.body\|params\." app/routes/ app/api/ --include="*.ts" + ``` + Check if followed by validation logic. + +3. **Find potential XSS vulnerabilities:** + ```bash + grep -rn "dangerouslySetInnerHTML\|innerHTML" app/ src/ --include="*.tsx" --include="*.jsx" + ``` + +4. **Display security issues:** + ``` + πŸ”’ Security Issues + ================== + + Exposed Secrets: {COUNT} potential leaks + Missing Input Validation: {COUNT} endpoints + XSS Vulnerabilities: {COUNT} locations + + Priority: {CRITICAL if >0, else LOW} + Impact: Security risk in {COUNT} locations + ``` + +--- + +### 7. Module-Level Quality Scoring + +**YOU MUST NOW calculate quality scores per module:** + +1. **Group files by module/directory:** + - app/pages/ + - app/components/ + - app/utils/ + - src/services/ + - etc. + +2. **For each module, calculate score:** + - Test Coverage: Has tests? (+25 points) + - Documentation: Has JSDoc/types? (+15 points) + - No Anti-Patterns: Clean architecture? (+20 points) + - No Security Issues: Secure? (+20 points) + - Performance: Optimized? (+20 points) + - Total: 0-100 points + +3. **Display module scores:** + ``` + πŸ“Š Module Quality Scores + ======================== + + app/pages/: 75/100 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘ (Good) + - Test Coverage: βœ“ 25/25 + - Documentation: βœ“ 15/15 + - Architecture: ⚠️ 15/20 (2 anti-patterns) + - Security: βœ“ 20/20 + - Performance: βœ— 0/20 (missing lazy load) + + app/components/: 45/100 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘ (Needs Improvement) + - Test Coverage: βœ— 0/25 (no tests) + - Documentation: ⚠️ 10/15 (missing prop types) + - Architecture: βœ“ 20/20 + - Security: βœ“ 20/20 + - Performance: ⚠️ 10/20 (large bundle) + + Overall Codebase Score: {AVERAGE}/100 + ``` + +--- + +### 8. Prioritized Recommendations + +**YOU MUST NOW generate prioritized recommendations:** + +1. **Critical Priority** (security, production failures): + - Exposed secrets + - SSR pattern violations + - Security vulnerabilities + +2. 
**High Priority** (quality, maintainability): + - Missing test coverage for core modules + - Major architecture anti-patterns + - Large bundle sizes + +3. **Medium Priority** (improvement opportunities): + - Missing documentation + - Minor anti-patterns + - Missing lazy loading + +4. **Low Priority** (nice-to-have): + - Additional JSDoc + - Inline style cleanup + - Image optimization + +5. **Display recommendations:** + ``` + πŸ“ˆ Prioritized Recommendations + =============================== + + πŸ”΄ CRITICAL (Fix Immediately): + 1. Remove 3 exposed API keys from client code + Impact: Security breach risk + Files: app/config.ts:12, app/utils/api.ts:45, src/client.ts:8 + Fix: Move to environment variables + + 2. Fix 11 hardcoded URLs in SSR contexts + Impact: Production deployment will fail + Files: See SSR validation report + Fix: Use getApiUrl() helper + + 🟠 HIGH (Fix This Week): + 3. Add tests for 15 untested core modules + Impact: No regression protection + Modules: app/pages/, app/components/Auth/ + Fix: Generate test templates + + 4. Reduce bundle size by 2.5MB + Impact: Slow page loads + Files: dist/main.js (3.2MB), dist/vendor.js (1.8MB) + Fix: Implement code splitting and lazy loading + + 🟑 MEDIUM (Fix This Sprint): + 5. Add JSDoc to 45 functions + Impact: Maintainability + Fix: Use IDE code generation + + 6. Replace 12 useEffect fetches with loaders + Impact: Architecture consistency + Fix: Migrate to React Router loaders + + 🟒 LOW (Nice to Have): + 7. Optimize 8 large images + Impact: Minor performance gain + Fix: Use next/image or image optimization service + + Estimated Impact: Fixing critical + high items would increase quality score from {CURRENT}/100 β†’ {PROJECTED}/100 + ``` + +--- + +### 9. Generate Analysis Report + +**YOU MUST NOW generate final report:** + +1. **Create summary:** + ``` + ═══════════════════════════════════════════════ + Quality Analysis Report + ═══════════════════════════════════════════════ + + Overall Quality Score: {SCORE}/100 {STATUS_EMOJI} + + Breakdown: + - Test Coverage: {SCORE}/100 + - Architecture: {SCORE}/100 + - Documentation: {SCORE}/100 + - Performance: {SCORE}/100 + - Security: {SCORE}/100 + + Issues Found: + - Critical: {COUNT} πŸ”΄ + - High: {COUNT} 🟠 + - Medium: {COUNT} 🟑 + - Low: {COUNT} 🟒 + + Total Issues: {TOTAL} + ``` + +2. **Save report** to file: + - Write to: `.specswarm/quality-analysis-{TIMESTAMP}.md` + - Include all sections above + - Add file-by-file details + - Include fix commands/examples + +3. **Display next steps:** + ``` + πŸ“‹ Next Steps + ============= + + 1. Review full report: .specswarm/quality-analysis-{TIMESTAMP}.md + 2. Fix critical issues first (security + SSR) + 3. Run quality validation: /specswarm:implement --validate-only + 4. Track improvements in metrics.json + + Commands: + - View detailed SSR issues: bash plugins/specswarm/lib/ssr-validator.sh + - Generate test templates: /specswarm:implement {feature} + - Re-run analysis: /specswarm:analyze-quality + ``` + +--- + +## Success Criteria + +βœ… All analysis sections completed +βœ… Issues categorized by priority +βœ… Recommendations generated with impact estimates +βœ… Report saved to .specswarm/ +βœ… Next steps provided + +--- + +## Operating Principles + +1. **Comprehensive**: Scan entire codebase, not just recent changes +2. **Prioritized**: Critical issues first, nice-to-haves last +3. **Actionable**: Provide specific fixes, not just problems +4. **Impact-Aware**: Estimate quality score improvement +5. 
**Automated**: Runnable without user interaction +6. **Repeatable**: Track improvements over time + +--- + +**Note**: This command provides a holistic view of codebase quality. Use it before major releases, after adding new features, or when quality scores decline. diff --git a/commands/analyze.md b/commands/analyze.md new file mode 100644 index 0000000..8db401a --- /dev/null +++ b/commands/analyze.md @@ -0,0 +1,192 @@ +--- +description: Perform a non-destructive cross-artifact consistency and quality analysis across spec.md, plan.md, and tasks.md after task generation. +--- + + + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Goal + +Identify inconsistencies, duplications, ambiguities, and underspecified items across the three core artifacts (`spec.md`, `plan.md`, `tasks.md`) before implementation. This command MUST run only after `/tasks` has successfully produced a complete `tasks.md`. + +## Operating Constraints + +**STRICTLY READ-ONLY**: Do **not** modify any files. Output a structured analysis report. Offer an optional remediation plan (user must explicitly approve before any follow-up editing commands would be invoked manually). + +**Constitution Authority**: The project constitution (`.specswarm/constitution.md`) is **non-negotiable** within this analysis scope. Constitution conflicts are automatically CRITICAL and require adjustment of the spec, plan, or tasksβ€”not dilution, reinterpretation, or silent ignoring of the principle. If a principle itself needs to change, that must occur in a separate, explicit constitution update outside `/analyze`. + +## Execution Steps + +### 1. Initialize Analysis Context + +Locate the feature directory by searching for `spec.md`, `plan.md`, and `tasks.md` files in the repository. Derive absolute paths: + +- SPEC = FEATURE_DIR/spec.md +- PLAN = FEATURE_DIR/plan.md +- TASKS = FEATURE_DIR/tasks.md + +Abort with an error message if any required file is missing (instruct the user to run missing prerequisite command). + +### 2. Load Artifacts (Progressive Disclosure) + +Load only the minimal necessary context from each artifact: + +**From spec.md:** + +- Overview/Context +- Functional Requirements +- Non-Functional Requirements +- User Stories +- Edge Cases (if present) + +**From plan.md:** + +- Architecture/stack choices +- Data Model references +- Phases +- Technical constraints + +**From tasks.md:** + +- Task IDs +- Descriptions +- Phase grouping +- Parallel markers [P] +- Referenced file paths + +**From constitution:** + +- Load `.specswarm/constitution.md` for principle validation + +### 3. Build Semantic Models + +Create internal representations (do not include raw artifacts in output): + +- **Requirements inventory**: Each functional + non-functional requirement with a stable key (derive slug based on imperative phrase; e.g., "User can upload file" β†’ `user-can-upload-file`) +- **User story/action inventory**: Discrete user actions with acceptance criteria +- **Task coverage mapping**: Map each task to one or more requirements or stories (inference by keyword / explicit reference patterns like IDs or key phrases) +- **Constitution rule set**: Extract principle names and MUST/SHOULD normative statements + +### 4. Detection Passes (Token-Efficient Analysis) + +Focus on high-signal findings. Limit to 50 findings total; aggregate remainder in overflow summary. + +#### A. 
Duplication Detection + +- Identify near-duplicate requirements +- Mark lower-quality phrasing for consolidation + +#### B. Ambiguity Detection + +- Flag vague adjectives (fast, scalable, secure, intuitive, robust) lacking measurable criteria +- Flag unresolved placeholders (TODO, TKTK, ???, ``, etc.) + +#### C. Underspecification + +- Requirements with verbs but missing object or measurable outcome +- User stories missing acceptance criteria alignment +- Tasks referencing files or components not defined in spec/plan + +#### D. Constitution Alignment + +- Any requirement or plan element conflicting with a MUST principle +- Missing mandated sections or quality gates from constitution + +#### E. Coverage Gaps + +- Requirements with zero associated tasks +- Tasks with no mapped requirement/story +- Non-functional requirements not reflected in tasks (e.g., performance, security) + +#### F. Inconsistency + +- Terminology drift (same concept named differently across files) +- Data entities referenced in plan but absent in spec (or vice versa) +- Task ordering contradictions (e.g., integration tasks before foundational setup tasks without dependency note) +- Conflicting requirements (e.g., one requires Next.js while other specifies Vue) + +### 5. Severity Assignment + +Use this heuristic to prioritize findings: + +- **CRITICAL**: Violates constitution MUST, missing core spec artifact, or requirement with zero coverage that blocks baseline functionality +- **HIGH**: Duplicate or conflicting requirement, ambiguous security/performance attribute, untestable acceptance criterion +- **MEDIUM**: Terminology drift, missing non-functional task coverage, underspecified edge case +- **LOW**: Style/wording improvements, minor redundancy not affecting execution order + +### 6. Produce Compact Analysis Report + +Output a Markdown report (no file writes) with the following structure: + +## Specification Analysis Report + +| ID | Category | Severity | Location(s) | Summary | Recommendation | +|----|----------|----------|-------------|---------|----------------| +| A1 | Duplication | HIGH | spec.md:L120-134 | Two similar requirements ... | Merge phrasing; keep clearer version | + +(Add one row per finding; generate stable IDs prefixed by category initial.) + +**Coverage Summary Table:** + +| Requirement Key | Has Task? | Task IDs | Notes | +|-----------------|-----------|----------|-------| + +**Constitution Alignment Issues:** (if any) + +**Unmapped Tasks:** (if any) + +**Metrics:** + +- Total Requirements +- Total Tasks +- Coverage % (requirements with >=1 task) +- Ambiguity Count +- Duplication Count +- Critical Issues Count + +### 7. Provide Next Actions + +At end of report, output a concise Next Actions block: + +- If CRITICAL issues exist: Recommend resolving before `/implement` +- If only LOW/MEDIUM: User may proceed, but provide improvement suggestions +- Provide explicit command suggestions: e.g., "Run /specify with refinement", "Run /plan to adjust architecture", "Manually edit tasks.md to add coverage for 'performance-metrics'" + +### 8. Offer Remediation + +Ask the user: "Would you like me to suggest concrete remediation edits for the top N issues?" (Do NOT apply them automatically.) 
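To make the coverage metric concrete, a minimal sketch of the requirement-to-task mapping is shown below. It assumes the requirement keys were collected one per line into a hypothetical scratch file `requirement-keys.txt` and that `tasks.md` mentions requirements by those slugs; both are illustrative assumptions rather than a required format, and the actual analysis keeps its mapping in memory and remains read-only.

```bash
# Illustrative only - counts requirements that have at least one referencing task
TOTAL=0
COVERED=0
while read -r KEY; do
  TOTAL=$((TOTAL + 1))
  # A requirement counts as covered if any task text mentions its slug
  if grep -qi "$KEY" "$TASKS"; then
    COVERED=$((COVERED + 1))
  fi
done < requirement-keys.txt   # hypothetical scratch file of requirement slugs

if [ "$TOTAL" -gt 0 ]; then
  echo "Coverage: ${COVERED}/${TOTAL} ($((COVERED * 100 / TOTAL))%)"
fi
```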
+ +## Operating Principles + +### Context Efficiency + +- **Minimal high-signal tokens**: Focus on actionable findings, not exhaustive documentation +- **Progressive disclosure**: Load artifacts incrementally; don't dump all content into analysis +- **Token-efficient output**: Limit findings table to 50 rows; summarize overflow +- **Deterministic results**: Rerunning without changes should produce consistent IDs and counts + +### Analysis Guidelines + +- **NEVER modify files** (this is read-only analysis) +- **NEVER hallucinate missing sections** (if absent, report them accurately) +- **Prioritize constitution violations** (these are always CRITICAL) +- **Use examples over exhaustive rules** (cite specific instances, not generic patterns) +- **Report zero issues gracefully** (emit success report with coverage statistics) + +## Context + +{ARGS} diff --git a/commands/bugfix.md b/commands/bugfix.md new file mode 100644 index 0000000..fc27f03 --- /dev/null +++ b/commands/bugfix.md @@ -0,0 +1,1166 @@ +--- +description: Regression-test-first bug fixing workflow with smart SpecSwarm/SpecTest integration +--- + + + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Goal + +Execute regression-test-first bug fixing workflow to ensure bugs are fixed correctly and prevent future regressions. + +**Key Principles**: +1. **Test First**: Write regression test before fixing +2. **Verify Failure**: Confirm test reproduces bug +3. **Fix**: Implement solution +4. **Verify Success**: Confirm test passes +5. **No Regressions**: Validate no new bugs introduced + +**Coverage**: Addresses ~40% of development work (bug fixes) + +--- + +## Smart Integration Detection + +Before starting workflow, detect available plugins for enhanced capabilities: + +```bash +# Get repository root and plugin directory +REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PLUGIN_DIR="$(dirname "$SCRIPT_DIR")" + +# Check for SpecSwarm (tech stack enforcement) +SPECSWARM_INSTALLED=$(claude plugin list | grep -q "specswarm" && echo "true" || echo "false") + +# Check for SpecLabs (experimental features) +SPECLABS_INSTALLED=$(claude plugin list | grep -q "speclabs" && echo "true" || echo "false") + +# Check for web project and Chrome DevTools MCP (web debugging enhancement) +if [ -f "$PLUGIN_DIR/lib/web-project-detector.sh" ]; then + source "$PLUGIN_DIR/lib/web-project-detector.sh" + + # Determine if Chrome DevTools should be used + if should_use_chrome_devtools "$REPO_ROOT"; then + CHROME_DEVTOOLS_MODE="enabled" + echo "🌐 Web Project Detected: $WEB_FRAMEWORK" + echo "🎯 Chrome DevTools MCP: Available for enhanced browser debugging" + elif is_web_project "$REPO_ROOT"; then + CHROME_DEVTOOLS_MODE="fallback" + echo "🌐 Web Project Detected: $WEB_FRAMEWORK" + echo "πŸ“¦ Using Playwright fallback (Chrome DevTools MCP not available)" + else + CHROME_DEVTOOLS_MODE="disabled" + # Non-web project - no message needed + fi +else + CHROME_DEVTOOLS_MODE="disabled" +fi + +# Configure workflow based on detection +if [ "$SPECLABS_INSTALLED" = "true" ]; then + EXECUTION_MODE="parallel" + ENABLE_HOOKS=true + ENABLE_METRICS=true + echo "🎯 Smart Integration: SpecLabs detected (experimental features enabled)" +elif [ "$SPECSWARM_INSTALLED" = "true" ]; then + EXECUTION_MODE="sequential" + ENABLE_TECH_VALIDATION=true + echo "🎯 Smart Integration: SpecSwarm detected (tech stack enforcement enabled)" +else + 
EXECUTION_MODE="sequential" + echo "ℹ️ Running in basic mode (install SpecSwarm/SpecTest for enhanced capabilities)" +fi +``` + +--- + +## Pre-Workflow Hook (if SpecTest installed) + +```bash +if [ "$ENABLE_HOOKS" = "true" ]; then + echo "🎣 Pre-Bugfix Hook" + + # Validate prerequisites + echo "βœ“ Checking repository status..." + git status --porcelain | head -5 + + # Load tech stack (if SpecSwarm also installed) + if [ "$SPECSWARM_INSTALLED" = "true" ]; then + echo "βœ“ Loading tech stack: .specswarm/tech-stack.md" + TECH_STACK_EXISTS=$([ -f ".specswarm/tech-stack.md" ] && echo "true" || echo "false") + if [ "$TECH_STACK_EXISTS" = "true" ]; then + echo "βœ“ Tech stack validation enabled" + fi + fi + + # Initialize metrics + WORKFLOW_START_TIME=$(date +%s) + echo "βœ“ Metrics initialized" + echo "" +fi +``` + +--- + +## Execution Steps + +### 1. Discover Bug Context + +```bash +# Get repository root +REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) + +# Source features location helper +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PLUGIN_DIR="$(dirname "$SCRIPT_DIR")" +source "$PLUGIN_DIR/lib/features-location.sh" + +# Initialize features directory (handles migration if needed) +get_features_dir "$REPO_ROOT" + +# Detect branch +CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) + +# Try to extract feature number from branch name (bugfix/NNN-*) +FEATURE_NUM=$(echo "$CURRENT_BRANCH" | grep -oE 'bugfix/([0-9]{3})' | grep -oE '[0-9]{3}') + +# If no feature number in branch, prompt user +if [ -z "$FEATURE_NUM" ]; then + echo "πŸ› Bugfix Workflow" + echo "" + echo "No bugfix branch detected. Please provide bug number:" + echo "Example: 042 (for bugfix/042-login-timeout)" + # Wait for user input + read -p "Bug number: " FEATURE_NUM + + # Validate + if [ -z "$FEATURE_NUM" ]; then + echo "❌ Error: Bug number required" + exit 1 + fi + + # Pad to 3 digits + FEATURE_NUM=$(printf "%03d" $FEATURE_NUM) +fi + +# Find or create feature directory using helper +if ! find_feature_dir "$FEATURE_NUM" "$REPO_ROOT"; then + echo "⚠️ Feature directory not found for bug ${FEATURE_NUM}" + echo "" + echo "Please provide bug description (kebab-case):" + echo "Example: login-timeout" + read -p "Description: " BUG_DESC + + if [ -z "$BUG_DESC" ]; then + echo "❌ Error: Bug description required" + exit 1 + fi + + FEATURE_DIR="${FEATURES_DIR}/${FEATURE_NUM}-${BUG_DESC}" + mkdir -p "$FEATURE_DIR" + echo "βœ“ Created: $FEATURE_DIR" +fi + +BUGFIX_SPEC="${FEATURE_DIR}/bugfix.md" +REGRESSION_TEST_SPEC="${FEATURE_DIR}/regression-test.md" +TASKS_FILE="${FEATURE_DIR}/tasks.md" +``` + +Output to user: +``` +πŸ› Bugfix Workflow - Feature ${FEATURE_NUM} +βœ“ Branch detected: ${CURRENT_BRANCH} +βœ“ Feature directory: ${FEATURE_DIR} +``` + +--- + +### 2. Create Bugfix Specification + +If `$BUGFIX_SPEC` doesn't exist, create it using this template: + +```markdown +# Bug ${FEATURE_NUM}: [Bug Title] + +**Status**: Active +**Created**: YYYY-MM-DD +**Priority**: [High/Medium/Low] +**Severity**: [Critical/Major/Minor] + +## Symptoms + +[What behavior is observed? What's going wrong?] + +- Observable symptom 1 +- Observable symptom 2 +- Observable symptom 3 + +## Reproduction Steps + +1. Step 1 to reproduce +2. Step 2 to reproduce +3. Step 3 to reproduce + +**Expected Behavior**: [What should happen?] + +**Actual Behavior**: [What actually happens?] + +## Root Cause Analysis + +**IMPORTANT: Use ultrathinking for deep analysis** + +Before documenting the root cause, you MUST: +1. 
**Ultrathink** - Perform deep, multi-layer analysis +2. Check database configuration (transforms, defaults, constraints) +3. Analyze property access patterns (camelCase vs snake_case mismatches) +4. Examine caching behavior (frontend, backend, browser) +5. Trace data flow from database β†’ backend β†’ frontend +6. Look for cascading bugs (one bug hiding another) + +**Root Cause Documentation:** + +[What is causing the bug?] + +- Component/module affected +- Code location (file:line) +- Logic error or edge case missed +- Conditions required to trigger +- Configuration mismatches (transforms, environment, etc.) + +## Impact Assessment + +**Affected Users**: [Who is impacted?] +- All users / Specific user role / Edge case users + +**Affected Features**: [What features are broken?] +- Feature A: Completely broken +- Feature B: Partially degraded + +**Severity Justification**: [Why this priority/severity?] + +**Workaround Available**: [Yes/No - if yes, describe] + +## Regression Test Requirements + +[What test will prove the bug exists and validate the fix?] + +1. Test scenario 1 +2. Test scenario 2 +3. Test scenario 3 + +**Test Success Criteria**: +- βœ… Test fails before fix (proves bug exists) +- βœ… Test passes after fix (proves bug fixed) +- βœ… No new regressions introduced + +## Proposed Solution + +[High-level approach to fixing the bug] + +**Changes Required**: +- File 1: [what needs to change] +- File 2: [what needs to change] + +**Risks**: [Any risks with this fix?] + +**Alternative Approaches**: [Other ways to fix? Why not chosen?] + +--- + +## Tech Stack Compliance + +[If SpecSwarm installed, validate solution against tech stack] + +**Tech Stack File**: .specswarm/tech-stack.md +**Validation Status**: [Pending/Compliant/Non-Compliant] + +--- + +## Metadata + +**Workflow**: Bugfix (regression-test-first) +**Created By**: SpecLab Plugin v1.0.0 +**Smart Integration**: ${INTEGRATION_MODE} +``` + +Populate template by: +1. Load user input (if provided in `$ARGUMENTS`) +2. **ULTRATHINK** - Perform deep root cause analysis: + - Search for error logs, stack traces + - Check database schema configuration (transforms, defaults) + - Analyze property naming patterns (camelCase vs snake_case) + - Check caching behavior (frontend loaders, backend, browser) + - Trace data flow from database through all layers + - Look for multi-layer bugs (frontend + backend + database) + - Check recent commits for related changes + - Review issues/PRs mentioning bug number +3. Analyze repository context: + - Identify all affected layers (database, backend, frontend) + - Check for configuration mismatches + - Look for similar bugs that might indicate patterns +4. Prompt user for missing critical information: + - Bug title + - Symptoms (what's broken?) + - Reproduction steps + - Priority/severity + +Write completed specification to `$BUGFIX_SPEC`. + +Output to user: +``` +πŸ“‹ Bugfix Specification +βœ“ Created: ${BUGFIX_SPEC} +βœ“ Bug documented with symptoms, root cause, impact +``` + +--- + +### 2.5. Orchestrator Detection + +After creating the bugfix specification, analyze whether this is an orchestration opportunity: + +```bash +# Load detection library +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PLUGIN_DIR="$(dirname "$SCRIPT_DIR")" + +if [ -f "$PLUGIN_DIR/lib/orchestrator-detection.sh" ]; then + source "$PLUGIN_DIR/lib/orchestrator-detection.sh" + + echo "" + echo "πŸ” Analyzing bugfix complexity..." 
+ echo "" + + # Count bugs in specification (look for "### Bug #" headers or symptoms list) + BUG_COUNT=$(grep -cE "^### Bug #|^- Bug [0-9]+" "$BUGFIX_SPEC" 2>/dev/null || echo "1") + + # If user input mentions multiple bugs, count them + if [ -n "$ARGUMENTS" ]; then + ARG_BUG_COUNT=$(echo "$ARGUMENTS" | grep -oE "bug" | wc -l) + if [ "$ARG_BUG_COUNT" -gt "$BUG_COUNT" ]; then + BUG_COUNT=$ARG_BUG_COUNT + fi + fi + + # Get list of affected files from bugfix spec + AFFECTED_FILES=$(grep -A 20 "^## Proposed Solution" "$BUGFIX_SPEC" | grep -E "^- File |^ - " | sed 's/^- File //' | sed 's/^ - //' | tr '\n' ' ' || echo "") + + # If no files found in spec yet, check user input + if [ -z "$AFFECTED_FILES" ] && [ -n "$ARGUMENTS" ]; then + AFFECTED_FILES=$(echo "$ARGUMENTS" | grep -oE "[a-zA-Z0-9/_-]+\.(ts|tsx|js|jsx|py|go|java)" | tr '\n' ' ') + fi + + # Build context info based on spec content + CONTEXT_INFO="" + if grep -qiE "log|logging|console" "$BUGFIX_SPEC"; then + CONTEXT_INFO="${CONTEXT_INFO} requires_logging" + fi + if grep -qiE "restart|reload|rebuild" "$BUGFIX_SPEC"; then + CONTEXT_INFO="${CONTEXT_INFO} requires_server_restarts" + fi + + # Check if orchestrator should be used + DETECTION_RESULT=$(should_use_orchestrator "$BUG_COUNT" "$AFFECTED_FILES" "$CONTEXT_INFO" 2>/dev/null) + + if [ $? -eq 0 ]; then + echo "🎯 Orchestrator Detection" + echo "========================" + echo "" + + # Extract recommendation details + RECOMMENDATION=$(echo "$DETECTION_RESULT" | grep -o '"recommendation": "[^"]*"' | cut -d'"' -f4) + REASON=$(echo "$DETECTION_RESULT" | grep -o '"reason": "[^"]*"' | cut -d'"' -f4) + CONFIDENCE=$(echo "$DETECTION_RESULT" | grep -o '"confidence": "[^"]*"' | cut -d'"' -f4) + + echo "Reason: $REASON" + echo "Confidence: $CONFIDENCE" + echo "" + + if [ "$RECOMMENDATION" = "orchestrator" ]; then + echo "πŸ’‘ STRONG RECOMMENDATION: Use Project Orchestrator" + echo "" + echo "This is a textbook orchestration case!" + echo "" + echo "Benefits:" + echo " β€’ Estimated time savings: 40-60%" + echo " β€’ Parallel bug investigation" + echo " β€’ Better context management" + echo " β€’ Coordinated server restarts" + echo " β€’ Specialized agents per domain" + echo "" + echo "Detected signals:" + echo " - $BUG_COUNT bug(s) to fix" + + if [ -n "$AFFECTED_FILES" ]; then + DOMAIN_NAMES=$(get_domain_names "$AFFECTED_FILES" 2>/dev/null) + if [ -n "$DOMAIN_NAMES" ]; then + echo " - Spans domains: $DOMAIN_NAMES" + fi + fi + + if echo "$CONTEXT_INFO" | grep -q "requires_logging"; then + echo " - Requires extensive logging" + fi + if echo "$CONTEXT_INFO" | grep -q "requires_server_restarts"; then + echo " - Requires server restarts" + fi + + echo "" + echo "Choose workflow:" + echo " 1. Continue with sequential bugfix (traditional)" + echo " 2. Switch to Orchestrator debug mode (recommended)" + echo "" + read -p "Your choice (1 or 2): " WORKFLOW_CHOICE + + if [ "$WORKFLOW_CHOICE" = "2" ]; then + echo "" + echo "βœ… Switching to Orchestrator debug mode..." + echo "" + echo "Next step: Create debug workflow specification" + echo "" + echo "Run one of these commands:" + echo " β€’ /debug:coordinate \"$ARGUMENTS\"" + echo " β€’ /project-orchestrator:debug --spec=\"$BUGFIX_SPEC\"" + echo "" + echo "The orchestrator will:" + echo " 1. Add strategic logging across affected files" + echo " 2. Spawn specialist agents per domain" + echo " 3. Coordinate parallel investigation" + echo " 4. 
Integrate and test all fixes together" + echo "" + exit 0 + else + echo "" + echo "βœ… Continuing with sequential workflow" + echo "" + echo "Note: You can switch to orchestrator mode anytime by running:" + echo " /debug:coordinate \"\"" + echo "" + fi + + elif [ "$RECOMMENDATION" = "consider_orchestrator" ]; then + echo "πŸ’­ SUGGESTION: Orchestrator may help with this workflow" + echo "" + echo "Complexity signals detected. Consider using orchestrator if:" + echo " β€’ Workflow becomes more complex than expected" + echo " β€’ Multiple iterations needed (log β†’ restart β†’ fix cycle)" + echo " β€’ You find more bugs during investigation" + echo "" + echo "To switch to orchestrator mode, run:" + echo " /debug:coordinate \"\"" + echo "" + fi + else + echo "ℹ️ Single bug detected - sequential workflow is optimal" + echo "" + fi +else + echo "ℹ️ Orchestrator detection not available (lib not found)" + echo "" +fi +``` + +Continue with normal workflow... + +--- + +### 3. Create Regression Test Specification + +Create `$REGRESSION_TEST_SPEC` using this template: + +```markdown +# Regression Test: Bug ${FEATURE_NUM} + +**Purpose**: Prove bug exists, validate fix, prevent future regressions + +**Test Type**: Regression Test +**Created**: YYYY-MM-DD + +--- + +## Test Objective + +Write a test that: +1. βœ… **Fails before fix** (proves bug exists) +2. βœ… **Passes after fix** (proves bug fixed) +3. βœ… **Prevents regression** (catches if bug reintroduced) + +--- + +## Test Specification + +### Test Setup + +[What needs to be set up before test runs?] + +- Initial state +- Test data +- Mock/stub configuration +- Dependencies + +### Test Execution + +[What actions does the test perform?] + +1. Action 1 +2. Action 2 +3. Action 3 + +### Test Assertions + +[What validations prove bug exists/fixed?] + +- βœ… Assertion 1: [expected outcome] +- βœ… Assertion 2: [expected outcome] +- βœ… Assertion 3: [expected outcome] + +### Test Teardown + +[Any cleanup needed?] + +--- + +## Test Implementation + +### Test File Location + +[Where should this test live?] + +**File**: [path/to/test/file] +**Function/Test Name**: `test_bug_${FEATURE_NUM}_[description]` + +### Test Validation Criteria + +**Before Fix**: +- ❌ Test MUST fail (proves bug reproduction) +- If test passes before fix, test is invalid + +**After Fix**: +- βœ… Test MUST pass (proves bug fixed) +- βœ… All existing tests still pass (no regressions) + +--- + +## Edge Cases to Test + +[Related scenarios that should also be tested] + +1. Edge case 1 +2. Edge case 2 +3. Edge case 3 + +--- + +## Metadata + +**Workflow**: Bugfix (regression-test-first) +**Created By**: SpecLab Plugin v1.0.0 +``` + +Write regression test specification to `$REGRESSION_TEST_SPEC`. + +Output to user: +``` +πŸ“‹ Regression Test Specification +βœ“ Created: ${REGRESSION_TEST_SPEC} +βœ“ Test must fail before fix, pass after fix +``` + +--- + +### 4. 
Generate Tasks + +Create `$TASKS_FILE` with regression-test-first methodology: + +```markdown +# Tasks: Bug ${FEATURE_NUM} - [Bug Title] + +**Workflow**: Bugfix (Regression-Test-First) +**Status**: Active +**Created**: YYYY-MM-DD + +--- + +## Execution Strategy + +**Mode**: ${EXECUTION_MODE} +**Smart Integration**: +${INTEGRATION_SUMMARY} + +--- + +## Phase 1: Regression Test Creation + +### T001: Write Regression Test +**Description**: Implement test specified in regression-test.md +**File**: [test file path] +**Validation**: Test code follows test specification +**Parallel**: No (foundational) + +### T002: Verify Test Fails +**Description**: Run regression test and confirm it fails (proves bug exists) +**Command**: [test command] +**Expected**: Test fails with error matching bug symptoms +**Validation**: Test failure proves bug reproduction +**Parallel**: No (depends on T001) + +--- + +## Phase 2: Bug Fix Implementation + +### T003: Implement Fix +**Description**: Apply solution from bugfix.md +**Files**: [list files to modify] +**Changes**: [high-level change description] +**Tech Stack Validation**: ${TECH_VALIDATION_ENABLED} +**Parallel**: No (core fix) + +[If multiple independent fixes needed, mark additional tasks with [P]] + +### T004: Verify Test Passes +**Description**: Run regression test and confirm it passes (proves bug fixed) +**Command**: [test command] +**Expected**: Test passes +**Validation**: Test success proves bug fixed +**Parallel**: No (depends on T003) + +--- + +## Phase 3: Regression Validation + +### T005: Run Full Test Suite +**Description**: Verify no new regressions introduced +**Command**: [full test suite command] +**Expected**: All tests pass (existing + new regression test) +**Validation**: 100% test pass rate +**Parallel**: No (final validation) + +[If SpecSwarm installed, add tech stack validation task] +${TECH_STACK_VALIDATION_TASK} + +--- + +## Summary + +**Total Tasks**: 5 (minimum) +**Estimated Time**: 1-3 hours (varies by bug complexity) +**Parallel Opportunities**: Limited (regression-test-first is sequential by nature) + +**Success Criteria**: +- βœ… Regression test created +- βœ… Test failed before fix (proved bug) +- βœ… Fix implemented +- βœ… Test passed after fix (proved solution) +- βœ… No new regressions +${TECH_COMPLIANCE_CRITERION} +``` + +Write tasks to `$TASKS_FILE`. + +Output to user: +``` +πŸ“Š Tasks Generated +βœ“ Created: ${TASKS_FILE} +βœ“ 5 tasks (regression-test-first methodology) +βœ“ Phase 1: Create regression test (T001-T002) +βœ“ Phase 2: Implement fix (T003-T004) +βœ“ Phase 3: Validate no regressions (T005) +``` + +--- + +### 5. Execute Workflow + +Now execute the tasks using the appropriate mode: + +#### If SpecTest Installed (Parallel + Hooks): + +``` +⚑ Executing Bugfix Workflow with SpecTest Integration + +🎣 Pre-Task Hook +βœ“ Validating prerequisites +βœ“ Environment ready + +Phase 1: Regression Test Creation (T001-T002) +β†’ T001: Write Regression Test + [execute task] +β†’ T002: Verify Test Fails + [execute task, confirm failure] + βœ… Test failed as expected (bug reproduced) + +Phase 2: Bug Fix Implementation (T003-T004) +β†’ T003: Implement Fix + [execute task] + [if SpecSwarm installed, validate against tech stack] +β†’ T004: Verify Test Passes + [execute task] + βœ… Test passed (bug fixed!) 
+ +Phase 3: Regression Validation (T005) +β†’ T005: Run Full Test Suite + [execute task] + βœ… All tests pass (no regressions) + +🎣 Post-Workflow Hook +βœ“ All tasks completed +βœ“ Bug fixed successfully +βœ“ Regression test added +βœ“ No new regressions +πŸ“Š Metrics updated +``` + +#### If Only SpecSwarm Installed (Sequential + Tech Validation): + +``` +πŸ”§ Executing Bugfix Workflow with SpecSwarm Integration + +Phase 1: Regression Test Creation +T001: Write Regression Test + [execute task] +T002: Verify Test Fails + [execute task] + βœ… Test failed (bug reproduced) + +Phase 2: Bug Fix Implementation +T003: Implement Fix + [execute task] + πŸ” Tech Stack Validation + βœ“ Loading tech stack: .specswarm/tech-stack.md + βœ“ Validating fix against tech stack + βœ“ Compliant: All changes follow tech stack +T004: Verify Test Passes + [execute task] + βœ… Test passed (bug fixed!) + +Phase 3: Regression Validation +T005: Run Full Test Suite + [execute task] + βœ… All tests pass (no regressions) +``` + +#### Basic Mode (No Plugins): + +``` +πŸ› Executing Bugfix Workflow + +Phase 1: Regression Test Creation +T001-T002: [execute sequentially] + +Phase 2: Bug Fix Implementation +T003-T004: [execute sequentially] + +Phase 3: Regression Validation +T005: [execute] + +βœ… Workflow Complete +``` + +--- + +## Chrome DevTools MCP Integration (Web Projects Only) + +If `$CHROME_DEVTOOLS_MODE` is "enabled", use Chrome DevTools MCP tools for enhanced debugging during test execution: + +### During T002: Verify Test Fails (Bug Reproduction) + +When running regression test to reproduce bug: + +```bash +if [ "$CHROME_DEVTOOLS_MODE" = "enabled" ]; then + echo "" + echo "🌐 Chrome DevTools MCP Available" + echo " Using MCP tools for enhanced browser debugging..." + echo "" +fi +``` + +**Available MCP Tools for Bug Investigation:** +1. **list_console_messages** - Capture console errors during test execution +2. **list_network_requests** - Monitor API calls and network failures +3. **get_console_message** - Inspect specific error messages +4. **take_screenshot** - Visual evidence of bug state +5. **evaluate_script** - Inspect runtime state (variables, DOM, etc.) + +**Example Usage During Test Execution:** +- After test fails, use `list_console_messages` to capture JavaScript errors +- Use `list_network_requests` to identify failed API calls +- Use `take_screenshot` to document visual bugs +- Use `evaluate_script` to inspect application state when bug occurs + +### During T004: Verify Test Passes (Fix Validation) + +When validating fix works correctly: + +```bash +if [ "$CHROME_DEVTOOLS_MODE" = "enabled" ]; then + echo "" + echo "🌐 Validating fix with Chrome DevTools MCP..." 
+ echo " βœ“ Monitoring console for errors" + echo " βœ“ Checking network requests" + echo "" +fi +``` + +**Validation Checklist:** +- βœ… No console errors (use `list_console_messages`) +- βœ… All network requests succeed (use `list_network_requests`) +- βœ… Visual state correct (use `take_screenshot`) +- βœ… Application state valid (use `evaluate_script`) + +### MCP vs Playwright Fallback + +**If Chrome DevTools MCP is enabled:** +- Richer debugging capabilities +- Persistent browser profile (`~/.cache/chrome-devtools-mcp/`) +- Real-time console monitoring +- Network request inspection +- No Chromium download needed (saves ~200MB) + +**If using Playwright fallback:** +- Standard browser automation +- Downloads Chromium (~200MB) if not installed +- Basic screenshot and interaction capabilities +- Works identically from user perspective + +**Important:** Both modes provide full functionality. Chrome DevTools MCP enhances debugging experience but is NOT required. + +--- + +## Post-Workflow Hook (if SpecTest installed) + +```bash +if [ "$ENABLE_HOOKS" = "true" ]; then + echo "" + echo "🎣 Post-Bugfix Hook" + + # Calculate metrics + WORKFLOW_END_TIME=$(date +%s) + WORKFLOW_DURATION=$((WORKFLOW_END_TIME - WORKFLOW_START_TIME)) + WORKFLOW_HOURS=$(echo "scale=1; $WORKFLOW_DURATION / 3600" | bc) + + # Validate completion + echo "βœ“ Bug fixed successfully" + echo "βœ“ Regression test added" + echo "βœ“ No new regressions introduced" + + # Tech stack compliance + if [ "$SPECSWARM_INSTALLED" = "true" ]; then + echo "βœ“ Tech stack compliant" + fi + + # Update metrics file + METRICS_FILE=".specswarm/workflow-metrics.json" + # [Update JSON with bugfix metrics] + echo "πŸ“Š Metrics saved: ${METRICS_FILE}" + + echo "" + echo "⏱️ Time to Fix: ${WORKFLOW_HOURS}h" + echo "" +fi +``` + +--- + +## Quality Impact Validation + +**Purpose**: Ensure bug fix doesn't introduce new issues or reduce code quality + +**YOU MUST NOW validate quality impact using these steps:** + +1. **Check if quality standards exist** using the Read tool: + - Try to read `${REPO_ROOT}.specswarm/quality-standards.md` + - If file doesn't exist: Skip quality validation, go to Final Output + - If file exists: Continue with validation + +2. **Execute quality validation** using the Bash tool: + + a. **Display header:** + ``` + πŸ§ͺ Quality Impact Analysis + ========================== + ``` + + b. **Run unit tests:** + ```bash + cd ${REPO_ROOT} && npx vitest run --reporter=verbose 2>&1 | tail -50 + ``` + Parse output for test counts (total, passed, failed). + + c. **Measure code coverage** (if tool available): + ```bash + npx vitest run --coverage 2>&1 | grep -A 10 "Coverage" || echo "Coverage not configured" + ``` + Parse coverage percentage. + + d. **Calculate quality score:** + - Unit Tests: 25 points if all pass, 0 if any fail + - Coverage: 25 points * (coverage_pct / 80) + - Other components: Use same scoring as implement workflow + +3. **Compare before/after quality:** + + a. **Load previous quality score** from metrics.json: + - Read `${REPO_ROOT}.specswarm/metrics.json` + - Find most recent quality score before this bugfix + - Store as BEFORE_SCORE + + b. **Calculate current quality score:** + - Use scores from step 2 + - Store as AFTER_SCORE + + c. **Calculate delta:** + - QUALITY_DELTA = AFTER_SCORE - BEFORE_SCORE + +4. **Display quality impact report:** + ``` + πŸ“Š Quality Impact Report + ======================== + + Before Fix: {BEFORE_SCORE}/100 + After Fix: {AFTER_SCORE}/100 + Change: {QUALITY_DELTA > 0 ? 
'+' : ''}{QUALITY_DELTA} points + + Test Results: + - Total Tests: {TOTAL} + - Passing: {PASSED} ({PASS_RATE}%) + - Failing: {FAILED} + + Code Coverage: {COVERAGE}% + + {VERDICT} + ``` + +5. **Determine verdict:** + + a. **If QUALITY_DELTA < 0 (quality decreased):** + - Display: "⚠️ WARNING: Bug fix reduced code quality by {abs(QUALITY_DELTA)} points" + - Ask user: + ``` + This suggests the fix may have introduced new issues. + + Would you like to: + 1. Review the changes and improve quality + 2. Continue anyway (NOT RECOMMENDED) + 3. Revert fix and try different approach + + Choose (1/2/3): + ``` + - If user chooses 1: Halt, suggest improvements + - If user chooses 2: Display strong warning and continue + - If user chooses 3: Offer to revert last commit + + b. **If QUALITY_DELTA == 0 (no change):** + - Display: "βœ… Quality maintained - Fix didn't introduce regressions" + - Continue to Final Output + + c. **If QUALITY_DELTA > 0 (quality improved):** + - Display: "πŸŽ‰ Quality improved! Fix increased quality by {QUALITY_DELTA} points" + - Highlight improvements (more tests passing, higher coverage, etc.) + - Continue to Final Output + +6. **Save quality metrics:** + - Update `${REPO_ROOT}.specswarm/metrics.json` + - Add quality_delta field to bugfix record + - Include before/after scores + +**IMPORTANT**: This validation catches regressions introduced by bug fixes. If quality decreases, investigate before proceeding. + +--- + +## Chain Bug Detection (Phase 3 Enhancement) + +**Purpose**: Detect if this bug fix introduced new bugs (prevent Bug 912 β†’ Bug 913 scenarios) + +**YOU MUST NOW check for chain bugs using the Bash tool:** + +1. **Run chain bug detector:** + ```bash + bash ~/.claude/plugins/marketplaces/specswarm-marketplace/plugins/speclabs/lib/chain-bug-detector.sh ${REPO_ROOT} + ``` + +2. **Parse detector output:** + - Exit code 0: No chain bugs detected (βœ“ SAFE) + - Exit code 1: Chain bugs detected (⚠️ WARNING) + +3. **If chain bugs detected:** + + a. **Display warning:** + ``` + ⛓️ CHAIN BUG DETECTED + ═══════════════════════ + + This bug fix may have introduced new issues: + - Tests decreased: {BEFORE} β†’ {AFTER} + - SSR issues: {COUNT} + - TypeScript errors: {COUNT} + + This suggests the fix created new problems. + ``` + + b. **Offer actions:** + ``` + What would you like to do? + 1. Review fix and improve (analyze with /specswarm:analyze-quality) + 2. Use orchestrator for complex fix (/speclabs:coordinate) + 3. Revert and try different approach (git revert HEAD) + 4. Continue anyway (NOT RECOMMENDED) + + Choose (1/2/3/4): + ``` + + c. **Execute user choice:** + - Option 1: Run `/specswarm:analyze-quality` for full analysis + - Option 2: Display orchestrator command to use + - Option 3: Offer to revert last commit + - Option 4: Display strong warning and continue + +4. **If no chain bugs:** + - Display: "βœ“ No chain bugs detected - Fix is clean" + - Continue to Final Output + +**Impact**: Prevents cascading bug fixes (Bug 912 β†’ 913 β†’ 914 chains) + +--- + +## Final Output + +``` +βœ… Bugfix Workflow Complete - Feature ${FEATURE_NUM} + +πŸ“‹ Artifacts Created: +- ${BUGFIX_SPEC} +- ${REGRESSION_TEST_SPEC} +- ${TASKS_FILE} + +πŸ“Š Results: +- Bug fixed successfully +- Regression test created and passing +- No new regressions introduced +${TECH_STACK_COMPLIANCE_RESULT} + +⏱️ Time to Fix: ${WORKFLOW_DURATION} +${PARALLEL_SPEEDUP_RESULT} + +πŸ“ˆ Next Steps: +1. Review artifacts in: ${FEATURE_DIR} +2. Commit changes: git add . && git commit -m "fix: bug ${FEATURE_NUM}" +3. 
View metrics: /specswarm:workflow-metrics ${FEATURE_NUM} +4. ${SUGGEST_NEXT_COMMAND} +``` + +**Post-Command Tip** (display only for web projects without Chrome DevTools MCP): + +```bash +if [ "$CHROME_DEVTOOLS_MODE" = "fallback" ]; then + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "πŸ’‘ TIP: Enhanced Web Debugging Available" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + echo "For web projects, Chrome DevTools MCP provides:" + echo " β€’ Real-time console error monitoring" + echo " β€’ Network request inspection" + echo " β€’ Runtime state debugging" + echo " β€’ Saves ~200MB Chromium download" + echo "" + echo "Install Chrome DevTools MCP:" + echo " claude mcp add ChromeDevTools/chrome-devtools-mcp" + echo "" + echo "Your workflow will automatically use it next time!" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" +fi +``` + +--- + +## Error Handling + +**If branch detection fails**: +- Prompt user for bug number +- Offer to create bugfix branch + +**If feature directory missing**: +- Prompt for bug description +- Create feature directory + +**If regression test fails to fail**: +- ❌ Error: "Regression test must fail before fix (proves bug exists)" +- Prompt user to review test or bug specification + +**If test suite has regressions**: +- ❌ Error: "New regressions detected. Fix must not break existing tests." +- List failed tests +- Suggest investigating breaking changes + +--- + +## Operating Principles + +1. **Ultrathink First**: Deep multi-layer analysis before jumping to solutions +2. **Test First**: Always write regression test before fixing +3. **Verify Reproduction**: Test must fail to prove bug exists +4. **Single Responsibility**: Each task has one clear goal +5. **No Regressions**: Full test suite must pass +6. **Tech Compliance**: Validate fix against tech stack (if SpecSwarm installed) +7. **Metrics Tracking**: Record time-to-fix, regression coverage +8. 
**Smart Integration**: Leverage SpecSwarm/SpecTest when available + +### Best Practice: Ultrathinking + +**Always use ultrathinking for root cause analysis.** Many bugs have multiple layers: + +**Example - Bug 918 (Password Reset "Already Used" Error):** +- **Surface symptom**: "Reset Link Already Used" error on fresh tokens +- **First layer found**: Multiple tokens allowed per user (Bug 916) +- **Second layer suspected**: Frontend caching issue (Bug 917 - red herring) +- **ACTUAL root cause** (found with ultrathink): Property name mismatch + - Database config: `postgres.camel` transforms `used_at` β†’ `usedAt` + - Code accessed: `result.used_at` (wrong name) β†’ returned `undefined` + - Logic treated `undefined` as "already used" β†’ fresh tokens rejected + +**Without ultrathink**: Found Bug 916, missed actual cause +**With ultrathink**: Found Bug 918, resolved the issue + +**Ultrathinking checklist:** +- βœ… Check database configuration (transforms, defaults, constraints) +- βœ… Analyze property access patterns (naming mismatches) +- βœ… Examine caching behavior (all layers) +- βœ… Trace data flow end-to-end +- βœ… Look for cascading bugs (one hiding another) +- βœ… Validate assumptions about how libraries work + +--- + +## Success Criteria + +βœ… Bugfix specification documents bug thoroughly +βœ… Regression test created +βœ… Test failed before fix (proved bug reproduction) +βœ… Fix implemented +βœ… Test passed after fix (proved bug fixed) +βœ… No new regressions introduced +βœ… Tech stack compliant (if SpecSwarm installed) +βœ… Metrics tracked (if SpecTest installed) + +--- + +**Workflow Coverage**: Addresses ~40% of development work (bug fixes) +**Integration**: Smart detection of SpecSwarm (tech enforcement) and SpecTest (parallel/hooks) +**Graduation Path**: Proven workflow will graduate to SpecSwarm stable diff --git a/commands/build.md b/commands/build.md new file mode 100644 index 0000000..80365d3 --- /dev/null +++ b/commands/build.md @@ -0,0 +1,441 @@ +--- +description: Build complete feature from specification to implementation - simplified workflow +args: + - name: feature_description + description: Natural language description of the feature to build + required: true + - name: --validate + description: Run browser validation with Playwright after implementation + required: false + - name: --quality-gate + description: Set minimum quality score (default 80) + required: false +--- + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Goal + +Build a complete feature from natural language description through implementation and quality validation. + +**Purpose**: Simplify feature development by orchestrating the complete workflow in a single command. 
+
+**Workflow**: Specify β†’ Clarify β†’ Plan β†’ Tasks β†’ Implement β†’ (Validate) β†’ Quality Analysis
+
+**User Experience**:
+- Single command instead of 7+ manual steps
+- Interactive clarification (only pause point)
+- Autonomous execution through implementation
+- Quality validated automatically
+- Ready for final merge with `/specswarm:ship`
+
+---
+
+## Pre-Flight Checks
+
+```bash
+# Parse arguments
+FEATURE_DESC=""
+RUN_VALIDATE=false
+QUALITY_GATE=80
+EXPECT_QUALITY_GATE=false
+
+# Extract flags and the feature description (first non-flag argument)
+for arg in $ARGUMENTS; do
+  if [ "$EXPECT_QUALITY_GATE" = true ]; then
+    # --quality-gate takes its value from the following argument
+    QUALITY_GATE="$arg"
+    EXPECT_QUALITY_GATE=false
+  elif [ "$arg" = "--validate" ]; then
+    RUN_VALIDATE=true
+  elif [ "$arg" = "--quality-gate" ]; then
+    EXPECT_QUALITY_GATE=true
+  elif [ "${arg:0:2}" != "--" ] && [ -z "$FEATURE_DESC" ]; then
+    FEATURE_DESC="$arg"
+  fi
+done
+
+# Validate feature description
+if [ -z "$FEATURE_DESC" ]; then
+  echo "❌ Error: Feature description required"
+  echo ""
+  echo "Usage: /specswarm:build \"feature description\" [--validate] [--quality-gate N]"
+  echo ""
+  echo "Examples:"
+  echo "  /specswarm:build \"Add user authentication with email/password\""
+  echo "  /specswarm:build \"Implement dark mode toggle\" --validate"
+  echo "  /specswarm:build \"Add shopping cart\" --validate --quality-gate 85"
+  exit 1
+fi
+
+# Get project root
+if ! git rev-parse --git-dir > /dev/null 2>&1; then
+  echo "❌ Error: Not in a git repository"
+  echo ""
+  echo "SpecSwarm requires an existing git repository to manage feature branches."
+  echo ""
+  echo "If you're starting a new project, scaffold it first:"
+  echo ""
+  echo "  # React + Vite"
+  echo "  npm create vite@latest my-app -- --template react-ts"
+  echo ""
+  echo "  # Next.js"
+  echo "  npx create-next-app@latest"
+  echo ""
+  echo "  # Astro"
+  echo "  npm create astro@latest"
+  echo ""
+  echo "  # Vue"
+  echo "  npm create vue@latest"
+  echo ""
+  echo "Then initialize git and SpecSwarm:"
+  echo "  cd my-app"
+  echo "  git init"
+  echo "  git add ."
+  echo "  git commit -m \"Initial project scaffold\""
+  echo "  /specswarm:init"
+  echo ""
+  echo "For existing projects, initialize git:"
+  echo "  git init"
+  echo "  git add ."
+  echo "  git commit -m \"Initial commit\""
+  echo ""
+  exit 1
+fi
+
+REPO_ROOT=$(git rev-parse --show-toplevel)
+cd "$REPO_ROOT"
+```
+
+---
+
+## Execution Steps
+
+### Step 1: Display Welcome Banner
+
+```bash
+echo "πŸ—οΈ SpecSwarm Build - Complete Feature Development"
+echo "══════════════════════════════════════════"
+echo ""
+echo "Feature: $FEATURE_DESC"
+echo ""
+echo "This workflow will:"
+echo "  1. Create detailed specification"
+echo "  2. Ask clarification questions (interactive)"
+echo "  3. Generate implementation plan"
+echo "  4. Generate task breakdown"
+echo "  5. Implement all tasks"
+if [ "$RUN_VALIDATE" = true ]; then
+echo "  6. Run browser validation (Playwright)"
+echo "  7. Analyze code quality"
+else
+echo "  6. Analyze code quality"
+fi
+echo ""
+echo "You'll only be prompted during Step 2 (clarification)."
+echo "All other steps run automatically."
+echo ""
+read -p "Press Enter to start, or Ctrl+C to cancel..."
+echo ""
+```
+
+---
+
+### Step 2: Phase 1 - Specification
+
+**YOU MUST NOW run the specify command using the SlashCommand tool:**
+
+```bash
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "πŸ“‹ Phase 1: Creating Specification"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo ""
+```
+
+```
+Use the SlashCommand tool to execute: /specswarm:specify "$FEATURE_DESC"
+```
+
+Wait for completion. Verify spec.md was created.
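+
+A lightweight guard can make that check explicit. This is a sketch only: it assumes the specify step exposes the new feature directory as `FEATURE_DIR`, which may differ in your setup.
+
+```bash
+# Sketch: confirm the specification phase actually produced spec.md before continuing.
+# FEATURE_DIR is an assumed variable set by /specswarm:specify; adjust to your layout.
+if [ ! -f "$FEATURE_DIR/spec.md" ]; then
+  echo "❌ Error: spec.md was not created - review the specification output above"
+  exit 1
+fi
+```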
+ +```bash +echo "" +echo "βœ… Specification created" +echo "" +``` + +--- + +### Step 3: Phase 2 - Clarification (INTERACTIVE) + +**YOU MUST NOW run the clarify command using the SlashCommand tool:** + +```bash +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "❓ Phase 2: Clarification Questions" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" +echo "⚠️ INTERACTIVE: Please answer the clarification questions." +echo "" +``` + +``` +Use the SlashCommand tool to execute: /specswarm:clarify +``` + +**IMPORTANT**: This step is interactive. Wait for user to answer questions. + +```bash +echo "" +echo "βœ… Clarification complete" +echo "" +``` + +--- + +### Step 4: Phase 3 - Planning + +**YOU MUST NOW run the plan command using the SlashCommand tool:** + +```bash +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "πŸ—ΊοΈ Phase 3: Generating Implementation Plan" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" +``` + +``` +Use the SlashCommand tool to execute: /specswarm:plan +``` + +Wait for plan.md to be generated. + +```bash +echo "" +echo "βœ… Implementation plan created" +echo "" +``` + +--- + +### Step 5: Phase 4 - Task Generation + +**YOU MUST NOW run the tasks command using the SlashCommand tool:** + +```bash +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "πŸ“ Phase 4: Generating Task Breakdown" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" +``` + +``` +Use the SlashCommand tool to execute: /specswarm:tasks +``` + +Wait for tasks.md to be generated. + +```bash +# Count tasks +TASK_COUNT=$(grep -c '^###[[:space:]]*T[0-9]' tasks.md 2>/dev/null || echo "0") + +echo "" +echo "βœ… Task breakdown created ($TASK_COUNT tasks)" +echo "" +``` + +--- + +### Step 6: Phase 5 - Implementation + +**YOU MUST NOW run the implement command using the SlashCommand tool:** + +```bash +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "βš™οΈ Phase 5: Implementing Feature" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" +echo "This will execute all $TASK_COUNT tasks automatically." +echo "Estimated time: 2-5 minutes per task" +echo "" +``` + +``` +Use the SlashCommand tool to execute: /specswarm:implement +``` + +Wait for implementation to complete. + +```bash +echo "" +echo "βœ… Implementation complete" +echo "" +``` + +--- + +### Step 7: Phase 6 - Browser Validation (Optional) + +**IF --validate flag was provided, run validation:** + +```bash +if [ "$RUN_VALIDATE" = true ]; then + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "🌐 Phase 6: Browser Validation" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + echo "Running AI-powered interaction flow validation with Playwright..." + echo "" +fi +``` + +**IF RUN_VALIDATE = true, use the SlashCommand tool:** + +``` +Use the SlashCommand tool to execute: /specswarm:validate +``` + +```bash +if [ "$RUN_VALIDATE" = true ]; then + echo "" + echo "βœ… Validation complete" + echo "" +fi +``` + +--- + +### Step 8: Phase 7 - Quality Analysis + +**YOU MUST NOW run the quality analysis using the SlashCommand tool:** + +```bash +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "πŸ“Š Phase 7: Code Quality Analysis" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" +``` + +``` +Use the SlashCommand tool to execute: /specswarm:analyze-quality +``` + +Wait for quality analysis to complete. Extract the quality score. + +Store quality score as QUALITY_SCORE. 
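+
+One possible way to capture that value, assuming the analysis prints a line such as "Quality Score: 87" to a report file (the report path and label are assumptions, not a documented contract):
+
+```bash
+# Sketch: pull the numeric score out of the quality analysis output.
+# QUALITY_REPORT and the "Quality Score:" label are assumed names; adjust to the real output.
+QUALITY_SCORE=$(grep -oE 'Quality Score:[[:space:]]*[0-9]+' "$QUALITY_REPORT" | grep -oE '[0-9]+' | head -1)
+QUALITY_SCORE=${QUALITY_SCORE:-0}   # default to 0 so the later comparison against QUALITY_GATE stays numeric
+```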
+ +--- + +### Step 9: Final Report + +**Display completion summary:** + +```bash +echo "" +echo "══════════════════════════════════════════" +echo "πŸŽ‰ FEATURE BUILD COMPLETE" +echo "══════════════════════════════════════════" +echo "" +echo "Feature: $FEATURE_DESC" +echo "" +echo "βœ… Specification created" +echo "βœ… Clarification completed" +echo "βœ… Plan generated" +echo "βœ… Tasks generated ($TASK_COUNT tasks)" +echo "βœ… Implementation complete" +if [ "$RUN_VALIDATE" = true ]; then +echo "βœ… Browser validation passed" +fi +echo "βœ… Quality analyzed (Score: ${QUALITY_SCORE}%)" +echo "" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "πŸ“ NEXT STEPS" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" +echo "1. πŸ§ͺ Manual Testing" +echo " - Test the feature in your browser/app" +echo " - Verify all functionality works as expected" +echo " - Check edge cases and error handling" +echo "" +echo "2. πŸ” Code Review (Optional)" +echo " - Review generated code for best practices" +echo " - Check for security issues" +echo " - Verify tech stack compliance" +echo "" +echo "3. 🚒 Ship When Ready" +echo " Run: /specswarm:ship" +echo "" +echo " This will:" +echo " - Validate quality meets threshold ($QUALITY_GATE%)" +echo " - Merge to parent branch if passing" +echo " - Complete the feature workflow" +echo "" + +if [ "$QUALITY_SCORE" -lt "$QUALITY_GATE" ]; then + echo "⚠️ WARNING: Quality score (${QUALITY_SCORE}%) below threshold (${QUALITY_GATE}%)" + echo " Consider addressing quality issues before shipping." + echo " Review the quality analysis output above for specific improvements." + echo "" +fi + +echo "══════════════════════════════════════════" +``` + +--- + +## Error Handling + +If any phase fails: + +1. **Specify fails**: Display error, suggest checking feature description clarity +2. **Clarify fails**: Display error, suggest re-running clarify separately +3. **Plan fails**: Display error, suggest reviewing spec.md for completeness +4. **Tasks fails**: Display error, suggest reviewing plan.md +5. **Implement fails**: Display error, suggest re-running implement or using bugfix +6. **Validate fails**: Display validation errors, suggest fixing and re-validating +7. 
**Quality analysis fails**: Display error, continue (quality optional for build) + +**All errors should report clearly and suggest remediation.** + +--- + +## Design Philosophy + +**Simplicity**: 1 command instead of 7+ manual steps + +**Efficiency**: Autonomous execution except for clarification (user only pauses once) + +**Quality**: Built-in quality analysis ensures code standards + +**Flexibility**: Optional validation and configurable quality gates + +**User Experience**: Clear progress indicators and final next steps + +--- + +## Comparison to Manual Workflow + +**Before** (Manual): +```bash +/specswarm:specify "feature description" +/specswarm:clarify +/specswarm:plan +/specswarm:tasks +/specswarm:implement +/specswarm:analyze-quality +/specswarm:complete +``` +**7 commands**, ~5 minutes of manual orchestration + +**After** (Build): +```bash +/specswarm:build "feature description" --validate +# [Answer clarification questions] +# [Wait for completion] +/specswarm:ship +``` +**2 commands**, 1 interactive pause, fully automated execution + +**Time Savings**: 85-90% reduction in manual orchestration overhead diff --git a/commands/checklist.md b/commands/checklist.md new file mode 100644 index 0000000..ef6b2ea --- /dev/null +++ b/commands/checklist.md @@ -0,0 +1,313 @@ +--- +description: Generate a custom checklist for the current feature based on user requirements. +--- + + + + +## Checklist Purpose: "Unit Tests for English" + +**CRITICAL CONCEPT**: Checklists are **UNIT TESTS FOR REQUIREMENTS WRITING** - they validate the quality, clarity, and completeness of requirements in a given domain. + +**NOT for verification/testing**: +- ❌ NOT "Verify the button clicks correctly" +- ❌ NOT "Test error handling works" +- ❌ NOT "Confirm the API returns 200" +- ❌ NOT checking if code/implementation matches the spec + +**FOR requirements quality validation**: +- βœ… "Are visual hierarchy requirements defined for all card types?" (completeness) +- βœ… "Is 'prominent display' quantified with specific sizing/positioning?" (clarity) +- βœ… "Are hover state requirements consistent across all interactive elements?" (consistency) +- βœ… "Are accessibility requirements defined for keyboard navigation?" (coverage) +- βœ… "Does the spec define what happens when logo image fails to load?" (edge cases) + +**Metaphor**: If your spec is code written in English, the checklist is its unit test suite. You're testing whether the requirements are well-written, complete, unambiguous, and ready for implementation - NOT whether the implementation works. + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Execution Steps + +1. **Discover Feature Context**: + ```bash + REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) + + # Source features location helper + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + PLUGIN_DIR="$(dirname "$SCRIPT_DIR")" + source "$PLUGIN_DIR/lib/features-location.sh" + + # Initialize features directory + get_features_dir "$REPO_ROOT" + + BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) + FEATURE_NUM=$(echo "$BRANCH" | grep -oE '^[0-9]{3}') + [ -z "$FEATURE_NUM" ] && FEATURE_NUM=$(list_features "$REPO_ROOT" | grep -oE '^[0-9]{3}' | sort -nr | head -1) + + find_feature_dir "$FEATURE_NUM" "$REPO_ROOT" + # FEATURE_DIR is now set by find_feature_dir + ``` + +2. **Clarify intent (dynamic)**: Derive up to THREE initial contextual clarifying questions (no pre-baked catalog). 
They MUST: + - Be generated from the user's phrasing + extracted signals from spec/plan/tasks + - Only ask about information that materially changes checklist content + - Be skipped individually if already unambiguous in `$ARGUMENTS` + - Prefer precision over breadth + + Generation algorithm: + 1. Extract signals: feature domain keywords (e.g., auth, latency, UX, API), risk indicators ("critical", "must", "compliance"), stakeholder hints ("QA", "review", "security team"), and explicit deliverables ("a11y", "rollback", "contracts"). + 2. Cluster signals into candidate focus areas (max 4) ranked by relevance. + 3. Identify probable audience & timing (author, reviewer, QA, release) if not explicit. + 4. Detect missing dimensions: scope breadth, depth/rigor, risk emphasis, exclusion boundaries, measurable acceptance criteria. + 5. Formulate questions chosen from these archetypes: + - Scope refinement (e.g., "Should this include integration touchpoints with X and Y or stay limited to local module correctness?") + - Risk prioritization (e.g., "Which of these potential risk areas should receive mandatory gating checks?") + - Depth calibration (e.g., "Is this a lightweight pre-commit sanity list or a formal release gate?") + - Audience framing (e.g., "Will this be used by the author only or peers during PR review?") + - Boundary exclusion (e.g., "Should we explicitly exclude performance tuning items this round?") + - Scenario class gap (e.g., "No recovery flows detectedβ€”are rollback / partial failure paths in scope?") + + Question formatting rules: + - If presenting options, generate a compact table with columns: Option | Candidate | Why It Matters + - Limit to A–E options maximum; omit table if a free-form answer is clearer + - Never ask the user to restate what they already said + - Avoid speculative categories (no hallucination). If uncertain, ask explicitly: "Confirm whether X belongs in scope." + + Defaults when interaction impossible: + - Depth: Standard + - Audience: Reviewer (PR) if code-related; Author otherwise + - Focus: Top 2 relevance clusters + + Output the questions (label Q1/Q2/Q3). After answers: if β‰₯2 scenario classes (Alternate / Exception / Recovery / Non-Functional domain) remain unclear, you MAY ask up to TWO more targeted follow‑ups (Q4/Q5) with a one-line justification each (e.g., "Unresolved recovery path risk"). Do not exceed five total questions. Skip escalation if user explicitly declines more. + +3. **Understand user request**: Combine `$ARGUMENTS` + clarifying answers: + - Derive checklist theme (e.g., security, review, deploy, ux) + - Consolidate explicit must-have items mentioned by user + - Map focus selections to category scaffolding + - Infer any missing context from spec/plan/tasks (do NOT hallucinate) + +4. **Load feature context**: Read from FEATURE_DIR: + - spec.md: Feature requirements and scope + - plan.md (if exists): Technical details, dependencies + - tasks.md (if exists): Implementation tasks + + **Context Loading Strategy**: + - Load only necessary portions relevant to active focus areas (avoid full-file dumping) + - Prefer summarizing long sections into concise scenario/requirement bullets + - Use progressive disclosure: add follow-on retrieval only if gaps detected + - If source docs are large, generate interim summary items instead of embedding raw text + +5. 
**Generate checklist** - Create "Unit Tests for Requirements": + - Create `FEATURE_DIR/checklists/` directory if it doesn't exist + - Generate unique checklist filename: + - Use short, descriptive name based on domain (e.g., `ux.md`, `api.md`, `security.md`) + - Format: `[domain].md` + - If file exists, append to existing file + - Number items sequentially starting from CHK001 + - Each `/speckit.checklist` run creates a NEW file (never overwrites existing checklists) + + **CORE PRINCIPLE - Test the Requirements, Not the Implementation**: + Every checklist item MUST evaluate the REQUIREMENTS THEMSELVES for: + - **Completeness**: Are all necessary requirements present? + - **Clarity**: Are requirements unambiguous and specific? + - **Consistency**: Do requirements align with each other? + - **Measurability**: Can requirements be objectively verified? + - **Coverage**: Are all scenarios/edge cases addressed? + + **Category Structure** - Group items by requirement quality dimensions: + - **Requirement Completeness** (Are all necessary requirements documented?) + - **Requirement Clarity** (Are requirements specific and unambiguous?) + - **Requirement Consistency** (Do requirements align without conflicts?) + - **Acceptance Criteria Quality** (Are success criteria measurable?) + - **Scenario Coverage** (Are all flows/cases addressed?) + - **Edge Case Coverage** (Are boundary conditions defined?) + - **Non-Functional Requirements** (Performance, Security, Accessibility, etc. - are they specified?) + - **Dependencies & Assumptions** (Are they documented and validated?) + - **Ambiguities & Conflicts** (What needs clarification?) + + **HOW TO WRITE CHECKLIST ITEMS - "Unit Tests for English"**: + + ❌ **WRONG** (Testing implementation): + - "Verify landing page displays 3 episode cards" + - "Test hover states work on desktop" + - "Confirm logo click navigates home" + + βœ… **CORRECT** (Testing requirements quality): + - "Are the exact number and layout of featured episodes specified?" [Completeness] + - "Is 'prominent display' quantified with specific sizing/positioning?" [Clarity] + - "Are hover state requirements consistent across all interactive elements?" [Consistency] + - "Are keyboard navigation requirements defined for all interactive UI?" [Coverage] + - "Is the fallback behavior specified when logo image fails to load?" [Edge Cases] + - "Are loading states defined for asynchronous episode data?" [Completeness] + - "Does the spec define visual hierarchy for competing UI elements?" [Clarity] + + **ITEM STRUCTURE**: + Each item should follow this pattern: + - Question format asking about requirement quality + - Focus on what's WRITTEN (or not written) in the spec/plan + - Include quality dimension in brackets [Completeness/Clarity/Consistency/etc.] + - Reference spec section `[Spec Β§X.Y]` when checking existing requirements + - Use `[Gap]` marker when checking for missing requirements + + **EXAMPLES BY QUALITY DIMENSION**: + + Completeness: + - "Are error handling requirements defined for all API failure modes? [Gap]" + - "Are accessibility requirements specified for all interactive elements? [Completeness]" + - "Are mobile breakpoint requirements defined for responsive layouts? [Gap]" + + Clarity: + - "Is 'fast loading' quantified with specific timing thresholds? [Clarity, Spec Β§NFR-2]" + - "Are 'related episodes' selection criteria explicitly defined? [Clarity, Spec Β§FR-5]" + - "Is 'prominent' defined with measurable visual properties? 
[Ambiguity, Spec Β§FR-4]" + + Consistency: + - "Do navigation requirements align across all pages? [Consistency, Spec Β§FR-10]" + - "Are card component requirements consistent between landing and detail pages? [Consistency]" + + Coverage: + - "Are requirements defined for zero-state scenarios (no episodes)? [Coverage, Edge Case]" + - "Are concurrent user interaction scenarios addressed? [Coverage, Gap]" + - "Are requirements specified for partial data loading failures? [Coverage, Exception Flow]" + + Measurability: + - "Are visual hierarchy requirements measurable/testable? [Acceptance Criteria, Spec Β§FR-1]" + - "Can 'balanced visual weight' be objectively verified? [Measurability, Spec Β§FR-2]" + + **Scenario Classification & Coverage** (Requirements Quality Focus): + - Check if requirements exist for: Primary, Alternate, Exception/Error, Recovery, Non-Functional scenarios + - For each scenario class, ask: "Are [scenario type] requirements complete, clear, and consistent?" + - If scenario class missing: "Are [scenario type] requirements intentionally excluded or missing? [Gap]" + - Include resilience/rollback when state mutation occurs: "Are rollback requirements defined for migration failures? [Gap]" + + **Traceability Requirements**: + - MINIMUM: β‰₯80% of items MUST include at least one traceability reference + - Each item should reference: spec section `[Spec Β§X.Y]`, or use markers: `[Gap]`, `[Ambiguity]`, `[Conflict]`, `[Assumption]` + - If no ID system exists: "Is a requirement & acceptance criteria ID scheme established? [Traceability]" + + **Surface & Resolve Issues** (Requirements Quality Problems): + Ask questions about the requirements themselves: + - Ambiguities: "Is the term 'fast' quantified with specific metrics? [Ambiguity, Spec Β§NFR-1]" + - Conflicts: "Do navigation requirements conflict between Β§FR-10 and Β§FR-10a? [Conflict]" + - Assumptions: "Is the assumption of 'always available podcast API' validated? [Assumption]" + - Dependencies: "Are external podcast API requirements documented? [Dependency, Gap]" + - Missing definitions: "Is 'visual hierarchy' defined with measurable criteria? [Gap]" + + **Content Consolidation**: + - Soft cap: If raw candidate items > 40, prioritize by risk/impact + - Merge near-duplicates checking the same requirement aspect + - If >5 low-impact edge cases, create one item: "Are edge cases X, Y, Z addressed in requirements? [Coverage]" + + **🚫 ABSOLUTELY PROHIBITED** - These make it an implementation test, not a requirements test: + - ❌ Any item starting with "Verify", "Test", "Confirm", "Check" + implementation behavior + - ❌ References to code execution, user actions, system behavior + - ❌ "Displays correctly", "works properly", "functions as expected" + - ❌ "Click", "navigate", "render", "load", "execute" + - ❌ Test cases, test plans, QA procedures + - ❌ Implementation details (frameworks, APIs, algorithms) + + **βœ… REQUIRED PATTERNS** - These test requirements quality: + - βœ… "Are [requirement type] defined/specified/documented for [scenario]?" + - βœ… "Is [vague term] quantified/clarified with specific criteria?" + - βœ… "Are requirements consistent between [section A] and [section B]?" + - βœ… "Can [requirement] be objectively measured/verified?" + - βœ… "Are [edge cases/scenarios] addressed in requirements?" + - βœ… "Does the spec define [missing aspect]?" + +6. 
**Structure Reference**: Generate the checklist following the canonical template in `templates/checklist-template.md` for title, meta section, category headings, and ID formatting. If template is unavailable, use: H1 title, purpose/created meta lines, `##` category sections containing `- [ ] CHK### ` lines with globally incrementing IDs starting at CHK001. + +7. **Report**: Output full path to created checklist, item count, and remind user that each run creates a new file. Summarize: + - Focus areas selected + - Depth level + - Actor/timing + - Any explicit user-specified must-have items incorporated + +**Important**: Each `/speckit.checklist` command invocation creates a checklist file using short, descriptive names unless file already exists. This allows: + +- Multiple checklists of different types (e.g., `ux.md`, `test.md`, `security.md`) +- Simple, memorable filenames that indicate checklist purpose +- Easy identification and navigation in the `checklists/` folder + +To avoid clutter, use descriptive types and clean up obsolete checklists when done. + +## Example Checklist Types & Sample Items + +**UX Requirements Quality:** `ux.md` + +Sample items (testing the requirements, NOT the implementation): +- "Are visual hierarchy requirements defined with measurable criteria? [Clarity, Spec Β§FR-1]" +- "Is the number and positioning of UI elements explicitly specified? [Completeness, Spec Β§FR-1]" +- "Are interaction state requirements (hover, focus, active) consistently defined? [Consistency]" +- "Are accessibility requirements specified for all interactive elements? [Coverage, Gap]" +- "Is fallback behavior defined when images fail to load? [Edge Case, Gap]" +- "Can 'prominent display' be objectively measured? [Measurability, Spec Β§FR-4]" + +**API Requirements Quality:** `api.md` + +Sample items: +- "Are error response formats specified for all failure scenarios? [Completeness]" +- "Are rate limiting requirements quantified with specific thresholds? [Clarity]" +- "Are authentication requirements consistent across all endpoints? [Consistency]" +- "Are retry/timeout requirements defined for external dependencies? [Coverage, Gap]" +- "Is versioning strategy documented in requirements? [Gap]" + +**Performance Requirements Quality:** `performance.md` + +Sample items: +- "Are performance requirements quantified with specific metrics? [Clarity]" +- "Are performance targets defined for all critical user journeys? [Coverage]" +- "Are performance requirements under different load conditions specified? [Completeness]" +- "Can performance requirements be objectively measured? [Measurability]" +- "Are degradation requirements defined for high-load scenarios? [Edge Case, Gap]" + +**Security Requirements Quality:** `security.md` + +Sample items: +- "Are authentication requirements specified for all protected resources? [Coverage]" +- "Are data protection requirements defined for sensitive information? [Completeness]" +- "Is the threat model documented and requirements aligned to it? [Traceability]" +- "Are security requirements consistent with compliance obligations? [Consistency]" +- "Are security failure/breach response requirements defined? 
[Gap, Exception Flow]" + +## Anti-Examples: What NOT To Do + +**❌ WRONG - These test implementation, not requirements:** + +```markdown +- [ ] CHK001 - Verify landing page displays 3 episode cards [Spec Β§FR-001] +- [ ] CHK002 - Test hover states work correctly on desktop [Spec Β§FR-003] +- [ ] CHK003 - Confirm logo click navigates to home page [Spec Β§FR-010] +- [ ] CHK004 - Check that related episodes section shows 3-5 items [Spec Β§FR-005] +``` + +**βœ… CORRECT - These test requirements quality:** + +```markdown +- [ ] CHK001 - Are the number and layout of featured episodes explicitly specified? [Completeness, Spec Β§FR-001] +- [ ] CHK002 - Are hover state requirements consistently defined for all interactive elements? [Consistency, Spec Β§FR-003] +- [ ] CHK003 - Are navigation requirements clear for all clickable brand elements? [Clarity, Spec Β§FR-010] +- [ ] CHK004 - Is the selection criteria for related episodes documented? [Gap, Spec Β§FR-005] +- [ ] CHK005 - Are loading state requirements defined for asynchronous episode data? [Gap] +- [ ] CHK006 - Can "visual hierarchy" requirements be objectively measured? [Measurability, Spec Β§FR-001] +``` + +**Key Differences:** +- Wrong: Tests if the system works correctly +- Correct: Tests if the requirements are written correctly +- Wrong: Verification of behavior +- Correct: Validation of requirement quality +- Wrong: "Does it do X?" +- Correct: "Is X clearly specified?" diff --git a/commands/clarify.md b/commands/clarify.md new file mode 100644 index 0000000..a551a52 --- /dev/null +++ b/commands/clarify.md @@ -0,0 +1,189 @@ +--- +description: Identify underspecified areas in the current feature spec by asking up to 5 highly targeted clarification questions and encoding answers back into the spec. +--- + + + + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Outline + +Goal: Detect and reduce ambiguity or missing decision points in the active feature specification and record the clarifications directly in the spec file. + +Note: This clarification workflow is expected to run (and be completed) BEFORE invoking `/speckit.plan`. If the user explicitly states they are skipping clarification (e.g., exploratory spike), you may proceed, but must warn that downstream rework risk increases. + +Execution steps: + +1. **Discover Feature Context**: + ```bash + REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) + + # Source features location helper + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + PLUGIN_DIR="$(dirname "$SCRIPT_DIR")" + source "$PLUGIN_DIR/lib/features-location.sh" + + # Initialize features directory + get_features_dir "$REPO_ROOT" + + BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) + FEATURE_NUM=$(echo "$BRANCH" | grep -oE '^[0-9]{3}') + [ -z "$FEATURE_NUM" ] && FEATURE_NUM=$(list_features "$REPO_ROOT" | grep -oE '^[0-9]{3}' | sort -nr | head -1) + + find_feature_dir "$FEATURE_NUM" "$REPO_ROOT" + # FEATURE_DIR is now set by find_feature_dir + FEATURE_SPEC="${FEATURE_DIR}/spec.md" + ``` + + Validate: If FEATURE_SPEC doesn't exist, ERROR: "No spec found. Run `/specify` first." + +2. Load the current spec file. Perform a structured ambiguity & coverage scan using this taxonomy. For each category, mark status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output raw map unless no questions will be asked). 
+ + Functional Scope & Behavior: + - Core user goals & success criteria + - Explicit out-of-scope declarations + - User roles / personas differentiation + + Domain & Data Model: + - Entities, attributes, relationships + - Identity & uniqueness rules + - Lifecycle/state transitions + - Data volume / scale assumptions + + Interaction & UX Flow: + - Critical user journeys / sequences + - Error/empty/loading states + - Accessibility or localization notes + + Non-Functional Quality Attributes: + - Performance (latency, throughput targets) + - Scalability (horizontal/vertical, limits) + - Reliability & availability (uptime, recovery expectations) + - Observability (logging, metrics, tracing signals) + - Security & privacy (authN/Z, data protection, threat assumptions) + - Compliance / regulatory constraints (if any) + + Integration & External Dependencies: + - External services/APIs and failure modes + - Data import/export formats + - Protocol/versioning assumptions + + Edge Cases & Failure Handling: + - Negative scenarios + - Rate limiting / throttling + - Conflict resolution (e.g., concurrent edits) + + Constraints & Tradeoffs: + - Technical constraints (language, storage, hosting) + - Explicit tradeoffs or rejected alternatives + + Terminology & Consistency: + - Canonical glossary terms + - Avoided synonyms / deprecated terms + + Completion Signals: + - Acceptance criteria testability + - Measurable Definition of Done style indicators + + Misc / Placeholders: + - TODO markers / unresolved decisions + - Ambiguous adjectives ("robust", "intuitive") lacking quantification + + For each category with Partial or Missing status, add a candidate question opportunity unless: + - Clarification would not materially change implementation or validation strategy + - Information is better deferred to planning phase (note internally) + +3. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints: + - Maximum of 10 total questions across the whole session. + - Each question must be answerable with EITHER: + * A short multiple‑choice selection (2–5 distinct, mutually exclusive options), OR + * A one-word / short‑phrase answer (explicitly constrain: "Answer in <=5 words"). + - Only include questions whose answers materially impact architecture, data modeling, task decomposition, test design, UX behavior, operational readiness, or compliance validation. + - Ensure category coverage balance: attempt to cover the highest impact unresolved categories first; avoid asking two low-impact questions when a single high-impact area (e.g., security posture) is unresolved. + - Exclude questions already answered, trivial stylistic preferences, or plan-level execution details (unless blocking correctness). + - Favor clarifications that reduce downstream rework risk or prevent misaligned acceptance tests. + - If more than 5 categories remain unresolved, select the top 5 by (Impact * Uncertainty) heuristic. + +4. Sequential questioning loop (interactive): + - Present EXACTLY ONE question at a time. + - For multiple‑choice questions render options as a Markdown table: + + | Option | Description | + |--------|-------------| + | A |