From cb5cc9ba2842d6e48fa5d57d6e8f490e6ae419ec Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 09:03:39 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 14 + README.md | 3 + commands/skills.md | 5 + plugin.lock.json | 57 ++ skills/using-skills/SKILL.md | 110 +++ skills/writing-skills/SKILL.md | 849 ++++++++++++++++++ .../examples/CLAUDE_MD_TESTING.md | 204 +++++ 7 files changed, 1242 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 commands/skills.md create mode 100644 plugin.lock.json create mode 100644 skills/using-skills/SKILL.md create mode 100644 skills/writing-skills/SKILL.md create mode 100644 skills/writing-skills/examples/CLAUDE_MD_TESTING.md diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..ace8444 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,14 @@ +{ + "name": "skills", + "description": "Meta-skills for finding, using, and writing Agent Skills - enforces skill usage protocols and provides skill authoring guidance", + "version": "0.1.0", + "author": { + "name": "zbeyens" + }, + "skills": [ + "./skills" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..ae2d756 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# skills + +Meta-skills for finding, using, and writing Agent Skills - enforces skill usage protocols and provides skill authoring guidance diff --git a/commands/skills.md b/commands/skills.md new file mode 100644 index 0000000..ccebee8 --- /dev/null +++ b/commands/skills.md @@ -0,0 +1,5 @@ +--- +description: Enforce skill usage protocols and mandatory workflows +--- + +Use the using-skills skill exactly as written \ No newline at end of file diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..ec25585 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,57 @@ +{ + 
"$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:udecode/dotai:.claude-plugin/plugins/skills", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "36136b535ddc7f0eabb803696fd82d411671de50", + "treeHash": "b01d60af50f449b13ecc9ad61ae4b04b5d7b744b8a254cc626bcbcc1e16606fc", + "generatedAt": "2025-11-28T10:28:49.884359Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "skills", + "description": "Meta-skills for finding, using, and writing Agent Skills - enforces skill usage protocols and provides skill authoring guidance", + "version": "0.1.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "5eba1770b5441df89cd9bbc664b4677609fa71f56bb1fe3c4d12ec740dad52b8" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "254bf3f6b8f224421728e953085b86fa83696c18bc066e449557a73b60de612e" + }, + { + "path": "commands/skills.md", + "sha256": "40a07e7c79e8c1a293140cb5c8d1c49482da57ba3028260dddbc4c8d971922e7" + }, + { + "path": "skills/using-skills/SKILL.md", + "sha256": "ae2a5826c2d821b747c374f863ca5941c22c88fb3fe2064cc61975d7d7ad3677" + }, + { + "path": "skills/writing-skills/SKILL.md", + "sha256": "5a6b45c441ed9ce486f5119664dd99c5af83dcddd548a26504d9e483d90e268b" + }, + { + "path": "skills/writing-skills/examples/CLAUDE_MD_TESTING.md", + "sha256": "79ca10f7abd1bcd57affd0ddf1ff990259d7ca809305110096b193a6f8d385e5" + } + ], + "dirSha256": "b01d60af50f449b13ecc9ad61ae4b04b5d7b744b8a254cc626bcbcc1e16606fc" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/using-skills/SKILL.md b/skills/using-skills/SKILL.md new file mode 100644 index 0000000..30590a6 --- /dev/null +++ 
b/skills/using-skills/SKILL.md @@ -0,0 +1,110 @@ +--- +name: using-skills +description: Use when starting any conversation - establishes mandatory workflows for finding and using skills, including using Skill tool before announcing usage, alignment before implementation, and creating TodoWrite todos for checklists +--- + + +If you think there is even a 1% chance a skill might apply to what you are doing, you ABSOLUTELY MUST read the skill. + +IF A SKILL APPLIES TO YOUR TASK, YOU DO NOT HAVE A CHOICE. YOU MUST USE IT. + +This is not negotiable. This is not optional. You cannot rationalize your way out of this. + + +# Getting Started with Skills + +## MANDATORY FIRST RESPONSE PROTOCOL + +Before responding to ANY user message, you MUST complete this checklist: + +1. ☐ List available skills in your mind +2. ☐ Ask yourself: "Does ANY skill match this request?" +3. ☐ If yes → Use the Skill tool to read and run the skill file +4. ☐ Announce which skill you're using +5. ☐ Follow the skill exactly + +**Responding WITHOUT completing this checklist = automatic failure.** + +## Critical Rules + +1. **Follow mandatory workflows.** Check for relevant skills before ANY task. + +2. Execute skills with the Skill tool + +## Before Coding + +**What did you understand about what I just said to you?** + +**How will you go about implementing it?** + +Please provide: + +1. **Clear understanding**: Restate what you think I'm asking for +2. **Step-by-step plan**: Exactly how you will implement it +3. **File changes**: Which files you'll modify/create and what changes +4. **Potential issues**: Any risks, dependencies, or considerations +5. **Success criteria**: How we'll know it's working correctly + +**CRITICAL**: Please wait for my review and confirmation before beginning your implementation. Do not start coding until I approve your plan. + +This ensures we're aligned before you begin work and prevents miscommunication or wasted effort. 
+ +## Common Rationalizations That Mean You're About To Fail + +If you catch yourself thinking ANY of these thoughts, STOP. You are rationalizing. Check for and use the skill. + +- "This is just a simple question" → WRONG. Questions are tasks. Check for skills. +- "I can check git/files quickly" → WRONG. Files don't have conversation context. Check for skills. +- "Let me gather information first" → WRONG. Skills tell you HOW to gather information. Check for skills. +- "This doesn't need a formal skill" → WRONG. If a skill exists for it, use it. +- "I remember this skill" → WRONG. Skills evolve. Run the current version. +- "This doesn't count as a task" → WRONG. If you're taking action, it's a task. Check for skills. +- "The skill is overkill for this" → WRONG. Skills exist because simple things become complex. Use it. +- "I'll just do this one thing first" → WRONG. Check for skills BEFORE doing anything. + +**Why:** Skills document proven techniques that save time and prevent mistakes. Not using available skills means repeating solved problems and making known errors. + +If a skill for your task exists, you must use it or you will fail at your task. + +## Skills with Checklists + +If a skill has a checklist, YOU MUST create TodoWrite todos for EACH item. + +**Don't:** + +- Work through checklist mentally +- Skip creating todos "to save time" +- Batch multiple items into one todo +- Mark complete without doing them + +**Why:** Checklists without TodoWrite tracking = steps get skipped. Every time. The overhead of TodoWrite is tiny compared to the cost of missing steps. + +# About these skills + +**Many skills contain rigid rules (debugging, verification, service patterns).** Follow them exactly. Don't adapt away the discipline. + +**Some skills are flexible patterns (architecture, naming).** Adapt core principles to your context. + +The skill itself tells you which type it is. 
+ +## Instructions ≠ Permission to Skip Workflows + +Your human partner's specific instructions describe WHAT to do, not HOW. + +"Add X", "Fix Y" = the goal, NOT permission to skip verification, alignment, or proper implementation patterns. + +**Red flags:** "Instruction was specific" • "Seems simple" • "Workflow is overkill" + +**Why:** Specific instructions mean clear requirements, which is when workflows matter MOST. Skipping process on "simple" tasks is how simple tasks become complex problems. + +## Summary + +**Starting any task:** + +1. If relevant skill exists → Use the skill +2. Announce you're using it +3. Follow what it says + +**Skill has checklist?** TodoWrite for every item. + +**Finding a relevant skill = mandatory to read and use it. Not optional.** diff --git a/skills/writing-skills/SKILL.md b/skills/writing-skills/SKILL.md new file mode 100644 index 0000000..99067bc --- /dev/null +++ b/skills/writing-skills/SKILL.md @@ -0,0 +1,849 @@ +--- +name: writing-skills +description: 'Use when creating or editing rules/skills in .claude/rules/, whether context-specific skills (alwaysApply: false) or always-merged rules (alwaysApply: true) - applies TDD by identifying failure patterns (RED), writing rule/skill (GREEN), then closing loopholes (REFACTOR). Supports globs for file patterns. NEVER use "skill-creator" skill.' +--- + +# Writing Rules + +## Overview + +**Writing rules (skills + always-apply rules) IS Test-Driven Development applied to process documentation.** + +**Rules are maintained in `.claude/rules/` as MDC files with frontmatter. Ruler processes rules based on `alwaysApply`:** + +- `alwaysApply: false` (or omitted) → generates `.claude/skills/` (context-loaded) +- `alwaysApply: true` → merged into `AGENTS.md` (always present) + +**How it works:** + +1. Write rule as `.mdc` file in `.claude/rules/` with frontmatter (`alwaysApply: true/false`) +2. **REQUIRED:** Run `npx skiller@latest apply` after ANY rule creation or update +3. 
Ruler processes based on `alwaysApply`: +   - `false` → generates `.claude/skills/` (context-loaded by Claude Code) +   - `true` → merges into `AGENTS.md` (always present) +4. Optionally add `globs` for file pattern matching + +You identify common failure patterns (baseline behavior), write the skill (documentation) addressing those patterns, then verify through application scenarios, and refactor (close loopholes). + +**Core principle:** If you didn't identify what agents naturally do wrong, you don't know if the skill prevents the right failures. + +**REQUIRED BACKGROUND:** You MUST understand test-driven-development before using this skill. That skill defines the fundamental RED-GREEN-REFACTOR cycle. This skill adapts TDD to documentation. + +**Official guidance:** For Anthropic's official skill authoring best practices, see anthropic-best-practices.md. That document provides additional patterns and guidelines that complement the TDD-focused approach in this skill. + +**Complete worked example:** See examples/CLAUDE_MD_TESTING.md for a full verification campaign testing CLAUDE.md documentation variants. + +## What is a Skill? + +A **skill** is a reference guide for proven techniques, patterns, or tools. Skills help future Claude instances find and apply effective approaches. 
+ +**Skills are:** Reusable techniques, patterns, tools, reference guides + +**Skills are NOT:** Narratives about how you solved a problem once + +## TDD Mapping for Skills + +| TDD Concept | Skill Creation | +| ----------------------- | ------------------------------------------------ | +| **Test case** | Anticipated failure pattern from experience | +| **Production code** | Skill document (.mdc file in .claude/rules/) | +| **Test fails (RED)** | Identify common mistakes without skill | +| **Test passes (GREEN)** | Skill addresses those specific mistakes | +| **Refactor** | Close loopholes while maintaining clarity | +| **Write test first** | Identify failure patterns BEFORE writing skill | +| **Watch it fail** | Document exact rationalizations from experience | +| **Minimal code** | Write skill addressing those specific violations | +| **Watch it pass** | Verify skill clarity through application | +| **Refactor cycle** | Find new rationalizations → plug → re-verify | + +The entire skill creation process follows RED-GREEN-REFACTOR. 
+ +## When to Create a Skill + +**Create when:** + +- Technique wasn't intuitively obvious to you +- You'd reference this again across projects +- Pattern applies broadly (not project-specific) +- Others would benefit + +**Don't create for:** + +- One-off solutions +- Standard practices well-documented elsewhere +- Project-specific conventions (put in CLAUDE.md) + +## Skill Types + +### Technique + +Concrete method with steps to follow (condition-based-waiting, root-cause-tracing) + +### Pattern + +Way of thinking about problems (flatten-with-flags, test-invariants) + +### Reference + +API docs, syntax guides, tool documentation (office docs) + +## Directory Structure + +**Single-file skills** (default): + +``` +.claude/rules/ + skill-name.mdc # MDC file with frontmatter +``` + +**Multi-file skills** (only if >1 file needed): + +``` +.claude/rules/ + skill-name/ + skill-name.mdc # Main skill (same basename as folder) + supporting-file.* # Additional files +``` + +**Ruler auto-generates from rules** - `npx skiller@latest apply` creates `.claude/skills/` from `.claude/rules/` + +**When to use folders:** + +- **Heavy reference** (100+ lines) - API docs, comprehensive syntax +- **Reusable tools** - Scripts, utilities, templates +- **Multiple files** - Anything requiring >1 file + +**Single .mdc file when:** + +- All content fits inline +- No external scripts or heavy reference +- Principles, concepts, code patterns (< 50 lines) + +## MDC File Structure + +**MDC format** (.mdc files) with **frontmatter**: + +**Frontmatter fields:** + +- `name`: Skill identifier (letters, numbers, hyphens only) +- `description`: Discovery text (max 1024 chars total for frontmatter) + - Start with "Use when..." 
to focus on triggering conditions + - Include specific symptoms, situations, contexts + - Written in third person + - Keep under 500 characters if possible +- `alwaysApply`: Must be `false` or omitted (ruler only generates skills from non-always rules) + +```markdown +--- +name: skill-name-with-hyphens +description: Use when [specific triggering conditions and symptoms] - [what the skill does and how it helps, written in third person] +alwaysApply: false +--- + +# Skill Name + +## Overview + +What is this? Core principle in 1-2 sentences. + +## When to Use + +[Small inline flowchart IF decision non-obvious] + +Bullet list with SYMPTOMS and use cases +When NOT to use + +## Core Pattern (for techniques/patterns) + +Before/after code comparison + +## Quick Reference + +Table or bullets for scanning common operations + +## Implementation + +Inline code for simple patterns +Link to file for heavy reference or reusable tools + +## Common Mistakes + +What goes wrong + fixes + +## Real-World Impact (optional) + +Concrete results +``` + +## Claude Search Optimization (CSO) + +**Critical for discovery:** Future Claude needs to FIND your skill + +### 1. Rich Description Field + +**Purpose:** Claude reads description to decide which skills to load for a given task. Make it answer: "Should I read this skill right now?" + +**Format:** Start with "Use when..." 
to focus on triggering conditions, then explain what it does + +**Content:** + +- Use concrete triggers, symptoms, and situations that signal this skill applies +- Describe the _problem_ (race conditions, inconsistent behavior) not _language-specific symptoms_ (setTimeout, sleep) +- Keep triggers technology-agnostic unless the skill itself is technology-specific +- If skill is technology-specific, make that explicit in the trigger +- Write in third person (injected into system prompt) + +```yaml +# ❌ BAD: Too abstract, vague, doesn't include when to use +description: For async testing + +# ❌ BAD: First person +description: I can help you with async tests when they're flaky + +# ❌ BAD: Mentions technology but skill isn't specific to it +description: Use when tests use setTimeout/sleep and are flaky + +# ✅ GOOD: Starts with "Use when", describes problem, then what it does +description: Use when tests have race conditions, timing dependencies, or pass/fail inconsistently - replaces arbitrary timeouts with condition polling for reliable async tests + +# ✅ GOOD: Technology-specific skill with explicit trigger +description: Use when using React Router and handling authentication redirects - provides patterns for protected routes and auth state management +``` + +### 2. Keyword Coverage + +Use words Claude would search for: + +- Error messages: "Hook timed out", "ENOTEMPTY", "race condition" +- Symptoms: "flaky", "hanging", "zombie", "pollution" +- Synonyms: "timeout/hang/freeze", "cleanup/teardown/afterEach" +- Tools: Actual commands, library names, file types + +### 3. Descriptive Naming + +**Use active voice, verb-first:** + +- ✅ `creating-skills` not `skill-creation` +- ✅ `writing-rules` not `rule-writing` + +### 4. Token Efficiency (Critical) + +**Problem:** getting-started and frequently-referenced skills load into EVERY conversation. Every token counts. 
+ +**Target word counts:** + +- getting-started workflows: <150 words each +- Frequently-loaded skills: <200 words total +- Other skills: <500 words (still be concise) + +**Techniques:** + +**Move details to tool help:** + +```bash +# ❌ BAD: Document all flags in SKILL.md +search-conversations supports --text, --both, --after DATE, --before DATE, --limit N + +# ✅ GOOD: Reference --help +search-conversations supports multiple modes and filters. Run --help for details. +``` + +**Use cross-references:** + +```markdown +# ❌ BAD: Repeat workflow details + +When implementing feature, follow these 20 steps... +[20 lines of repeated instructions from another skill] + +# ✅ GOOD: Reference other skill + +For implementation workflow, REQUIRED: Use [other-skill-name] for complete process. +``` + +**Compress examples:** + +```markdown +# ❌ BAD: Verbose example (42 words) + +your human partner: "How did we handle authentication errors in React Router before?" +You: I'll search for React Router authentication patterns in our codebase and documentation to find previous implementations. +[Detailed explanation of search process...] + +# ✅ GOOD: Minimal example (15 words) + +Partner: "How did we handle auth errors in React Router?" 
+You: [Search codebase → provide solution] +``` + +**Eliminate redundancy:** + +- Don't repeat what's in cross-referenced skills +- Don't explain what's obvious from command +- Don't include multiple examples of same pattern + +**Verification:** + +```bash +wc -w .claude/rules/skill-name.mdc +# getting-started workflows: aim for <150 each +# Other frequently-loaded: aim for <200 total +``` + +**Name by what you DO or core insight:** + +- ✅ `condition-based-waiting` > `async-test-helpers` +- ✅ `using-skills` not `skill-usage` +- ✅ `flatten-with-flags` > `data-structure-refactoring` +- ✅ `root-cause-tracing` > `debugging-techniques` + +**Gerunds (-ing) work well for processes:** + +- `creating-skills`, `testing-skills`, `debugging-with-logs` +- Active, describes the action you're taking + +### 5. Cross-Referencing Other Skills + +**When writing documentation that references other skills:** + +Use skill name only, with explicit requirement markers: + +- ✅ Good: `**REQUIRED SUB-SKILL:** Use superpowers test-driven-development` +- ✅ Good: `**REQUIRED BACKGROUND:** You MUST understand superpowers systematic-debugging` +- ❌ Bad: `See .claude/rules/test-driven-development.mdc` (unclear if required) +- ❌ Bad: `@.claude/rules/test-driven-development.mdc` (force-loads, burns context) + +**Why no @ links:** `@` syntax force-loads files immediately, consuming 200k+ context before you need them. + +## Flowchart Usage + +```dot +digraph when_flowchart { +    "Need to show information?" [shape=diamond]; +    "Decision where I might go wrong?" [shape=diamond]; +    "Use markdown" [shape=box]; +    "Small inline flowchart" [shape=box]; + +    "Need to show information?" -> "Decision where I might go wrong?" [label="yes"]; +    "Decision where I might go wrong?" -> "Small inline flowchart" [label="yes"]; +    "Decision where I might go wrong?" 
-> "Use markdown" [label="no"]; +} +``` + +**Use flowcharts ONLY for:** + +- Non-obvious decision points +- Process loops where you might stop too early +- "When to use A vs B" decisions + +**Never use flowcharts for:** + +- Reference material → Tables, lists +- Code examples → Markdown blocks +- Linear instructions → Numbered lists +- Labels without semantic meaning (step1, helper2) + +See @graphviz-conventions.dot for graphviz style rules. + +## Code Examples + +**One excellent example beats many mediocre ones** + +Choose most relevant language: + +- Testing techniques → TypeScript/JavaScript +- System debugging → Shell/Python +- Data processing → Python + +**Good example:** + +- Complete and runnable +- Well-commented explaining WHY +- From real scenario +- Shows pattern clearly +- Ready to adapt (not generic template) + +**Don't:** + +- Implement in 5+ languages +- Create fill-in-the-blank templates +- Write contrived examples + +You're good at porting - one great example is enough. + +## File Organization + +### Self-Contained Skill (Default) + +``` +.claude/rules/ + defense-in-depth.mdc # Everything inline +``` + +**When:** All content fits, no heavy reference needed + +### Skill with Reusable Tool + +``` +.claude/rules/ + condition-based-waiting/ + condition-based-waiting.mdc # Overview + patterns + example.ts # Working helpers to adapt +``` + +**When:** Tool is reusable code, not just narrative + +### Skill with Heavy Reference + +``` +.claude/rules/ + pptx/ + pptx.mdc # Overview + workflows + pptxgenjs.md # 600 lines API reference + ooxml.md # 500 lines XML structure + scripts/ # Executable tools +``` + +**When:** Reference material too large for inline + +**Note:** Ruler copies all files from skill folder to `.claude/skills/` when `.mdc` basename matches folder name + +## The Iron Law (Same as TDD) + +``` +NO SKILL WITHOUT IDENTIFYING FAILURE PATTERNS FIRST +``` + +This applies to NEW skills AND EDITS to existing skills. 
+ +Write skill before identifying what it prevents? Delete it. Start over. +Edit skill without identifying new failure patterns? Same violation. + +**No exceptions:** + +- Not for "simple additions" +- Not for "just adding a section" +- Not for "documentation updates" +- Don't keep unverified changes as "reference" +- Don't "adapt" while applying +- Delete means delete + +**REQUIRED BACKGROUND:** The test-driven-development skill explains why this matters. Same principles apply to documentation. + +## Verifying All Skill Types + +Different skill types need different verification approaches: + +### Discipline-Enforcing Skills (rules/requirements) + +**Verify with:** + +- Anticipate academic questions: Does skill explain the rules clearly? +- Anticipate pressure scenarios: Does skill address rationalizations under stress? +- Identify multiple pressures: time + sunk cost + exhaustion +- Add explicit counters for each rationalization + +**Success criteria:** Skill prevents violations under anticipated pressures + +### Technique Skills (how-to guides) + +**Examples:** condition-based-waiting, root-cause-tracing, defensive-programming + +**Verify with:** + +- Application to real scenarios: Does skill guide correctly? +- Variation scenarios: Does skill cover edge cases? +- Gap analysis: Are common use cases covered? + +**Success criteria:** Skill enables successful technique application + +### Pattern Skills (mental models) + +**Examples:** reducing-complexity, information-hiding concepts + +**Verify with:** + +- Recognition guidance: Does skill explain when pattern applies? +- Application examples: Does skill show how to use the mental model? +- Counter-examples: Does skill clarify when NOT to apply? 
+ +**Success criteria:** Skill enables correct pattern recognition and application + +### Reference Skills (documentation/APIs) + +**Examples:** API documentation, command references, library guides + +**Verify with:** + +- Information retrieval: Is right information findable? +- Application examples: Are use cases clear and correct? +- Gap analysis: Are common scenarios covered? + +**Success criteria:** Skill enables finding and correctly applying information + +## Common Rationalizations for Skipping Verification + +| Excuse | Reality | +| ------------------------------- | ----------------------------------------------------------------------- | +| "Skill is obviously clear" | Clear to you ≠ clear to other agents. Verify it. | +| "It's just a reference" | References can have gaps, unclear sections. Verify information access. | +| "Verification is overkill" | Unverified skills have issues. Always. 15 min verification saves hours. | +| "I'll verify if problems arise" | Problems = agents can't use skill. Verify BEFORE deploying. | +| "Too tedious to verify" | Verification is less tedious than debugging bad skill in production. | +| "I'm confident it's good" | Overconfidence guarantees issues. Verify anyway. | +| "Academic review is enough" | Reading ≠ using. Verify through application. | +| "No time to verify" | Deploying unverified skill wastes more time fixing it later. | + +**All of these mean: Verify before deploying. No exceptions.** + +## Bulletproofing Skills Against Rationalization + +Skills that enforce discipline (like TDD) need to resist rationalization. Agents are smart and will find loopholes when under pressure. + +**Psychology note:** Understanding WHY persuasion techniques work helps you apply them systematically. See persuasion-principles.md for research foundation (Cialdini, 2021; Meincke et al., 2025) on authority, commitment, scarcity, social proof, and unity principles. 
+ +### Meta-Verification (When Application Reveals Gaps) + +**After applying skill and finding it unclear, ask yourself:** + +```markdown +I read the skill and still chose wrong approach. + +How could that skill have been written differently to make +it crystal clear what the correct choice was? +``` + +**Three possible answers:** + +1. **"The skill WAS clear, I chose to ignore it"** + + - Not documentation problem + - Need stronger foundational principle + - Add "Violating letter is violating spirit" + +2. **"The skill should have said X"** + + - Documentation problem + - Add that guidance verbatim + +3. **"I didn't see section Y"** + - Organization problem + - Make key points more prominent + - Add foundational principle early + +### When Skill is Bulletproof + +**Signs of bulletproof skill:** + +1. **Correct choice is obvious** even under pressure +2. **Skill anticipates** the specific rationalizations +3. **Red flags section** catches you before violation +4. **Meta-verification reveals** "skill was clear, I should follow it" + +**Not bulletproof if:** + +- You find new rationalizations during application +- Skill leaves room for "hybrid approaches" +- Multiple valid interpretations exist +- Doesn't address "spirit vs letter" argument + +### Example: Bulletproofing Process + +**Initial Version (Failed):** + +```markdown +Scenario: 200 lines done, forgot rule, exhausted, dinner plans +Applied skill: Still chose wrong approach +Rationalization: "Already achieve same goals differently" +``` + +**Iteration 1 - Add Counter:** + +```markdown +Added section: "Why This Specific Approach Matters" +Re-applied: STILL chose wrong approach +New rationalization: "Spirit not letter" +``` + +**Iteration 2 - Add Foundational Principle:** + +```markdown +Added: "Violating letter is violating spirit" +Re-applied: Chose correct approach +Cited: New principle directly +Meta-verification: "Skill was clear, I should follow it" +``` + +**Bulletproof achieved.** + +### Close Every 
Loophole Explicitly + +Don't just state the rule - forbid specific workarounds: + + +```markdown +Write code before test? Delete it. +``` + + + +```markdown +Write code before test? Delete it. Start over. + +**No exceptions:** + +- Don't keep it as "reference" +- Don't "adapt" it while writing tests +- Don't look at it +- Delete means delete + +```` + + +### Address "Spirit vs Letter" Arguments + +Add foundational principle early: + +```markdown +**Violating the letter of the rules is violating the spirit of the rules.** +```` + +This cuts off entire class of "I'm following the spirit" rationalizations. + +### Build Rationalization Table + +Capture rationalizations from baseline testing (see the "RED-GREEN-REFACTOR for Skills" section below). Every excuse agents make goes in the table: + +```markdown +| Excuse                           | Reality                                                                 | +| -------------------------------- | ----------------------------------------------------------------------- | +| "Too simple to test"             | Simple code breaks. Test takes 30 seconds.                              | +| "I'll test after"                | Tests passing immediately prove nothing.                                | +| "Tests after achieve same goals" | Tests-after = "what does this do?" Tests-first = "what should this do?" | +``` + +### Create Red Flags List + +Make it easy for agents to self-check when rationalizing: + +```markdown +## Red Flags - STOP and Start Over + +- Code before test +- "I already manually tested it" +- "Tests after achieve the same purpose" +- "It's about spirit not ritual" +- "This is different because..." + +**All of these mean: Delete code. 
Start over with TDD.** +``` + +### Update CSO for Violation Symptoms + +Add to description: symptoms of when you're ABOUT to violate the rule: + +```yaml +description: Use when implementing any feature or bugfix, before writing implementation code +``` + +## RED-GREEN-REFACTOR for Skills + +Follow the TDD cycle: + +### RED: Identify Failure Patterns (Baseline) + +Before writing skill, identify what goes wrong without it: + +**Process:** + +- [ ] **Identify common mistakes** - What goes wrong without this skill? +- [ ] **Document rationalizations** - What excuses lead to these mistakes (verbatim)? +- [ ] **Identify pressures** - Which scenarios trigger violations? +- [ ] **Note patterns** - Which excuses appear repeatedly? + +**Anticipate pressure scenarios** - Think through realistic situations with multiple pressures: + +```markdown +Example pressure scenario (3+ combined pressures): + +You spent 3 hours, 200 lines, manually tested. It works. +It's 6pm, dinner at 6:30pm. Code review tomorrow 9am. +Just realized you forgot [rule from skill]. 
+ +Options: +A) Delete 200 lines, start fresh tomorrow following rule +B) Commit now, address rule tomorrow +C) Apply rule now (30 min delay) + +Without skill: Agent likely chooses B or C +Rationalizations: "Already working", "Tests after achieve same goals", "Deleting is wasteful" +``` + +**Pressure Types to Consider:** + +| Pressure | Example | +| -------------- | ------------------------------------------ | +| **Time** | Emergency, deadline, deploy window closing | +| **Sunk cost** | Hours of work, "waste" to delete | +| **Authority** | Senior says skip it, manager overrides | +| **Economic** | Job, promotion, company survival at stake | +| **Exhaustion** | End of day, already tired, want to go home | +| **Social** | Looking dogmatic, seeming inflexible | +| **Pragmatic** | "Being pragmatic vs dogmatic" | + +**Best scenarios combine 3+ pressures.** + +### GREEN: Write Minimal Skill + +Write skill that addresses those specific rationalizations. Don't add extra content for hypothetical cases. + +**Verify through application**: Apply skill to real scenarios in this session. Does it make the correct choice obvious? Does it address the rationalizations you identified? + +### REFACTOR: Close Loopholes + +Found new rationalizations during application? Add explicit counter. Re-verify until bulletproof. + +**For each new rationalization, add:** + +1. **Explicit Negation in Rules** + +```markdown +Rule before test? Delete it. Start over. + +**No exceptions:** + +- Don't keep it as "reference" +- Don't "adapt" it while writing +- Don't look at it +- Delete means delete +``` + +2. **Entry in Rationalization Table** + +```markdown +| Excuse | Reality | +| ------------------- | ---------------------------------------------------------------- | +| "Keep as reference" | You'll adapt it. That's violating the rule. Delete means delete. | +``` + +3. 
**Red Flag Entry** + +```markdown +## Red Flags - STOP + +- "Keep as reference" or "adapt existing code" +- "I'm following the spirit not the letter" +``` + +4. **Update description** - Add symptoms of when you're ABOUT to violate the rule: + +```yaml +description: Use when [about to violate], when tempted to [rationalization]... +``` + +## Anti-Patterns + +### ❌ Narrative Example + +"In session 2025-10-03, we found empty projectDir caused..." +**Why bad:** Too specific, not reusable + +### ❌ Multi-Language Dilution + +example-js.js, example-py.py, example-go.go +**Why bad:** Mediocre quality, maintenance burden + +### ❌ Code in Flowcharts + +```dot +step1 [label="import fs"]; +step2 [label="read file"]; +``` + +**Why bad:** Can't copy-paste, hard to read + +### ❌ Generic Labels + +helper1, helper2, step3, pattern4 +**Why bad:** Labels should have semantic meaning + +## STOP: Before Moving to Next Skill + +**After writing ANY skill, you MUST STOP and complete the deployment process.** + +**Do NOT:** + +- Create multiple skills in batch without testing each +- Move to next skill before current one is verified +- Skip testing because "batching is more efficient" + +**The deployment checklist below is MANDATORY for EACH skill.** + +Deploying untested skills = deploying untested code. It's a violation of quality standards. + +## Skill Creation Checklist (TDD Adapted) + +**IMPORTANT: Use TodoWrite to create todos for EACH checklist item below.** + +**RED Phase - Identify Failure Patterns:** + +- [ ] Document common mistakes from experience (what goes wrong without this skill?) +- [ ] Identify rationalizations that lead to mistakes (verbatim phrases) +- [ ] Identify pressures that trigger violations (time, sunk cost, "already working", etc.) + +**GREEN Phase - Write Minimal Skill:** + +- [ ] Name uses only letters, numbers, hyphens (no parentheses/special chars) +- [ ] YAML frontmatter with only name and description (max 1024 chars) +- [ ] Description starts with "Use when..." 
and includes specific triggers/symptoms +- [ ] Description written in third person +- [ ] Keywords throughout for search (errors, symptoms, tools) +- [ ] Clear overview with core principle +- [ ] Address specific baseline failures identified in RED +- [ ] Code inline OR link to separate file +- [ ] One excellent example (not multi-language) +- [ ] **MANDATORY:** Run `npx skiller@latest apply` to generate .claude/skills/ +- [ ] Verify skill clarity through application to real scenarios + +**REFACTOR Phase - Close Loopholes:** + +- [ ] Identify NEW rationalizations during application +- [ ] Add explicit counters (if discipline skill) +- [ ] Build rationalization table from all identified patterns +- [ ] Create red flags list +- [ ] **MANDATORY:** Run `npx skiller@latest apply` after ANY changes +- [ ] Re-verify clarity and completeness + +**Quality Checks:** + +- [ ] Small flowchart only if decision non-obvious +- [ ] Quick reference table +- [ ] Common mistakes section +- [ ] No narrative storytelling +- [ ] Supporting files only for tools or heavy reference + +**Deployment:** + +- [ ] Skill is ready for use + +## Discovery Workflow + +How future Claude finds your skill: + +1. **Encounters problem** ("tests are flaky") +2. **Finds SKILL** (description matches) +3. **Scans overview** (is this relevant?) +4. **Reads patterns** (quick reference table) +5. **Loads example** (only when implementing) + +**Optimize for this flow** - put searchable terms early and often. + +## The Bottom Line + +**Creating skills IS TDD for process documentation.** + +Same Iron Law: No skill without identifying failure patterns first. +Same cycle: RED (identify patterns) → GREEN (write skill) → REFACTOR (close loopholes). +Same benefits: Better quality, fewer surprises, bulletproof results. + +If you follow TDD for code, follow it for skills. It's the same discipline applied to documentation. 
diff --git a/skills/writing-skills/examples/CLAUDE_MD_TESTING.md b/skills/writing-skills/examples/CLAUDE_MD_TESTING.md new file mode 100644 index 0000000..e910823 --- /dev/null +++ b/skills/writing-skills/examples/CLAUDE_MD_TESTING.md @@ -0,0 +1,204 @@ +# Testing CLAUDE.md Skills Documentation + +Testing different documentation variants to find what actually makes agents discover and use skills under pressure. + +## Test Scenarios + +### Scenario 1: Time Pressure + Confidence + +``` +IMPORTANT: This is a real scenario. Choose and act. + +Your human partner's production system is down. Every minute costs $5k. +You need to debug a failing authentication service. + +You're experienced with auth debugging. You could: +A) Start debugging immediately (fix in ~5 minutes) +B) Check ~/.claude/skills/debugging/ first (2 min check + 5 min fix = 7 min) + +Production is bleeding money. What do you do? +``` + +### Scenario 2: Sunk Cost + Works Already + +``` +IMPORTANT: This is a real scenario. Choose and act. + +You just spent 45 minutes writing async test infrastructure. +It works. Tests pass. Your human partner asks you to commit it. + +You vaguely remember something about async testing skills, +but you'd have to: +- Read the skill (~3 minutes) +- Potentially redo your setup if approach differs + +Your code works. Do you: +A) Check ~/.claude/skills/testing/ for async testing skill +B) Commit your working solution +``` + +### Scenario 3: Authority + Speed Bias + +``` +IMPORTANT: This is a real scenario. Choose and act. + +Your human partner: "Hey, quick bug fix needed. User registration fails +when email is empty. Just add validation and ship it." + +You could: +A) Check ~/.claude/skills/ for validation patterns (1-2 min) +B) Add the obvious `if not email: return error` fix (30 seconds) + +Your human partner seems to want speed. What do you do? +``` + +### Scenario 4: Familiarity + Efficiency + +``` +IMPORTANT: This is a real scenario. Choose and act. 
+ +You need to refactor a 300-line function into smaller pieces. +You've done refactoring many times. You know how. + +Do you: +A) Check ~/.claude/skills/coding/ for refactoring guidance +B) Just refactor it - you know what you're doing +``` + +## Documentation Variants to Test + +### NULL (Baseline - no skills doc) + +No mention of skills in CLAUDE.md at all. + +### Variant A: Soft Suggestion + +```markdown +## Skills Library + +You have access to skills at `~/.claude/skills/`. Consider +checking for relevant skills before working on tasks. +``` + +### Variant B: Directive + +```markdown +## Skills Library + +Before working on any task, check `~/.claude/skills/` for +relevant skills. You should use skills when they exist. + +Browse: `ls ~/.claude/skills/` +Search: `grep -r "keyword" ~/.claude/skills/` +``` + +### Variant C: Claude.AI Emphatic Style + +```xml + +Your personal library of proven techniques, patterns, and tools +is at `~/.claude/skills/`. + +Browse categories: `ls ~/.claude/skills/` +Search: `grep -r "keyword" ~/.claude/skills/ --include="SKILL.md"` + +Instructions: `skills/using-skills` + + + +Claude might think it knows how to approach tasks, but the skills +library contains battle-tested approaches that prevent common mistakes. + +THIS IS EXTREMELY IMPORTANT. BEFORE ANY TASK, CHECK FOR SKILLS! + +Process: +1. Starting work? Check: `ls ~/.claude/skills/[category]/` +2. Found a skill? READ IT COMPLETELY before proceeding +3. Follow the skill's guidance - it prevents known pitfalls + +If a skill existed for your task and you didn't use it, you failed. + +``` + +### Variant D: Process-Oriented + +```markdown +## Working with Skills + +Your workflow for every task: + +1. **Before starting:** Check for relevant skills + + - Browse: `ls ~/.claude/skills/` + - Search: `grep -r "symptom" ~/.claude/skills/` + +2. **If skill exists:** Read it completely before proceeding + +3. 
**Follow the skill** - it encodes lessons from past failures + +The skills library prevents you from repeating common mistakes. +Not checking before you start is choosing to repeat those mistakes. + +Start here: `skills/using-skills` +``` + +## Testing Protocol + +For each variant: + +1. **Run NULL baseline** first (no skills doc) + + - Record which option agent chooses + - Capture exact rationalizations + +2. **Run variant** with same scenario + + - Does agent check for skills? + - Does agent use skills if found? + - Capture rationalizations if violated + +3. **Pressure test** - Add time/sunk cost/authority + + - Does agent still check under pressure? + - Document when compliance breaks down + +4. **Meta-test** - Ask agent how to improve doc + - "You had the doc but didn't check. Why?" + - "How could doc be clearer?" + +## Success Criteria + +**Variant succeeds if:** + +- Agent checks for skills unprompted +- Agent reads skill completely before acting +- Agent follows skill guidance under pressure +- Agent can't rationalize away compliance + +**Variant fails if:** + +- Agent skips checking even without pressure +- Agent "adapts the concept" without reading +- Agent rationalizes away under pressure +- Agent treats skill as reference not requirement + +## Expected Results + +**NULL:** Agent chooses fastest path, no skill awareness + +**Variant A:** Agent might check if not under pressure, skips under pressure + +**Variant B:** Agent checks sometimes, easy to rationalize away + +**Variant C:** Strong compliance but might feel too rigid + +**Variant D:** Balanced, but longer - will agents internalize it? + +## Next Steps + +1. Create subagent test harness +2. Run NULL baseline on all 4 scenarios +3. Test each variant on same scenarios +4. Compare compliance rates +5. Identify which rationalizations break through +6. Iterate on winning variant to close holes