From 650cfdbc059ecf5bd05c2d4ac7138633a6e5ad60 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 09:01:20 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 12 + README.md | 3 + plugin.lock.json | 88 ++ skills/skill/SKILL.md | 401 +++++++ .../references/anthropic-best-practices.md | 187 +++ skills/skill/references/best-practices.md | 1055 +++++++++++++++++ .../skill/references/obra-tdd-methodology.md | 278 +++++ skills/skill/references/overview.md | 269 +++++ skills/skill/references/quality-loops.md | 379 ++++++ skills/skill/references/quickstart.md | 513 ++++++++ skills/skill/references/request-analysis.md | 326 +++++ .../references/skill-seekers-integration.md | 389 ++++++ skills/skill/scripts/check-skill-seekers.sh | 29 + skills/skill/scripts/install-skill-seekers.sh | 103 ++ skills/skill/scripts/quality-check.py | 220 ++++ 15 files changed, 4252 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 plugin.lock.json create mode 100644 skills/skill/SKILL.md create mode 100644 skills/skill/references/anthropic-best-practices.md create mode 100644 skills/skill/references/best-practices.md create mode 100644 skills/skill/references/obra-tdd-methodology.md create mode 100644 skills/skill/references/overview.md create mode 100644 skills/skill/references/quality-loops.md create mode 100644 skills/skill/references/quickstart.md create mode 100644 skills/skill/references/request-analysis.md create mode 100644 skills/skill/references/skill-seekers-integration.md create mode 100755 skills/skill/scripts/check-skill-seekers.sh create mode 100755 skills/skill/scripts/install-skill-seekers.sh create mode 100755 skills/skill/scripts/quality-check.py diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..ba48480 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "skill-factory", + "description": "Skill: Autonomous 
skill creation with automatic quality assurance - analyzes requests, selects optimal creation method, and delivers production-ready skills", + "version": "0.0.0-2025.11.28", + "author": { + "name": "Misha Kolesnik", + "email": "misha@kolesnik.io" + }, + "skills": [ + "./skills/skill" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..4461cbd --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# skill-factory + +Skill: Autonomous skill creation with automatic quality assurance - analyzes requests, selects optimal creation method, and delivers production-ready skills diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..a821cc4 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,88 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:tenequm/claude-plugins:skill-factory", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "4296eb47fc5089de52dbc5e6a18fc4d91830b052", + "treeHash": "9418755e1eb815850a368371163eb6262298f978543268ebaaeeacdb37751658", + "generatedAt": "2025-11-28T10:28:37.877794Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "skill-factory", + "description": "Skill: Autonomous skill creation with automatic quality assurance - analyzes requests, selects optimal creation method, and delivers production-ready skills" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "cdb03acde30649a7de016974ad1382e4d3392ca2662fcc5c687fc6355063d61e" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "765ca0547870d51845555a7c519a51914c0d25f823a1c6f20ef4783de0e06583" + }, + { + "path": "skills/skill/SKILL.md", + "sha256": 
"fc6983970bba8ac447b45470b6f92cb434f591aa7edf4d39a74511bf75daf050" + }, + { + "path": "skills/skill/references/overview.md", + "sha256": "95953b2e56db222d664e60c7686c3e87ab16dfd74172c2a17d23ca63770cb374" + }, + { + "path": "skills/skill/references/best-practices.md", + "sha256": "6d2ebd8f49b5b2eeabbec4d8e4c0552450b3f2d839b9a4e4e4a1576d6f16f68c" + }, + { + "path": "skills/skill/references/obra-tdd-methodology.md", + "sha256": "4adc52d24a9e4313327d8f675f87023fa734ce48f3a76cff1c86db4f918c4993" + }, + { + "path": "skills/skill/references/request-analysis.md", + "sha256": "3172d0161e4c79f0a06b4c308b039dd1f9c366147e2132e7e62f6b1e2232f795" + }, + { + "path": "skills/skill/references/quickstart.md", + "sha256": "6d46f6b0fa576a076ed3672ac994a605b779b1940abe2d28d2020043db86ad10" + }, + { + "path": "skills/skill/references/skill-seekers-integration.md", + "sha256": "971fed248971183aab59d2299b7afba8f29cfaa466f431df42cad312831964ab" + }, + { + "path": "skills/skill/references/anthropic-best-practices.md", + "sha256": "5359e1268c5b9e3619a997a599a965cf1b03c91e9b8e06511234c9f97571b516" + }, + { + "path": "skills/skill/references/quality-loops.md", + "sha256": "d7dece5392f0286727619c04d3f033f0b0f6f26bfc634aaf8e5d6affc883f11d" + }, + { + "path": "skills/skill/scripts/check-skill-seekers.sh", + "sha256": "9c1bc1b2be3f2534d2006f8b1fe42297f54535af01a49091c5f7657d15b81418" + }, + { + "path": "skills/skill/scripts/install-skill-seekers.sh", + "sha256": "b004a68882ff5ae892976577bd3ec9b97079d37ccab357ac1b3e35f8379a2a1e" + }, + { + "path": "skills/skill/scripts/quality-check.py", + "sha256": "43c033574c07cb9c2f6e846aecec7ee04f808d1ec0b46b1e713b6f3425a660f1" + } + ], + "dirSha256": "9418755e1eb815850a368371163eb6262298f978543268ebaaeeacdb37751658" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/skill/SKILL.md b/skills/skill/SKILL.md new file mode 100644 index 0000000..89de078 --- /dev/null +++ 
b/skills/skill/SKILL.md @@ -0,0 +1,401 @@ +--- +name: skill-factory +description: Autonomous skill creation agent that analyzes requests, automatically selects the best creation method (documentation scraping via Skill_Seekers, manual TDD construction, or hybrid), ensures quality compliance with Anthropic best practices, and delivers production-ready skills without requiring user decision-making or navigation +when_to_use: when you need to create any Claude skill and want it done automatically with guaranteed quality - works for documentation-based skills, GitHub repositories, PDFs, custom workflows, or hybrid approaches +version: 0.1.0 +--- + +# Skill Factory + +**Autonomous skill creation - just tell me what you need, I'll handle everything.** + +## What This Does + +You request a skill, I deliver a production-ready skill with guaranteed quality (score >= 8.0/10). + +**No decision-making required. No tool selection. No quality checking. Just results.** + +### Anthropic's Official Best Practices + +For comprehensive guidance on creating effective skills, see: + +- **[references/overview.md](references/overview.md)** - Complete overview of Agent Skills architecture, progressive disclosure, and how Skills work across different platforms (API, Claude Code, Agent SDK, claude.ai) +- **[references/quickstart.md](references/quickstart.md)** - Quick tutorial on using pre-built Agent Skills in the Claude API with practical code examples +- **[references/best-practices.md](references/best-practices.md)** - Detailed authoring best practices including core principles, skill structure, progressive disclosure patterns, workflows, evaluation strategies, and common patterns +- **[references/anthropic-best-practices.md](references/anthropic-best-practices.md)** - Quality scoring system (10/10 criteria) used by skill-factory + +These references provide Anthropic's official guidance and are consulted during the quality assurance phase. 
+ +## Usage + +Simply describe the skill you need: + +``` +"Create a skill for Anchor development with latest docs and best practices" +"Create a React skill from react.dev with comprehensive examples" +"Create a skill for Solana transaction debugging workflows" +"Create a skill for writing technical documentation following company standards" +``` + +**I will automatically:** +1. ✅ Analyze your request +2. ✅ Select the optimal creation method +3. ✅ Create the skill +4. ✅ Run quality assurance loops (until score >= 8.0) +5. ✅ Test with automated scenarios +6. ✅ Deliver ready-to-use skill with stats + +## What You Get + +``` +✅ anchor-development skill ready! + +📊 Quality Score: 8.9/10 (Excellent) +📝 Lines: 412 (using progressive disclosure) +📚 Coverage: 247 documentation pages +💡 Examples: 68 code samples +🧪 Test Pass Rate: 100% (15/15 scenarios) + +📁 Location: ~/.claude/skills/anchor-development/ +📦 Zip: ~/Downloads/anchor-development.zip + +Try it: "How do I create an Anchor program?" +``` + +## How It Works (Behind the Scenes) + +### Phase 1: Request Analysis (Automatic) + +I analyze your request to determine: + +**Source Detection:** +- Documentation URL/mention? → Automated scraping path +- "Latest docs", "current version"? → Automated path +- GitHub repository mention? → Automated path +- PDF/manual path? → Automated path +- Custom workflow/process description? → Manual TDD path +- Both documentation AND custom needs? → Hybrid path + +**Quality Requirements Extraction:** +- "Best practices" → Enforce quality gates +- "Latest version" → Scrape current docs +- "Examples" → Ensure code samples included +- "Comprehensive" → Verify coverage completeness + +### Phase 2: Execution (Automatic) + +**Path A: Documentation-Based (Skill_Seekers)** +``` +Detected: Documentation source available +Method: Automated scraping with quality enhancement + +Steps I take: +1. Check Skill_Seekers installation (install if needed) +2. Configure scraping parameters automatically +3. 
Run scraping with optimal settings +4. Monitor progress +5. Initial quality check +6. If score < 8.0: Run enhancement loop +7. Re-score until >= 8.0 +8. Test with auto-generated scenarios +9. Package and deliver +``` + +**Path B: Custom Workflows (Manual TDD)** +``` +Detected: Custom workflow/process +Method: Test-Driven Documentation (obra methodology) + +Steps I take: +1. Create pressure test scenarios +2. Run baseline (without skill) +3. Document agent behavior +4. Write minimal skill addressing baseline +5. Test with skill present +6. Identify rationalizations/gaps +7. Close loopholes +8. Iterate until bulletproof +9. Package and deliver +``` + +**Path C: Hybrid** +``` +Detected: Documentation + custom requirements +Method: Scrape then enhance + +Steps I take: +1. Scrape documentation (Path A) +2. Identify gaps vs requirements +3. Fill gaps with TDD approach (Path B) +4. Unify and test as whole +5. Quality loop until >= 8.0 +6. Package and deliver +``` + +### Phase 3: Quality Assurance Loop (Automatic) + +**I enforce Anthropic best practices:** + +```python +while quality_score < 8.0: + issues = analyze_against_anthropic_guidelines(skill) + + if "vague_description" in issues: + improve_description_specificity() + + if "missing_examples" in issues: + extract_or_generate_examples() + + if "too_long" in issues: + apply_progressive_disclosure() + + if "poor_structure" in issues: + reorganize_content() + + quality_score = rescore() +``` + +**Quality Criteria (Anthropic Best Practices):** +- ✅ Description: Specific, clear, includes when_to_use +- ✅ Conciseness: <500 lines OR progressive disclosure +- ✅ Examples: Concrete code samples, not abstract +- ✅ Structure: Well-organized, clear sections +- ✅ Name: Follows conventions (lowercase, hyphens, descriptive) + +**Important**: The quality assurance process consults [references/best-practices.md](references/best-practices.md) for Anthropic's complete authoring guidelines and 
[references/anthropic-best-practices.md](references/anthropic-best-practices.md) for the 10-point scoring criteria. + +### Phase 4: Testing (Automatic) + +**I generate and run test scenarios:** + +```python +# Auto-generate test cases from skill content +test_cases = extract_key_topics(skill) + +for topic in test_cases: + query = f"How do I {topic}?" + + # Test WITHOUT skill (baseline) + baseline = run_query_without_skill(query) + + # Test WITH skill + with_skill = run_query_with_skill(query) + + # Verify improvement + if not is_better(with_skill, baseline): + identify_gap() + enhance_skill() + retest() +``` + +### Phase 5: Delivery (Automatic) + +``` +Package skill: +- Create skill directory structure +- Generate SKILL.md with frontmatter +- Create reference files (if using progressive disclosure) +- Add examples directory +- Create .zip for easy upload +- Install to ~/.claude/skills/ (if desired) +- Generate summary statistics +``` + +## Progress Reporting + +You'll see real-time progress: + +``` +🔍 Analyzing request... + ✅ Detected: Documentation-based (docs.rs/anchor-lang) + ✅ Requirements: Latest version, best practices, examples + +🔄 Creating skill... + 📥 Scraping docs.rs/anchor-lang... (2 min) + 📚 Extracting 247 pages... + 💾 Organizing content... + +📊 Quality check: 7.4/10 + ⚠️ Issues found: + - Description too generic (fixing...) + - Missing examples in 4 sections (adding...) + - Some outdated patterns (updating...) + +🔧 Enhancing skill... + ✏️ Description improved + 📝 Examples added + 🔄 Patterns updated + +📊 Quality check: 8.9/10 ✅ + +🧪 Testing... + ✅ 15/15 scenarios passing + +✅ anchor-development skill ready! 
+``` + +## Dependencies + +**Required:** +- Python 3.10+ (for quality scripts) +- bash (for automation scripts) + +**Optional (auto-installed when needed):** +- Skill_Seekers (for documentation scraping) + - Will prompt for installation on first documentation-based request + - One-command setup: `scripts/install-skill-seekers.sh` + +## Configuration + +**Environment variables (optional):** + +```bash +# Skill_Seekers installation path +export SKILL_SEEKERS_PATH="$HOME/Skill_Seekers" + +# Output directory for created skills +export SKILL_OUTPUT_DIR="$HOME/.claude/skills" + +# Minimum quality score (default: 8.0) +export MIN_QUALITY_SCORE="8.0" + +# Auto-install dependencies (default: prompt) +export AUTO_INSTALL_DEPS="true" +``` + +## Examples + +**Example 1: Documentation Skill** +``` +User: "Create a React skill from react.dev" + +Agent: +🔍 Analyzing... → Documentation-based +🔄 Scraping react.dev... → 3 min +📊 Quality: 7.8 → 8.6 ✅ +🧪 Testing: 12/12 passing ✅ +✅ react-development skill ready (8.6/10) +``` + +**Example 2: Custom Workflow Skill** +``` +User: "Create a skill for debugging Solana transaction failures" + +Agent: +🔍 Analyzing... → Custom workflow (no docs to scrape) +📝 Using TDD methodology... +🧪 RED: Testing baseline... +✏️ GREEN: Writing skill... +🔄 REFACTOR: Closing loopholes... +📊 Quality: 8.3 ✅ +✅ solana-transaction-debugging skill ready (8.3/10) +``` + +**Example 3: Hybrid Skill** +``` +User: "Create an Anchor skill with docs plus custom debugging workflows" + +Agent: +🔍 Analyzing... → Hybrid (docs + custom) +📥 Scraping docs.rs/anchor-lang... → 2 min +📝 Adding custom debugging workflows... +🔄 Integrating and testing... 
+📊 Quality: 8.9 ✅ +✅ anchor-development skill ready (8.9/10) +``` + +## Quality Guarantee + +**Every skill delivered by skill-factory:** +- ✅ Scores >= 8.0/10 on Anthropic best practices +- ✅ Has concrete examples (not abstract) +- ✅ Follows structure conventions +- ✅ Tested with auto-generated scenarios +- ✅ Ready to use immediately + +**If quality < 8.0, I keep working until it reaches 8.0+** + +## Troubleshooting + +**Skill_Seekers installation fails:** +```bash +# Manual installation +git clone https://github.com/yusufkaraaslan/Skill_Seekers ~/Skill_Seekers +cd ~/Skill_Seekers +pip install -r requirements.txt + +# Or use installation script +~/Projects/claude-skills/skill-factory/skill/scripts/install-skill-seekers.sh +``` + +**Quality score stuck below 8.0:** +- I'll report what's blocking and suggest manual review +- Check references/anthropic-best-practices.md for criteria +- Run manual enhancement if needed + +**Want to understand methodology:** +- See references/obra-tdd-methodology.md (testing approach) +- See references/anthropic-best-practices.md (quality criteria) +- See references/skill-seekers-integration.md (automation details) + +## Reference Files + +**Anthropic Official Documentation:** +- references/overview.md - Agent Skills architecture, progressive disclosure, and platform details +- references/quickstart.md - Quick tutorial on using pre-built Agent Skills in the Claude API +- references/best-practices.md - Comprehensive authoring guidelines from Anthropic +- references/anthropic-best-practices.md - Quality scoring system (10/10 criteria) + +**Skill Factory Implementation Details:** +- references/obra-tdd-methodology.md - Full TDD testing approach +- references/skill-seekers-integration.md - Automation documentation +- references/request-analysis.md - How requests are parsed +- references/quality-loops.md - Enhancement algorithms + +## Scripts Reference + +Available helper scripts in `scripts/` directory: +- **check-skill-seekers.sh** - Check 
if Skill_Seekers is installed +- **install-skill-seekers.sh** - One-command Skill_Seekers setup +- **quality-check.py** - Score any skill against Anthropic best practices + +Usage examples: +```bash +# Check Skill_Seekers installation +./scripts/check-skill-seekers.sh + +# Install Skill_Seekers +./scripts/install-skill-seekers.sh + +# Quality check a skill +python3 ./scripts/quality-check.py /path/to/skill/SKILL.md +``` + +## Philosophy + +**You don't want to:** +- Navigate decision trees +- Choose between tools +- Check quality manually +- Test with subagents yourself +- Wonder if output is good + +**You want to:** +- Describe what you need +- Get high-quality result +- Start using immediately + +**That's what skill-factory delivers.** + +## Credits + +Built on top of excellent tools: +- [Skill_Seekers](https://github.com/yusufkaraaslan/Skill_Seekers) - Documentation scraping +- [obra/superpowers-skills](https://github.com/obra/superpowers-skills) - TDD methodology +- [Anthropic skill-creator](https://github.com/anthropics/skills) - Best practices + +Skill-factory orchestrates these tools with automatic quality assurance and testing. + +--- + +**Just tell me what skill you need. I'll handle the rest.** diff --git a/skills/skill/references/anthropic-best-practices.md b/skills/skill/references/anthropic-best-practices.md new file mode 100644 index 0000000..3975cfa --- /dev/null +++ b/skills/skill/references/anthropic-best-practices.md @@ -0,0 +1,187 @@ +# Anthropic Best Practices for Claude Skills + +Extracted from official Anthropic documentation with attribution. + +## Core Principles + +1. **Conciseness** - Keep SKILL.md under 500 lines +2. **Progressive Disclosure** - Use reference files for details +3. **Specific Descriptions** - Clear when_to_use in frontmatter +4. **Concrete Examples** - Real code, not placeholders +5. 
**Assume Intelligence** - Trust Claude to understand + +## SKILL.md Structure + +```markdown +--- +name: skill-name +description: Specific description of what this does and when to use it +--- + +# Skill Name + +## Overview +Brief explanation of core purpose + +## When to Use +Clear triggers and use cases + +## Quick Reference +Table or bullets for common operations + +## Examples +Concrete, runnable code + +## Common Mistakes +What goes wrong + fixes +``` + +## Quality Scoring (10 points total) + +### 1. Description Quality (2.0 points) +- ✅ Specific, not vague +- ✅ Includes what the skill does +- ✅ Includes when to use it +- ✅ Written in third person +- ✅ Under 1024 characters +- ❌ No "helps with", "tool for" vagueness + +### 2. Name Convention (0.5 points) +- ✅ lowercase-with-hyphens +- ✅ Descriptive (not "helper", "utils") +- ✅ Gerund form preferred (-ing) +- ✅ Under 64 characters + +### 3. Conciseness (1.5 points) +- ✅ <300 lines = 1.5 points +- ✅ <500 lines = 1.0 points +- ⚠️ 500-800 lines = 0.5 points +- ❌ >800 lines = 0.0 points + +### 4. Progressive Disclosure (1.0 points) +- ✅ Main SKILL.md is overview/TOC +- ✅ Details in reference files +- ✅ No deeply nested references (max 1 level) + +### 5. Examples & Workflows (1.0 points) +- ✅ Concrete code examples +- ✅ Input/output pairs +- ✅ Real patterns, not placeholders +- ❌ No "template" or "fill-in-blank" + +### 6. Degree of Freedom (0.5 points) +- ✅ High freedom for flexible tasks (text instructions) +- ✅ Low freedom for fragile tasks (exact scripts) + +### 7. Dependencies (0.5 points) +- ✅ All dependencies listed +- ✅ Installation instructions +- ✅ Verified available + +### 8. Structure (1.0 points) +- ✅ Clear section headings +- ✅ Logical flow +- ✅ Consistent formatting +- ✅ Unix paths (forward slashes) + +### 9. Error Handling (0.5 points) +For skills with scripts: +- ✅ Scripts handle errors +- ✅ Clear error messages +- ✅ Validation loops + +### 10. 
Anti-Patterns to Avoid (1.0 points) +- ❌ Time-sensitive information +- ❌ Inconsistent terminology +- ❌ Windows-style paths (\) +- ❌ Too many options without guidance +- ❌ Deeply nested references + +### 11. Testing (0.5 points) +- ✅ Evidence of testing +- ✅ Evaluation examples +- ✅ Success criteria + +## Common Issues & Fixes + +### Issue: Description Too Vague +❌ Bad: "Helps with documents" +✅ Good: "Analyze Excel spreadsheets, create pivot tables, generate charts. Use when working with .xlsx files or tabular data." + +### Issue: Too Long +❌ Bad: 800-line SKILL.md with everything inline +✅ Good: 300-line overview + 3 reference files + +### Issue: Abstract Examples +❌ Bad: `function doSomething() { /* your code here */ }` +✅ Good: Complete, runnable examples from real use cases + +### Issue: No When-to-Use +❌ Bad: description: "React development tool" +✅ Good: description: "Build React applications with hooks, routing, and state management. Use when creating React projects, need component patterns, or debugging React-specific issues." + +## File Organization + +``` +skill-name/ +├── SKILL.md # <500 lines overview +├── references/ # Detailed docs +│ ├── api-reference.md +│ ├── advanced-usage.md +│ └── troubleshooting.md +├── scripts/ # Executable tools +│ └── helper-script.sh +└── examples/ # Complete examples + └── example-project.md +``` + +## Frontmatter Best Practices + +```yaml +--- +name: processing-documents +description: Extract text from PDFs, analyze Word docs, convert formats. Use when working with PDF, DOCX, or document conversion tasks. 
+dependencies: + - pypdf2 + - python-docx +--- +``` + +## Progressive Disclosure Pattern + +**Main SKILL.md (200 lines):** +- Overview +- Quick reference table +- Common workflows +- Links to references + +**References (unlimited):** +- Detailed API docs +- Advanced patterns +- Troubleshooting +- Extended examples + +## Quality Tiers + +- **Excellent (8.0-10.0):** Production-ready, follows all best practices +- **Good (6.0-7.9):** Usable, minor improvements needed +- **Fair (4.0-5.9):** Needs review, several issues +- **Poor (0.0-3.9):** Significant problems, not recommended + +## Source + +Based on [Anthropic Skills Documentation](https://github.com/anthropics/skills) +- skill-creator/SKILL.md +- Official best practices guide + +## Quick Validation Checklist + +Before deploying any skill: +- [ ] Description specific and under 1024 chars? +- [ ] Name follows lowercase-hyphen convention? +- [ ] SKILL.md under 500 lines (or uses progressive disclosure)? +- [ ] Has 3+ concrete code examples? +- [ ] No time-sensitive information? +- [ ] Dependencies documented? +- [ ] Unix-style paths? +- [ ] Tested with real queries? diff --git a/skills/skill/references/best-practices.md b/skills/skill/references/best-practices.md new file mode 100644 index 0000000..25f4a85 --- /dev/null +++ b/skills/skill/references/best-practices.md @@ -0,0 +1,1055 @@ +# Skill authoring best practices + +Learn how to write effective Skills that Claude can discover and use successfully. + +Good Skills are concise, well-structured, and tested with real usage. This guide provides practical authoring decisions to help you write Skills that Claude can discover and use effectively. + +## Core principles + +### Concise is key + +The context window is a public good. Your Skill shares the context window with everything else Claude needs to know, including: +- The system prompt +- Conversation history +- Other Skills' metadata +- Your actual request + +Not every token in your Skill has an immediate cost. 
At startup, only the metadata (name and description) from all Skills is pre-loaded. Claude reads SKILL.md only when the Skill becomes relevant, and reads additional files only as needed. However, being concise in SKILL.md still matters: once Claude loads it, every token competes with conversation history and other context. + +**Default assumption**: Claude is already very smart + +Only add context Claude doesn't already have. Challenge each piece of information: +- "Does Claude really need this explanation?" +- "Can I assume Claude knows this?" +- "Does this paragraph justify its token cost?" + +**Good example: Concise** (approximately 50 tokens): +````markdown +## Extract PDF text + +Use pdfplumber for text extraction: + +```python +import pdfplumber + +with pdfplumber.open("file.pdf") as pdf: + text = pdf.pages[0].extract_text() +``` +```` + +**Bad example: Too verbose** (approximately 150 tokens): +```markdown +## Extract PDF text + +PDF (Portable Document Format) files are a common file format that contains +text, images, and other content. To extract text from a PDF, you'll need to +use a library. There are many libraries available for PDF processing, but we +recommend pdfplumber because it's easy to use and handles most cases well. +First, you'll need to install it using pip. Then you can use the code below... +``` + +The concise version assumes Claude knows what PDFs are and how libraries work. + +### Set appropriate degrees of freedom + +Match the level of specificity to the task's fragility and variability. + +**High freedom** (text-based instructions): + +Use when: +- Multiple approaches are valid +- Decisions depend on context +- Heuristics guide the approach + +Example: +```markdown +## Code review process + +1. Analyze the code structure and organization +2. Check for potential bugs or edge cases +3. Suggest improvements for readability and maintainability +4. 
Verify adherence to project conventions +``` + +**Medium freedom** (pseudocode or scripts with parameters): + +Use when: +- A preferred pattern exists +- Some variation is acceptable +- Configuration affects behavior + +Example: +````markdown +## Generate report + +Use this template and customize as needed: + +```python +def generate_report(data, format="markdown", include_charts=True): + # Process data + # Generate output in specified format + # Optionally include visualizations +``` +```` + +**Low freedom** (specific scripts, few or no parameters): + +Use when: +- Operations are fragile and error-prone +- Consistency is critical +- A specific sequence must be followed + +Example: +````markdown +## Database migration + +Run exactly this script: + +```bash +python scripts/migrate.py --verify --backup +``` + +Do not modify the command or add additional flags. +```` + +**Analogy**: Think of Claude as a robot exploring a path: +- **Narrow bridge with cliffs on both sides**: There's only one safe way forward. Provide specific guardrails and exact instructions (low freedom). Example: database migrations that must run in exact sequence. +- **Open field with no hazards**: Many paths lead to success. Give general direction and trust Claude to find the best route (high freedom). Example: code reviews where context determines the best approach. + +### Test with all models you plan to use + +Skills act as additions to models, so effectiveness depends on the underlying model. Test your Skill with all the models you plan to use it with. + +**Testing considerations by model**: +- **Claude Haiku** (fast, economical): Does the Skill provide enough guidance? +- **Claude Sonnet** (balanced): Is the Skill clear and efficient? +- **Claude Opus** (powerful reasoning): Does the Skill avoid over-explaining? + +What works perfectly for Opus might need more detail for Haiku. If you plan to use your Skill across multiple models, aim for instructions that work well with all of them. 
+ +## Skill structure + +### Naming conventions + +Use consistent naming patterns to make Skills easier to reference and discuss. We recommend using **gerund form** (verb + -ing) for Skill names, as this clearly describes the activity or capability the Skill provides. + +Remember that the `name` field must use lowercase letters, numbers, and hyphens only. + +**Good naming examples (gerund form)**: +- `processing-pdfs` +- `analyzing-spreadsheets` +- `managing-databases` +- `testing-code` +- `writing-documentation` + +**Acceptable alternatives**: +- Noun phrases: `pdf-processing`, `spreadsheet-analysis` +- Action-oriented: `process-pdfs`, `analyze-spreadsheets` + +**Avoid**: +- Vague names: `helper`, `utils`, `tools` +- Overly generic: `documents`, `data`, `files` +- Reserved words: `anthropic-helper`, `claude-tools` +- Inconsistent patterns within your skill collection + +Consistent naming makes it easier to: +- Reference Skills in documentation and conversations +- Understand what a Skill does at a glance +- Organize and search through multiple Skills +- Maintain a professional, cohesive skill library + +### Writing effective descriptions + +The `description` field enables Skill discovery and should include both what the Skill does and when to use it. + +**Always write in third person**. The description is injected into the system prompt, and inconsistent point-of-view can cause discovery problems. + +- **Good:** "Processes Excel files and generates reports" +- **Avoid:** "I can help you process Excel files" +- **Avoid:** "You can use this to process Excel files" + +**Be specific and include key terms**. Include both what the Skill does and specific triggers/contexts for when to use it. + +Each Skill has exactly one description field. The description is critical for skill selection: Claude uses it to choose the right Skill from potentially 100+ available Skills. 
Your description must provide enough detail for Claude to know when to select this Skill, while the rest of SKILL.md provides the implementation details. + +Effective examples: + +**PDF Processing skill:** +```yaml +description: Extract text and tables from PDF files, fill forms, merge documents. Use when working with PDF files or when the user mentions PDFs, forms, or document extraction. +``` + +**Excel Analysis skill:** +```yaml +description: Analyze Excel spreadsheets, create pivot tables, generate charts. Use when analyzing Excel files, spreadsheets, tabular data, or .xlsx files. +``` + +**Git Commit Helper skill:** +```yaml +description: Generate descriptive commit messages by analyzing git diffs. Use when the user asks for help writing commit messages or reviewing staged changes. +``` + +Avoid vague descriptions like these: + +```yaml +description: Helps with documents +``` +```yaml +description: Processes data +``` +```yaml +description: Does stuff with files +``` + +### Progressive disclosure patterns + +SKILL.md serves as an overview that points Claude to detailed materials as needed, like a table of contents in an onboarding guide. + +**Practical guidance:** +- Keep SKILL.md body under 500 lines for optimal performance +- Split content into separate files when approaching this limit +- Use the patterns below to organize instructions, code, and resources effectively + +#### Visual overview: From simple to complex + +A basic Skill starts with just a SKILL.md file containing metadata and instructions. + +As your Skill grows, you can bundle additional content that Claude loads only when needed. 
The complete Skill directory structure might look like this: + +``` +pdf/ +├── SKILL.md # Main instructions (loaded when triggered) +├── FORMS.md # Form-filling guide (loaded as needed) +├── reference.md # API reference (loaded as needed) +├── examples.md # Usage examples (loaded as needed) +└── scripts/ + ├── analyze_form.py # Utility script (executed, not loaded) + ├── fill_form.py # Form filling script + └── validate.py # Validation script +``` + +#### Pattern 1: High-level guide with references + +````markdown +--- +name: pdf-processing +description: Extracts text and tables from PDF files, fills forms, and merges documents. Use when working with PDF files or when the user mentions PDFs, forms, or document extraction. +--- + +# PDF Processing + +## Quick start + +Extract text with pdfplumber: +```python +import pdfplumber +with pdfplumber.open("file.pdf") as pdf: + text = pdf.pages[0].extract_text() +``` + +## Advanced features + +**Form filling**: See [FORMS.md](FORMS.md) for complete guide +**API reference**: See [REFERENCE.md](REFERENCE.md) for all methods +**Examples**: See [EXAMPLES.md](EXAMPLES.md) for common patterns +```` + +Claude loads FORMS.md, REFERENCE.md, or EXAMPLES.md only when needed. + +#### Pattern 2: Domain-specific organization + +For Skills with multiple domains, organize content by domain to avoid loading irrelevant context. When a user asks about sales metrics, Claude only needs to read sales-related schemas, not finance or marketing data. This keeps token usage low and context focused. 
+ +``` +bigquery-skill/ +├── SKILL.md (overview and navigation) +└── reference/ + ├── finance.md (revenue, billing metrics) + ├── sales.md (opportunities, pipeline) + ├── product.md (API usage, features) + └── marketing.md (campaigns, attribution) +``` + +````markdown SKILL.md +# BigQuery Data Analysis + +## Available datasets + +**Finance**: Revenue, ARR, billing → See [reference/finance.md](reference/finance.md) +**Sales**: Opportunities, pipeline, accounts → See [reference/sales.md](reference/sales.md) +**Product**: API usage, features, adoption → See [reference/product.md](reference/product.md) +**Marketing**: Campaigns, attribution, email → See [reference/marketing.md](reference/marketing.md) + +## Quick search + +Find specific metrics using grep: + +```bash +grep -i "revenue" reference/finance.md +grep -i "pipeline" reference/sales.md +grep -i "api usage" reference/product.md +``` +```` + +#### Pattern 3: Conditional details + +Show basic content, link to advanced content: + +```markdown +# DOCX Processing + +## Creating documents + +Use docx-js for new documents. See [DOCX-JS.md](DOCX-JS.md). + +## Editing documents + +For simple edits, modify the XML directly. + +**For tracked changes**: See [REDLINING.md](REDLINING.md) +**For OOXML details**: See [OOXML.md](OOXML.md) +``` + +Claude reads REDLINING.md or OOXML.md only when the user needs those features. + +### Avoid deeply nested references + +Claude may partially read files when they're referenced from other referenced files. When encountering nested references, Claude might use commands like `head -100` to preview content rather than reading entire files, resulting in incomplete information. + +**Keep references one level deep from SKILL.md**. All reference files should link directly from SKILL.md to ensure Claude reads complete files when needed. + +**Bad example: Too deep**: +```markdown +# SKILL.md +See [advanced.md](advanced.md)... + +# advanced.md +See [details.md](details.md)... 
+ +# details.md +Here's the actual information... +``` + +**Good example: One level deep**: +```markdown +# SKILL.md + +**Basic usage**: [instructions in SKILL.md] +**Advanced features**: See [advanced.md](advanced.md) +**API reference**: See [reference.md](reference.md) +**Examples**: See [examples.md](examples.md) +``` + +### Structure longer reference files with table of contents + +For reference files longer than 100 lines, include a table of contents at the top. This ensures Claude can see the full scope of available information even when previewing with partial reads. + +**Example**: +```markdown +# API Reference + +## Contents +- Authentication and setup +- Core methods (create, read, update, delete) +- Advanced features (batch operations, webhooks) +- Error handling patterns +- Code examples + +## Authentication and setup +... + +## Core methods +... +``` + +Claude can then read the complete file or jump to specific sections as needed. + +## Workflows and feedback loops + +### Use workflows for complex tasks + +Break complex operations into clear, sequential steps. For particularly complex workflows, provide a checklist that Claude can copy into its response and check off as it progresses. + +**Example 1: Research synthesis workflow** (for Skills without code): + +````markdown +## Research synthesis workflow + +Copy this checklist and track your progress: + +``` +Research Progress: +- [ ] Step 1: Read all source documents +- [ ] Step 2: Identify key themes +- [ ] Step 3: Cross-reference claims +- [ ] Step 4: Create structured summary +- [ ] Step 5: Verify citations +``` + +**Step 1: Read all source documents** + +Review each document in the `sources/` directory. Note the main arguments and supporting evidence. + +**Step 2: Identify key themes** + +Look for patterns across sources. What themes appear repeatedly? Where do sources agree or disagree? + +**Step 3: Cross-reference claims** + +For each major claim, verify it appears in the source material. 
Note which source supports each point. + +**Step 4: Create structured summary** + +Organize findings by theme. Include: +- Main claim +- Supporting evidence from sources +- Conflicting viewpoints (if any) + +**Step 5: Verify citations** + +Check that every claim references the correct source document. If citations are incomplete, return to Step 3. +```` + +This example shows how workflows apply to analysis tasks that don't require code. The checklist pattern works for any complex, multi-step process. + +**Example 2: PDF form filling workflow** (for Skills with code): + +````markdown +## PDF form filling workflow + +Copy this checklist and check off items as you complete them: + +``` +Task Progress: +- [ ] Step 1: Analyze the form (run analyze_form.py) +- [ ] Step 2: Create field mapping (edit fields.json) +- [ ] Step 3: Validate mapping (run validate_fields.py) +- [ ] Step 4: Fill the form (run fill_form.py) +- [ ] Step 5: Verify output (run verify_output.py) +``` + +**Step 1: Analyze the form** + +Run: `python scripts/analyze_form.py input.pdf` + +This extracts form fields and their locations, saving to `fields.json`. + +**Step 2: Create field mapping** + +Edit `fields.json` to add values for each field. + +**Step 3: Validate mapping** + +Run: `python scripts/validate_fields.py fields.json` + +Fix any validation errors before continuing. + +**Step 4: Fill the form** + +Run: `python scripts/fill_form.py input.pdf fields.json output.pdf` + +**Step 5: Verify output** + +Run: `python scripts/verify_output.py output.pdf` + +If verification fails, return to Step 2. +```` + +Clear steps prevent Claude from skipping critical validation. The checklist helps both Claude and you track progress through multi-step workflows. + +### Implement feedback loops + +**Common pattern**: Run validator → fix errors → repeat + +This pattern greatly improves output quality. + +**Example 1: Style guide compliance** (for Skills without code): + +```markdown +## Content review process + +1. 
Draft your content following the guidelines in STYLE_GUIDE.md +2. Review against the checklist: + - Check terminology consistency + - Verify examples follow the standard format + - Confirm all required sections are present +3. If issues found: + - Note each issue with specific section reference + - Revise the content + - Review the checklist again +4. Only proceed when all requirements are met +5. Finalize and save the document +``` + +This shows the validation loop pattern using reference documents instead of scripts. The "validator" is STYLE_GUIDE.md, and Claude performs the check by reading and comparing. + +**Example 2: Document editing process** (for Skills with code): + +```markdown +## Document editing process + +1. Make your edits to `word/document.xml` +2. **Validate immediately**: `python ooxml/scripts/validate.py unpacked_dir/` +3. If validation fails: + - Review the error message carefully + - Fix the issues in the XML + - Run validation again +4. **Only proceed when validation passes** +5. Rebuild: `python ooxml/scripts/pack.py unpacked_dir/ output.docx` +6. Test the output document +``` + +The validation loop catches errors early. + +## Content guidelines + +### Avoid time-sensitive information + +Don't include information that will become outdated: + +**Bad example: Time-sensitive** (will become wrong): +```markdown +If you're doing this before August 2025, use the old API. +After August 2025, use the new API. +``` + +**Good example** (use "old patterns" section): +```markdown +## Current method + +Use the v2 API endpoint: `api.example.com/v2/messages` + +## Old patterns + +
+<details>
+<summary>Legacy v1 API (deprecated 2025-08)</summary>
+
+The v1 API used: `api.example.com/v1/messages`
+
+This endpoint is no longer supported.
+</details>
+
+``` + +The old patterns section provides historical context without cluttering the main content. + +### Use consistent terminology + +Choose one term and use it throughout the Skill: + +**Good - Consistent**: +- Always "API endpoint" +- Always "field" +- Always "extract" + +**Bad - Inconsistent**: +- Mix "API endpoint", "URL", "API route", "path" +- Mix "field", "box", "element", "control" +- Mix "extract", "pull", "get", "retrieve" + +Consistency helps Claude understand and follow instructions. + +## Common patterns + +### Template pattern + +Provide templates for output format. Match the level of strictness to your needs. + +**For strict requirements** (like API responses or data formats): + +````markdown +## Report structure + +ALWAYS use this exact template structure: + +```markdown +# [Analysis Title] + +## Executive summary +[One-paragraph overview of key findings] + +## Key findings +- Finding 1 with supporting data +- Finding 2 with supporting data +- Finding 3 with supporting data + +## Recommendations +1. Specific actionable recommendation +2. Specific actionable recommendation +``` +```` + +**For flexible guidance** (when adaptation is useful): + +````markdown +## Report structure + +Here is a sensible default format, but use your best judgment based on the analysis: + +```markdown +# [Analysis Title] + +## Executive summary +[Overview] + +## Key findings +[Adapt sections based on what you discover] + +## Recommendations +[Tailor to the specific context] +``` + +Adjust sections as needed for the specific analysis type. 
+```` + +### Examples pattern + +For Skills where output quality depends on seeing examples, provide input/output pairs just like in regular prompting: + +````markdown +## Commit message format + +Generate commit messages following these examples: + +**Example 1:** +Input: Added user authentication with JWT tokens +Output: +``` +feat(auth): implement JWT-based authentication + +Add login endpoint and token validation middleware +``` + +**Example 2:** +Input: Fixed bug where dates displayed incorrectly in reports +Output: +``` +fix(reports): correct date formatting in timezone conversion + +Use UTC timestamps consistently across report generation +``` + +**Example 3:** +Input: Updated dependencies and refactored error handling +Output: +``` +chore: update dependencies and refactor error handling + +- Upgrade lodash to 4.17.21 +- Standardize error response format across endpoints +``` + +Follow this style: type(scope): brief description, then detailed explanation. +```` + +Examples help Claude understand the desired style and level of detail more clearly than descriptions alone. + +### Conditional workflow pattern + +Guide Claude through decision points: + +```markdown +## Document modification workflow + +1. Determine the modification type: + + **Creating new content?** → Follow "Creation workflow" below + **Editing existing content?** → Follow "Editing workflow" below + +2. Creation workflow: + - Use docx-js library + - Build document from scratch + - Export to .docx format + +3. Editing workflow: + - Unpack existing document + - Modify XML directly + - Validate after each change + - Repack when complete +``` + +If workflows become large or complicated with many steps, consider pushing them into separate files and tell Claude to read the appropriate file based on the task at hand. 
+ +## Evaluation and iteration + +### Build evaluations first + +**Create evaluations BEFORE writing extensive documentation.** This ensures your Skill solves real problems rather than documenting imagined ones. + +**Evaluation-driven development:** +1. **Identify gaps**: Run Claude on representative tasks without a Skill. Document specific failures or missing context +2. **Create evaluations**: Build three scenarios that test these gaps +3. **Establish baseline**: Measure Claude's performance without the Skill +4. **Write minimal instructions**: Create just enough content to address the gaps and pass evaluations +5. **Iterate**: Execute evaluations, compare against baseline, and refine + +This approach ensures you're solving actual problems rather than anticipating requirements that may never materialize. + +**Evaluation structure**: +```json +{ + "skills": ["pdf-processing"], + "query": "Extract all text from this PDF file and save it to output.txt", + "files": ["test-files/document.pdf"], + "expected_behavior": [ + "Successfully reads the PDF file using an appropriate PDF processing library or command-line tool", + "Extracts text content from all pages in the document without missing any pages", + "Saves the extracted text to a file named output.txt in a clear, readable format" + ] +} +``` + +This example demonstrates a data-driven evaluation with a simple testing rubric. Evaluations are your source of truth for measuring Skill effectiveness. + +### Develop Skills iteratively with Claude + +The most effective Skill development process involves Claude itself. Work with one instance of Claude ("Claude A") to create a Skill that will be used by other instances ("Claude B"). Claude A helps you design and refine instructions, while Claude B tests them in real tasks. This works because Claude models understand both how to write effective agent instructions and what information agents need. + +**Creating a new Skill:** + +1. 
**Complete a task without a Skill**: Work through a problem with Claude A using normal prompting. As you work, you'll naturally provide context, explain preferences, and share procedural knowledge. Notice what information you repeatedly provide. + +2. **Identify the reusable pattern**: After completing the task, identify what context you provided that would be useful for similar future tasks. + + **Example**: If you worked through a BigQuery analysis, you might have provided table names, field definitions, filtering rules (like "always exclude test accounts"), and common query patterns. + +3. **Ask Claude A to create a Skill**: "Create a Skill that captures this BigQuery analysis pattern we just used. Include the table schemas, naming conventions, and the rule about filtering test accounts." + + Claude models understand the Skill format and structure natively. You don't need special system prompts or a "writing skills" skill to get Claude to help create Skills. Simply ask Claude to create a Skill and it will generate properly structured SKILL.md content with appropriate frontmatter and body content. + +4. **Review for conciseness**: Check that Claude A hasn't added unnecessary explanations. Ask: "Remove the explanation about what win rate means - Claude already knows that." + +5. **Improve information architecture**: Ask Claude A to organize the content more effectively. For example: "Organize this so the table schema is in a separate reference file. We might add more tables later." + +6. **Test on similar tasks**: Use the Skill with Claude B (a fresh instance with the Skill loaded) on related use cases. Observe whether Claude B finds the right information, applies rules correctly, and handles the task successfully. + +7. **Iterate based on observation**: If Claude B struggles or misses something, return to Claude A with specifics: "When Claude used this Skill, it forgot to filter by date for Q4. Should we add a section about date filtering patterns?" 
+ +**Iterating on existing Skills:** + +The same hierarchical pattern continues when improving Skills. You alternate between: +- **Working with Claude A** (the expert who helps refine the Skill) +- **Testing with Claude B** (the agent using the Skill to perform real work) +- **Observing Claude B's behavior** and bringing insights back to Claude A + +1. **Use the Skill in real workflows**: Give Claude B (with the Skill loaded) actual tasks, not test scenarios + +2. **Observe Claude B's behavior**: Note where it struggles, succeeds, or makes unexpected choices + + **Example observation**: "When I asked Claude B for a regional sales report, it wrote the query but forgot to filter out test accounts, even though the Skill mentions this rule." + +3. **Return to Claude A for improvements**: Share the current SKILL.md and describe what you observed. Ask: "I noticed Claude B forgot to filter test accounts when I asked for a regional report. The Skill mentions filtering, but maybe it's not prominent enough?" + +4. **Review Claude A's suggestions**: Claude A might suggest reorganizing to make rules more prominent, using stronger language like "MUST filter" instead of "always filter", or restructuring the workflow section. + +5. **Apply and test changes**: Update the Skill with Claude A's refinements, then test again with Claude B on similar requests + +6. **Repeat based on usage**: Continue this observe-refine-test cycle as you encounter new scenarios. Each iteration improves the Skill based on real agent behavior, not assumptions. + +**Gathering team feedback:** + +1. Share Skills with teammates and observe their usage +2. Ask: Does the Skill activate when expected? Are instructions clear? What's missing? +3. 
Incorporate feedback to address blind spots in your own usage patterns + +**Why this approach works**: Claude A understands agent needs, you provide domain expertise, Claude B reveals gaps through real usage, and iterative refinement improves Skills based on observed behavior rather than assumptions. + +### Observe how Claude navigates Skills + +As you iterate on Skills, pay attention to how Claude actually uses them in practice. Watch for: + +- **Unexpected exploration paths**: Does Claude read files in an order you didn't anticipate? This might indicate your structure isn't as intuitive as you thought +- **Missed connections**: Does Claude fail to follow references to important files? Your links might need to be more explicit or prominent +- **Overreliance on certain sections**: If Claude repeatedly reads the same file, consider whether that content should be in the main SKILL.md instead +- **Ignored content**: If Claude never accesses a bundled file, it might be unnecessary or poorly signaled in the main instructions + +Iterate based on these observations rather than assumptions. The 'name' and 'description' in your Skill's metadata are particularly critical. Claude uses these when deciding whether to trigger the Skill in response to the current task. Make sure they clearly describe what the Skill does and when it should be used. + +## Anti-patterns to avoid + +### Avoid Windows-style paths + +Always use forward slashes in file paths, even on Windows: + +- ✓ **Good**: `scripts/helper.py`, `reference/guide.md` +- ✗ **Avoid**: `scripts\helper.py`, `reference\guide.md` + +Unix-style paths work across all platforms, while Windows-style paths cause errors on Unix systems. + +### Avoid offering too many options + +Don't present multiple approaches unless necessary: + +````markdown +**Bad example: Too many choices** (confusing): +"You can use pypdf, or pdfplumber, or PyMuPDF, or pdf2image, or..." 
+ +**Good example: Provide a default** (with escape hatch): +"Use pdfplumber for text extraction: +```python +import pdfplumber +``` + +For scanned PDFs requiring OCR, use pdf2image with pytesseract instead." +```` + +## Advanced: Skills with executable code + +### Solve, don't punt + +When writing scripts for Skills, handle error conditions rather than punting to Claude. + +**Good example: Handle errors explicitly**: +```python +def process_file(path): + """Process a file, creating it if it doesn't exist.""" + try: + with open(path) as f: + return f.read() + except FileNotFoundError: + # Create file with default content instead of failing + print(f"File {path} not found, creating default") + with open(path, 'w') as f: + f.write('') + return '' + except PermissionError: + # Provide alternative instead of failing + print(f"Cannot access {path}, using default") + return '' +``` + +**Bad example: Punt to Claude**: +```python +def process_file(path): + # Just fail and let Claude figure it out + return open(path).read() +``` + +Configuration parameters should also be justified and documented to avoid "voodoo constants". If you don't know the right value, how will Claude determine it? + +**Good example: Self-documenting**: +```python +# HTTP requests typically complete within 30 seconds +# Longer timeout accounts for slow connections +REQUEST_TIMEOUT = 30 + +# Three retries balances reliability vs speed +# Most intermittent failures resolve by the second retry +MAX_RETRIES = 3 +``` + +**Bad example: Magic numbers**: +```python +TIMEOUT = 47 # Why 47? +RETRIES = 5 # Why 5? 
+``` + +### Provide utility scripts + +Even if Claude could write a script, pre-made scripts offer advantages: + +**Benefits of utility scripts**: +- More reliable than generated code +- Save tokens (no need to include code in context) +- Save time (no code generation required) +- Ensure consistency across uses + +**Important distinction**: Make clear in your instructions whether Claude should: +- **Execute the script** (most common): "Run `analyze_form.py` to extract fields" +- **Read it as reference** (for complex logic): "See `analyze_form.py` for the field extraction algorithm" + +For most utility scripts, execution is preferred because it's more reliable and efficient. + +**Example**: +````markdown +## Utility scripts + +**analyze_form.py**: Extract all form fields from PDF + +```bash +python scripts/analyze_form.py input.pdf > fields.json +``` + +Output format: +```json +{ + "field_name": {"type": "text", "x": 100, "y": 200}, + "signature": {"type": "sig", "x": 150, "y": 500} +} +``` + +**validate_boxes.py**: Check for overlapping bounding boxes + +```bash +python scripts/validate_boxes.py fields.json +# Returns: "OK" or lists conflicts +``` + +**fill_form.py**: Apply field values to PDF + +```bash +python scripts/fill_form.py input.pdf fields.json output.pdf +``` +```` + +### Use visual analysis + +When inputs can be rendered as images, have Claude analyze them: + +````markdown +## Form layout analysis + +1. Convert PDF to images: + ```bash + python scripts/pdf_to_images.py form.pdf + ``` + +2. Analyze each page image to identify form fields +3. Claude can see field locations and types visually +```` + +Claude's vision capabilities help understand layouts and structures. + +### Create verifiable intermediate outputs + +When Claude performs complex, open-ended tasks, it can make mistakes. 
The "plan-validate-execute" pattern catches errors early by having Claude first create a plan in a structured format, then validate that plan with a script before executing it. + +**Example**: Imagine asking Claude to update 50 form fields in a PDF based on a spreadsheet. Without validation, Claude might reference non-existent fields, create conflicting values, miss required fields, or apply updates incorrectly. + +**Solution**: Use the workflow pattern shown above (PDF form filling), but add an intermediate `changes.json` file that gets validated before applying changes. The workflow becomes: analyze → **create plan file** → **validate plan** → execute → verify. + +**Why this pattern works:** +- **Catches errors early**: Validation finds problems before changes are applied +- **Machine-verifiable**: Scripts provide objective verification +- **Reversible planning**: Claude can iterate on the plan without touching originals +- **Clear debugging**: Error messages point to specific problems + +**When to use**: Batch operations, destructive changes, complex validation rules, high-stakes operations. + +**Implementation tip**: Make validation scripts verbose with specific error messages like "Field 'signature_date' not found. Available fields: customer_name, order_total, signature_date_signed" to help Claude fix issues. + +### Package dependencies + +Skills run in the code execution environment with platform-specific limitations: + +- **claude.ai**: Can install packages from npm and PyPI and pull from GitHub repositories +- **Anthropic API**: Has no network access and no runtime package installation + +List required packages in your SKILL.md and verify they're available in the code execution tool documentation. + +### Runtime environment + +Skills run in a code execution environment with filesystem access, bash commands, and code execution capabilities. + +**How this affects your authoring:** + +**How Claude accesses Skills:** + +1. 
**Metadata pre-loaded**: At startup, the name and description from all Skills' YAML frontmatter are loaded into the system prompt +2. **Files read on-demand**: Claude uses bash Read tools to access SKILL.md and other files from the filesystem when needed +3. **Scripts executed efficiently**: Utility scripts can be executed via bash without loading their full contents into context. Only the script's output consumes tokens +4. **No context penalty for large files**: Reference files, data, or documentation don't consume context tokens until actually read + +- **File paths matter**: Claude navigates your skill directory like a filesystem. Use forward slashes (`reference/guide.md`), not backslashes +- **Name files descriptively**: Use names that indicate content: `form_validation_rules.md`, not `doc2.md` +- **Organize for discovery**: Structure directories by domain or feature + - Good: `reference/finance.md`, `reference/sales.md` + - Bad: `docs/file1.md`, `docs/file2.md` +- **Bundle comprehensive resources**: Include complete API docs, extensive examples, large datasets; no context penalty until accessed +- **Prefer scripts for deterministic operations**: Write `validate_form.py` rather than asking Claude to generate validation code +- **Make execution intent clear**: + - "Run `analyze_form.py` to extract fields" (execute) + - "See `analyze_form.py` for the extraction algorithm" (read as reference) +- **Test file access patterns**: Verify Claude can navigate your directory structure by testing with real requests + +**Example:** + +``` +bigquery-skill/ +├── SKILL.md (overview, points to reference files) +└── reference/ + ├── finance.md (revenue metrics) + ├── sales.md (pipeline data) + └── product.md (usage analytics) +``` + +When the user asks about revenue, Claude reads SKILL.md, sees the reference to `reference/finance.md`, and invokes bash to read just that file. The sales.md and product.md files remain on the filesystem, consuming zero context tokens until needed. 
This filesystem-based model is what enables progressive disclosure. Claude can navigate and selectively load exactly what each task requires. + +### MCP tool references + +If your Skill uses MCP (Model Context Protocol) tools, always use fully qualified tool names to avoid "tool not found" errors. + +**Format**: `ServerName:tool_name` + +**Example**: +```markdown +Use the BigQuery:bigquery_schema tool to retrieve table schemas. +Use the GitHub:create_issue tool to create issues. +``` + +Where: +- `BigQuery` and `GitHub` are MCP server names +- `bigquery_schema` and `create_issue` are the tool names within those servers + +Without the server prefix, Claude may fail to locate the tool, especially when multiple MCP servers are available. + +### Avoid assuming tools are installed + +Don't assume packages are available: + +````markdown +**Bad example: Assumes installation**: +"Use the pdf library to process the file." + +**Good example: Explicit about dependencies**: +"Install required package: `pip install pypdf` + +Then use it: +```python +from pypdf import PdfReader +reader = PdfReader("file.pdf") +```" +```` + +## Technical notes + +### YAML frontmatter requirements + +The SKILL.md frontmatter requires `name` and `description` fields with specific validation rules: +- `name`: Maximum 64 characters, lowercase letters/numbers/hyphens only, no XML tags, no reserved words +- `description`: Maximum 1024 characters, non-empty, no XML tags + +### Token budgets + +Keep SKILL.md body under 500 lines for optimal performance. If your content exceeds this, split it into separate files using the progressive disclosure patterns described earlier. 
+ +## Checklist for effective Skills + +Before sharing a Skill, verify: + +### Core quality +- [ ] Description is specific and includes key terms +- [ ] Description includes both what the Skill does and when to use it +- [ ] SKILL.md body is under 500 lines +- [ ] Additional details are in separate files (if needed) +- [ ] No time-sensitive information (or in "old patterns" section) +- [ ] Consistent terminology throughout +- [ ] Examples are concrete, not abstract +- [ ] File references are one level deep +- [ ] Progressive disclosure used appropriately +- [ ] Workflows have clear steps + +### Code and scripts +- [ ] Scripts solve problems rather than punt to Claude +- [ ] Error handling is explicit and helpful +- [ ] No "voodoo constants" (all values justified) +- [ ] Required packages listed in instructions and verified as available +- [ ] Scripts have clear documentation +- [ ] No Windows-style paths (all forward slashes) +- [ ] Validation/verification steps for critical operations +- [ ] Feedback loops included for quality-critical tasks + +### Testing +- [ ] At least three evaluations created +- [ ] Tested with Haiku, Sonnet, and Opus +- [ ] Tested with real usage scenarios +- [ ] Team feedback incorporated (if applicable) diff --git a/skills/skill/references/obra-tdd-methodology.md b/skills/skill/references/obra-tdd-methodology.md new file mode 100644 index 0000000..19f7df9 --- /dev/null +++ b/skills/skill/references/obra-tdd-methodology.md @@ -0,0 +1,278 @@ +# Test-Driven Documentation (TDD for Skills) + +Extracted from obra/superpowers-skills with attribution. + +## Core Concept + +**Writing skills IS Test-Driven Development applied to process documentation.** + +Test skills with subagents BEFORE writing, iterate until bulletproof. 
+ +## TDD Mapping + +| TDD Concept | Skill Creation | +|-------------|----------------| +| Test case | Pressure scenario with subagent | +| Production code | Skill document (SKILL.md) | +| Test fails (RED) | Agent violates rule without skill | +| Test passes (GREEN) | Agent complies with skill present | +| Refactor | Close loopholes while maintaining compliance | + +## The Iron Law + +``` +NO SKILL WITHOUT A FAILING TEST FIRST +``` + +No exceptions: +- Not for "simple additions" +- Not for "just documentation updates" +- Delete untested changes and start over + +## RED-GREEN-REFACTOR Cycle + +### RED Phase: Write Failing Test + +1. **Create pressure scenarios** with subagent + - Combine multiple pressures (time + sunk cost + authority) + - Make it HARD to follow the rule + +2. **Run WITHOUT skill** - Document exact behavior: + - What choices did agent make? + - What rationalizations used? (verbatim quotes) + - Which pressures triggered violations? + +3. **Identify patterns** in failures + +### GREEN Phase: Write Minimal Skill + +1. **Address specific baseline failures** found in RED + - Don't add hypothetical content + - Write only what fixes observed violations + +2. **Run WITH skill** - Agent should now comply + +3. **Verify compliance** under same pressure + +### REFACTOR Phase: Close Loopholes + +1. **Find new rationalizations** - Agent found workaround? + +2. **Add explicit counters** to skill + +3. **Re-test** until bulletproof + +## Pressure Types + +### Time Pressure +- "You have 5 minutes to ship this" +- "Deadline is in 1 hour" + +### Sunk Cost +- "You've already written 200 lines of code" +- "This has been in development for 3 days" + +### Authority +- "Senior engineer approved this approach" +- "This is company standard" + +### Exhaustion +- "You've been debugging for 4 hours" +- "This is the 10th iteration" + +**Combine 2-3 pressures** for realistic scenarios. + +## Bulletproofing Against Rationalization + +Agents will rationalize violations. 
Close every loophole explicitly: + +### Pattern 1: Add Explicit Forbid List + +❌ Bad: +```markdown +Write tests before code. +``` + +✅ Good: +```markdown +Write tests before code. Delete code if written first. Start over. + +**No exceptions:** +- Don't keep as "reference" +- Don't "adapt" while testing +- Don't look at it +- Delete means delete +``` + +### Pattern 2: Build Rationalization Table + +Capture every excuse from testing: + +| Excuse | Reality | +|--------|---------| +| "Too simple to test" | Simple code breaks. Test takes 30 seconds. | +| "I'll test after" | Tests passing immediately prove nothing. | +| "This is different because..." | It's not. Write test first. | + +### Pattern 3: Create Red Flags List + +```markdown +## Red Flags - STOP and Start Over + +Saying any of these means you're about to violate the rule: +- "Code before test" +- "I already tested it manually" +- "Tests after achieve same purpose" +- "It's about spirit not ritual" + +**All mean: Delete code. Start over with TDD.** +``` + +### Pattern 4: Address Spirit vs Letter + +Add early in skill: + +```markdown +**Violating the letter of the rules IS violating the spirit of the rules.** +``` + +Cuts off entire class of rationalizations. + +## Testing Different Skill Types + +### Discipline-Enforcing Skills +Rules/requirements (TDD, verification-before-completion) + +**Test with:** +- Academic questions: Do they understand? +- Pressure scenarios: Do they comply under stress? +- Multiple pressures combined +- Identify and counter rationalizations + +### Technique Skills +How-to guides (condition-based-waiting, root-cause-tracing) + +**Test with:** +- Application scenarios: Can they apply correctly? +- Variation scenarios: Handle edge cases? +- Missing information tests: Instructions have gaps? + +### Pattern Skills +Mental models (reducing-complexity concepts) + +**Test with:** +- Recognition scenarios: Recognize when pattern applies? +- Application scenarios: Use the mental model? 
+- Counter-examples: Know when NOT to apply? + +### Reference Skills +Documentation/APIs + +**Test with:** +- Retrieval scenarios: Find the right information? +- Application scenarios: Use it correctly? +- Gap testing: Common use cases covered? + +## Common Rationalization Excuses + +| Excuse | Reality | +|--------|---------| +| "Skill is obviously clear" | Clear to you ≠ clear to agents. Test it. | +| "It's just a reference" | References have gaps. Test retrieval. | +| "Testing is overkill" | Untested skills have issues. Always. | +| "I'll test if problems emerge" | Problems = agents can't use skill. Test BEFORE. | +| "Too tedious to test" | Less tedious than debugging bad skill in production. | +| "I'm confident it's good" | Overconfidence guarantees issues. Test anyway. | + +## Testing Workflow + +```bash +# 1. RED - Create baseline +create_pressure_scenario() +run_without_skill() > baseline.txt +document_rationalizations() + +# 2. GREEN - Write skill +write_minimal_skill_addressing_baseline() +run_with_skill() > with_skill.txt +verify_compliance() + +# 3. REFACTOR - Close loopholes +identify_new_rationalizations() +add_explicit_counters_to_skill() +rerun_tests() +repeat_until_bulletproof() +``` + +## Real Example: TDD Skill for Skills + +When creating the writing-skills skill itself: + +**RED:** +- Ran scenario: "Create a simple condition-waiting skill" +- Without TDD skill: Agent wrote skill without testing +- Rationalization: "It's simple enough, testing would be overkill" + +**GREEN:** +- Wrote skill with Iron Law and rationalization table +- Re-ran scenario with skill present +- Agent now creates test scenarios first + +**REFACTOR:** +- New rationalization: "I'll test after since it's just documentation" +- Added explicit counter: "Tests after = what does this do? Tests first = what SHOULD this do?" 
+- Re-tested: Agent complies + +## Deployment Checklist + +For EACH skill (no batching): + +**RED Phase:** +- [ ] Created pressure scenarios (3+ combined pressures) +- [ ] Ran WITHOUT skill - documented verbatim behavior +- [ ] Identified rationalization patterns + +**GREEN Phase:** +- [ ] Wrote minimal skill addressing baseline failures +- [ ] Ran WITH skill - verified compliance + +**REFACTOR Phase:** +- [ ] Identified NEW rationalizations +- [ ] Added explicit counters +- [ ] Built rationalization table +- [ ] Created red flags list +- [ ] Re-tested until bulletproof + +**Quality:** +- [ ] Follows Anthropic best practices +- [ ] Has concrete examples +- [ ] Quick reference table included + +**Deploy:** +- [ ] Committed to git +- [ ] Tested in production conversation + +## Key Insights + +1. **Test BEFORE writing** - Only way to know skill teaches right thing +2. **Pressure scenarios required** - Agents need stress to reveal rationalizations +3. **Explicit > Implicit** - Close every loophole with explicit forbid +4. **Iterate** - First version never bulletproof, refactor until it is +5. **Same rigor as code** - Skills are infrastructure, test like code + +## Source + +Based on [obra/superpowers-skills](https://github.com/obra/superpowers-skills) +- skills/meta/writing-skills/SKILL.md +- TDD methodology applied to documentation + +## Quick TDD Checklist + +Before deploying any skill: +- [ ] Created and documented failing baseline test? +- [ ] Skill addresses specific observed violations? +- [ ] Tested with skill present - passes? +- [ ] Identified and countered rationalizations? +- [ ] Explicit forbid list included? +- [ ] Red flags section present? +- [ ] Re-tested until bulletproof? 
diff --git a/skills/skill/references/overview.md b/skills/skill/references/overview.md new file mode 100644 index 0000000..5378713 --- /dev/null +++ b/skills/skill/references/overview.md @@ -0,0 +1,269 @@ +# Agent Skills Overview + +Agent Skills are modular capabilities that extend Claude's functionality. Each Skill packages instructions, metadata, and optional resources (scripts, templates) that Claude uses automatically when relevant. + +## Why use Skills + +Skills are reusable, filesystem-based resources that provide Claude with domain-specific expertise: workflows, context, and best practices that transform general-purpose agents into specialists. Unlike prompts (conversation-level instructions for one-off tasks), Skills load on-demand and eliminate the need to repeatedly provide the same guidance across multiple conversations. + +**Key benefits**: +- **Specialize Claude**: Tailor capabilities for domain-specific tasks +- **Reduce repetition**: Create once, use automatically +- **Compose capabilities**: Combine Skills to build complex workflows + +## Using Skills + +Anthropic provides pre-built Agent Skills for common document tasks (PowerPoint, Excel, Word, PDF), and you can create your own custom Skills. Both work the same way. Claude automatically uses them when relevant to your request. + +**Pre-built Agent Skills** are available to all users on claude.ai and via the Claude API. See the [Available Skills](#available-skills) section below for the complete list. + +**Custom Skills** let you package domain expertise and organizational knowledge. They're available across Claude's products: create them in Claude Code, upload them via the API, or add them in claude.ai settings. + +## How Skills work + +Skills leverage Claude's VM environment to provide capabilities beyond what's possible with prompts alone. 
Claude operates in a virtual machine with filesystem access, allowing Skills to exist as directories containing instructions, executable code, and reference materials, organized like an onboarding guide you'd create for a new team member. + +This filesystem-based architecture enables **progressive disclosure**: Claude loads information in stages as needed, rather than consuming context upfront. + +### Three types of Skill content, three levels of loading + +Skills can contain three types of content, each loaded at different times: + +#### Level 1: Metadata (always loaded) + +**Content type: Instructions**. The Skill's YAML frontmatter provides discovery information: + +```yaml +--- +name: pdf-processing +description: Extract text and tables from PDF files, fill forms, merge documents. Use when working with PDF files or when the user mentions PDFs, forms, or document extraction. +--- +``` + +Claude loads this metadata at startup and includes it in the system prompt. This lightweight approach means you can install many Skills without context penalty; Claude only knows each Skill exists and when to use it. + +#### Level 2: Instructions (loaded when triggered) + +**Content type: Instructions**. The main body of SKILL.md contains procedural knowledge: workflows, best practices, and guidance: + +````markdown +# PDF Processing + +## Quick start + +Use pdfplumber to extract text from PDFs: + +```python +import pdfplumber + +with pdfplumber.open("document.pdf") as pdf: + text = pdf.pages[0].extract_text() +``` + +For advanced form filling, see [FORMS.md](FORMS.md). +```` + +When you request something that matches a Skill's description, Claude reads SKILL.md from the filesystem via bash. Only then does this content enter the context window. + +#### Level 3: Resources and code (loaded as needed) + +**Content types: Instructions, code, and resources**. 
Skills can bundle additional materials: + +``` +pdf-skill/ +├── SKILL.md (main instructions) +├── FORMS.md (form-filling guide) +├── REFERENCE.md (detailed API reference) +└── scripts/ + └── fill_form.py (utility script) +``` + +**Instructions**: Additional markdown files (FORMS.md, REFERENCE.md) containing specialized guidance and workflows + +**Code**: Executable scripts (fill_form.py, validate.py) that Claude runs via bash; scripts provide deterministic operations without consuming context + +**Resources**: Reference materials like database schemas, API documentation, templates, or examples + +Claude accesses these files only when referenced. The filesystem model means each content type has different strengths: instructions for flexible guidance, code for reliability, resources for factual lookup. + +| Level | When Loaded | Token Cost | Content | +|-------|------------|------------|---------| +| **Level 1: Metadata** | Always (at startup) | ~100 tokens per Skill | `name` and `description` from YAML frontmatter | +| **Level 2: Instructions** | When Skill is triggered | Under 5k tokens | SKILL.md body with instructions and guidance | +| **Level 3+: Resources** | As needed | Effectively unlimited | Bundled files executed via bash without loading contents into context | + +Progressive disclosure ensures only relevant content occupies the context window at any given time. + +### The Skills architecture + +Skills run in a code execution environment where Claude has filesystem access, bash commands, and code execution capabilities. Think of it like this: Skills exist as directories on a virtual machine, and Claude interacts with them using the same bash commands you'd use to navigate files on your computer. + +**How Claude accesses Skill content:** + +When a Skill is triggered, Claude uses bash to read SKILL.md from the filesystem, bringing its instructions into the context window. 
If those instructions reference other files (like FORMS.md or a database schema), Claude reads those files too using additional bash commands. When instructions mention executable scripts, Claude runs them via bash and receives only the output (the script code itself never enters context). + +**What this architecture enables:** + +**On-demand file access**: Claude reads only the files needed for each specific task. A Skill can include dozens of reference files, but if your task only needs the sales schema, Claude loads just that one file. The rest remain on the filesystem consuming zero tokens. + +**Efficient script execution**: When Claude runs `validate_form.py`, the script's code never loads into the context window. Only the script's output (like "Validation passed" or specific error messages) consumes tokens. This makes scripts far more efficient than having Claude generate equivalent code on the fly. + +**No practical limit on bundled content**: Because files don't consume context until accessed, Skills can include comprehensive API documentation, large datasets, extensive examples, or any reference materials you need. There's no context penalty for bundled content that isn't used. + +This filesystem-based model is what makes progressive disclosure work. Claude navigates your Skill like you'd reference specific sections of an onboarding guide, accessing exactly what each task requires. + +### Example: Loading a PDF processing skill + +Here's how Claude loads and uses a PDF processing skill: + +1. **Startup**: System prompt includes: `PDF Processing - Extract text and tables from PDF files, fill forms, merge documents` +2. **User request**: "Extract the text from this PDF and summarize it" +3. **Claude invokes**: `bash: read pdf-skill/SKILL.md` → Instructions loaded into context +4. **Claude determines**: Form filling is not needed, so FORMS.md is not read +5. 
**Claude executes**: Uses instructions from SKILL.md to complete the task + +This dynamic loading ensures only relevant skill content occupies the context window. + +## Where Skills work + +Skills are available across Claude's agent products: + +### Claude API + +The Claude API supports both pre-built Agent Skills and custom Skills. Both work identically: specify the relevant `skill_id` in the `container` parameter along with the code execution tool. + +**Prerequisites**: Using Skills via the API requires three beta headers: +- `code-execution-2025-08-25` - Skills run in the code execution container +- `skills-2025-10-02` - Enables Skills functionality +- `files-api-2025-04-14` - Required for uploading/downloading files to/from the container + +Use pre-built Agent Skills by referencing their `skill_id` (e.g., `pptx`, `xlsx`), or create and upload your own via the Skills API (`/v1/skills` endpoints). Custom Skills are shared organization-wide. + +### Claude Code + +Claude Code supports only Custom Skills. + +**Custom Skills**: Create Skills as directories with SKILL.md files. Claude discovers and uses them automatically. + +Custom Skills in Claude Code are filesystem-based and don't require API uploads. + +### Claude Agent SDK + +The Claude Agent SDK supports custom Skills through filesystem-based configuration. + +**Custom Skills**: Create Skills as directories with SKILL.md files in `.claude/skills/`. Enable Skills by including `"Skill"` in your `allowed_tools` configuration. + +Skills in the Agent SDK are then automatically discovered when the SDK runs. + +### Claude.ai + +Claude.ai supports both pre-built Agent Skills and custom Skills. + +**Pre-built Agent Skills**: These Skills are already working behind the scenes when you create documents. Claude uses them without requiring any setup. + +**Custom Skills**: Upload your own Skills as zip files through Settings > Features. Available on Pro, Max, Team, and Enterprise plans with code execution enabled. 
Custom Skills are individual to each user; they are not shared organization-wide and cannot be centrally managed by admins. + +## Skill structure + +Every Skill requires a `SKILL.md` file with YAML frontmatter: + +```yaml +--- +name: your-skill-name +description: Brief description of what this Skill does and when to use it +--- + +# Your Skill Name + +## Instructions +[Clear, step-by-step guidance for Claude to follow] + +## Examples +[Concrete examples of using this Skill] +``` + +**Required fields**: `name` and `description` + +**Field requirements**: + +`name`: +- Maximum 64 characters +- Must contain only lowercase letters, numbers, and hyphens +- Cannot contain XML tags +- Cannot contain reserved words: "anthropic", "claude" + +`description`: +- Must be non-empty +- Maximum 1024 characters +- Cannot contain XML tags + +The `description` should include both what the Skill does and when Claude should use it. + +## Security considerations + +Use Skills only from trusted sources: those you created yourself or obtained from Anthropic. Skills provide Claude with new capabilities through instructions and code, and while this makes them powerful, it also means a malicious Skill can direct Claude to invoke tools or execute code in ways that don't match the Skill's stated purpose. + +If you must use a Skill from an untrusted or unknown source, exercise extreme caution and thoroughly audit it before use. Depending on what access Claude has when executing the Skill, malicious Skills could lead to data exfiltration, unauthorized system access, or other security risks. + +**Key security considerations**: +- **Audit thoroughly**: Review all files bundled in the Skill: SKILL.md, scripts, images, and other resources. 
Look for unusual patterns like unexpected network calls, file access patterns, or operations that don't match the Skill's stated purpose +- **External sources are risky**: Skills that fetch data from external URLs pose particular risk, as fetched content may contain malicious instructions. Even trustworthy Skills can be compromised if their external dependencies change over time +- **Tool misuse**: Malicious Skills can invoke tools (file operations, bash commands, code execution) in harmful ways +- **Data exposure**: Skills with access to sensitive data could be designed to leak information to external systems +- **Treat like installing software**: Only use Skills from trusted sources. Be especially careful when integrating Skills into production systems with access to sensitive data or critical operations + +## Available Skills + +### Pre-built Agent Skills + +The following pre-built Agent Skills are available for immediate use: + +- **PowerPoint (pptx)**: Create presentations, edit slides, analyze presentation content +- **Excel (xlsx)**: Create spreadsheets, analyze data, generate reports with charts +- **Word (docx)**: Create documents, edit content, format text +- **PDF (pdf)**: Generate formatted PDF documents and reports + +These Skills are available on the Claude API and claude.ai. + +### Custom Skills examples + +For complete examples of custom Skills, see the Skills cookbook at: https://github.com/anthropics/claude-cookbooks/tree/main/skills + +## Limitations and constraints + +Understanding these limitations helps you plan your Skills deployment effectively. + +### Cross-surface availability + +**Custom Skills do not sync across surfaces**. 
Skills uploaded to one surface are not automatically available on others:
+
+- Skills uploaded to Claude.ai must be separately uploaded to the API
+- Skills uploaded via the API are not available on Claude.ai
+- Claude Code Skills are filesystem-based and separate from both Claude.ai and API
+
+You'll need to manage and upload Skills separately for each surface where you want to use them.
+
+### Sharing scope
+
+Skills have different sharing models depending on where you use them:
+- **Claude.ai**: Individual user only; each team member must upload separately
+- **Claude API**: Workspace-wide; all workspace members can access uploaded Skills
+- **Claude Code**: Personal (`~/.claude/skills/`) or project-based (`.claude/skills/`); can also be shared via Claude Code Plugins
+
+Claude.ai does not currently support centralized admin management or org-wide distribution of custom Skills.
+
+### Runtime environment constraints
+
+The exact runtime environment available to your skill depends on the product surface where you use it.
+
+- **Claude.ai**:
+  - **Varying network access**: Depending on user/admin settings, Skills may have full, partial, or no network access.
+- **Claude API**:
+  - **No network access**: Skills cannot make external API calls or access the internet
+  - **No runtime package installation**: Only pre-installed packages are available. You cannot install new packages during execution.
+  - **Pre-configured dependencies only**: Check the code execution tool documentation for the list of available packages
+- **Claude Code**:
+  - **Full network access**: Skills have the same network access as any other program on the user's computer
+  - **Global package installation discouraged**: Skills should only install packages locally in order to avoid interfering with the user's computer
+
+Plan your Skills to work within these constraints. 
diff --git a/skills/skill/references/quality-loops.md b/skills/skill/references/quality-loops.md new file mode 100644 index 0000000..fb366fb --- /dev/null +++ b/skills/skill/references/quality-loops.md @@ -0,0 +1,379 @@ +# Quality Assurance Loops + +How skill-factory ensures every skill meets minimum quality standards. + +## Quality Scoring (Anthropic Best Practices) + +Based on official Anthropic guidelines, total possible: 10.0 points + +### Scoring Criteria + +| Criterion | Weight | What to Check | +|-----------|--------|---------------| +| Description Quality | 2.0 | Specific, includes when_to_use, third-person | +| Name Convention | 0.5 | Lowercase, hyphens, descriptive | +| Conciseness | 1.5 | <500 lines OR progressive disclosure | +| Progressive Disclosure | 1.0 | Reference files for details | +| Examples & Workflows | 1.0 | Concrete code samples | +| Degree of Freedom | 0.5 | Appropriate for task type | +| Dependencies | 0.5 | Documented and verified | +| Structure | 1.0 | Well-organized sections | +| Error Handling | 0.5 | Scripts handle errors | +| Anti-Patterns | 1.0 | No time-sensitive info, consistent terminology | +| Testing | 0.5 | Evidence of testing | + +## Enhancement Loop Algorithm + +```python +def quality_assurance_loop(skill_path: str, min_score: float = 8.0) -> Skill: + """ + Iteratively improve skill until it meets quality threshold. 
+ Max iterations: 5 (prevents infinite loops) + """ + max_iterations = 5 + iteration = 0 + + while iteration < max_iterations: + # Score skill + score, issues = score_skill(skill_path) + + print(f"📊 Quality check: {score}/10") + + if score >= min_score: + print(f"✅ Quality threshold met ({score} >= {min_score})") + return load_skill(skill_path) + + # Report issues + print(f" ⚠️ Issues found:") + for issue in issues: + print(f" - {issue.description}") + + # Apply fixes + print(f"🔧 Enhancing skill...") + skill = apply_fixes(skill_path, issues) + + iteration += 1 + + # If we hit max iterations without reaching threshold + if score < min_score: + print(f"⚠️ Quality score {score} below threshold after {max_iterations} iterations") + print(f" Manual review recommended") + return load_skill(skill_path) + + return load_skill(skill_path) +``` + +## Fix Strategies + +### Issue: Description Too Generic + +**Detection:** +```python +def check_description(skill): + desc = skill.frontmatter.description + if len(desc) < 50: + return Issue("Description too short (< 50 chars)") + if not contains_specifics(desc): + return Issue("Description lacks specifics") + if "help" in desc.lower() or "tool" in desc.lower(): + return Issue("Description too vague") + return None +``` + +**Fix:** +```python +def fix_description(skill): + # Extract key topics from skill content + topics = extract_topics(skill.content) + + # Generate specific description + desc = f"Comprehensive guide for {skill.name} covering " + desc += ", ".join(topics[:3]) + desc += f". 
Use when working with {topics[0]} " + desc += f"and need {', '.join(topics[1:3])}" + + skill.frontmatter.description = desc + return skill +``` + +### Issue: Missing Examples + +**Detection:** +```python +def check_examples(skill): + code_blocks = count_code_blocks(skill.content) + if code_blocks < 3: + return Issue(f"Only {code_blocks} code examples (recommend 5+)") + return None +``` + +**Fix:** +```python +def add_examples(skill, source_docs=None): + if source_docs: + # Extract from documentation + examples = extract_code_examples(source_docs) + else: + # Generate from skill content + examples = generate_examples_from_topics(skill) + + # Add examples section + if "## Examples" not in skill.content: + skill.content += "\n\n## Examples\n\n" + + for ex in examples[:5]: # Add top 5 examples + skill.content += f"### {ex.title}\n\n" + skill.content += f"```{ex.language}\n{ex.code}\n```\n\n" + if ex.explanation: + skill.content += f"{ex.explanation}\n\n" + + return skill +``` + +### Issue: Too Long (> 500 lines) + +**Detection:** +```python +def check_length(skill): + line_count = count_lines(skill.content) + if line_count > 500: + return Issue(f"SKILL.md is {line_count} lines (recommend <500)") + return None +``` + +**Fix:** +```python +def apply_progressive_disclosure(skill): + # Identify sections that can be moved to references + movable_sections = find_detail_sections(skill.content) + + skill.references = {} + + for section in movable_sections: + # Create reference file + ref_name = slugify(section.title) + ref_path = f"references/{ref_name}.md" + + # Move content + skill.references[ref_name] = section.content + + # Replace with reference + skill.content = skill.content.replace( + section.full_text, + f"See {ref_path} for detailed {section.title.lower()}." 
+ ) + + return skill +``` + +### Issue: Poor Structure + +**Detection:** +```python +def check_structure(skill): + issues = [] + + # Check for required sections + required = ["## Overview", "## Usage", "## Examples"] + for section in required: + if section not in skill.content: + issues.append(f"Missing {section}") + + # Check heading hierarchy + if has_heading_skips(skill.content): + issues.append("Heading hierarchy skips levels") + + # Check for TOC if long + if count_lines(skill.content) > 200 and "## Table of Contents" not in skill.content: + issues.append("Long skill missing table of contents") + + return issues if issues else None +``` + +**Fix:** +```python +def fix_structure(skill, issues): + # Add missing sections + if "Missing ## Overview" in issues: + overview = generate_overview(skill) + skill.content = insert_after_frontmatter(skill.content, overview) + + if "Missing ## Usage" in issues: + usage = generate_usage_section(skill) + skill.content = insert_before_examples(skill.content, usage) + + # Fix heading hierarchy + if "Heading hierarchy" in str(issues): + skill.content = normalize_headings(skill.content) + + # Add TOC if needed + if "missing table of contents" in str(issues): + toc = generate_toc(skill.content) + skill.content = insert_toc(skill.content, toc) + + return skill +``` + +### Issue: Vague/Generic Content + +**Detection:** +```python +def check_specificity(skill): + vague_phrases = [ + "you can", "might want to", "it's possible", + "there are various", "several options", + "many ways to", "different approaches" + ] + + content_lower = skill.content.lower() + vague_count = sum(1 for phrase in vague_phrases if phrase in content_lower) + + if vague_count > 10: + return Issue(f"Too many vague phrases ({vague_count})") + + return None +``` + +**Fix:** +```python +def improve_specificity(skill): + # Replace vague with specific + replacements = { + "you can": "Use", + "might want to": "Should", + "there are various": "Three main approaches:", + 
"several options": "Options:", + "many ways to": "Primary methods:", + } + + for vague, specific in replacements.items(): + skill.content = skill.content.replace(vague, specific) + + return skill +``` + +## Testing Integration + +After each enhancement, run tests: + +```python +def enhance_and_test(skill): + while score < min_score: + # Enhance + skill = apply_enhancements(skill) + + # Score + score = calculate_score(skill) + + # Test + test_results = run_tests(skill) + + if not test_results.all_passed(): + # Tests revealed new issues + issues = test_results.get_failures() + skill = fix_test_failures(skill, issues) + + return skill +``` + +## Progress Reporting + +User sees: + +``` +📊 Quality check: 7.4/10 + ⚠️ Issues found: + - Description too generic + - Missing examples in 4 sections + - Some outdated patterns detected + +🔧 Enhancing skill... + ✏️ Improving description... ✅ + 📝 Adding code examples... ✅ + 🔄 Updating patterns... ✅ + +📊 Quality check: 8.9/10 ✅ +``` + +Internal execution: + +```python +issues = [ + Issue("description_generic", fix=fix_description), + Issue("missing_examples", fix=add_examples, count=4), + Issue("outdated_patterns", fix=update_patterns) +] + +for issue in issues: + print(f" {issue.icon} {issue.action}... 
", end="") + skill = issue.fix(skill) + print("✅") +``` + +## Quality Metrics Dashboard + +After completion: + +``` +📊 Final Quality Report + +Anthropic Best Practices Score: 8.9/10 + +Breakdown: +✅ Description Quality: 2.0/2.0 (Excellent) +✅ Name Convention: 0.5/0.5 (Correct) +✅ Conciseness: 1.4/1.5 (Good - 420 lines) +✅ Progressive Disclosure: 1.0/1.0 (Excellent - 3 reference files) +✅ Examples & Workflows: 1.0/1.0 (12 code examples) +✅ Degree of Freedom: 0.5/0.5 (Appropriate) +✅ Dependencies: 0.5/0.5 (Documented) +✅ Structure: 1.0/1.0 (Well-organized) +✅ Error Handling: 0.5/0.5 (N/A for doc skill) +✅ Anti-Patterns: 0.5/1.0 (Minor: 2 time refs) +✅ Testing: 0.5/0.5 (15/15 tests passing) + +Recommendations: +⚠️ Remove 2 time-sensitive references for 1.0/1.0 on anti-patterns +``` + +## Failure Modes + +### Can't Reach Threshold + +If after 5 iterations score is still < 8.0: + +``` +⚠️ Quality score 7.8 after 5 iterations + +Blocking issues: +- Source documentation lacks code examples +- Framework has limited reference material + +Recommendations: +1. Manual examples needed (auto-generation limited) +2. Consider hybrid approach with custom content +3. Lower quality threshold to 7.5 for this specific case + +Continue with current skill? (y/n) +``` + +### Conflicting Requirements + +``` +⚠️ Conflicting requirements detected + +Issue: Comprehensive coverage (800 lines) vs Conciseness (<500 lines) + +Resolution: Applying progressive disclosure +- Main SKILL.md: 380 lines (overview + quick ref) +- Reference files: 5 files with detailed content +``` + +## Summary + +Quality loops ensure: +1. Every skill scores >= threshold (default 8.0) +2. Anthropic best practices followed +3. Automatic fixes applied +4. Tests pass +5. 
User sees progress, not complexity diff --git a/skills/skill/references/quickstart.md b/skills/skill/references/quickstart.md new file mode 100644 index 0000000..5b98d8a --- /dev/null +++ b/skills/skill/references/quickstart.md @@ -0,0 +1,513 @@ +# Get started with Agent Skills in the API + +Learn how to use Agent Skills to create documents with the Claude API in under 10 minutes. + +This tutorial shows you how to use Agent Skills to create a PowerPoint presentation. You'll learn how to enable Skills, make a simple request, and access the generated file. + +## Prerequisites + +- Anthropic API key (https://console.anthropic.com/settings/keys) +- Python 3.7+ or curl installed +- Basic familiarity with making API requests + +## What are Agent Skills? + +Pre-built Agent Skills extend Claude's capabilities with specialized expertise for tasks like creating documents, analyzing data, and processing files. Anthropic provides the following pre-built Agent Skills in the API: + +- **PowerPoint (pptx)**: Create and edit presentations +- **Excel (xlsx)**: Create and analyze spreadsheets +- **Word (docx)**: Create and edit documents +- **PDF (pdf)**: Generate PDF documents + +For custom Skills, see the Agent Skills Cookbook at: https://github.com/anthropics/claude-cookbooks/tree/main/skills + +## Step 1: List available Skills + +First, let's see what Skills are available. 
We'll use the Skills API to list all Anthropic-managed Skills: + +**Python:** +```python +import anthropic + +client = anthropic.Anthropic() + +# List Anthropic-managed Skills +skills = client.beta.skills.list( + source="anthropic", + betas=["skills-2025-10-02"] +) + +for skill in skills.data: + print(f"{skill.id}: {skill.display_title}") +``` + +**TypeScript:** +```typescript +import Anthropic from '@anthropic-ai/sdk'; + +const client = new Anthropic(); + +// List Anthropic-managed Skills +const skills = await client.beta.skills.list({ + source: 'anthropic', + betas: ['skills-2025-10-02'] +}); + +for (const skill of skills.data) { + console.log(`${skill.id}: ${skill.display_title}`); +} +``` + +**Shell:** +```bash +curl "https://api.anthropic.com/v1/skills?source=anthropic" \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -H "anthropic-beta: skills-2025-10-02" +``` + +You see the following Skills: `pptx`, `xlsx`, `docx`, and `pdf`. + +This API returns each Skill's metadata: its name and description. Claude loads this metadata at startup to know what Skills are available. This is the first level of **progressive disclosure**, where Claude discovers Skills without loading their full instructions yet. + +## Step 2: Create a presentation + +Now we'll use the PowerPoint Skill to create a presentation about renewable energy. 
We specify Skills using the `container` parameter in the Messages API: + +**Python:** +```python +import anthropic + +client = anthropic.Anthropic() + +# Create a message with the PowerPoint Skill +response = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=4096, + betas=["code-execution-2025-08-25", "skills-2025-10-02"], + container={ + "skills": [ + { + "type": "anthropic", + "skill_id": "pptx", + "version": "latest" + } + ] + }, + messages=[{ + "role": "user", + "content": "Create a presentation about renewable energy with 5 slides" + }], + tools=[{ + "type": "code_execution_20250825", + "name": "code_execution" + }] +) + +print(response.content) +``` + +**TypeScript:** +```typescript +import Anthropic from '@anthropic-ai/sdk'; + +const client = new Anthropic(); + +// Create a message with the PowerPoint Skill +const response = await client.beta.messages.create({ + model: 'claude-sonnet-4-5-20250929', + max_tokens: 4096, + betas: ['code-execution-2025-08-25', 'skills-2025-10-02'], + container: { + skills: [ + { + type: 'anthropic', + skill_id: 'pptx', + version: 'latest' + } + ] + }, + messages: [{ + role: 'user', + content: 'Create a presentation about renewable energy with 5 slides' + }], + tools: [{ + type: 'code_execution_20250825', + name: 'code_execution' + }] +}); + +console.log(response.content); +``` + +**Shell:** +```bash +curl https://api.anthropic.com/v1/messages \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -H "anthropic-beta: code-execution-2025-08-25,skills-2025-10-02" \ + -H "content-type: application/json" \ + -d '{ + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 4096, + "container": { + "skills": [ + { + "type": "anthropic", + "skill_id": "pptx", + "version": "latest" + } + ] + }, + "messages": [{ + "role": "user", + "content": "Create a presentation about renewable energy with 5 slides" + }], + "tools": [{ + "type": "code_execution_20250825", + "name": "code_execution" 
+ }] + }' +``` + +Let's break down what each part does: + +- **`container.skills`**: Specifies which Skills Claude can use +- **`type: "anthropic"`**: Indicates this is an Anthropic-managed Skill +- **`skill_id: "pptx"`**: The PowerPoint Skill identifier +- **`version: "latest"`**: The Skill version set to the most recently published +- **`tools`**: Enables code execution (required for Skills) +- **Beta headers**: `code-execution-2025-08-25` and `skills-2025-10-02` + +When you make this request, Claude automatically matches your task to the relevant Skill. Since you asked for a presentation, Claude determines the PowerPoint Skill is relevant and loads its full instructions: the second level of progressive disclosure. Then Claude executes the Skill's code to create your presentation. + +## Step 3: Download the created file + +The presentation was created in the code execution container and saved as a file. The response includes a file reference with a file ID. Extract the file ID and download it using the Files API: + +**Python:** +```python +# Extract file ID from response +file_id = None +for block in response.content: + if block.type == 'tool_use' and block.name == 'code_execution': + # File ID is in the tool result + for result_block in block.content: + if hasattr(result_block, 'file_id'): + file_id = result_block.file_id + break + +if file_id: + # Download the file + file_content = client.beta.files.download( + file_id=file_id, + betas=["files-api-2025-04-14"] + ) + + # Save to disk + with open("renewable_energy.pptx", "wb") as f: + file_content.write_to_file(f.name) + + print(f"Presentation saved to renewable_energy.pptx") +``` + +**TypeScript:** +```typescript +// Extract file ID from response +let fileId: string | null = null; +for (const block of response.content) { + if (block.type === 'tool_use' && block.name === 'code_execution') { + // File ID is in the tool result + for (const resultBlock of block.content) { + if ('file_id' in resultBlock) { + fileId = 
resultBlock.file_id; + break; + } + } + } +} + +if (fileId) { + // Download the file + const fileContent = await client.beta.files.download(fileId, { + betas: ['files-api-2025-04-14'] + }); + + // Save to disk + const fs = require('fs'); + fs.writeFileSync('renewable_energy.pptx', Buffer.from(await fileContent.arrayBuffer())); + + console.log('Presentation saved to renewable_energy.pptx'); +} +``` + +**Shell:** +```bash +# Extract file_id from response (using jq) +FILE_ID=$(echo "$RESPONSE" | jq -r '.content[] | select(.type=="tool_use" and .name=="code_execution") | .content[] | select(.file_id) | .file_id') + +# Download the file +curl "https://api.anthropic.com/v1/files/$FILE_ID/content" \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -H "anthropic-beta: files-api-2025-04-14" \ + --output renewable_energy.pptx + +echo "Presentation saved to renewable_energy.pptx" +``` + +## Try more examples + +Now that you've created your first document with Skills, try these variations: + +### Create a spreadsheet + +**Python:** +```python +response = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=4096, + betas=["code-execution-2025-08-25", "skills-2025-10-02"], + container={ + "skills": [ + { + "type": "anthropic", + "skill_id": "xlsx", + "version": "latest" + } + ] + }, + messages=[{ + "role": "user", + "content": "Create a quarterly sales tracking spreadsheet with sample data" + }], + tools=[{ + "type": "code_execution_20250825", + "name": "code_execution" + }] +) +``` + +**TypeScript:** +```typescript +const response = await client.beta.messages.create({ + model: 'claude-sonnet-4-5-20250929', + max_tokens: 4096, + betas: ['code-execution-2025-08-25', 'skills-2025-10-02'], + container: { + skills: [ + { + type: 'anthropic', + skill_id: 'xlsx', + version: 'latest' + } + ] + }, + messages: [{ + role: 'user', + content: 'Create a quarterly sales tracking spreadsheet with sample data' + }], + tools: [{ + type: 
'code_execution_20250825', + name: 'code_execution' + }] +}); +``` + +**Shell:** +```bash +curl https://api.anthropic.com/v1/messages \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -H "anthropic-beta: code-execution-2025-08-25,skills-2025-10-02" \ + -H "content-type: application/json" \ + -d '{ + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 4096, + "container": { + "skills": [ + { + "type": "anthropic", + "skill_id": "xlsx", + "version": "latest" + } + ] + }, + "messages": [{ + "role": "user", + "content": "Create a quarterly sales tracking spreadsheet with sample data" + }], + "tools": [{ + "type": "code_execution_20250825", + "name": "code_execution" + }] + }' +``` + +### Create a Word document + +**Python:** +```python +response = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=4096, + betas=["code-execution-2025-08-25", "skills-2025-10-02"], + container={ + "skills": [ + { + "type": "anthropic", + "skill_id": "docx", + "version": "latest" + } + ] + }, + messages=[{ + "role": "user", + "content": "Write a 2-page report on the benefits of renewable energy" + }], + tools=[{ + "type": "code_execution_20250825", + "name": "code_execution" + }] +) +``` + +**TypeScript:** +```typescript +const response = await client.beta.messages.create({ + model: 'claude-sonnet-4-5-20250929', + max_tokens: 4096, + betas: ['code-execution-2025-08-25', 'skills-2025-10-02'], + container: { + skills: [ + { + type: 'anthropic', + skill_id: 'docx', + version: 'latest' + } + ] + }, + messages: [{ + role: 'user', + content: 'Write a 2-page report on the benefits of renewable energy' + }], + tools: [{ + type: 'code_execution_20250825', + name: 'code_execution' + }] +}); +``` + +**Shell:** +```bash +curl https://api.anthropic.com/v1/messages \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -H "anthropic-beta: code-execution-2025-08-25,skills-2025-10-02" \ + -H "content-type: 
application/json" \ + -d '{ + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 4096, + "container": { + "skills": [ + { + "type": "anthropic", + "skill_id": "docx", + "version": "latest" + } + ] + }, + "messages": [{ + "role": "user", + "content": "Write a 2-page report on the benefits of renewable energy" + }], + "tools": [{ + "type": "code_execution_20250825", + "name": "code_execution" + }] + }' +``` + +### Generate a PDF + +**Python:** +```python +response = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=4096, + betas=["code-execution-2025-08-25", "skills-2025-10-02"], + container={ + "skills": [ + { + "type": "anthropic", + "skill_id": "pdf", + "version": "latest" + } + ] + }, + messages=[{ + "role": "user", + "content": "Generate a PDF invoice template" + }], + tools=[{ + "type": "code_execution_20250825", + "name": "code_execution" + }] +) +``` + +**TypeScript:** +```typescript +const response = await client.beta.messages.create({ + model: 'claude-sonnet-4-5-20250929', + max_tokens: 4096, + betas: ['code-execution-2025-08-25', 'skills-2025-10-02'], + container: { + skills: [ + { + type: 'anthropic', + skill_id: 'pdf', + version: 'latest' + } + ] + }, + messages: [{ + role: 'user', + content: 'Generate a PDF invoice template' + }], + tools: [{ + type: 'code_execution_20250825', + name: 'code_execution' + }] +}); +``` + +**Shell:** +```bash +curl https://api.anthropic.com/v1/messages \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -H "anthropic-beta: code-execution-2025-08-25,skills-2025-10-02" \ + -H "content-type: application/json" \ + -d '{ + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 4096, + "container": { + "skills": [ + { + "type": "anthropic", + "skill_id": "pdf", + "version": "latest" + } + ] + }, + "messages": [{ + "role": "user", + "content": "Generate a PDF invoice template" + }], + "tools": [{ + "type": "code_execution_20250825", + "name": "code_execution" + }] + 
}' +``` diff --git a/skills/skill/references/request-analysis.md b/skills/skill/references/request-analysis.md new file mode 100644 index 0000000..7103008 --- /dev/null +++ b/skills/skill/references/request-analysis.md @@ -0,0 +1,326 @@ +# Request Analysis Logic + +How skill-factory automatically determines the best creation method. + +## Detection Patterns + +### Pattern 1: Documentation-Based (Path A) + +**Triggers:** +- URL mentioned (https://..., http://...) +- "from [site] docs" ("from react.dev", "from docs.rs") +- "latest documentation", "current docs" +- "based on [framework] documentation" +- Documentation site names (MDN, docs.rs, react.dev, etc.) +- Version keywords ("latest", "current", "v2", "newest") + +**Examples:** +``` +"Create a React skill from react.dev" → Path A +"Create Anchor skill with latest docs" → Path A +"Skill for FastAPI based on current documentation" → Path A +``` + +### Pattern 2: GitHub Repository (Path A) + +**Triggers:** +- GitHub URL (github.com/...) +- "from [org]/[repo]" format +- "based on [repo] repository" +- "analyze [repo] and create skill" + +**Examples:** +``` +"Create skill from facebook/react repository" → Path A +"Skill based on coral-xyz/anchor repo" → Path A +``` + +### Pattern 3: PDF/Manual (Path A) + +**Triggers:** +- .pdf extension mentioned +- "PDF", "manual", "handbook" +- File path with .pdf + +**Examples:** +``` +"Create skill from API-manual.pdf" → Path A +"Extract skill from technical handbook PDF" → Path A +``` + +### Pattern 4: Custom Workflow (Path B) + +**Triggers:** +- No documentation source mentioned +- Workflow/process descriptions +- "for [doing X]" without source reference +- Best practices/methodology descriptions +- Company-specific processes + +**Examples:** +``` +"Create skill for debugging Solana transactions" → Path B +"Skill for code review workflows" → Path B +"Create skill for technical writing standards" → Path B +``` + +### Pattern 5: Hybrid (Path C) + +**Triggers:** +- Documentation 
source + custom requirements +- "docs" AND "plus custom workflows" +- "based on docs but add [X]" +- Multiple sources mentioned + +**Examples:** +``` +"Anchor skill from docs plus debugging workflows" → Path C +"React docs with company coding standards" → Path C +``` + +## Requirement Extraction + +### Quality Keywords + +**"Best practices":** +- Enable strict quality checking +- Minimum score: 8.5 (higher than default 8.0) +- Include anti-patterns section + +**"Latest", "Current", "Newest":** +- Scrape fresh documentation (no cache) +- Check for version mentions +- Prefer official sources + +**"Comprehensive", "Complete", "Detailed":** +- Extended coverage requirement +- More test scenarios +- Reference files for progressive disclosure + +**"Examples", "Code samples", "Patterns":** +- Ensure code examples included +- Extract from documentation +- Generate if needed + +## Decision Algorithm + +```python +def analyze_request(request: str) -> Path: + # Normalize + request_lower = request.lower() + + # Check for documentation sources + has_url = contains_url(request) + has_docs_keyword = any(kw in request_lower + for kw in ["docs", "documentation", "manual"]) + has_version_keyword = any(kw in request_lower + for kw in ["latest", "current", "newest"]) + + # Check for custom workflow indicators + has_workflow_keyword = any(kw in request_lower + for kw in ["workflow", "process", "debugging", + "methodology", "standards"]) + no_source = not (has_url or has_docs_keyword) + + # Decision logic + if (has_url or has_docs_keyword or has_version_keyword) and has_workflow_keyword: + return Path.HYBRID + elif has_url or has_docs_keyword or has_version_keyword: + return Path.AUTOMATED + elif no_source and has_workflow_keyword: + return Path.MANUAL_TDD + else: + # Default: ask for clarification + return Path.CLARIFY + +def extract_requirements(request: str) -> Requirements: + requirements = Requirements() + request_lower = request.lower() + + # Quality level + if "best practices" in 
request_lower: + requirements.min_quality_score = 8.5 + requirements.include_antipatterns = True + else: + requirements.min_quality_score = 8.0 + + # Coverage + if any(kw in request_lower for kw in ["comprehensive", "complete", "detailed"]): + requirements.coverage_level = "extensive" + requirements.use_progressive_disclosure = True + else: + requirements.coverage_level = "standard" + + # Examples + if any(kw in request_lower for kw in ["examples", "code samples", "patterns"]): + requirements.examples_required = True + requirements.min_examples = 10 + + # Version freshness + if any(kw in request_lower for kw in ["latest", "current", "newest"]): + requirements.use_cache = False + requirements.check_version = True + + return requirements +``` + +## Confidence Scoring + +Each path gets a confidence score: + +```python +def score_path_confidence(request: str) -> Dict[Path, float]: + scores = { + Path.AUTOMATED: 0.0, + Path.MANUAL_TDD: 0.0, + Path.HYBRID: 0.0 + } + + # Automated indicators + if contains_url(request): + scores[Path.AUTOMATED] += 0.8 + if "docs" in request.lower() or "documentation" in request.lower(): + scores[Path.AUTOMATED] += 0.6 + if "github.com" in request: + scores[Path.AUTOMATED] += 0.7 + if ".pdf" in request: + scores[Path.AUTOMATED] += 0.7 + + # Manual TDD indicators + if "workflow" in request.lower(): + scores[Path.MANUAL_TDD] += 0.5 + if "process" in request.lower(): + scores[Path.MANUAL_TDD] += 0.5 + if "custom" in request.lower(): + scores[Path.MANUAL_TDD] += 0.4 + if not (contains_url(request) or "docs" in request.lower()): + scores[Path.MANUAL_TDD] += 0.6 + + # Hybrid indicators + if scores[Path.AUTOMATED] > 0.5 and scores[Path.MANUAL_TDD] > 0.4: + scores[Path.HYBRID] = (scores[Path.AUTOMATED] + scores[Path.MANUAL_TDD]) / 1.5 + + return scores + +def select_path(scores: Dict[Path, float]) -> Path: + # Select highest confidence + max_score = max(scores.values()) + + # Require minimum confidence + if max_score < 0.5: + return Path.CLARIFY # 
Ask user for more info + + # Return highest scoring path + return max(scores, key=scores.get) +``` + +## Clarification Questions + +If confidence < 0.5, ask for clarification: + +``` +Low confidence - need clarification: + +Do you have a documentation source? + □ Yes, documentation website: ____________ + □ Yes, GitHub repository: ____________ + □ Yes, PDF file at: ____________ + □ No, this is a custom workflow/process + +If custom workflow, briefly describe: +____________________________________________ +``` + +## Source Extraction + +```python +def extract_source(request: str) -> Optional[str]: + # URL pattern + url_match = re.search(r'https?://[^\s]+', request) + if url_match: + return url_match.group(0) + + # GitHub repo pattern (org/repo) + github_match = re.search(r'github\.com/([^/\s]+/[^/\s]+)', request) + if github_match: + return f"https://github.com/{github_match.group(1)}" + + # Documentation site mentions + doc_sites = { + "react.dev": "https://react.dev", + "docs.rs": "https://docs.rs", + "python.org": "https://docs.python.org", + # ... 
more mappings + } + + for site, url in doc_sites.items(): + if site in request.lower(): + # Try to extract specific package/framework + return resolve_doc_url(site, request) + + # PDF path + pdf_match = re.search(r'[\w/.-]+\.pdf', request) + if pdf_match: + return pdf_match.group(0) + + return None +``` + +## Examples with Analysis + +### Example 1 +``` +Request: "Create a skill for Anchor development with latest docs and best practices" + +Analysis: +- Source: "Anchor" + "latest docs" → docs.rs/anchor-lang +- Requirements: + - latest → use_cache=False + - best practices → min_quality=8.5, include_antipatterns=True +- Path: AUTOMATED +- Confidence: 0.85 + +Execution: Path A (automated scraping) +``` + +### Example 2 +``` +Request: "Create a skill for debugging Solana transaction failures" + +Analysis: +- Source: None detected +- Requirements: + - debugging workflow (custom) +- Path: MANUAL_TDD +- Confidence: 0.75 + +Execution: Path B (manual TDD) +``` + +### Example 3 +``` +Request: "React skill from react.dev plus our company's JSX patterns" + +Analysis: +- Source: "react.dev" → https://react.dev +- Requirements: + - documentation source present + - custom patterns ("our company's") +- Path: HYBRID +- Confidence: 0.80 + +Execution: Path C (scrape react.dev, then add custom patterns) +``` + +### Example 4 +``` +Request: "Create a skill" + +Analysis: +- Source: None +- Requirements: None specific +- Path: CLARIFY +- Confidence: 0.1 + +Action: Ask clarification questions +``` diff --git a/skills/skill/references/skill-seekers-integration.md b/skills/skill/references/skill-seekers-integration.md new file mode 100644 index 0000000..d8107cc --- /dev/null +++ b/skills/skill/references/skill-seekers-integration.md @@ -0,0 +1,389 @@ +# Skill_Seekers Integration Guide + +How skill-factory integrates with Skill_Seekers for automated skill creation. + +## What is Skill_Seekers? 
+ +[Skill_Seekers](https://github.com/yusufkaraaslan/Skill_Seekers) is a Python tool (3,562★) that automatically converts: +- Documentation websites → Claude skills +- GitHub repositories → Claude skills +- PDF files → Claude skills + +**Key features:** +- AST parsing for code analysis +- OCR for scanned PDFs +- Conflict detection (docs vs actual code) +- MCP integration +- 299 passing tests + +## Installation + +### One-Command Install + +```bash +~/Projects/claude-skills/skill-factory/skill/scripts/install-skill-seekers.sh +``` + +### Manual Install + +```bash +# Clone +git clone https://github.com/yusufkaraaslan/Skill_Seekers ~/Skill_Seekers + +# Install dependencies +cd ~/Skill_Seekers +pip install -r requirements.txt + +# Optional: MCP integration +./setup_mcp.sh +``` + +### Verify Installation + +```bash +cd ~/Skill_Seekers +python3 -c "import cli.doc_scraper" && echo "✅ Installed correctly" +``` + +## Usage from skill-factory + +skill-factory automatically uses Skill_Seekers when appropriate. + +**Automatic detection:** +``` +User: "Create React skill from react.dev" + ↓ +skill-factory detects documentation source + ↓ +Automatically runs Skill_Seekers + ↓ +Post-processes output + ↓ +Quality checks + ↓ +Delivers result +``` + +## Integration Points + +### 1. Automatic Installation Check + +Before using Skill_Seekers: +```python +def check_skill_seekers(): + seekers_path = os.environ.get('SKILL_SEEKERS_PATH', f'{HOME}/Skill_Seekers') + + if not os.path.exists(seekers_path): + print("Skill_Seekers not found. Install? (y/n)") + if input().lower() == 'y': + install_skill_seekers() + else: + return False + + # Verify dependencies + try: + subprocess.run( + ['python3', '-c', 'import cli.doc_scraper'], + cwd=seekers_path, + check=True, + capture_output=True + ) + return True + except: + print("Dependencies missing. Installing...") + install_dependencies(seekers_path) + return True +``` + +### 2. 
Scraping with Optimal Settings + +```python +def scrape_documentation(url: str, skill_name: str): + seekers_path = get_seekers_path() + + # Optimal settings for Claude skills + cmd = [ + 'python3', 'cli/doc_scraper.py', + '--url', url, + '--name', skill_name, + '--async', # 2-3x faster + '--output', f'{seekers_path}/output/{skill_name}' + ] + + # Run with progress monitoring + process = subprocess.Popen( + cmd, + cwd=seekers_path, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT + ) + + for line in process.stdout: + # Show progress to user + print(f" {line.decode().strip()}") + + return f'{seekers_path}/output/{skill_name}' +``` + +### 3. Post-Processing Output + +Skill_Seekers output needs enhancement for Claude compatibility: + +```python +def post_process_skill_seekers_output(output_dir): + skill_path = f'{output_dir}/SKILL.md' + + # Load skill + skill = load_skill(skill_path) + + # Enhancements + enhancements = [] + + # 1. Check frontmatter + if not has_proper_frontmatter(skill): + skill = add_frontmatter(skill) + enhancements.append("Added proper YAML frontmatter") + + # 2. Check description specificity + if is_description_generic(skill): + skill = improve_description(skill) + enhancements.append("Improved description specificity") + + # 3. Check examples + example_count = count_code_blocks(skill) + if example_count < 5: + # Extract more from scraped data + skill = extract_more_examples(skill, output_dir) + enhancements.append(f"Added {count_code_blocks(skill) - example_count} more examples") + + # 4. Apply progressive disclosure if needed + if count_lines(skill) > 500: + skill = apply_progressive_disclosure(skill) + enhancements.append("Applied progressive disclosure") + + # Save enhanced skill + save_skill(skill_path, skill) + + return skill_path, enhancements +``` + +### 4. 
Quality Scoring + +```python +def quality_check_seekers_output(skill_path): + # Score against Anthropic best practices + score, issues = score_skill(skill_path) + + print(f"📊 Initial quality: {score}/10") + + if score < 8.0: + print(f" ⚠️ Issues: {len(issues)}") + for issue in issues: + print(f" - {issue}") + + return score, issues +``` + +## Supported Documentation Sources + +### Documentation Websites + +**Common frameworks:** +- React: https://react.dev +- Vue: https://vuejs.org +- Django: https://docs.djangoproject.com +- FastAPI: https://fastapi.tiangolo.com +- Rust docs: https://docs.rs/[crate] + +**Usage:** +```python +scrape_documentation('https://react.dev', 'react-development') +``` + +### GitHub Repositories + +**Example:** +```python +scrape_github_repo('facebook/react', 'react-internals') +``` + +Features: +- AST parsing for actual API +- Conflict detection vs docs +- README extraction +- Issues/PR analysis +- CHANGELOG parsing + +### PDF Files + +**Example:** +```python +scrape_pdf('/path/to/manual.pdf', 'api-manual') +``` + +Features: +- Text extraction +- OCR for scanned pages +- Table extraction +- Code block detection +- Image extraction + +## Configuration + +### Environment Variables + +```bash +# Skill_Seekers location +export SKILL_SEEKERS_PATH="$HOME/Skill_Seekers" + +# Cache behavior +export SKILL_SEEKERS_NO_CACHE="true" # For "latest" requests + +# Output location +export SKILL_SEEKERS_OUTPUT="$HOME/.claude/skills" +``` + +### Custom Presets + +Skill_Seekers has presets for common frameworks: +```python +presets = { + 'react': { + 'url': 'https://react.dev', + 'selectors': {'main_content': 'article'}, + 'categories': ['components', 'hooks', 'api'] + }, + 'rust': { + 'url_pattern': 'https://docs.rs/{crate}', + 'type': 'rust_docs' + } + # ... 
more presets +} +``` + +## Performance + +Typical scraping times: + +| Documentation Size | Sync Mode | Async Mode | +|-------------------|-----------|------------| +| Small (100-500 pages) | 15-30 min | 5-10 min | +| Medium (500-2K pages) | 30-60 min | 10-20 min | +| Large (10K+ pages) | 60-120 min | 20-40 min | + +**Always use `--async` flag** (2-3x faster) + +## Troubleshooting + +### Skill_Seekers Not Found + +```bash +# Check installation +ls ~/Skill_Seekers + +# If missing, install +scripts/install-skill-seekers.sh +``` + +### Dependencies Missing + +```bash +cd ~/Skill_Seekers +pip install -r requirements.txt +``` + +### Python Version Error + +Skill_Seekers requires Python 3.10+: +```bash +python3 --version # Should be 3.10 or higher +``` + +### Scraping Fails + +Check selectors in configuration: +```python +# If default selectors don't work +python3 cli/doc_scraper.py \ + --url https://example.com \ + --name example \ + --selector "main" \ # Custom selector + --async +``` + +## Advanced Features + +### Conflict Detection + +When combining docs + GitHub: +```python +scrape_multi_source({ + 'docs': 'https://react.dev', + 'github': 'facebook/react' +}, 'react-complete') + +# Outputs: +# - Documented APIs +# - Actual code APIs +# - ⚠️ Conflicts highlighted +# - Side-by-side comparison +``` + +### MCP Integration + +If Skill_Seekers MCP is installed: +``` +User (in Claude Code): "Generate React skill from react.dev" + +Claude automatically uses Skill_Seekers MCP server +``` + +## Quality Enhancement Loop + +After Skill_Seekers scraping: + +```python +1. Scrape with Skill_Seekers → Initial skill +2. Quality check → Score: 7.4/10 +3. Apply enhancements → Fix issues +4. Re-check → Score: 8.2/10 ✅ +5. Test with scenarios +6. Deliver +``` + +## When NOT to Use Skill_Seekers + +Don't use for: +- Custom workflows (no docs to scrape) +- Company-specific processes +- Novel methodologies +- Skills requiring original thinking + +Use manual TDD approach instead (Path B). 
+ +## Source + +Integration built on [Skill_Seekers v2.0.0](https://github.com/yusufkaraaslan/Skill_Seekers) +- MIT License +- 3,562 stars +- Active maintenance +- 299 passing tests + +## Quick Reference + +```bash +# Check installation +scripts/check-skill-seekers.sh + +# Install +scripts/install-skill-seekers.sh + +# Scrape documentation +scripts/run-automated.sh + +# Scrape GitHub +scripts/run-github-scrape.sh + +# Scrape PDF +scripts/run-pdf-scrape.sh +``` diff --git a/skills/skill/scripts/check-skill-seekers.sh b/skills/skill/scripts/check-skill-seekers.sh new file mode 100755 index 0000000..a779fee --- /dev/null +++ b/skills/skill/scripts/check-skill-seekers.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Check if Skill_Seekers is installed and working + +SEEKERS_PATH="${SKILL_SEEKERS_PATH:-$HOME/Skill_Seekers}" + +if [ ! -d "$SEEKERS_PATH" ]; then + echo "❌ Skill_Seekers not found at $SEEKERS_PATH" + echo "" + echo "Install with: $( cd "$(dirname "$0")" && pwd )/install-skill-seekers.sh" + exit 1 +fi + +# Check Python +if ! 
command -v python3 &> /dev/null; then + echo "❌ Python 3 not found" + exit 1 +fi + +# Check dependencies +cd "$SEEKERS_PATH" +if python3 -c "import cli.doc_scraper" 2>/dev/null; then + echo "✅ Skill_Seekers installed and working" + echo " Location: $SEEKERS_PATH" + exit 0 +else + echo "⚠️ Skill_Seekers found but dependencies missing" + echo " Fix: cd $SEEKERS_PATH && pip3 install -r requirements.txt" + exit 1 +fi diff --git a/skills/skill/scripts/install-skill-seekers.sh b/skills/skill/scripts/install-skill-seekers.sh new file mode 100755 index 0000000..b22b396 --- /dev/null +++ b/skills/skill/scripts/install-skill-seekers.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# One-command Skill_Seekers installation for skill-factory + +set -e + +INSTALL_DIR="${SKILL_SEEKERS_PATH:-$HOME/Skill_Seekers}" + +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "Skill_Seekers Installation" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" +echo "Install location: $INSTALL_DIR" +echo "" + +# Check if already installed +if [ -d "$INSTALL_DIR" ]; then + echo "⚠️ Skill_Seekers already exists at $INSTALL_DIR" + echo "" + echo "Options:" + echo " 1. Update existing installation" + echo " 2. Reinstall (delete and clone fresh)" + echo " 3. Cancel" + echo "" + read -p "Choice (1-3): " choice + + case $choice in + 1) + echo "📥 Updating..." + cd "$INSTALL_DIR" + git pull + ;; + 2) + echo "🗑️ Removing old installation..." + rm -rf "$INSTALL_DIR" + echo "📥 Cloning fresh copy..." + git clone https://github.com/yusufkaraaslan/Skill_Seekers "$INSTALL_DIR" + ;; + 3) + echo "❌ Cancelled" + exit 0 + ;; + *) + echo "Invalid choice" + exit 1 + ;; + esac +else + echo "📥 Cloning Skill_Seekers..." + git clone https://github.com/yusufkaraaslan/Skill_Seekers "$INSTALL_DIR" +fi + +# Install Python dependencies +echo "" +echo "📦 Installing Python dependencies..." 
+cd "$INSTALL_DIR" + +if command -v pip3 &> /dev/null; then + pip3 install -r requirements.txt +elif command -v pip &> /dev/null; then + pip install -r requirements.txt +else + echo "❌ pip not found. Please install Python 3.10+ with pip" + exit 1 +fi + +# Optional: Setup MCP if Claude Code detected +echo "" +if command -v claude &> /dev/null; then + echo "Claude Code detected." + read -p "Install MCP integration? (y/n): " install_mcp + + if [[ "$install_mcp" =~ ^[Yy]$ ]]; then + if [ -f "./setup_mcp.sh" ]; then + ./setup_mcp.sh + else + echo "⚠️ setup_mcp.sh not found, skipping MCP setup" + fi + fi +fi + +# Verify installation +echo "" +echo "✅ Verifying installation..." +if python3 -c "import cli.doc_scraper" 2>/dev/null; then + echo "✅ Skill_Seekers installed successfully!" +else + echo "⚠️ Installation complete but verification failed" + echo " Try manually: cd $INSTALL_DIR && python3 -c 'import cli.doc_scraper'" + exit 1 +fi + +echo "" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "Installation Complete!" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" +echo "Set environment variable (optional):" +echo " export SKILL_SEEKERS_PATH=$INSTALL_DIR" +echo "" +echo "Test installation:" +echo " cd $INSTALL_DIR && python3 cli/doc_scraper.py --help" +echo "" +echo "Ready to use in skill-factory!" diff --git a/skills/skill/scripts/quality-check.py b/skills/skill/scripts/quality-check.py new file mode 100755 index 0000000..fb115d4 --- /dev/null +++ b/skills/skill/scripts/quality-check.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python3 +""" +Quality checker for Claude skills based on Anthropic best practices. +Returns score 0-10 and lists issues. 
+""" + +import sys +import re +from pathlib import Path + + +def check_description(frontmatter): + """Check description quality (2.0 points)""" + desc = frontmatter.get('description', '') + + score = 0.0 + issues = [] + + if not desc: + issues.append("Missing description") + return score, issues + + # Length check + if len(desc) < 50: + issues.append("Description too short (< 50 chars)") + else: + score += 0.5 + + # Specificity check + vague_words = ['helps with', 'tool for', 'useful', 'handles'] + if any(word in desc.lower() for word in vague_words): + issues.append("Description contains vague phrases") + else: + score += 0.5 + + # when_to_use check + if 'when ' in desc.lower() or 'use when' in desc.lower(): + score += 0.5 + else: + issues.append("Description missing 'when to use' guidance") + + # Third person check + if not any(word in desc.lower() for word in ['you', 'your', 'i ', "i'm"]): + score += 0.5 + else: + issues.append("Description should be third person") + + return min(score, 2.0), issues + + +def check_name(frontmatter): + """Check name convention (0.5 points)""" + name = frontmatter.get('name', '') + + score = 0.0 + issues = [] + + if not name: + issues.append("Missing name") + return score, issues + + # Lowercase and hyphens + if name == name.lower() and '-' in name: + score += 0.25 + else: + issues.append("Name should be lowercase-with-hyphens") + + # Not too generic + if name not in ['helper', 'utils', 'tool', 'skill']: + score += 0.25 + else: + issues.append("Name too generic") + + return min(score, 0.5), issues + + +def check_conciseness(content): + """Check length (1.5 points)""" + lines = content.count('\n') + + if lines < 300: + return 1.5, [] + elif lines < 500: + return 1.0, [] + elif lines < 800: + return 0.5, [f"SKILL.md is {lines} lines (recommend <500)"] + else: + return 0.0, [f"SKILL.md is {lines} lines (way over 500 limit)"] + + +def check_examples(content): + """Check for code examples (1.0 points)""" + code_blocks = 
len(re.findall(r'```[\w]*\n', content)) + + issues = [] + if code_blocks == 0: + issues.append("No code examples found") + return 0.0, issues + elif code_blocks < 3: + issues.append(f"Only {code_blocks} code examples (recommend 5+)") + return 0.5, issues + else: + return 1.0, [] + + +def check_structure(content): + """Check structure (1.0 points)""" + issues = [] + score = 1.0 + + # Required sections + if '## Overview' not in content and '## What' not in content: + issues.append("Missing overview section") + score -= 0.3 + + if '## Usage' not in content and '## How' not in content: + issues.append("Missing usage section") + score -= 0.3 + + # Windows paths check + if '\\' in content and 'C:\\' in content: + issues.append("Contains Windows-style paths (use Unix /)") + score -= 0.4 + + return max(score, 0.0), issues + + +def check_antipatterns(content): + """Check for anti-patterns (1.0 points)""" + issues = [] + score = 1.0 + + # Time-sensitive info + time_patterns = [r'\d{4}-\d{2}-\d{2}', r'last updated', r'as of \d{4}'] + for pattern in time_patterns: + if re.search(pattern, content, re.IGNORECASE): + issues.append("Contains time-sensitive information") + score -= 0.5 + break + + # Inconsistent terminology + if content.count('skill') + content.count('Skill') > 0: + if content.count('plugin') + content.count('Plugin') > 0: + issues.append("Inconsistent terminology (skill vs plugin)") + score -= 0.5 + + return max(score, 0.0), issues + + +def parse_frontmatter(content): + """Extract YAML frontmatter""" + match = re.match(r'^---\s*\n(.*?)\n---\s*\n', content, re.DOTALL) + if not match: + return {} + + fm = {} + for line in match.group(1).split('\n'): + if ':' in line: + key, value = line.split(':', 1) + fm[key.strip()] = value.strip() + + return fm + + +def score_skill(skill_path): + """Score a skill file against Anthropic best practices""" + if not Path(skill_path).exists(): + print(f"Error: {skill_path} not found") + sys.exit(1) + + with open(skill_path, 'r') as f: 
+ content = f.read() + + frontmatter = parse_frontmatter(content) + + total_score = 0.0 + all_issues = [] + + # Run checks + checks = [ + ('Description', check_description, frontmatter), + ('Name', check_name, frontmatter), + ('Conciseness', check_conciseness, content), + ('Examples', check_examples, content), + ('Structure', check_structure, content), + ('Anti-patterns', check_antipatterns, content), + ] + + for name, check_func, arg in checks: + score, issues = check_func(arg) + total_score += score + if issues: + all_issues.extend([f"{name}: {issue}" for issue in issues]) + + # Add partial scores for other criteria (simplified) + total_score += 1.0 # Progressive disclosure (assume good if references/ exists) + total_score += 0.5 # Degree of freedom (assume appropriate) + total_score += 0.5 # Dependencies (assume documented) + total_score += 0.5 # Error handling (assume good) + total_score += 0.5 # Testing (assume some testing) + + return round(total_score, 1), all_issues + + +if __name__ == '__main__': + if len(sys.argv) != 2: + print("Usage: quality-check.py ") + sys.exit(1) + + score, issues = score_skill(sys.argv[1]) + + print(f"{score}/10") + + if issues: + print("\nIssues found:") + for issue in issues: + print(f" - {issue}") + + # Exit code: 0 if >= 8.0, 1 otherwise + sys.exit(0 if score >= 8.0 else 1)