Initial commit

Zhongwei Li
2025-11-30 08:42:37 +08:00
commit 097db974e4
10 changed files with 2407 additions and 0 deletions

.claude-plugin/plugin.json Normal file

@@ -0,0 +1,12 @@
{
  "name": "mcp-skill-creator",
  "description": "Create workflow-optimized skills from MCP servers. Transform MCP tools into personalized skills with progressive disclosure, parallel execution, and embedded user preferences. Based on Anthropic's MCP + Code Execution best practices.",
  "version": "0.0.0-2025.11.28",
  "author": {
    "name": "Meta-Skills Collection",
    "email": "onlrrr@gmail.com"
  },
  "skills": [
    "./skills/mcp-skill-creator"
  ]
}

README.md Normal file

@@ -0,0 +1,3 @@
# mcp-skill-creator
Create workflow-optimized skills from MCP servers. Transform MCP tools into personalized skills with progressive disclosure, parallel execution, and embedded user preferences. Based on Anthropic's MCP + Code Execution best practices.

plugin.lock.json Normal file

@@ -0,0 +1,68 @@
{
  "$schema": "internal://schemas/plugin.lock.v1.json",
  "pluginId": "gh:nemori-ai/skills:mcp-skill-creator",
  "normalized": {
    "repo": null,
    "ref": "refs/tags/v20251128.0",
    "commit": "4fe9a796199a264d199fc49b31bce34dbec94653",
    "treeHash": "f11b997af2799468ec157fe6e16c4c56e08a75c3c7781d3822ed64858bcbe35a",
    "generatedAt": "2025-11-28T10:27:18.244532Z",
    "toolVersion": "publish_plugins.py@0.2.0"
  },
  "origin": {
    "remote": "git@github.com:zhongweili/42plugin-data.git",
    "branch": "master",
    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
  },
  "manifest": {
    "name": "mcp-skill-creator",
    "description": "Create workflow-optimized skills from MCP servers. Transform MCP tools into personalized skills with progressive disclosure, parallel execution, and embedded user preferences. Based on Anthropic's MCP + Code Execution best practices."
  },
  "content": {
    "files": [
      {
        "path": "README.md",
        "sha256": "a7e9a24e4a055c659a0857d8437a38fc299b73344659633b61087bbdef79c02b"
      },
      {
        "path": ".claude-plugin/plugin.json",
        "sha256": "88b82153dbc5f6516dca023720700ef52a38fc866a080fe25dfe2e32abe8b068"
      },
      {
        "path": "skills/mcp-skill-creator/SKILL.md",
        "sha256": "f1a46837885844641245978d6036c8663ade8deb914b8a5d3366802ddacb039f"
      },
      {
        "path": "skills/mcp-skill-creator/LICENSE.txt",
        "sha256": "fd6432f2e4aef05eeb89a06d97a65b90cd5b012555f7656d28b7f30b509f0739"
      },
      {
        "path": "skills/mcp-skill-creator/references/example-config.json",
        "sha256": "49534d18c7ab8c21985018adc9e94532b0991972842b51c7a7c599f8bcb7da7f"
      },
      {
        "path": "skills/mcp-skill-creator/references/quick-start.md",
        "sha256": "94fd4497c98074079ecb009d0960b1b071c41086e3735ca0814b307695ec12b1"
      },
      {
        "path": "skills/mcp-skill-creator/references/mcp-best-practices.md",
        "sha256": "47bb07ea3015965efc2ce78857fc6c2007bd2932d9cec567fc9f70087c94fe95"
      },
      {
        "path": "skills/mcp-skill-creator/scripts/mcp_introspector.py",
        "sha256": "f0f15f9933c55a9c3d61e0fe2b54c96dccd487d76603b410ac363d5a56fa0ab6"
      },
      {
        "path": "skills/mcp-skill-creator/scripts/generate_mcp_wrappers.py",
        "sha256": "b6403c14c1c4b6c5da970e0b85a2a6db27fe3b9055e6fd0443fed79fa340bc0d"
      }
    ],
    "dirSha256": "f11b997af2799468ec157fe6e16c4c56e08a75c3c7781d3822ed64858bcbe35a"
  },
  "security": {
    "scannedAt": null,
    "scannerVersion": null,
    "flags": []
  }
}

skills/mcp-skill-creator/LICENSE.txt Normal file

@@ -0,0 +1,28 @@
MIT License
Copyright (c) 2025 MCP Skill Creator Contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---
This skill implements concepts from Anthropic's MCP engineering blog post:
https://www.anthropic.com/engineering/code-execution-with-mcp
The MCP (Model Context Protocol) is an open standard developed by Anthropic.

skills/mcp-skill-creator/SKILL.md Normal file

@@ -0,0 +1,672 @@
---
name: mcp-skill-creator
description: Meta-skill for creating workflow-optimized skills from MCP servers. Use when users want to create a custom skill that integrates one or more MCP servers into a specialized workflow. The user provides MCP server configurations and describes their work scenario (workflow, preferences, SOPs), and this skill generates a new skill with optimized scripts following Anthropic's MCP + code execution best practices.
---
# MCP-Powered Skill Creator
This meta-skill creates workflow-optimized skills from MCP servers using code execution patterns inspired by [Anthropic's MCP engineering practices](https://www.anthropic.com/engineering/code-execution-with-mcp).
## Core Concept
Transform MCP servers into specialized, personalized workflow skills by:
1. **Progressive Disclosure**: Generate code APIs for MCP tools, loaded on-demand (not all upfront)
2. **Context Efficiency**: Process data in execution environment, minimize token usage
3. **Workflow Optimization**: Combine MCP calls with parallel execution, filtering, control flow
4. **Personalization**: Embed user preferences, SOPs, and domain knowledge into the skill
## When to Use This Skill
Use this skill when a user wants to:
- Create a custom skill from one or more MCP servers
- Optimize a workflow that involves multiple MCP tool calls
- Build reusable automation scripts for specific work scenarios
- Capture personal SOPs and preferences into a skill
## ⚠️ IMPORTANT: Before You Start
**ALWAYS check and install dependencies FIRST before doing anything else:**
```bash
python3 -c "import mcp; print('✓ MCP SDK is installed')" 2>/dev/null || pip3 install mcp --break-system-packages
```
**Automatic Installation Process:**
1. First, check if MCP SDK is installed
2. If not installed, **automatically install it** using `pip3 install mcp --break-system-packages`
3. Verify installation succeeded before continuing
4. Then proceed with skill creation
**DO NOT ask the user to manually install dependencies** - you should handle this automatically as part of the skill creation process.
**Why this matters**: The introspector and generated scripts require the `mcp` package. Installing it upfront ensures a smooth workflow.
## Skill Creation Process
Follow these steps to create an MCP-powered skill. This process combines programmatic MCP infrastructure generation with LLM-driven skill design, following skill-creator principles.
### Overview of Steps
0. **Prerequisites** - Install required dependencies
1. **Gather Input** - Collect MCP servers and workflow description from user
2. **Generate MCP Infrastructure** - Use scripts to introspect servers and create wrappers (programmatic)
3. **Understand the Workflow** - Analyze user's scenario with concrete examples (LLM-driven)
4. **Plan Skill Contents** - Determine what scripts, references, and guidance to include (LLM-driven)
5. **Implement the Skill** - Write workflow scripts and SKILL.md with embedded preferences (LLM-driven)
6. **Package and Deliver** - Create distributable .skill file
### Step 0: Prerequisites (Automatic)
**You should automatically check and install the MCP SDK if needed:**
```bash
python3 -c "import mcp; print('✓ MCP SDK is installed')" 2>/dev/null || pip3 install mcp --break-system-packages
```
**Process:**
1. Check if MCP SDK is already installed
2. If not, install it automatically with `pip3 install mcp --break-system-packages`
3. Verify installation succeeded
4. Inform the user that dependencies have been installed
**Why needed**: The introspector and generated scripts use the `mcp` package to connect to MCP servers.
**DO NOT ask the user to manually install** - handle this automatically as part of the workflow.
### Step 1: Gather Input
Before starting, collect the following from the user:
**MCP Server Configurations** (required):
```json
{
  "mcp_servers": [
    {
      "name": "server-name",
      "command": ["npx", "-y", "@modelcontextprotocol/server-..."]
    }
  ]
}
```
**Workflow Description** (required):
- Clear description of the workflow steps
- Can be numbered list, sequential narrative, or structured steps
- Example: "First I visit the official website, then check ProductHunt, then search Twitter/Reddit, finally generate a report"
**User Preferences** (optional):
- How they like to work
- What data they prioritize
- Quality standards
- Example: "I prefer quantitative metrics over qualitative descriptions"
**Standard Operating Procedures** (optional):
- Company-specific practices
- Domain knowledge
- Best practices
- Example: "Always verify information from at least 3 sources"
If user provides a single configuration file, parse it to extract these components.
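A minimal sketch of that parsing step, assuming the file uses the same top-level keys as `references/example-config.json` (`mcp_servers`, `workflow_description`, `preferences`, `sop`):
```python
import json

def parse_skill_config(path: str) -> dict:
    """Split a single user-provided config file into the four input components."""
    with open(path) as f:
        config = json.load(f)
    return {
        'mcp_servers': config['mcp_servers'],                     # required
        'workflow_description': config['workflow_description'],  # required
        'preferences': config.get('preferences', []),            # optional
        'sop': config.get('sop', []),                             # optional
    }
```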
### Step 2: Generate MCP Infrastructure (Programmatic)
This step generates the MCP client infrastructure and tool discovery utilities.
#### 2.1 Introspect MCP Servers
Use `scripts/mcp_introspector.py` to discover available tools:
```bash
# Create MCP config file in skill directory
echo '{
  "servers": [
    {
      "name": "filesystem",
      "command": ["npx", "-y", "@modelcontextprotocol/server-filesystem", "/path/to/dir"]
    }
  ]
}' > <skill-dir>/mcp_config.json
# Run introspection
python scripts/mcp_introspector.py <skill-dir>/mcp_config.json introspection.json
```
This produces a JSON file with all available tools, their parameters, and descriptions.
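The exact layout can vary by tool version, but based on the fields that `generate_mcp_wrappers.py` reads (`status`, `tools`, `input_schema`), the output is shaped roughly like this; the tool name and description below are illustrative:
```python
# Approximate shape of introspection.json, inferred from how
# generate_mcp_wrappers.py consumes it.
introspection = {
    "filesystem": {
        "status": "success",  # servers whose introspection failed are skipped
        "tools": [
            {
                "name": "search_files",
                "description": "Search for files matching a pattern",
                "input_schema": {  # JSON Schema describing the parameters
                    "type": "object",
                    "properties": {
                        "path": {"type": "string"},
                        "pattern": {"type": "string"}
                    },
                    "required": ["path", "pattern"]
                }
            }
        ]
    }
}
```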
#### 2.2 Generate MCP Client and Tool Discovery
Use `scripts/generate_mcp_wrappers.py` to create the infrastructure:
```bash
python scripts/generate_mcp_wrappers.py introspection.json <skill-dir>
```
This creates:
- `scripts/mcp_client.py` - **Working** MCP client with proper connection management
- `scripts/list_mcp_tools.py` - **Dynamic tool discovery** (Progressive Disclosure)
- `scripts/tools/<server>/` - (Optional) Type-safe wrappers for each tool
**Generated structure**:
```
<skill-dir>/
├── mcp_config.json          # Server configuration
├── scripts/
│   ├── mcp_client.py        # ✅ Working implementation
│   ├── list_mcp_tools.py    # 🆕 View tool docs on-demand
│   └── workflows/           # (You'll create these)
│       └── your_workflow.py
```
#### 2.3 How to View MCP Tool Documentation
**Progressive Disclosure** means tools are discovered on-demand, not pre-loaded. Three ways to view docs:
1. **Dynamic Query** (Recommended):
```bash
cd <skill-dir>/scripts
python list_mcp_tools.py
```
Shows all available tools with parameters and descriptions.
2. **Generate Static Reference**:
```bash
python list_mcp_tools.py > references/mcp_tools_reference.txt
```
Save for offline reference.
3. **In SKILL.md**:
List only the most commonly used tools; full docs are available via method 1 or 2.
**Key Insight**: You don't need wrapper files for each tool. Just use `call_mcp_tool()` directly:
```python
from mcp_client import call_mcp_tool
result = await call_mcp_tool('filesystem', 'search_files', {
    'path': '/path',
    'pattern': 'myfile'
})
```
### Step 3: Understand the Workflow (LLM-Driven)
Now analyze the user's workflow description to understand what this skill needs to accomplish. Similar to skill-creator's Step 1.
**Ask clarifying questions if needed**:
- "What specific data sources do you use?"
- "How do you handle cases where data is missing?"
- "What does the final output look like?"
- "Are there any steps that must happen sequentially vs in parallel?"
**Identify workflow characteristics**:
- Which steps are data fetching operations (candidates for parallelization)
- Which steps are data processing (candidates for execution environment filtering)
- Which steps have complex control flow (loops, conditionals, polling)
- What intermediate state needs to be preserved
**Example Analysis**:
User says: "I research products by checking the official site, ProductHunt, Twitter, and Reddit, then create a report"
Analysis:
- 4 data fetch operations (parallel opportunity)
- 1 aggregation step (data filtering opportunity)
- 1 output generation (report creation)
- Sequential dependency: Fetch all → Aggregate → Generate report
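This analysis maps almost mechanically onto a script skeleton; here is a minimal sketch (the fetch/aggregate/render helpers are placeholders for illustration):
```python
import asyncio

async def research(product_url: str, product_name: str) -> str:
    # Fetch all four sources concurrently (independent operations)
    official, ph, tweets, posts = await asyncio.gather(
        fetch_official_site(product_url),  # placeholder helper
        fetch_producthunt(product_name),   # placeholder helper
        search_twitter(product_name),      # placeholder helper
        search_reddit(product_name),       # placeholder helper
    )
    # Aggregate in the execution environment (sequential dependency)
    findings = aggregate([official, ph, tweets, posts])
    # Generate the report last
    return render_report(findings)
```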
### Step 4: Plan Skill Contents (LLM-Driven)
Based on the workflow analysis, determine what to include in the skill. Follow skill-creator principles.
#### 4.1 Decide on Scripts vs Guidance
**Create scripts when**:
- Same code would be rewritten repeatedly
- Complex workflow with multiple MCP calls
- Parallel execution can improve performance
- Data filtering reduces context usage significantly
- Polling/monitoring patterns
**Use text guidance when**:
- Workflow varies significantly each time
- Simple single-tool operations
- User needs flexibility in approach
- Context helps more than code
#### 4.2 Plan Script Structure
For each workflow script to create, determine:
**Script purpose**: What part of the workflow does it handle?
**MCP tools needed**: Which tools from which servers?
**Optimization patterns**:
- Parallel execution: `asyncio.gather()` for independent fetches
- Data filtering: Process in execution environment before returning
- Control flow: Loops, conditionals, error handling
- State persistence: Save intermediate results to filesystem
**Parameters**: What inputs does the script need?
**Output**: What does it return? (Prefer summaries over full data)
#### 4.3 Plan SKILL.md Structure
Determine what goes in SKILL.md:
**Essential**:
- Workflow overview (user's mental model)
- When to use each script
- User preferences (embedded as guidance)
- SOPs (embedded as procedural instructions)
- Available MCP tools (reference, not full docs)
**Optional references/**:
- Detailed MCP tool catalog
- Complex schemas or API documentation
- Additional examples
- Troubleshooting guides
### Step 5: Implement the Skill (LLM-Driven)
Now create the actual skill files. This is where you write code and documentation.
#### 5.1 Write Workflow Scripts
For each planned script, create `scripts/workflows/<script_name>.py`:
**Follow these patterns from Anthropic's MCP best practices**:
**Pattern 1: Parallel Fetch + Aggregate**
```python
import asyncio

async def research_pipeline(product_url: str, product_name: str) -> dict:
    """Complete research workflow with parallel data gathering"""
    # Parallel fetch from multiple sources
    official_task = google_devtools.fetch_page(product_url)
    twitter_task = x_com.search_tweets(f'"{product_name}"')
    reddit_task = reddit.search_discussions(product_name)

    # Execute concurrently (3x faster than sequential)
    official, tweets, posts = await asyncio.gather(
        official_task, twitter_task, reddit_task
    )

    # Filter and aggregate in execution environment
    # (keeps raw data out of context)
    key_features = extract_features(official, top_n=10)
    sentiment = analyze_sentiment([tweets, posts])
    highlights = extract_highlights(tweets + posts, top_n=5)

    # Return summary (not full data)
    return {
        'key_features': key_features,
        'sentiment': sentiment,
        'highlights': highlights,
        'source_count': len(tweets) + len(posts)
    }
```
**Pattern 2: Polling/Monitoring**
```python
import asyncio
import time

async def wait_for_deployment(channel: str, keyword: str, timeout: int = 300):
    """Poll Slack channel for deployment completion"""
    start = time.time()
    while time.time() - start < timeout:
        messages = await slack.get_channel_history(channel, limit=10)
        if any(keyword in m['text'].lower() for m in messages):
            return {'status': 'complete', 'message': messages[0]}
        await asyncio.sleep(10)
    return {'status': 'timeout'}
```
**Pattern 3: Bulk Processing**
```python
import asyncio

async def sync_contacts(sheet_id: str, crm_object: str):
    """Sync contacts from sheet to CRM (privacy-preserving)"""
    # Load data once
    contacts = await google_sheets.get_sheet(sheet_id)

    # Filter in execution environment (not in context)
    valid = [c for c in contacts if validate_email(c['email'])]

    # Batch update (PII never enters model context);
    # chunked() is a small helper that yields fixed-size slices of a list
    results = []
    for batch in chunked(valid, batch_size=50):
        batch_results = await asyncio.gather(*[
            crm.update_record(crm_object, contact)
            for contact in batch
        ])
        results.extend(batch_results)

    # Return summary only
    return {
        'processed': len(valid),
        'successful': sum(1 for r in results if r['success'])
    }
```
**Key Principles for Scripts**:
- Use `async`/`await` for IO-bound MCP calls
- Combine related operations into single scripts
- Filter/aggregate data before returning to model
- Return summaries, not raw data
- Include type hints and docstrings
- Add helper functions for data processing
#### 5.2 Write SKILL.md
Create the SKILL.md following skill-creator structure, with MCP-specific additions.
**YAML Frontmatter**:
```yaml
---
name: <skill-name>
description: <Brief description of workflow + when to use + MCP servers involved>
---
```
**Body Structure**:
```markdown
# [Skill Name]
[Overview of what this skill does]
## Prerequisites
This skill requires the MCP SDK. **The scripts will automatically check for it and point you to the install command if it's missing.**
If you want to manually verify or install:
```bash
python3 -c "import mcp; print('✓ MCP SDK ready!')" 2>/dev/null || pip3 install mcp --break-system-packages
```
**Why needed**: This skill uses MCP tools to [brief explanation of what MCP servers do]. The workflow scripts require the `mcp` package to connect to MCP servers.
**Note**: When you run any workflow script, it will automatically check for MCP SDK and display a helpful error message if not installed.
## Workflow Overview
[User's workflow steps in their own language]
[USER PREFERENCES - Embedded as guidance]
When using this skill:
- [Preference 1]
- [Preference 2]
[SOPs - Embedded as procedural instructions]
Standard procedure:
1. [SOP step 1]
2. [SOP step 2]
## Quick Start
**Before running workflows, ensure MCP SDK is installed** (see Prerequisites above).
[Simple example of using the main workflow script]
## Available Workflows
### [Primary Workflow Script]
**Use when**: [Scenario]
**Location**: `scripts/workflows/<script>.py`
**Usage**:
```python
from scripts.workflows import workflow_name
result = await workflow_name(params)
```
**Optimizations**:
- Parallel execution of [X] data sources
- Context-efficient data filtering
- [Other optimizations]
[Repeat for other workflow scripts]
## MCP Tools Available
[Brief overview of integrated MCP servers]
### [Server Name]
**Tools**: [Count] available
**Location**: `scripts/tools/[server]/`
**Key tools**: [List 3-5 most relevant]
**Discovery**: Use `ls scripts/tools/[server]/` to see all tools
[Repeat for each server]
## Advanced Usage
[Guidance on combining scripts, customization, etc.]
## Performance Notes
[Context optimization benefits, speedups from parallelization]
```
**Critical**: Embed user preferences and SOPs directly into the workflow guidance, not as separate sections. They should inform HOW to use the skill.
**Example of embedded preferences**:
```markdown
## Workflow Overview
This skill automates product research with the following steps:
1. Official website analysis
2. Community feedback gathering
3. Report generation
**Research approach**: Always prioritize quantitative metrics (user counts, ratings) over qualitative descriptions. Recent information (last 6 months) is valued over older reviews. Cross-reference official claims against community feedback to identify contradictions.
```
#### 5.3 Create References (If Needed)
Only create `references/` files if SKILL.md would exceed 500 lines or if there's detailed reference material that doesn't belong in the main workflow.
Possible reference files:
- `references/mcp_tools.md` - Detailed catalog of all MCP tools
- `references/schemas.md` - Data schemas for APIs
- `references/examples.md` - Additional usage examples
### Step 6: Package and Deliver
Once the skill is complete:
1. **Review the skill structure**
- SKILL.md is clear and under 500 lines
- Scripts are tested (or marked as TODO)
- User preferences are embedded
- MCP tool wrappers are generated
2. **Package the skill**
```bash
python /mnt/skills/public/skill-creator/scripts/package_skill.py <skill-dir>
```
3. **Provide to user**
- Share the .skill file
- Explain key workflows
- Highlight personalized aspects (preferences, SOPs)
## Key Differences from Standard Skill-Creator
This meta-skill extends skill-creator with MCP-specific capabilities:
### 1. MCP Infrastructure (Unique)
**Standard skill-creator**: You manually write scripts or provide instructions
**MCP skill-creator**: Programmatically generates type-safe tool wrappers from MCP servers, enabling progressive disclosure
### 2. Optimization Focus
**Standard skill-creator**: General workflow guidance
**MCP skill-creator**: Specific optimization patterns (parallel execution, data filtering, control flow, privacy)
### 3. Personalization Depth
**Standard skill-creator**: Domain knowledge in references/
**MCP skill-creator**: User preferences and SOPs embedded directly into workflow guidance
## Best Practices
### When to Create Scripts vs Text Guidance
**Create scripts for**:
- End-to-end workflows with 3+ steps
- Operations that benefit from parallelization
- Data processing that should happen in execution environment
- Polling/monitoring patterns
- Bulk operations
**Use text guidance for**:
- Ad-hoc tool usage
- Workflows with high variability
- Simple single-tool operations
- Exploratory tasks
### Embedding User Preferences
❌ **Don't**: Create separate "User Preferences" section
✅ **Do**: Weave preferences into workflow guidance
Example:
```markdown
## Workflow Overview
This skill follows your research methodology:
1. Start with official sources (per your SOP)
2. Gather community feedback in parallel
3. Cross-reference claims (highlighting contradictions as you prefer)
4. Generate report with quantitative metrics emphasized
```
### Optimization Patterns
Always consider these opportunities when analyzing workflows:
**Parallel Execution**: Any independent fetch operations
**Data Filtering**: Processing that reduces data size
**Control Flow**: Loops, conditionals, error handling
**State Persistence**: Long-running or resumable workflows
**Privacy**: Sensitive data that shouldn't enter context
## Example: Product Research Skill
**User Input**:
```
MCP Servers: puppeteer, twitter, reddit, producthunt
Workflow: Research products by visiting official site, checking ProductHunt,
searching Twitter/Reddit, then creating markdown report
Preferences: Quantitative metrics > qualitative, recent info > old
SOPs: Start with official sources, cross-reference claims, cite sources
```
**Generated Skill**:
`SKILL.md`:
```markdown
---
name: product-research-workflow
description: Automated product research integrating official sources and community platforms
---
# Product Research Workflow
Research internet products efficiently by gathering data from official sources
and community platforms, with emphasis on quantitative metrics and recent information.
## Workflow Overview
This skill implements your standard research process:
1. **Official Source Analysis**: Visit product website and extract key features,
pricing, and positioning (per your SOP: always start with official sources)
2. **Community Intelligence**: Gather feedback from ProductHunt, Twitter, and Reddit
in parallel (optimized for speed)
3. **Cross-Reference**: Identify contradictions between official claims and community
feedback (your preference for critical analysis)
4. **Report Generation**: Create comprehensive markdown report with quantitative
metrics emphasized (ratings, user counts, pricing comparisons)
## Quick Start
```python
from scripts.workflows import product_research_pipeline
report = await product_research_pipeline(
    product_url='https://example.com',
    product_name='ExampleApp'
)
```
## Available Workflows
### product_research_pipeline
**Use when**: Researching any new internet product or SaaS tool
**Optimizations**:
- 3x faster via parallel social media gathering
- Context-efficient: processes 1000s of posts, returns top 10 insights
- Recent info prioritized (last 6 months)
[... rest of SKILL.md with embedded preferences and SOPs ...]
```
`scripts/workflows/product_research_pipeline.py`:
```python
import asyncio

async def product_research_pipeline(product_url: str, product_name: str):
    # Official source
    official = await puppeteer.fetch_page(product_url)

    # Parallel community research (3x faster)
    twitter, reddit, ph = await asyncio.gather(
        twitter_mcp.search_tweets(f'"{product_name}"', recent_days=180),
        reddit_mcp.search(product_name, time_filter='6months'),
        producthunt_mcp.get_product(product_name)
    )

    # Filter in execution env (user preference: quantitative focus)
    metrics = extract_quantitative_metrics(official)
    sentiment = calculate_sentiment_score([twitter, reddit, ph])
    recent_feedback = filter_recent(twitter + reddit, days=180)
    contradictions = find_contradictions(official, recent_feedback)

    # Return summary (not raw data)
    return {
        'official_metrics': metrics,
        'sentiment_score': sentiment,
        'recent_mention_count': len(recent_feedback),
        'contradictions': contradictions[:5],
        'top_praise': extract_top_feedback(recent_feedback, 'positive', 3),
        'top_complaints': extract_top_feedback(recent_feedback, 'negative', 3)
    }
```
## References
For detailed MCP optimization patterns and examples:
- `references/mcp-best-practices.md` - Comprehensive guide to MCP + code execution
- `references/quick-start.md` - Step-by-step tutorial
- `references/example-config.json` - Complete configuration example

skills/mcp-skill-creator/references/example-config.json Normal file

@@ -0,0 +1,37 @@
{
  "_comment": "Example configuration for creating an MCP-powered skill",
  "mcp_servers": [
    {
      "name": "puppeteer",
      "command": ["npx", "-y", "@modelcontextprotocol/server-puppeteer"]
    },
    {
      "name": "twitter",
      "command": ["npx", "-y", "@modelcontextprotocol/server-twitter"]
    },
    {
      "name": "reddit",
      "command": ["npx", "-y", "@modelcontextprotocol/server-reddit"]
    }
  ],
  "workflow_description": "When I research a new internet product:\n\n1. First, I visit the official website and read about their features, pricing, and positioning\n2. Then I search Twitter for recent mentions and community sentiment\n3. I also check Reddit discussions in relevant subreddits like r/SaaS and r/ProductHunt\n4. Finally, I combine all findings into a comprehensive markdown report with:\n - Executive summary\n - Key features and differentiators\n - Pricing analysis\n - Community sentiment\n - Pros and cons from users\n - My recommendations",
  "preferences": [
    "I prefer to see quantitative metrics when available (user counts, ratings, conversion rates)",
    "I value recent information (last 6 months) much more than old reviews",
    "I like to highlight contradictions between what the company claims and what users actually experience",
    "For pricing, I want to see how it compares to direct competitors"
  ],
  "sop": [
    "Always start with official sources to establish ground truth",
    "Cross-reference official claims against community feedback",
    "Look for red flags like frequent complaints about specific features or missing functionality",
    "Prioritize reviews from actual users over promotional content or affiliate marketing",
    "Include direct quotes from notable reviews or discussions",
    "End report with actionable recommendations based on research"
  ]
}

skills/mcp-skill-creator/references/mcp-best-practices.md Normal file

@@ -0,0 +1,367 @@
# MCP Code Execution Best Practices
This reference document provides detailed guidance on implementing efficient MCP integrations using code execution patterns, based on [Anthropic's MCP engineering blog post](https://www.anthropic.com/engineering/code-execution-with-mcp).
## Core Principles
### 1. Progressive Disclosure
**Problem**: Loading all MCP tool definitions upfront wastes context window space.
**Solution**: Present tools as code APIs on a filesystem, allowing models to load only what they need.
```
scripts/
├── tools/
│   ├── google-drive/
│   │   ├── getDocument.ts
│   │   ├── listFiles.ts
│   │   └── index.ts
│   └── salesforce/
│       ├── updateRecord.ts
│       └── index.ts
```
**Benefits**:
- Reduces initial context from 150,000 tokens to 2,000 tokens (98.7% reduction)
- Scales to thousands of tools without overwhelming the model
- Tools loaded on-demand as needed
**Implementation**:
```python
import os

# Agent explores filesystem
tools_available = os.listdir('scripts/tools/google-drive/')

# Agent reads only needed tool definitions
with open('scripts/tools/google-drive/getDocument.py') as f:
    tool_code = f.read()
```
### 2. Context-Efficient Data Handling
**Problem**: Intermediate results flowing through context window consume excessive tokens.
**Bad Example**:
```python
# Without code execution - all data flows through context:
#   TOOL CALL: gdrive.getSheet(sheetId: 'abc123')
#   -> returns 10,000 rows to the model
#   -> model filters in context
#   -> passes filtered data to the next tool
```
**Good Example**:
```python
# With code execution - filter in execution environment
sheet_data = await gdrive.getSheet({'sheetId': 'abc123'})

# Filter in execution environment (no context cost)
pending_orders = [
    row for row in sheet_data
    if row['Status'] == 'pending' and row['Amount'] > 1000
]

# Only return summary to model
print(f"Found {len(pending_orders)} high-value pending orders")
print(pending_orders[:5])  # Show first 5 for review
```
**Benefits**:
- Processes 10,000 rows but only sends 5 to model
- Reduces token usage by 99.5%
- Faster execution, lower costs
### 3. Parallel Execution
**Problem**: Sequential tool calls waste time when operations are independent.
**Bad Example**:
```python
# Sequential execution
twitter_data = await x_com.search_tweets(query)
# Wait for Twitter...
reddit_data = await reddit.search_discussions(query)
# Wait for Reddit...
```
**Good Example**:
```python
# Parallel execution with asyncio.gather()
twitter_task = x_com.search_tweets(query)
reddit_task = reddit.search_discussions(query)
producthunt_task = producthunt.search(query)

# Execute all concurrently
results = await asyncio.gather(
    twitter_task,
    reddit_task,
    producthunt_task
)
twitter_data, reddit_data, ph_data = results
```
**Benefits**:
- 3x faster execution (if all APIs take similar time)
- Better user experience
- Efficient resource utilization
### 4. Complex Control Flow
**Problem**: Implementing loops and conditionals via sequential tool calls is inefficient.
**Bad Example**:
```python
# Agent alternates between tool calls and sleep
TOOL CALL: slack.getMessages()
no deployment message
SLEEP: 5 seconds
TOOL CALL: slack.getMessages()
no deployment message
SLEEP: 5 seconds
# ... repeat many times
```
**Good Example**:
```python
import asyncio
import time

# Implement control flow in code
async def wait_for_deployment(channel: str, timeout: int = 300):
    start_time = time.time()
    while time.time() - start_time < timeout:
        messages = await slack.getChannelHistory(channel, limit=10)
        if any('deployment complete' in m['text'].lower() for m in messages):
            return {'status': 'success', 'message': messages[0]}
        await asyncio.sleep(10)
    return {'status': 'timeout'}
```
**Benefits**:
- Single code execution instead of 60+ tool calls
- Faster time to first token
- More reliable error handling
### 5. Privacy-Preserving Operations
**Problem**: Sensitive data flowing through model context raises privacy concerns.
**Solution**: Keep sensitive data in execution environment, only share summaries.
```python
# Load sensitive customer data
customers = await gdrive.getSheet({'sheetId': 'customer_contacts'})

# Process PII in execution environment (never shown to model)
for customer in customers:
    await salesforce.updateRecord({
        'objectType': 'Lead',
        'recordId': customer['salesforce_id'],
        'data': {
            'Email': customer['email'],  # PII stays in execution env
            'Phone': customer['phone'],  # PII stays in execution env
            'Name': customer['name']     # PII stays in execution env
        }
    })

# Only summary goes to model
print(f"Updated {len(customers)} customer records")
print("✓ All contact information synchronized")
```
**Optional Enhancement**: Tokenize PII automatically in MCP client:
```python
# What model sees (if PII is tokenized):
[
    {'email': '[EMAIL_1]', 'phone': '[PHONE_1]', 'name': '[NAME_1]'},
    {'email': '[EMAIL_2]', 'phone': '[PHONE_2]', 'name': '[NAME_2]'}
]
# Real data flows Google Sheets → Salesforce without entering model context
```
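A minimal sketch of such a tokenizer, assuming the MCP client gives you a place to rewrite results before they reach the model (only the substitution logic is shown; the client hook itself is left out):
```python
_pii_store: dict[str, str] = {}  # placeholder -> real value, execution env only

def tokenize_pii(records: list[dict], fields=('email', 'phone', 'name')) -> list[dict]:
    """Replace PII fields with opaque placeholders before results reach the model."""
    tokenized = []
    for record in records:
        safe = dict(record)
        for field in fields:
            if field in safe:
                placeholder = f"[{field.upper()}_{len(_pii_store) + 1}]"
                _pii_store[placeholder] = safe[field]
                safe[field] = placeholder
        tokenized.append(safe)
    return tokenized

def detokenize(value: str) -> str:
    """Resolve a placeholder back to its real value inside the execution environment."""
    return _pii_store.get(value, value)
```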
### 6. State Persistence and Skills
**Problem**: Agents cannot build on previous work without memory.
**Solution**: Use filesystem to persist intermediate results and reusable functions.
**State Persistence**:
```python
# Save intermediate results
import json

intermediate_data = await fetch_and_process()
with open('./workspace/state.json', 'w') as f:
    json.dump(intermediate_data, f)

# Later execution picks up where it left off
with open('./workspace/state.json') as f:
    state = json.load(f)
```
**Skill Evolution**:
```python
# Save reusable function as a skill
# In ./skills/save-sheet-as-csv.py
import pandas as pd
from scripts.tools import gdrive

async def save_sheet_as_csv(sheet_id: str, output_path: str):
    """
    Reusable function to export Google Sheet as CSV
    """
    data = await gdrive.getSheet({'sheetId': sheet_id})
    df = pd.DataFrame(data)
    df.to_csv(output_path, index=False)
    return output_path

# Later, in any workflow:
from skills.save_sheet_as_csv import save_sheet_as_csv
csv_path = await save_sheet_as_csv('abc123', './data/export.csv')
```
**Add SKILL.md** to create a structured skill:
```markdown
---
name: sheet-csv-exporter
description: Export Google Sheets to CSV format
---
# Sheet CSV Exporter
Provides a reusable function for exporting Google Sheets to CSV files.
## Usage
```python
from skills.save_sheet_as_csv import save_sheet_as_csv
csv_path = await save_sheet_as_csv(
sheet_id='your-sheet-id',
output_path='./output/data.csv'
)
```
```
## Token Usage Comparison
| Approach | Token Usage | Latency | Privacy |
|----------|-------------|---------|---------|
| **Direct Tool Calls** | 150,000+ tokens (all tool definitions loaded) | High (sequential calls) | ⚠️ All data through context |
| **Code Execution with MCP** | 2,000 tokens (load on demand) | Low (parallel execution) | ✅ Data filtered/tokenized |
**Savings**: 98.7% token reduction, 3-5x faster execution
## When to Use Code Execution
✅ **Use code execution when**:
- Working with many MCP tools (>10 tools)
- Processing large datasets (>1000 rows)
- Need parallel API calls
- Workflow involves loops/conditionals
- Privacy concerns with sensitive data
- Building reusable workflows
❌ **Avoid code execution when**:
- Simple single tool call
- Small data amounts
- Quick ad-hoc tasks
- No performance concerns
- Execution environment unavailable
## Implementation Considerations
### Security
- Sandbox execution environment properly
- Limit resource usage (CPU, memory, time)
- Monitor for malicious code patterns
- Validate all inputs
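As a rough sketch of the time and memory limits using only the standard library (`resource` is Unix-only, and a real sandbox would add process isolation on top):
```python
import asyncio
import resource

def apply_resource_limits(cpu_seconds: int = 30, max_bytes: int = 512 * 1024 * 1024):
    """Cap CPU time and address space for the current process (Unix only)."""
    resource.setrlimit(resource.RLIMIT_CPU, (cpu_seconds, cpu_seconds))
    resource.setrlimit(resource.RLIMIT_AS, (max_bytes, max_bytes))

async def run_with_timeout(coro, seconds: float = 60.0):
    """Cancel a workflow coroutine that exceeds the wall-clock budget."""
    return await asyncio.wait_for(coro, timeout=seconds)
```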
### Error Handling
```python
try:
    result = await mcp_tool(params)
except Exception as e:
    # Log error
    logger.error(f"MCP tool failed: {e}")
    # Return graceful fallback
    return {'error': str(e), 'status': 'failed'}
```
### Testing
- Test scripts in isolation
- Mock MCP tool responses
- Verify error handling
- Check performance gains
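For example, a sketch of mocking at the `call_mcp_tool` boundary with the standard library; the `workflows.deploy` module path and its `wait_for_deployment` function are hypothetical, so point the patch at wherever your workflow actually imports `call_mcp_tool` from:
```python
import asyncio
from unittest.mock import AsyncMock, patch

def test_wait_for_deployment_with_mocked_mcp():
    fake_messages = [{'text': 'Deployment complete'}]
    # Replace the MCP boundary with a canned response; no server is started
    with patch('workflows.deploy.call_mcp_tool',
               new=AsyncMock(return_value=fake_messages)):
        from workflows.deploy import wait_for_deployment  # hypothetical module
        result = asyncio.run(wait_for_deployment('#releases', 'deployment complete'))
    assert result['status'] == 'complete'
```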
## Examples from Production
### Example 1: Document Processing Pipeline
```python
import asyncio

async def process_contracts(folder_id: str):
    """Process all contracts in a folder"""
    # 1. List all files (single MCP call)
    files = await gdrive.listFiles({'folderId': folder_id})

    # 2. Filter in execution environment
    pdf_files = [f for f in files if f['type'] == 'pdf']

    # 3. Parallel processing
    results = await asyncio.gather(*[
        extract_contract_data(f['id'])
        for f in pdf_files
    ])

    # 4. Aggregate and save
    summary = aggregate_contract_summary(results)

    # Only summary to model
    return {
        'total_contracts': len(pdf_files),
        'processed': len(results),
        'summary': summary[:500]  # Truncate for context
    }
```
### Example 2: Social Media Monitoring
```python
import asyncio

async def monitor_brand_mentions(brand: str):
    """Monitor brand across multiple platforms"""
    # Parallel fetch from multiple sources
    twitter_task = x_com.search_tweets(f'"{brand}"')
    reddit_task = reddit.search(brand, subreddits=['technology'])
    hn_task = hackernews.search(brand)

    mentions = await asyncio.gather(
        twitter_task, reddit_task, hn_task
    )

    # Sentiment analysis in execution environment
    sentiment = analyze_sentiment_batch(mentions)

    # Filter and aggregate
    recent_mentions = filter_last_24h(mentions)
    key_insights = extract_key_insights(recent_mentions)

    return {
        'mention_count': len(recent_mentions),
        'sentiment': sentiment,
        'key_insights': key_insights,
        'platforms': {
            'twitter': len(mentions[0]),
            'reddit': len(mentions[1]),
            'hackernews': len(mentions[2])
        }
    }
```
## Further Reading
- [MCP Official Documentation](https://modelcontextprotocol.io/)
- [Anthropic MCP Engineering Blog](https://www.anthropic.com/engineering/code-execution-with-mcp)
- [Cloudflare Code Mode](https://blog.cloudflare.com/code-mode/)

skills/mcp-skill-creator/references/quick-start.md Normal file

@@ -0,0 +1,497 @@
# Quick Start Guide
This guide will help you create your first MCP-powered skill using the mcp-skill-creator meta-skill.
## Prerequisites
1. Python 3.10 or higher
2. MCP SDK installed:
```bash
pip install mcp --break-system-packages
```
3. Node.js (for running MCP servers via npx)
## Overview
Creating an MCP-powered skill is a two-phase process:
**Phase 1 (Programmatic)**: Generate MCP infrastructure using provided scripts
- Introspect MCP servers to discover tools
- Generate type-safe Python wrappers
**Phase 2 (LLM-Driven)**: Create the skill following skill-creator principles
- Analyze workflow and identify optimization opportunities
- Write workflow scripts combining MCP tools
- Embed user preferences and SOPs into SKILL.md
- Package the final skill
## Step-by-Step Process
### Step 1: Gather Input from User
Collect the following information:
**MCP Servers** (required):
```json
{
  "mcp_servers": [
    {
      "name": "puppeteer",
      "command": ["npx", "-y", "@modelcontextprotocol/server-puppeteer"]
    },
    {
      "name": "twitter",
      "command": ["npx", "-y", "@modelcontextprotocol/server-twitter"]
    }
  ]
}
```
**Workflow Description** (required):
- User's step-by-step workflow
- Can be natural language, numbered list, or sequential narrative
- Example: "First I visit the official website, then check Twitter and Reddit, finally create a report"
**Preferences** (optional):
- "I prefer quantitative metrics over qualitative descriptions"
- "Recent information is more valuable than old"
- "Always cross-reference claims"
**SOPs** (optional):
- "Start with official sources before checking community feedback"
- "Cite all sources in the final report"
- "Highlight contradictions between official and community perspectives"
### Step 2: Generate MCP Infrastructure (Programmatic)
#### 2.1 Create MCP Configuration File
```bash
# Save MCP server config
cat > mcp_config.json << EOF
{
  "servers": [
    {
      "name": "puppeteer",
      "command": ["npx", "-y", "@modelcontextprotocol/server-puppeteer"]
    },
    {
      "name": "twitter",
      "command": ["npx", "-y", "@modelcontextprotocol/server-twitter"]
    }
  ]
}
EOF
```
#### 2.2 Introspect MCP Servers
```bash
python scripts/mcp_introspector.py mcp_config.json introspection.json
```
This discovers all available tools from the MCP servers and saves them to `introspection.json`.
#### 2.3 Generate Tool Wrappers
```bash
mkdir -p my-skill/scripts
python scripts/generate_mcp_wrappers.py introspection.json my-skill
```
This creates:
- `my-skill/scripts/mcp_client.py` - Base MCP client
- `my-skill/scripts/tools/<server>/<tool>.py` - Type-safe wrappers for each tool
- `my-skill/scripts/tools/<server>/__init__.py` - Package initialization
**Generated structure**:
```
my-skill/
└── scripts/
    ├── mcp_client.py
    └── tools/
        ├── puppeteer/
        │   ├── __init__.py
        │   ├── fetch_page.py
        │   └── screenshot.py
        └── twitter/
            ├── __init__.py
            └── search_tweets.py
```
### Step 3: Analyze Workflow (LLM-Driven)
Now use Claude to analyze the user's workflow description:
**Ask yourself**:
- What are the distinct workflow steps?
- Which steps are data fetching (candidates for parallelization)?
- Which steps are data processing (candidates for execution environment filtering)?
- Are there any loops, conditionals, or polling patterns?
- What intermediate state needs to be preserved?
**Example Analysis**:
User workflow: "Research products by checking official site, Twitter, Reddit, then create report"
Analysis:
- **Step 1**: Fetch official website (data fetch)
- **Step 2**: Search Twitter (data fetch - can parallelize with step 3)
- **Step 3**: Search Reddit (data fetch - can parallelize with step 2)
- **Step 4**: Aggregate data (data processing - filter in execution env)
- **Step 5**: Generate report (output)
Optimization opportunities:
- Parallel execution: Steps 2-3
- Data filtering: Step 4 (process 1000s of posts, return top 10)
- Context efficiency: Return summary, not raw data
### Step 4: Plan Skill Contents (LLM-Driven)
Based on the workflow analysis, decide what to include:
**Should you create a script?**
- ✅ Yes: Multi-step workflow, parallel execution opportunities, data filtering needed
- ❌ No: Single tool call, high variability, exploratory task
**For each script, plan**:
- Purpose and scope
- Which MCP tools it uses
- Optimization patterns (parallel, filtering, control flow)
- Parameters and return value
**Example Plan**:
Script: `scripts/workflows/product_research_pipeline.py`
- Purpose: Complete product research workflow
- MCP tools: puppeteer.fetch_page, twitter.search_tweets, reddit.search_discussions
- Optimizations:
- Parallel execution of Twitter + Reddit searches
- Data filtering: Extract top 10 insights from 1000+ posts
- Return summary, not raw data
- Parameters: product_url, product_name
- Returns: {features, sentiment, highlights}
### Step 5: Implement the Skill (LLM-Driven)
#### 5.1 Create Workflow Scripts
Write `my-skill/scripts/workflows/product_research_pipeline.py`:
```python
import asyncio
from scripts.tools import puppeteer, twitter, reddit

async def product_research_pipeline(product_url: str, product_name: str):
    """
    Complete product research workflow

    Optimizations:
    - Parallel Twitter + Reddit searches (2x faster)
    - Data filtering in execution environment
    - Returns summary, not raw data
    """
    # Fetch official website
    official = await puppeteer.fetch_page(product_url)

    # Parallel social media research
    twitter_data, reddit_data = await asyncio.gather(
        twitter.search_tweets(f'"{product_name}"'),
        reddit.search_discussions(product_name, subreddits=['SaaS'])
    )

    # Process in execution environment (not in context)
    key_features = extract_features(official, top_n=10)
    sentiment = analyze_sentiment([twitter_data, reddit_data])
    highlights = extract_highlights(twitter_data + reddit_data, top_n=5)

    # Return summary only
    return {
        'key_features': key_features,
        'sentiment': sentiment,
        'highlights': highlights,
        'mention_count': len(twitter_data) + len(reddit_data)
    }

def extract_features(html: str, top_n: int) -> list:
    """Extract key features from website HTML"""
    # TODO: Implement feature extraction logic
    return []

def analyze_sentiment(social_data: list) -> dict:
    """Analyze sentiment from social media posts"""
    # TODO: Implement sentiment analysis
    return {'score': 0, 'summary': ''}

def extract_highlights(posts: list, top_n: int) -> list:
    """Extract most relevant highlights from posts"""
    # TODO: Implement highlight extraction
    return []
```
**Key principles**:
- Use `async`/`await` for IO-bound operations
- Combine related MCP calls into single scripts
- Filter/aggregate data in execution environment
- Return summaries, not raw data
- Include helper functions for data processing
#### 5.2 Write SKILL.md
Create `my-skill/SKILL.md` embedding user preferences and SOPs:
```markdown
---
name: product-research-workflow
description: Automated product research integrating official sources and social platforms with emphasis on quantitative metrics and recent information
---
# Product Research Workflow
Efficiently research internet products by gathering data from official sources
and social platforms, following your standard research methodology.
## Workflow Overview
This skill implements your research process with built-in optimizations:
1. **Official Source Analysis**: Visit product website to extract key features
and positioning (your SOP: always start with official sources)
2. **Social Intelligence Gathering**: Search Twitter and Reddit in parallel
for community feedback (optimized: 2x faster than sequential)
3. **Cross-Reference Analysis**: Identify contradictions between official claims
and community feedback (your preference: highlight discrepancies)
4. **Report Generation**: Create comprehensive report emphasizing quantitative
metrics like ratings and user counts (your preference: quant > qual)
## Quick Start
```python
from scripts.workflows import product_research_pipeline
report = await product_research_pipeline(
    product_url='https://example.com',
    product_name='ExampleApp'
)
```
## Available Workflows
### product_research_pipeline
**Use when**: Researching any new internet product or SaaS tool
**Location**: `scripts/workflows/product_research_pipeline.py`
**Optimizations**:
- 2x faster via parallel social media gathering
- Context-efficient: processes 1000s of posts, returns top 10 insights
- Recent info prioritized (aligns with your preference)
**Usage**:
```python
from scripts.workflows import product_research_pipeline
result = await product_research_pipeline(
    product_url='https://product.com',
    product_name='ProductName'
)
# Result structure:
# {
# 'key_features': ['Feature 1', 'Feature 2', ...],
# 'sentiment': {'score': 0.75, 'summary': '...'},
# 'highlights': ['Highlight 1', ...],
# 'mention_count': 247
# }
```
## MCP Tools Available
### puppeteer
**Tools**: 5 available
**Location**: `scripts/tools/puppeteer/`
**Key tools**: fetch_page, screenshot, pdf_export
**Discovery**: Use `ls scripts/tools/puppeteer/` to see all tools
### twitter
**Tools**: 3 available
**Location**: `scripts/tools/twitter/`
**Key tools**: search_tweets, get_user_timeline
**Discovery**: Use `ls scripts/tools/twitter/` to see all tools
## Performance Notes
- **Token reduction**: 98.7% fewer tokens vs loading all tools upfront
- **Speed**: 2x faster via parallel execution
- **Context efficiency**: Processes large datasets, returns summaries
## Advanced Usage
For custom workflows, combine individual MCP tools:
```python
from scripts.tools import puppeteer, twitter
# Custom combination
official_data = await puppeteer.fetch_page(url)
tweets = await twitter.search_tweets(query)
# Your own processing logic...
```
```
**Critical**: Notice how preferences and SOPs are embedded into the workflow description, not as separate sections.
### Step 6: Package and Deliver
Once the skill is complete:
```bash
python /mnt/skills/public/skill-creator/scripts/package_skill.py my-skill
```
This creates `my-skill.skill` file ready for distribution.
## Complete Example
Let's create a product research skill from start to finish:
### 1. Gather Input
User provides:
```
MCP Servers: puppeteer, twitter
Workflow: "Visit official site, check Twitter, create report"
Preferences: "Quantitative metrics preferred, recent info valued"
SOPs: "Always start with official sources"
```
### 2. Generate Infrastructure
```bash
# Create config
cat > mcp_config.json << 'EOF'
{
  "servers": [
    {"name": "puppeteer", "command": ["npx", "-y", "@modelcontextprotocol/server-puppeteer"]},
    {"name": "twitter", "command": ["npx", "-y", "@modelcontextprotocol/server-twitter"]}
  ]
}
EOF
# Introspect
python scripts/mcp_introspector.py mcp_config.json introspection.json
# Generate wrappers
mkdir -p product-research-skill
python scripts/generate_mcp_wrappers.py introspection.json product-research-skill
```
### 3-5. Create Skill (LLM)
Claude analyzes workflow, creates workflow script, writes SKILL.md with embedded preferences.
### 6. Package
```bash
python /mnt/skills/public/skill-creator/scripts/package_skill.py product-research-skill
```
Done! You now have `product-research-skill.skill`.
## Tips for Success
### Writing Good Workflow Descriptions
✅ **Good**:
- "First I visit the official website, then check Twitter and Reddit, finally create a report"
- Numbered steps with clear actions
- Mentions data sources explicitly
❌ **Bad**:
- "I research products" (too vague)
- No clear sequence
- Missing data sources
### Embedding Preferences Effectively
✅ **Good**: Weave into workflow guidance
```markdown
This skill gathers quantitative metrics (ratings, user counts) from multiple
sources, prioritizing recent information over older reviews.
```
❌ **Bad**: Separate section
```markdown
## User Preferences
- Likes quantitative metrics
- Prefers recent info
```
### When to Create Scripts vs Guidance
**Create Scripts**:
- 3+ step workflows
- Parallel execution opportunities
- Data filtering needs
- Repeated code patterns
**Use Text Guidance**:
- Ad-hoc exploration
- High variability
- Simple tool usage
- Flexibility needed
## Troubleshooting
### MCP Connection Errors
```bash
# Test MCP server manually
npx -y @modelcontextprotocol/server-puppeteer
# Check output for errors
```
### Import Errors
```bash
# Ensure MCP SDK installed
pip install mcp --break-system-packages
# Verify Python path
export PYTHONPATH="${PYTHONPATH}:$(pwd)/my-skill"
```
### Generated Code Issues
- Review generated wrappers in `scripts/tools/`
- Customize as needed (they're templates)
- Test scripts before packaging
## Next Steps
- Read `references/mcp-best-practices.md` for advanced patterns
- Explore `references/example-config.json` for complete examples
- Customize generated scripts for your specific needs
- Build a library of workflow skills!
## Getting Help
- MCP best practices: `references/mcp-best-practices.md`
- [MCP Documentation](https://modelcontextprotocol.io/)
- [Anthropic's MCP Blog](https://www.anthropic.com/engineering/code-execution-with-mcp)

skills/mcp-skill-creator/scripts/generate_mcp_wrappers.py Normal file

@@ -0,0 +1,387 @@
#!/usr/bin/env python3
"""
MCP Wrapper Generator

Creates type-safe Python wrappers for MCP tools based on their JSON Schema definitions.

Usage:
    python generate_mcp_wrappers.py <introspection.json> <output_dir>
"""
import json
import sys
from pathlib import Path
from typing import Any
def json_schema_to_python_type(schema: dict) -> str:
    """Convert JSON Schema type to Python type hint"""
    if not schema or 'type' not in schema:
        return 'Any'

    type_map = {
        'string': 'str',
        'integer': 'int',
        'number': 'float',
        'boolean': 'bool',
        'array': 'list',
        'object': 'dict',
    }

    schema_type = schema.get('type')
    if schema_type in type_map:
        python_type = type_map[schema_type]
        # Handle arrays with item types
        if schema_type == 'array' and 'items' in schema:
            item_type = json_schema_to_python_type(schema['items'])
            return f'list[{item_type}]'
        return python_type
    return 'Any'
def sanitize_name(name: str) -> str:
    """Sanitize tool name for Python function name"""
    # Replace hyphens and dots with underscores
    return name.replace('-', '_').replace('.', '_')
def generate_wrapper_function(tool: dict, server_name: str) -> str:
    """Generate Python wrapper function for a single MCP tool"""
    name = sanitize_name(tool['name'])
    original_name = tool['name']
    description = tool.get('description', 'No description available')
    input_schema = tool.get('input_schema', {})

    # Parse parameters
    properties = input_schema.get('properties', {})
    required = input_schema.get('required', [])

    # Build parameter list
    params = []
    param_docs = []
    for param_name, param_schema in properties.items():
        python_type = json_schema_to_python_type(param_schema)
        param_desc = param_schema.get('description', 'No description')
        if param_name in required:
            params.append(f"{param_name}: {python_type}")
        else:
            params.append(f"{param_name}: {python_type} | None = None")
        param_docs.append(f"        {param_name}: {param_desc}")

    params_str = ', '.join(params)
    param_docs_str = '\n'.join(param_docs) if param_docs else '        No parameters'

    # Generate function
    function_code = f'''async def {name}({params_str}) -> dict:
    """
    {description}

    Args:
{param_docs_str}

    Returns:
        dict: Result from MCP tool call
    """
    from .mcp_client import call_mcp_tool

    params = {{}}
'''

    # Add parameter packing code
    for param_name in properties.keys():
        function_code += f'''    if {param_name} is not None:
        params['{param_name}'] = {param_name}
'''

    function_code += f'''
    return await call_mcp_tool('{server_name}', '{original_name}', params)
'''
    return function_code
def generate_mcp_client(output_dir: Path):
    """Generate the base MCP client module with working implementation"""
    client_code = '''"""
Base MCP Client

Provides foundational infrastructure for calling MCP tools.
Progressive disclosure: Load tool definitions on-demand using list_mcp_tools.py
"""
import asyncio
import json
import sys
from pathlib import Path
from typing import Any

# Check for MCP SDK and provide helpful error message
try:
    from mcp import ClientSession, StdioServerParameters
    from mcp.client.stdio import stdio_client
except ImportError:
    print("\\n" + "="*70)
    print("❌ ERROR: MCP SDK not installed")
    print("="*70)
    print("\\nThis skill requires the MCP SDK to connect to MCP servers.")
    print("\\n📦 Install it with:")
    print("\\n    pip3 install mcp --break-system-packages")
    print("\\n✓ Verify installation:")
    print("\\n    python3 -c \\"import mcp; print('MCP SDK ready!')\\"")
    print("\\n💡 See SKILL.md Prerequisites section for more details.")
    print("\\n" + "="*70 + "\\n")
    sys.exit(1)

def load_mcp_config():
    """Load MCP server configuration from config file"""
    # Look for config in skill directory
    skill_dir = Path(__file__).parent.parent
    config_path = skill_dir / 'mcp_config.json'
    if config_path.exists():
        with open(config_path) as f:
            config = json.load(f)
        return {s['name']: s for s in config.get('servers', [])}
    return {}

MCP_SERVER_CONFIG = load_mcp_config()

async def call_mcp_tool(server_name: str, tool_name: str, params: dict) -> Any:
    """
    Call an MCP tool

    Args:
        server_name: Name of the MCP server
        tool_name: Name of the tool to call
        params: Parameters to pass to the tool

    Returns:
        Result from the MCP tool
    """
    if server_name not in MCP_SERVER_CONFIG:
        raise ValueError(
            f"MCP server '{server_name}' not configured. "
            f"Please add it to mcp_config.json"
        )

    server_config = MCP_SERVER_CONFIG[server_name]
    server_command = server_config['command']
    server_params = StdioServerParameters(
        command=server_command[0],
        args=server_command[1:] if len(server_command) > 1 else [],
    )

    # Use context manager properly - create new connection for each call.
    # This is simpler and avoids connection management issues.
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(tool_name, params)
            # Extract content from result
            if hasattr(result, 'content') and result.content:
                # Return the first content item's text
                return result.content[0].text if result.content[0].text else result.content[0]
            return result
'''
    client_path = output_dir / 'scripts' / 'mcp_client.py'
    client_path.parent.mkdir(parents=True, exist_ok=True)
    client_path.write_text(client_code)
    print(f"  Generated: {client_path}")
def generate_server_wrappers(server_name: str, tools: list[dict], output_dir: Path):
    """Generate wrapper files for all tools in a server"""
    server_dir = output_dir / 'scripts' / 'tools' / server_name
    server_dir.mkdir(parents=True, exist_ok=True)
    print(f"\nGenerating wrappers for '{server_name}' ({len(tools)} tools)...")

    # Generate __init__.py
    init_imports = []
    for tool in tools:
        func_name = sanitize_name(tool['name'])
        init_imports.append(f"from .{func_name} import {func_name}")
    init_code = '\n'.join(init_imports) + '\n\n__all__ = [' + ', '.join(f"'{sanitize_name(t['name'])}'" for t in tools) + ']\n'
    (server_dir / '__init__.py').write_text(init_code)

    # Generate individual tool files
    for tool in tools:
        func_name = sanitize_name(tool['name'])
        tool_code = generate_wrapper_function(tool, server_name)
        (server_dir / f"{func_name}.py").write_text(tool_code)

    print(f"  ✓ Generated wrappers in: {server_dir}")
def generate_list_tools_script(output_dir: Path):
"""Generate script to list all MCP tools dynamically (Progressive Disclosure)"""
script_code = '''"""
List all available MCP tools from configured servers
This implements Progressive Disclosure - tools are discovered on-demand
rather than pre-loading all definitions into context.
"""
import asyncio
import json
import sys
from pathlib import Path
# Check for MCP SDK and provide helpful error message
try:
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
except ImportError:
print("\\n" + "="*70)
print("❌ ERROR: MCP SDK not installed")
print("="*70)
print("\\nThis skill requires the MCP SDK to list available tools.")
print("\\n📦 Install it with:")
print("\\n pip3 install mcp --break-system-packages")
print("\\n✓ Verify installation:")
print("\\n python3 -c \\"import mcp; print('MCP SDK ready!')\\"")
print("\\n💡 See SKILL.md Prerequisites section for more details.")
print("\\n" + "="*70 + "\\n")
sys.exit(1)
async def list_server_tools(server_name: str, server_command: list):
"""List all tools from an MCP server"""
server_params = StdioServerParameters(
command=server_command[0],
args=server_command[1:] if len(server_command) > 1 else [],
)
async with stdio_client(server_params) as (read, write):
async with ClientSession(read, write) as session:
await session.initialize()
# List all tools
tools_result = await session.list_tools()
return [
{
'name': tool.name,
'description': tool.description,
'input_schema': tool.inputSchema
}
for tool in tools_result.tools
]
async def main():
# Load config
skill_dir = Path(__file__).parent.parent
    config_path = skill_dir / 'mcp_config.json'
    if not config_path.exists():
        print(f"✗ Config not found: {config_path}")
        sys.exit(1)
    with open(config_path) as f:
        config = json.load(f)
print("="*70)
print("MCP TOOLS REFERENCE")
print("="*70)
for server in config['servers']:
server_name = server['name']
server_command = server['command']
print(f"\\n📦 Server: {server_name}")
print("-"*70)
try:
tools = await list_server_tools(server_name, server_command)
print(f"Total tools: {len(tools)}\\n")
for tool in tools:
print(f"🔧 {tool['name']}")
print(f" Description: {tool['description']}")
# Show parameters
schema = tool.get('input_schema', {})
properties = schema.get('properties', {})
required = schema.get('required', [])
if properties:
print(f" Parameters:")
for param_name, param_info in properties.items():
param_type = param_info.get('type', 'unknown')
param_desc = param_info.get('description', 'No description')
req = " (required)" if param_name in required else " (optional)"
print(f" - {param_name}: {param_type}{req}")
if param_desc != 'No description':
print(f" {param_desc}")
print()
except Exception as e:
print(f"✗ Error: {e}\\n")
if __name__ == '__main__':
asyncio.run(main())
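
# Typical invocation from the generated skill directory:
#   python3 scripts/list_mcp_tools.py
# The output lists each configured server with its tools, descriptions, and
# parameter schemas, e.g. (illustrative names):
#   📦 Server: filesystem
#   🔧 read_file
#      Description: ...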
'''
script_path = output_dir / 'scripts' / 'list_mcp_tools.py'
script_path.parent.mkdir(parents=True, exist_ok=True)
script_path.write_text(script_code)
print(f" Generated: {script_path}")
def main():
if len(sys.argv) < 3:
print("Usage: generate_mcp_wrappers.py <introspection.json> <output_dir>")
print("\nGenerates Python wrapper code for MCP tools based on introspection results.")
sys.exit(1)
introspection_file = sys.argv[1]
output_dir = Path(sys.argv[2])
# Load introspection data
print(f"Loading introspection data from {introspection_file}")
with open(introspection_file) as f:
introspection_data = json.load(f)
# Generate base MCP client
print("\nGenerating base MCP client...")
generate_mcp_client(output_dir)
# Generate tool listing script (Progressive Disclosure)
print("\nGenerating list_mcp_tools.py...")
generate_list_tools_script(output_dir)
# Generate wrappers for each server
for server_name, server_data in introspection_data.items():
if server_data.get('status') == 'success':
generate_server_wrappers(
server_name,
server_data['tools'],
output_dir
)
else:
print(f"\n✗ Skipping '{server_name}' (introspection failed)")
print("\n✓ Wrapper generation complete!")
print(f"\nGenerated files in: {output_dir / 'scripts'}")
print(" - mcp_client.py: Base client infrastructure (working implementation)")
print(" - list_mcp_tools.py: Dynamic tool discovery (Progressive Disclosure)")
print(" - tools/<server>/*.py: Type-safe tool wrappers (optional)")
if __name__ == '__main__':
main()

skills/mcp-skill-creator/scripts/mcp_introspector.py Normal file

View File

@@ -0,0 +1,336 @@
#!/usr/bin/env python3
"""
MCP Server Introspector
Connects to MCP servers and extracts tool definitions,
generating structured data for skill creation.
Usage:
python mcp_introspector.py config.json output.json
"""
import asyncio
import json
import subprocess
import sys
from typing import Optional
# Try to import MCP SDK, provide helpful error if not available
try:
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
except ImportError:
print("\n" + "="*70)
print("❌ ERROR: MCP SDK not installed")
print("="*70)
print("\nThe MCP SDK is required to introspect MCP servers.")
print("\n📦 Install it with:")
print("\n pip3 install mcp --break-system-packages")
print("\n✓ Verify installation:")
print("\n python3 -c \"import mcp; print('MCP SDK ready!')\"")
print("\n" + "="*70 + "\n")
sys.exit(1)
def check_command_available(command: str) -> bool:
"""Check if a command is available in PATH"""
try:
subprocess.run([command, "--version"], capture_output=True, check=False)
return True
except FileNotFoundError:
return False
def extract_npm_package(server_command: list[str]) -> Optional[str]:
"""
Extract npm package name from MCP server command.
Examples:
["npx", "-y", "@modelcontextprotocol/server-filesystem", ...]
-> "@modelcontextprotocol/server-filesystem"
"""
if not server_command or server_command[0] not in ["npx", "npm"]:
return None
    # The package name is the first argument that is not a flag
    # (this also covers scoped packages like "@modelcontextprotocol/...")
for arg in server_command[1:]:
if not arg.startswith("-"):
return arg
return None
def install_npm_package(package_name: str) -> bool:
"""
Install npm package globally.
Args:
package_name: npm package to install (e.g., "@modelcontextprotocol/server-filesystem")
Returns:
True if installation succeeded
"""
print(f"📦 Installing MCP server: {package_name}")
try:
# Try global install first
result = subprocess.run(
["npm", "install", "-g", package_name],
capture_output=True,
text=True,
timeout=120
)
if result.returncode == 0:
print(f" ✓ Installed {package_name} globally")
return True
else:
# If global install fails, try local install
print(f" ⚠️ Global install failed, trying local install...")
result = subprocess.run(
["npm", "install", package_name],
capture_output=True,
text=True,
timeout=120
)
if result.returncode == 0:
print(f" ✓ Installed {package_name} locally")
return True
else:
print(f" ✗ Failed to install {package_name}")
print(f" Error: {result.stderr}")
return False
except subprocess.TimeoutExpired:
print(f" ✗ Installation timed out (>120s)")
return False
except Exception as e:
print(f" ✗ Installation error: {e}")
return False
def ensure_mcp_server_installed(server_name: str, server_command: list[str]) -> bool:
"""
Ensure MCP server is installed before introspection.
Returns:
True if server is ready (already installed or successfully installed)
"""
# Check if npm/npx is available
if not check_command_available("npm"):
print("⚠️ npm not found. Skipping automatic installation.")
print(" The server command will be tried as-is.")
return True # Continue anyway, might work
# Extract package name
package_name = extract_npm_package(server_command)
if not package_name:
# Not an npm-based server, skip
return True
print(f"Checking MCP server: {package_name}")
# Check if already installed globally
try:
result = subprocess.run(
["npm", "list", "-g", package_name],
capture_output=True,
text=True,
timeout=10
)
if result.returncode == 0:
print(f" ✓ Already installed globally")
return True
    except (subprocess.SubprocessError, FileNotFoundError):
        pass  # global check failed; fall through to the local check
# Check if installed locally
try:
result = subprocess.run(
["npm", "list", package_name],
capture_output=True,
text=True,
timeout=10
)
if result.returncode == 0:
print(f" ✓ Already installed locally")
return True
    except (subprocess.SubprocessError, FileNotFoundError):
        pass  # local check failed; fall through to installation
# Not installed, attempt installation
print(f" ⚠️ Not installed, installing now...")
return install_npm_package(package_name)
async def introspect_mcp_server(server_name: str, server_command: list[str]) -> dict:
"""
Connect to an MCP server and list all available tools
Args:
server_name: Name of the MCP server
server_command: Command to start MCP server (e.g., ['npx', '-y', '@modelcontextprotocol/server-filesystem', '/tmp'])
Returns:
Dictionary with server capabilities and tool definitions
"""
print(f"Introspecting MCP server: {server_name}")
try:
server_params = StdioServerParameters(
command=server_command[0],
args=server_command[1:] if len(server_command) > 1 else [],
)
async with stdio_client(server_params) as (read, write):
async with ClientSession(read, write) as session:
await session.initialize()
# List all tools
tools_result = await session.list_tools()
tools = []
for tool in tools_result.tools:
tools.append({
'name': tool.name,
'description': tool.description,
'input_schema': tool.inputSchema
})
print(f" ✓ Found {len(tools)} tools in {server_name}")
return {
'server_name': server_name,
'server_command': server_command,
'tool_count': len(tools),
'tools': tools,
'status': 'success'
}
except Exception as e:
error_msg = str(e)
print(f" ✗ Error introspecting {server_name}: {error_msg}")
# Provide helpful hints for common errors
if "npm" in error_msg.lower() or "npx" in error_msg.lower():
print("\n💡 Troubleshooting npm/npx errors:")
print(" 1. Check network connection (can you access npmjs.org?)")
print(" 2. Try running the MCP server command manually:")
print(f" {' '.join(server_command)}")
print(" 3. Check npm configuration: cat ~/.npmrc")
print(" 4. If behind a proxy, configure npm proxy settings")
print(" 5. Try clearing npm cache: npm cache clean --force")
elif "connection" in error_msg.lower() or "timeout" in error_msg.lower():
print("\n💡 Connection issue - the MCP server may not be responding.")
print(" 1. Verify the server command is correct")
print(" 2. Check if the server requires authentication or additional setup")
print(" 3. Try running the command manually to see detailed error")
return {
'server_name': server_name,
'server_command': server_command,
'status': 'error',
'error': error_msg
}
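# Illustrative success payload for one server (shape mirrors the returns above;
# server and tool names are hypothetical):
# {
#   "server_name": "filesystem",
#   "server_command": ["npx", "-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
#   "tool_count": 2,
#   "tools": [{"name": "read_file", "description": "...", "input_schema": {...}}],
#   "status": "success"
# }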
async def introspect_all_servers(server_configs: list[dict]) -> dict:
"""
Introspect multiple MCP servers
Args:
server_configs: List of {name, command} dicts
Returns:
Dictionary mapping server names to their capabilities
"""
results = {}
for config in server_configs:
name = config['name']
command = config['command']
# Ensure MCP server is installed before introspection
if not ensure_mcp_server_installed(name, command):
print(f" ⚠️ Skipping {name} - installation failed")
results[name] = {
'server_name': name,
'server_command': command,
'status': 'error',
'error': 'Failed to install MCP server'
}
continue
result = await introspect_mcp_server(name, command)
results[name] = result
return results
def load_config(config_path: str) -> list[dict]:
"""
Load MCP server configuration from JSON file
Expected format:
{
"servers": [
{
"name": "filesystem",
"command": ["npx", "-y", "@modelcontextprotocol/server-filesystem", "/tmp"]
},
...
]
}
"""
with open(config_path) as f:
config = json.load(f)
return config.get('servers', [])
def main():
if len(sys.argv) < 3:
print("Usage: mcp_introspector.py <config.json> <output.json>")
print("\nConfig format:")
print(json.dumps({
"servers": [
{
"name": "example-server",
"command": ["npx", "-y", "@modelcontextprotocol/server-example"]
}
]
}, indent=2))
sys.exit(1)
config_path = sys.argv[1]
output_path = sys.argv[2]
# Load configuration
print(f"Loading MCP server configuration from {config_path}")
server_configs = load_config(config_path)
print(f"Found {len(server_configs)} servers to introspect\n")
# Introspect all servers
results = asyncio.run(introspect_all_servers(server_configs))
# Save results
with open(output_path, 'w') as f:
json.dump(results, f, indent=2)
print(f"\n✓ Introspection results saved to {output_path}")
# Summary
success_count = sum(1 for r in results.values() if r.get('status') == 'success')
total_tools = sum(r.get('tool_count', 0) for r in results.values() if r.get('status') == 'success')
print(f"\nSummary:")
print(f" Servers successfully introspected: {success_count}/{len(server_configs)}")
print(f" Total tools discovered: {total_tools}")
if __name__ == '__main__':
main()
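
# Typical two-step pipeline (file paths are illustrative):
#   python3 mcp_introspector.py mcp_config.json introspection.json
#   python3 generate_mcp_wrappers.py introspection.json ./my-skill
# The first command writes tool definitions to introspection.json; the second
# turns them into the base client, the listing script, and per-tool wrappers.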