From e32589a11aa270ff91c9eed3cc15636606dc2a20 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 08:42:03 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 15 ++++ README.md | 3 + commands/extract.md | 36 +++++++++ commands/map.md | 35 ++++++++ commands/scrape.md | 34 ++++++++ commands/search.md | 43 ++++++++++ plugin.lock.json | 61 ++++++++++++++ skills/firecrawl/SKILL.md | 160 +++++++++++++++++++++++++++++++++++++ 8 files changed, 387 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 commands/extract.md create mode 100644 commands/map.md create mode 100644 commands/scrape.md create mode 100644 commands/search.md create mode 100644 plugin.lock.json create mode 100644 skills/firecrawl/SKILL.md diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..62637a4 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,15 @@ +{ + "name": "firecrawl", + "description": "Firecrawl plugin for Claude Code", + "version": "1.0.0", + "author": { + "name": "Nathan Vale", + "email": "hi@nathanvale.com" + }, + "skills": [ + "./skills" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e9e0790 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# firecrawl + +Firecrawl plugin for Claude Code diff --git a/commands/extract.md b/commands/extract.md new file mode 100644 index 0000000..0d198f6 --- /dev/null +++ b/commands/extract.md @@ -0,0 +1,36 @@ +--- +description: Extract structured data from a URL using LLM +argument-hint: <url> --prompt "..." [--schema '{...}'] +--- + +# Firecrawl Extract + +Extract data from: `$ARGUMENTS` + +## Instructions + +1. Parse the arguments: + - First argument is the URL to extract from + - Required `--prompt "text"` - What to extract + - Optional `--schema '{...}'` - JSON Schema for structured output + +2. 
Run the CLI command: + ```bash + cd /Users/nathanvale/code/side-quest-marketplace/plugins/firecrawl && bun run src/cli.ts extract $ARGUMENTS + ``` + +3. Present the extracted data to the user + +## Example Usage + +``` +/firecrawl:extract https://example.com --prompt "Extract the main heading and description" +/firecrawl:extract https://store.example.com/product --prompt "Extract price and name" --schema '{"price": {"type": "number"}, "name": {"type": "string"}}' +``` + +## Notes + +- Extraction is async - the command polls for completion +- May take 10-30 seconds depending on page complexity +- Use --schema for type-safe structured output +- Requires FIRECRAWL_API_KEY environment variable diff --git a/commands/map.md b/commands/map.md new file mode 100644 index 0000000..bcc6d73 --- /dev/null +++ b/commands/map.md @@ -0,0 +1,35 @@ +--- +description: Discover all URLs on a website +argument-hint: <url> [--limit N] +--- + +# Firecrawl Map + +Map URLs on the website: `$ARGUMENTS` + +## Instructions + +1. Parse the arguments: + - First argument is the base URL to map + - Optional `--limit N` flag (default: 100) + +2. Run the CLI command: + ```bash + cd /Users/nathanvale/code/side-quest-marketplace/plugins/firecrawl && bun run src/cli.ts map $ARGUMENTS + ``` + +3. 
Present the discovered URLs to the user + +## Example Usage + +``` +/firecrawl:map https://docs.example.com +/firecrawl:map https://example.com --limit 50 +``` + +## Notes + +- Returns up to 50 URLs in output (configurable via --limit) +- Includes page titles when available +- Useful for understanding site structure before scraping specific pages +- Requires FIRECRAWL_API_KEY environment variable diff --git a/commands/scrape.md b/commands/scrape.md new file mode 100644 index 0000000..baeb8b7 --- /dev/null +++ b/commands/scrape.md @@ -0,0 +1,34 @@ +--- +description: Scrape content from a URL as markdown +argument-hint: <url> [--format markdown|summary] +--- + +# Firecrawl Scrape + +Scrape content from the URL: `$ARGUMENTS` + +## Instructions + +1. Parse the arguments: + - First argument is the URL to scrape + - Optional `--format` flag: `markdown` (default) or `summary` + +2. Run the CLI command: + ```bash + cd /Users/nathanvale/code/side-quest-marketplace/plugins/firecrawl && bun run src/cli.ts scrape $ARGUMENTS + ``` + +3. Present the results to the user in a clean format + +## Example Usage + +``` +/firecrawl:scrape https://docs.example.com/api +/firecrawl:scrape https://example.com --format summary +``` + +## Notes + +- Returns markdown content by default +- Content is truncated at 8000 characters to save tokens +- Requires FIRECRAWL_API_KEY environment variable diff --git a/commands/search.md b/commands/search.md new file mode 100644 index 0000000..be6da95 --- /dev/null +++ b/commands/search.md @@ -0,0 +1,43 @@ +--- +description: Search the web and get results with content +argument-hint: <query> [--limit N] +--- + +# Firecrawl Search + +Search the web for: `$ARGUMENTS` + +## Instructions + +1. Parse the arguments: + - All non-flag arguments form the search query + - Optional `--limit N` flag (default: 5) + +2. Run the CLI command: + ```bash + cd /Users/nathanvale/code/side-quest-marketplace/plugins/firecrawl && bun run src/cli.ts search $ARGUMENTS + ``` + +3. 
Present the search results to the user + +## Example Usage + +``` +/firecrawl:search typescript error handling best practices +/firecrawl:search "react hooks" --limit 10 +/firecrawl:search site:github.com firecrawl examples +``` + +## Search Operators + +- `"exact phrase"` - Match exact phrase +- `-term` - Exclude term +- `site:example.com` - Limit to domain +- `intitle:word` - Word in page title +- `inurl:word` - Word in URL + +## Notes + +- Returns up to 10 results with titles, URLs, and descriptions +- Includes scraped markdown content when available +- Requires FIRECRAWL_API_KEY environment variable diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..25e0b75 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,61 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:nathanvale/side-quest-marketplace:plugins/firecrawl", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "ed09532002431a9e5d5ba51192ce3ef8b00139c1", + "treeHash": "0e13ccb8c53587c0b955044810575ee39dd9f499a3b232d85ec9e6d41b7d7797", + "generatedAt": "2025-11-28T10:27:16.117501Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "firecrawl", + "description": "Firecrawl plugin for Claude Code", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "9a9da2b2acff95b3bf6836478e18d6ac5a28259b07492cb72ce286818a567c69" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "5ae05dbd9b7cda39cc43cf6e3d50a3ed78fb6dae126056b05f4757928431261f" + }, + { + "path": "commands/search.md", + "sha256": "aad8741bea57f3d2eef66361a0680477c8623f9d19effc03eac5f246291174ff" + }, + { + "path": "commands/map.md", + "sha256": 
"ec3bc473621fea05ebf604c650efaa9f95d5c88aecea543286f3f7513152f843" + }, + { + "path": "commands/extract.md", + "sha256": "9c7e7d60aed2b25cdec7b30fbd5d3d31410446201cf9fa0df113a961e1c62924" + }, + { + "path": "commands/scrape.md", + "sha256": "4d2a4ee16bd217b5803534c2f1aecd0c385742b63863975534569eb6d5b5adc9" + }, + { + "path": "skills/firecrawl/SKILL.md", + "sha256": "2c2261bcb98091f855bf48f98122eef3f3a7dbe7cd2445da1f2468b10877de14" + } + ], + "dirSha256": "0e13ccb8c53587c0b955044810575ee39dd9f499a3b232d85ec9e6d41b7d7797" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/firecrawl/SKILL.md b/skills/firecrawl/SKILL.md new file mode 100644 index 0000000..ac0f4f5 --- /dev/null +++ b/skills/firecrawl/SKILL.md @@ -0,0 +1,160 @@ +--- +name: firecrawl +description: Web scraping, search, and data extraction using Firecrawl API. Use when users need to fetch web content, discover URLs on sites, search the web, or extract structured data from pages. +--- + +# Firecrawl + +## Overview + +Firecrawl is a powerful web scraping and search API. This plugin provides a token-efficient interface for Claude Code through slash commands. + +## When to Use This Skill + +- **Scraping**: Fetch content from a single URL as markdown +- **Mapping**: Discover all URLs on a website +- **Searching**: Search the web and optionally scrape results +- **Extracting**: Pull structured data from pages using LLM + +## Quick Reference + +| Task | Command | +|------|---------| +| Scrape a page | `/firecrawl:scrape <url>` | +| Map a site | `/firecrawl:map <url>` | +| Search the web | `/firecrawl:search <query>` | +| Extract data | `/firecrawl:extract <url> --prompt "..."` | + +## Commands + +### Scrape - Get Page Content + +Fetches and converts a web page to clean markdown. 
+ +``` +/firecrawl:scrape https://example.com +/firecrawl:scrape https://example.com --format summary +``` + +**Options:** +- `--format markdown|summary` - Output format (default: markdown) + +**Best for:** +- Reading documentation +- Fetching article content +- Getting page text for analysis + +### Map - Discover URLs + +Finds all URLs on a website. Useful for understanding site structure. + +``` +/firecrawl:map https://example.com +/firecrawl:map https://example.com --limit 50 +``` + +**Options:** +- `--limit N` - Maximum URLs to return (default: 100) + +**Best for:** +- Understanding site structure +- Finding specific pages before scraping +- Discovering documentation sections + +### Search - Web Search + +Searches the web and returns results with optional content scraping. + +``` +/firecrawl:search typescript tutorials +/firecrawl:search "react hooks guide" --limit 10 +``` + +**Options:** +- `--limit N` - Maximum results (default: 5) + +**Supports search operators:** +- `"exact phrase"` - Exact match +- `-term` - Exclude term +- `site:example.com` - Limit to domain +- `intitle:word` - Word in title + +**Best for:** +- Finding information across the web +- Researching topics +- Finding documentation + +### Extract - Structured Data + +Extracts specific data from pages using LLM prompts. + +``` +/firecrawl:extract https://example.com --prompt "Extract the main heading and description" +/firecrawl:extract https://example.com/product --prompt "Extract price and features" --schema '{"price": {"type": "number"}, "features": {"type": "array"}}' +``` + +**Options:** +- `--prompt "text"` - What to extract (required) +- `--schema '{...}'` - JSON Schema for structured output + +**Best for:** +- Extracting specific data points +- Getting structured information +- Pulling prices, names, dates, etc. + +## Environment Setup + +Requires `FIRECRAWL_API_KEY` environment variable. 
+ +Get your API key from: https://firecrawl.dev + +## Examples + +### Research a Topic +``` +/firecrawl:search "best practices for TypeScript error handling" --limit 5 +``` + +### Read Documentation +``` +/firecrawl:scrape https://docs.example.com/api/authentication +``` + +### Map a Documentation Site +``` +/firecrawl:map https://docs.example.com --limit 200 +``` + +### Extract Product Info +``` +/firecrawl:extract https://store.example.com/product/123 --prompt "Extract product name, price, and availability" +``` + +## Token Efficiency + +This plugin is designed for minimal token consumption: + +- **Scrape**: Returns clean markdown, truncated at 8000 chars +- **Map**: Shows up to 50 URLs with titles +- **Search**: Limits to 10 results with summaries +- **Extract**: Returns only requested data + +Compare to the full MCP server which loads ~14k tokens just from tool definitions. + +## Error Handling + +Common errors: +- `API key required` - Set FIRECRAWL_API_KEY +- `Invalid URL` - Check URL format +- `Rate limited` - Wait and retry (auto-handled) +- `Site blocked` - Some sites block scraping + +## Pricing Note + +Firecrawl charges per operation: +- Scrape: 1 credit per page +- Map: 1 credit per call +- Search: 1 credit per result +- Extract: Varies by complexity + +Check https://firecrawl.dev/pricing for current rates.