From 9b5e9af4368643598a12d7bf1f86142a876a2932 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sat, 29 Nov 2025 18:17:27 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 12 ++ README.md | 3 + plugin.lock.json | 53 ++++++++ skills/web/SKILL.md | 53 ++++++++ skills/web/scripts/fetch.py | 245 +++++++++++++++++++++++++++++++++++ skills/web/scripts/search.py | 217 +++++++++++++++++++++++++++++++ 6 files changed, 583 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 plugin.lock.json create mode 100644 skills/web/SKILL.md create mode 100755 skills/web/scripts/fetch.py create mode 100755 skills/web/scripts/search.py diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..231d53a --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "agent-tools", + "description": "Essential agent tools for web search and content fetching", + "version": "0.0.0-2025.11.28", + "author": { + "name": "Daniel Scholl", + "email": "dascholl@microsoft.com" + }, + "skills": [ + "./skills" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6fb9d13 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# agent-tools + +Essential agent tools for web search and content fetching diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..49fc6f6 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,53 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:danielscholl/agent-skills:plugins/agent-tools", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "8af7d9e8fd0aaa99572bf420e24caa40cbe4a8a9", + "treeHash": "41787e1d1ec744f1a9a93c816511cc259fecd6a8e94eaed20b49147e20bf9dd9", + "generatedAt": "2025-11-28T10:16:01.097435Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": 
"git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "agent-tools", + "description": "Essential agent tools for web search and content fetching", + "version": null + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "414afc53a2b005ed8e31199a57ee6757d39011139152e22f1652b07548e64680" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "f5e8fe1bdd389500c1341f26f4bbf0418a117907492c1154e692d0547b326b3e" + }, + { + "path": "skills/web/SKILL.md", + "sha256": "dd357603c075689f0885fb430c586421c8bc7aa9a5d9dfb43465110dc9f2ddbd" + }, + { + "path": "skills/web/scripts/fetch.py", + "sha256": "5b5b7e932b7209c81127e04cc626f602376efb00141ca6875d3f30ff4530e51f" + }, + { + "path": "skills/web/scripts/search.py", + "sha256": "74ed105d0a4cf8d1490b39f80d5f5678fb1e74849b912015a21e801b648b6329" + } + ], + "dirSha256": "41787e1d1ec744f1a9a93c816511cc259fecd6a8e94eaed20b49147e20bf9dd9" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/web/SKILL.md b/skills/web/SKILL.md new file mode 100644 index 0000000..a6a2b1a --- /dev/null +++ b/skills/web/SKILL.md @@ -0,0 +1,53 @@ +--- +name: web +description: "Search the web using Brave Search API and fetch page content with HTML-to-markdown conversion. Use when user needs current information, online documentation, or specific URL content beyond knowledge cutoff." +version: 1.0.0 +brief_description: "Web search and page fetching with Brave API" +triggers: + keywords: [web, search, internet, online, fetch, url, webpage, website, current, recent, latest, news, documentation, brave] + verbs: [search, fetch, get, find, lookup, retrieve] + patterns: + - "https?://" + - "www\\." 
+    - "search.*for"
+    - "look.*up"
+    - "find.*information"
+    - "current.*(?:news|info|data)"
+    - "recent.*(?:news|info|data)"
+allowed-tools: Bash, WebFetch, WebSearch
+---
+
+# web
+
+## 🎯 Triggers
+**When user wants to:**
+- Search the internet
+- Get current/recent information
+- Fetch content from a URL
+- Access online documentation
+
+**Skip when:**
+- Answer is within knowledge cutoff
+- Local file operation
+
+## Scripts
+
+### search
+**What:** Search the web using Brave Search API
+**Pattern:** User wants to search → `script_run web search --query "USER_QUERY" --json`
+**Example:** "Search for python tutorials" → `script_run web search --query "python tutorials" --json`
+
+### fetch
+**What:** Fetch and convert web page to markdown
+**Pattern:** User provides URL → `script_run web fetch --url "USER_URL" --json`
+**Example:** "Get https://example.com" → `script_run web fetch --url "https://example.com" --json`
+
+## Quick Reference
+```
+User: "Search for X" → script_run web search --query "X" --json
+User: "Fetch https://..." → script_run web fetch --url "https://..." --json
+User: "Find recent news on X" → script_run web search --query "X news" --json
+```
+
+## Requires
+- `BRAVE_API_KEY` environment variable for search
diff --git a/skills/web/scripts/fetch.py b/skills/web/scripts/fetch.py
new file mode 100755
index 0000000..5ff7b04
--- /dev/null
+++ b/skills/web/scripts/fetch.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+# /// script
+# dependencies = [
+#     "httpx",
+#     "beautifulsoup4",
+#     "markdownify",
+#     "click",
+# ]
+# ///
+
+"""
+Web Fetch Script
+
+Fetch and parse web page content, converting HTML to clean markdown.
+Supports timeout configuration and size limits.
+
+Usage:
+    uv run fetch.py --url "https://example.com"
+    uv run fetch.py --url "https://example.com" --json
+    uv run fetch.py --url "https://example.com" --timeout 60
+"""
+
+import json
+import os
+import sys
+from urllib.parse import urlparse
+
+import click
+import httpx
+from bs4 import BeautifulSoup  # type: ignore
+from markdownify import markdownify as md  # type: ignore
+
+# Configuration defaults
+DEFAULT_TIMEOUT = 30  # seconds
+DEFAULT_MAX_SIZE = 1048576  # 1MB
+
+
+def is_valid_url(url: str) -> bool:
+    """Validate URL format.
+
+    Args:
+        url: URL to validate
+
+    Returns:
+        True if URL is valid (http/https), False otherwise
+    """
+    try:
+        parsed = urlparse(url)
+        return parsed.scheme in ("http", "https") and bool(parsed.netloc)
+    except Exception:
+        return False
+
+
+def html_to_markdown(html: str) -> str:
+    """Convert HTML content to clean markdown.
+
+    Removes scripts, styles, and other non-content elements before conversion.
+
+    Args:
+        html: HTML content to convert
+
+    Returns:
+        Markdown representation of the content
+    """
+    # Parse HTML
+    soup = BeautifulSoup(html, "html.parser")
+
+    # Remove script and style elements
+    for element in soup(["script", "style", "nav", "footer", "header"]):
+        element.decompose()
+
+    # Convert to markdown
+    markdown = md(str(soup), heading_style="ATX", strip=["img"])
+
+    # Clean up excessive whitespace
+    lines = [line.strip() for line in markdown.split("\n")]
+    clean_lines = [line for line in lines if line]  # Remove empty lines
+
+    return "\n\n".join(clean_lines)
+
+
+def fetch_web_page(url: str, timeout: int, max_size: int) -> dict:
+    """Fetch and parse web page content.
+
+    Args:
+        url: URL to fetch content from
+        timeout: Request timeout in seconds
+        max_size: Maximum response size in bytes
+
+    Returns:
+        Dict with success, result/error, and message
+    """
+    # Validate URL
+    if not is_valid_url(url):
+        return {
+            "success": False,
+            "error": "invalid_url",
+            "message": f"URL must start with http:// or https://. Got: {url}",
+        }
+
+    try:
+        # Fetch content with timeout
+        with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+            response = client.get(url)
+            response.raise_for_status()
+
+            # Check content size
+            content_length = len(response.content)
+            if content_length > max_size:
+                return {
+                    "success": False,
+                    "error": "content_too_large",
+                    "message": f"Response size ({content_length} bytes) exceeds maximum ({max_size} bytes)",
+                }
+
+            # Parse HTML to markdown
+            markdown_content = html_to_markdown(response.text)
+
+            return {
+                "success": True,
+                "result": markdown_content,
+                "message": f"Successfully fetched {len(markdown_content)} characters from {url}",
+            }
+
+    except httpx.TimeoutException:
+        return {
+            "success": False,
+            "error": "timeout",
+            "message": f"Request timed out after {timeout} seconds",
+        }
+
+    except httpx.HTTPStatusError as e:
+        return {
+            "success": False,
+            "error": "http_error",
+            "message": f"HTTP {e.response.status_code}: {e.response.reason_phrase}",
+        }
+
+    except httpx.RequestError as e:
+        return {
+            "success": False,
+            "error": "request_error",
+            "message": f"Request failed: {str(e)}",
+        }
+
+    except Exception as e:
+        return {
+            "success": False,
+            "error": "unknown_error",
+            "message": f"Unexpected error: {str(e)}",
+        }
+
+
+def format_markdown_output(url: str, markdown: str) -> str:
+    """Format markdown for human-readable output.
+
+    Args:
+        url: Source URL
+        markdown: Markdown content
+
+    Returns:
+        Formatted string for display
+    """
+    lines = []
+    lines.append("\n" + "=" * 60)
+    lines.append(f"📄 Content from {url}")
+    lines.append("=" * 60)
+    lines.append("")
+    lines.append(markdown)
+    lines.append("")
+    lines.append("=" * 60)
+    return "\n".join(lines)
+
+
+@click.command()
+@click.option("--url", required=True, help="URL to fetch content from")
+@click.option(
+    "--timeout",
+    default=None,
+    type=int,
+    help=f"Request timeout in seconds (default: {DEFAULT_TIMEOUT})",
+)
+@click.option(
+    "--max-size",
+    default=None,
+    type=int,
+    help=f"Maximum response size in bytes (default: {DEFAULT_MAX_SIZE})",
+)
+@click.option(
+    "--json",
+    "output_json",
+    is_flag=True,
+    help="Output as JSON instead of human-readable format",
+)
+def main(url: str, timeout: int | None, max_size: int | None, output_json: bool) -> None:
+    """
+    Fetch and parse web page content.
+
+    Retrieves HTML content from the specified URL and converts it to clean
+    markdown format. Handles timeouts, HTTP errors, and invalid URLs gracefully.
+
+    Environment variables:
+        WEB_FETCH_TIMEOUT - Default timeout in seconds
+        WEB_FETCH_MAX_SIZE - Default max response size in bytes
+
+    Examples:
+        uv run fetch.py --url "https://example.com"
+        uv run fetch.py --url "https://example.com" --json
+        uv run fetch.py --url "https://example.com" --timeout 60
+    """
+    try:
+        # Get timeout from env or use default
+        if timeout is None:
+            timeout = int(os.getenv("WEB_FETCH_TIMEOUT", str(DEFAULT_TIMEOUT)))
+
+        # Get max_size from env or use default
+        if max_size is None:
+            max_size = int(os.getenv("WEB_FETCH_MAX_SIZE", str(DEFAULT_MAX_SIZE)))
+
+        # Fetch and parse
+        result = fetch_web_page(url, timeout, max_size)
+
+        # Output results
+        if output_json:
+            click.echo(json.dumps(result, indent=2))
+        else:
+            if result["success"]:
+                formatted = format_markdown_output(url, result["result"])
+                click.echo(formatted)
+            else:
+                click.echo(f"❌ Error ({result['error']}): {result['message']}", err=True)
+
+        sys.exit(0 if result["success"] else 1)
+
+    except Exception as e:
+        if output_json:
+            error_data = {"success": False, "error": "script_error", "message": str(e)}
+            click.echo(json.dumps(error_data, indent=2))
+        else:
+            click.echo(f"❌ Script Error: {e}", err=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/skills/web/scripts/search.py b/skills/web/scripts/search.py
new file mode 100755
index 0000000..3192654
--- /dev/null
+++ b/skills/web/scripts/search.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+# /// script
+# dependencies = [
+#     "httpx",
+#     "click",
+# ]
+# ///
+
+"""
+Web Search Script using Brave Search API
+
+Search the internet using Brave Search API.
+Requires BRAVE_API_KEY environment variable.
+
+Usage:
+    uv run search.py --query "anthropic claude"
+    uv run search.py --query "python" --count 10
+    uv run search.py --query "keyword" --json
+"""
+
+import json
+import os
+import sys
+
+import click
+import httpx
+
+# Configuration
+API_BASE_URL = "https://api.search.brave.com/res/v1"
+API_TIMEOUT = 30.0  # seconds
+USER_AGENT = "agent-base-web-access/1.0"
+
+
+class BraveSearchClient:
+    """HTTP client for Brave Search API"""
+
+    def __init__(self, api_key: str):
+        """Initialize Brave Search client
+
+        Args:
+            api_key: Brave Search API key
+        """
+        self.api_key = api_key
+        self.client = httpx.Client(
+            base_url=API_BASE_URL,
+            timeout=API_TIMEOUT,
+            headers={
+                "User-Agent": USER_AGENT,
+                "X-Subscription-Token": api_key,
+                "Accept": "application/json",
+            },
+        )
+
+    def __enter__(self) -> "BraveSearchClient":
+        """Context manager entry"""
+        return self
+
+    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
+        """Context manager exit - cleanup"""
+        self.client.close()
+
+    def search(self, query: str, count: int = 10) -> list[dict]:
+        """Search using Brave Search API
+
+        Args:
+            query: Search query
+            count: Number of results to return (max 20)
+
+        Returns:
+            List of search results with title, url, description
+
+        Raises:
+            Exception if API call fails
+        """
+        try:
+            params = {
+                "q": query,
+                "count": str(min(count, 20)),  # Brave API max is 20
+            }
+
+            response = self.client.get("/web/search", params=params)
+            response.raise_for_status()
+            data = response.json()
+
+            # Extract web results
+            web_results = data.get("web", {}).get("results", [])
+
+            # Format results
+            results = []
+            for result in web_results:
+                results.append(
+                    {
+                        "title": result.get("title", ""),
+                        "url": result.get("url", ""),
+                        "description": result.get("description", ""),
+                    }
+                )
+
+            return results
+
+        except httpx.HTTPStatusError as e:
+            raise Exception(f"API error: {e.response.status_code} - {e.response.text}")
+        except httpx.RequestError as e:
+            raise Exception(f"Network error: {str(e)}")
+        except Exception as e:
+            raise Exception(f"Unexpected error: {str(e)}")
+
+
+def format_search_result(result: dict, index: int) -> str:
+    """Format a single search result"""
+    title = result.get("title", "N/A")
+    url = result.get("url", "N/A")
+    description = result.get("description", "")
+
+    lines = []
+    lines.append(f"{index}. {title}")
+    lines.append(f"   {url}")
+
+    if description:
+        # Truncate long descriptions
+        desc = description[:200] + "..." if len(description) > 200 else description
+        lines.append(f"   {desc}")
+
+    return "\n".join(lines)
+
+
+def format_search_results(query: str, results: list[dict]) -> str:
+    """Format search results for human-readable output
+
+    Args:
+        query: Search query
+        results: List of search results
+
+    Returns:
+        Formatted string for display
+    """
+    lines = []
+    lines.append("\n" + "=" * 60)
+    lines.append(f"🔍 Web Search Results for '{query}'")
+    lines.append("=" * 60)
+
+    if not results:
+        lines.append("\nNo results found for your search.")
+        lines.append("\nTip: Try different keywords.")
+    else:
+        lines.append(f"Found {len(results)} results:\n")
+
+        for i, result in enumerate(results, 1):
+            lines.append(format_search_result(result, i))
+            lines.append("")
+
+    lines.append("─" * 60)
+    lines.append("Powered by Brave Search API")
+
+    lines.append("=" * 60)
+    return "\n".join(lines)
+
+
+@click.command()
+@click.option("--query", required=True, help="Search query")
+@click.option("--count", default=10, type=int, help="Number of results (max 20)")
+@click.option(
+    "--json",
+    "output_json",
+    is_flag=True,
+    help="Output as JSON instead of human-readable format",
+)
+def main(query: str, count: int, output_json: bool) -> None:
+    """
+    Search the web using Brave Search API.
+
+    Requires BRAVE_API_KEY environment variable.
+
+    Examples:
+        uv run search.py --query "anthropic claude"
+        uv run search.py --query "python" --count 5 --json
+    """
+    try:
+        # Get API key from environment
+        api_key = os.getenv("BRAVE_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "BRAVE_API_KEY environment variable is required. "
+                "Get a free API key at https://brave.com/search/api/"
+            )
+
+        if not query.strip():
+            raise ValueError("Query cannot be empty")
+
+        # Search via API
+        with BraveSearchClient(api_key) as client:
+            results = client.search(query, count)
+
+        # Output results
+        if output_json:
+            click.echo(json.dumps(results, indent=2))
+        else:
+            formatted = format_search_results(query, results)
+            click.echo(formatted)
+
+        sys.exit(0)
+
+    except Exception as e:
+        if output_json:
+            error_data = {
+                "success": False,
+                "error": "runtime_error",
+                "message": str(e),
+            }
+            click.echo(json.dumps(error_data, indent=2))
+        else:
+            click.echo(f"❌ Error: {e}", err=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()