Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:48:40 +08:00
commit 21e99f1aa7
18 changed files with 1868 additions and 0 deletions

105
.claude-plugin/hooks.json Normal file
View File

@@ -0,0 +1,105 @@
{
"hooks": {
"UserPromptSubmit": [
{
"hooks": [
{
"type": "command",
"command": "python ${CLAUDE_PLUGIN_ROOT}/src/rag_cli_plugin/hooks/slash-command-blocker.py",
"name": "slash-command-blocker",
"priority": 150,
"description": "Prevents Claude from responding to slash commands, showing only execution status"
}
]
},
{
"hooks": [
{
"type": "command",
"command": "python ${CLAUDE_PLUGIN_ROOT}/src/rag_cli_plugin/hooks/user-prompt-submit.py",
"name": "user-prompt-submit",
"priority": 100,
"description": "Automatically enhances queries with RAG context and multi-agent orchestration"
}
]
}
],
"PostToolUse": [
{
"hooks": [
{
"type": "command",
"command": "python ${CLAUDE_PLUGIN_ROOT}/src/rag_cli_plugin/hooks/response-post.py",
"name": "response-post",
"priority": 80,
"enabled": false,
"description": "Adds inline citations to Claude responses when RAG context is used (DISABLED: Claude Code framework bug)"
}
]
}
],
"Notification": [
{
"hooks": [
{
"type": "command",
"command": "python ${CLAUDE_PLUGIN_ROOT}/src/rag_cli_plugin/hooks/error-handler.py",
"name": "error-handler",
"priority": 70,
"description": "Provides graceful error handling with helpful troubleshooting tips"
}
]
},
{
"hooks": [
{
"type": "command",
"command": "python ${CLAUDE_PLUGIN_ROOT}/src/rag_cli_plugin/hooks/plugin-state-change.py",
"name": "plugin-state-change",
"priority": 60,
"description": "Persists RAG settings across Claude Code restarts"
}
]
}
],
"PreToolUse": [
{
"hooks": [
{
"type": "command",
"command": "python ${CLAUDE_PLUGIN_ROOT}/src/rag_cli_plugin/hooks/document-indexing.py",
"name": "document-indexing",
"priority": 50,
"description": "Automatically indexes new or modified documents"
}
]
}
],
"SessionStart": [
{
"hooks": [
{
"type": "command",
"command": "python ${CLAUDE_PLUGIN_ROOT}/src/rag_cli_plugin/hooks/session-start.py",
"name": "session-start",
"priority": 40,
"description": "Initializes RAG-CLI resources when a Claude Code session starts"
}
]
}
],
"SessionEnd": [
{
"hooks": [
{
"type": "command",
"command": "python ${CLAUDE_PLUGIN_ROOT}/src/rag_cli_plugin/hooks/session-end.py",
"name": "session-end",
"priority": 30,
"description": "Cleanup and state persistence when Claude Code session ends"
}
]
}
]
}
}

View File

@@ -0,0 +1,41 @@
{
"name": "rag-cli",
"description": "Local RAG system with embedded Multi-Agent Framework for Claude Code plugin",
"version": "2.0.0",
"author": {
"name": "DiaTech",
"email": "support@rag-cli.dev",
"url": "https://github.com/ItMeDiaTech/rag-cli"
},
"skills": [
"./src/rag_cli_plugin/skills"
],
"commands": [
"./src/rag_cli_plugin/commands"
],
"hooks": [
"./.claude-plugin/hooks.json"
],
"mcp": {
"rag-cli": {
"command": "python",
"args": [
"-m",
"rag_cli_plugin.mcp.unified_server"
],
"env": {
"PYTHONUNBUFFERED": "1",
"RAG_CLI_MODE": "claude_code",
"RAG_CLI_ROOT": "${CLAUDE_PLUGIN_ROOT}",
"PYTHONPATH": "${CLAUDE_PLUGIN_ROOT}:${CLAUDE_PLUGIN_ROOT}/src"
},
"alwaysAllowTrust": false,
"features": {
"autoStart": true,
"retryOnFailure": true,
"maxRetries": 3,
"retryDelayMs": 1000
}
}
}
}

3
README.md Normal file
View File

@@ -0,0 +1,3 @@
# rag-cli
Local RAG system with embedded Multi-Agent Framework for Claude Code plugin

101
plugin.lock.json Normal file
View File

@@ -0,0 +1,101 @@
{
"$schema": "internal://schemas/plugin.lock.v1.json",
"pluginId": "gh:ItMeDiaTech/rag-cli:",
"normalized": {
"repo": null,
"ref": "refs/tags/v20251128.0",
"commit": "d41dff4351d8372b69cf167a7bf7dce070e5d4eb",
"treeHash": "a7b52cd20069d6544987e3af33ae528e100af3c58668cf04b8c6565a68137d97",
"generatedAt": "2025-11-28T10:11:43.025507Z",
"toolVersion": "publish_plugins.py@0.2.0"
},
"origin": {
"remote": "git@github.com:zhongweili/42plugin-data.git",
"branch": "master",
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
},
"manifest": {
"name": "rag-cli",
"description": "Local RAG system with embedded Multi-Agent Framework for Claude Code plugin",
"version": "2.0.0"
},
"content": {
"files": [
{
"path": "README.md",
"sha256": "07a1ae0600f4cd267ab5d722a087106f2daa2ce934c952cfef360e13172c3662"
},
{
"path": ".claude-plugin/plugin.json",
"sha256": "07f5e3223927b535a46e17ae5e79b7cf13d8834f3ee59218c53d2daf92306406"
},
{
"path": ".claude-plugin/hooks.json",
"sha256": "b7d2cd9afed5ac36e00208039cac850ed1c1a25baedfe958c06cf41ebf38c212"
},
{
"path": "src/rag_cli_plugin/commands/update-rag.md",
"sha256": "b82d64da7a7a1ea304f0a337faf3679be1b8cd6a56b2030e75cab047285c4d63"
},
{
"path": "src/rag_cli_plugin/commands/rag-project.md",
"sha256": "ca122b0609204877f9c883dcada151b1fe689140ba3bd90843d3701bce7c4a28"
},
{
"path": "src/rag_cli_plugin/commands/search.md",
"sha256": "231a053682e915a03c8c8e4be09e259dbdb2f2ba3d681eabddfef08d43f237ee"
},
{
"path": "src/rag_cli_plugin/commands/rag-maf-config.md",
"sha256": "a4714ee51af7ea1f2129834a85be6f3c6f9fbc4fbf419c981b2c1a7933ba1a20"
},
{
"path": "src/rag_cli_plugin/commands/update_rag.py",
"sha256": "cce4bf19d4676037033d71cb087eafdabcfa052cb5db812025be0c54831ede77"
},
{
"path": "src/rag_cli_plugin/commands/rag-disable.md",
"sha256": "dc513bc399ca398c6ad39048b785fef8b67e4951a333aa7bc98840883560c141"
},
{
"path": "src/rag_cli_plugin/commands/__init__.py",
"sha256": "9216efe7a399c2c8b817f9b06d475c88921c59c786b72e14f4fe5f3fda7003c0"
},
{
"path": "src/rag_cli_plugin/commands/rag-enable.md",
"sha256": "a65e6f97b891214e2080a96a09893f786e1d6a5b2855dde038588a8776871728"
},
{
"path": "src/rag_cli_plugin/commands/rag_maf_config.py",
"sha256": "f8fa4a7e125e6a2ef37729351ae82ab100f2792a96b0a0b6250ce1e72c6b7e5f"
},
{
"path": "src/rag_cli_plugin/commands/rag_project_indexer.py",
"sha256": "12e7ac59d82073309dc02598945c742d24696a15d846e92092c511194be769c8"
},
{
"path": "src/rag_cli_plugin/skills/__init__.py",
"sha256": "e4f116632c535dc337dd80c379d591ab6c913cd64284e7ca4b8addf5e187d22a"
},
{
"path": "src/rag_cli_plugin/skills/rag-retrieval/__init__.py",
"sha256": "1bbd3befa06fae7f04f07dfbc2859246fb0e9b5dac43f823319320f5c59396a2"
},
{
"path": "src/rag_cli_plugin/skills/rag-retrieval/SKILL.md",
"sha256": "7cc1109e5ed7bfb38da301cce2b1ed7dcbaafc419b970372815b590832971f50"
},
{
"path": "src/rag_cli_plugin/skills/rag-retrieval/retrieve.py",
"sha256": "3be3697ac967d819c8d29c35edc5e266a98f5452242e55f0ab83fc2631d9b6e6"
}
],
"dirSha256": "a7b52cd20069d6544987e3af33ae528e100af3c58668cf04b8c6565a68137d97"
},
"security": {
"scannedAt": null,
"scannerVersion": null,
"flags": []
}
}

View File

@@ -0,0 +1,4 @@
"""RAG-CLI Plugin Commands Module.
Contains all slash commands for RAG-CLI Claude Code plugin.
"""

View File

@@ -0,0 +1,21 @@
# Disable RAG
Update config to disable automatic RAG enhancement.
**IMPORTANT**: Execute task and report status only. No additional commentary.
## Task
1. Update `config/rag_settings.json`: set `"enabled": false`
2. Report: "RAG disabled"
**DO NOT**:
- Explain when to disable RAG
- Describe what changes
- Provide usage examples
- Research or elaborate
**ONLY**:
- Update the config file
- Confirm completion with one line
- Stop

View File

@@ -0,0 +1,21 @@
# Enable RAG
Update config to enable automatic RAG enhancement.
**IMPORTANT**: Execute task and report status only. No additional commentary.
## Task
1. Update `config/rag_settings.json`: set `"enabled": true`
2. Report: "RAG enabled"
**DO NOT**:
- Explain what RAG is
- Describe benefits or features
- Provide usage examples
- Research or elaborate
**ONLY**:
- Update the config file
- Confirm completion with one line
- Stop

View File

@@ -0,0 +1,69 @@
# /rag-maf-config
Configure embedded Multi-Agent Framework (MAF) features for RAG-CLI.
## Usage
```
/rag-maf-config [OPTION]
```
## Options
- `status` - Show current MAF configuration and agent status
- `enable` - Enable MAF parallel execution
- `disable` - Disable MAF (RAG-only mode)
- `test-connection` - Test MAF connector health
- `list-agents` - List available agents
- `set-mode PARALLEL|SEQUENTIAL` - Set execution mode
## Examples
### Check MAF Status
```
/rag-maf-config status
```
### Enable MAF Features
```
/rag-maf-config enable
```
### Disable MAF (Fallback to RAG-only)
```
/rag-maf-config disable
```
### Test MAF Connectivity
```
/rag-maf-config test-connection
```
### List Available Agents
```
/rag-maf-config list-agents
```
### Set Execution Mode
```
/rag-maf-config set-mode PARALLEL
```
## Output
When successful, displays:
- [OK] MAF Status (enabled/disabled)
- Available Agents (7 total: debugger, developer, reviewer, tester, architect, documenter, optimizer)
- Execution Strategy (parallel/sequential)
- Timeout Configuration
- Health Check Results
## Notes
- MAF is **enabled by default** with parallel execution
- Disabling MAF falls back gracefully to **RAG-only mode**
- All 7 agents are embedded within the plugin
- Parallel execution runs RAG + MAF simultaneously for comprehensive results
- No external dependencies required for MAF functionality
## IMPORTANT: Execute only, no commentary

View File

@@ -0,0 +1,22 @@
# RAG Project Indexer
Analyze current project and index relevant documentation.
**IMPORTANT**: Execute indexer and show summary only. No additional commentary.
## Task
Run: `python ${CLAUDE_PLUGIN_ROOT}/src/rag_cli_plugin/commands/rag_project_indexer.py [ARGS]`
Show the indexing summary output only.
**DO NOT**:
- Explain the project analysis process
- Describe what will be indexed
- Provide examples or troubleshooting
- Add commentary before or after output
**ONLY**:
- Execute the command with any user-provided args
- Display the output
- Stop

View File

@@ -0,0 +1,250 @@
"""MAF Configuration Command for RAG-CLI Plugin.
Manages embedded Multi-Agent Framework settings and agent status.
"""
import sys
import json
import asyncio
from pathlib import Path
from typing import Optional
# Add plugin root to path
plugin_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(plugin_root.parent.parent))
from rag_cli.integrations.maf_connector import get_maf_connector
from rag_cli_plugin.services.output_formatter import OutputFormatter
from rag_cli_plugin.services.logger import get_logger
logger = get_logger(__name__)
class MAFConfigCommand:
    """Manages MAF configuration and status.

    Reads and writes ``config/rag_settings.json`` and queries the embedded
    MAF connector for agent availability and health.
    """

    def __init__(self):
        """Initialize MAF config command."""
        self.maf = get_maf_connector()
        self.formatter = OutputFormatter()
        # Navigate to project root: src -> RAG-CLI
        project_root = plugin_root.parent
        self.config_file = project_root / "config" / "rag_settings.json"

    def execute(self, option: Optional[str] = None) -> str:
        """Execute MAF configuration command.

        Args:
            option: Configuration option (status, enable, disable,
                test-connection, list-agents, "set-mode MODE"). None or
                "status" shows the current configuration.

        Returns:
            Formatted output string.
        """
        if not option or option == "status":
            return self._show_status()
        elif option == "enable":
            return self._enable_maf()
        elif option == "disable":
            return self._disable_maf()
        elif option == "test-connection":
            # The connector health check is async; drive it to completion here.
            return asyncio.run(self._test_connection())
        elif option == "list-agents":
            return self._list_agents()
        elif option.startswith("set-mode"):
            # Split once (the original split twice); default to PARALLEL when
            # no mode argument follows "set-mode".
            parts = option.split()
            mode = parts[-1] if len(parts) > 1 else "PARALLEL"
            return self._set_mode(mode)
        else:
            return f"Unknown option: {option}. Use: status, enable, disable, test-connection, list-agents, set-mode"

    def _show_status(self) -> str:
        """Show MAF configuration status.

        Returns:
            Formatted status output (markdown).
        """
        try:
            config = self._load_config()
            maf_config = config.get("maf", {})
            status = [
                "## MAF Configuration Status",
                "",
                f"**Status**: {'ENABLED' if maf_config.get('enabled') else 'DISABLED'}",
                f"**Mode**: {maf_config.get('mode', 'parallel').upper()}",
                f"**Fallback to RAG**: {'Yes' if maf_config.get('fallback_to_rag') else 'No'}",
                f"**Notifications**: {'Enabled' if maf_config.get('show_notifications') else 'Disabled'}",
                f"**Timeout**: {maf_config.get('timeout_seconds', 30)}s",
                f"**Available Agents**: {len(maf_config.get('agents', []))} agents",
                "",
                f"**Framework Status**: {'Embedded (v1.2.0)' if self.maf.is_available() else 'Not Available'}",
                f"**Available Agents**: {', '.join(self.maf.get_available_agents()) if self.maf.is_available() else 'None'}",
            ]
            return "\n".join(status)
        except Exception as e:
            logger.error(f"Failed to show MAF status: {e}")
            return f"Error retrieving MAF status: {e}"

    def _enable_maf(self) -> str:
        """Enable MAF features.

        Returns:
            Confirmation message.
        """
        try:
            config = self._load_config()
            # setdefault guards against a settings file that predates the
            # "maf"/"orchestration" sections (plain indexing raised KeyError).
            config.setdefault("maf", {})["enabled"] = True
            config.setdefault("orchestration", {})["enable_maf"] = True
            self._save_config(config)
            return "SUCCESS: **MAF enabled successfully**\n\nParallel RAG + MAF execution is now active.\n- All 7 agents: debugger, developer, reviewer, tester, architect, documenter, optimizer\n- Execution mode: PARALLEL (simultaneous RAG + MAF)\n- Fallback to RAG-only if MAF unavailable: Yes"
        except Exception as e:
            logger.error(f"Failed to enable MAF: {e}")
            return f"ERROR: Error enabling MAF: {e}"

    def _disable_maf(self) -> str:
        """Disable MAF features.

        Returns:
            Confirmation message.
        """
        try:
            config = self._load_config()
            # Same KeyError guard as _enable_maf for sparse config files.
            config.setdefault("maf", {})["enabled"] = False
            config.setdefault("orchestration", {})["enable_maf"] = False
            self._save_config(config)
            return "SUCCESS: **MAF disabled**\n\nFalling back to RAG-only retrieval mode.\n- Vector search + keyword search (BM25)\n- Semantic caching enabled\n- Online retrieval fallback (GitHub, StackOverflow, ArXiv, Tavily)"
        except Exception as e:
            logger.error(f"Failed to disable MAF: {e}")
            return f"ERROR: Error disabling MAF: {e}"

    async def _test_connection(self) -> str:
        """Test MAF connector health.

        Returns:
            Health check result (markdown).
        """
        try:
            health = await self.maf.health_check()
            result = [
                "## MAF Connector Health Check",
                "",
                f"**Overall Status**: {'HEALTHY' if health['status'] == 'healthy' else 'UNAVAILABLE'}",
                f"**MAF Type**: {health.get('maf_type', 'embedded')}",
                f"**Location**: {health.get('maf_location', 'src/agents/maf/')}",
                f"**Version**: {health.get('maf_version', '1.2.2')}",
                f"**Available Agents**: {len(health.get('available_agents', []))}",
            ]
            if health.get('available_agents'):
                result.append("")
                result.append("**Agents Ready**:")
                for agent in health['available_agents']:
                    result.append(f" - {agent}")
            return "\n".join(result)
        except Exception as e:
            logger.error(f"Health check failed: {e}")
            return f"ERROR: Health check failed: {e}"

    def _list_agents(self) -> str:
        """List available MAF agents.

        Returns:
            Formatted agent list (markdown).
        """
        agents = self.maf.get_available_agents()
        if not agents:
            return "ERROR: No MAF agents available. Embedded MAF framework may not be initialized."
        # Static descriptions; unknown agent names fall back to 'Agent'.
        descriptions = {
            'debugger': 'Error analysis and troubleshooting',
            'developer': 'Code implementation and generation',
            'reviewer': 'Code quality and security analysis',
            'tester': 'Test creation and validation',
            'architect': 'System design and query planning',
            'documenter': 'Documentation generation',
            'optimizer': 'Performance optimization'
        }
        result = ["## Available MAF Agents", "", "All 7 agents embedded in plugin:", ""]
        for agent in agents:
            desc = descriptions.get(agent, 'Agent')
            result.append(f"- **{agent.capitalize()}**: {desc}")
        result.append("")
        result.append("**Execution Strategy**: Parallel (all agents run simultaneously)")
        result.append("**Timeout per Agent**: 30 seconds")
        result.append("**Max Parallel Agents**: 3 (for resource management)")
        return "\n".join(result)

    def _set_mode(self, mode: str) -> str:
        """Set MAF execution mode.

        Args:
            mode: Execution mode (PARALLEL or SEQUENTIAL, case-insensitive).

        Returns:
            Confirmation message.
        """
        try:
            mode = mode.upper()
            if mode not in ["PARALLEL", "SEQUENTIAL"]:
                return f"ERROR: Invalid mode '{mode}'. Use: PARALLEL or SEQUENTIAL"
            config = self._load_config()
            # Guard against a missing "maf" section (see _enable_maf).
            config.setdefault("maf", {})["mode"] = mode.lower()
            self._save_config(config)
            if mode == "PARALLEL":
                return "SUCCESS: **Execution mode set to PARALLEL**\n\nRAG and MAF agents will execute simultaneously for maximum coverage and comprehensiveness."
            else:
                return "SUCCESS: **Execution mode set to SEQUENTIAL**\n\nRAG executes first, then MAF agents run on results. Slower but more resource-efficient."
        except Exception as e:
            logger.error(f"Failed to set mode: {e}")
            return f"ERROR: Error setting mode: {e}"

    def _load_config(self) -> dict:
        """Load RAG settings configuration.

        Returns:
            Configuration dictionary.

        Raises:
            FileNotFoundError: When the settings file does not exist.
        """
        if not self.config_file.exists():
            raise FileNotFoundError(f"Config file not found: {self.config_file}")
        with open(self.config_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _save_config(self, config: dict) -> None:
        """Save RAG settings configuration.

        Args:
            config: Configuration dictionary to persist as JSON.
        """
        self.config_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.config_file, 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=2)
        # NOTE(review): keyword-arg logging assumes a structlog-style logger
        # from rag_cli_plugin.services.logger — stdlib logging would reject it.
        logger.info("MAF configuration updated", config_file=str(self.config_file))
def main():
    """Main entry point for /rag-maf-config command.

    Joins all CLI arguments into one option string so that two-word options
    such as ``set-mode PARALLEL`` reach MAFConfigCommand.execute intact; the
    original read only ``sys.argv[1]`` and silently dropped the mode value,
    making ``execute``'s ``set-mode`` parsing always fall back to PARALLEL.
    """
    option = " ".join(sys.argv[1:]) if len(sys.argv) > 1 else None
    command = MAFConfigCommand()
    result = command.execute(option)
    print(result)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,641 @@
#!/usr/bin/env python3
"""RAG Project Indexer - Automatically index project-relevant documentation.
This script analyzes a project to detect languages, frameworks, and dependencies,
then searches for and indexes relevant documentation.
"""
import sys
import json
import time
import re
import yaml
from pathlib import Path
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
from collections import defaultdict
from rag_cli.core.config import get_config
from rag_cli.core.online_retriever import OnlineRetriever
from rag_cli.core.document_processor import get_document_processor
from rag_cli.core.vector_store import get_vector_store
from rag_cli.core.embeddings import get_embedding_generator
from rag_cli.core.output import warning, error
from rag_cli.core.web_scraper import DocumentationScraperFactory
from rag_cli_plugin.services.logger import get_logger
logger = get_logger(__name__)
# Simple print-based output for better compatibility
def print_header(text: str):
    """Print *text* framed by 60-character '=' separator rules."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(text)
    print(rule)
@dataclass
class DetectedTechnology:
    """Detected technology in project."""
    # Display name, e.g. "Python" or "React".
    name: str
    type: str  # 'language', 'framework', 'library', 'tool'
    # Version string as found in the manifest (may be a range); None when unknown.
    version: Optional[str] = None
    # Detection confidence in [0, 1]; 1.0 for explicit manifest entries,
    # lower for heuristic signals such as file-extension counts.
    confidence: float = 1.0
    # File (or label) that triggered detection, for traceability.
    source_file: Optional[str] = None
@dataclass
class DocumentationSource:
    """Documentation source to fetch."""
    # Technology the docs belong to (matches DetectedTechnology.name).
    name: str
    # Root URL of the documentation site.
    url: str
    priority: int  # 1 = highest
    doc_type: str  # 'official', 'tutorial', 'reference', 'examples'
@dataclass
class IndexingResult:
    """Result of indexing operation."""
    # Name of the documentation source that was processed.
    source: str
    # Number of chunks added to the vector store (0 on failure).
    documents_added: int
    # True when scraping, chunking and embedding all succeeded.
    success: bool
    # Error description when success is False; None otherwise.
    error: Optional[str] = None
    # Wall-clock time spent on this source.
    duration_seconds: float = 0.0
class ProjectAnalyzer:
    """Analyzes a project tree to detect languages, frameworks and libraries.

    Detection combines three signals, in decreasing confidence order:
    package/manifest files, file-extension counts, and framework-specific
    code patterns (e.g. Django's manage.py). Results are deduplicated by
    technology name.
    """

    def __init__(self, project_path: Path):
        """Initialize project analyzer.

        Args:
            project_path: Path to project root
        """
        self.project_path = project_path
        self.detected_tech: List[DetectedTechnology] = []

    def analyze(self) -> List[DetectedTechnology]:
        """Analyze project and detect technologies.

        Returns:
            List of detected technologies (deduplicated by name).
        """
        print("Analyzing project structure...")
        # Detect from package/manifest files (highest confidence).
        self._analyze_python()
        self._analyze_javascript()
        self._analyze_typescript()
        self._analyze_rust()
        self._analyze_go()
        self._analyze_java()
        # Detect languages from file extensions (heuristic, confidence 0.8).
        self._analyze_file_extensions()
        # Detect frameworks from code patterns and directory structure.
        self._detect_frameworks()
        print(f"[OK] Detected {len(self.detected_tech)} technologies")
        return self.detected_tech

    def _add_once(self, tech: DetectedTechnology) -> None:
        """Append *tech* unless a technology with the same name is recorded.

        Fixes duplicate entries in the original implementation, which e.g.
        recorded "Python" twice when both requirements.txt and
        pyproject.toml were present.
        """
        if not any(t.name == tech.name for t in self.detected_tech):
            self.detected_tech.append(tech)

    def _analyze_python(self):
        """Analyze Python project files (requirements.txt, pyproject.toml)."""
        req_file = self.project_path / "requirements.txt"
        if req_file.exists():
            self._add_once(DetectedTechnology(
                name="Python",
                type="language",
                confidence=1.0,
                source_file="requirements.txt"
            ))
            # Map of PyPI distribution names to (display name, type).
            # NOTE: the PyPI package for PyTorch is "torch"; the original
            # "pytorch" key could never match a real requirements file, so
            # both keys are accepted here. Hoisted out of the line loop.
            framework_map = {
                'django': ('Django', 'framework'),
                'flask': ('Flask', 'framework'),
                'fastapi': ('FastAPI', 'framework'),
                'anthropic': ('Anthropic SDK', 'library'),
                'langchain': ('LangChain', 'framework'),
                'faiss': ('FAISS', 'library'),
                'numpy': ('NumPy', 'library'),
                'pandas': ('Pandas', 'library'),
                'torch': ('PyTorch', 'framework'),
                'pytorch': ('PyTorch', 'framework'),
                'tensorflow': ('TensorFlow', 'framework'),
            }
            with open(req_file, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    # Parse the distribution name; stops at version specifiers.
                    match = re.match(r'^([a-zA-Z0-9\-_]+)', line)
                    if not match:
                        continue
                    package = match.group(1).lower()
                    if package in framework_map:
                        name, tech_type = framework_map[package]
                        self._add_once(DetectedTechnology(
                            name=name,
                            type=tech_type,
                            source_file="requirements.txt"
                        ))
        # pyproject.toml also marks a Python project (deduplicated above).
        pyproject = self.project_path / "pyproject.toml"
        if pyproject.exists():
            self._add_once(DetectedTechnology(
                name="Python",
                type="language",
                confidence=1.0,
                source_file="pyproject.toml"
            ))

    def _analyze_javascript(self):
        """Analyze JavaScript project files (package.json)."""
        package_json = self.project_path / "package.json"
        if not package_json.exists():
            return
        self._add_once(DetectedTechnology(
            name="JavaScript",
            type="language",
            confidence=1.0,
            source_file="package.json"
        ))
        try:
            with open(package_json, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Runtime and dev dependencies are treated alike for detection.
            deps = {**data.get('dependencies', {}), **data.get('devDependencies', {})}
            framework_map = {
                'react': ('React', 'framework'),
                'vue': ('Vue.js', 'framework'),
                'angular': ('Angular', 'framework'),
                'express': ('Express', 'framework'),
                'next': ('Next.js', 'framework'),
                'svelte': ('Svelte', 'framework'),
            }
            for package, version in deps.items():
                package_lower = package.lower()
                if package_lower in framework_map:
                    name, tech_type = framework_map[package_lower]
                    self._add_once(DetectedTechnology(
                        name=name,
                        type=tech_type,
                        version=version,
                        source_file="package.json"
                    ))
        except Exception as e:
            # A malformed package.json must not abort the whole analysis.
            logger.warning(f"Failed to parse package.json: {e}")

    def _analyze_typescript(self):
        """Analyze TypeScript project files (tsconfig.json)."""
        tsconfig = self.project_path / "tsconfig.json"
        if tsconfig.exists():
            self._add_once(DetectedTechnology(
                name="TypeScript",
                type="language",
                confidence=1.0,
                source_file="tsconfig.json"
            ))

    def _analyze_rust(self):
        """Analyze Rust project files (Cargo.toml)."""
        cargo_toml = self.project_path / "Cargo.toml"
        if cargo_toml.exists():
            self._add_once(DetectedTechnology(
                name="Rust",
                type="language",
                confidence=1.0,
                source_file="Cargo.toml"
            ))

    def _analyze_go(self):
        """Analyze Go project files (go.mod)."""
        go_mod = self.project_path / "go.mod"
        if go_mod.exists():
            self._add_once(DetectedTechnology(
                name="Go",
                type="language",
                confidence=1.0,
                source_file="go.mod"
            ))

    def _analyze_java(self):
        """Analyze Java project files (pom.xml, build.gradle)."""
        pom_xml = self.project_path / "pom.xml"
        build_gradle = self.project_path / "build.gradle"
        if pom_xml.exists():
            self._add_once(DetectedTechnology(
                name="Java",
                type="language",
                confidence=1.0,
                source_file="pom.xml"
            ))
        elif build_gradle.exists():
            self._add_once(DetectedTechnology(
                name="Java",
                type="language",
                confidence=1.0,
                source_file="build.gradle"
            ))

    def _analyze_file_extensions(self):
        """Detect languages from file-extension counts (confidence 0.8)."""
        extension_map = {
            '.py': 'Python',
            '.js': 'JavaScript',
            '.ts': 'TypeScript',
            '.rs': 'Rust',
            '.go': 'Go',
            '.java': 'Java',
            '.cpp': 'C++',
            '.c': 'C',
            '.rb': 'Ruby',
            '.php': 'PHP',
        }
        for ext, lang in extension_map.items():
            # NOTE(review): rglob walks the whole tree, including vendored
            # directories such as node_modules — counts may be inflated.
            count = len(list(self.project_path.rglob(f'*{ext}')))
            # Require at least 3 files to avoid flagging one-off scripts;
            # _add_once already skips languages detected from manifests.
            if count >= 3:
                self._add_once(DetectedTechnology(
                    name=lang,
                    type="language",
                    confidence=0.8,
                    source_file=f"file_analysis ({count} files)"
                ))

    def _detect_frameworks(self):
        """Detect frameworks from code patterns and directory structure."""
        # Django: presence of manage.py at the project root.
        if (self.project_path / "manage.py").exists():
            self._add_once(DetectedTechnology(
                name="Django",
                type="framework",
                confidence=1.0,
                source_file="manage.py"
            ))
        # Flask: an app.py that imports flask (check the first 3 matches).
        for app_file in list(self.project_path.rglob("app.py"))[:3]:
            try:
                content = app_file.read_text(encoding='utf-8', errors='replace')
            except Exception as e:
                logger.debug(f"Could not parse {app_file.name} for Flask detection: {e}")
                continue
            if 'from flask import' in content or 'import flask' in content:
                self._add_once(DetectedTechnology(
                    name="Flask",
                    type="framework",
                    confidence=0.9,
                    source_file=str(app_file.relative_to(self.project_path))
                ))
                break
class DocumentationFetcher:
    """Fetches documentation for detected technologies."""

    def __init__(self):
        """Initialize fetcher and load documentation sources from config."""
        self.doc_sources = self._load_documentation_sources()

    def _load_documentation_sources(self) -> Dict[str, List[tuple]]:
        """Load documentation sources from YAML configuration file.

        Falls back to built-in defaults when the config file is missing
        or unreadable.

        Returns:
            Dictionary mapping technology names to lists of
            (url, priority, doc_type) tuples
        """
        # Get project root (3 levels up from this file)
        project_root = Path(__file__).resolve().parents[3]
        config_file = project_root / "config" / "documentation_sources.yaml"
        if config_file.exists():
            try:
                with open(config_file, 'r', encoding='utf-8') as f:
                    # yaml.safe_load returns None for an empty document;
                    # coerce to {} so .get below cannot raise AttributeError.
                    config = yaml.safe_load(f) or {}
                doc_sources_dict = config.get('documentation_sources', {})
                # Convert YAML structure to the internal tuple format,
                # skipping sources explicitly disabled via "enabled: false".
                result = {}
                for tech_name, sources in doc_sources_dict.items():
                    result[tech_name] = [
                        (source['url'], source['priority'], source['doc_type'])
                        for source in sources
                        if source.get('enabled', True)
                    ]
                logger.info(f"Loaded documentation sources for {len(result)} technologies from {config_file}")
                return result
            except Exception as e:
                logger.warning(f"Failed to load documentation sources from {config_file}: {e}")
                # Fall through to defaults
        # Fallback to default sources if config missing
        logger.info("Using default documentation sources")
        return self._get_default_sources()

    def _get_default_sources(self) -> Dict[str, List[tuple]]:
        """Get default documentation sources as fallback.

        Returns:
            Dictionary mapping technology names to lists of
            (url, priority, doc_type) tuples
        """
        return {
            "Python": [("https://docs.python.org/3/", 1, "official")],
            "JavaScript": [("https://developer.mozilla.org/en-US/docs/Web/JavaScript", 1, "official")],
            "TypeScript": [("https://www.typescriptlang.org/docs/", 1, "official")],
            "React": [("https://react.dev/", 1, "official")],
            "Django": [("https://docs.djangoproject.com/", 1, "official")],
            "Flask": [("https://flask.palletsprojects.com/", 1, "official")],
        }

    def get_sources(self, technologies: List[DetectedTechnology]) -> List[DocumentationSource]:
        """Get documentation sources for detected technologies.

        Args:
            technologies: List of detected technologies

        Returns:
            List of documentation sources, sorted by ascending priority
            (1 = highest). Technologies without a known source are skipped.
        """
        sources = []
        for tech in technologies:
            if tech.name in self.doc_sources:
                for url, priority, doc_type in self.doc_sources[tech.name]:
                    sources.append(DocumentationSource(
                        name=tech.name,
                        url=url,
                        priority=priority,
                        doc_type=doc_type
                    ))
        # Stable sort keeps per-technology ordering within equal priorities.
        sources.sort(key=lambda x: x.priority)
        return sources
class ProjectIndexer:
    """Orchestrates project documentation indexing.

    Pipeline: analyze the project (ProjectAnalyzer) -> resolve documentation
    sources (DocumentationFetcher) -> scrape, chunk, embed and store each
    source (_index_source). Heavy components come from project factory
    functions configured by get_config().
    """

    def __init__(self):
        """Initialize project indexer with shared pipeline components."""
        self.config = get_config()
        self.document_processor = get_document_processor()
        self.vector_store = get_vector_store()
        self.embedding_generator = get_embedding_generator()
        # NOTE(review): online_retriever is constructed but never used in
        # this class — presumably reserved for online fallback; confirm
        # before removing.
        self.online_retriever = OnlineRetriever(self.config)

    def index_project(self, project_path: Path) -> Dict[str, Any]:
        """Index documentation for a project.

        Args:
            project_path: Path to project root

        Returns:
            Summary of indexing results: always contains "success" and
            "duration_seconds"; on success also technologies, sources,
            results and total_documents; on failure an "error" message.
        """
        start_time = time.time()
        print(f"\nStarting project indexing for: {project_path}\n")
        # Step 1: Analyze project
        print_header("Step 1: Analyzing Project")
        analyzer = ProjectAnalyzer(project_path)
        technologies = analyzer.analyze()
        if not technologies:
            # Nothing detected — bail out early with a failure summary.
            warning("No technologies detected in project")
            return {
                "success": False,
                "error": "No technologies detected",
                "duration_seconds": time.time() - start_time
            }
        # Display detected technologies
        print("\nDetected Technologies:")
        for tech in technologies:
            # Only show the confidence figure when detection was heuristic.
            confidence_str = f"({tech.confidence:.0%})" if tech.confidence < 1.0 else ""
            print(f" - {tech.name} [{tech.type}] {confidence_str}")
            if tech.source_file:
                print(f" from {tech.source_file}")
        # Step 2: Get documentation sources
        print()
        print_header("Step 2: Finding Documentation Sources")
        fetcher = DocumentationFetcher()
        sources = fetcher.get_sources(technologies)
        if not sources:
            warning("No documentation sources found for detected technologies")
            return {
                "success": False,
                "error": "No documentation sources available",
                "technologies": [asdict(t) for t in technologies],
                "duration_seconds": time.time() - start_time
            }
        print(f"\nFound {len(sources)} documentation source(s):")
        for source in sources:
            print(f" - {source.name}: {source.url} [{source.doc_type}]")
        # Step 3: Fetch and index documentation
        print()
        print_header("Step 3: Fetching and Indexing Documentation")
        print("This may take several minutes...\n")
        results = []
        total_docs = 0
        for source in sources:
            try:
                result = self._index_source(source)
                results.append(result)
                if result.success:
                    total_docs += result.documents_added
                    print(f" [OK] {source.name}: {result.documents_added} documents ({result.duration_seconds:.1f}s)")
                else:
                    print(f" [FAILED] {source.name}: {result.error}")
            except Exception as e:
                # A single failing source must not abort the run; record the
                # failure and continue with the next source.
                logger.error(f"Failed to index {source.name}: {e}")
                results.append(IndexingResult(
                    source=source.name,
                    documents_added=0,
                    success=False,
                    error=str(e)
                ))
        # Summary
        elapsed = time.time() - start_time
        print()
        print_header("Indexing Complete!")
        print("\nSummary:")
        print(f" - Technologies detected: {len(technologies)}")
        print(f" - Documentation sources: {len(sources)}")
        print(f" - Total documents indexed: {total_docs}")
        print(f" - Total time: {elapsed:.1f}s")
        # Dataclasses are converted to plain dicts so the caller can
        # serialize the summary directly to JSON.
        return {
            "success": True,
            "technologies": [asdict(t) for t in technologies],
            "sources": [asdict(s) for s in sources],
            "results": [asdict(r) for r in results],
            "total_documents": total_docs,
            "duration_seconds": elapsed
        }

    def _index_source(self, source: DocumentationSource) -> IndexingResult:
        """Index a single documentation source.

        Scrapes the source URL, chunks the scraped pages, embeds the
        chunks and persists them in the vector store.

        Args:
            source: Documentation source

        Returns:
            Indexing result (never raises; failures are captured in the
            result's error field)
        """
        start_time = time.time()
        try:
            logger.info(f"Indexing documentation from: {source.url}")
            # Step 1: Scrape documentation
            scraper = DocumentationScraperFactory.create_scraper(source.name, source.url)
            scraped_docs = scraper.scrape_documentation(
                source.url,
                source.name,
                source.doc_type
            )
            if not scraped_docs:
                return IndexingResult(
                    source=source.name,
                    documents_added=0,
                    success=False,
                    error="No documents scraped from source",
                    duration_seconds=time.time() - start_time
                )
            logger.info(f"Scraped {len(scraped_docs)} documents from {source.name}")
            # Step 2: Process and chunk documents
            all_chunks = []
            for doc in scraped_docs:
                # Create document text with title and content so the title
                # is embedded together with the body.
                full_text = f"# {doc.title}\n\n{doc.content}"
                # Process document into chunks; metadata is carried through
                # to the vector store for citation/attribution.
                chunks = self.document_processor.process_text(
                    text=full_text,
                    metadata={
                        'source': doc.metadata['source'],
                        'url': doc.url,
                        'title': doc.title,
                        'doc_type': doc.doc_type,
                        'scraped_at': doc.metadata['scraped_at']
                    }
                )
                all_chunks.extend(chunks)
            if not all_chunks:
                return IndexingResult(
                    source=source.name,
                    documents_added=0,
                    success=False,
                    error="No chunks generated from scraped content",
                    duration_seconds=time.time() - start_time
                )
            logger.info(f"Generated {len(all_chunks)} chunks from {source.name}")
            # Step 3: Generate embeddings and add to vector store
            texts = [chunk.content for chunk in all_chunks]
            metadatas = [chunk.metadata for chunk in all_chunks]
            embeddings = self.embedding_generator.encode(texts, show_progress=False)
            # Add to vector store
            self.vector_store.add(embeddings, metadatas)
            # Save vector store after each source so a later failure does
            # not lose already-indexed data.
            self.vector_store.save()
            logger.info(f"Successfully indexed {len(all_chunks)} chunks from {source.name}")
            return IndexingResult(
                source=source.name,
                documents_added=len(all_chunks),
                success=True,
                duration_seconds=time.time() - start_time
            )
        except Exception as e:
            logger.error(f"Failed to index {source.name}: {e}", exc_info=True)
            return IndexingResult(
                source=source.name,
                documents_added=0,
                success=False,
                error=str(e),
                duration_seconds=time.time() - start_time
            )
def main():
    """Main entry point for project indexing."""
    try:
        # Operate on the directory the command was launched from.
        project_path = Path.cwd()

        banner = "=" * 60
        print(banner)
        print("RAG Project Documentation Indexer")
        print(banner)
        print()

        # Run the full indexing pipeline over the current project.
        indexer = ProjectIndexer()
        result = indexer.index_project(project_path)

        # Persist the result summary as JSON for later inspection.
        result_file = project_path / "data" / "project_indexing_result.json"
        result_file.parent.mkdir(parents=True, exist_ok=True)
        with open(result_file, 'w') as f:
            json.dump(result, f, indent=2)

        print()
        print(f"Results saved to: {result_file}")
        print()
        print("Next Steps:")
        print(" 1. The system has analyzed your project")
        print(" 2. Relevant documentation sources have been identified")
        print(" 3. Use online fallback queries for real-time documentation access")
        print(" 4. For offline indexing, implement full doc crawler (see TODO)")

        # Shell convention: 0 on success, 1 on failure.
        return 0 if result.get("success") else 1
    except Exception as e:
        logger.error(f"Project indexing failed: {e}", exc_info=True)
        error(f"Indexing failed: {e}")
        return 1


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,23 @@
# RAG Document Search
Execute the retrieval script and return results only.
**IMPORTANT**: Run the command, show output, then STOP. Do NOT provide commentary, explanations, or additional research.
## Execution
Run: `python scripts/retrieve.py --query "[USER_QUERY]"`
Display the output exactly as returned. Do not add any additional text before or after the output.
**DO NOT**:
- Explain how the command works
- Provide additional context or suggestions
- Research related topics
- Offer troubleshooting advice unless the command fails
- Describe what you're about to do
**ONLY**:
- Execute the command
- Show the raw output
- Stop

View File

@@ -0,0 +1,22 @@
# Update RAG Plugin
Synchronize RAG-CLI plugin files.
**IMPORTANT**: Execute sync and show summary only. No additional commentary.
## Task
Run: `python scripts/sync_plugin.py [ARGS]`
Show the sync summary output only.
**DO NOT**:
- Explain what will be synced
- Describe the sync process
- Provide examples or troubleshooting
- Add commentary before or after output
**ONLY**:
- Execute the command with any user-provided args
- Display the output
- Stop

View File

@@ -0,0 +1,174 @@
"""
/update-rag slash command implementation.
Allows users to update RAG-CLI to the latest version from GitHub.
IMPORTANT: NO EMOJIS - All output must be professional text only.
"""
import subprocess
import sys
import requests
from pathlib import Path
from typing import Dict, Any, Optional
def get_plugin_root() -> Path:
"""Resolve plugin root directory."""
import os
plugin_root = os.environ.get('CLAUDE_PLUGIN_ROOT')
if plugin_root:
return Path(plugin_root)
return Path(__file__).parent.parent.parent.parent
def check_current_version() -> str:
    """Get currently installed version."""
    try:
        import rag_cli_plugin
    except ImportError:
        # Package not importable -- report an unknown version rather than fail.
        return 'unknown'
    return getattr(rag_cli_plugin, '__version__', 'unknown')
def fetch_latest_version() -> Optional[str]:
    """Check GitHub for latest release version."""
    try:
        resp = requests.get(
            "https://api.github.com/repos/ItMeDiaTech/rag-cli/releases/latest",
            timeout=5
        )
        if not resp.ok:
            return None
        # Release tags look like "vX.Y.Z"; drop the leading "v".
        return resp.json()['tag_name'].lstrip('v')
    except Exception as e:
        # Network/parse failures are non-fatal: the caller continues anyway.
        print(f"Warning: Could not fetch latest version: {e}")
        return None
def update_from_git() -> bool:
    """Pull latest from GitHub and reinstall."""
    root = get_plugin_root()
    print("\nUpdating RAG-CLI from GitHub...")
    print("-" * 60)
    try:
        # Only a git checkout can be updated in place.
        if not (root / ".git").exists():
            print("Error: Plugin is not a git repository")
            print("Please reinstall from Claude Code marketplace or clone from GitHub")
            return False

        print("Pulling latest changes...")
        # Prefer a remote literally named "github" when one exists.
        remotes = subprocess.run(
            ["git", "remote"],
            cwd=root,
            capture_output=True,
            text=True
        )
        chosen = "github" if "github" in remotes.stdout else "origin"
        pulled = subprocess.run(
            ["git", "pull", chosen, "master"],
            cwd=root,
            capture_output=True,
            text=True,
            check=True
        )
        print(pulled.stdout)

        # Editable reinstall so the updated sources take effect immediately.
        print("\nReinstalling package...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            "-e", str(root),
            "--upgrade", "--quiet"
        ])
        print("\nUpdate complete!")
        print("-" * 60)
        return True
    except subprocess.CalledProcessError as e:
        print(f"Error during update: {e}")
        if e.stderr:
            print(f"Details: {e.stderr}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False
def handle_update_command(args: Dict[str, Any]) -> Dict[str, Any]:
    """Main handler for /update-rag command.

    Args:
        args: Slash-command arguments (currently unused).

    Returns:
        Dict with keys ``success``, ``message``, ``old_version``,
        ``new_version`` describing the outcome of the update attempt.
    """
    print("\n" + "=" * 60)
    print("RAG-CLI Update")
    print("=" * 60)
    current = check_current_version()
    latest = fetch_latest_version()
    print(f"\nCurrent version: {current}")
    if latest:
        print(f"Latest version: {latest}")
    else:
        print("Latest version: Unable to determine (continuing anyway)")
    if latest and current == latest:
        print("\nYou are already on the latest version!")
        print("Running update anyway to ensure all files are current...")
    # Perform update (git pull + editable pip reinstall)
    success = update_from_git()
    if success:
        # Re-read the installed version after the update. FIX: guard the
        # import/reload -- previously an unimportable or broken package
        # raised here and crashed the handler instead of returning a
        # structured result like every other code path does.
        try:
            import importlib
            import rag_cli_plugin
            importlib.reload(rag_cli_plugin)
            new_version = getattr(rag_cli_plugin, '__version__', 'unknown')
        except Exception:
            new_version = 'unknown'
        print("\n" + "=" * 60)
        if current != new_version:
            print(f"Successfully updated from {current} to {new_version}")
        else:
            print("Update completed - all files are current")
        print("=" * 60)
        return {
            "success": True,
            "message": f"Updated to version {new_version}",
            "old_version": current,
            "new_version": new_version
        }
    else:
        print("\n" + "=" * 60)
        print("Update failed - see errors above")
        print("=" * 60)
        return {
            "success": False,
            "message": "Update failed",
            "old_version": current,
            "new_version": None
        }
# Entry point for slash command
def main():
    """CLI entry point for testing."""
    outcome = handle_update_command({})
    # Map the structured result onto a conventional process exit code.
    return 0 if outcome["success"] else 1


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,6 @@
"""Skills module for RAG-CLI plugin.
This module contains agent skills that can be executed by Claude Code.
"""
__version__ = "2.0.0"

View File

@@ -0,0 +1,84 @@
# RAG Retrieval Skill
Query your local document knowledge base using semantic search and get AI-powered answers.
## Overview
This skill enables RAG (Retrieval-Augmented Generation) queries against your locally indexed documents. It uses semantic search to find relevant documents and generates answers using Claude Haiku.
## Usage
```
/skill rag-retrieval "How to configure the API?"
```
## Features
- **Semantic Search**: Uses vector similarity to find relevant documents
- **Hybrid Retrieval**: Combines vector search with keyword matching for better accuracy
- **Context-Aware Answers**: Uses claude-haiku-4-5-20251001 to generate responses
- **Citation Support**: Shows sources for generated answers
- **Performance Monitoring**: Tracks query latency and accuracy
## Arguments
- `query` (required): Your question or search query
- `--top-k` (optional): Number of documents to retrieve (default: 5)
- `--threshold` (optional): Minimum similarity score (default: 0.7)
- `--mode` (optional): Search mode - "hybrid", "vector", or "keyword" (default: "hybrid")
## Examples
### Basic Query
```
/skill rag-retrieval "What is the authentication process?"
```
### Retrieve More Context
```
/skill rag-retrieval "How to handle errors?" --top-k 10
```
### Vector-Only Search
```
/skill rag-retrieval "API rate limits" --mode vector
```
## Configuration
The skill uses the following configuration from `config/default.yaml`:
- `retrieval.top_k`: Default number of documents to retrieve
- `retrieval.hybrid_ratio`: Balance between vector and keyword search (0.7 = 70% vector)
- `claude.model`: LLM model for response generation
- `claude.max_tokens`: Maximum response length
## Performance
Typical latencies:
- Vector search: <100ms
- End-to-end response: <5 seconds
- Indexing: ~0.5s per 100 documents
## Requirements
- Indexed documents in `data/vectors/`
- Valid Anthropic API key in environment
- At least 2GB RAM for vector operations
## Troubleshooting
### No Results Found
- Ensure documents are indexed: `python scripts/index.py --input data/documents`
- Lower the similarity threshold: `--threshold 0.5`
- Try keyword mode if vector search fails
### Slow Responses
- Reduce top_k value for faster retrieval
- Check if vector index is optimized for your document count
- Monitor memory usage with `python -m src.monitoring.tcp_server`
### API Errors
- Verify ANTHROPIC_API_KEY environment variable
- Check API rate limits and quota
- Review logs in `logs/rag_cli.log`

View File

@@ -0,0 +1,4 @@
"""RAG Retrieval Skill for RAG-CLI Plugin.
Provides RAG document retrieval skill for Claude Code agent execution.
"""

View File

@@ -0,0 +1,277 @@
#!/usr/bin/env python3
"""RAG Retrieval Skill for Claude Code.
This skill provides semantic search capabilities over locally indexed documents
and generates AI-powered answers using Claude Haiku.
"""
import sys
import argparse
import time
from typing import Dict, Any
from pathlib import Path
from rag_cli.core.config import get_config
from rag_cli.core.vector_store import get_vector_store
from rag_cli.core.embeddings import get_embedding_model
from rag_cli.core.retrieval_pipeline import HybridRetriever
from rag_cli.core.claude_integration import ClaudeAssistant
from rag_cli.core.claude_code_adapter import get_adapter, is_claude_code_mode
from rag_cli_plugin.services.logger import get_logger
from rag_cli_plugin.services.tcp_server import metrics_collector
logger = get_logger(__name__)
def parse_arguments():
    """Parse and return the command-line arguments for the skill."""
    arg_parser = argparse.ArgumentParser(
        description="RAG Retrieval Skill - Query your document knowledge base"
    )
    # Positional: the user's natural-language question.
    arg_parser.add_argument(
        "query", type=str,
        help="Your question or search query"
    )
    # Retrieval tuning knobs.
    arg_parser.add_argument(
        "--top-k", type=int, default=5,
        help="Number of documents to retrieve (default: 5)"
    )
    arg_parser.add_argument(
        "--threshold", type=float, default=0.7,
        help="Minimum similarity score (default: 0.7)"
    )
    arg_parser.add_argument(
        "--mode", type=str, default="hybrid",
        choices=["hybrid", "vector", "keyword"],
        help="Search mode (default: hybrid)"
    )
    # Output / behavior flags.
    arg_parser.add_argument(
        "--verbose", action="store_true",
        help="Show detailed output"
    )
    arg_parser.add_argument(
        "--no-llm", action="store_true",
        help="Skip LLM generation, only show retrieved documents"
    )
    return arg_parser.parse_args()
def format_output(result: Dict[str, Any], verbose: bool = False) -> str:
    """Format the result for CLI output.

    Args:
        result: Result dictionary from retrieval (keys: "answer",
            "sources", "metrics"; sources expose .source/.score/.text)
        verbose: Whether to show detailed output

    Returns:
        Formatted markdown-style output string
    """
    output = []
    # Add answer if available
    if "answer" in result:
        output.append("## Answer\n")
        output.append(result["answer"])
        output.append("\n")
    # Add sources
    if "sources" in result and result["sources"]:
        output.append("\n## Sources\n")
        for i, doc in enumerate(result["sources"], 1):
            output.append(f"\n### [{i}] {doc.source}")
            if verbose:
                output.append(f"**Score**: {doc.score:.3f}")
                output.append(f"**Content**: {doc.text[:200]}...")
            else:
                output.append(f"*Relevance: {doc.score:.1%}*")
    # Add metrics if verbose
    if verbose and "metrics" in result:
        output.append("\n## Performance Metrics\n")
        metrics = result["metrics"]
        # FIX: perform_retrieval records search time under "retrieval_ms";
        # the previous keys "vector_search_ms"/"reranking_ms" were never
        # populated, so the report always displayed 0ms.
        output.append(f"- Retrieval: {metrics.get('retrieval_ms', 0):.0f}ms")
        if "claude_api_ms" in metrics:
            output.append(f"- Claude API: {metrics.get('claude_api_ms', 0):.0f}ms")
        output.append(f"- Total: {metrics.get('total_ms', 0):.0f}ms")
    return "\n".join(output)
def perform_retrieval(
    query: str,
    top_k: int = 5,
    threshold: float = 0.7,
    mode: str = "hybrid",
    use_llm: bool = True
) -> Dict[str, Any]:
    """Perform RAG retrieval and generation.

    Args:
        query: User query
        top_k: Number of documents to retrieve
        threshold: Minimum similarity threshold
        mode: Search mode (hybrid, vector, keyword)
        use_llm: Whether to use LLM for answer generation

    Returns:
        Result dictionary with answer and sources. Keys: "query",
        "sources" (filtered documents), "metrics" (latency figures in
        ms), plus "answer" and, on failure, "error".
    """
    start_time = time.time()
    # Result skeleton; populated incrementally so even a failure path
    # returns a well-formed dictionary.
    result = {
        "query": query,
        "sources": [],
        "metrics": {}
    }
    try:
        # Initialize components (config, vector store, embedding model
        # are project-level singletons obtained via their accessors).
        logger.info(f"Processing query: {query}", mode=mode, top_k=top_k)
        config = get_config()
        vector_store = get_vector_store()
        embedding_model = get_embedding_model()
        # Create retriever combining vector and keyword search
        retriever = HybridRetriever(
            vector_store=vector_store,
            embedding_model=embedding_model,
            config=config
        )
        # Perform retrieval; dispatch on the requested search mode.
        retrieval_start = time.time()
        if mode == "vector":
            documents = retriever.vector_search(query, top_k=top_k)
        elif mode == "keyword":
            documents = retriever.keyword_search(query, top_k=top_k)
        else:  # hybrid
            documents = retriever.search(query, top_k=top_k)
        retrieval_time = (time.time() - retrieval_start) * 1000
        result["metrics"]["retrieval_ms"] = retrieval_time
        # Filter by threshold: drop documents below the minimum
        # similarity score before they reach the answer generator.
        filtered_docs = [
            doc for doc in documents
            if doc.score >= threshold
        ]
        result["sources"] = filtered_docs
        # Record metrics for the monitoring TCP server.
        metrics_collector.record_query()
        metrics_collector.record_latency("retrieval", retrieval_time)
        if not filtered_docs:
            # Nothing survived the threshold -- return guidance instead
            # of calling the LLM with an empty context.
            logger.warning("No documents found above threshold",
                           threshold=threshold,
                           max_score=max([d.score for d in documents]) if documents else 0)
            result["answer"] = "No relevant documents found for your query. Try lowering the threshold or using different keywords."
            return result
        # Generate answer based on mode
        if use_llm:
            # Check if we're in Claude Code mode
            if is_claude_code_mode():
                logger.info("Claude Code mode - formatting context for Claude")
                # Use adapter to format response for Claude Code: no API
                # call is made; the host Claude instance consumes the
                # formatted context directly.
                adapter = get_adapter()
                formatted_response = adapter.format_skill_response(filtered_docs, query)
                result["answer"] = formatted_response.get("context", "")
                result["mode"] = "claude_code"
                result["message"] = formatted_response.get("message", "")
                logger.info("Context formatted for Claude Code",
                            docs_count=len(filtered_docs))
            else:
                # Standalone mode - use Claude API to synthesize an answer
                # from the retrieved documents, timing the round trip.
                claude_start = time.time()
                assistant = ClaudeAssistant(config)
                response = assistant.generate_response(query, filtered_docs)
                claude_time = (time.time() - claude_start) * 1000
                result["metrics"]["claude_api_ms"] = claude_time
                result["answer"] = response["answer"]
                metrics_collector.record_latency("claude_api", claude_time)
                logger.info("Answer generated successfully",
                            answer_length=len(response["answer"]),
                            sources_used=len(filtered_docs))
        # Calculate total time across retrieval + generation.
        total_time = (time.time() - start_time) * 1000
        result["metrics"]["total_ms"] = total_time
        metrics_collector.record_latency("end_to_end", total_time)
        # Update component status so monitoring reflects a healthy run.
        metrics_collector.update_component_status("vector_store", "operational")
        metrics_collector.update_component_status("retriever", "operational")
        if use_llm:
            metrics_collector.update_component_status("claude", "operational")
        return result
    except Exception as e:
        # Boundary handler: convert any failure into a structured result
        # (with "error" set) rather than propagating to the CLI caller.
        logger.error(f"Retrieval failed: {e}")
        metrics_collector.record_error()
        result["error"] = str(e)
        result["answer"] = f"An error occurred during retrieval: {e}"
        # Update component status
        metrics_collector.update_component_status("retriever", "error")
        return result
def main():
    """Main function for the RAG retrieval skill."""
    args = parse_arguments()

    # Refuse to run without an on-disk vector index. The project root is
    # taken to be four directory levels above this file.
    project_root = Path(__file__).resolve().parents[4]
    index_path = project_root / "data" / "vectors" / "chroma_db"
    if not index_path.exists():
        print("Error: No vector index found. Please index documents first:")
        print(" rag-index ./data/documents --recursive")
        sys.exit(1)

    # Run retrieval (and generation, unless --no-llm was given).
    result = perform_retrieval(
        query=args.query,
        top_k=args.top_k,
        threshold=args.threshold,
        mode=args.mode,
        use_llm=not args.no_llm
    )

    # Format and print output
    print(format_output(result, verbose=args.verbose))

    # Return error code if retrieval failed
    if "error" in result:
        sys.exit(1)


if __name__ == "__main__":
    main()