commit df4751ce28787a667c8a00f2b19b939941790be9 Author: Zhongwei Li Date: Sat Nov 29 18:03:17 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..90c995c --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,15 @@ +{ + "name": "semantic-search", + "description": "Natural language semantic search for codebases and notes using odino CLI with BGE embeddings. Commands for indexing and searching with automatic directory traversal.", + "version": "1.0.0", + "author": { + "name": "Mae Capacite", + "email": "cadrianmae@users.noreply.github.com" + }, + "skills": [ + "./skills" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6b62cb4 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# semantic-search + +Natural language semantic search for codebases and notes using odino CLI with BGE embeddings. Commands for indexing and searching with automatic directory traversal. diff --git a/commands/here.md b/commands/here.md new file mode 100644 index 0000000..d535bc8 --- /dev/null +++ b/commands/here.md @@ -0,0 +1,235 @@ +--- +description: Search from current directory upward to find indexed codebase +argument-hint: <query> +allowed-tools: Bash, Read +disable-model-invocation: true +--- + +# here - Semantic search with directory traversal + +Search from current directory upward to find and search the nearest semantic index. Shows where the index was found for transparency. + +## Usage + +``` +/semq:here <query> +``` + +**Arguments:** +- `query` - Natural language description of what to find (required) + +## What It Does + +1. Starts from current working directory +2. Traverses up the directory tree looking for `.odino/` +3. Shows where the index was found +4. Runs semantic search from that location +5. 
Displays results with scores and file paths + +**Difference from `/semq:search`:** +- `/semq:search` - Assumes you know where the index is +- `/semq:here` - Explicitly shows index location (useful from subdirectories) + +## Examples + +**Search from subdirectory:** +```bash +cd src/utils/ +/semq:here validation functions +``` + +**Find authentication from deep directory:** +```bash +cd src/routes/api/v1/ +/semq:here authentication logic +``` + +## Implementation + +```bash +# Helper function to find .odino directory with verbose output +find_odino_root_verbose() { + local dir="$PWD" + local depth=0 + + while [[ "$dir" != "/" ]]; do + if [[ -d "$dir/.odino" ]]; then + echo "FOUND:$dir" + return 0 + fi + + # Stop at git root as a boundary + if [[ -d "$dir/.git" ]] && [[ ! -d "$dir/.odino" ]]; then + echo "NOTFOUND:git-boundary:$dir" + return 1 + fi + + dir="$(dirname "$dir")" + depth=$((depth + 1)) + + # Safety limit + if [[ $depth -gt 20 ]]; then + echo "NOTFOUND:max-depth" + return 1 + fi + done + + echo "NOTFOUND:filesystem-root" + return 1 +} + +# Get query from arguments +QUERY="$*" + +if [[ -z "$QUERY" ]]; then + echo "Error: Query required" + echo "Usage: /semq:here " + exit 1 +fi + +# Show current location +echo "Searching from: $PWD" +echo "" + +# Find index with verbose output +RESULT=$(find_odino_root_verbose) +EXIT_CODE=$? + +if [[ $EXIT_CODE -eq 0 ]]; then + ODINO_ROOT="${RESULT#FOUND:}" + + # Show where index was found + if [[ "$ODINO_ROOT" == "$PWD" ]]; then + echo "โœ“ Index found in current directory" + else + # Calculate relative path for clarity + REL_PATH=$(realpath --relative-to="$PWD" "$ODINO_ROOT") + echo "โœ“ Index found at: $REL_PATH" + fi + echo " Location: $ODINO_ROOT" + echo "" + + # Run search + RESULTS=$(cd "$ODINO_ROOT" && odino query -q "$QUERY" 2>&1) + + if [[ $? 
-eq 0 ]]; then + echo "$RESULTS" + echo "" + echo "๐Ÿ’ก Tip: File paths are relative to: $ODINO_ROOT" + else + echo "Search failed:" + echo "$RESULTS" + fi +else + # Parse failure reason + REASON="${RESULT#NOTFOUND:}" + + echo "โœ— No semantic search index found" + echo "" + + case "$REASON" in + git-boundary:*) + GIT_ROOT="${REASON#git-boundary:}" + echo "Searched up to git repository root: $GIT_ROOT" + echo "The repository is not indexed." + ;; + filesystem-root) + echo "Searched all the way to filesystem root" + echo "No index found in any parent directory." + ;; + max-depth) + echo "Reached maximum search depth (20 levels)" + echo "Index might be higher up or doesn't exist." + ;; + esac + + echo "" + echo "To create an index, navigate to your project root and run:" + echo " cd " + echo " /semq:index" +fi +``` + +## Output Example + +**From subdirectory:** +``` +Searching from: /home/user/project/src/utils + +โœ“ Index found at: ../.. + Location: /home/user/project + +Score: 0.87 | Path: src/utils/validation.js +Score: 0.81 | Path: src/middleware/validate.js +Score: 0.74 | Path: src/schemas/user.js + +๐Ÿ’ก Tip: File paths are relative to: /home/user/project +``` + +**No index found:** +``` +Searching from: /home/user/project/src/utils + +โœ— No semantic search index found +Searched up to git repository root: /home/user/project +The repository is not indexed. + +To create an index, navigate to your project root and run: + cd + /semq:index +``` + +## When to Use + +Use `/semq:here` when: +- Working in a subdirectory +- Want to see where the index is located +- Unsure if directory is indexed +- Want explicit feedback about index location + +Use `/semq:search` when: +- Already know the directory is indexed +- Don't need index location info +- Want simpler output + +## Behavior + +**Traversal stops at:** +1. `.odino/` directory found (success) +2. `.git/` directory without `.odino/` (git repository boundary) +3. Filesystem root `/` (no more parents) +4. 
20 levels up (safety limit) + +**Why stop at git root?** +- Projects are typically git repositories +- Prevents searching into parent projects +- Makes "not found" more meaningful + +## Related Commands + +- `/semq:search <query>` - Search without traversal info +- `/semq:status` - Check index status +- `/semq:index` - Create index + +## Tips + +1. **Use from subdirectories** - That's what this command is for +2. **Check the index location** - Helps understand project structure +3. **Git boundary** - Index should be at the git root for best results; traversal never looks above it +4. **Relative paths** - Results show paths relative to index location + +## Troubleshooting + +**"Searched up to git repository root"** +- The git repository is not indexed +- Solution: Run `/semq:index` from the git root + +**"Searched all the way to filesystem root"** +- No index found anywhere in parent directories +- Not in a git repository +- Solution: Create index with `/semq:index` + +**"Reached maximum search depth"** +- Very deep directory structure (>20 levels) +- Index might be higher up +- Solution: Navigate closer to project root and try again diff --git a/commands/index.md b/commands/index.md new file mode 100644 index 0000000..4115d9d --- /dev/null +++ b/commands/index.md @@ -0,0 +1,253 @@ +--- +description: Index directory for semantic search +argument-hint: [path] [--force] +allowed-tools: Bash +disable-model-invocation: true +--- + +# index - Create semantic search index + +Index a directory for semantic search using odino with BGE embeddings. + +## Usage + +``` +/semq:index [path] [--force] +``` + +**Arguments:** +- `path` - Directory to index (optional, defaults to current directory) +- `--force` - Force reindex even if index exists + +## What It Does + +1. Checks if directory already has an index +2. Runs `odino index` with BGE model (BAAI/bge-small-en-v1.5) +3. Creates `.odino/` directory with: + - `config.json` - Configuration settings + - `chroma_db/` - Vector database with embeddings +4. 
Shows progress and completion statistics + +## Examples + +**Index current directory:** +``` +/semq:index +``` + +**Index specific directory:** +``` +/semq:index ~/projects/myapp +``` + +**Force reindex:** +``` +/semq:index --force +``` + +## Implementation + +```bash +# Parse arguments +INDEX_PATH="." +FORCE_FLAG="" + +for arg in "$@"; do + case "$arg" in + --force) + FORCE_FLAG="--force" + ;; + *) + if [[ -d "$arg" ]]; then + INDEX_PATH="$arg" + else + echo "Warning: Directory not found: $arg" + fi + ;; + esac +done + +# Convert to absolute path +INDEX_PATH="$(cd "$INDEX_PATH" && pwd)" + +echo "Indexing directory: $INDEX_PATH" + +# Check if already indexed +if [[ -d "$INDEX_PATH/.odino" ]] && [[ -z "$FORCE_FLAG" ]]; then + echo "" + echo "โš ๏ธ Directory is already indexed" + echo "" + echo "To reindex, use: /semq:index --force" + echo "To check status: /semq:status" + exit 0 +fi + +# Create .odinoignore if it doesn't exist +if [[ ! -f "$INDEX_PATH/.odinoignore" ]]; then + cat > "$INDEX_PATH/.odinoignore" << 'EOF' +# Build artifacts +build/ +dist/ +*.pyc +__pycache__/ + +# Dependencies +node_modules/ +venv/ +.venv/ +.virtualenv/ + +# Config and secrets +.env +.env.local +*.secret +.git/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db +EOF + echo "Created .odinoignore file" +fi + +# Run indexing with BGE model +echo "" +echo "Indexing with BGE model (this may take a moment)..." +echo "" + +(cd "$INDEX_PATH" && odino index $FORCE_FLAG --model BAAI/bge-small-en-v1.5) + +if [[ $? -eq 0 ]]; then + echo "" + echo "โœ… Indexing complete!" 
+ echo "" + echo "You can now search with:" + echo " /semq:search " +else + echo "" + echo "โŒ Indexing failed" + echo "" + echo "Troubleshooting:" + echo "- Ensure odino is installed: pipx install odino" + echo "- Check disk space" + echo "- Try again with --force flag" +fi +``` + +## Output Example + +``` +Indexing directory: /home/user/project +Created .odinoignore file + +Indexing with BGE model (this may take a moment)... + +Indexed 63 files +Generated 142 chunks (529.5 KB) +Model: BAAI/bge-small-en-v1.5 + +โœ… Indexing complete! + +You can now search with: + /semq:search +``` + +## When to Use + +Use `/semq:index` when: +- Setting up semantic search for a new project +- Major code changes have been made +- Switching to a different embedding model +- Index is corrupted or outdated + +## .odinoignore + +The command automatically creates `.odinoignore` to exclude: +- Build artifacts (build/, dist/) +- Dependencies (node_modules/, venv/) +- Configuration files (.env, secrets) +- IDE files (.vscode/, .idea/) +- Version control (.git/) + +**Customize `.odinoignore`** for your project: + +``` +# Project-specific ignores +generated/ +*.min.js +vendor/ +``` + +## Model Selection + +The command uses **BAAI/bge-small-en-v1.5** by default: +- **Size:** 133MB (vs 600MB for default model) +- **Parameters:** 33M (vs 308M) +- **Quality:** ~62-63 MTEB score (vs ~69) +- **Speed:** Much faster indexing +- **Memory:** Lower RAM usage + +**Why BGE?** +- 78% smaller download +- 90% fewer parameters +- Faster indexing and search +- Only ~7 point quality drop +- Better for most use cases + +## Performance Tips + +1. **Use .odinoignore** - Exclude unnecessary files +2. **GPU acceleration** - Indexing is much faster with CUDA +3. **Batch size** - Adjust in `.odino/config.json` (16 for GPU, 8 for CPU) +4. 
**Reindex periodically** - After major code changes + +## Troubleshooting + +**"Command not found: odino"** +```bash +pipx install odino +``` + +**GPU out of memory** +```bash +# Edit .odino/config.json after first index +{ + "embedding_batch_size": 8 # or 4 +} +# Then reindex +/semq:index --force +``` + +**Slow indexing** +```bash +# BGE model is already the fastest option +# But you can reduce batch size if needed +# Edit .odino/config.json: "embedding_batch_size": 8 +``` + +## Related Commands + +- `/semq:status` - Check index status +- `/semq:search <query>` - Search the index +- `/semq:here <query>` - Search with traversal + +## Configuration + +After indexing, configuration is stored in `.odino/config.json`: + +```json +{ + "model_name": "BAAI/bge-small-en-v1.5", + "embedding_batch_size": 16, + "chunk_size": 512, + "chunk_overlap": 50 +} +``` + +Edit this file to change settings, then reindex with `--force`. diff --git a/commands/search.md b/commands/search.md new file mode 100644 index 0000000..8ff53b6 --- /dev/null +++ b/commands/search.md @@ -0,0 +1,205 @@ +--- +description: Search indexed codebase using natural language semantic search +argument-hint: <query> +allowed-tools: Bash, Read +disable-model-invocation: true +--- + +# search - Semantic search in current directory + +Search the current directory's semantic index using natural language queries. + +## Usage + +``` +/semq:search <query> +``` + +**Arguments:** +- `query` - Natural language description of what to find (required) + +## What It Does + +1. Finds `.odino` directory by traversing up from current directory +2. Runs `odino query` from the index location +3. Parses and formats results with scores and file paths +4. Optionally reads top results for context +5. 
Suggests using code-pointer to open relevant files + +## Examples + +**Find error handling (conceptual):** +``` +User: /semq:search error handling + +Claude infers: "error handling exception management try catch validation" + +Results: +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“ +โ”ƒ File โ”ƒ Score โ”ƒ +โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ +โ”‚ knowledge/Error Handling.md โ”‚ 0.876 โ”‚ +โ”‚ โ†’ "Error handling is the process of..." +โ”‚ โ†’ Shows: Key Concepts, Best Practices +โ”‚ +โ”‚ middleware/errorHandler.js โ”‚ 0.745 โ”‚ +โ”‚ โ†’ Shows: Global error handler implementation +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + +Claude reads top result and summarizes key concepts. +``` + +**Find database code:** +``` +User: /semq:search DB connection code + +Claude infers query with Python example: +"database connection pooling setup +import mysql.connector +pool = mysql.connector.pooling.MySQLConnectionPool( + pool_name='mypool', + pool_size=5, + host='localhost' +) +connection = pool.get_connection()" + +Results: +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“ +โ”ƒ File โ”ƒ Score โ”ƒ +โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ +โ”‚ src/db/connection.js โ”‚ 0.924 โ”‚ +โ”‚ โ†’ const pool = mysql.createPool({...}) +โ”‚ โ†’ Shows: Connection pooling config with env vars +โ”‚ โ†’ Includes: Error handling and testing +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + +Claude shows code snippet and explains 
pooling strategy. +``` + +**Find algorithms:** +``` +User: /semq:search BFS algorithm Python + +Claude infers query with code: +"breadth first search BFS graph traversal +def bfs(graph, start): + visited = set() + queue = [start] + while queue: + node = queue.pop(0) + if node not in visited: + visited.add(node) + queue.extend(graph[node])" + +Results: +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“ +โ”ƒ File โ”ƒ Score โ”ƒ +โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ +โ”‚ knowledge/Search Algorithms.md โ”‚ 0.891 โ”‚ +โ”‚ โ†’ Types: Uninformed (BFS, DFS) vs Informed (A*, Greedy) +โ”‚ โ†’ When to use each algorithm +โ”‚ โ†’ Includes mermaid diagram +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + +Claude reads note and explains algorithm categories. +``` + +## Implementation + +Use the directory traversal helper to find the index, then run the search: + +```bash +# Helper function to find .odino directory +find_odino_root() { + local dir="$PWD" + while [[ "$dir" != "/" ]]; do + if [[ -d "$dir/.odino" ]]; then + echo "$dir" + return 0 + fi + dir="$(dirname "$dir")" + done + return 1 +} + +# Get query from arguments +QUERY="$*" + +if [[ -z "$QUERY" ]]; then + echo "Error: Query required" + echo "Usage: /semq:search " + exit 1 +fi + +# Find index and search +if ODINO_ROOT=$(find_odino_root); then + echo "Searching in: $ODINO_ROOT" + echo "" + + # Run search + RESULTS=$(cd "$ODINO_ROOT" && odino query -q "$QUERY" 2>&1) + + if [[ $? -eq 0 ]]; then + echo "$RESULTS" + echo "" + echo "๐Ÿ’ก Tip: Use code-pointer to open files at specific lines" + else + echo "Search failed:" + echo "$RESULTS" + fi +else + echo "No semantic search index found in current path." 
+ echo "" + echo "To create an index, run:" + echo " /semq:index" + echo "" + echo "This will index the current directory for semantic search." +fi +``` + +## Output Format + +Results are shown with similarity scores and file paths: + +``` +Searching in: /home/user/project + +Score: 0.89 | Path: src/auth/middleware.js +Score: 0.82 | Path: src/auth/jwt.js +Score: 0.75 | Path: src/middleware/passport.js + +💡 Tip: Use code-pointer to open files at specific lines +``` + +## Score Interpretation + +- **0.85-1.0**: Highly relevant, definitely check this +- **0.70-0.84**: Likely relevant, worth reviewing +- **0.60-0.69**: Possibly relevant, may contain related concepts +- **<0.60**: Weakly related, probably not useful + +## When to Use + +Use `/semq:search` when: +- You know the directory is indexed +- You want to find code by describing what it does +- You're exploring an unfamiliar codebase +- Grep/glob aren't working (too literal) + +Use `/semq:here` instead when: +- You're in a subdirectory and want traversal that stops at the git root (`/semq:search` also traverses upward, but without that boundary) +- You want to see where the index was found + +## Related Commands + +- `/semq:here <query>` - Search with directory info +- `/semq:status` - Check if directory is indexed +- `/semq:index` - Create a new index + +## Tips + +1. **Be conceptual** - Describe what the code does, not exact names +2. **Use natural language** - "authentication logic" not "auth" +3. **Check top 3-5 results** - Sometimes #3 is the best match +4. **Combine with grep** - Use semantic search to find area, grep for specifics +5. 
**Read files** - Use Read tool on top results for context diff --git a/commands/status.md b/commands/status.md new file mode 100644 index 0000000..9cd1b04 --- /dev/null +++ b/commands/status.md @@ -0,0 +1,131 @@ +--- +description: Show semantic search index status and statistics +argument-hint: [path] +allowed-tools: Bash +disable-model-invocation: true +--- + +# status - Check semantic search index status + +Show indexing status, statistics, and configuration for current or specified directory. + +## Usage + +``` +/semq:status [path] +``` + +**Arguments:** +- `path` - Directory to check (optional, defaults to current directory) + +## What It Does + +1. Finds `.odino` directory by traversing up from specified path +2. Runs `odino status` to show index information +3. Displays: + - Number of indexed files + - Total chunks generated + - Model name + - Index location + - Last modified date + +## Examples + +**Check current directory:** +``` +/semq:status +``` + +**Check specific directory:** +``` +/semq:status ~/projects/myapp +``` + +## Implementation + +```bash +# Helper function to find .odino directory +find_odino_root() { + local start_dir="${1:-.}" + local dir="$(cd "$start_dir" && pwd)" + + while [[ "$dir" != "/" ]]; do + if [[ -d "$dir/.odino" ]]; then + echo "$dir" + return 0 + fi + dir="$(dirname "$dir")" + done + return 1 +} + +# Get path argument or use current directory +CHECK_PATH="${1:-.}" + +# Find index and show status +if ODINO_ROOT=$(find_odino_root "$CHECK_PATH"); then + echo "Index found at: $ODINO_ROOT" + echo "" + + # Run status command + (cd "$ODINO_ROOT" && odino status) +else + echo "No semantic search index found" + if [[ "$CHECK_PATH" != "." 
]]; then + echo "Searched from: $CHECK_PATH" + fi + echo "" + echo "To create an index, run:" + echo " /semq:index" +fi +``` + +## Output Example + +``` +Index found at: /home/user/project + +Indexed files: 63 +Total chunks: 142 (529.5 KB) +Model: BAAI/bge-small-en-v1.5 +Last updated: 2025-11-15 22:30:45 +``` + +## When to Use + +Use `/semq:status` to: +- Check if a directory is indexed +- See how many files are indexed +- Verify which model is being used +- Check when index was last updated +- Troubleshoot search issues + +## Related Commands + +- `/semq:search <query>` - Search the index +- `/semq:index [path]` - Create or update index +- `/semq:here <query>` - Search with traversal + +## Tips + +1. **Before searching** - Run status to verify index exists +2. **After major changes** - Check if reindexing is needed +3. **Troubleshooting** - Use status to diagnose search issues +4. **Model verification** - Confirm BGE model is being used + +## Configuration + +The index configuration is stored in `.odino/config.json`: + +```json +{ + "model_name": "BAAI/bge-small-en-v1.5", + "embedding_batch_size": 16, + "chunk_size": 512, + "chunk_overlap": 50 +} +``` + +To change configuration: +1. Edit `.odino/config.json` in the indexed directory +2. 
Reindex with `/semq:index --force` diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..1920460 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,73 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:cadrianmae/claude-marketplace:plugins/semantic-search", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "9d7e9ed0fd0612cf572128722d9bf510b954e0a9", + "treeHash": "a67d2540e959b5c630ee9243e3d111155e87c9791cdd258f20076d07edb9d02c", + "generatedAt": "2025-11-28T10:14:28.300922Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "semantic-search", + "description": "Natural language semantic search for codebases and notes using odino CLI with BGE embeddings. Commands for indexing and searching with automatic directory traversal.", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "ff0b1df3fc9ff92f9d1991f658bdbf10bf489eb1011abfc3a352e63aa8f39ada" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "7f4c290da891fce66cb304167bb7fc0155fa5a2e8f5bd7a1ffe3f97c9bb605d2" + }, + { + "path": "commands/here.md", + "sha256": "c65dfef51df2724b9c502d02f977ce5d62b89c48e9240cc2146c01b2f9fac157" + }, + { + "path": "commands/search.md", + "sha256": "a18d128c6d7f2bd8cafafa13f31bc6cd658fd072469b23e52fcda521e102db4e" + }, + { + "path": "commands/status.md", + "sha256": "bee6d4d8850eac49e30f6e10e9904c8a546a61f808feda6d9881bce4818bc7d4" + }, + { + "path": "commands/index.md", + "sha256": "cb91da7341da1bbfd50dbb594da4825c08b48c6ef81c563a027803f5f056e7aa" + }, + { + "path": "skills/semantic-search/SKILL.md", + "sha256": "899f6fec7ac1ec803c8c669a557ae5167922ce883f2913ec252a8d4a92925280" + }, + { + "path": 
"skills/semantic-search/references/cli_basics.md", + "sha256": "1c37e217a020f93ebe9873220d8730c07d0d03c3991da9ef7efeb2393eb6bef2" + }, + { + "path": "skills/semantic-search/references/integration.md", + "sha256": "4536dee4b485c4c9b7a0e1bbf3d57d2d471bf4a20d1084dae022359a6152f640" + }, + { + "path": "skills/semantic-search/references/search_patterns.md", + "sha256": "640862e177bbdc2f5a1071efa8b260857bf8aaa46dbbd86535612017480df36f" + } + ], + "dirSha256": "a67d2540e959b5c630ee9243e3d111155e87c9791cdd258f20076d07edb9d02c" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/semantic-search/SKILL.md b/skills/semantic-search/SKILL.md new file mode 100644 index 0000000..15d4153 --- /dev/null +++ b/skills/semantic-search/SKILL.md @@ -0,0 +1,372 @@ +--- +name: semantic-search +description: Use semantic search to find relevant code and documentation when user asks about specific functionality, features, or implementation patterns. Automatically invoke when user asks "where is...", "how does... work", "find code that...", or similar conceptual queries. More powerful than grep for concept-based searches. Uses odino CLI with BGE embeddings for fully local semantic search. +allowed-tools: Bash, Read +--- + +# Semantic Search + +## Overview + +Enable natural language semantic search across codebases and notes using odino CLI with BGE embeddings. Unlike grep (exact text matching) or glob (filename patterns), semantic search finds code by what it does, not what it's called. 
+ +## When to Use This Skill + +Automatically invoke semantic search when the user: +- Asks "where is [concept]" or "how does [feature] work" +- Wants to find implementation of a concept/pattern +- Needs to understand codebase structure around a topic +- Searches for patterns by meaning, not exact text +- Asks exploratory questions like "show me authentication logic" + +**Do not use** for: +- Exact string matching (use grep) +- Filename patterns (use glob) +- Known file paths (use read) +- When the user explicitly requests grep/glob + +## Directory Traversal Logic + +Odino requires running commands from the directory containing `.odino/` config. To make this transparent (like git), use this helper function: + +```bash +# Function to find .odino directory by traversing up the directory tree +find_odino_root() { + local dir="$PWD" + while [[ "$dir" != "/" ]]; do + if [[ -d "$dir/.odino" ]]; then + echo "$dir" + return 0 + fi + dir="$(dirname "$dir")" + done + return 1 +} + +# Usage in commands +if ODINO_ROOT=$(find_odino_root); then + echo "Found index at: $ODINO_ROOT" + (cd "$ODINO_ROOT" && odino query -q "$QUERY") +else + echo "No .odino index found in current path" + echo "Suggestion: Run /semq:index to create an index" +fi +``` + +**Why this matters:** +- User can be in any subdirectory of their project +- Commands automatically find the project root (where `.odino/` lives) +- Mirrors git behavior (works from anywhere in the tree) + +## Quick Start + +### Check if Directory is Indexed + +Before searching, verify an index exists: + +```bash +if ODINO_ROOT=$(find_odino_root); then + (cd "$ODINO_ROOT" && odino status) +else + echo "No index found. 
Suggest running /semq:index" +fi +``` + +### Search Indexed Codebase + +```bash +# Basic search +odino query -q "authentication logic" + +# With directory traversal +if ODINO_ROOT=$(find_odino_root); then + (cd "$ODINO_ROOT" && odino query -q "$QUERY") +fi +``` + +### Parse and Present Results + +Odino returns results in a formatted table: +``` +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“ +โ”ƒ File โ”ƒ Score โ”ƒ Content โ”ƒ +โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ +โ”‚ knowledge/Search Algorithms.md โ”‚ 0.361 โ”‚ 1 --- โ”‚ +โ”‚ โ”‚ โ”‚ 2 tags: [todo/stub] โ”‚ +โ”‚ โ”‚ โ”‚ 3 module: CMPU 4010 โ”‚ +โ”‚ โ”‚ โ”‚ ... โ”‚ +โ”‚ โ”‚ โ”‚ 7 # Search Algorithms in AI โ”‚ +โ”‚ โ”‚ โ”‚ ... โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +Found 2 results +``` + +**Enhanced workflow:** +1. Parse table to extract file paths, scores, and content previews +2. Read top 2-3 results (score > 0.3) for full context +3. Summarize findings with explanations +4. Use code-pointer to open most relevant file +5. Suggest follow-up queries or related concepts + +## Query Inference + +Transform user requests into better semantic queries with realistic output examples. 
+ +### Example 1: Conceptual Query + +**User asks:** "error handling" +**Inferred query:** `error handling exception management try catch validation` +**Sample odino output:** +``` +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“ +โ”ƒ File โ”ƒ Score โ”ƒ Content โ”ƒ +โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ +โ”‚ knowledge/Error Handling.md โ”‚ 0.876 โ”‚ 1 --- โ”‚ +โ”‚ โ”‚ โ”‚ 2 tags: [software-eng, best- โ”‚ +โ”‚ โ”‚ โ”‚ 3 --- โ”‚ +โ”‚ โ”‚ โ”‚ 4 # Error Handling โ”‚ +โ”‚ โ”‚ โ”‚ 5 โ”‚ +โ”‚ โ”‚ โ”‚ 6 Error handling is the proc โ”‚ +โ”‚ โ”‚ โ”‚ 7 runtime errors gracefully โ”‚ +โ”‚ โ”‚ โ”‚ 8 system stability. โ”‚ +โ”‚ โ”‚ โ”‚ 9 โ”‚ +โ”‚ โ”‚ โ”‚ 10 ## Key Concepts โ”‚ +โ”‚ โ”‚ โ”‚ 11 - Try-catch blocks for syn โ”‚ +โ”‚ โ”‚ โ”‚ 12 - Promise rejection handli โ”‚ +โ”‚ โ”‚ โ”‚ 13 - Input validation to prev โ”‚ +โ”‚ โ”‚ โ”‚ 14 - Logging errors for debug โ”‚ +โ”‚ โ”‚ โ”‚ 15 - User-friendly error mess โ”‚ +โ”‚ โ”‚ โ”‚ 16 โ”‚ +โ”‚ โ”‚ โ”‚ 17 ## Best Practices โ”‚ +โ”‚ โ”‚ โ”‚ 18 1. Fail fast - validate ea โ”‚ +โ”‚ โ”‚ โ”‚ 19 2. Log with context - incl โ”‚ +โ”‚ โ”‚ โ”‚ 20 3. 
Don't swallow errors - โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +### Example 2: Code Query + +**User asks:** "DB connection code" +**Inferred query:** +``` +database connection pooling setup +import mysql.connector +pool = mysql.connector.pooling.MySQLConnectionPool( + pool_name="mypool", + pool_size=5, + host="localhost", + database="mydb" +) +connection = pool.get_connection() +``` +**Sample odino output:** +``` +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“ +โ”ƒ File โ”ƒ Score โ”ƒ Content โ”ƒ +โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ +โ”‚ src/db/connection.js โ”‚ 0.924 โ”‚ 1 const mysql = require('mys โ”‚ +โ”‚ โ”‚ โ”‚ 2 โ”‚ +โ”‚ โ”‚ โ”‚ 3 // Create connection pool โ”‚ +โ”‚ โ”‚ โ”‚ 4 const pool = mysql.createP โ”‚ +โ”‚ โ”‚ โ”‚ 5 host: process.env.DB_HOS โ”‚ +โ”‚ โ”‚ โ”‚ 6 user: process.env.DB_USE โ”‚ +โ”‚ โ”‚ โ”‚ 7 password: process.env.DB โ”‚ +โ”‚ โ”‚ โ”‚ 8 database: process.env.DB โ”‚ +โ”‚ โ”‚ โ”‚ 9 waitForConnections: true โ”‚ +โ”‚ โ”‚ โ”‚ 10 connectionLimit: 10, โ”‚ +โ”‚ โ”‚ โ”‚ 11 queueLimit: 0 โ”‚ +โ”‚ โ”‚ โ”‚ 12 }); โ”‚ +โ”‚ โ”‚ โ”‚ 13 โ”‚ +โ”‚ โ”‚ โ”‚ 14 // Test connection โ”‚ +โ”‚ โ”‚ โ”‚ 15 pool.getConnection((err, c โ”‚ +โ”‚ โ”‚ โ”‚ 16 if (err) { โ”‚ +โ”‚ โ”‚ โ”‚ 17 console.error('DB conn โ”‚ +โ”‚ โ”‚ โ”‚ 18 process.exit(1); โ”‚ +โ”‚ โ”‚ โ”‚ 19 } โ”‚ +โ”‚ โ”‚ โ”‚ 20 console.log('Connected t โ”‚ 
+โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +### Example 3: Algorithm Query (with code) + +**User asks:** "BFS algorithm in Python" +**Inferred query:** +``` +breadth first search BFS graph traversal queue +def bfs(graph, start): + visited = set() + queue = [start] + while queue: + node = queue.pop(0) + if node not in visited: + visited.add(node) + queue.extend(graph[node]) + return visited +``` +**Sample odino output:** +``` +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“ +โ”ƒ File โ”ƒ Score โ”ƒ Content โ”ƒ +โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ +โ”‚ knowledge/Search Algorithms.md โ”‚ 0.891 โ”‚ 1 --- โ”‚ +โ”‚ โ”‚ โ”‚ 2 tags: [ai, algorithms] โ”‚ +โ”‚ โ”‚ โ”‚ 3 module: CMPU 4010 AI โ”‚ +โ”‚ โ”‚ โ”‚ 4 --- โ”‚ +โ”‚ โ”‚ โ”‚ 5 # Search Algorithms in AI โ”‚ +โ”‚ โ”‚ โ”‚ 6 โ”‚ +โ”‚ โ”‚ โ”‚ 7 Algorithms for finding sol โ”‚ +โ”‚ โ”‚ โ”‚ 8 problem spaces. Used in pa โ”‚ +โ”‚ โ”‚ โ”‚ 9 game AI, and optimization. 
│ +│ │ │ 10 │ +│ │ │ 11 ## Types │ +│ │ │ 12 │ +│ │ │ 13 ### Uninformed Search │ +│ │ │ 14 - **BFS**: Explores level │ +│ │ │ 15 - **DFS**: Explores deeply │ +│ │ │ 16 - **Uniform Cost**: Expand │ +│ │ │ 17 │ +│ │ │ 18 ### Informed Search │ +│ │ │ 19 - **A***: Uses heuristic + │ +│ │ │ 20 - **Greedy**: Only conside │ +│ │ │ 21 - **Hill Climbing**: Local │ +└─────────────────────────────────┴──────────┴─────────────────────────────────┘ +``` + +### Inference Patterns + +- **Expand abbreviations:** DB → database, auth → authentication +- **Code queries include sample code:** User asks "connection pooling" → Query includes Python example with `pool.get_connection()` +- **Use specified language:** User mentions "JavaScript" → Use JavaScript syntax in query +- **Default to Python:** No language specified → Use Python code examples +- **Add related concepts:** "search" → include BFS, DFS, A* terminology +- **Add context words:** "handling", "management", "setup", "configuration" + +## Core Capabilities + +### 1. Semantic Search + +Find code by describing what it does, not exact text: + +**User asks:** "Where is the database connection handling?" + +**Workflow:** +1. Check if directory is indexed (use `find_odino_root`) +2. Run `odino query -q "database connection handling"` +3. Parse results and rank by score +4. Read top 2-3 results for context +5. Summarize findings with file paths +6. Suggest using code-pointer to open specific files + +**Example:** +```bash +if ODINO_ROOT=$(find_odino_root); then + RESULTS=$(cd "$ODINO_ROOT" && odino query -q "database connection handling") + # Parse results, read top files, summarize +else + echo "No index found. Would you like me to index this directory?"
+fi +``` + +### 2. Index Status Check + +Verify indexing status before operations: + +```bash +if ODINO_ROOT=$(find_odino_root); then + (cd "$ODINO_ROOT" && odino status) + # Shows: indexed files, model, last update +else + echo "No .odino index found" +fi +``` + +### 3. Integration with Other Tools + +**Semantic search → code-pointer:** +```bash +# After finding relevant file +echo "Found authentication logic in src/auth/middleware.js:42" +echo "Opening file..." +code -g src/auth/middleware.js:42 +``` + +**Semantic search → grep refinement:** +```bash +# Use semantic search to find the area +odino query -q "API endpoint handlers" +# Then use grep for exact matches in those files +grep -n "app.get\|app.post" src/routes/*.js +``` + +### 4. Handling Edge Cases + +**No index found:** +```bash +if ! ODINO_ROOT=$(find_odino_root); then + echo "No semantic search index found in current path." + echo "" + echo "To create an index, run:" + echo " /semq:index" + echo "" + echo "This will index the current directory for semantic search." +fi +``` + +**Empty results:** +```bash +if [[ -z "$RESULTS" ]]; then + echo "No results found for query: $QUERY" + echo "" + echo "Suggestions:" + echo "- Try a different query (more general or specific)" + echo "- Verify the index is up to date (/semq:status)" + echo "- Consider using grep for exact text matching" +fi +``` + +## Slash Commands + +This skill provides several slash commands for explicit control: + +- **`/semq:search <query>`** - Search indexed codebase +- **`/semq:here <query>`** - Search with automatic directory traversal +- **`/semq:index [path]`** - Index directory for semantic search +- **`/semq:status [path]`** - Show indexing status and stats + +## Best Practices + +1. **Always check for index first** - Use `find_odino_root` before search operations +2. **Parse results clearly** - Show scores, file paths, and context +3. **Combine with other tools** - Use code-pointer for opening files, grep for exact matches +4.
**Handle failures gracefully** - Suggest solutions when no index or no results +5. **Read top results** - Provide context by reading the most relevant files +6. **Use directory traversal** - Don't assume user is in project root + +## Effective Query Patterns + +Good queries are conceptual, not literal: +- ❌ "config.js" → Use glob instead +- ✅ "configuration loading logic" + +- ❌ "validateEmail" → Use grep instead +- ✅ "email validation functions" + +- ❌ "class AuthService" → Use grep instead +- ✅ "authentication service implementation" + +## Technical Details + +**Model:** BAAI/bge-small-en-v1.5 (33M params, ~133MB) +**Vector DB:** ChromaDB (stored in `.odino/chroma_db/`) +**Index location:** `.odino/` directory in project root +**Embedding batch size:** 16 (GPU) or 8 (CPU) + +## Reference Documentation + +For detailed information, see: + +- **`references/cli_basics.md`** - Odino CLI syntax, commands, and options +- **`references/search_patterns.md`** - Effective query examples and tips +- **`references/integration.md`** - Workflows with code-pointer, grep, glob + +Load these references as needed for deeper technical details or complex use cases. diff --git a/skills/semantic-search/references/cli_basics.md b/skills/semantic-search/references/cli_basics.md new file mode 100644 index 0000000..2423acb --- /dev/null +++ b/skills/semantic-search/references/cli_basics.md @@ -0,0 +1,226 @@ +# Odino CLI Basics + +Reference guide for odino CLI syntax, commands, and options. + +## Installation + +```bash +# Install via pipx (recommended) +pipx install odino + +# Verify installation +odino --version +``` + +## Core Commands + +### `odino index` + +Index a directory for semantic search.
+ +```bash +# Index current directory +odino index + +# Index specific directory +odino index /path/to/project + +# Specify model (recommended: BGE for efficiency) +odino index --model BAAI/bge-small-en-v1.5 + +# Force reindex (ignores existing index) +odino index --force +``` + +**Configuration:** +- Creates `.odino/` directory in indexed location +- Stores config in `.odino/config.json` +- Stores embeddings in `.odino/chroma_db/` + +**Default model:** EmmanuelEA/eea-embedding-gemma (600MB) +**Recommended model:** BAAI/bge-small-en-v1.5 (133MB, faster) + +### `odino query` + +Search indexed directory using natural language. + +```bash +# Basic search +odino query -q "authentication logic" + +# Search with custom number of results +odino query -q "database connections" -n 10 + +# Search specific path +odino query -q "error handling" -p /path/to/indexed/dir +``` + +**Options:** +- `-q, --query ` - Natural language search query (required) +- `-n, --num-results ` - Number of results to return (default: 5) +- `-p, --path ` - Path to indexed directory (default: current dir) + +**Output format:** +``` +Score: 0.85 | Path: src/auth/middleware.js +Score: 0.78 | Path: src/auth/tokens.js +Score: 0.72 | Path: src/utils/validation.js +``` + +### `odino status` + +Show indexing status and statistics. 
+ +```bash +# Status for current directory +odino status + +# Status for specific path +odino status -p /path/to/project +``` + +**Output includes:** +- Number of indexed files +- Total chunks generated +- Model name +- Index location +- Last modified date + +## Configuration File + +Location: `.odino/config.json` in indexed directory + +```json +{ + "model_name": "BAAI/bge-small-en-v1.5", + "embedding_batch_size": 16, + "chunk_size": 512, + "chunk_overlap": 50 +} +``` + +**Key settings:** +- `model_name` - Embedding model to use +- `embedding_batch_size` - Batch size for GPU/CPU (16 for GPU, 8 for CPU) +- `chunk_size` - Token length for text chunks +- `chunk_overlap` - Overlap between chunks + +## .odinoignore File + +Create `.odinoignore` in project root to exclude files/directories (gitignore syntax): + +``` +# Build artifacts +build/ +dist/ +*.pyc +__pycache__/ + +# Dependencies +node_modules/ +venv/ +.venv/ + +# Config files +.env +.env.local +*.secret +``` + +## Model Comparison + +| Model | Size | Params | MTEB Score | Speed | +|-------|------|--------|------------|-------| +| eea-embedding-gemma | 600MB | 308M | 69.67 | Slower | +| bge-small-en-v1.5 | 133MB | 33M | ~62-63 | Faster | + +**Recommendation:** Use BGE for most cases (smaller, faster, good quality) + +## Common CLI Patterns + +**Index with BGE model:** +```bash +odino index --model BAAI/bge-small-en-v1.5 +``` + +**Search from subdirectory:** +```bash +# Requires finding .odino directory first (see SKILL.md) +cd project/src/utils +odino query -q "validation" -p ../.. +``` + +**Reindex after code changes:** +```bash +odino index --force +``` + +**Check if directory is indexed:** +```bash +if [[ -d .odino ]]; then + echo "Directory is indexed" + odino status +else + echo "Directory is not indexed" +fi +``` + +## Performance Tips + +1. **Use BGE model** - 78% smaller, 90% fewer parameters, only ~7 point MTEB drop +2. **Adjust batch size** - Use 16 for GPU, 8 for CPU +3. 
**Use .odinoignore** - Exclude build artifacts, dependencies, config files +4. **GPU acceleration** - Much faster indexing if CUDA available +5. **Chunking strategy** - Default 512 tokens works well for most code + +## Troubleshooting + +**"Command not found: odino"** +```bash +# Ensure pipx bin directory is in PATH +export PATH="$HOME/.local/bin:$PATH" + +# Or reinstall +pipx install odino +``` + +**"No index found"** +```bash +# Check for .odino directory +ls -la .odino + +# If missing, index first +odino index --model BAAI/bge-small-en-v1.5 +``` + +**GPU out of memory** +```bash +# Reduce batch size in .odino/config.json +{ + "embedding_batch_size": 8 # or even 4 +} + +# Then reindex +odino index --force +``` + +**Slow indexing** +```bash +# Use smaller model +odino index --model BAAI/bge-small-en-v1.5 + +# Reduce batch size if GPU memory limited +# Edit .odino/config.json: "embedding_batch_size": 8 +``` + +## Exit Codes + +- `0` - Success +- `1` - General error (no index, invalid path, etc.) +- `2` - Invalid arguments + +## Environment Variables + +Odino respects standard environment variables: +- `CUDA_VISIBLE_DEVICES` - GPU selection +- `HF_HOME` - Hugging Face cache directory (for model downloads) diff --git a/skills/semantic-search/references/integration.md b/skills/semantic-search/references/integration.md new file mode 100644 index 0000000..6ffba6a --- /dev/null +++ b/skills/semantic-search/references/integration.md @@ -0,0 +1,449 @@ +# Integration with Other Tools + +Guide to combining semantic search with other Claude Code tools for powerful workflows. 
+ +## Tool Integration Matrix + +| Tool | Purpose | When to Use After Semantic Search | +|------|---------|-----------------------------------| +| **code-pointer** | Open files at specific lines | When you want to view/edit found code | +| **grep** | Exact text matching | To find specific strings in sem-search results | +| **glob** | File pattern matching | To filter results by file type/location | +| **read** | Read file contents | To examine top semantic search results | + +## Semantic Search → Code-Pointer + +**Use case:** Open relevant files in VSCode after semantic search + +### Basic Pattern + +```bash +# 1. Find relevant code with semantic search +RESULTS=$(odino query -q "authentication middleware") + +# Example output: +# Score: 0.89 | Path: src/middleware/auth.js +# Score: 0.82 | Path: src/middleware/jwt.js + +# 2. Open top result in VSCode +code -g src/middleware/auth.js +``` + +### With Line Numbers + +When you know the specific section: + +```bash +# Find the file +odino query -q "JWT token generation" +# → src/auth/jwt.js + +# Read the file to find exact line +grep -n "generateToken" src/auth/jwt.js +# → 42:function generateToken(user) { + +# Open at specific line +code -g src/auth/jwt.js:42 +``` + +### Automated Workflow + +```bash +# Parse top result and open automatically +TOP_FILE=$(odino query -q "password hashing" | head -1 | sed 's/^.*Path: *//') +code -g "$TOP_FILE" +``` + +## Semantic Search → Grep + +**Use case:** Find exact text in files discovered by semantic search + +### Two-Stage Search + +```bash +# Stage 1: Find relevant area with semantic search +odino query -q "database connection handling" +# → Found: src/db/connection.js, src/db/pool.js + +# Stage 2: Find exact string in those files +grep -n "createConnection\|getConnection" src/db/*.js +``` + +### Narrowing Results + +```bash +# Broad semantic search +odino query -q "API endpoints" +# → Returns 20+ files + +# Narrow to specific endpoint with grep +grep -r
"app.post('/users')" . +``` + +### Finding Patterns + +```bash +# Find error handling code +odino query -q "error handling patterns" +# → src/middleware/error.js, src/utils/errors.js + +# Find specific error types +grep -r "try.*catch\|throw new" src/middleware/error.js src/utils/errors.js +``` + +## Semantic Search → Glob + +**Use case:** Filter semantic search results by file patterns + +### File Type Filtering + +```bash +# Find configuration code +odino query -q "application configuration" +# → Might return .js, .json, .yaml files + +# Focus on just config files +find . -name "*.config.js" -o -name "config.json" +``` + +### Directory-Specific Search + +```bash +# Find test files related to authentication +odino query -q "authentication testing" + +# Then narrow to test directory +ls tests/**/*auth*.test.js +``` + +## Semantic Search → Read + +**Use case:** Examine top results to understand context + +### Standard Workflow + +```bash +# 1. Semantic search +odino query -q "user registration logic" +# → Top 3 results: +# Score: 0.91 | Path: src/routes/auth.js +# Score: 0.85 | Path: src/services/user.js +# Score: 0.79 | Path: src/validators/user.js + +# 2. Read top result +cat src/routes/auth.js + +# 3. Read related files for full context +cat src/services/user.js +cat src/validators/user.js +``` + +### With Summary + +```bash +# Find relevant code +odino query -q "payment processing" + +# Read top results and summarize +echo "## Payment Processing Implementation" +echo "" +echo "### Main handler:" +head -20 src/routes/payment.js +echo "" +echo "### Service layer:" +head -20 src/services/payment.js +``` + +## Complete Workflow Examples + +### Example 1: Debugging a Feature + +**Goal:** Fix bug in user login + +```bash +# 1. Find login code with semantic search +odino query -q "user login authentication" +# → src/routes/auth.js (0.92) +# → src/services/auth.js (0.88) +# → src/middleware/passport.js (0.81) + +# 2.
Read main login handler +cat src/routes/auth.js | grep -A 20 "post.*login" + +# 3. Find error handling in that file +grep -n "catch\|error" src/routes/auth.js + +# 4. Open at error handling line +code -g src/routes/auth.js:67 +``` + +### Example 2: Understanding a Feature + +**Goal:** Understand how caching works + +```bash +# 1. Find caching implementation +odino query -q "caching implementation and configuration" +# → src/cache/redis.js (0.94) +# → src/config/cache.js (0.87) +# → src/middleware/cache.js (0.79) + +# 2. Read configuration first +cat src/config/cache.js + +# 3. Then read main implementation +cat src/cache/redis.js + +# 4. Find usage examples +grep -r "cache.get\|cache.set" src/ | head -10 + +# 5. Open implementation in editor +code -g src/cache/redis.js +``` + +### Example 3: Refactoring + +**Goal:** Replace all database connection code + +```bash +# 1. Find all database connection code +odino query -q "database connection creation and management" +# → Returns 5 files + +# 2. Find exact connection creation calls +grep -rn "createConnection\|mysql.connect\|pg.Pool" src/ + +# 3. Check each file with semantic context +odino query -q "connection pooling" +odino query -q "database transaction handling" + +# 4. Open files for editing +code -g src/db/connection.js src/db/pool.js src/db/transaction.js +``` + +### Example 4: Code Review + +**Goal:** Review all authentication changes + +```bash +# 1. Find authentication code +odino query -q "authentication and authorization logic" + +# 2. Find recent changes (combine with git) +git diff main -- src/auth/ src/middleware/auth.js + +# 3. Semantic search for related security code +odino query -q "security validation and sanitization" + +# 4.
Check for vulnerabilities +grep -r "eval\|exec\|innerHTML" src/auth/ +``` + +## Advanced Integration Patterns + +### Cascading Search + +Start broad, narrow progressively: + +```bash +# Level 1: Semantic search (broad) +odino query -q "user management" +# → 15 files + +# Level 2: File pattern (medium) +find src/ -path "*/users/*" -name "*.js" +# → 8 files + +# Level 3: Grep (narrow) +grep -l "updateUser\|deleteUser" src/users/*.js +# → 3 files + +# Level 4: Open specific files +code -g src/users/controller.js src/users/service.js +``` + +### Context Building + +Build understanding layer by layer: + +```bash +# 1. High-level architecture +odino query -q "application architecture and structure" + +# 2. Specific subsystem +odino query -q "data access layer implementation" + +# 3. Specific functionality +odino query -q "CRUD operations for users" + +# 4. Exact implementation +grep -A 30 "function createUser" src/data/users.js +``` + +### Verification Workflow + +Verify semantic search results: + +```bash +# 1. Semantic search +odino query -q "input validation" +# → src/validators/user.js (0.87) + +# 2. Verify by reading +cat src/validators/user.js | head -50 + +# 3. Find all validation usage +grep -r "import.*validator" src/ + +# 4.
Cross-reference with tests +odino query -q "validation testing" +``` + +## When to Use Each Tool + +### Use semantic search when: +- Exploring unfamiliar codebase +- Finding conceptual implementations +- Locating cross-cutting concerns +- Understanding feature architecture + +### Use grep when: +- Searching for exact strings +- Finding variable/function usages +- Locating error messages +- Searching for TODOs/FIXMEs + +### Use glob when: +- Finding files by name pattern +- Filtering by file type +- Working with specific directories +- Batch operations on matching files + +### Use code-pointer when: +- Need to view/edit specific code +- Opening files at exact lines +- Navigating to definitions +- Debugging specific sections + +### Use read when: +- Examining file contents +- Understanding context +- Checking configuration +- Reviewing small files + +## Tool Selection Decision Tree + +``` +Need to find code? +├─ Know exact string? → Use grep +├─ Know filename pattern? → Use glob +├─ Know file path? → Use read +└─ Know concept only? → Use semantic search + └─ Got results? → Use code-pointer to open + └─ Need exact line? → Use grep in file +``` + +## Best Practices + +1. **Start semantic, end specific** + - Semantic search → Find area + - Grep → Find exact code + - Code-pointer → Open for editing + +2. **Read before editing** + - Semantic search → Find files + - Read → Understand context + - Code-pointer → Open in editor + +3. **Verify with multiple tools** + - Semantic search → Find candidates + - Grep → Verify it's the right code + - Read → Confirm implementation + +4. **Build context progressively** + - Semantic search → High-level structure + - Semantic search → Specific subsystem + - Read/Grep → Detailed implementation + +5.
**Combine for complex tasks** + - Semantic search → Find authentication + - Grep → Find specific function + - Code-pointer → Open file + - Read → Check related files + +## Performance Tips + +1. **Cache semantic results** + - Run semantic search once + - Use results for multiple grep/read operations + +2. **Narrow scope early** + - Use semantic search to identify directory + - Then use grep/glob only in that directory + +3. **Batch file operations** + - Collect file paths from semantic search + - Read multiple files in one pass + +4. **Use appropriate tool** + - Don't use semantic search for exact strings + - Don't use grep for conceptual searches + +## Common Mistakes to Avoid + +**❌ Using semantic search for exact matches:** +```bash +# Wrong +odino query -q "function validateEmail" + +# Right +grep -r "function validateEmail" . +``` + +**❌ Using grep for concepts:** +```bash +# Wrong (might miss variations) +grep -r "auth" . + +# Right +odino query -q "authentication implementation" +``` + +**❌ Not reading semantic results:** +```bash +# Wrong (blindly trusting results) +odino query -q "payment" | head -1 | cut -d'|' -f2 + +# Right (verify first) +odino query -q "payment" +cat [top-result] # Verify it's actually payment code +``` + +**❌ Opening too many files:** +```bash +# Wrong +odino query -q "validation" | while read line; do + code -g "$(echo $line | cut -d'|' -f2)" +done # Opens 20+ files + +# Right +odino query -q "validation" | head -3 +# Review, then open specific files +code -g src/validators/user.js +``` + +## Summary + +**Effective integration means:** +- Using the right tool for each task +- Starting broad (semantic) and narrowing (grep) +- Verifying results before acting +- Building context progressively +- Combining tools for complex workflows + +**Remember:** +- Semantic search → Finding concepts +- Grep → Finding exact text +- Glob → Finding files +- Read → Understanding context +- Code-pointer → Editing code diff --git
a/skills/semantic-search/references/search_patterns.md b/skills/semantic-search/references/search_patterns.md new file mode 100644 index 0000000..65f37d1 --- /dev/null +++ b/skills/semantic-search/references/search_patterns.md @@ -0,0 +1,346 @@ +# Effective Semantic Search Patterns + +Guide to crafting effective semantic search queries and interpreting results. + +## Query Design Principles + +### Be Conceptual, Not Literal + +Semantic search works best with conceptual queries that describe **what the code does**, not what it's named. + +**❌ Poor queries (too literal):** +- "validateEmail" → Use grep instead +- "config.js" → Use glob instead +- "class AuthService" → Use grep instead +- "TODO" → Use grep instead + +**✅ Good queries (conceptual):** +- "email validation logic" +- "configuration loading and parsing" +- "authentication service implementation" +- "incomplete features or pending work" + +### Use Natural Language + +Write queries as you would explain the concept to a colleague. + +**❌ Keyword stuffing:** +- "db connect pool config" + +**✅ Natural language:** +- "database connection pooling configuration" + +### Be Specific When Needed + +Balance specificity with generality based on what you're looking for.
+ +**Too general:** +- "functions" → Will match everything +- "code" → Will match everything + +**Too specific:** +- "JWT token validation using bcrypt with salt rounds set to 10" → Too narrow + +**Just right:** +- "JWT token validation" +- "password hashing and verification" + +## Common Query Patterns + +### Finding Implementation + +**Pattern:** "[concept] implementation" or "how [feature] works" + +```bash +odino query -q "authentication implementation" +odino query -q "how caching works" +odino query -q "error handling implementation" +``` + +### Finding Configuration + +**Pattern:** "[system/feature] configuration" or "[thing] setup" + +```bash +odino query -q "database configuration" +odino query -q "API endpoint setup" +odino query -q "logging configuration" +``` + +### Finding Patterns + +**Pattern:** "[pattern/technique] usage" or "examples of [pattern]" + +```bash +odino query -q "middleware usage patterns" +odino query -q "dependency injection examples" +odino query -q "async/await error handling" +``` + +### Finding by Purpose + +**Pattern:** "code that [does something]" or "[action] logic" + +```bash +odino query -q "code that validates user input" +odino query -q "file upload logic" +odino query -q "payment processing" +``` + +### Finding Documentation + +**Pattern:** "[topic] documentation" or "how to [task]" + +```bash +odino query -q "API documentation" +odino query -q "how to deploy the application" +odino query -q "setup instructions" +``` + +## Result Interpretation + +### Understanding Scores + +Odino returns results with similarity scores (0.0 to 1.0): + +- **0.85-1.0**: Highly relevant, almost certainly what you're looking for +- **0.70-0.84**: Likely relevant, worth checking +- **0.60-0.69**: Possibly relevant, may contain related concepts +- **<0.60**: Weakly related, probably not useful + +**Example output:** +``` +Score: 0.92 | Path: src/auth/jwt.js # Definitely check this +Score: 0.78 | Path: src/middleware/auth.js # Likely
relevant +Score: 0.64 | Path: src/utils/crypto.js # Maybe related +Score: 0.51 | Path: src/config/index.js # Probably not it +``` + +### When to Read Files + +**Always read:** Top 1-2 results (score > 0.80) +**Sometimes read:** Next 2-3 results (score 0.65-0.80) for context +**Rarely read:** Results with score < 0.65 + +### Combining Results + +Often the answer spans multiple files: + +```bash +odino query -q "user authentication flow" + +# Results might include: +# - Login endpoint (score: 0.89) +# - JWT generation (score: 0.85) +# - Password verification (score: 0.82) +# - Session management (score: 0.76) +``` + +Read top results to understand the complete picture. + +## Refinement Strategies + +### Too Many Results + +Make query more specific: + +```bash +# Too broad +odino query -q "validation" + +# Better +odino query -q "email format validation" +``` + +### Too Few Results + +Make query more general: + +```bash +# Too narrow +odino query -q "SHA256 password hashing with bcrypt" + +# Better +odino query -q "password hashing" +``` + +### Wrong Results + +Try different phrasing: + +```bash +# If "API endpoint handlers" doesn't work well +odino query -q "route definitions" +odino query -q "HTTP request handlers" +odino query -q "REST API implementation" +``` + +## Advanced Patterns + +### Multi-Concept Queries + +Combine related concepts for broader coverage: + +```bash +odino query -q "authentication and authorization logic" +odino query -q "database queries and ORM usage" +odino query -q "error handling and logging" +``` + +### Feature-Specific Queries + +Target specific features or subsystems: + +```bash +odino query -q "user registration feature" +odino query -q "shopping cart functionality" +odino query -q "notification system" +``` + +### Cross-Cutting Concerns + +Find patterns that span the codebase: + +```bash +odino query -q "error handling patterns" +odino query -q "input validation across endpoints" +odino query -q "database transaction usage" +``` + 
+## Query Examples by Use Case + +### Code Exploration + +"I'm new to this codebase, where do I start?" + +```bash +odino query -q "main application entry point" +odino query -q "core business logic" +odino query -q "primary data models" +``` + +### Bug Hunting + +"There's a bug in feature X, where's the code?" + +```bash +odino query -q "user login functionality" +odino query -q "payment processing logic" +odino query -q "email sending implementation" +``` + +### Refactoring + +"I need to change how we do X across the codebase" + +```bash +odino query -q "database connection creation" +odino query -q "API key validation" +odino query -q "date formatting and parsing" +``` + +### Learning Patterns + +"How does this codebase handle X?" + +```bash +odino query -q "dependency injection patterns" +odino query -q "testing approach and examples" +odino query -q "configuration management" +``` + +## When Semantic Search Doesn't Help + +Use other tools when: + +1. **Exact string needed** - Use `grep` + ```bash + grep -r "validateEmail" . + ``` + +2. **Filename patterns** - Use `glob` or `find` + ```bash + find . -name "*config*.js" + ``` + +3. **Known file location** - Use `read` directly + ```bash + # Just read the file + cat src/config/database.js + ``` + +4. **Symbol definitions** - Use language-specific tools + ```bash + # For Python + grep -r "class AuthService" . + + # For JavaScript + grep -r "export.*AuthService" . + ``` + +## Combining Tools Workflow + +**Best practice:** Start semantic, narrow with grep/glob + +```bash +# 1. Find the general area with semantic search +odino query -q "database migrations" +# → Found: migrations/2024-01-15-add-users.sql + +# 2. Narrow to specific files/patterns +find migrations/ -name "*users*" + +# 3. Search for exact strings in those files +grep -n "CREATE TABLE" migrations/*users*.sql +``` + +## Tips for Better Results + +1. **Use verbs and nouns** - "validates user input" not just "validation" +2.
**Include context** - "email validation in registration" not just "email" +3. **Think about purpose** - What does the code **do**, not what it's **called** +4. **Try synonyms** - "authentication" vs "login" vs "sign in" +5. **Be patient** - Try 2-3 query variations if first doesn't work +6. **Check top 3-5 results** - Sometimes #3 is the best match +7. **Combine with file reading** - Read top results to confirm relevance + +## Anti-Patterns to Avoid + +**❌ Searching for variable names:** +```bash +odino query -q "userEmail" # Use grep instead +``` + +**❌ Searching for exact error messages:** +```bash +odino query -q "Error: Connection refused" # Use grep instead +``` + +**❌ Searching for file paths:** +```bash +odino query -q "src/utils/validation.js" # Use find/glob instead +``` + +**❌ Searching for TODOs/comments:** +```bash +odino query -q "TODO fix this" # Use grep instead +``` + +**❌ Overly generic queries:** +```bash +odino query -q "code" # Way too broad +``` + +## Summary + +**Good semantic queries are:** +- Conceptual, not literal +- Natural language, not keywords +- Focused on purpose/behavior +- Appropriately specific + +**After getting results:** +- Check scores (> 0.70 is good) +- Read top 2-3 files for context +- Combine with grep/glob for precision +- Iterate query if needed