commit b923023331732db3b4c05012ed4abff8c74fda27
Author: Zhongwei Li
Date:   Sat Nov 29 18:17:12 2025 +0800

    Initial commit

diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
new file mode 100644
index 0000000..81417bf
--- /dev/null
+++ b/.claude-plugin/plugin.json
@@ -0,0 +1,12 @@
+{
+  "name": "arc",
+  "description": "CLI tools for semantic and full-text search across Qdrant and MeiliSearch. Manage Docker instances, index PDFs, markdown, and source code with AST-aware chunking, frontmatter extraction, git awareness, and dual-indexing support. Store agent-generated content for long-term memory with rich metadata.",
+  "version": "0.1.0",
+  "author": {
+    "name": "Arcaneum Contributors",
+    "url": "https://github.com/yourorg/arcaneum"
+  },
+  "commands": [
+    "./commands"
+  ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..80d3637
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# arc
+
+CLI tools for semantic and full-text search across Qdrant and MeiliSearch. Manage Docker instances, index PDFs, markdown, and source code with AST-aware chunking, frontmatter extraction, git awareness, and dual-indexing support. Store agent-generated content for long-term memory with rich metadata.
diff --git a/commands/collection.md b/commands/collection.md
new file mode 100644
index 0000000..5aef8f2
--- /dev/null
+++ b/commands/collection.md
@@ -0,0 +1,58 @@
+---
+description: Manage Qdrant collections
+argument-hint: [options]
+---
+
+Manage Qdrant vector collections for storing embeddings.
+
+**Subcommands:**
+
+- create: Create a new collection with specified embedding model
+- list: List all collections in Qdrant
+- info: Show detailed information about a collection
+- delete: Delete a collection permanently
+
+**Common Options:**
+
+- --json: Output in JSON format for programmatic use
+
+**Examples:**
+
+```text
+/collection create MyDocs --model stella --type pdf
+/collection list
+/collection info MyDocs
+/collection delete MyDocs --confirm
+```
+
+**Execution:**
+
+```bash
+cd ${CLAUDE_PLUGIN_ROOT}
+arc collection $ARGUMENTS
+```
+
+**Collection Types:**
+
+Collections should specify a type (pdf or code) to ensure content matches:
+- pdf: For document collections (PDFs, text files)
+- code: For source code repositories
+
+**Available Models:**
+
+- stella: 1024D, best for documents and PDFs (default for PDFs)
+- jina-code: 768D, optimized for source code (default for code)
+- bge: 1024D, general purpose
+- modernbert: 1024D, newer general-purpose model
+
+**Related Commands:**
+
+- /index pdf - Index PDFs into a collection
+- /index code - Index source code into a collection
+- /corpus create - Create both vector and full-text indexes
+
+**Implementation:**
+
+- Defined in RDR-003 (Collection Creation)
+- Enhanced in RDR-006 (Claude Code Integration)
+- Type enforcement added in arcaneum-122
diff --git a/commands/config.md b/commands/config.md
new file mode 100644
index 0000000..429adf8
--- /dev/null
+++ b/commands/config.md
@@ -0,0 +1,45 @@
+---
+description: Configuration and cache management
+argument-hint: [options]
+---
+
+Manage Arcaneum configuration and model cache.
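+
+For instance, one way to see how much disk the cache and data directories are using before clearing anything (a sketch assuming the default locations noted below) is:
+
+```bash
+# Check sizes with standard tooling, then clear the model cache via arc
+du -sh ~/.arcaneum/models ~/.arcaneum/data
+arc config clear-cache --confirm
+```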
+
+**Subcommands:**
+
+- show-cache-dir: Display cache locations and sizes
+- clear-cache: Clear model cache to free disk space
+
+**Arguments:**
+
+- --confirm: Confirm cache deletion (required for clear-cache)
+
+**Examples:**
+
+```text
+/config show-cache-dir
+/config clear-cache --confirm
+```
+
+**Execution:**
+
+```bash
+cd ${CLAUDE_PLUGIN_ROOT}
+arc config $ARGUMENTS
+```
+
+**Note:** The model cache (~/.arcaneum/models/) stores downloaded embedding models.
+First-time indexing downloads ~1-2GB of models, which are then reused. I'll show you:
+
+- Current cache directory locations
+- Size of each directory (models, data)
+- Free disk space information
+
+Use clear-cache when models are corrupted or to free disk space (models will
+be re-downloaded on next use).
+
+**Related:**
+
+- Models auto-downloaded to ~/.arcaneum/models/
+- Data stored in ~/.arcaneum/data/
+- Implemented in arcaneum-157 and arcaneum-162
diff --git a/commands/container.md b/commands/container.md
new file mode 100644
index 0000000..e966483
--- /dev/null
+++ b/commands/container.md
@@ -0,0 +1,60 @@
+---
+description: Manage container services (Qdrant, MeiliSearch)
+argument-hint: [options]
+---
+
+Manage Docker container services for Qdrant and MeiliSearch.
+
+**Subcommands:**
+
+- start: Start all services
+- stop: Stop all services
+- status: Show service status and health
+- logs: View service logs
+- restart: Restart services
+- reset: Delete all data and reset (WARNING: destructive)
+
+**Arguments:**
+
+- --follow, -f: Follow log output (logs command only)
+- --tail <n>: Number of log lines to show (logs command, default: 100)
+- --confirm: Confirm data deletion (reset command only)
+
+**Examples:**
+
+```text
+/container start
+/container status
+/container logs
+/container logs --follow
+/container stop
+/container restart
+/container reset --confirm
+```
+
+**Execution:**
+
+```bash
+cd ${CLAUDE_PLUGIN_ROOT}
+arc container $ARGUMENTS
+```
+
+**Note:** Container management commands check Docker availability and provide
+helpful error messages if Docker is not running. I'll show you:
+
+- Service startup confirmation with URLs
+- Health check status for Qdrant
+- Data directory locations and sizes
+- Log output for debugging
+
+**Data Locations:**
+
+- Qdrant: ~/.arcaneum/data/qdrant/
+- Snapshots: ~/.arcaneum/data/qdrant_snapshots/
+- All data persists across container restarts
+
+**Related:**
+
+- Implemented in arcaneum-167, renamed in arcaneum-169
+- Replaces old scripts/qdrant-manage.sh
+- Part of simplified Docker management (arcaneum-158)
diff --git a/commands/corpus.md b/commands/corpus.md
new file mode 100644
index 0000000..aae0ef7
--- /dev/null
+++ b/commands/corpus.md
@@ -0,0 +1,97 @@
+---
+description: Manage dual-index corpora
+argument-hint: [options]
+---
+
+Manage corpora that combine both vector search (Qdrant) and full-text search (MeiliSearch) for the same content.
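+
+As a rough end-to-end sketch (using only the subcommands documented below, with placeholder names and queries, and assuming the Qdrant collection shares the corpus name and the full-text index follows the `MyDocs-fulltext` naming shown in the /search examples):
+
+```bash
+# Create both indexes, sync a directory into them, then query each side
+arc corpus create MyDocs --type pdf --models stella
+arc corpus sync ~/Documents --corpus MyDocs
+arc search semantic "fraud detection patterns" --collection MyDocs
+arc search text "chargeback" --index MyDocs-fulltext
+```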
+
+**Subcommands:**
+
+- create: Create both Qdrant collection and MeiliSearch index
+- sync: Index directory to both systems simultaneously
+
+**Common Options:**
+
+- --json: Output in JSON format
+
+**Create Options:**
+
+- name: Corpus name (required)
+- --type: Corpus type - code or pdf (required)
+- --models: Embedding models, comma-separated (default: stella,jina)
+
+**Sync Options:**
+
+- directory: Directory path to index (required)
+- --corpus: Corpus name (required)
+- --models: Embedding models (default: stella,jina)
+- --file-types: File extensions to index (e.g., .py,.md)
+
+**Examples:**
+
+```text
+/corpus create MyDocs --type pdf --models stella
+/corpus sync ~/Documents --corpus MyDocs
+/corpus create CodeBase --type code
+/corpus sync ~/projects --corpus CodeBase --file-types .py,.js,.md
+```
+
+**Execution:**
+
+```bash
+cd ${CLAUDE_PLUGIN_ROOT}
+arc corpus $ARGUMENTS
+```
+
+**What Is a Corpus?**
+
+A corpus combines two search systems:
+1. **Vector search** (Qdrant): Semantic similarity, concept matching
+2. **Full-text search** (MeiliSearch): Keyword, phrase, boolean operators
+
+This enables hybrid search strategies:
+- Broad semantic discovery (vector search)
+- Precise keyword refinement (full-text search)
+- Combined results for best of both worlds
+
+**When to Use Corpus vs Collection:**
+
+**Use Corpus When:**
+- Need both semantic and keyword search
+- Users search different ways (concepts vs exact terms)
+- Want fast keyword filtering of semantic results
+- Building search UIs with multiple search modes
+
+**Use Collection When:**
+- Only need semantic search
+- Working with embeddings/vectors directly
+- Integrating with existing vector workflows
+- MeiliSearch not available/needed
+
+**How Sync Works:**
+
+1. Discovers files in directory (respects .gitignore for code)
+2. Chunks content appropriately (PDFs vs code)
+3. Generates embeddings with specified models
+4. Uploads to Qdrant (vector search)
+5. Indexes to MeiliSearch (full-text search)
+6. Both indexes share same document IDs and metadata
+
+**Performance:**
+
+Corpus sync is approximately 2x slower than single-system indexing due to dual upload, but still efficient:
+- PDFs: ~5-15/minute
+- Source files: 50-100 files/second
+
+**Related Commands:**
+
+- /collection create - Create vector-only collection
+- /index pdf - Index PDFs to vector only
+- /index code - Index code to vector only
+- /search semantic - Search vector index
+- /search text - Search full-text index
+
+**Implementation:**
+
+- RDR-009: Dual indexing strategy
+- RDR-006: Claude Code integration
diff --git a/commands/doctor.md b/commands/doctor.md
new file mode 100644
index 0000000..95a98d9
--- /dev/null
+++ b/commands/doctor.md
@@ -0,0 +1,40 @@
+---
+description: Verify Arcaneum setup and prerequisites
+argument-hint: [--verbose] [--json]
+---
+
+Check that all Arcaneum prerequisites are met and the system is ready for use.
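+
+In a typical first run, this check sits between starting the services and indexing content; one possible sequence (collection and path names are placeholders) looks like:
+
+```bash
+# Bring up the services, verify prerequisites, then create a collection and index into it
+arc container start
+arc doctor --verbose
+arc collection create MyDocs --model stella --type pdf
+arc index pdf ~/Documents --collection MyDocs
+```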
+
+**Arguments:**
+
+- --verbose: Show detailed diagnostic information
+- --json: Output JSON format
+
+**Checks Performed:**
+
+- Python version (>= 3.12 required)
+- Required Python dependencies installed
+- Qdrant server connectivity and health
+- MeiliSearch server connectivity (if configured)
+- Embedding model availability
+- Write permissions for temporary files
+- Environment variable configuration
+
+**Examples:**
+
+```text
+/doctor
+/doctor --verbose
+/doctor --json
+```
+
+**Execution:**
+
+```bash
+cd ${CLAUDE_PLUGIN_ROOT}
+arc doctor $ARGUMENTS
+```
+
+**Note:** This diagnostic command helps troubleshoot setup issues. I'll run all
+checks and present a summary showing which requirements are met (✅) and which
+need attention (❌), along with specific guidance for fixing any problems found.
diff --git a/commands/index.md b/commands/index.md
new file mode 100644
index 0000000..cce17ed
--- /dev/null
+++ b/commands/index.md
@@ -0,0 +1,199 @@
+---
+description: Index content into collections
+argument-hint: [<path> | --from-file <file>] [options]
+---
+
+Index PDFs, markdown, or source code into Qdrant collections for semantic search.
+
+**Subcommands:**
+
+- pdf: Index PDF documents (with OCR support)
+- markdown: Index markdown files (with frontmatter extraction)
+- code: Index source code repositories (git-aware)
+
+**Common Options:**
+
+- --collection: Target collection (required)
+- --from-file: Read file paths from list (one per line, or "-" for stdin)
+- --model: Embedding model (auto-selected by content type)
+- --workers: Parallel workers (default: 4)
+- --force: Force reindex all files
+- --randomize: Randomize file processing order (useful for parallel indexing)
+- --no-gpu: Disable GPU acceleration (GPU enabled by default)
+- --verbose: Show detailed progress (suppress library warnings)
+- --debug: Show all library warnings including transformers
+- --json: Output in JSON format
+
+**PDF Indexing Options:**
+
+- --no-ocr: Disable OCR (enabled by default for scanned PDFs)
+- --ocr-language: OCR language code (default: eng)
+- --ocr-workers: Parallel OCR workers (default: cpu_count)
+- --normalize-only: Skip markdown conversion, only normalize whitespace
+- --preserve-images: Extract images for multimodal search
+- --process-priority: Process scheduling priority (low, normal, high)
+- --embedding-batch-size: Batch size for embeddings (auto-tuned if not specified)
+- --offline: Use cached models only (no network)
+
+**Markdown Indexing Options:**
+
+- --chunk-size: Target chunk size in tokens (overrides model default)
+- --chunk-overlap: Overlap between chunks in tokens
+- --recursive/--no-recursive: Search subdirectories recursively (default: recursive)
+- --exclude: Patterns to exclude (e.g., node_modules, .obsidian)
+- --offline: Use cached models only (no network)
+
+**Source Code Indexing Options:**
+
+- --depth: Git discovery depth (traverse subdirectories)
+
+**Examples:**
+
+```text
+# Basic indexing (GPU enabled by default)
+/index pdf ~/Documents/Research --collection PDFs --model stella
+/index markdown ~/notes --collection Notes --model stella
+/index code ~/projects/myapp --collection MyCode --model jina-code
+
+# Index from file list
+/index pdf --from-file /path/to/pdf_list.txt --collection PDFs
+/index markdown --from-file /path/to/md_list.txt --collection Notes
+
+# Index from stdin (pipe file paths)
+find ~/Documents -name "*.pdf" | /index pdf --from-file - --collection PDFs
+ls ~/notes/*.md | /index markdown --from-file - --collection Notes
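+
+# Tune worker count for a constrained machine (the value here is illustrative)
+/index pdf ~/Documents/Research --collection PDFs --workers 2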
+
+# With options
+/index markdown ~/docs --collection Docs --chunk-size 512 --verbose
+/index pdf ~/scanned-docs --collection Scans --no-ocr --offline
+
+# Force CPU-only mode (disable GPU)
+/index pdf ~/Documents/Research --collection PDFs --model stella --no-gpu
+
+# Parallel indexing from multiple terminals (randomize order)
+/index pdf ~/Documents/Research --collection PDFs --randomize
+/index markdown ~/notes --collection Notes --randomize
+
+# Debug mode (show all warnings)
+/index pdf ~/Documents/Research --collection PDFs --model stella --debug
+```
+
+**Execution:**
+
+```bash
+cd ${CLAUDE_PLUGIN_ROOT}
+arc index $ARGUMENTS
+```
+
+**File List Format (--from-file):**
+
+When using `--from-file`, provide a text file with one file path per line:
+
+```text
+# Comments are supported (lines starting with #)
+/absolute/path/to/file1.pdf
+relative/path/to/file2.md
+/another/file3.pdf
+
+# Empty lines are ignored
+```
+
+Features:
+
+- Supports both absolute and relative paths
+- Relative paths resolved from current directory
+- Comments (lines starting with #) and empty lines are skipped
+- Non-existent files are warned about but processing continues
+- Wrong file extensions are filtered with warnings
+- Use "-" to read from stdin
+
+**How It Works:**
+
+**PDF Indexing:**
+
+1. Extract text from PDFs (PyMuPDF + pdfplumber fallback)
+2. Auto-trigger OCR for scanned PDFs (< 100 chars extracted)
+3. Chunk text with 15% overlap for context
+4. Generate embeddings (stella default: 1024D for documents)
+5. Upload to Qdrant with metadata (file path, page numbers)
+6. Incremental: Skips unchanged files (file hash metadata check)
+
+**Markdown Indexing:**
+
+1. Discover markdown files (.md, .markdown extensions)
+2. Extract YAML frontmatter (title, author, tags, category, etc.)
+3. Semantic chunking preserving document structure (headers, code blocks)
+4. Generate embeddings (stella default: 1024D for documents)
+5. Upload to Qdrant with metadata (file path, frontmatter fields, header context)
+6. Incremental: Skips unchanged files (SHA256 content hash check)
+
+**Source Code Indexing:**
+
+1. Discover git repositories in directory tree
+2. Extract git metadata (project, branch, commit)
+3. Parse code with tree-sitter (AST-aware chunking, 15+ languages)
+4. Generate embeddings (jina-code default: 768D for code)
+5. Upload to Qdrant with metadata (git info, file path, language)
+6. Multi-branch support: project#branch identifier
+7. Incremental: Skips unchanged commits (metadata-based sync)
+
+**Default Models:**
+
+- PDFs: stella (1024D, document-optimized)
+- Markdown: stella (1024D, document-optimized)
+- Source: jina-code (768D, code-optimized)
+
+**Performance:**
+
+- PDF: ~10-30 PDFs/minute (depends on OCR workload)
+- Markdown: ~50-100 files/minute (depends on file size)
+- Source: 100-200 files/second (depends on file size)
+- Batch upload: 100-200 chunks per batch
+- Parallel workers: 4 (adjustable with --workers for PDF/source)
+- **GPU acceleration**: 1.5-3x speedup (enabled by default, use --no-gpu to disable)
+
+**GPU Acceleration:**
+
+GPU acceleration is **enabled by default** for faster embedding generation:
+
+- **Apple Silicon**: MPS (Metal Performance Shaders) backend
+- **NVIDIA GPUs**: CUDA backend
+- **CPU fallback**: Automatic if GPU unavailable
+- **Disable GPU**: Use --no-gpu flag (for thermal/battery concerns)
+
+**Compatible models** (verified with GPU support):
+
+- stella (recommended for PDFs/markdown) - Full MPS support
+- jina-code (recommended for source code) - Full MPS support
+- bge-small, bge-base - CoreML support
+
+**Offline Mode:**
+
+Use --offline for corporate proxies or SSL issues:
+
+- Requires models pre-downloaded: `arc models download`
+- No network calls during indexing
+- Fails if model not cached
+
+**Related Commands:**
+
+- /collection create - Create collection before indexing
+- /search semantic - Search indexed content
+- /corpus create - Create both vector + full-text indexes
+
+**Debug Mode:**
+
+Use --debug to troubleshoot indexing issues:
+
+- Shows all library warnings (including HuggingFace transformers)
+- Displays detailed stack traces
+- Helps diagnose model loading or GPU issues
+- Use --verbose for user-facing progress without library warnings
+
+**Implementation:**
+
+- RDR-004: PDF bulk indexing
+- RDR-005: Git-aware source code indexing
+- RDR-013: Performance optimization with GPU acceleration
+- RDR-014: Markdown content indexing
+- RDR-006: Claude Code integration
diff --git a/commands/models.md b/commands/models.md
new file mode 100644
index 0000000..0fee79e
--- /dev/null
+++ b/commands/models.md
@@ -0,0 +1,96 @@
+---
+description: Manage embedding models
+argument-hint: [options]
+---
+
+Manage and view available embedding models for vector search.
+
+**Subcommands:**
+
+- list: List all available embedding models with details
+
+**Options:**
+
+- --json: Output in JSON format
+
+**Examples:**
+
+```text
+/models list
+/models list --json
+```
+
+**Execution:**
+
+```bash
+cd ${CLAUDE_PLUGIN_ROOT}
+arc models $ARGUMENTS
+```
+
+**Available Models:**
+
+The list command shows:
+- Model name (for --model flags)
+- Dimensions (vector size)
+- Backend (fastembed, sentence-transformers)
+- Best use case (PDFs, code, general)
+- Model ID (HuggingFace identifier)
+
+**Current Models:**
+
+**For Documents/PDFs:**
+- **stella** (1024D): Best for documents, PDFs, general text
+- **bge-large** (1024D): General purpose, high quality
+- **modernbert** (1024D): Newer general-purpose model
+
+**For Source Code:**
+- **jina-code** (768D): Optimized for code, cross-language
+- **jina-v2-code** (768D): Alternative code model
+
+**For General Use:**
+- **bge** (1024D): High-quality general embeddings
+- **bge-small** (384D): Faster, smaller, lower quality
+
+**Model Selection Tips:**
+
+1. **Match content type:**
+   - PDFs/docs → stella or modernbert
+   - Source code → jina-code
+   - Mixed → stella or bge
+
+2. **Consider dimensions:**
+   - Higher dimensions (1024D) = better quality, more storage
+   - Lower dimensions (384D, 768D) = faster, less storage
+
+3. **Backend matters:**
+   - fastembed: Faster, optimized, limited models
+   - sentence-transformers: More models, HuggingFace ecosystem
+
+4. **Collection consistency:**
+   - Use same model for all documents in a collection
+   - Cannot mix dimensions in one vector space
+
+**Downloading Models:**
+
+Models auto-download on first use (~1-2GB):
+- Cached in ~/.arcaneum/models/
+- Reused across indexing operations
+- Use --offline flag to require cached models
+
+**Pre-download for offline use:**
+
+```bash
+python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('jinaai/jina-embeddings-v2-base-code')"
+```
+
+**Related Commands:**
+
+- /collection create - Create collection with specific model
+- /index pdf - Index with model selection
+- /index code - Index with model selection
+
+**Implementation:**
+
+- RDR-002: Embedding client architecture
+- RDR-006: Model listing CLI
+- arcaneum-142: Multi-backend support
diff --git a/commands/search.md b/commands/search.md
new file mode 100644
index 0000000..e80c970
--- /dev/null
+++ b/commands/search.md
@@ -0,0 +1,92 @@
+---
+description: Search across collections
+argument-hint: [options]
+---
+
+Search your indexed content using vector-based semantic search or keyword-based full-text search.
+
+**Subcommands:**
+
+- semantic: Vector-based semantic search (Qdrant)
+- text: Keyword-based full-text search (MeiliSearch)
+
+**Common Options:**
+
+- --limit: Number of results to return (default: 10)
+- --offset: Number of results to skip for pagination (default: 0)
+- --filter: Metadata filter (key=value or JSON)
+- --json: Output in JSON format
+- --verbose: Show detailed information
+
+**Semantic Search Options:**
+
+- --collection: Collection to search (required)
+- --vector-name: Vector name (auto-detected if not specified)
+- --score-threshold: Minimum similarity score
+
+**Full-Text Search Options:**
+
+- --index: MeiliSearch index name (required)
+
+**Examples:**
+
+```text
+# Basic semantic search
+/search semantic "authentication logic" --collection MyCode --limit 5
+
+# Full-text keyword search
+/search text "def authenticate" --index MyCode-fulltext
+
+# Search with score threshold
+/search semantic "fraud detection patterns" --collection PDFs --score-threshold 0.7
+
+# Pagination: Get second page of results
+/search semantic "machine learning" --collection Papers --limit 10 --offset 10
+
+# Pagination: Get third page with JSON output
+/search semantic "neural networks" --collection Papers --limit 10 --offset 20 --json
+```
+
+**Execution:**
+
+```bash
+cd ${CLAUDE_PLUGIN_ROOT}
+arc search $ARGUMENTS
+```
+
+**When to Use Each:**
+
+**Semantic Search** (vector-based):
+
+- Finding conceptually similar code/documents
+- Cross-language semantic matching
+- "What does this do?" or "how to" questions
+- Fuzzy concept matching
+
+**Full-Text Search** (keyword-based):
+
+- Exact keyword or phrase matching
+- Function/variable name search
+- Quoted phrase search
+- Boolean operators (AND, OR, NOT)
+
+**Result Format:**
+
+Both commands show:
+
+- Relevance score (similarity for semantic, rank for text)
+- Source file path
+- Matching content snippet
+- Metadata (git info for code, page numbers for PDFs)
+
+**Related Commands:**
+
+- /collection list - See available collections
+- /index pdf - Index PDFs for searching
+- /index code - Index code for searching
+
+**Implementation:**
+
+- RDR-007: Semantic search via Qdrant
+- RDR-012: Full-text search via MeiliSearch
+- RDR-006: Claude Code integration
diff --git a/commands/store.md b/commands/store.md
new file mode 100644
index 0000000..39fd23e
--- /dev/null
+++ b/commands/store.md
@@ -0,0 +1,81 @@
+---
+description: Store agent-generated content for long-term memory
+argument-hint: <file|-> --collection <name> [options]
+---
+
+Store agent-generated content (research, analysis, synthesized information) with rich
+metadata. Content is persisted to disk for re-indexing and full-text retrieval, then
+indexed to Qdrant for semantic search.
+
+**Storage Location:** `~/.arcaneum/agent-memory/{collection}/`
+
+**Options:**
+
+- --collection: Target collection (required)
+- --model: Embedding model (default: stella for documents)
+- --title: Document title (added to frontmatter)
+- --category: Document category (e.g., research, security, analysis)
+- --tags: Comma-separated tags
+- --metadata: Additional metadata as JSON
+- --chunk-size: Target chunk size in tokens (overrides model default)
+- --chunk-overlap: Overlap between chunks in tokens
+- --verbose: Show detailed progress
+- --json: Output in JSON format
+
+**Examples:**
+
+```text
+/store analysis.md --collection Memory --title "Security Analysis" --category security
+/store - --collection Research --title "Findings" --tags "research,important"
+```
+
+**Execution:**
+
+```bash
+cd ${CLAUDE_PLUGIN_ROOT}
+arc store $ARGUMENTS
+```
+
+**How It Works:**
+
+1. Accept content from file or stdin (`-`)
+2. Extract/add rich metadata (title, category, tags, custom fields)
+3. Semantic chunking preserving document structure
+4. Generate embeddings (stella default: 1024D for documents)
+5. Upload to Qdrant with metadata
+6. Persist to disk: `~/.arcaneum/agent-memory/{collection}/{date}_{agent}_{slug}.md`
+7. Generate YAML frontmatter with injection metadata (injection_id, injected_at, injected_by)
+
+**Persistence:**
+
+Content is always persisted for durability. This enables:
+
+- Re-indexing: Update embeddings without losing original content
+- Full-text retrieval: Access complete original documents
+- Audit trail: Track what was stored and when (injection_id, timestamps)
+
+**Filename Format:**
+
+`YYYYMMDD_agent_slug.md` (e.g., `20251030_claude_security-analysis.md`)
+
+**Use Cases:**
+
+- AI agents storing research findings
+- Preserving analysis results
+- Collecting synthesized information
+- Building knowledge bases from agent workflows
+
+**Default Model:**
+
+- stella (1024D, document-optimized)
+
+**Related Commands:**
+
+- /collection create - Create collection before storing (use --type markdown)
+- /search semantic - Search stored content
+- /index markdown - For indexing existing markdown directories (different use case)
+
+**Implementation:**
+
+- RDR-014: Markdown content indexing
+- arcaneum-204: Direct injection persistence module
diff --git a/plugin.lock.json b/plugin.lock.json
new file mode 100644
index 0000000..e1d7e8c
--- /dev/null
+++ b/plugin.lock.json
@@ -0,0 +1,77 @@
+{
+  "$schema": "internal://schemas/plugin.lock.v1.json",
+  "pluginId": "gh:cwensel/arcaneum:",
+  "normalized": {
+    "repo": null,
+    "ref": "refs/tags/v20251128.0",
+    "commit": "b427db27911fcf8fd69bad65d1f7e7ed41dfda5b",
+    "treeHash": "eead649300748ee8ca72c45b77d5a026b696a41e8a04b0f731e98a3bc4d61394",
+    "generatedAt": "2025-11-28T10:15:59.424342Z",
+    "toolVersion": "publish_plugins.py@0.2.0"
+  },
+  "origin": {
+    "remote": "git@github.com:zhongweili/42plugin-data.git",
+    "branch": "master",
+    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
+    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
+  },
+  "manifest": {
+    "name": "arc",
+    "description": "CLI tools for semantic and full-text search across Qdrant and MeiliSearch. Manage Docker instances, index PDFs, markdown, and source code with AST-aware chunking, frontmatter extraction, git awareness, and dual-indexing support. Store agent-generated content for long-term memory with rich metadata.",
+    "version": "0.1.0"
+  },
+  "content": {
+    "files": [
+      {
+        "path": "README.md",
+        "sha256": "c0988ee1f62a47b177874fe278d9a212bdf3cf2aae11a65dd64b7ea2caa708a5"
+      },
+      {
+        "path": ".claude-plugin/plugin.json",
+        "sha256": "b58298e91a3f58df658a7bdb5e4458aa547187445f5f5fc5c5109071c3e97b84"
+      },
+      {
+        "path": "commands/search.md",
+        "sha256": "077765fd1a2c9d22254cc69ed980f6893935b1b41de31a33246fa0b6ce3ed94b"
+      },
+      {
+        "path": "commands/collection.md",
+        "sha256": "cc9fd9c12ab856022b0241dfde3d0577967d254d8e73f7d26f9fb3190af13fd0"
+      },
+      {
+        "path": "commands/container.md",
+        "sha256": "5704ac9013d3eed4fb4f719c248e904ad064a1b6437da10c44698ad7dc2f6c80"
+      },
+      {
+        "path": "commands/doctor.md",
+        "sha256": "a04151ad4100b5930eec770145e894a9ceca33c7c6cbada0a5d3e635f0d5d3c7"
+      },
+      {
+        "path": "commands/corpus.md",
+        "sha256": "6d2a8b24e5bd9a45529f1b29f1d9d41d1850608567427c1b9c4ab7722cb775ac"
+      },
+      {
+        "path": "commands/config.md",
+        "sha256": "778133802cf7cb51136b42fc245799f375cbd4606827df93bc75f98c8de93a84"
+      },
+      {
+        "path": "commands/index.md",
+        "sha256": "63708ca6be57b81a28c6d67e7754e6897652e80fe910cb2ecd97e95f6987dbf5"
+      },
+      {
+        "path": "commands/models.md",
+        "sha256": "38195767662116e8851c7a951c40fc5f608da58e42f0229757fb38859330d73b"
+      },
+      {
+        "path": "commands/store.md",
+        "sha256": "377fd1126feaff18ac00d80a15cc561e664a3249a8087d6ebf77466f03b12e47"
+      }
+    ],
+    "dirSha256": "eead649300748ee8ca72c45b77d5a026b696a41e8a04b0f731e98a3bc4d61394"
+  },
+  "security": {
+    "scannedAt": null,
+    "scannerVersion": null,
+    "flags": []
+  }
+}
\ No newline at end of file
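
Assuming the per-file `sha256` values under `content.files` are plain SHA-256 digests of each file's bytes (the derivation of `dirSha256`/`treeHash` is not stated here), a local spot check against a checkout might look like:

```bash
# Compare the recorded digest for README.md with a locally computed one
shasum -a 256 README.md
# expected: c0988ee1f62a47b177874fe278d9a212bdf3cf2aae11a65dd64b7ea2caa708a5
```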