commit 4ad7b3dd73f02f0d0d03dfb580ed716a9c4ab9f0 Author: Zhongwei Li Date: Sun Nov 30 08:24:34 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..6134754 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "cloudflare-vectorize", + "description": "Build semantic search and RAG applications with Cloudflare Vectorize, a globally distributed vector database. Supports Workers AI and OpenAI embeddings, metadata filtering with 10 indexes, and namespace partitioning. Use when: creating vector indexes, querying embeddings, implementing semantic search or RAG, configuring metadata filters, or troubleshooting dimension mismatches, metadata index timing, insert vs upsert confusion, or filter syntax errors.", + "version": "1.0.0", + "author": { + "name": "Jeremy Dawes", + "email": "jeremy@jezweb.net" + }, + "skills": [ + "./" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..c91ac16 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# cloudflare-vectorize + +Build semantic search and RAG applications with Cloudflare Vectorize, a globally distributed vector database. Supports Workers AI and OpenAI embeddings, metadata filtering with 10 indexes, and namespace partitioning. Use when: creating vector indexes, querying embeddings, implementing semantic search or RAG, configuring metadata filters, or troubleshooting dimension mismatches, metadata index timing, insert vs upsert confusion, or filter syntax errors. diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..cf9a492 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,387 @@ +--- +name: cloudflare-vectorize +description: | + Build semantic search with Cloudflare Vectorize V2 (Sept 2024 GA). Covers V2 breaking changes: async mutations, + 5M vectors/index (was 200K), 31ms latency (was 549ms), returnMetadata enum, and V1 deprecation (Dec 2024). 
+ + Use when: migrating V1→V2, handling async mutations with mutationId, creating metadata indexes before insert, + or troubleshooting "returnMetadata must be 'all'", V2 timing issues, metadata index errors, dimension mismatches. +license: MIT +metadata: + keywords: + - vectorize v2 + - vectorize ga september 2024 + - vectorize breaking changes + - async mutations + - mutationId + - returnMetadata enum + - v1 deprecated december 2024 + - metadata index before insert + - 5 million vectors + - 31ms latency + - topK 100 + - range queries v2 + - $gte $lte $in $nin + - wrangler 3.71.0 + - vectorize migration +--- + +# Cloudflare Vectorize + +Complete implementation guide for Cloudflare Vectorize - a globally distributed vector database for building semantic search, RAG (Retrieval Augmented Generation), and AI-powered applications with Cloudflare Workers. + +**Status**: Production Ready ✅ +**Last Updated**: 2025-10-21 +**Dependencies**: cloudflare-worker-base (for Worker setup), cloudflare-workers-ai (for embeddings) +**Latest Versions**: wrangler@4.43.0, @cloudflare/workers-types@4.20251014.0 +**Token Savings**: ~65% +**Errors Prevented**: 8 +**Dev Time Saved**: ~3 hours + +## What This Skill Provides + +### Core Capabilities +- ✅ **Index Management**: Create, configure, and manage vector indexes +- ✅ **Vector Operations**: Insert, upsert, query, delete, and list vectors +- ✅ **Metadata Filtering**: Advanced filtering with 10 metadata indexes per index +- ✅ **Semantic Search**: Find similar vectors using cosine, euclidean, or dot-product metrics +- ✅ **RAG Patterns**: Complete retrieval-augmented generation workflows +- ✅ **Workers AI Integration**: Native embedding generation with @cf/baai/bge-base-en-v1.5 +- ✅ **OpenAI Integration**: Support for text-embedding-3-small/large models +- ✅ **Document Processing**: Text chunking and batch ingestion pipelines + +### Templates Included +1. **basic-search.ts** - Simple vector search with Workers AI +2. 
**rag-chat.ts** - Full RAG chatbot with context retrieval +3. **document-ingestion.ts** - Document chunking and embedding pipeline +4. **metadata-filtering.ts** - Advanced filtering patterns + +--- + +## ⚠️ Vectorize V2 Breaking Changes (September 2024) + +**IMPORTANT**: Vectorize V2 became GA in September 2024 with significant breaking changes. + +### What Changed in V2 + +**Performance Improvements**: +- **Index capacity**: 200,000 → **5 million vectors** per index +- **Query latency**: 549ms → **31ms** median (18× faster) +- **TopK limit**: 20 → **100** results per query +- **Scale limits**: 100 → **50,000 indexes** per account +- **Namespace limits**: 100 → **50,000 namespaces** per index + +**Breaking API Changes**: +1. **Async Mutations** - All mutations now asynchronous: + ```typescript + // V2: Returns mutationId + const result = await env.VECTORIZE_INDEX.insert(vectors); + console.log(result.mutationId); // "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + + // Vector inserts/deletes may take a few seconds to be reflected + ``` + +2. **returnMetadata Parameter** - Boolean → String enum: + ```typescript + // ❌ V1 (deprecated) + { returnMetadata: true } + + // ✅ V2 (required) + { returnMetadata: 'all' | 'indexed' | 'none' } + ``` + +3. 
**Metadata Indexes Required Before Insert**: + - V2 requires metadata indexes created BEFORE vectors inserted + - Vectors added before metadata index won't be indexed + - Must re-upsert vectors after creating metadata index + +**V1 Deprecation Timeline**: +- **December 2024**: Can no longer create V1 indexes +- **Existing V1 indexes**: Continue to work (other operations unaffected) +- **Migration**: Use `wrangler vectorize --deprecated-v1` flag for V1 operations + +**Wrangler Version Required**: +- **Minimum**: wrangler@3.71.0 for V2 commands +- **Recommended**: wrangler@4.43.0+ (latest) + +### Check Mutation Status + +```typescript +// Get index info to check last mutation processed +const info = await env.VECTORIZE_INDEX.describe(); +console.log(info.mutationId); // Last mutation ID +console.log(info.processedUpToMutation); // Last processed timestamp +``` + +--- + +## Critical Setup Rules + +### ⚠️ MUST DO BEFORE INSERTING VECTORS +```bash +# 1. Create the index with FIXED dimensions and metric +npx wrangler vectorize create my-index \ + --dimensions=768 \ + --metric=cosine + +# 2. Create metadata indexes IMMEDIATELY (before inserting vectors!) +npx wrangler vectorize create-metadata-index my-index \ + --property-name=category \ + --type=string + +npx wrangler vectorize create-metadata-index my-index \ + --property-name=timestamp \ + --type=number +``` + +**Why**: Metadata indexes MUST exist before vectors are inserted. Vectors added before a metadata index was created won't be filterable on that property. 
+ +### Index Configuration (Cannot Be Changed Later) + +```bash +# Dimensions MUST match your embedding model output: +# - Workers AI @cf/baai/bge-base-en-v1.5: 768 dimensions +# - OpenAI text-embedding-3-small: 1536 dimensions +# - OpenAI text-embedding-3-large: 3072 dimensions + +# Metrics determine similarity calculation: +# - cosine: Best for normalized embeddings (most common) +# - euclidean: Absolute distance between vectors +# - dot-product: For non-normalized vectors +``` + +## Wrangler Configuration + +**wrangler.jsonc**: +```jsonc +{ + "name": "my-vectorize-worker", + "main": "src/index.ts", + "compatibility_date": "2025-10-21", + "vectorize": [ + { + "binding": "VECTORIZE_INDEX", + "index_name": "my-index" + } + ], + "ai": { + "binding": "AI" + } +} +``` + +## TypeScript Types + +```typescript +export interface Env { + VECTORIZE_INDEX: VectorizeIndex; + AI: Ai; +} + +interface VectorizeVector { + id: string; + values: number[] | Float32Array | Float64Array; + namespace?: string; + metadata?: Record; +} + +interface VectorizeMatches { + matches: Array<{ + id: string; + score: number; + values?: number[]; + metadata?: Record; + namespace?: string; + }>; + count: number; +} +``` + +## Metadata Filter Operators (V2) + +Vectorize V2 supports advanced metadata filtering with range queries: + +```typescript +// Equality (implicit $eq) +{ category: "docs" } + +// Not equals +{ status: { $ne: "archived" } } + +// In/Not in arrays +{ category: { $in: ["docs", "tutorials"] } } +{ category: { $nin: ["deprecated", "draft"] } } + +// Range queries (numbers) - NEW in V2 +{ timestamp: { $gte: 1704067200, $lt: 1735689600 } } + +// Range queries (strings) - prefix searching +{ url: { $gte: "/docs/workers", $lt: "/docs/workersz" } } + +// Nested metadata with dot notation +{ "author.id": "user123" } + +// Multiple conditions (implicit AND) +{ category: "docs", language: "en", "metadata.published": true } +``` + +## Metadata Best Practices + +### 1. 
Cardinality Considerations + +**Low Cardinality (Good for $eq filters)**: +```typescript +// Few unique values - efficient filtering +metadata: { + category: "docs", // ~10 categories + language: "en", // ~5 languages + published: true // 2 values (boolean) +} +``` + +**High Cardinality (Avoid in range queries)**: +```typescript +// Many unique values - avoid large range scans +metadata: { + user_id: "uuid-v4...", // Millions of unique values + timestamp_ms: 1704067200123 // Use seconds instead +} +``` + +### 2. Metadata Limits + +- **Max 10 metadata indexes** per Vectorize index +- **Max 10 KiB metadata** per vector +- **String indexes**: First 64 bytes (UTF-8) +- **Number indexes**: Float64 precision +- **Filter size**: Max 2048 bytes (compact JSON) + +### 3. Key Restrictions + +```typescript +// ❌ INVALID metadata keys +metadata: { + "": "value", // Empty key + "user.name": "John", // Contains dot (reserved for nesting) + "$admin": true, // Starts with $ + "key\"with\"quotes": 1 // Contains quotes +} + +// ✅ VALID metadata keys +metadata: { + "user_name": "John", + "isAdmin": true, + "nested": { "allowed": true } // Access as "nested.allowed" in filters +} +``` + +## Common Errors & Solutions + +### Error 1: Metadata Index Created After Vectors Inserted +``` +Problem: Filtering doesn't work on existing vectors +Solution: Delete and re-insert vectors OR create metadata indexes BEFORE inserting +``` + +### Error 2: Dimension Mismatch +``` +Problem: "Vector dimensions do not match index configuration" +Solution: Ensure embedding model output matches index dimensions: + - Workers AI bge-base: 768 + - OpenAI small: 1536 + - OpenAI large: 3072 +``` + +### Error 3: Invalid Metadata Keys +``` +Problem: "Invalid metadata key" +Solution: Keys cannot: + - Be empty + - Contain . 
(dot) + - Contain " (quote) + - Start with $ (dollar sign) +``` + +### Error 4: Filter Too Large +``` +Problem: "Filter exceeds 2048 bytes" +Solution: Simplify filter or split into multiple queries +``` + +### Error 5: Range Query on High Cardinality +``` +Problem: Slow queries or reduced accuracy +Solution: Use lower cardinality fields for range queries, or use seconds instead of milliseconds for timestamps +``` + +### Error 6: Insert vs Upsert Confusion +``` +Problem: Updates not reflecting in index +Solution: Use upsert() to overwrite existing vectors, not insert() +``` + +### Error 7: Missing Bindings +``` +Problem: "VECTORIZE_INDEX is not defined" +Solution: Add [[vectorize]] binding to wrangler.jsonc +``` + +### Error 8: Namespace vs Metadata Confusion +``` +Problem: Unclear when to use namespace vs metadata filtering +Solution: + - Namespace: Partition key, applied BEFORE metadata filters + - Metadata: Flexible key-value filtering within namespace +``` + +### Error 9: V2 Async Mutation Timing (NEW in V2) +``` +Problem: Inserted vectors not immediately queryable +Solution: V2 mutations are asynchronous - vectors may take a few seconds to be reflected + - Use mutationId to track mutation status + - Check env.VECTORIZE_INDEX.describe() for processedUpToMutation timestamp +``` + +### Error 10: V1 returnMetadata Boolean (BREAKING in V2) +``` +Problem: "returnMetadata must be 'all', 'indexed', or 'none'" +Solution: V2 changed returnMetadata from boolean to string enum: + - ❌ V1: { returnMetadata: true } + - ✅ V2: { returnMetadata: 'all' } +``` + +--- + +## V2 Migration Checklist + +**If migrating from V1 to V2**: + +1. ✅ Update wrangler to 3.71.0+ (`npm install -g wrangler@latest`) +2. ✅ Create new V2 index (can't upgrade V1 → V2) +3. ✅ Create metadata indexes BEFORE inserting vectors +4. ✅ Update `returnMetadata` boolean → string enum ('all', 'indexed', 'none') +5. ✅ Handle async mutations (expect `mutationId` in responses) +6. 
✅ Test with V2 limits (topK up to 100, 5M vectors per index) +7. ✅ Update error handling for async behavior + +**V1 Deprecation**: +- After December 2024: Cannot create new V1 indexes +- Existing V1 indexes: Continue to work +- Use `wrangler vectorize --deprecated-v1` for V1 operations + +--- + +## Official Documentation + +- **Vectorize V2 Docs**: https://developers.cloudflare.com/vectorize/ +- **V2 Changelog**: https://developers.cloudflare.com/vectorize/platform/changelog/ +- **V1 to V2 Migration**: https://developers.cloudflare.com/vectorize/reference/transition-vectorize-legacy/ +- **Metadata Filtering**: https://developers.cloudflare.com/vectorize/reference/metadata-filtering/ +- **Workers AI Models**: https://developers.cloudflare.com/workers-ai/models/ + +--- + +**Status**: Production Ready ✅ (Vectorize V2 GA - September 2024) +**Last Updated**: 2025-11-22 +**Token Savings**: ~70% +**Errors Prevented**: 10 (includes V2 breaking changes) diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..9a437b6 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,89 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:jezweb/claude-skills:skills/cloudflare-vectorize", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "de2ea268bcafcbbf8ae0be57c6a3d873b29e4cdb", + "treeHash": "327aa5de3888099b1a554dc7477712aa47d13777e8b2664713fccd69ea27170f", + "generatedAt": "2025-11-28T10:18:56.801059Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "cloudflare-vectorize", + "description": "Build semantic search and RAG applications with Cloudflare Vectorize, a globally distributed vector database. 
Supports Workers AI and OpenAI embeddings, metadata filtering with 10 indexes, and namespace partitioning. Use when: creating vector indexes, querying embeddings, implementing semantic search or RAG, configuring metadata filters, or troubleshooting dimension mismatches, metadata index timing, insert vs upsert confusion, or filter syntax errors.", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "05ca70d55f10152936ffea58f014492f9dd671f1faec080d3acdd6712a3de3c3" + }, + { + "path": "SKILL.md", + "sha256": "78734c14174cda1067c0a6dcede80ae9ecbf13c1377811ba39bc52c68a965345" + }, + { + "path": "references/vector-operations.md", + "sha256": "c6d5dbf5ad52453212f1f4c4825dbc5cd03bcafd75f2ef7d8bffa36246d99e74" + }, + { + "path": "references/metadata-guide.md", + "sha256": "8815b8c188d7be48d87c5fe3468f1f88d745e8bb80ed01480f05f98fb247d0bd" + }, + { + "path": "references/integration-workers-ai-bge-base.md", + "sha256": "b4c270ba6ee4d097b6443321cdd5e8b4558adad4fc49e6cd05c70d6d17607d0c" + }, + { + "path": "references/integration-openai-embeddings.md", + "sha256": "81d6dbdb262d7e0018d08218a02fb168beec92edc9a2ae49bed66d74c69dec99" + }, + { + "path": "references/wrangler-commands.md", + "sha256": "9f0fba62708ccf5644331abfb6e5d81fa7da03237661b7869ab63f8d630177f2" + }, + { + "path": "references/embedding-models.md", + "sha256": "20bc83002e6989b4951681a5ba35b382cb9159abcd9e879ebb444c2ea9f829f7" + }, + { + "path": "references/index-operations.md", + "sha256": "a05090c2a2d6cdab083400232aeaf91ba6c511e1947c2ded68a0d1a8bc7a4d84" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "3616f151dec1c7f42a0aabce23aae9aedbd7fb162bdb1deb823f219f15db99b2" + }, + { + "path": "templates/document-ingestion.ts", + "sha256": "2860088733e249a4f311898f942002e7437bd571790067aac6f832e29e1d50fe" + }, + { + "path": "templates/metadata-filtering.ts", + "sha256": "95d4b599b0e9d992ace6619ef3dbde9d206bf35cdf29f3d6ef3368b51e1fc907" + }, + { + "path": 
"templates/basic-search.ts", + "sha256": "e15f40238cee2227ef2189d16036adf5d771351d53a77272210ab716003f9ef0" + }, + { + "path": "templates/rag-chat.ts", + "sha256": "09a9dbeb76166c23e8ff4a3e8c5a17ab8b1c5f9ad66b27c925d9ebfc94dd65e9" + } + ], + "dirSha256": "327aa5de3888099b1a554dc7477712aa47d13777e8b2664713fccd69ea27170f" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/references/embedding-models.md b/references/embedding-models.md new file mode 100644 index 0000000..f5615ef --- /dev/null +++ b/references/embedding-models.md @@ -0,0 +1,425 @@ +# Embedding Models Reference + +Complete guide for generating vector embeddings with Workers AI and OpenAI. + +## Model Comparison + +| Model | Provider | Dimensions | Metric | Cost | Performance | +|-------|----------|------------|--------|------|-------------| +| @cf/baai/bge-base-en-v1.5 | Workers AI | 768 | cosine | Free | Fast, edge-optimized | +| text-embedding-3-small | OpenAI | 1536 | cosine | $0.02/1M tokens | High quality, affordable | +| text-embedding-3-large | OpenAI | 3072 | cosine | $0.13/1M tokens | Highest accuracy | +| text-embedding-ada-002 | OpenAI (legacy) | 1536 | cosine | $0.10/1M tokens | Deprecated | + +## Workers AI (@cf/baai/bge-base-en-v1.5) + +**Best for**: Production apps requiring free, fast embeddings with good quality. + +### Configuration + +```bash +# Create index with 768 dimensions +npx wrangler vectorize create my-index \ + --dimensions=768 \ + --metric=cosine +``` + +### Wrangler Binding + +```jsonc +{ + "ai": { + "binding": "AI" + }, + "vectorize": [ + { + "binding": "VECTORIZE_INDEX", + "index_name": "my-index" + } + ] +} +``` + +### Single Text + +```typescript +const embedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: "Cloudflare Workers are serverless functions." 
+}); + +// embedding.data[0] is number[] with 768 dimensions +await env.VECTORIZE_INDEX.upsert([{ + id: 'doc-1', + values: embedding.data[0], + metadata: { title: 'Workers Intro' } +}]); +``` + +### Batch Embeddings + +```typescript +const texts = [ + "Document 1 content", + "Document 2 content", + "Document 3 content" +]; + +const embeddings = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: texts // Array of strings +}); + +// embeddings.data is number[][] (array of 768-dim vectors) +const vectors = texts.map((text, i) => ({ + id: `doc-${i}`, + values: embeddings.data[i], + metadata: { content: text } +})); + +await env.VECTORIZE_INDEX.upsert(vectors); +``` + +### Error Handling + +```typescript +try { + const embedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: userQuery + }); + + if (!embedding?.data?.[0]) { + throw new Error('No embedding returned'); + } + + // Use embedding +} catch (error) { + console.error('Embedding generation failed:', error); + // Fallback logic +} +``` + +### Limits + +- **Max input length**: ~512 tokens (~2000 characters) +- **Batch size**: Up to 100 texts per request +- **Rate limits**: Generous (Workers AI scales automatically) +- **Cost**: Free! + +## OpenAI Embeddings + +**Best for**: Higher quality embeddings, larger context windows, or specific use cases. + +### API Key Setup + +Store API key as environment variable: + +```bash +npx wrangler secret put OPENAI_API_KEY +``` + +### text-embedding-3-small (1536 dimensions) + +**Best for**: Cost-effective high quality embeddings. 
+ +#### Configuration + +```bash +# Create index with 1536 dimensions +npx wrangler vectorize create my-index \ + --dimensions=1536 \ + --metric=cosine +``` + +#### Worker Code + +```typescript +import OpenAI from 'openai'; + +export interface Env { + OPENAI_API_KEY: string; + VECTORIZE_INDEX: VectorizeIndex; +} + +export default { + async fetch(request: Request, env: Env): Promise { + const openai = new OpenAI({ apiKey: env.OPENAI_API_KEY }); + + // Single embedding + const response = await openai.embeddings.create({ + model: "text-embedding-3-small", + input: "Text to embed", + encoding_format: "float" // Default + }); + + await env.VECTORIZE_INDEX.upsert([{ + id: 'doc-1', + values: response.data[0].embedding, // 1536 dimensions + metadata: { model: 'openai-3-small' } + }]); + + return Response.json({ success: true }); + } +}; +``` + +#### Batch Embeddings + +```typescript +const response = await openai.embeddings.create({ + model: "text-embedding-3-small", + input: [ + "Document 1", + "Document 2", + "Document 3" + ] +}); + +const vectors = response.data.map((item, i) => ({ + id: `doc-${i}`, + values: item.embedding, + metadata: { index: i } +})); + +await env.VECTORIZE_INDEX.upsert(vectors); +``` + +### text-embedding-3-large (3072 dimensions) + +**Best for**: Maximum accuracy, research, or high-stakes applications. 
+ +```bash +# Create index with 3072 dimensions +npx wrangler vectorize create high-accuracy-index \ + --dimensions=3072 \ + --metric=cosine +``` + +```typescript +const response = await openai.embeddings.create({ + model: "text-embedding-3-large", + input: "Text requiring high accuracy embedding" +}); + +await env.VECTORIZE_INDEX.upsert([{ + id: 'doc-1', + values: response.data[0].embedding, // 3072 dimensions + metadata: { model: 'openai-3-large' } +}]); +``` + +### OpenAI Error Handling + +```typescript +try { + const response = await openai.embeddings.create({ + model: "text-embedding-3-small", + input: text + }); + + return response.data[0].embedding; +} catch (error) { + if (error.status === 429) { + console.error('Rate limited'); + // Implement retry with backoff + } else if (error.status === 401) { + console.error('Invalid API key'); + } else { + console.error('OpenAI error:', error); + } + throw error; +} +``` + +### OpenAI Limits + +- **text-embedding-3-small**: 8191 tokens input +- **text-embedding-3-large**: 8191 tokens input +- **Batch size**: Up to 2048 inputs per request +- **Rate limits**: Varies by tier (check OpenAI dashboard) + +## Model Selection Guide + +### Use Workers AI (@cf/baai/bge-base-en-v1.5) when: + +✅ Building production apps with budget constraints +✅ Need fast, edge-optimized embeddings +✅ Working with English text +✅ Don't need extremely high accuracy +✅ Want zero per-request costs + +### Use OpenAI text-embedding-3-small when: + +✅ Need higher quality than Workers AI +✅ Budget allows ($0.02/1M tokens is affordable) +✅ Working with multilingual content +✅ Need longer context (8191 tokens) +✅ Willing to pay for better accuracy + +### Use OpenAI text-embedding-3-large when: + +✅ Accuracy is critical (legal, medical, research) +✅ Large budget ($0.13/1M tokens) +✅ Need best possible search quality +✅ Working with complex or nuanced content + +## Embedding Best Practices + +### 1. 
Consistent Model Usage + +**Always use the SAME model for indexing and querying!** + +```typescript +// ❌ Wrong: Different models +// Index with Workers AI +const indexEmbedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: document +}); + +// Query with OpenAI (WRONG!) +const queryEmbedding = await openai.embeddings.create({ + model: "text-embedding-3-small", + input: query +}); +// This won't work - different embedding spaces! + +// ✅ Right: Same model +const indexEmbedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: document +}); +const queryEmbedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: query +}); +``` + +### 2. Text Preprocessing + +```typescript +function preprocessText(text: string): string { + return text + .trim() // Remove leading/trailing whitespace + .replace(/\s+/g, ' ') // Normalize whitespace + .slice(0, 8000); // Truncate to model limits +} + +const embedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: preprocessText(rawText) +}); +``` + +### 3. Batch for Efficiency + +```typescript +// ✅ Good: Batch processing +const embeddings = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: arrayOf100Texts +}); + +// ❌ Bad: Individual requests +for (const text of texts) { + const embedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text + }); +} +``` + +### 4. Cache Embeddings + +```typescript +// Store embeddings, don't regenerate +await env.VECTORIZE_INDEX.upsert([{ + id: 'doc-1', + values: embedding, + metadata: { + content: text, // Store original + model: 'bge-base-en-v1.5', + generated_at: Date.now() + } +}]); + +// Later: Retrieve embedding instead of regenerating +const vectors = await env.VECTORIZE_INDEX.getByIds(['doc-1']); +const cachedEmbedding = vectors[0].values; // Reuse! +``` + +### 5. 
Handle Failures Gracefully + +```typescript +async function generateEmbedding(text: string, env: Env): Promise<number[]> { + try { + const response = await env.AI.run('@cf/baai/bge-base-en-v1.5', { text }); + return response.data[0]; + } catch (error) { + console.error('Primary embedding failed, trying fallback'); + + // Fallback to OpenAI if Workers AI fails + const openai = new OpenAI({ apiKey: env.OPENAI_API_KEY }); + const fallback = await openai.embeddings.create({ + model: "text-embedding-3-small", + input: text + }); + + return fallback.data[0].embedding; + } +} +``` + +## Testing Embedding Quality + +### Compare Similarity Scores + +```typescript +// Test known similar texts +const text1 = "Cloudflare Workers are serverless functions"; +const text2 = "Workers are serverless code running on Cloudflare's edge"; +const text3 = "Unrelated content about cooking recipes"; + +const [emb1, emb2, emb3] = await Promise.all([ + env.AI.run('@cf/baai/bge-base-en-v1.5', { text: text1 }), + env.AI.run('@cf/baai/bge-base-en-v1.5', { text: text2 }), + env.AI.run('@cf/baai/bge-base-en-v1.5', { text: text3 }), +]); + +const similar = cosineSimilarity(emb1.data[0], emb2.data[0]); // Should be high (>0.7) +const different = cosineSimilarity(emb1.data[0], emb3.data[0]); // Should be low (<0.3) +``` + +### Cosine Similarity Helper + +```typescript +function cosineSimilarity(a: number[], b: number[]): number { + const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0); + const magA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0)); + const magB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0)); + return dotProduct / (magA * magB); +} +``` + +## Dimension Mismatch Debugging + +```typescript +// Check actual dimensions before upserting +const embedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: "test" +}); + +console.log('Embedding dimensions:', embedding.data[0].length); + +// Verify against index +const indexInfo = await fetch( 
`https://api.cloudflare.com/client/v4/accounts/${accountId}/vectorize/v2/indexes/my-index`, + { headers: { 'Authorization': `Bearer ${apiToken}` } } +); +const indexConfig = await indexInfo.json(); +console.log('Index dimensions:', indexConfig.result.config.dimensions); + +// Must match! +if (embedding.data[0].length !== indexConfig.result.config.dimensions) { + throw new Error('Dimension mismatch!'); +} +``` + +## See Also + +- [Vector Operations](./vector-operations.md) +- [Index Operations](./index-operations.md) +- [Workers AI Models](https://developers.cloudflare.com/workers-ai/models/#text-embeddings) +- [OpenAI Embeddings API](https://platform.openai.com/docs/guides/embeddings) diff --git a/references/index-operations.md b/references/index-operations.md new file mode 100644 index 0000000..327ea01 --- /dev/null +++ b/references/index-operations.md @@ -0,0 +1,364 @@ +# Index Operations Guide + +Complete guide for creating and managing Vectorize indexes. + +## Index Configuration + +### Critical Decisions (Cannot Be Changed!) + +When creating an index, these settings are **permanent**: + +1. **Dimensions**: Vector width (must match embedding model) +2. **Distance Metric**: How similarity is calculated + +Choose carefully - you cannot change these after creation! + +### Dimensions + +Dimensions must match your embedding model's output: + +| Model | Provider | Dimensions | Recommended Metric | +|-------|----------|------------|-------------------| +| @cf/baai/bge-base-en-v1.5 | Workers AI | 768 | cosine | +| text-embedding-3-small | OpenAI | 1536 | cosine | +| text-embedding-3-large | OpenAI | 3072 | cosine | +| text-embedding-ada-002 | OpenAI (legacy) | 1536 | cosine | +| embed-english-v3.0 | Cohere | 1024 | cosine | + +**Common Mistake**: Creating an index with 1536 dimensions but using a 768-dim model! 
 + +### Distance Metrics + +Choose based on your embedding model and use case: + +#### Cosine Similarity (`cosine`) +- **Best for**: Normalized embeddings (most common) +- **Range**: -1 (opposite) to 1 (identical) +- **Use when**: Embeddings are L2-normalized +- **Most common choice** - works with Workers AI, OpenAI, Cohere + +```bash +npx wrangler vectorize create my-index \ + --dimensions=768 \ + --metric=cosine +``` + +#### Euclidean Distance (`euclidean`) +- **Best for**: Absolute distance matters +- **Range**: 0 (identical) to ∞ (different) +- **Use when**: Magnitude of vectors is important +- **Example**: Geographic coordinates, image features + +```bash +npx wrangler vectorize create geo-index \ + --dimensions=2 \ + --metric=euclidean +``` + +#### Dot Product (`dot-product`) +- **Best for**: Non-normalized embeddings +- **Range**: -∞ to ∞ +- **Use when**: Embeddings are not normalized +- **Less common** - most models produce normalized embeddings + +```bash +npx wrangler vectorize create sparse-index \ + --dimensions=1024 \ + --metric=dot-product +``` + +## Creating Indexes + +### Via Wrangler CLI + +```bash +npx wrangler vectorize create <index-name> \ + --dimensions=<dimensions> \ + --metric=<cosine|euclidean|dot-product> \ + [--description="<description>"] +``` + +### Via REST API + +```typescript +const response = await fetch( + `https://api.cloudflare.com/client/v4/accounts/${accountId}/vectorize/v2/indexes`, + { + method: 'POST', + headers: { + 'Authorization': `Bearer ${apiToken}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + name: 'my-index', + description: 'Production semantic search', + config: { + dimensions: 768, + metric: 'cosine', + }, + }), + } +); +``` + +## Metadata Indexes + +**⚠️ CRITICAL TIMING**: Create metadata indexes IMMEDIATELY after creating the main index, BEFORE inserting any vectors! + +### Why Timing Matters + +Vectorize builds metadata indexes **only for vectors inserted AFTER** the metadata index was created. Vectors inserted before won't be filterable! 
+ +### Best Practice Workflow + +```bash +# 1. Create main index +npx wrangler vectorize create docs-search \ + --dimensions=768 \ + --metric=cosine + +# 2. IMMEDIATELY create all metadata indexes +npx wrangler vectorize create-metadata-index docs-search \ + --property-name=category --type=string + +npx wrangler vectorize create-metadata-index docs-search \ + --property-name=timestamp --type=number + +npx wrangler vectorize create-metadata-index docs-search \ + --property-name=published --type=boolean + +# 3. Verify metadata indexes exist +npx wrangler vectorize list-metadata-index docs-search + +# 4. NOW safe to start inserting vectors +``` + +### Metadata Index Limits + +- **Max 10 metadata indexes** per Vectorize index +- **String type**: First 64 bytes indexed (UTF-8 boundaries) +- **Number type**: Float64 precision +- **Boolean type**: true/false + +### Choosing What to Index + +Only create metadata indexes for fields you'll **filter** on: + +✅ **Good candidates**: +- `category` (string) - "docs", "tutorials", "guides" +- `language` (string) - "en", "es", "fr" +- `published_at` (number) - Unix timestamp +- `status` (string) - "published", "draft", "archived" +- `verified` (boolean) - true/false + +❌ **Bad candidates** (don't need indexes): +- `title` (string) - only for display, not filtering +- `content` (string) - stored in metadata but not filtered +- `url` (string) - unless filtering by URL prefix + +## Wrangler Binding + +After creating an index, bind it to your Worker: + +### wrangler.jsonc + +```jsonc +{ + "name": "my-worker", + "main": "src/index.ts", + "vectorize": [ + { + "binding": "VECTORIZE_INDEX", + "index_name": "docs-search" + } + ] +} +``` + +### TypeScript Types + +```typescript +export interface Env { + VECTORIZE_INDEX: VectorizeIndex; +} +``` + +## Index Management Operations + +### List All Indexes + +```bash +npx wrangler vectorize list +``` + +### Get Index Details + +```bash +npx wrangler vectorize get my-index +``` + +**Returns**: 
+```json +{ + "name": "my-index", + "description": "Production search", + "config": { + "dimensions": 768, + "metric": "cosine" + }, + "created_on": "2024-01-15T10:30:00Z", + "modified_on": "2024-01-15T10:30:00Z" +} +``` + +### Get Index Info (Vector Count) + +```bash +npx wrangler vectorize info my-index +``` + +**Returns**: +```json +{ + "vectorsCount": 12543, + "lastProcessedMutation": { + "id": "abc123...", + "timestamp": "2024-01-20T14:22:00Z" + } +} +``` + +### Delete Index + +```bash +# With confirmation +npx wrangler vectorize delete my-index + +# Skip confirmation (use with caution!) +npx wrangler vectorize delete my-index --force +``` + +**⚠️ WARNING**: Deletion is **irreversible**! All vectors are permanently lost. + +## Index Naming Best Practices + +### Good Names + +- `production-docs-search` - Environment + purpose +- `dev-product-recommendations` - Environment + use case +- `customer-support-rag` - Descriptive use case +- `en-knowledge-base` - Language + type + +### Bad Names + +- `index1` - Not descriptive +- `my_index` - Use dashes, not underscores +- `PRODUCTION` - Use lowercase +- `this-is-a-very-long-index-name-that-exceeds-limits` - Too long + +### Naming Rules + +- Lowercase letters and numbers only +- Dashes allowed (not underscores or spaces) +- Must start with a letter +- Max 32 characters +- No special characters + +## Common Patterns + +### Multi-Environment Setup + +```bash +# Development +npx wrangler vectorize create dev-docs-search \ + --dimensions=768 --metric=cosine + +# Staging +npx wrangler vectorize create staging-docs-search \ + --dimensions=768 --metric=cosine + +# Production +npx wrangler vectorize create prod-docs-search \ + --dimensions=768 --metric=cosine +``` + +```jsonc +// wrangler.jsonc +{ + "env": { + "dev": { + "vectorize": [ + { "binding": "VECTORIZE", "index_name": "dev-docs-search" } + ] + }, + "staging": { + "vectorize": [ + { "binding": "VECTORIZE", "index_name": "staging-docs-search" } + ] + }, + "production": 
{ + "vectorize": [ + { "binding": "VECTORIZE", "index_name": "prod-docs-search" } + ] + } + } +} +``` + +### Multi-Tenant with Namespaces + +Instead of creating separate indexes per customer, use one index with namespaces: + +```bash +# Single index for all tenants +npx wrangler vectorize create multi-tenant-index \ + --dimensions=768 --metric=cosine +``` + +```typescript +// Insert with namespace +await env.VECTORIZE_INDEX.upsert([{ + id: 'doc-1', + values: embedding, + namespace: 'customer-abc123', // Isolates by customer + metadata: { title: 'Customer document' } +}]); + +// Query within namespace +const results = await env.VECTORIZE_INDEX.query(queryVector, { + topK: 5, + namespace: 'customer-abc123' // Only search this customer's data +}); +``` + +## Troubleshooting + +### "Index name already exists" + +```bash +# Check existing indexes +npx wrangler vectorize list + +# Delete old index if needed +npx wrangler vectorize delete old-name --force +``` + +### "Cannot change dimensions" + +**No fix** - must create new index and re-insert all vectors. + +### "Wrangler version 3.71.0 required" + +```bash +# Update Wrangler +npm install -g wrangler@latest + +# Or use npx +npx wrangler@latest vectorize create ... +``` + +## See Also + +- [Wrangler Commands](./wrangler-commands.md) +- [Vector Operations](./vector-operations.md) +- [Metadata Guide](./metadata-guide.md) diff --git a/references/integration-openai-embeddings.md b/references/integration-openai-embeddings.md new file mode 100644 index 0000000..6ad08a9 --- /dev/null +++ b/references/integration-openai-embeddings.md @@ -0,0 +1,479 @@ +# OpenAI Embeddings Integration Example + +Complete working example using OpenAI embeddings (text-embedding-3-small/large) with Vectorize. 
+ +## Model Specifications + +### text-embedding-3-small +- **Dimensions**: 1536 +- **Metric**: cosine (recommended) +- **Max Input**: 8191 tokens (~32K characters) +- **Cost**: $0.02 per 1M tokens +- **Best for**: High-quality embeddings at affordable cost + +### text-embedding-3-large +- **Dimensions**: 3072 +- **Metric**: cosine (recommended) +- **Max Input**: 8191 tokens (~32K characters) +- **Cost**: $0.13 per 1M tokens +- **Best for**: Maximum accuracy + +## Setup + +### 1. Install OpenAI SDK + +```bash +npm install openai +``` + +### 2. Store API Key + +```bash +# Set as Cloudflare secret +npx wrangler secret put OPENAI_API_KEY +# Paste your API key when prompted +``` + +### 3. Create Vectorize Index + +**For text-embedding-3-small**: +```bash +npx wrangler vectorize create openai-search \ + --dimensions=1536 \ + --metric=cosine \ + --description="Semantic search with OpenAI embeddings" +``` + +**For text-embedding-3-large**: +```bash +npx wrangler vectorize create openai-high-accuracy \ + --dimensions=3072 \ + --metric=cosine +``` + +### 4. Create Metadata Indexes + +```bash +npx wrangler vectorize create-metadata-index openai-search \ + --property-name=category --type=string + +npx wrangler vectorize create-metadata-index openai-search \ + --property-name=timestamp --type=number +``` + +### 5. Configure Wrangler + +**wrangler.jsonc**: +```jsonc +{ + "name": "vectorize-openai-example", + "main": "src/index.ts", + "compatibility_date": "2025-10-21", + "vectorize": [ + { + "binding": "VECTORIZE_INDEX", + "index_name": "openai-search" + } + ], + "vars": { + "EMBEDDING_MODEL": "text-embedding-3-small" + } +} +``` + +**Note**: OPENAI_API_KEY is stored as a secret, not in wrangler.jsonc! 
+ +## Complete Worker Example + +```typescript +import OpenAI from 'openai'; + +export interface Env { + OPENAI_API_KEY: string; + VECTORIZE_INDEX: VectorizeIndex; + EMBEDDING_MODEL?: string; // From wrangler.jsonc vars +} + +interface Document { + id: string; + title: string; + content: string; + category?: string; + metadata?: Record; +} + +export default { + async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise { + const openai = new OpenAI({ + apiKey: env.OPENAI_API_KEY, + }); + + const embeddingModel = env.EMBEDDING_MODEL || 'text-embedding-3-small'; + const url = new URL(request.url); + + // CORS + if (request.method === 'OPTIONS') { + return new Response(null, { + headers: { + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS', + 'Access-Control-Allow-Headers': 'Content-Type', + }, + }); + } + + // INDEX DOCUMENTS + if (url.pathname === '/index' && request.method === 'POST') { + try { + const { documents } = await request.json() as { documents: Document[] }; + + if (!documents || !Array.isArray(documents) || documents.length === 0) { + return Response.json({ error: 'Invalid documents array' }, { status: 400 }); + } + + // Generate embeddings (batch) + const response = await openai.embeddings.create({ + model: embeddingModel, + input: documents.map(doc => doc.content), + encoding_format: 'float', + }); + + // Prepare vectors + const vectors = documents.map((doc, i) => ({ + id: doc.id, + values: response.data[i].embedding, + metadata: { + title: doc.title, + content: doc.content, + category: doc.category || 'general', + timestamp: Math.floor(Date.now() / 1000), + model: embeddingModel, + ...doc.metadata, + }, + })); + + // Batch upsert (100 at a time) + const batchSize = 100; + for (let i = 0; i < vectors.length; i += batchSize) { + const batch = vectors.slice(i, i + batchSize); + await env.VECTORIZE_INDEX.upsert(batch); + } + + return Response.json({ + success: true, + indexed: vectors.length, + model: 
embeddingModel, + usage: { + prompt_tokens: response.usage.prompt_tokens, + total_tokens: response.usage.total_tokens, + }, + }, { + headers: { 'Access-Control-Allow-Origin': '*' }, + }); + } catch (error) { + console.error('Indexing error:', error); + + // Handle OpenAI-specific errors + if (error instanceof OpenAI.APIError) { + return Response.json({ + error: 'OpenAI API error', + message: error.message, + status: error.status, + code: error.code, + }, { status: error.status || 500 }); + } + + return Response.json({ + error: error instanceof Error ? error.message : 'Unknown error', + }, { status: 500 }); + } + } + + // SEARCH + if (url.pathname === '/search' && request.method === 'POST') { + try { + const { query, topK = 5, filter, namespace } = await request.json() as { + query: string; + topK?: number; + filter?: Record; + namespace?: string; + }; + + if (!query) { + return Response.json({ error: 'Missing query' }, { status: 400 }); + } + + // Generate query embedding + const response = await openai.embeddings.create({ + model: embeddingModel, + input: query, + encoding_format: 'float', + }); + + // Search Vectorize + const results = await env.VECTORIZE_INDEX.query( + response.data[0].embedding, + { + topK, + filter, + namespace, + returnMetadata: 'all', + returnValues: false, + } + ); + + return Response.json({ + query, + model: embeddingModel, + results: results.matches.map(match => ({ + id: match.id, + score: match.score, + title: match.metadata?.title, + content: match.metadata?.content, + category: match.metadata?.category, + })), + count: results.count, + usage: { + prompt_tokens: response.usage.prompt_tokens, + }, + }, { + headers: { 'Access-Control-Allow-Origin': '*' }, + }); + } catch (error) { + console.error('Search error:', error); + + if (error instanceof OpenAI.APIError) { + return Response.json({ + error: 'OpenAI API error', + message: error.message, + status: error.status, + }, { status: error.status || 500 }); + } + + return Response.json({ + 
error: error instanceof Error ? error.message : 'Unknown error', + }, { status: 500 }); + } + } + + // DEFAULT: API Documentation + return Response.json({ + name: 'Vectorize + OpenAI Embeddings', + model: embeddingModel, + endpoints: { + 'POST /index': { + description: 'Index documents with OpenAI embeddings', + body: { + documents: [ + { + id: 'doc-1', + title: 'Document Title', + content: 'Document content (up to 8191 tokens)', + category: 'tutorials', + }, + ], + }, + }, + 'POST /search': { + description: 'Semantic search', + body: { + query: 'search query', + topK: 5, + filter: { category: 'tutorials' }, + }, + }, + }, + }); + }, +}; +``` + +## Usage Examples + +### 1. Index Documents + +```bash +curl -X POST https://your-worker.workers.dev/index \ + -H "Content-Type: application/json" \ + -d '{ + "documents": [ + { + "id": "legal-doc-1", + "title": "Terms of Service", + "content": "This Terms of Service agreement governs your use of our platform. By accessing or using the service, you agree to be bound by these terms. The service is provided as-is without warranties...", + "category": "legal", + "metadata": { + "version": "2.1", + "effective_date": "2024-01-01" + } + }, + { + "id": "legal-doc-2", + "title": "Privacy Policy", + "content": "We collect and process personal data in accordance with GDPR and other applicable regulations. This policy describes what data we collect, how we use it, and your rights regarding your data...", + "category": "legal" + } + ] + }' +``` + +### 2. 
Search with High Accuracy

```bash
curl -X POST https://your-worker.workers.dev/search \
  -H "Content-Type: application/json" \
  -d '{
    "query": "What are my rights under your privacy policy?",
    "topK": 3,
    "filter": { "category": "legal" }
  }'
```

## Cost Estimation

### text-embedding-3-small ($0.02/1M tokens)

```
1 page ≈ 500 tokens
10,000 pages = 5M tokens = $0.10
100,000 pages = 50M tokens = $1.00
1M pages = 500M tokens = $10.00
```

### text-embedding-3-large ($0.13/1M tokens)

```
10,000 pages = 5M tokens = $0.65
100,000 pages = 50M tokens = $6.50
1M pages = 500M tokens = $65.00
```

## Error Handling

### Rate Limiting

```typescript
async function generateEmbeddingWithRetry(
  text: string,
  openai: OpenAI,
  model: string,
  maxRetries = 3
): Promise<number[]> {
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      const response = await openai.embeddings.create({
        model,
        input: text,
      });
      return response.data[0].embedding;
    } catch (error) {
      if (error instanceof OpenAI.APIError && error.status === 429) {
        // Rate limited - exponential backoff
        const delay = Math.pow(2, attempt) * 1000; // 1s, 2s, 4s
        console.log(`Rate limited. 
Retrying in ${delay}ms...`); + await new Promise(resolve => setTimeout(resolve, delay)); + continue; + } + throw error; + } + } + throw new Error('Max retries exceeded'); +} +``` + +### API Key Validation + +```typescript +if (!env.OPENAI_API_KEY) { + return Response.json({ + error: 'OpenAI API key not configured', + message: 'Set OPENAI_API_KEY using: npx wrangler secret put OPENAI_API_KEY', + }, { status: 500 }); +} +``` + +### Dimension Validation + +```typescript +const response = await openai.embeddings.create({ + model: 'text-embedding-3-small', + input: 'test', +}); + +const dimensions = response.data[0].embedding.length; +console.log(`Embedding dimensions: ${dimensions}`); // Should be 1536 + +if (dimensions !== 1536) { + throw new Error(`Expected 1536 dimensions, got ${dimensions}`); +} +``` + +## Switching Between Models + +### Update wrangler.jsonc + +```jsonc +{ + "vars": { + "EMBEDDING_MODEL": "text-embedding-3-large" + } +} +``` + +### Create New Index + +```bash +# Create index with 3072 dimensions for text-embedding-3-large +npx wrangler vectorize create openai-large \ + --dimensions=3072 \ + --metric=cosine + +# Update binding +# wrangler.jsonc: +{ + "vectorize": [ + { + "binding": "VECTORIZE_INDEX", + "index_name": "openai-large" + } + ] +} +``` + +## Testing Locally + +```bash +# Set API key for local dev +export OPENAI_API_KEY=sk-... + +# Run dev server +npx wrangler dev + +# Test +curl -X POST http://localhost:8787/index \ + -H "Content-Type: application/json" \ + -d '{"documents":[{"id":"test","title":"Test","content":"Test content"}]}' +``` + +## Performance Tips + +1. **Batch requests**: Up to 2048 inputs per API call +2. **Monitor usage**: Track token consumption in response +3. **Cache embeddings**: Store in Vectorize, don't regenerate +4. **Use smaller model**: text-embedding-3-small is 6.5x cheaper + +## Migration from Workers AI + +If migrating from Workers AI to OpenAI: + +1. Create new index with 1536 or 3072 dimensions +2. 
Re-generate embeddings with OpenAI +3. Update queries to use same model +4. **Don't mix models!** Always use the same model for index and query + +## See Also + +- [Main Skill Documentation](../SKILL.md) +- [Embedding Models Reference](../references/embedding-models.md) +- [Workers AI Example](./workers-ai-bge-base.md) +- [OpenAI Embeddings API](https://platform.openai.com/docs/guides/embeddings) diff --git a/references/integration-workers-ai-bge-base.md b/references/integration-workers-ai-bge-base.md new file mode 100644 index 0000000..02b4e00 --- /dev/null +++ b/references/integration-workers-ai-bge-base.md @@ -0,0 +1,388 @@ +# Workers AI Integration Example (@cf/baai/bge-base-en-v1.5) + +Complete working example using Cloudflare Workers AI for embeddings with Vectorize. + +## Model Specifications + +- **Model**: `@cf/baai/bge-base-en-v1.5` +- **Dimensions**: 768 +- **Metric**: cosine (recommended) +- **Max Input**: ~512 tokens (~2000 characters) +- **Cost**: Free +- **Latency**: ~50-200ms (edge-optimized) + +## Setup + +### 1. Create Vectorize Index + +```bash +npx wrangler vectorize create docs-search \ + --dimensions=768 \ + --metric=cosine \ + --description="Documentation search with Workers AI" +``` + +### 2. Create Metadata Indexes + +```bash +npx wrangler vectorize create-metadata-index docs-search \ + --property-name=category --type=string + +npx wrangler vectorize create-metadata-index docs-search \ + --property-name=published_at --type=number +``` + +### 3. 
Configure Wrangler + +**wrangler.jsonc**: +```jsonc +{ + "name": "vectorize-workers-ai-example", + "main": "src/index.ts", + "compatibility_date": "2025-10-21", + "ai": { + "binding": "AI" + }, + "vectorize": [ + { + "binding": "VECTORIZE_INDEX", + "index_name": "docs-search" + } + ] +} +``` + +## Complete Worker Example + +```typescript +export interface Env { + AI: Ai; + VECTORIZE_INDEX: VectorizeIndex; +} + +interface Document { + id: string; + title: string; + content: string; + category?: string; + url?: string; +} + +export default { + async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise { + const url = new URL(request.url); + + // INDEX DOCUMENTS + if (url.pathname === '/index' && request.method === 'POST') { + try { + const { documents } = await request.json() as { documents: Document[] }; + + if (!documents || !Array.isArray(documents)) { + return Response.json({ error: 'Invalid documents array' }, { status: 400 }); + } + + // Extract text for embedding + const texts = documents.map(doc => doc.content); + + // Generate embeddings (batch) + const embeddings = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: texts + }); + + // Prepare vectors + const vectors = documents.map((doc, i) => ({ + id: doc.id, + values: embeddings.data[i], + metadata: { + title: doc.title, + content: doc.content, + category: doc.category || 'general', + url: doc.url, + published_at: Math.floor(Date.now() / 1000), + }, + })); + + // Upsert to Vectorize + await env.VECTORIZE_INDEX.upsert(vectors); + + return Response.json({ + success: true, + indexed: vectors.length, + ids: vectors.map(v => v.id), + }); + } catch (error) { + return Response.json({ + error: error instanceof Error ? 
error.message : 'Unknown error', + }, { status: 500 }); + } + } + + // SEARCH + if (url.pathname === '/search' && request.method === 'POST') { + try { + const { query, topK = 5, filter } = await request.json() as { + query: string; + topK?: number; + filter?: Record; + }; + + if (!query) { + return Response.json({ error: 'Missing query' }, { status: 400 }); + } + + // Generate query embedding + const queryEmbedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: query, + }); + + // Search Vectorize + const results = await env.VECTORIZE_INDEX.query( + queryEmbedding.data[0], + { + topK, + filter, + returnMetadata: 'all', + returnValues: false, + } + ); + + return Response.json({ + query, + results: results.matches.map(match => ({ + id: match.id, + score: match.score, + title: match.metadata?.title, + content: match.metadata?.content, + category: match.metadata?.category, + url: match.metadata?.url, + })), + count: results.count, + }); + } catch (error) { + return Response.json({ + error: error instanceof Error ? error.message : 'Unknown error', + }, { status: 500 }); + } + } + + // DEFAULT: API Documentation + return Response.json({ + name: 'Vectorize + Workers AI Example', + endpoints: { + 'POST /index': { + description: 'Index documents', + body: { + documents: [ + { + id: 'doc-1', + title: 'Document Title', + content: 'Document content for embedding', + category: 'tutorials', + url: '/docs/getting-started', + }, + ], + }, + }, + 'POST /search': { + description: 'Semantic search', + body: { + query: 'search query text', + topK: 5, + filter: { category: 'tutorials' }, + }, + }, + }, + }); + }, +}; +``` + +## Usage Examples + +### 1. 
Index Documents + +```bash +curl -X POST https://your-worker.workers.dev/index \ + -H "Content-Type: application/json" \ + -d '{ + "documents": [ + { + "id": "workers-intro", + "title": "Introduction to Cloudflare Workers", + "content": "Cloudflare Workers allow you to deploy serverless code globally across Cloudflare's edge network. Workers run on V8 isolates providing fast cold starts.", + "category": "documentation", + "url": "/workers/getting-started" + }, + { + "id": "vectorize-intro", + "title": "Introduction to Vectorize", + "content": "Vectorize is a globally distributed vector database for semantic search and AI applications. It integrates seamlessly with Workers AI for embedding generation.", + "category": "documentation", + "url": "/vectorize/getting-started" + }, + { + "id": "d1-intro", + "title": "Introduction to D1", + "content": "D1 is Cloudflare's serverless SQL database built on SQLite. It provides familiar SQL semantics with global distribution.", + "category": "documentation", + "url": "/d1/getting-started" + } + ] + }' +``` + +### 2. Search + +```bash +curl -X POST https://your-worker.workers.dev/search \ + -H "Content-Type: application/json" \ + -d '{ + "query": "How do I deploy serverless functions?", + "topK": 3, + "filter": { "category": "documentation" } + }' +``` + +**Response**: +```json +{ + "query": "How do I deploy serverless functions?", + "results": [ + { + "id": "workers-intro", + "score": 0.87, + "title": "Introduction to Cloudflare Workers", + "content": "Cloudflare Workers allow you to deploy...", + "category": "documentation", + "url": "/workers/getting-started" + }, + { + "id": "vectorize-intro", + "score": 0.62, + "title": "Introduction to Vectorize", + "content": "Vectorize is a globally distributed...", + "category": "documentation", + "url": "/vectorize/getting-started" + } + ], + "count": 2 +} +``` + +## Performance Tips + +### 1. 
Batch Embeddings

```typescript
// ✅ Good: Single API call for multiple texts
const embeddings = await env.AI.run('@cf/baai/bge-base-en-v1.5', {
  text: [text1, text2, text3, ...] // Up to 100 texts
});

// ❌ Bad: Multiple API calls
for (const text of texts) {
  const embedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', {
    text
  });
}
```

### 2. Optimize Return Data

```typescript
// Only return what you need
const results = await env.VECTORIZE_INDEX.query(queryVector, {
  topK: 5,
  returnValues: false, // Don't return 768 floats per result
  returnMetadata: 'all', // Return metadata only
});
```

### 3. Use Filters

```typescript
// Narrow search scope with metadata filters
const results = await env.VECTORIZE_INDEX.query(queryVector, {
  topK: 5,
  filter: {
    category: 'documentation',
    published_at: { $gte: lastWeek }
  }
});
```

## Error Handling

```typescript
async function generateEmbedding(text: string, env: Env): Promise<number[]> {
  try {
    const response = await env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: text.trim().slice(0, 2000) // Truncate to model limits
    });

    if (!response?.data?.[0]) {
      throw new Error('No embedding returned from Workers AI');
    }

    if (response.data[0].length !== 768) {
      throw new Error(`Expected 768 dimensions, got ${response.data[0].length}`);
    }

    return response.data[0];
  } catch (error) {
    console.error('Workers AI embedding error:', error);
    throw new Error(`Failed to generate embedding: ${error instanceof Error ? 
error.message : 'Unknown error'}`); + } +} +``` + +## Testing Locally + +```bash +# Install dependencies +npm install + +# Run dev server +npx wrangler dev + +# Test indexing +curl -X POST http://localhost:8787/index \ + -H "Content-Type: application/json" \ + -d '{"documents":[{"id":"test-1","title":"Test","content":"Test content"}]}' + +# Test search +curl -X POST http://localhost:8787/search \ + -H "Content-Type: application/json" \ + -d '{"query":"test","topK":5}' +``` + +## Deployment + +```bash +# Deploy to production +npx wrangler deploy + +# View logs +npx wrangler tail +``` + +## Common Issues + +### "Embedding dimensions don't match" +- **Cause**: Index created with wrong dimensions +- **Fix**: Ensure index has 768 dimensions for bge-base-en-v1.5 + +### "Text too long for model" +- **Cause**: Input text exceeds ~2000 characters +- **Fix**: Truncate or chunk text before embedding + +### "Rate limiting" +- **Cause**: Too many concurrent requests +- **Fix**: Workers AI scales automatically, but add retry logic for safety + +## See Also + +- [Main Skill Documentation](../SKILL.md) +- [RAG Chat Template](../templates/rag-chat.ts) +- [Embedding Models Reference](../references/embedding-models.md) +- [Workers AI Docs](https://developers.cloudflare.com/workers-ai/models/bge-base-en-v1.5/) diff --git a/references/metadata-guide.md b/references/metadata-guide.md new file mode 100644 index 0000000..27a5c56 --- /dev/null +++ b/references/metadata-guide.md @@ -0,0 +1,458 @@ +# Metadata Filtering Guide + +Complete reference for metadata indexes and filtering in Vectorize. + +## Overview + +Metadata allows you to: +- Store additional data alongside vectors (up to 10 KiB per vector) +- Filter query results based on metadata properties +- Narrow search scope without re-indexing + +## Metadata Indexes + +**⚠️ CRITICAL**: Metadata indexes MUST be created BEFORE inserting vectors! + +Vectors inserted before a metadata index exists won't be filterable on that property. 
+

### Creating Metadata Indexes

```bash
npx wrangler vectorize create-metadata-index <index-name> \
  --property-name=<property> \
  --type=<string|number|boolean>
```

**Types**: `string`, `number`, `boolean`

**Limits**:
- Max **10 metadata indexes** per Vectorize index
- **String**: First 64 bytes indexed (UTF-8 boundaries)
- **Number**: Float64 precision
- **Boolean**: true/false

### Example Setup

```bash
# Create index
npx wrangler vectorize create docs-search --dimensions=768 --metric=cosine

# Create metadata indexes IMMEDIATELY
npx wrangler vectorize create-metadata-index docs-search \
  --property-name=category --type=string

npx wrangler vectorize create-metadata-index docs-search \
  --property-name=published_at --type=number

npx wrangler vectorize create-metadata-index docs-search \
  --property-name=verified --type=boolean

# Verify
npx wrangler vectorize list-metadata-index docs-search
```

## Metadata Schema

### Valid Metadata Keys

```typescript
// ✅ Valid keys
metadata: {
  category: 'docs',
  title: 'Getting Started',
  published_at: 1704067200,
  verified: true,
  nested: { allowed: true }
}

// ❌ Invalid keys
metadata: {
  '': 'value', // Empty key
  'user.name': 'John', // Contains dot (reserved for nesting)
  '$admin': true, // Starts with $
  'key"quoted': 1 // Contains "
}
```

**Key restrictions**:
- Cannot be empty
- Cannot contain `.` (dot) - reserved for nested access
- Cannot contain `"` (double quote)
- Cannot start with `$` (dollar sign)
- Max 512 characters

### Nested Metadata

Use dot notation for nested properties:

```typescript
// Store nested metadata
metadata: {
  author: {
    id: 'user123',
    name: 'John Doe',
    verified: true
  }
}

// Filter with dot notation
filter: { 'author.verified': true }

// Create index for nested property (dot path must match the filter)
npx wrangler vectorize create-metadata-index docs-search \
  --property-name=author.verified \
  --type=boolean
```

## Filter Operators

### Equality

```typescript
+// Implicit $eq +filter: { category: 'documentation' } + +// Explicit $eq +filter: { category: { $eq: 'documentation' } } +``` + +### Not Equals + +```typescript +filter: { status: { $ne: 'archived' } } +``` + +### In Array + +```typescript +filter: { category: { $in: ['docs', 'tutorials', 'guides'] } } +``` + +### Not In Array + +```typescript +filter: { status: { $nin: ['archived', 'draft', 'deleted'] } } +``` + +### Less Than + +```typescript +filter: { published_at: { $lt: 1735689600 } } +``` + +### Less Than or Equal + +```typescript +filter: { priority: { $lte: 5 } } +``` + +### Greater Than + +```typescript +filter: { published_at: { $gt: 1704067200 } } +``` + +### Greater Than or Equal + +```typescript +filter: { score: { $gte: 0.8 } } +``` + +## Range Queries + +### Number Ranges + +```typescript +// Documents published in 2024 +filter: { + published_at: { + $gte: 1704067200, // >= Jan 1, 2024 + $lt: 1735689600 // < Jan 1, 2025 + } +} + +// Scores between 0.7 and 0.9 +filter: { + quality_score: { + $gte: 0.7, + $lte: 0.9 + } +} +``` + +### String Ranges (Prefix Search) + +```typescript +// URLs starting with /docs/workers/ +filter: { + url: { + $gte: '/docs/workers/', + $lt: '/docs/workersz' // 'z' after all possible chars + } +} + +// IDs starting with 'user-2024' +filter: { + id: { + $gte: 'user-2024', + $lt: 'user-2025' + } +} +``` + +## Combined Filters + +Multiple conditions are combined with implicit **AND**: + +```typescript +filter: { + category: 'documentation', // AND + language: 'en', // AND + published: true, // AND + published_at: { $gte: 1704067200 } // AND +} +``` + +**No OR operator** - for OR logic, make multiple queries. 
+ +## Complex Examples + +### Multi-field with Ranges + +```typescript +filter: { + category: { $in: ['docs', 'tutorials'] }, + language: 'en', + status: { $ne: 'archived' }, + published_at: { + $gte: 1704067200, + $lt: 1735689600 + }, + 'author.verified': true +} +``` + +### Boolean and String + +```typescript +filter: { + published: true, + featured: false, + category: 'documentation', + language: { $in: ['en', 'es', 'fr'] } +} +``` + +### Nested with Range + +```typescript +filter: { + 'metrics.views': { $gte: 1000 }, + 'metrics.rating': { $gte: 4.5 }, + 'author.verified': true, + published_at: { $gt: Date.now() / 1000 - 86400 * 30 } // Last 30 days +} +``` + +## Cardinality Considerations + +**Cardinality** = Number of unique values in a field + +### Low Cardinality (Good for Filtering) + +```typescript +// Few unique values - efficient +category: 'docs' | 'tutorials' | 'guides' // ~3-10 values +language: 'en' | 'es' | 'fr' // ~5-20 values +published: true | false // 2 values +``` + +### High Cardinality (Avoid in Range Queries) + +```typescript +// Many unique values - can impact performance +user_id: 'uuid-v4-...' // Millions of unique values +timestamp_ms: 1704067200123 // Unique per millisecond +email: 'user@example.com' // Unique per user +``` + +### Performance Impact + +**Range queries** on high-cardinality fields can be slow: + +```typescript +// ❌ Slow: High cardinality range +filter: { + user_id: { // Millions of unique UUIDs + $gte: '00000000-0000-0000-0000-000000000000', + $lt: 'zzzzzzzz-zzzz-zzzz-zzzz-zzzzzzzzzzzz' + } +} + +// ✅ Better: Low cardinality range +filter: { + published_at: { // Timestamps in seconds + $gte: 1704067200, + $lt: 1735689600 + } +} +``` + +### Best Practices + +1. **Use seconds, not milliseconds** for timestamps +2. **Categorize high-cardinality fields** (e.g., user → user_tier) +3. **Limit range span** to avoid scanning millions of values +4. 
**Use $eq for high cardinality**, not ranges + +## Filter Size Limit + +**Max 2048 bytes** (compact JSON representation) + +```typescript +// Check filter size +const filterString = JSON.stringify(filter); +if (filterString.length > 2048) { + console.error('Filter too large!'); +} +``` + +If filter is too large: +- Split into multiple queries +- Simplify conditions +- Use namespace filtering first + +## Namespace vs Metadata Filtering + +### Namespace Filtering + +```typescript +// Insert with namespace +await env.VECTORIZE_INDEX.upsert([{ + id: 'doc-1', + values: embedding, + namespace: 'customer-abc123', // Partition key + metadata: { type: 'support' } +}]); + +// Query with namespace (applied FIRST) +const results = await env.VECTORIZE_INDEX.query(queryVector, { + namespace: 'customer-abc123', + filter: { type: 'support' } +}); +``` + +### When to Use Each + +| Use Namespace | Use Metadata | +|---------------|--------------| +| Multi-tenant isolation | Fine-grained filtering | +| Customer segmentation | Category filtering | +| Environment (dev/prod) | Date ranges | +| Large partitions | Boolean flags | +| Applied BEFORE metadata | Applied AFTER namespace | + +### Combined Strategy + +```typescript +// Namespace: Customer isolation +namespace: 'customer-abc123' + +// Metadata: Detailed filtering +filter: { + category: 'support_tickets', + status: { $ne: 'closed' }, + priority: { $gte: 3 }, + created_at: { $gte: Date.now() / 1000 - 86400 * 7 } // Last 7 days +} +``` + +## Common Patterns + +### Published Content Only + +```typescript +filter: { + published: true, + status: { $ne: 'archived' } +} +``` + +### Recent Documents + +```typescript +const oneWeekAgo = Math.floor(Date.now() / 1000) - (7 * 24 * 60 * 60); +filter: { + published_at: { $gte: oneWeekAgo } +} +``` + +### Multi-Language Support + +```typescript +filter: { + language: { $in: ['en', 'es', 'fr'] }, + published: true +} +``` + +### Verified Authors Only + +```typescript +filter: { + 
'author.verified': true, + 'author.active': true +} +``` + +### Time-Based Content + +```typescript +// Content from specific quarter +filter: { + published_at: { + $gte: 1704067200, // Q1 2024 start + $lt: 1711929600 // Q1 2024 end + } +} +``` + +## Debugging Filters + +### Test Filter Syntax + +```bash +npx wrangler vectorize query docs-search \ + --vector="[0.1,0.2,...]" \ + --filter='{"category":"docs","published":true}' \ + --top-k=5 +``` + +### Check Metadata Indexes + +```bash +npx wrangler vectorize list-metadata-index docs-search +``` + +### Verify Metadata Structure + +```typescript +const vectors = await env.VECTORIZE_INDEX.getByIds(['doc-1']); +console.log(vectors[0].metadata); +``` + +## Error Messages + +| Error | Cause | Solution | +|-------|-------|----------| +| "Metadata property not indexed" | No metadata index for property | Create metadata index | +| "Filter exceeds 2048 bytes" | Filter JSON too large | Simplify or split queries | +| "Invalid metadata key" | Key contains `.`, `"`, or starts with `$` | Rename metadata key | +| "Filter must be non-empty object" | Empty filter `{}` | Remove filter or add conditions | + +## See Also + +- [Vector Operations](./vector-operations.md) +- [Wrangler Commands](./wrangler-commands.md) +- [Index Operations](./index-operations.md) +- [Official Docs](https://developers.cloudflare.com/vectorize/reference/metadata-filtering/) diff --git a/references/vector-operations.md b/references/vector-operations.md new file mode 100644 index 0000000..703eef7 --- /dev/null +++ b/references/vector-operations.md @@ -0,0 +1,371 @@ +# Vector Operations Guide + +Complete guide for inserting, querying, updating, and deleting vectors in Vectorize. + +## Insert vs Upsert + +**Critical difference**: +- **insert()**: Keeps the FIRST vector if ID already exists +- **upsert()**: Overwrites with the LATEST vector if ID already exists + +**Use upsert() for updates!** + +```typescript +// ❌ Wrong: Updates won't work! 
+await env.VECTORIZE_INDEX.insert([
+  { id: 'doc-1', values: newEmbedding, metadata: { version: 2 } }
+]);
+// If doc-1 exists, this does nothing!
+
+// ✅ Right: Use upsert for updates
+await env.VECTORIZE_INDEX.upsert([
+  { id: 'doc-1', values: newEmbedding, metadata: { version: 2 } }
+]);
+// This WILL update doc-1
+```
+
+## Vector Format
+
+```typescript
+interface VectorizeVector {
+  id: string; // Unique identifier
+  values: number[] | Float32Array | Float64Array; // Embedding
+  namespace?: string; // Partition key (optional)
+  metadata?: Record<string, any>; // Filterable data (optional)
+}
+```
+
+### ID Guidelines
+
+- **String** type
+- **Unique** within namespace
+- **Descriptive**: `doc-123`, `user-456-profile`, `chunk-789`
+- **Avoid special chars**: Use alphanumeric + dashes
+- **Max length**: No official limit, but keep reasonable (<256 chars)
+
+### Values (Embeddings)
+
+Accepted types:
+- `number[]` - Most common (from AI APIs)
+- `Float32Array` - Memory efficient
+- `Float64Array` - High precision (stored as Float32)
+
+**Must match index dimensions exactly!**
+
+```typescript
+// ❌ Wrong dimensions
+await env.VECTORIZE_INDEX.upsert([{
+  id: '1',
+  values: [0.1, 0.2] // Index expects 768!
+}]); +// Error: "Vector dimensions do not match" + +// ✅ Correct dimensions +await env.VECTORIZE_INDEX.upsert([{ + id: '1', + values: embedding.data[0] // 768 dimensions +}]); +``` + +## Inserting Vectors + +### Single Vector + +```typescript +await env.VECTORIZE_INDEX.upsert([{ + id: 'doc-1', + values: [0.1, 0.2, 0.3, ...], // 768 dims + metadata: { + title: 'Getting Started', + category: 'docs' + } +}]); +``` + +### Batch Insert (Recommended) + +```typescript +const vectors = documents.map((doc, i) => ({ + id: `doc-${doc.id}`, + values: embeddings.data[i], + metadata: { + title: doc.title, + content: doc.content, + category: doc.category + } +})); + +// Insert in batches of 100-1000 +const batchSize = 100; +for (let i = 0; i < vectors.length; i += batchSize) { + const batch = vectors.slice(i, i + batchSize); + await env.VECTORIZE_INDEX.upsert(batch); +} +``` + +### With Namespace + +```typescript +await env.VECTORIZE_INDEX.upsert([{ + id: 'ticket-123', + values: embedding, + namespace: 'customer-abc', // Isolate by customer + metadata: { type: 'support_ticket' } +}]); +``` + +## Querying Vectors + +### Basic Query + +```typescript +const results = await env.VECTORIZE_INDEX.query(queryVector, { + topK: 5, +}); + +// Returns: { matches: [...], count: number } +``` + +### Query with Options + +```typescript +const results = await env.VECTORIZE_INDEX.query(queryVector, { + topK: 10, // Return top 10 matches + returnValues: false, // Don't return vector values (saves bandwidth) + returnMetadata: 'all', // Return all metadata + namespace: 'customer-abc', // Query specific namespace + filter: { // Metadata filtering + category: 'documentation', + published: true + } +}); +``` + +### Query Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `topK` | number | 10 | Number of results to return (1-100 recommended) | +| `returnValues` | boolean | false | Include vector values in response | +| `returnMetadata` | string | 
`'none'` | `'none'`, `'indexed'`, or `'all'` | +| `namespace` | string | undefined | Query specific namespace only | +| `filter` | object | undefined | Metadata filter conditions | + +### Query Results + +```typescript +interface VectorizeMatches { + matches: Array<{ + id: string; + score: number; // Similarity score + values?: number[]; // If returnValues: true + metadata?: Record; // If returnMetadata: 'all' or 'indexed' + namespace?: string; + }>; + count: number; // Total matches returned +} +``` + +### Score Interpretation + +**Cosine metric** (most common): +- `1.0` = Identical vectors +- `0.5-0.9` = Similar +- `0.0-0.5` = Somewhat related +- `< 0.0` = Opposite direction +- `-1.0` = Completely opposite + +**Euclidean metric**: +- `0.0` = Identical +- `< 1.0` = Very similar +- `1.0-10.0` = Similar +- `> 10.0` = Different + +## Metadata Filtering in Queries + +See [Metadata Guide](./metadata-guide.md) for complete reference. + +### Common Patterns + +```typescript +// Exact match +filter: { category: 'docs' } + +// Not equals +filter: { status: { $ne: 'archived' } } + +// In array +filter: { category: { $in: ['docs', 'tutorials'] } } + +// Range (timestamp) +filter: { + published_at: { + $gte: 1704067200, + $lt: 1735689600 + } +} + +// Multiple conditions (AND) +filter: { + category: 'docs', + language: 'en', + published: true +} + +// Nested metadata +filter: { 'author.verified': true } +``` + +## Retrieving Vectors + +### List Vector IDs + +```typescript +const response = await env.VECTORIZE_INDEX.listVectors({ + limit: 100, + cursor: null, // Or cursor from previous response +}); + +// Returns: { vectors: [{ id: '...' }, ...], cursor: '...' 
} +``` + +### Get Specific Vectors + +```typescript +const vectors = await env.VECTORIZE_INDEX.getByIds([ + 'doc-1', + 'doc-2', + 'doc-3' +]); + +// Returns: Array +``` + +## Deleting Vectors + +### Delete by IDs + +```typescript +await env.VECTORIZE_INDEX.deleteByIds([ + 'doc-1', + 'doc-2', + 'old-chunk-123' +]); +``` + +### Delete All Vectors for a Document + +```typescript +// If using doc-{id}-chunk-{index} pattern +const docId = 'doc-123'; +const allVectors = await env.VECTORIZE_INDEX.listVectors({ limit: 1000 }); + +const chunkIds = allVectors.vectors + .filter(v => v.id.startsWith(`${docId}-chunk-`)) + .map(v => v.id); + +if (chunkIds.length > 0) { + await env.VECTORIZE_INDEX.deleteByIds(chunkIds); +} +``` + +## Performance Tips + +### Batch Operations + +✅ **Good** - Batch insert/upsert: +```typescript +await env.VECTORIZE_INDEX.upsert(arrayOf100Vectors); +``` + +❌ **Bad** - Individual operations: +```typescript +for (const vector of vectors) { + await env.VECTORIZE_INDEX.upsert([vector]); // Slow! +} +``` + +### Optimal Batch Sizes + +- **Insert/Upsert**: 100-1000 vectors per batch +- **Delete**: 100-500 IDs per batch +- **Query**: topK = 3-10 for best latency + +### Return Only What You Need + +```typescript +// ✅ Efficient - no vector values +const results = await env.VECTORIZE_INDEX.query(queryVector, { + topK: 5, + returnValues: false, // Saves bandwidth + returnMetadata: 'all' // Only metadata needed +}); + +// ❌ Wasteful - returns 768 floats per match +const results = await env.VECTORIZE_INDEX.query(queryVector, { + topK: 5, + returnValues: true, // Unnecessary if not using values + returnMetadata: 'all' +}); +``` + +### Namespace for Multi-Tenancy + +Use namespaces instead of separate indexes: + +```typescript +// ✅ One index, isolated by namespace +await env.VECTORIZE_INDEX.upsert([{ + id: 'doc-1', + values: embedding, + namespace: `customer-${customerId}`, + metadata: { ... 
} +}]); + +// Query only customer's data +const results = await env.VECTORIZE_INDEX.query(queryVector, { + namespace: `customer-${customerId}`, + topK: 5 +}); +``` + +## Common Errors + +### "Vector dimensions do not match" + +```typescript +// Check your embedding dimensions +const embedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: 'test' +}); +console.log(embedding.data[0].length); // Should match index dimensions (768) +``` + +### "Metadata property not indexed" + +```typescript +// Create metadata index first! +// npx wrangler vectorize create-metadata-index my-index --property-name=category --type=string + +// Then you can filter +const results = await env.VECTORIZE_INDEX.query(queryVector, { + filter: { category: 'docs' } // Now works! +}); +``` + +### "Insert vs Upsert not working" + +```typescript +// Use upsert for updates, not insert +await env.VECTORIZE_INDEX.upsert([{ // ✅ Use upsert + id: 'existing-doc', + values: newEmbedding, + metadata: { version: 2 } +}]); +``` + +## See Also + +- [Metadata Guide](./metadata-guide.md) +- [Wrangler Commands](./wrangler-commands.md) +- [Embedding Models](./embedding-models.md) diff --git a/references/wrangler-commands.md b/references/wrangler-commands.md new file mode 100644 index 0000000..80d5c85 --- /dev/null +++ b/references/wrangler-commands.md @@ -0,0 +1,378 @@ +# Wrangler Vectorize Commands Reference + +Complete CLI reference for managing Cloudflare Vectorize indexes with Wrangler. + +**Minimum Version Required**: Wrangler 3.71.0+ + +## Index Management + +### create + +Create a new Vectorize index. + +**⚠️ CRITICAL**: Dimensions and metric cannot be changed after creation! 
+
+```bash
+npx wrangler vectorize create <INDEX_NAME> \
+  --dimensions=<NUMBER> \
+  --metric=<METRIC> \
+  [--description=<DESCRIPTION>]
+```
+
+**Parameters**:
+- `INDEX_NAME` (required): Name of the index (lowercase, alphanumeric, dashes, max 32 chars)
+- `--dimensions` (required): Vector width (768 for Workers AI bge-base, 1536 for OpenAI small, 3072 for OpenAI large)
+- `--metric` (required): Distance metric (`cosine`, `euclidean`, or `dot-product`)
+- `--description` (optional): Human-readable description
+
+**Examples**:
+```bash
+# Workers AI @cf/baai/bge-base-en-v1.5 (768 dimensions)
+npx wrangler vectorize create docs-search \
+  --dimensions=768 \
+  --metric=cosine \
+  --description="Documentation semantic search"
+
+# OpenAI text-embedding-3-small (1536 dimensions)
+npx wrangler vectorize create product-recs \
+  --dimensions=1536 \
+  --metric=cosine
+
+# OpenAI text-embedding-3-large (3072 dimensions)
+npx wrangler vectorize create high-accuracy-index \
+  --dimensions=3072 \
+  --metric=euclidean
+```
+
+### list
+
+List all Vectorize indexes in your account.
+
+```bash
+npx wrangler vectorize list
+```
+
+**Output**: Table with index names, dimensions, and distance metrics.
+
+### get
+
+Get details about a specific index.
+
+```bash
+npx wrangler vectorize get <INDEX_NAME>
+```
+
+**Returns**: Index configuration (name, dimensions, metric, description).
+
+### info
+
+Get additional information about an index.
+
+```bash
+npx wrangler vectorize info <INDEX_NAME>
+```
+
+**Returns**: Vector count, last processed mutation, index status.
+
+### delete
+
+Delete a Vectorize index (irreversible!).
+
+```bash
+npx wrangler vectorize delete <INDEX_NAME> [--force]
+```
+
+**Parameters**:
+- `--force` (optional): Skip confirmation prompt
+
+**Example**:
+```bash
+npx wrangler vectorize delete old-index --force
+```
+
+## Metadata Indexes
+
+**⚠️ CRITICAL**: Create metadata indexes BEFORE inserting vectors! Vectors added before a metadata index exists won't be filterable on that property.
+
+### create-metadata-index
+
+Enable metadata filtering on a specific property.
+
+```bash
+npx wrangler vectorize create-metadata-index <INDEX_NAME> \
+  --property-name=<PROPERTY> \
+  --type=<TYPE>
+```
+
+**Parameters**:
+- `INDEX_NAME` (required): Vectorize index name
+- `--property-name` (required): Metadata field name
+- `--type` (required): Data type (`string`, `number`, or `boolean`)
+
+**Limits**:
+- Max 10 metadata indexes per Vectorize index
+- String indexes: First 64 bytes (UTF-8)
+- Number indexes: Float64 precision
+
+**Examples**:
+```bash
+# String metadata (category filtering)
+npx wrangler vectorize create-metadata-index docs-search \
+  --property-name=category \
+  --type=string
+
+# Number metadata (timestamp filtering)
+npx wrangler vectorize create-metadata-index docs-search \
+  --property-name=published_at \
+  --type=number
+
+# Boolean metadata (published status)
+npx wrangler vectorize create-metadata-index docs-search \
+  --property-name=published \
+  --type=boolean
+
+# Nested metadata (use dot notation)
+npx wrangler vectorize create-metadata-index docs-search \
+  --property-name=author_verified \
+  --type=boolean
+```
+
+### list-metadata-index
+
+List all metadata indexes for an index.
+
+```bash
+npx wrangler vectorize list-metadata-index <INDEX_NAME>
+```
+
+**Output**: Table with property names and types.
+
+### delete-metadata-index
+
+Disable metadata filtering on a property.
+
+```bash
+npx wrangler vectorize delete-metadata-index <INDEX_NAME> \
+  --property-name=<PROPERTY>
+```
+
+**Example**:
+```bash
+npx wrangler vectorize delete-metadata-index docs-search \
+  --property-name=category
+```
+
+## Vector Operations
+
+### insert
+
+Insert vectors from a file (NDJSON format).
+ +```bash +npx wrangler vectorize insert \ + --file= +``` + +**File Format** (NDJSON - one JSON object per line): +```json +{"id":"1","values":[0.1,0.2,0.3],"metadata":{"title":"Doc 1"}} +{"id":"2","values":[0.4,0.5,0.6],"metadata":{"title":"Doc 2"}} +``` + +**Example**: +```bash +npx wrangler vectorize insert docs-search --file=vectors.ndjson +``` + +### query + +Query vectors directly from CLI. + +```bash +npx wrangler vectorize query \ + --vector="[]" \ + [--top-k=] \ + [--return-metadata=] \ + [--namespace=] \ + [--filter=] +``` + +**Parameters**: +- `--vector` (required): Query vector as JSON array +- `--top-k` (optional): Number of results (default: 10) +- `--return-metadata` (optional): `none`, `indexed`, or `all` (default: `none`) +- `--namespace` (optional): Query specific namespace +- `--filter` (optional): Metadata filter as JSON string + +**Examples**: +```bash +# Simple query +npx wrangler vectorize query docs-search \ + --vector="[0.1,0.2,0.3,...]" \ + --top-k=5 \ + --return-metadata=all + +# Query with filter +npx wrangler vectorize query docs-search \ + --vector="[0.1,0.2,...]" \ + --filter='{"category":"documentation","published":true}' \ + --top-k=3 + +# Query specific namespace +npx wrangler vectorize query docs-search \ + --vector="[0.1,0.2,...]" \ + --namespace="customer-123" \ + --top-k=5 +``` + +### list-vectors + +List vector IDs in paginated manner. + +```bash +npx wrangler vectorize list-vectors \ + [--count=] \ + [--cursor=] +``` + +**Parameters**: +- `--count` (optional): Vectors per page (1-1000, default: 100) +- `--cursor` (optional): Pagination cursor from previous response + +**Example**: +```bash +# Get first 100 vector IDs +npx wrangler vectorize list-vectors docs-search --count=100 + +# Get next page (use cursor from previous response) +npx wrangler vectorize list-vectors docs-search \ + --count=100 \ + --cursor="abc123..." +``` + +### get-vectors + +Fetch specific vectors by ID. 
+ +```bash +npx wrangler vectorize get-vectors \ + --ids= +``` + +**Example**: +```bash +npx wrangler vectorize get-vectors docs-search \ + --ids="doc-1,doc-2,doc-3" +``` + +### delete-vectors + +Delete vectors by ID. + +```bash +npx wrangler vectorize delete-vectors \ + --ids= +``` + +**Example**: +```bash +npx wrangler vectorize delete-vectors docs-search \ + --ids="old-doc-1,old-doc-2" +``` + +## Common Workflows + +### Initial Setup + +```bash +# 1. Create index +npx wrangler vectorize create my-index \ + --dimensions=768 \ + --metric=cosine + +# 2. Create metadata indexes (BEFORE inserting!) +npx wrangler vectorize create-metadata-index my-index \ + --property-name=category --type=string + +npx wrangler vectorize create-metadata-index my-index \ + --property-name=timestamp --type=number + +# 3. Verify metadata indexes +npx wrangler vectorize list-metadata-index my-index + +# 4. Now safe to insert vectors (via Worker or CLI) +``` + +### Bulk Data Import + +```bash +# Prepare NDJSON file +cat > vectors.ndjson << 'EOF' +{"id":"doc-1","values":[0.1,0.2,0.3,...],"metadata":{"category":"docs"}} +{"id":"doc-2","values":[0.4,0.5,0.6,...],"metadata":{"category":"tutorials"}} +EOF + +# Import +npx wrangler vectorize insert my-index --file=vectors.ndjson + +# Verify +npx wrangler vectorize info my-index +``` + +### Debug / Inspect + +```bash +# Check index configuration +npx wrangler vectorize get my-index + +# Check vector count +npx wrangler vectorize info my-index + +# List some vector IDs +npx wrangler vectorize list-vectors my-index --count=10 + +# Inspect specific vectors +npx wrangler vectorize get-vectors my-index --ids="doc-1,doc-2" + +# Test query +npx wrangler vectorize query my-index \ + --vector="[0.1,0.2,...]" \ + --top-k=3 \ + --return-metadata=all +``` + +### Cleanup + +```bash +# Delete specific vectors +npx wrangler vectorize delete-vectors my-index --ids="doc-1,doc-2" + +# Delete entire index (irreversible!) 
+npx wrangler vectorize delete my-index --force +``` + +## Tips & Best Practices + +1. **Always use latest Wrangler**: `npx wrangler@latest vectorize ...` +2. **Create metadata indexes first**: Before any vector insertion +3. **Test with small data**: Use `--count=10` when listing/testing +4. **Batch operations**: Use Workers for bulk operations (faster than CLI) +5. **Monitor vector count**: Use `info` command to track index size +6. **Verify before delete**: Always check with `get` before `delete` + +## Error Messages + +| Error | Cause | Solution | +|-------|-------|----------| +| "Wrangler version 3.71.0 required" | Old Wrangler | Update: `npm install -g wrangler@latest` | +| "Vector dimensions do not match" | Wrong embedding size | Check model output dimensions | +| "Metadata property not indexed" | Metadata index missing | Create metadata index before querying | +| "Index name already exists" | Duplicate name | Use different name or delete old index | +| "Invalid filter syntax" | Malformed JSON filter | Check JSON syntax and operators | + +## See Also + +- [Index Operations](./index-operations.md) +- [Vector Operations](./vector-operations.md) +- [Metadata Guide](./metadata-guide.md) +- [Official Wrangler Docs](https://developers.cloudflare.com/workers/wrangler/commands/#vectorize) diff --git a/templates/basic-search.ts b/templates/basic-search.ts new file mode 100644 index 0000000..cd0e99b --- /dev/null +++ b/templates/basic-search.ts @@ -0,0 +1,254 @@ +/** + * Basic Semantic Search with Cloudflare Vectorize + Workers AI + * + * Use case: Simple semantic search over documents, FAQs, or product catalog + * + * Features: + * - Workers AI embeddings (@cf/baai/bge-base-en-v1.5) + * - Vectorize query with topK results + * - Metadata filtering + * - Simple JSON API + */ + +export interface Env { + VECTORIZE_INDEX: VectorizeIndex; + AI: Ai; +} + +interface SearchRequest { + query: string; + topK?: number; + filter?: Record; + namespace?: string; +} + +interface 
SearchResult { + id: string; + score: number; + metadata: Record; +} + +export default { + async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise { + // Handle CORS preflight + if (request.method === 'OPTIONS') { + return new Response(null, { + headers: { + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS', + 'Access-Control-Allow-Headers': 'Content-Type', + }, + }); + } + + const url = new URL(request.url); + + // Route: POST /search - Semantic search endpoint + if (url.pathname === '/search' && request.method === 'POST') { + try { + const body = await request.json() as SearchRequest; + const { query, topK = 5, filter, namespace } = body; + + if (!query) { + return Response.json( + { error: 'Missing required field: query' }, + { status: 400 } + ); + } + + // Generate embedding for search query + const queryEmbedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: query, + }); + + // Search vector database + const results = await env.VECTORIZE_INDEX.query(queryEmbedding.data[0], { + topK, + filter, + namespace, + returnMetadata: 'all', + returnValues: false, // Save bandwidth + }); + + // Format results + const searchResults: SearchResult[] = results.matches.map((match) => ({ + id: match.id, + score: match.score, + metadata: match.metadata || {}, + })); + + return Response.json({ + query, + results: searchResults, + count: results.count, + }, { + headers: { 'Access-Control-Allow-Origin': '*' }, + }); + } catch (error) { + console.error('Search error:', error); + return Response.json( + { + error: 'Search failed', + message: error instanceof Error ? 
error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Route: POST /index - Add document to index + if (url.pathname === '/index' && request.method === 'POST') { + try { + const body = await request.json() as { + id: string; + content: string; + metadata?: Record; + namespace?: string; + }; + + if (!body.id || !body.content) { + return Response.json( + { error: 'Missing required fields: id, content' }, + { status: 400 } + ); + } + + // Generate embedding for document + const embedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: body.content, + }); + + // Upsert vector (overwrites if exists) + await env.VECTORIZE_INDEX.upsert([ + { + id: body.id, + values: embedding.data[0], + namespace: body.namespace, + metadata: { + ...body.metadata, + content: body.content, + indexed_at: Date.now(), + }, + }, + ]); + + return Response.json({ + success: true, + id: body.id, + message: 'Document indexed successfully', + }, { + headers: { 'Access-Control-Allow-Origin': '*' }, + }); + } catch (error) { + console.error('Index error:', error); + return Response.json( + { + error: 'Indexing failed', + message: error instanceof Error ? error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Route: DELETE /index/:id - Remove document from index + if (url.pathname.startsWith('/index/') && request.method === 'DELETE') { + try { + const id = url.pathname.split('/')[2]; + + if (!id) { + return Response.json( + { error: 'Missing document ID' }, + { status: 400 } + ); + } + + await env.VECTORIZE_INDEX.deleteByIds([id]); + + return Response.json({ + success: true, + id, + message: 'Document removed from index', + }, { + headers: { 'Access-Control-Allow-Origin': '*' }, + }); + } catch (error) { + console.error('Delete error:', error); + return Response.json( + { + error: 'Delete failed', + message: error instanceof Error ? 
error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Default: API documentation + return Response.json({ + name: 'Vectorize Semantic Search API', + endpoints: { + 'POST /search': { + description: 'Semantic search over indexed documents', + body: { + query: 'string (required)', + topK: 'number (optional, default: 5)', + filter: 'object (optional)', + namespace: 'string (optional)', + }, + example: { + query: 'How do I deploy a Worker?', + topK: 3, + filter: { category: 'documentation' }, + }, + }, + 'POST /index': { + description: 'Add or update document in index', + body: { + id: 'string (required)', + content: 'string (required)', + metadata: 'object (optional)', + namespace: 'string (optional)', + }, + example: { + id: 'doc-123', + content: 'Cloudflare Workers are serverless functions...', + metadata: { category: 'documentation', author: 'Cloudflare' }, + }, + }, + 'DELETE /index/:id': { + description: 'Remove document from index', + example: 'DELETE /index/doc-123', + }, + }, + }); + }, +}; + +/** + * Example Usage: + * + * 1. Index a document: + * + * curl -X POST https://your-worker.workers.dev/index \ + * -H "Content-Type: application/json" \ + * -d '{ + * "id": "doc-1", + * "content": "Cloudflare Workers allow you to deploy serverless code globally.", + * "metadata": { "category": "docs", "section": "workers" } + * }' + * + * 2. Search: + * + * curl -X POST https://your-worker.workers.dev/search \ + * -H "Content-Type: application/json" \ + * -d '{ + * "query": "How do I deploy serverless functions?", + * "topK": 5, + * "filter": { "category": "docs" } + * }' + * + * 3. 
Delete: + * + * curl -X DELETE https://your-worker.workers.dev/index/doc-1 + */ diff --git a/templates/document-ingestion.ts b/templates/document-ingestion.ts new file mode 100644 index 0000000..f235dbf --- /dev/null +++ b/templates/document-ingestion.ts @@ -0,0 +1,414 @@ +/** + * Document Ingestion Pipeline for Cloudflare Vectorize + * + * Use case: Process large documents, chunk text, generate embeddings, and index + * + * Features: + * - Intelligent text chunking (sentence-based) + * - Batch embedding generation + * - Metadata tagging (doc_id, chunk_index, timestamps) + * - R2 integration for document storage (optional) + * - Progress tracking and error handling + */ + +export interface Env { + VECTORIZE_INDEX: VectorizeIndex; + AI: Ai; + DOCUMENTS_BUCKET?: R2Bucket; // Optional: Store original documents +} + +interface Document { + id: string; + title: string; + content: string; + url?: string; + author?: string; + category?: string; + tags?: string[]; + publishedAt?: number; + [key: string]: any; +} + +interface ChunkMetadata { + doc_id: string; + doc_title: string; + chunk_index: number; + total_chunks: number; + content: string; + [key: string]: any; +} + +/** + * Chunk text into smaller segments while preserving sentence boundaries + */ +function chunkText(text: string, maxChunkSize = 500, overlapSize = 50): string[] { + // Split into sentences (handles . ! ? 
with spaces) + const sentences = text.match(/[^.!?]+[.!?]+(?:\s|$)/g) || [text]; + const chunks: string[] = []; + let currentChunk = ''; + + for (let i = 0; i < sentences.length; i++) { + const sentence = sentences[i].trim(); + + // If adding this sentence exceeds max size and we have content, start new chunk + if ((currentChunk + ' ' + sentence).length > maxChunkSize && currentChunk) { + chunks.push(currentChunk.trim()); + + // Create overlap by including last few words + const words = currentChunk.split(' '); + const overlapWords = words.slice(-Math.floor(overlapSize / 6)); // ~6 chars/word + currentChunk = overlapWords.join(' ') + ' ' + sentence; + } else { + currentChunk += (currentChunk ? ' ' : '') + sentence; + } + } + + // Add final chunk + if (currentChunk.trim()) { + chunks.push(currentChunk.trim()); + } + + return chunks.length > 0 ? chunks : [text]; +} + +/** + * Batch array into smaller arrays of specified size + */ +function batchArray(array: T[], batchSize: number): T[][] { + const batches: T[][] = []; + for (let i = 0; i < array.length; i += batchSize) { + batches.push(array.slice(i, i + batchSize)); + } + return batches; +} + +export default { + async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise { + // Handle CORS + if (request.method === 'OPTIONS') { + return new Response(null, { + headers: { + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Methods': 'GET, POST, DELETE, OPTIONS', + 'Access-Control-Allow-Headers': 'Content-Type', + }, + }); + } + + const url = new URL(request.url); + + // Route: POST /ingest - Process and index document(s) + if (url.pathname === '/ingest' && request.method === 'POST') { + try { + const body = await request.json() as { + documents: Document[]; + chunkSize?: number; + overlapSize?: number; + namespace?: string; + storeInR2?: boolean; + }; + + const { + documents, + chunkSize = 500, + overlapSize = 50, + namespace, + storeInR2 = false, + } = body; + + if (!documents || 
!Array.isArray(documents) || documents.length === 0) { + return Response.json( + { error: 'Missing or invalid field: documents (non-empty array)' }, + { status: 400 } + ); + } + + const results = { + success: true, + processed: 0, + totalChunks: 0, + errors: [] as string[], + documentDetails: [] as any[], + }; + + // Process each document + for (const doc of documents) { + try { + if (!doc.id || !doc.content) { + results.errors.push(`Document missing id or content: ${JSON.stringify(doc)}`); + continue; + } + + // Optional: Store original document in R2 + if (storeInR2 && env.DOCUMENTS_BUCKET) { + await env.DOCUMENTS_BUCKET.put( + `documents/${doc.id}.json`, + JSON.stringify(doc), + { + httpMetadata: { contentType: 'application/json' }, + customMetadata: { title: doc.title, indexed_at: Date.now().toString() }, + } + ); + } + + // Chunk the document + const chunks = chunkText(doc.content, chunkSize, overlapSize); + + // Generate embeddings for all chunks (batch) + const embeddings = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: chunks, + }); + + // Prepare vectors with metadata + const vectors = chunks.map((chunk, index) => ({ + id: `${doc.id}-chunk-${index}`, + values: embeddings.data[index], + namespace, + metadata: { + doc_id: doc.id, + doc_title: doc.title, + chunk_index: index, + total_chunks: chunks.length, + content: chunk, + url: doc.url, + author: doc.author, + category: doc.category, + tags: doc.tags, + published_at: doc.publishedAt, + indexed_at: Date.now(), + } as ChunkMetadata, + })); + + // Upsert in batches (100 vectors at a time) + const vectorBatches = batchArray(vectors, 100); + for (const batch of vectorBatches) { + await env.VECTORIZE_INDEX.upsert(batch); + } + + results.processed++; + results.totalChunks += chunks.length; + results.documentDetails.push({ + id: doc.id, + title: doc.title, + chunks: chunks.length, + }); + } catch (error) { + const errorMsg = `Failed to process document ${doc.id}: ${ + error instanceof Error ? 
error.message : 'Unknown error' + }`; + console.error(errorMsg); + results.errors.push(errorMsg); + } + } + + const statusCode = results.errors.length > 0 ? 207 : 200; // 207 Multi-Status + + return Response.json(results, { + status: statusCode, + headers: { 'Access-Control-Allow-Origin': '*' }, + }); + } catch (error) { + console.error('Ingest error:', error); + return Response.json( + { + error: 'Ingestion failed', + message: error instanceof Error ? error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Route: POST /ingest/url - Fetch and ingest from URL (requires Firecrawl or similar) + if (url.pathname === '/ingest/url' && request.method === 'POST') { + try { + const body = await request.json() as { + url: string; + id?: string; + category?: string; + namespace?: string; + }; + + if (!body.url) { + return Response.json({ error: 'Missing required field: url' }, { status: 400 }); + } + + // Fetch content (simple fetch - for production use Firecrawl or similar) + const response = await fetch(body.url); + const html = await response.text(); + + // Simple text extraction (production would use proper HTML parsing) + const text = html + .replace(/]*>[\s\S]*?<\/script>/gi, '') + .replace(/]*>[\s\S]*?<\/style>/gi, '') + .replace(/<[^>]+>/g, ' ') + .replace(/\s+/g, ' ') + .trim(); + + // Create document from fetched content + const doc: Document = { + id: body.id || `url-${Date.now()}`, + title: body.url, + content: text, + url: body.url, + category: body.category || 'web-page', + publishedAt: Date.now(), + }; + + // Re-use the /ingest logic + const ingestResponse = await this.fetch( + new Request(new URL('/ingest', request.url), { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + documents: [doc], + namespace: body.namespace, + }), + }), + env, + ctx + ); + + return ingestResponse; + } catch (error) { + console.error('URL ingest error:', error); + return Response.json( + { + error: 'URL ingestion 
failed', + message: error instanceof Error ? error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Route: DELETE /documents/:id - Delete all chunks for a document + if (url.pathname.startsWith('/documents/') && request.method === 'DELETE') { + try { + const docId = url.pathname.split('/')[2]; + + if (!docId) { + return Response.json({ error: 'Missing document ID' }, { status: 400 }); + } + + // List all vector IDs (need to find chunks for this doc) + // Note: This is inefficient for large indexes. Better to maintain a separate index of doc -> chunk mappings + const allVectors = await env.VECTORIZE_INDEX.listVectors({ limit: 1000 }); + + const chunkIds = allVectors.vectors + .filter((v) => v.id.startsWith(`${docId}-chunk-`)) + .map((v) => v.id); + + if (chunkIds.length === 0) { + return Response.json( + { error: 'Document not found', id: docId }, + { status: 404 } + ); + } + + // Delete in batches + const idBatches = batchArray(chunkIds, 100); + for (const batch of idBatches) { + await env.VECTORIZE_INDEX.deleteByIds(batch); + } + + // Optional: Delete from R2 if exists + if (env.DOCUMENTS_BUCKET) { + await env.DOCUMENTS_BUCKET.delete(`documents/${docId}.json`); + } + + return Response.json({ + success: true, + id: docId, + chunksDeleted: chunkIds.length, + }, { + headers: { 'Access-Control-Allow-Origin': '*' }, + }); + } catch (error) { + console.error('Delete error:', error); + return Response.json( + { + error: 'Delete failed', + message: error instanceof Error ? 
error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Default: API documentation + return Response.json({ + name: 'Document Ingestion Pipeline API', + endpoints: { + 'POST /ingest': { + description: 'Process and index documents with chunking', + body: { + documents: [ + { + id: 'string (required)', + title: 'string (required)', + content: 'string (required)', + url: 'string (optional)', + author: 'string (optional)', + category: 'string (optional)', + tags: ['array (optional)'], + publishedAt: 'number (optional)', + }, + ], + chunkSize: 'number (optional, default: 500)', + overlapSize: 'number (optional, default: 50)', + namespace: 'string (optional)', + storeInR2: 'boolean (optional, default: false)', + }, + }, + 'POST /ingest/url': { + description: 'Fetch and ingest document from URL', + body: { + url: 'string (required)', + id: 'string (optional)', + category: 'string (optional)', + namespace: 'string (optional)', + }, + }, + 'DELETE /documents/:id': { + description: 'Delete all chunks for a document', + example: 'DELETE /documents/doc-123', + }, + }, + }); + }, +}; + +/** + * Example Usage: + * + * 1. Ingest a single document: + * + * curl -X POST https://your-worker.workers.dev/ingest \ + * -H "Content-Type: application/json" \ + * -d '{ + * "documents": [{ + * "id": "cloudflare-workers-intro", + * "title": "Introduction to Cloudflare Workers", + * "content": "Very long document content here...", + * "category": "documentation", + * "author": "Cloudflare", + * "tags": ["workers", "serverless", "edge-computing"] + * }], + * "chunkSize": 500, + * "overlapSize": 50 + * }' + * + * 2. Ingest from URL: + * + * curl -X POST https://your-worker.workers.dev/ingest/url \ + * -H "Content-Type: application/json" \ + * -d '{ + * "url": "https://developers.cloudflare.com/workers/", + * "category": "documentation" + * }' + * + * 3. 
Delete document:
+ *
+ *    curl -X DELETE https://your-worker.workers.dev/documents/cloudflare-workers-intro
+ */
diff --git a/templates/metadata-filtering.ts b/templates/metadata-filtering.ts
new file mode 100644
index 0000000..7d2ce6f
--- /dev/null
+++ b/templates/metadata-filtering.ts
@@ -0,0 +1,425 @@
+/**
+ * Advanced Metadata Filtering Examples for Cloudflare Vectorize
+ *
+ * Use case: Multi-tenant apps, complex filtering, range queries, nested metadata
+ *
+ * Features:
+ * - All filter operators ($eq, $ne, $in, $nin, $lt, $lte, $gt, $gte)
+ * - Nested metadata with dot notation
+ * - Namespace-based isolation
+ * - Combined filters (implicit AND)
+ * - Range queries on numbers and strings
+ * - Performance optimization tips
+ */
+
+export interface Env {
+  VECTORIZE_INDEX: VectorizeIndex;
+  AI: Ai;
+}
+
+interface FilterExample {
+  name: string;
+  description: string;
+  filter: Record<string, any>;
+  namespace?: string;
+}
+
+export default {
+  async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> {
+    // Handle CORS
+    if (request.method === 'OPTIONS') {
+      return new Response(null, {
+        headers: {
+          'Access-Control-Allow-Origin': '*',
+          'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
+          'Access-Control-Allow-Headers': 'Content-Type',
+        },
+      });
+    }
+
+    const url = new URL(request.url);
+
+    // Route: GET /examples - Show all filter examples
+    if (url.pathname === '/examples' && request.method === 'GET') {
+      const examples: FilterExample[] = [
+        {
+          name: 'Equality (implicit)',
+          description: 'Find vectors with exact category match',
+          filter: { category: 'documentation' },
+        },
+        {
+          name: 'Equality (explicit)',
+          description: 'Explicit $eq operator',
+          filter: { category: { $eq: 'documentation' } },
+        },
+        {
+          name: 'Not Equals',
+          description: 'Exclude archived documents',
+          filter: { status: { $ne: 'archived' } },
+        },
+        {
+          name: 'In Array',
+          description: 'Match any of multiple categories',
+          filter: { category: { $in: ['docs', 'tutorials', 
'guides'] } }, + }, + { + name: 'Not In Array', + description: 'Exclude multiple statuses', + filter: { status: { $nin: ['archived', 'draft', 'deleted'] } }, + }, + { + name: 'Greater Than', + description: 'Documents published after date', + filter: { published_at: { $gt: 1704067200 } }, // Jan 1, 2024 + }, + { + name: 'Less Than or Equal', + description: 'Documents published before or on date', + filter: { published_at: { $lte: 1735689600 } }, // Jan 1, 2025 + }, + { + name: 'Range Query (numbers)', + description: 'Documents published in 2024', + filter: { + published_at: { + $gte: 1704067200, // >= Jan 1, 2024 + $lt: 1735689600, // < Jan 1, 2025 + }, + }, + }, + { + name: 'Range Query (strings - prefix search)', + description: 'URLs starting with /docs/workers/', + filter: { + url: { + $gte: '/docs/workers/', + $lt: '/docs/workersz', // 'z' is after all possible chars + }, + }, + }, + { + name: 'Nested Metadata', + description: 'Filter by nested author ID', + filter: { 'author.id': 'user123' }, + }, + { + name: 'Combined Filters (AND)', + description: 'Multiple conditions (implicit AND)', + filter: { + category: 'docs', + language: 'en', + published: true, + published_at: { $gte: 1704067200 }, + }, + }, + { + name: 'Multi-tenant (namespace)', + description: 'Isolate by customer ID using namespace', + namespace: 'customer-abc123', + filter: { type: 'support_ticket' }, + }, + { + name: 'Boolean Filter', + description: 'Published documents only', + filter: { published: true }, + }, + { + name: 'Complex Multi-field', + description: 'Docs in English, published in 2024, not archived', + filter: { + category: { $in: ['docs', 'tutorials'] }, + language: 'en', + status: { $ne: 'archived' }, + published_at: { $gte: 1704067200, $lt: 1735689600 }, + 'author.verified': true, + }, + }, + ]; + + return Response.json({ examples }); + } + + // Route: POST /search/filtered - Execute filtered search + if (url.pathname === '/search/filtered' && request.method === 'POST') { + try { + 
const body = await request.json() as {
+        query: string;
+        exampleName?: string;
+        filter?: Record<string, any>;
+        namespace?: string;
+        topK?: number;
+      };
+
+      const { query, exampleName, filter, namespace, topK = 5 } = body;
+
+      if (!query) {
+        return Response.json({ error: 'Missing required field: query' }, { status: 400 });
+      }
+
+      // If exampleName provided, use pre-defined filter
+      let finalFilter = filter;
+      let finalNamespace = namespace;
+
+      if (exampleName) {
+        const examplesResponse = await this.fetch(
+          new Request(new URL('/examples', request.url)),
+          env,
+          ctx
+        );
+        const { examples } = (await examplesResponse.json()) as { examples: FilterExample[] };
+
+        const example = examples.find((ex) => ex.name === exampleName);
+        if (example) {
+          finalFilter = example.filter;
+          finalNamespace = example.namespace || namespace;
+        }
+      }
+
+      // Generate embedding
+      const embedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', {
+        text: query,
+      });
+
+      // Query with filter
+      const results = await env.VECTORIZE_INDEX.query(embedding.data[0], {
+        topK,
+        filter: finalFilter,
+        namespace: finalNamespace,
+        returnMetadata: 'all',
+        returnValues: false,
+      });
+
+      return Response.json({
+        query,
+        filter: finalFilter,
+        namespace: finalNamespace,
+        results: results.matches.map((m) => ({
+          id: m.id,
+          score: m.score,
+          metadata: m.metadata,
+        })),
+        count: results.count,
+      }, {
+        headers: { 'Access-Control-Allow-Origin': '*' },
+      });
+    } catch (error) {
+      console.error('Filtered search error:', error);
+      return Response.json(
+        {
+          error: 'Filtered search failed',
+          message: error instanceof Error ? 
error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Route: POST /seed - Seed example data with rich metadata + if (url.pathname === '/seed' && request.method === 'POST') { + try { + // Sample documents with diverse metadata + const sampleDocs = [ + { + content: 'Cloudflare Workers are serverless functions that run on the edge.', + metadata: { + category: 'documentation', + language: 'en', + status: 'published', + published_at: 1704153600, // Jan 2, 2024 + published: true, + url: '/docs/workers/intro', + author: { id: 'user123', name: 'John Doe', verified: true }, + tags: ['workers', 'serverless', 'edge'], + }, + }, + { + content: 'Vectorize is a globally distributed vector database.', + metadata: { + category: 'documentation', + language: 'en', + status: 'published', + published_at: 1720310400, // Jul 7, 2024 + published: true, + url: '/docs/vectorize/intro', + author: { id: 'user456', name: 'Jane Smith', verified: true }, + tags: ['vectorize', 'database', 'ai'], + }, + }, + { + content: 'D1 is Cloudflare\'s serverless SQL database.', + metadata: { + category: 'tutorials', + language: 'en', + status: 'draft', + published_at: 1735603200, // Dec 31, 2024 + published: false, + url: '/tutorials/d1/getting-started', + author: { id: 'user123', name: 'John Doe', verified: true }, + tags: ['d1', 'database', 'sql'], + }, + }, + { + content: 'R2 provides S3-compatible object storage without egress fees.', + metadata: { + category: 'guides', + language: 'en', + status: 'published', + published_at: 1712880000, // Apr 12, 2024 + published: true, + url: '/docs/r2/overview', + author: { id: 'user789', name: 'Bob Wilson', verified: false }, + tags: ['r2', 'storage', 'object-storage'], + }, + }, + { + content: 'Workers KV is a key-value store for edge applications.', + metadata: { + category: 'documentation', + language: 'en', + status: 'archived', + published_at: 1640995200, // Jan 1, 2022 + published: true, + url: '/docs/kv/intro', + author: { id: 
'user456', name: 'Jane Smith', verified: true }, + tags: ['kv', 'storage', 'edge'], + }, + }, + ]; + + // Generate embeddings + const texts = sampleDocs.map((doc) => doc.content); + const embeddings = await env.AI.run('@cf/baai/bge-base-en-v1.5', { text: texts }); + + // Prepare vectors + const vectors = sampleDocs.map((doc, i) => ({ + id: `sample-${i + 1}`, + values: embeddings.data[i], + metadata: { + content: doc.content, + ...doc.metadata, + indexed_at: Date.now(), + }, + })); + + // Upsert all + await env.VECTORIZE_INDEX.upsert(vectors); + + return Response.json({ + success: true, + message: 'Seeded 5 sample documents with rich metadata', + count: vectors.length, + }, { + headers: { 'Access-Control-Allow-Origin': '*' }, + }); + } catch (error) { + console.error('Seed error:', error); + return Response.json( + { + error: 'Seeding failed', + message: error instanceof Error ? error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Default: API documentation + return Response.json({ + name: 'Metadata Filtering Examples API', + endpoints: { + 'GET /examples': { + description: 'List all filter examples with syntax', + }, + 'POST /search/filtered': { + description: 'Execute filtered vector search', + body: { + query: 'string (required)', + exampleName: 'string (optional) - use pre-defined filter', + filter: 'object (optional) - custom filter', + namespace: 'string (optional)', + topK: 'number (optional, default: 5)', + }, + example: { + query: 'serverless database', + exampleName: 'Range Query (numbers)', + }, + }, + 'POST /seed': { + description: 'Seed database with example documents', + note: 'Creates 5 sample documents with rich metadata for testing', + }, + }, + filterOperators: { + $eq: 'Equals', + $ne: 'Not equals', + $in: 'In array', + $nin: 'Not in array', + $lt: 'Less than', + $lte: 'Less than or equal', + $gt: 'Greater than', + $gte: 'Greater than or equal', + }, + notes: { + 'Metadata Keys': 'Cannot be empty, contain dots (.), quotes 
("), or start with $', + 'Filter Size': 'Max 2048 bytes (compact JSON)', + 'Cardinality': 'High cardinality in range queries can impact performance', + 'Namespace': 'Applied BEFORE metadata filters', + }, + }); + }, +}; + +/** + * Example Usage: + * + * 1. Seed example data: + * + * curl -X POST https://your-worker.workers.dev/seed + * + * 2. List filter examples: + * + * curl https://your-worker.workers.dev/examples + * + * 3. Search with pre-defined filter: + * + * curl -X POST https://your-worker.workers.dev/search/filtered \ + * -H "Content-Type: application/json" \ + * -d '{ + * "query": "database storage", + * "exampleName": "Range Query (numbers)" + * }' + * + * 4. Search with custom filter: + * + * curl -X POST https://your-worker.workers.dev/search/filtered \ + * -H "Content-Type: application/json" \ + * -d '{ + * "query": "edge computing", + * "filter": { + * "category": { "$in": ["docs", "tutorials"] }, + * "language": "en", + * "status": { "$ne": "archived" }, + * "author.verified": true + * }, + * "topK": 3 + * }' + * + * Performance Tips: + * + * 1. Low Cardinality for Range Queries: + * ✅ Good: published_at (timestamps in seconds, not milliseconds) + * ❌ Bad: user_id (millions of unique values in range) + * + * 2. Namespace First: + * Use namespace for partition key (customer_id, tenant_id) + * Then use metadata filters for finer-grained filtering + * + * 3. Filter Size: + * Keep filters under 2048 bytes + * If hitting limit, split into multiple queries + * + * 4. 
Indexed Metadata:
+ *    Create metadata indexes BEFORE inserting vectors:
+ *    npx wrangler vectorize create-metadata-index my-index \
+ *      --property-name=category --type=string
+ */
diff --git a/templates/rag-chat.ts b/templates/rag-chat.ts
new file mode 100644
index 0000000..e4afb1b
--- /dev/null
+++ b/templates/rag-chat.ts
@@ -0,0 +1,351 @@
+/**
+ * RAG (Retrieval Augmented Generation) Chatbot
+ * with Cloudflare Vectorize + Workers AI
+ *
+ * Use case: Q&A chatbot that retrieves relevant context before generating answers
+ *
+ * Features:
+ * - Semantic search over knowledge base
+ * - Context-aware LLM responses
+ * - Source citations
+ * - Conversation history support
+ * - Streaming responses (optional)
+ */
+
+export interface Env {
+  VECTORIZE_INDEX: VectorizeIndex;
+  AI: Ai;
+}
+
+interface ChatRequest {
+  question: string;
+  conversationHistory?: Array<{ role: string; content: string }>;
+  topK?: number;
+  filter?: Record<string, any>;
+  namespace?: string;
+}
+
+interface ChatResponse {
+  answer: string;
+  sources: Array<{
+    id: string;
+    title: string;
+    score: number;
+    excerpt: string;
+  }>;
+  context: string;
+}
+
+export default {
+  async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> {
+    // Handle CORS
+    if (request.method === 'OPTIONS') {
+      return new Response(null, {
+        headers: {
+          'Access-Control-Allow-Origin': '*',
+          'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
+          'Access-Control-Allow-Headers': 'Content-Type',
+        },
+      });
+    }
+
+    const url = new URL(request.url);
+
+    // Route: POST /chat - RAG chatbot endpoint
+    if (url.pathname === '/chat' && request.method === 'POST') {
+      try {
+        const body = await request.json() as ChatRequest;
+        const {
+          question,
+          conversationHistory = [],
+          topK = 3,
+          filter,
+          namespace,
+        } = body;
+
+        if (!question) {
+          return Response.json(
+            { error: 'Missing required field: question' },
+            { status: 400 }
+          );
+        }
+
+        // Step 1: Generate embedding for user question
+        const questionEmbedding = await 
env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: question, + }); + + // Step 2: Search vector database for relevant context + const searchResults = await env.VECTORIZE_INDEX.query( + questionEmbedding.data[0], + { + topK, + filter, + namespace, + returnMetadata: 'all', + returnValues: false, + } + ); + + // Step 3: Build context from retrieved documents + const contextParts: string[] = []; + const sources: ChatResponse['sources'] = []; + + for (const match of searchResults.matches) { + const metadata = match.metadata || {}; + const title = metadata.title || metadata.id || match.id; + const content = metadata.content || ''; + + // Truncate content for context (max ~500 chars per source) + const excerpt = + content.length > 500 ? content.slice(0, 497) + '...' : content; + + contextParts.push(`[${title}]\n${content}`); + sources.push({ + id: match.id, + title, + score: match.score, + excerpt, + }); + } + + const context = contextParts.join('\n\n---\n\n'); + + // Step 4: Build conversation with context + const messages = [ + { + role: 'system', + content: `You are a helpful AI assistant. Answer questions based on the following context. If the context doesn't contain enough information to answer the question, say so honestly. + +Context: +${context}`, + }, + ...conversationHistory, + { + role: 'user', + content: question, + }, + ]; + + // Step 5: Generate answer with LLM + const aiResponse = await env.AI.run('@cf/meta/llama-3-8b-instruct', { + messages, + }); + + const answer = aiResponse.response || 'Sorry, I could not generate a response.'; + + // Return response with sources + return Response.json({ + answer, + sources, + context: context.slice(0, 1000), // Include truncated context for debugging + } as ChatResponse, { + headers: { 'Access-Control-Allow-Origin': '*' }, + }); + } catch (error) { + console.error('Chat error:', error); + return Response.json( + { + error: 'Chat failed', + message: error instanceof Error ? 
error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Route: POST /chat/stream - Streaming RAG responses + if (url.pathname === '/chat/stream' && request.method === 'POST') { + try { + const body = await request.json() as ChatRequest; + const { question, topK = 3, filter, namespace } = body; + + if (!question) { + return Response.json( + { error: 'Missing required field: question' }, + { status: 400 } + ); + } + + // Retrieve context (same as above) + const questionEmbedding = await env.AI.run('@cf/baai/bge-base-en-v1.5', { + text: question, + }); + + const searchResults = await env.VECTORIZE_INDEX.query( + questionEmbedding.data[0], + { topK, filter, namespace, returnMetadata: 'all', returnValues: false } + ); + + const contextParts = searchResults.matches.map( + (m) => `[${m.metadata?.title || m.id}]\n${m.metadata?.content || ''}` + ); + const context = contextParts.join('\n\n---\n\n'); + + // Stream LLM response + const stream = await env.AI.run('@cf/meta/llama-3-8b-instruct', { + messages: [ + { + role: 'system', + content: `Answer based on context:\n\n${context}`, + }, + { role: 'user', content: question }, + ], + stream: true, + }); + + return new Response(stream, { + headers: { + 'Content-Type': 'text/event-stream', + 'Access-Control-Allow-Origin': '*', + }, + }); + } catch (error) { + console.error('Stream error:', error); + return Response.json( + { + error: 'Streaming failed', + message: error instanceof Error ? 
error.message : 'Unknown error'
+          },
+          { status: 500 }
+        );
+      }
+    }
+
+    // Route: POST /ingest - Add knowledge base content
+    if (url.pathname === '/ingest' && request.method === 'POST') {
+      try {
+        const body = await request.json() as {
+          documents: Array<{
+            id: string;
+            title: string;
+            content: string;
+            metadata?: Record<string, any>;
+          }>;
+          namespace?: string;
+        };
+
+        if (!body.documents || !Array.isArray(body.documents)) {
+          return Response.json(
+            { error: 'Missing or invalid field: documents (array)' },
+            { status: 400 }
+          );
+        }
+
+        // Generate embeddings for all documents
+        const texts = body.documents.map((doc) => doc.content);
+        const embeddings = await env.AI.run('@cf/baai/bge-base-en-v1.5', {
+          text: texts,
+        });
+
+        // Prepare vectors for upsert
+        const vectors = body.documents.map((doc, i) => ({
+          id: doc.id,
+          values: embeddings.data[i],
+          namespace: body.namespace,
+          metadata: {
+            title: doc.title,
+            content: doc.content,
+            ...doc.metadata,
+            indexed_at: Date.now(),
+          },
+        }));
+
+        // Batch upsert
+        await env.VECTORIZE_INDEX.upsert(vectors);
+
+        return Response.json({
+          success: true,
+          count: vectors.length,
+          message: `Successfully indexed ${vectors.length} documents`,
+        }, {
+          headers: { 'Access-Control-Allow-Origin': '*' },
+        });
+      } catch (error) {
+        console.error('Ingest error:', error);
+        return Response.json(
+          {
+            error: 'Ingestion failed',
+            message: error instanceof Error ? 
error.message : 'Unknown error', + }, + { status: 500 } + ); + } + } + + // Default: API documentation + return Response.json({ + name: 'RAG Chatbot API', + endpoints: { + 'POST /chat': { + description: 'Ask questions with context retrieval', + body: { + question: 'string (required)', + conversationHistory: 'array (optional)', + topK: 'number (optional, default: 3)', + filter: 'object (optional)', + namespace: 'string (optional)', + }, + example: { + question: 'How do I deploy a Cloudflare Worker?', + topK: 3, + filter: { category: 'documentation' }, + }, + }, + 'POST /chat/stream': { + description: 'Streaming responses', + body: 'Same as /chat', + }, + 'POST /ingest': { + description: 'Add documents to knowledge base', + body: { + documents: [ + { + id: 'doc-1', + title: 'Document Title', + content: 'Document content...', + metadata: { category: 'docs' }, + }, + ], + namespace: 'string (optional)', + }, + }, + }, + }); + }, +}; + +/** + * Example Usage: + * + * 1. Ingest knowledge base: + * + * curl -X POST https://your-worker.workers.dev/ingest \ + * -H "Content-Type: application/json" \ + * -d '{ + * "documents": [ + * { + * "id": "workers-intro", + * "title": "Introduction to Workers", + * "content": "Cloudflare Workers allow you to deploy serverless code globally...", + * "metadata": { "category": "docs", "section": "workers" } + * } + * ] + * }' + * + * 2. Ask a question: + * + * curl -X POST https://your-worker.workers.dev/chat \ + * -H "Content-Type: application/json" \ + * -d '{ + * "question": "How do I deploy serverless code?", + * "topK": 3, + * "filter": { "category": "docs" } + * }' + * + * 3. Streaming response: + * + * curl -X POST https://your-worker.workers.dev/chat/stream \ + * -H "Content-Type: application/json" \ + * -d '{ "question": "What is a Worker?" }' + */