/**
 * Cloudflare Workers AI - Embeddings & RAG Examples
 *
 * This template demonstrates:
 * - Generating text embeddings with BGE models
 * - Storing embeddings in Vectorize
 * - Semantic search with vector similarity
 * - Complete RAG (Retrieval Augmented Generation) pattern
 * - Document chunking strategies
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
  VECTORIZE: Vectorize;
  DB?: D1Database;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Generate Embeddings
// ============================================================================

/**
 * Generate embeddings for text
 * BGE-base: 768 dimensions, good balance
 * BGE-large: 1024 dimensions, higher accuracy
 * BGE-small: 384 dimensions, faster/smaller
 *
 * Body: `{ text: string | string[] }`. A single string is wrapped in a
 * one-element array before being sent to the model. Responds 400 when `text`
 * is missing, not a string, or an empty array; 500 when the model call throws.
 */
app.post('/embeddings', async (c) => {
  try {
    const { text } = await c.req.json<{ text: string | string[] }>();

    // Normalize so single strings and arrays share one code path.
    const input = Array.isArray(text) ? text : [text];
    if (input.length === 0 || input.some((item) => typeof item !== 'string')) {
      return c.json(
        {
          success: false,
          error: 'text must be a string or a non-empty array of strings',
        },
        400
      );
    }

    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: input,
    });

    return c.json({
      success: true,
      shape: embeddings.shape, // [batch_size, dimensions]
      data: embeddings.data, // Array of vectors
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        // A thrown value is not guaranteed to be an Error instance.
        error: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});

// ============================================================================
// Batch Embeddings
// ============================================================================

/**
 * Generate embeddings for multiple texts in one request
 * More efficient than individual requests
 *
 * Body: `{ texts: string[] }`. Responds 400 when `texts` is not a non-empty
 * array; 500 when the model call throws.
 */
app.post('/embeddings/batch', async (c) => {
  try {
    const { texts } = await c.req.json<{ texts: string[] }>();

    // Array.isArray (rather than a truthiness check) also rejects a bare string
    // or object, which would otherwise report a misleading `count`.
    if (!Array.isArray(texts) || texts.length === 0) {
      return c.json({ success: false, error: 'texts array is required' }, 400);
    }

    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: texts,
    });

    return c.json({
      success: true,
      count: texts.length,
      shape: embeddings.shape,
      embeddings: embeddings.data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});

// ============================================================================
// Store Embeddings in Vectorize
// ============================================================================

/**
 * Embed a single document and upsert it into Vectorize under the given id.
 *
 * Body: `{ id: string; text: string; metadata?: Record<string, string | number | boolean> }`.
 * The stored metadata always carries the embedded `text` and a `createdAt`
 * timestamp (ms since epoch); caller-supplied metadata cannot override either.
 * Responds 400 when `id` or `text` is missing; 500 on model/index failure.
 */
app.post('/documents', async (c) => {
  try {
    const { id, text, metadata } = await c.req.json<{
      id: string;
      text: string;
      metadata?: Record<string, string | number | boolean>;
    }>();

    if (typeof id !== 'string' || id.length === 0 || typeof text !== 'string' || text.length === 0) {
      return c.json({ success: false, error: 'id and text are required' }, 400);
    }

    // Generate embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [text],
    });

    const vector = embeddings.data?.[0];
    if (!vector) {
      // Never upsert a record without values.
      return c.json({ success: false, error: 'Embedding model returned no vector' }, 500);
    }

    // Store in Vectorize
    await c.env.VECTORIZE.upsert([
      {
        id,
        values: vector,
        metadata: {
          // Spread first: `text` must stay the text that was actually embedded,
          // because the search and RAG routes read it back from metadata.
          ...metadata,
          text,
          createdAt: Date.now(),
        },
      },
    ]);

    return c.json({
      success: true,
      message: 'Document indexed',
      id,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});

// ============================================================================
// Semantic Search
// ============================================================================

/**
 * Embed the query and return the nearest stored vectors with their metadata.
 *
 * Body: `{ query: string; topK?: number }` (`topK` defaults to 5).
 * Responds 400 on a missing query or a non-positive/non-integer `topK`.
 */
app.post('/search', async (c) => {
  try {
    const { query, topK = 5 } = await c.req.json<{
      query: string;
      topK?: number;
    }>();

    if (typeof query !== 'string' || query.length === 0) {
      return c.json({ success: false, error: 'query is required' }, 400);
    }
    if (!Number.isInteger(topK) || topK < 1) {
      return c.json({ success: false, error: 'topK must be a positive integer' }, 400);
    }

    // Convert query to embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [query],
    });

    const vector = embeddings.data?.[0];
    if (!vector) {
      return c.json({ success: false, error: 'Embedding model returned no vector' }, 500);
    }

    // Search Vectorize
    const results = await c.env.VECTORIZE.query(vector, {
      topK,
      returnMetadata: true,
    });

    return c.json({
      success: true,
      query,
      results: results.matches.map((match) => ({
        id: match.id,
        score: match.score,
        text: match.metadata?.text,
        metadata: match.metadata,
      })),
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        // A thrown value is not guaranteed to be an Error instance.
        error: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});

// ============================================================================
// RAG Pattern: Query with Context
// ============================================================================

/**
 * Answer a question from retrieved context and stream the model output.
 *
 * Body: `{ question: string; topK?: number }` (`topK` defaults to 3).
 * On success the response body is the model stream sent as
 * `text/event-stream`; the ids and scores of the retrieved matches are
 * returned as JSON in the `x-sources` header. Validation and runtime failures
 * are returned as JSON (400 / 500).
 */
app.post('/rag/ask', async (c) => {
  try {
    const { question, topK = 3 } = await c.req.json<{
      question: string;
      topK?: number;
    }>();

    if (typeof question !== 'string' || question.length === 0) {
      return c.json({ success: false, error: 'question is required' }, 400);
    }
    if (!Number.isInteger(topK) || topK < 1) {
      return c.json({ success: false, error: 'topK must be a positive integer' }, 400);
    }

    // Step 1: Convert question to embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [question],
    });

    const vector = embeddings.data?.[0];
    if (!vector) {
      return c.json({ success: false, error: 'Embedding model returned no vector' }, 500);
    }

    // Step 2: Find relevant documents
    const results = await c.env.VECTORIZE.query(vector, {
      topK,
      returnMetadata: true,
    });

    // Step 3: Build context from matches (matches without stored text are skipped)
    const context = results.matches
      .map((match) => match.metadata?.text)
      .filter(Boolean)
      .join('\n\n');

    // Step 4: Generate answer with context
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: `Answer the question using ONLY the following context. If the context doesn't contain relevant information, say "I don't have enough information to answer that."\n\nContext:\n${context}`,
        },
        {
          role: 'user',
          content: question,
        },
      ],
      stream: true,
    });

    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'x-sources': JSON.stringify(
          results.matches.map((m) => ({ id: m.id, score: m.score }))
        ),
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});

// ============================================================================
// Document Chunking
// ============================================================================

/**
 * Split long documents into chunks for better embedding quality
 * Recommended: 200-500 tokens per chunk
 *
 * Chunks are measured in whitespace-separated words, not model tokens.
 * Consecutive chunks share `overlap` words.
 *
 * @param text - Text to split. Empty or whitespace-only text yields `[]`.
 * @param chunkSize - Maximum words per chunk. Values below 1 (or non-finite)
 *   are treated as 1.
 * @param overlap - Words shared between neighbouring chunks. Clamped to
 *   `[0, chunkSize - 1]` so the window always advances by at least one word.
 * @returns The chunks in document order, each joined with single spaces.
 */
function chunkText(text: string, chunkSize = 500, overlap = 50): string[] {
  // Drop empty tokens produced by leading/trailing whitespace.
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  if (words.length === 0) {
    return [];
  }

  const size = Number.isFinite(chunkSize) && chunkSize >= 1 ? Math.floor(chunkSize) : 1;
  const requestedOverlap = Number.isFinite(overlap) && overlap > 0 ? Math.floor(overlap) : 0;
  // With overlap >= size the step would be <= 0 and the loop would never end.
  const step = size - Math.min(requestedOverlap, size - 1);

  const chunks: string[] = [];
  for (let start = 0; start < words.length; start += step) {
    chunks.push(words.slice(start, start + size).join(' '));
    // This window already reached the last word; a further chunk would only
    // repeat words that are fully contained in this one.
    if (start + size >= words.length) {
      break;
    }
  }

  return chunks;
}

/**
 * Chunk a long document, embed every chunk in one model call, and upsert one
 * vector per chunk with id `${id}-chunk-${index}`.
 *
 * Body: `{ id: string; text: string; chunkSize?: number }` (`chunkSize`
 * defaults to 500 words). Responds 400 on missing `id`/`text`, a
 * non-positive/non-integer `chunkSize`, or text with no words.
 *
 * NOTE(review): all chunks go to the embedding model and to Vectorize in a
 * single call each; very long documents may exceed per-request batch limits —
 * confirm against the platform limits and batch if needed.
 */
app.post('/documents/long', async (c) => {
  try {
    const { id, text, chunkSize = 500 } = await c.req.json<{
      id: string;
      text: string;
      chunkSize?: number;
    }>();

    if (typeof id !== 'string' || id.length === 0 || typeof text !== 'string') {
      return c.json({ success: false, error: 'id and text are required' }, 400);
    }
    if (!Number.isInteger(chunkSize) || chunkSize < 1) {
      return c.json({ success: false, error: 'chunkSize must be a positive integer' }, 400);
    }

    // Split into chunks
    const chunks = chunkText(text, chunkSize);
    if (chunks.length === 0) {
      return c.json({ success: false, error: 'text must contain at least one word' }, 400);
    }

    // Generate embeddings for all chunks
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: chunks,
    });

    const data = embeddings.data ?? [];
    if (data.length !== chunks.length) {
      // Guard against pairing chunks with missing vectors.
      return c.json(
        { success: false, error: 'Embedding model returned an unexpected number of vectors' },
        500
      );
    }

    // Store each chunk in Vectorize
    const vectors = chunks.map((chunk, index) => ({
      id: `${id}-chunk-${index}`,
      values: data[index],
      metadata: {
        documentId: id,
        chunkIndex: index,
        text: chunk,
        totalChunks: chunks.length,
      },
    }));

    await c.env.VECTORIZE.upsert(vectors);

    return c.json({
      success: true,
      message: 'Document indexed with chunks',
      documentId: id,
      chunks: chunks.length,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});

// ============================================================================
// RAG with Citations
// ============================================================================

/**
 * Answer a question from the top 5 retrieved chunks and return the answer
 * together with the numbered sources it was allowed to cite.
 *
 * Body: `{ question: string }`. Matches without stored text are skipped, so
 * `[Source N]` in the prompt always corresponds to `sources[N - 1]` in the
 * response.
 */
app.post('/rag/ask-with-citations', async (c) => {
  try {
    const { question } = await c.req.json<{ question: string }>();

    if (typeof question !== 'string' || question.length === 0) {
      return c.json({ success: false, error: 'question is required' }, 400);
    }

    // Find relevant chunks
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [question],
    });

    const vector = embeddings.data?.[0];
    if (!vector) {
      return c.json({ success: false, error: 'Embedding model returned no vector' }, 500);
    }

    const results = await c.env.VECTORIZE.query(vector, {
      topK: 5,
      returnMetadata: true,
    });

    // Keep only matches that carry text, numbering them after filtering so the
    // prompt never contains "[Source N] undefined" and ids stay contiguous.
    const sources = results.matches
      .filter((match) => Boolean(match.metadata?.text))
      .map((match, i) => ({
        id: i + 1,
        documentId: match.metadata?.documentId,
        text: match.metadata?.text,
        score: match.score,
      }));

    // Build context with citations
    const context = sources
      .map(
        (source) =>
          `[Source ${source.id}] ${source.text} (Relevance: ${(source.score * 100).toFixed(1)}%)`
      )
      .join('\n\n');

    // Generate answer
    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: `Answer the question using the provided sources. Cite sources using [Source N] format.\n\n${context}`,
        },
        {
          role: 'user',
          content: question,
        },
      ],
    });

    return c.json({
      success: true,
      answer: response.response,
      sources,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        // A thrown value is not guaranteed to be an Error instance.
        error: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});

// ============================================================================
// Hybrid Search (Keyword + Semantic)
// ============================================================================

/**
 * Combine keyword search (D1) with semantic search (Vectorize)
 * for better recall
 *
 * Body: `{ query: string }`. The keyword leg only runs when the optional `DB`
 * binding is configured; it does a literal substring match against
 * `documents.text`. Results are deduplicated by id; when both legs return the
 * same id, the keyword entry (score 1.0) replaces the vector entry.
 */
app.post('/search/hybrid', async (c) => {
  try {
    const { query } = await c.req.json<{ query: string }>();

    if (typeof query !== 'string' || query.length === 0) {
      return c.json({ success: false, error: 'query is required' }, 400);
    }

    // Semantic search
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [query],
    });

    const vector = embeddings.data?.[0];
    if (!vector) {
      return c.json({ success: false, error: 'Embedding model returned no vector' }, 500);
    }

    const vectorResults = await c.env.VECTORIZE.query(vector, {
      topK: 5,
      returnMetadata: true,
    });

    // Keyword search (if D1 available)
    let keywordResults: { id: string; text: string }[] = [];
    if (c.env.DB) {
      // Escape LIKE wildcards so "%" or "_" in user input match literally
      // instead of acting as patterns (e.g. "%" matching every row).
      const literal = query.replace(/[\\%_]/g, '\\$&');
      const { results } = await c.env.DB.prepare(
        "SELECT id, text FROM documents WHERE text LIKE ? ESCAPE '\\' LIMIT 5"
      )
        .bind(`%${literal}%`)
        .all<{ id: string; text: string }>();

      keywordResults = results ?? [];
    }

    // Combine and deduplicate
    const combined = [
      ...vectorResults.matches.map((m) => ({
        id: m.id,
        text: m.metadata?.text,
        score: m.score,
        source: 'vector',
      })),
      ...keywordResults.map((r) => ({
        id: r.id,
        text: r.text,
        score: 1.0,
        source: 'keyword',
      })),
    ];

    // Deduplicate by ID (a later entry replaces an earlier one with the same id)
    const unique = Array.from(new Map(combined.map((item) => [item.id, item])).values());

    return c.json({
      success: true,
      query,
      results: unique,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});

// ============================================================================
// Delete Documents
// ============================================================================

/**
 * Delete a document's vectors from Vectorize.
 *
 * Removes the vector stored under `id` and, when the document was indexed in
 * chunks (ids of the form `${id}-chunk-${index}`), every chunk vector as well.
 * The chunk count is read from the `totalChunks` metadata of chunk 0.
 */
app.delete('/documents/:id', async (c) => {
  try {
    const id = c.req.param('id');

    const ids = [id];

    // Chunked documents are stored only under "<id>-chunk-<n>", so deleting
    // just `id` would leave every chunk behind while still reporting success.
    const [firstChunk] = await c.env.VECTORIZE.getByIds([`${id}-chunk-0`]);
    if (firstChunk) {
      const totalChunks = firstChunk.metadata?.totalChunks;
      const count =
        typeof totalChunks === 'number' && Number.isInteger(totalChunks) && totalChunks > 0
          ? totalChunks
          : 1;
      for (let index = 0; index < count; index++) {
        ids.push(`${id}-chunk-${index}`);
      }
    }

    // Delete from Vectorize
    await c.env.VECTORIZE.deleteByIds(ids);

    return c.json({
      success: true,
      message: 'Document deleted',
      id,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

/** Liveness probe: returns `status: 'ok'` and the current ISO-8601 timestamp. */
app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;