/**
 * Complete RAG Implementation with Gemini Embeddings + Cloudflare Vectorize
 *
 * Demonstrates end-to-end RAG (Retrieval Augmented Generation):
 * 1. Document ingestion (chunking + embedding + storage)
 * 2. Query processing (embedding + vector search)
 * 3. Response generation (context + LLM)
 *
 * Setup:
 * 1. Create Vectorize index:
 *    npx wrangler vectorize create gemini-embeddings --dimensions 768 --metric cosine
 *
 * 2. Add to wrangler.jsonc:
 *    {
 *      "vectorize": [
 *        {
 *          "binding": "VECTORIZE",
 *          "index_name": "gemini-embeddings"
 *        }
 *      ]
 *    }
 *
 * 3. Set secret:
 *    npx wrangler secret put GEMINI_API_KEY
 *
 * 4. Deploy:
 *    npx wrangler deploy
 *
 * Usage:
 *   POST /ingest - Upload documents
 *   POST /query  - Ask questions
 *   GET  /health - Check status
 */

/** Bindings this Worker reads from its environment. */
interface Env {
  GEMINI_API_KEY: string;
  VECTORIZE: VectorizeIndex;
}

/** A vector plus optional metadata, as stored in the index. */
interface VectorizeVector {
  id: string;
  values: number[];
  metadata?: Record<string, unknown>;
}

/** A single search hit returned by `VectorizeIndex.query`. */
interface VectorizeMatch {
  id: string;
  score: number;
  metadata?: Record<string, unknown>;
}

/** Minimal local description of the Vectorize binding surface used by this file. */
interface VectorizeIndex {
  insert(vectors: VectorizeVector[]): Promise<{ count: number }>;
  query(
    vector: number[],
    options: { topK: number; returnMetadata?: boolean }
  ): Promise<{ matches: VectorizeMatch[] }>;
  getByIds(ids: string[]): Promise<VectorizeVector[]>;
  deleteByIds(ids: string[]): Promise<{ count: number }>;
}

/** Shape of one entry in the `/ingest` request body. */
interface IngestDocument {
  id: string;
  text: string;
}

/** Embedding size requested from Gemini. MUST match the Vectorize index dimensions. */
const EMBEDDING_DIMENSIONS = 768;

/** Words per chunk and words shared between consecutive chunks during ingestion. */
const CHUNK_SIZE = 500;
const CHUNK_OVERLAP = 50;

/** Number of matches requested when the client does not supply `topK`. */
const DEFAULT_TOP_K = 5;

/**
 * Upper bound applied to a client-supplied `topK`.
 * NOTE(review): 20 is the documented Vectorize ceiling when metadata is returned —
 * confirm against the current platform limits.
 */
const MAX_TOP_K = 20;

const CORS_HEADERS = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
  'Access-Control-Allow-Headers': 'Content-Type'
};

/**
 * Split a document into overlapping word-based chunks for better retrieval.
 *
 * @param text - Raw document text; any run of whitespace separates words.
 * @param chunkSize - Maximum number of words per chunk (positive integer).
 * @param overlap - Number of words repeated at the start of the next chunk
 *   (integer, `0 <= overlap < chunkSize`).
 * @returns The chunks in document order; empty when `text` has no words.
 * @throws RangeError when `chunkSize`/`overlap` would make the window stall or move backwards.
 */
function chunkDocument(
  text: string,
  chunkSize: number = 500,
  overlap: number = 50
): string[] {
  if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
    throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= chunkSize) {
    // A step of zero or less would never advance the window (infinite loop).
    throw new RangeError(
      `overlap must be an integer in [0, chunkSize), got ${overlap} (chunkSize ${chunkSize})`
    );
  }

  // Drop the empty tokens that leading/trailing whitespace produces so they
  // do not count towards the chunk size.
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  const step = chunkSize - overlap;
  const chunks: string[] = [];

  for (let start = 0; start < words.length; start += step) {
    chunks.push(words.slice(start, start + chunkSize).join(' '));
    // This window already reached the last word; a further window would only
    // repeat words that are fully contained in this chunk.
    if (start + chunkSize >= words.length) {
      break;
    }
  }

  return chunks;
}

/**
 * Generate an embedding for `text` using the Gemini embedContent endpoint.
 *
 * @param text - Text to embed.
 * @param apiKey - Gemini API key, sent in the `x-goog-api-key` header.
 * @param taskType - Gemini task type, e.g. `RETRIEVAL_DOCUMENT` or `RETRIEVAL_QUERY`.
 * @returns The embedding values returned by the API.
 * @throws Error when the API responds with a non-2xx status or without embedding values.
 */
async function generateEmbedding(
  text: string,
  apiKey: string,
  taskType: string = 'RETRIEVAL_DOCUMENT'
): Promise<number[]> {
  const response = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:embedContent',
    {
      method: 'POST',
      headers: {
        'x-goog-api-key': apiKey,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        content: { parts: [{ text }] },
        taskType,
        outputDimensionality: EMBEDDING_DIMENSIONS // MUST match Vectorize index dimensions
      })
    }
  );

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Embedding API error: ${response.status} - ${error}`);
  }

  const data = (await response.json()) as { embedding?: { values?: number[] } };
  const values = data.embedding?.values;
  if (!Array.isArray(values) || values.length === 0) {
    throw new Error('Embedding API error: response contained no embedding values');
  }
  return values;
}

/**
 * Generate an answer with the Gemini generateContent endpoint, grounded on `context`.
 *
 * @param context - Retrieved text passed to the model as the only allowed source.
 * @param query - The user's question.
 * @param apiKey - Gemini API key, sent in the `x-goog-api-key` header.
 * @returns The first candidate's text, or `'No response generated'` when there is none.
 * @throws Error when the API responds with a non-2xx status.
 */
async function generateResponse(
  context: string,
  query: string,
  apiKey: string
): Promise<string> {
  const prompt = [
    'You are a helpful assistant. Answer the question based ONLY on the provided context.',
    `Context: ${context}`,
    `Question: ${query}`,
    'Answer:'
  ].join(' ');

  const response = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
    {
      method: 'POST',
      headers: {
        'x-goog-api-key': apiKey,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        contents: [{ parts: [{ text: prompt }] }]
      })
    }
  );

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Generation API error: ${response.status} - ${error}`);
  }

  const data = (await response.json()) as {
    candidates?: Array<{
      content?: { parts?: Array<{ text?: string }> };
    }>;
  };

  // Every level is optional: the body may carry no candidates or parts at all.
  return data.candidates?.[0]?.content?.parts?.[0]?.text || 'No response generated';
}

/** Build a JSON response that always carries the CORS headers. */
function jsonResponse(body: unknown, status: number = 200, pretty: boolean = false): Response {
  const payload = pretty ? JSON.stringify(body, null, 2) : JSON.stringify(body);
  return new Response(payload, {
    status,
    headers: { 'Content-Type': 'application/json', ...CORS_HEADERS }
  });
}

/** True for plain JSON objects (not `null`, not arrays). */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** True when `value` has the string `id` and `text` fields required for ingestion. */
function isIngestDocument(value: unknown): value is IngestDocument {
  return (
    isRecord(value) &&
    typeof value.id === 'string' &&
    value.id.length > 0 &&
    typeof value.text === 'string'
  );
}

/** Parse the request body as a JSON object; `null` when it is malformed or not an object. */
async function parseJsonObject(request: Request): Promise<Record<string, unknown> | null> {
  try {
    const parsed: unknown = await request.json();
    return isRecord(parsed) ? parsed : null;
  } catch {
    return null;
  }
}

/** Coerce a client-supplied `topK` into an integer in `[1, MAX_TOP_K]`. */
function normalizeTopK(raw: unknown): number {
  if (typeof raw !== 'number' || !Number.isFinite(raw)) {
    return DEFAULT_TOP_K;
  }
  return Math.min(MAX_TOP_K, Math.max(1, Math.floor(raw)));
}

/** POST /ingest — chunk, embed and store every document in the request body. */
async function handleIngest(request: Request, env: Env): Promise<Response> {
  const body = await parseJsonObject(request);
  const documents = body?.documents;

  if (!Array.isArray(documents)) {
    return jsonResponse({ error: 'Invalid request: documents array required' }, 400);
  }
  if (!documents.every(isIngestDocument)) {
    return jsonResponse(
      { error: 'Invalid request: each document requires string "id" and "text"' },
      400
    );
  }

  console.log(`📥 Ingesting ${documents.length} documents...`);

  const vectors: VectorizeVector[] = [];

  for (const doc of documents as IngestDocument[]) {
    const chunks = chunkDocument(doc.text, CHUNK_SIZE, CHUNK_OVERLAP);
    console.log(`📄 Document ${doc.id}: ${chunks.length} chunks`);

    // Embeddings are requested one at a time on purpose: each call is an
    // outbound API request, and an unbounded fan-out is easy to get throttled.
    for (const [chunkIndex, chunkText] of chunks.entries()) {
      const embedding = await generateEmbedding(
        chunkText,
        env.GEMINI_API_KEY,
        'RETRIEVAL_DOCUMENT' // ← Documents for indexing
      );

      vectors.push({
        id: `${doc.id}-chunk-${chunkIndex}`,
        values: embedding,
        metadata: {
          documentId: doc.id,
          chunkIndex,
          text: chunkText,
          timestamp: Date.now()
        }
      });
    }
  }

  // Nothing to store (no documents, or only whitespace): skip the index call.
  const inserted = vectors.length > 0 ? (await env.VECTORIZE.insert(vectors)).count : 0;
  console.log(`✅ Ingested ${inserted} vectors`);

  return jsonResponse({
    success: true,
    documentsProcessed: documents.length,
    chunksCreated: vectors.length,
    vectorsInserted: inserted
  });
}

/** POST /query — embed the question, retrieve matching chunks and generate an answer. */
async function handleQuery(request: Request, env: Env): Promise<Response> {
  const body = await parseJsonObject(request);
  const query = body?.query;

  if (typeof query !== 'string' || query.trim().length === 0) {
    return jsonResponse({ error: 'Invalid request: query required' }, 400);
  }

  const topK = normalizeTopK(body?.topK);

  console.log(`🔍 Query: "${query}"`);

  // 1. Generate query embedding
  const queryEmbedding = await generateEmbedding(
    query,
    env.GEMINI_API_KEY,
    'RETRIEVAL_QUERY' // ← Query, not document
  );

  // 2. Search Vectorize for similar chunks
  const results = await env.VECTORIZE.query(queryEmbedding, {
    topK,
    returnMetadata: true
  });

  if (results.matches.length === 0) {
    return jsonResponse({
      success: true,
      answer: 'No relevant information found in the knowledge base.',
      sources: []
    });
  }

  console.log(`📚 Found ${results.matches.length} relevant chunks`);

  // 3. Extract context from top matches
  const context = results.matches
    .map((match, i) => {
      const text = match.metadata?.text;
      return `[${i + 1}] ${typeof text === 'string' ? text : ''}`;
    })
    .join('\n\n');

  // 4. Generate response with context
  const answer = await generateResponse(context, query, env.GEMINI_API_KEY);

  return jsonResponse(
    {
      success: true,
      query,
      answer,
      sources: results.matches.map((match) => ({
        documentId: match.metadata?.documentId,
        chunkIndex: match.metadata?.chunkIndex,
        similarity: match.score,
        text: match.metadata?.text
      }))
    },
    200,
    true
  );
}

export default {
  /**
   * Worker entry point: answers CORS preflights, routes /health, /ingest and
   * /query, and converts any thrown error into a JSON 500 response.
   */
  async fetch(request: Request, env: Env): Promise<Response> {
    const url = new URL(request.url);

    if (request.method === 'OPTIONS') {
      return new Response(null, { headers: CORS_HEADERS });
    }

    try {
      // Health check
      if (url.pathname === '/health') {
        return jsonResponse({
          status: 'ok',
          vectorize: 'connected',
          gemini: 'ready'
        });
      }

      // Document ingestion
      if (url.pathname === '/ingest' && request.method === 'POST') {
        return await handleIngest(request, env);
      }

      // Query processing (RAG)
      if (url.pathname === '/query' && request.method === 'POST') {
        return await handleQuery(request, env);
      }

      // 404 for unknown routes
      return jsonResponse(
        {
          error: 'Not found',
          routes: {
            'POST /ingest': 'Upload documents',
            'POST /query': 'Ask questions',
            'GET /health': 'Health check'
          }
        },
        404
      );
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error('❌ Error:', message);
      return jsonResponse({ success: false, error: message }, 500);
    }
  }
};

/**
 * Example requests:
 *
 * 1. Ingest documents:
 *    curl -X POST https://your-worker.workers.dev/ingest \
 *      -H "Content-Type: application/json" \
 *      -d '{
 *        "documents": [
 *          {
 *            "id": "doc1",
 *            "text": "Paris is the capital of France. It is known for the Eiffel Tower..."
 *          },
 *          {
 *            "id": "doc2",
 *            "text": "Machine learning is a subset of artificial intelligence..."
 *          }
 *        ]
 *      }'
 *
 * 2. Query:
 *    curl -X POST https://your-worker.workers.dev/query \
 *      -H "Content-Type: application/json" \
 *      -d '{
 *        "query": "What is the capital of France?",
 *        "topK": 5
 *      }'
 *
 * 3. Health check:
 *    curl https://your-worker.workers.dev/health
 */