Initial commit

2025-11-30 08:24:54 +08:00
commit 7927519669
17 changed files with 4377 additions and 0 deletions
--- a/templates/rag-with-vectorize.ts
+++ b/templates/rag-with-vectorize.ts
@@ -0,0 +1,361 @@
+/**
+ * Complete RAG Implementation with Gemini Embeddings + Cloudflare Vectorize
+ *
+ * Demonstrates end-to-end RAG (Retrieval Augmented Generation):
+ * 1. Document ingestion (chunking + embedding + storage)
+ * 2. Query processing (embedding + vector search)
+ * 3. Response generation (context + LLM)
+ *
+ * Setup:
+ * 1. Create Vectorize index:
+ *    npx wrangler vectorize create gemini-embeddings --dimensions 768 --metric cosine
+ *
+ * 2. Add to wrangler.jsonc:
+ *    {
+ *      "vectorize": {
+ *        "bindings": [{
+ *          "binding": "VECTORIZE",
+ *          "index_name": "gemini-embeddings"
+ *        }]
+ *      }
+ *    }
+ *
+ * 3. Set secret:
+ *    npx wrangler secret put GEMINI_API_KEY
+ *
+ * 4. Deploy:
+ *    npx wrangler deploy
+ *
+ * Usage:
+ * POST /ingest - Upload documents
+ * POST /query - Ask questions
+ * GET /health - Check status
+ */
+
+/**
+ * Environment bindings this Worker reads on every request.
+ */
+interface Env {
+  /** Gemini API key; sent as the `x-goog-api-key` header on embedding and generation calls. */
+  GEMINI_API_KEY: string;
+  /** Vectorize index binding used to insert chunk vectors and to run similarity queries. */
+  VECTORIZE: VectorizeIndex;
+}
+
+/**
+ * A single vector record handed to the Vectorize index.
+ */
+interface VectorizeVector {
+  /** Unique vector id; the ingest route builds it as `<documentId>-chunk-<chunkIndex>`. */
+  id: string;
+  /** Embedding values for the chunk. */
+  values: number[];
+  /** Free-form metadata; the ingest route stores `documentId`, `chunkIndex`, `text` and `timestamp`. */
+  metadata?: Record<string, any>;
+}
+
+/**
+ * One search hit returned by a Vectorize query.
+ */
+interface VectorizeMatch {
+  /** Id of the matched vector. */
+  id: string;
+  /** Match score; the query route reports it to clients as `similarity`. */
+  score: number;
+  /** Metadata stored with the vector; the query route reads `text`, `documentId` and `chunkIndex` from it. */
+  metadata?: Record<string, any>;
+}
+
+/**
+ * Locally declared shape of the Vectorize index binding.
+ *
+ * Only `insert` and `query` are called in this file; `getByIds` and
+ * `deleteByIds` are declared but unused here.
+ *
+ * NOTE(review): presumably mirrors Cloudflare's Vectorize binding — confirm the
+ * return shapes (in particular `count`) against the index version actually deployed.
+ */
+interface VectorizeIndex {
+  /** Store the given vectors; resolves with a count of vectors. */
+  insert(vectors: VectorizeVector[]): Promise<{ count: number }>;
+  /** Find the `topK` nearest stored vectors to `vector`, optionally including their metadata. */
+  query(
+    vector: number[],
+    options: { topK: number; returnMetadata?: boolean }
+  ): Promise<{ matches: VectorizeMatch[] }>;
+  /** Fetch stored vectors by id. */
+  getByIds(ids: string[]): Promise<VectorizeVector[]>;
+  /** Delete stored vectors by id; resolves with a count of vectors. */
+  deleteByIds(ids: string[]): Promise<{ count: number }>;
+}
+
+/**
+ * Split a document into overlapping windows of whitespace-delimited words.
+ *
+ * Words are separated on any run of whitespace and re-joined with single
+ * spaces, so original line breaks are not preserved inside a chunk.
+ * Consecutive chunks share `overlap` words. Chunking stops as soon as a chunk
+ * reaches the final word, so no trailing chunk is emitted that merely repeats
+ * the overlap already contained in the previous chunk.
+ *
+ * @param text - Raw document text.
+ * @param chunkSize - Maximum number of words per chunk; must be a positive integer.
+ * @param overlap - Words shared between consecutive chunks; integer in `[0, chunkSize)`.
+ * @returns Chunks in document order; empty when `text` contains no words.
+ * @throws RangeError when `chunkSize` or `overlap` is out of range, because a
+ *   non-positive step (`chunkSize - overlap`) would never terminate.
+ */
+function chunkDocument(
+  text: string,
+  chunkSize: number = 500,
+  overlap: number = 50
+): string[] {
+  if (!Number.isInteger(chunkSize) || chunkSize < 1) {
+    throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
+  }
+  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= chunkSize) {
+    throw new RangeError(`overlap must be an integer in [0, chunkSize), got ${overlap}`);
+  }
+
+  // Trim and filter so leading/trailing whitespace never yields empty "words"
+  // that would count toward chunkSize.
+  const words = text.trim().split(/\s+/).filter((word) => word.length > 0);
+  const step = chunkSize - overlap;
+  const chunks: string[] = [];
+
+  for (let start = 0; start < words.length; start += step) {
+    chunks.push(words.slice(start, start + chunkSize).join(' '));
+
+    // This window already covers the last word; another step would only
+    // re-emit part of the overlap as a redundant chunk.
+    if (start + chunkSize >= words.length) {
+      break;
+    }
+  }
+
+  return chunks;
+}
+
+/**
+ * Generate an embedding vector for `text` with the Gemini
+ * `gemini-embedding-001` model.
+ *
+ * @param text - Text to embed.
+ * @param apiKey - Gemini API key, sent in the `x-goog-api-key` header.
+ * @param taskType - Gemini embedding task type, e.g. `RETRIEVAL_DOCUMENT` when
+ *   indexing and `RETRIEVAL_QUERY` when searching.
+ * @param outputDimensionality - Requested vector length; MUST match the
+ *   Vectorize index dimensions.
+ * @returns The embedding values, exactly `outputDimensionality` numbers long.
+ * @throws Error when the API answers with a non-2xx status, or with a payload
+ *   that does not contain `outputDimensionality` numeric values.
+ */
+async function generateEmbedding(
+  text: string,
+  apiKey: string,
+  taskType: string = 'RETRIEVAL_DOCUMENT',
+  outputDimensionality: number = 768
+): Promise<number[]> {
+  const response = await fetch(
+    'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:embedContent',
+    {
+      method: 'POST',
+      headers: {
+        'x-goog-api-key': apiKey,
+        'Content-Type': 'application/json'
+      },
+      body: JSON.stringify({
+        content: { parts: [{ text }] },
+        taskType,
+        outputDimensionality
+      })
+    }
+  );
+
+  if (!response.ok) {
+    const error = await response.text();
+    throw new Error(`Embedding API error: ${response.status} - ${error}`);
+  }
+
+  // Treat the payload as untrusted: narrow from unknown instead of assuming
+  // the documented shape, so a malformed reply fails with a clear message
+  // rather than an opaque TypeError.
+  const data: unknown = await response.json();
+  const values = (data as { embedding?: { values?: unknown } } | null)?.embedding?.values;
+
+  if (
+    !Array.isArray(values) ||
+    values.length !== outputDimensionality ||
+    !values.every((value) => typeof value === 'number')
+  ) {
+    throw new Error(
+      `Embedding API error: unexpected response payload (expected ${outputDimensionality} numeric values)`
+    );
+  }
+
+  return values;
+}
+
+/**
+ * Ask Gemini (`gemini-2.5-flash`) to answer `query` using only the supplied
+ * retrieval context.
+ *
+ * @param context - Retrieved text passages, already formatted for the prompt.
+ * @param query - The user's question.
+ * @param apiKey - Gemini API key, sent in the `x-goog-api-key` header.
+ * @returns The concatenated text parts of the first candidate, or
+ *   `'No response generated'` when the reply carries no text (for example when
+ *   no candidates are returned).
+ * @throws Error when the API answers with a non-2xx status.
+ */
+async function generateResponse(
+  context: string,
+  query: string,
+  apiKey: string
+): Promise<string> {
+  const response = await fetch(
+    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
+    {
+      method: 'POST',
+      headers: {
+        'x-goog-api-key': apiKey,
+        'Content-Type': 'application/json'
+      },
+      body: JSON.stringify({
+        contents: [{
+          parts: [{
+            text: `You are a helpful assistant. Answer the question based ONLY on the provided context.
+
+Context:
+${context}
+
+Question: ${query}
+
+Answer:`
+          }]
+        }]
+      })
+    }
+  );
+
+  if (!response.ok) {
+    const error = await response.text();
+    throw new Error(`Generation API error: ${response.status} - ${error}`);
+  }
+
+  // Narrow from unknown: a successful reply is not guaranteed to contain
+  // `candidates`, so every level is treated as optional.
+  const data: unknown = await response.json();
+  const candidates = (
+    data as {
+      candidates?: Array<{ content?: { parts?: Array<{ text?: unknown }> } }>;
+    } | null
+  )?.candidates;
+  const parts = Array.isArray(candidates) ? candidates[0]?.content?.parts : undefined;
+
+  // An answer may be split across several parts; join every text part instead
+  // of returning only the first one.
+  const answer = Array.isArray(parts)
+    ? parts.map((part) => (typeof part?.text === 'string' ? part.text : '')).join('')
+    : '';
+
+  // `||` is deliberate: an empty answer falls back to the placeholder too.
+  return answer || 'No response generated';
+}
+
+/** CORS headers attached to every response so browser clients on any origin can call the API. */
+const CORS_HEADERS = {
+  'Access-Control-Allow-Origin': '*',
+  'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
+  'Access-Control-Allow-Headers': 'Content-Type'
+};
+
+/**
+ * Vectors sent per `insert` call.
+ * NOTE(review): 1000 follows Cloudflare's published per-batch limit for the
+ * Workers binding at the time of writing — confirm against current limits.
+ */
+const INSERT_BATCH_SIZE = 1000;
+
+/** `topK` used when the client does not send a usable value. */
+const DEFAULT_TOP_K = 5;
+
+/**
+ * Upper bound applied to client-supplied `topK`.
+ * NOTE(review): 20 follows Cloudflare's published limit for queries that
+ * return metadata — confirm against current limits.
+ */
+const MAX_TOP_K = 20;
+
+/** Build a JSON response that carries the shared CORS headers. */
+function jsonResponse(body: unknown, status: number = 200, pretty: boolean = false): Response {
+  return new Response(JSON.stringify(body, null, pretty ? 2 : undefined), {
+    status,
+    headers: { 'Content-Type': 'application/json', ...CORS_HEADERS }
+  });
+}
+
+/** Read the request body as a JSON object; yields `undefined` when it is malformed or not an object. */
+async function readJsonObject(request: Request): Promise<Record<string, unknown> | undefined> {
+  try {
+    const parsed: unknown = await request.json();
+    return typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
+      ? (parsed as Record<string, unknown>)
+      : undefined;
+  } catch {
+    return undefined;
+  }
+}
+
+/** Type guard for one entry of the `/ingest` payload: non-empty string `id` and string `text`. */
+function isIngestDocument(value: unknown): value is { id: string; text: string } {
+  if (typeof value !== 'object' || value === null) {
+    return false;
+  }
+  const candidate = value as Record<string, unknown>;
+  return (
+    typeof candidate.id === 'string' &&
+    candidate.id.length > 0 &&
+    typeof candidate.text === 'string'
+  );
+}
+
+/** Turn a client-supplied `topK` into an integer in `[1, MAX_TOP_K]`, defaulting when it is not a finite number. */
+function resolveTopK(requested: unknown): number {
+  if (typeof requested !== 'number' || !Number.isFinite(requested)) {
+    return DEFAULT_TOP_K;
+  }
+  return Math.min(MAX_TOP_K, Math.max(1, Math.trunc(requested)));
+}
+
+/** `POST /ingest`: chunk each document, embed every chunk, and store the vectors in Vectorize. */
+async function handleIngest(request: Request, env: Env): Promise<Response> {
+  const body = await readJsonObject(request);
+  const rawDocuments = body?.documents;
+
+  if (!Array.isArray(rawDocuments)) {
+    return jsonResponse({ error: 'Invalid request: documents array required' }, 400);
+  }
+
+  const documents: Array<{ id: string; text: string }> = [];
+  for (const entry of rawDocuments) {
+    if (!isIngestDocument(entry)) {
+      return jsonResponse(
+        { error: 'Invalid request: each document requires a string "id" and "text"' },
+        400
+      );
+    }
+    documents.push(entry);
+  }
+
+  console.log(`📥 Ingesting ${documents.length} documents...`);
+
+  const vectors: VectorizeVector[] = [];
+
+  for (const doc of documents) {
+    const chunks = chunkDocument(doc.text, 500, 50);
+    console.log(`📄 Document ${doc.id}: ${chunks.length} chunks`);
+
+    // Embeddings are requested one at a time, in chunk order.
+    for (const [chunkIndex, chunkText] of chunks.entries()) {
+      const embedding = await generateEmbedding(
+        chunkText,
+        env.GEMINI_API_KEY,
+        'RETRIEVAL_DOCUMENT' // ← Documents for indexing
+      );
+
+      vectors.push({
+        id: `${doc.id}-chunk-${chunkIndex}`,
+        values: embedding,
+        metadata: {
+          documentId: doc.id,
+          chunkIndex,
+          text: chunkText,
+          timestamp: Date.now()
+        }
+      });
+    }
+  }
+
+  // Insert in bounded batches; an empty list never reaches the index.
+  let vectorsInserted = 0;
+  for (let offset = 0; offset < vectors.length; offset += INSERT_BATCH_SIZE) {
+    const batch = vectors.slice(offset, offset + INSERT_BATCH_SIZE);
+    const result = await env.VECTORIZE.insert(batch);
+    // NOTE(review): falls back to the batch size if the binding does not
+    // report a numeric count — confirm what the deployed index returns.
+    vectorsInserted += typeof result.count === 'number' ? result.count : batch.length;
+  }
+
+  console.log(`✅ Ingested ${vectorsInserted} vectors`);
+
+  return jsonResponse({
+    success: true,
+    documentsProcessed: documents.length,
+    chunksCreated: vectors.length,
+    vectorsInserted
+  });
+}
+
+/** `POST /query`: embed the question, retrieve the nearest chunks, and generate an answer from them. */
+async function handleQuery(request: Request, env: Env): Promise<Response> {
+  const body = await readJsonObject(request);
+  const query = body?.query;
+
+  if (typeof query !== 'string' || query.trim().length === 0) {
+    return jsonResponse({ error: 'Invalid request: query required' }, 400);
+  }
+
+  const topK = resolveTopK(body?.topK);
+
+  console.log(`🔍 Query: "${query}"`);
+
+  // 1. Generate query embedding
+  const queryEmbedding = await generateEmbedding(
+    query,
+    env.GEMINI_API_KEY,
+    'RETRIEVAL_QUERY' // ← Query, not document
+  );
+
+  // 2. Search Vectorize for similar chunks
+  const results = await env.VECTORIZE.query(queryEmbedding, {
+    topK,
+    returnMetadata: true
+  });
+
+  if (results.matches.length === 0) {
+    return jsonResponse({
+      success: true,
+      answer: 'No relevant information found in the knowledge base.',
+      sources: []
+    });
+  }
+
+  console.log(`📚 Found ${results.matches.length} relevant chunks`);
+
+  // 3. Number the matched passages and join them into the prompt context
+  const context = results.matches
+    .map((match, i) => `[${i + 1}] ${match.metadata?.text || ''}`)
+    .join('\n\n');
+
+  // 4. Generate response with context
+  const answer = await generateResponse(context, query, env.GEMINI_API_KEY);
+
+  return jsonResponse(
+    {
+      success: true,
+      query,
+      answer,
+      sources: results.matches.map((match) => ({
+        documentId: match.metadata?.documentId,
+        chunkIndex: match.metadata?.chunkIndex,
+        similarity: match.score,
+        text: match.metadata?.text
+      }))
+    },
+    200,
+    true
+  );
+}
+
+export default {
+  /**
+   * Worker entry point. Routes:
+   * - `OPTIONS *`     → CORS preflight
+   * - `/health`       → static status payload (any method)
+   * - `POST /ingest`  → chunk, embed and store documents
+   * - `POST /query`   → retrieve context and generate an answer
+   * - anything else   → 404 listing the available routes
+   *
+   * Invalid request bodies yield a 400; any error thrown while handling a
+   * route is logged and returned as a 500 JSON payload.
+   */
+  async fetch(request: Request, env: Env): Promise<Response> {
+    const { pathname } = new URL(request.url);
+
+    if (request.method === 'OPTIONS') {
+      return new Response(null, { headers: CORS_HEADERS });
+    }
+
+    try {
+      if (pathname === '/health') {
+        return jsonResponse({
+          status: 'ok',
+          vectorize: 'connected',
+          gemini: 'ready'
+        });
+      }
+
+      // `return await` keeps rejections inside this try block.
+      if (pathname === '/ingest' && request.method === 'POST') {
+        return await handleIngest(request, env);
+      }
+
+      if (pathname === '/query' && request.method === 'POST') {
+        return await handleQuery(request, env);
+      }
+
+      // 404 for unknown routes
+      return jsonResponse(
+        {
+          error: 'Not found',
+          routes: {
+            'POST /ingest': 'Upload documents',
+            'POST /query': 'Ask questions',
+            'GET /health': 'Health check'
+          }
+        },
+        404
+      );
+    } catch (error: unknown) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.error('❌ Error:', message);
+
+      return jsonResponse({ success: false, error: message }, 500);
+    }
+  }
+};
+
+/**
+ * Example requests:
+ *
+ * 1. Ingest documents:
+ * curl -X POST https://your-worker.workers.dev/ingest \
+ *   -H "Content-Type: application/json" \
+ *   -d '{
+ *     "documents": [
+ *       {
+ *         "id": "doc1",
+ *         "text": "Paris is the capital of France. It is known for the Eiffel Tower..."
+ *       },
+ *       {
+ *         "id": "doc2",
+ *         "text": "Machine learning is a subset of artificial intelligence..."
+ *       }
+ *     ]
+ *   }'
+ *
+ * 2. Query:
+ * curl -X POST https://your-worker.workers.dev/query \
+ *   -H "Content-Type: application/json" \
+ *   -d '{
+ *     "query": "What is the capital of France?",
+ *     "topK": 5
+ *   }'
+ *
+ * 3. Health check:
+ * curl https://your-worker.workers.dev/health
+ */