/**
* Complete RAG Implementation with Gemini Embeddings + Cloudflare Vectorize
*
 * Demonstrates end-to-end RAG (Retrieval-Augmented Generation):
* 1. Document ingestion (chunking + embedding + storage)
* 2. Query processing (embedding + vector search)
* 3. Response generation (context + LLM)
*
* Setup:
* 1. Create Vectorize index:
* npx wrangler vectorize create gemini-embeddings --dimensions 768 --metric cosine
*
* 2. Add to wrangler.jsonc:
 * {
 *   "vectorize": [{
 *     "binding": "VECTORIZE",
 *     "index_name": "gemini-embeddings"
 *   }]
 * }
*
* 3. Set secret:
* npx wrangler secret put GEMINI_API_KEY
*
* 4. Deploy:
* npx wrangler deploy
*
* Usage:
* POST /ingest - Upload documents
* POST /query - Ask questions
* GET /health - Check status
*/
interface Env {
GEMINI_API_KEY: string;
VECTORIZE: VectorizeIndex;
}
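/**
 * Minimal local typings for the Vectorize binding used below.
 * Assumption: if @cloudflare/workers-types is installed, its built-in
 * Vectorize types can be used instead and these interfaces removed.
 */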
interface VectorizeVector {
id: string;
values: number[];
metadata?: Record<string, any>;
}
interface VectorizeMatch {
id: string;
score: number;
metadata?: Record<string, any>;
}
interface VectorizeIndex {
insert(vectors: VectorizeVector[]): Promise<{ count: number }>;
query(
vector: number[],
options: { topK: number; returnMetadata?: boolean }
): Promise<{ matches: VectorizeMatch[] }>;
getByIds(ids: string[]): Promise<VectorizeVector[]>;
deleteByIds(ids: string[]): Promise<{ count: number }>;
}
/**
* Document chunking for better retrieval
*/
function chunkDocument(
text: string,
chunkSize: number = 500,
overlap: number = 50
): string[] {
const words = text.split(/\s+/);
const chunks: string[] = [];
for (let i = 0; i < words.length; i += chunkSize - overlap) {
const chunk = words.slice(i, i + chunkSize).join(' ');
if (chunk.trim().length > 0) {
chunks.push(chunk.trim());
}
}
return chunks;
}
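// Example (chunkDocument above): with chunkSize = 500 and overlap = 50 the loop
// advances 450 words per step, so a 1,200-word document yields chunks covering
// words 0-499, 450-949, and 900-1199 (the final chunk is simply shorter).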
/**
* Generate embedding using Gemini API
*/
async function generateEmbedding(
text: string,
apiKey: string,
taskType: string = 'RETRIEVAL_DOCUMENT'
): Promise<number[]> {
const response = await fetch(
'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:embedContent',
{
method: 'POST',
headers: {
'x-goog-api-key': apiKey,
'Content-Type': 'application/json'
},
body: JSON.stringify({
content: { parts: [{ text }] },
taskType,
outputDimensionality: 768 // MUST match Vectorize index dimensions
})
}
);
if (!response.ok) {
const error = await response.text();
throw new Error(`Embedding API error: ${response.status} - ${error}`);
}
const data = await response.json<{ embedding: { values: number[] } }>();
return data.embedding.values;
}
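/**
 * Optional helper (sketch, not called above): L2-normalize an embedding.
 * Assumption: Gemini embeddings returned at a reduced outputDimensionality
 * (such as 768) may not be unit-length; normalizing keeps cosine similarity
 * scores in Vectorize comparable. Skip if your vectors are already normalized.
 */
function normalizeEmbedding(values: number[]): number[] {
  const norm = Math.sqrt(values.reduce((sum, v) => sum + v * v, 0));
  return norm > 0 ? values.map(v => v / norm) : values;
}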
/**
* Generate response using Gemini API
*/
async function generateResponse(
context: string,
query: string,
apiKey: string
): Promise<string> {
const response = await fetch(
'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
{
method: 'POST',
headers: {
'x-goog-api-key': apiKey,
'Content-Type': 'application/json'
},
body: JSON.stringify({
contents: [{
parts: [{
text: `You are a helpful assistant. Answer the question based ONLY on the provided context.
Context:
${context}
Question: ${query}
Answer:`
}]
}]
})
}
);
if (!response.ok) {
const error = await response.text();
throw new Error(`Generation API error: ${response.status} - ${error}`);
}
const data = await response.json<{
candidates: Array<{
content: { parts: Array<{ text: string }> };
}>;
}>();
return data.candidates?.[0]?.content?.parts?.[0]?.text || 'No response generated';
}
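/**
 * Note (assumption): the generateContent request above can also include an
 * optional generationConfig block, e.g.
 *   generationConfig: { temperature: 0.2, maxOutputTokens: 1024 }
 * to make answers more deterministic and bound their length.
 */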
export default {
async fetch(request: Request, env: Env): Promise<Response> {
const url = new URL(request.url);
const corsHeaders = {
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
'Access-Control-Allow-Headers': 'Content-Type'
};
if (request.method === 'OPTIONS') {
return new Response(null, { headers: corsHeaders });
}
try {
// Health check
if (url.pathname === '/health') {
return new Response(JSON.stringify({
status: 'ok',
vectorize: 'connected',
gemini: 'ready'
}), {
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
// Document ingestion
if (url.pathname === '/ingest' && request.method === 'POST') {
const { documents } = await request.json<{ documents: Array<{ id: string; text: string }> }>();
if (!documents || !Array.isArray(documents)) {
return new Response(JSON.stringify({ error: 'Invalid request: documents array required' }), {
status: 400,
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
console.log(`📥 Ingesting ${documents.length} documents...`);
const vectors: VectorizeVector[] = [];
for (const doc of documents) {
// Chunk document
const chunks = chunkDocument(doc.text, 500, 50);
console.log(`📄 Document ${doc.id}: ${chunks.length} chunks`);
// Generate embeddings for each chunk
for (let i = 0; i < chunks.length; i++) {
const embedding = await generateEmbedding(
chunks[i],
env.GEMINI_API_KEY,
'RETRIEVAL_DOCUMENT' // ← Documents for indexing
);
vectors.push({
id: `${doc.id}-chunk-${i}`,
values: embedding,
metadata: {
documentId: doc.id,
chunkIndex: i,
text: chunks[i],
timestamp: Date.now()
}
});
}
}
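        // Note (assumption): Vectorize limits how many vectors a single insert()
        // call accepts, so very large ingests should split `vectors` into smaller
        // batches and insert them sequentially.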
// Insert into Vectorize
const result = await env.VECTORIZE.insert(vectors);
console.log(`✅ Ingested ${result.count} vectors`);
return new Response(JSON.stringify({
success: true,
documentsProcessed: documents.length,
chunksCreated: vectors.length,
vectorsInserted: result.count
}), {
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
// Query processing (RAG)
if (url.pathname === '/query' && request.method === 'POST') {
const { query, topK = 5 } = await request.json<{ query: string; topK?: number }>();
if (!query) {
return new Response(JSON.stringify({ error: 'Invalid request: query required' }), {
status: 400,
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
console.log(`🔍 Query: "${query}"`);
// 1. Generate query embedding
const queryEmbedding = await generateEmbedding(
query,
env.GEMINI_API_KEY,
'RETRIEVAL_QUERY' // ← Query, not document
);
// 2. Search Vectorize for similar chunks
const results = await env.VECTORIZE.query(queryEmbedding, {
topK,
returnMetadata: true
});
if (results.matches.length === 0) {
return new Response(JSON.stringify({
success: true,
answer: 'No relevant information found in the knowledge base.',
sources: []
}), {
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
console.log(`📚 Found ${results.matches.length} relevant chunks`);
// 3. Extract context from top matches
const context = results.matches
.map((match, i) => `[${i + 1}] ${match.metadata?.text || ''}`)
.join('\n\n');
// 4. Generate response with context
const answer = await generateResponse(context, query, env.GEMINI_API_KEY);
return new Response(JSON.stringify({
success: true,
query,
answer,
sources: results.matches.map(match => ({
documentId: match.metadata?.documentId,
chunkIndex: match.metadata?.chunkIndex,
similarity: match.score,
text: match.metadata?.text
}))
}, null, 2), {
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
// 404 for unknown routes
return new Response(JSON.stringify({
error: 'Not found',
routes: {
'POST /ingest': 'Upload documents',
'POST /query': 'Ask questions',
'GET /health': 'Health check'
}
}), {
status: 404,
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
} catch (error: any) {
console.error('❌ Error:', error.message);
return new Response(JSON.stringify({
success: false,
error: error.message
}), {
status: 500,
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
}
};
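/**
 * Sketch (not wired into the routes above): deleting a document's chunks.
 * Assumes the `${documentId}-chunk-${i}` ID convention used in /ingest and
 * that the caller knows how many chunks were created for the document.
 */
async function deleteDocumentChunks(
  env: Env,
  documentId: string,
  chunkCount: number
): Promise<number> {
  const ids = Array.from({ length: chunkCount }, (_, i) => `${documentId}-chunk-${i}`);
  const result = await env.VECTORIZE.deleteByIds(ids);
  return result.count;
}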
/**
* Example requests:
*
* 1. Ingest documents:
* curl -X POST https://your-worker.workers.dev/ingest \
* -H "Content-Type: application/json" \
* -d '{
* "documents": [
* {
* "id": "doc1",
* "text": "Paris is the capital of France. It is known for the Eiffel Tower..."
* },
* {
* "id": "doc2",
* "text": "Machine learning is a subset of artificial intelligence..."
* }
* ]
* }'
*
* 2. Query:
* curl -X POST https://your-worker.workers.dev/query \
* -H "Content-Type: application/json" \
* -d '{
* "query": "What is the capital of France?",
* "topK": 5
* }'
*
* 3. Health check:
* curl https://your-worker.workers.dev/health
*/