Initial commit

2025-11-30 08:24:38 +08:00
commit b41966ed51
12 changed files with 3508 additions and 0 deletions
--- a/templates/ai-embeddings-rag.ts
+++ b/templates/ai-embeddings-rag.ts
@@ -0,0 +1,491 @@
+/**
+ * Cloudflare Workers AI - Embeddings & RAG Examples
+ *
+ * This template demonstrates:
+ * - Generating text embeddings with BGE models
+ * - Storing embeddings in Vectorize
+ * - Semantic search with vector similarity
+ * - Complete RAG (Retrieval Augmented Generation) pattern
+ * - Document chunking strategies
+ */
+
+import { Hono } from 'hono';
+
+type Bindings = {
+  AI: Ai;
+  VECTORIZE: Vectorize;
+  DB?: D1Database;
+};
+
+const app = new Hono<{ Bindings: Bindings }>();
+
+// ============================================================================
+// Generate Embeddings
+// ============================================================================
+
+/**
+ * Generate embeddings for text
+ * BGE-base: 768 dimensions, good balance
+ * BGE-large: 1024 dimensions, higher accuracy
+ * BGE-small: 384 dimensions, faster/smaller
+ */
+
+app.post('/embeddings', async (c) => {
+  try {
+    const { text } = await c.req.json<{ text: string | string[] }>();
+
+    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
+      text: Array.isArray(text) ? text : [text],
+    });
+
+    return c.json({
+      success: true,
+      shape: embeddings.shape, // [batch_size, dimensions]
+      data: embeddings.data, // Array of vectors
+    });
+  } catch (error) {
+    return c.json(
+      {
+        success: false,
+        error: (error as Error).message,
+      },
+      500
+    );
+  }
+});
+
+// ============================================================================
+// Batch Embeddings
+// ============================================================================
+
+/**
+ * Generate embeddings for multiple texts in one request
+ * More efficient than individual requests
+ */
+
+app.post('/embeddings/batch', async (c) => {
+  try {
+    const { texts } = await c.req.json<{ texts: string[] }>();
+
+    if (!texts || texts.length === 0) {
+      return c.json({ error: 'texts array is required' }, 400);
+    }
+
+    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
+      text: texts,
+    });
+
+    return c.json({
+      success: true,
+      count: texts.length,
+      shape: embeddings.shape,
+      embeddings: embeddings.data,
+    });
+  } catch (error) {
+    return c.json(
+      {
+        success: false,
+        error: (error as Error).message,
+      },
+      500
+    );
+  }
+});
+
+// ============================================================================
+// Store Embeddings in Vectorize
+// ============================================================================
+
+app.post('/documents', async (c) => {
+  try {
+    const { id, text, metadata } = await c.req.json<{
+      id: string;
+      text: string;
+      metadata?: Record<string, any>;
+    }>();
+
+    // Generate embedding
+    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
+      text: [text],
+    });
+
+    const vector = embeddings.data[0];
+
+    // Store in Vectorize
+    await c.env.VECTORIZE.upsert([
+      {
+        id,
+        values: vector,
+        metadata: {
+          text,
+          ...metadata,
+          createdAt: Date.now(),
+        },
+      },
+    ]);
+
+    return c.json({
+      success: true,
+      message: 'Document indexed',
+      id,
+    });
+  } catch (error) {
+    return c.json(
+      {
+        success: false,
+        error: (error as Error).message,
+      },
+      500
+    );
+  }
+});
+
+// ============================================================================
+// Semantic Search
+// ============================================================================
+
+app.post('/search', async (c) => {
+  try {
+    const { query, topK = 5 } = await c.req.json<{
+      query: string;
+      topK?: number;
+    }>();
+
+    // Convert query to embedding
+    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
+      text: [query],
+    });
+
+    const vector = embeddings.data[0];
+
+    // Search Vectorize
+    const results = await c.env.VECTORIZE.query(vector, {
+      topK,
+      returnMetadata: true,
+    });
+
+    return c.json({
+      success: true,
+      query,
+      results: results.matches.map((match) => ({
+        id: match.id,
+        score: match.score,
+        text: match.metadata?.text,
+        metadata: match.metadata,
+      })),
+    });
+  } catch (error) {
+    return c.json(
+      {
+        success: false,
+        error: (error as Error).message,
+      },
+      500
+    );
+  }
+});
+
+// ============================================================================
+// RAG Pattern: Query with Context
+// ============================================================================
+
+app.post('/rag/ask', async (c) => {
+  try {
+    const { question, topK = 3 } = await c.req.json<{
+      question: string;
+      topK?: number;
+    }>();
+
+    // Step 1: Convert question to embedding
+    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
+      text: [question],
+    });
+
+    const vector = embeddings.data[0];
+
+    // Step 2: Find relevant documents
+    const results = await c.env.VECTORIZE.query(vector, {
+      topK,
+      returnMetadata: true,
+    });
+
+    // Step 3: Build context from matches
+    const context = results.matches
+      .map((match) => match.metadata?.text)
+      .filter(Boolean)
+      .join('\n\n');
+
+    // Step 4: Generate answer with context
+    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
+      messages: [
+        {
+          role: 'system',
+          content: `Answer the question using ONLY the following context. If the context doesn't contain relevant information, say "I don't have enough information to answer that."\n\nContext:\n${context}`,
+        },
+        {
+          role: 'user',
+          content: question,
+        },
+      ],
+      stream: true,
+    });
+
+    return new Response(stream, {
+      headers: {
+        'content-type': 'text/event-stream',
+        'x-sources': JSON.stringify(
+          results.matches.map((m) => ({ id: m.id, score: m.score }))
+        ),
+      },
+    });
+  } catch (error) {
+    return c.json(
+      {
+        success: false,
+        error: (error as Error).message,
+      },
+      500
+    );
+  }
+});
+
+// ============================================================================
+// Document Chunking
+// ============================================================================
+
+/**
+ * Split long documents into chunks for better embedding quality
+ * Recommended: 200-500 tokens per chunk
+ */
+
+function chunkText(text: string, chunkSize = 500, overlap = 50): string[] {
+  const words = text.split(/\s+/);
+  const chunks: string[] = [];
+
+  for (let i = 0; i < words.length; i += chunkSize - overlap) {
+    const chunk = words.slice(i, i + chunkSize).join(' ');
+    chunks.push(chunk);
+  }
+
+  return chunks;
+}
+
+app.post('/documents/long', async (c) => {
+  try {
+    const { id, text, chunkSize = 500 } = await c.req.json<{
+      id: string;
+      text: string;
+      chunkSize?: number;
+    }>();
+
+    // Split into chunks
+    const chunks = chunkText(text, chunkSize);
+
+    // Generate embeddings for all chunks
+    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
+      text: chunks,
+    });
+
+    // Store each chunk in Vectorize
+    const vectors = chunks.map((chunk, index) => ({
+      id: `${id}-chunk-${index}`,
+      values: embeddings.data[index],
+      metadata: {
+        documentId: id,
+        chunkIndex: index,
+        text: chunk,
+        totalChunks: chunks.length,
+      },
+    }));
+
+    await c.env.VECTORIZE.upsert(vectors);
+
+    return c.json({
+      success: true,
+      message: 'Document indexed with chunks',
+      documentId: id,
+      chunks: chunks.length,
+    });
+  } catch (error) {
+    return c.json(
+      {
+        success: false,
+        error: (error as Error).message,
+      },
+      500
+    );
+  }
+});
+
+// ============================================================================
+// RAG with Citations
+// ============================================================================
+
+app.post('/rag/ask-with-citations', async (c) => {
+  try {
+    const { question } = await c.req.json<{ question: string }>();
+
+    // Find relevant chunks
+    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
+      text: [question],
+    });
+
+    const results = await c.env.VECTORIZE.query(embeddings.data[0], {
+      topK: 5,
+      returnMetadata: true,
+    });
+
+    // Build context with citations
+    const context = results.matches
+      .map(
+        (match, i) =>
+          `[Source ${i + 1}] ${match.metadata?.text} (Relevance: ${(match.score * 100).toFixed(1)}%)`
+      )
+      .join('\n\n');
+
+    // Generate answer
+    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
+      messages: [
+        {
+          role: 'system',
+          content: `Answer the question using the provided sources. Cite sources using [Source N] format.
+
+${context}`,
+        },
+        {
+          role: 'user',
+          content: question,
+        },
+      ],
+    });
+
+    return c.json({
+      success: true,
+      answer: response.response,
+      sources: results.matches.map((m, i) => ({
+        id: i + 1,
+        documentId: m.metadata?.documentId,
+        text: m.metadata?.text,
+        score: m.score,
+      })),
+    });
+  } catch (error) {
+    return c.json(
+      {
+        success: false,
+        error: (error as Error).message,
+      },
+      500
+    );
+  }
+});
+
+// ============================================================================
+// Hybrid Search (Keyword + Semantic)
+// ============================================================================
+
+/**
+ * Combine keyword search (D1) with semantic search (Vectorize)
+ * for better recall
+ */
+
+app.post('/search/hybrid', async (c) => {
+  try {
+    const { query } = await c.req.json<{ query: string }>();
+
+    // Semantic search
+    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
+      text: [query],
+    });
+
+    const vectorResults = await c.env.VECTORIZE.query(embeddings.data[0], {
+      topK: 5,
+      returnMetadata: true,
+    });
+
+    // Keyword search (if D1 available)
+    let keywordResults: any[] = [];
+    if (c.env.DB) {
+      const { results } = await c.env.DB.prepare(
+        'SELECT id, text FROM documents WHERE text LIKE ? LIMIT 5'
+      )
+        .bind(`%${query}%`)
+        .all();
+
+      keywordResults = results || [];
+    }
+
+    // Combine and deduplicate
+    const combined = [
+      ...vectorResults.matches.map((m) => ({
+        id: m.id,
+        text: m.metadata?.text,
+        score: m.score,
+        source: 'vector',
+      })),
+      ...keywordResults.map((r) => ({
+        id: r.id,
+        text: r.text,
+        score: 1.0,
+        source: 'keyword',
+      })),
+    ];
+
+    // Deduplicate by ID
+    const unique = Array.from(new Map(combined.map((item) => [item.id, item])).values());
+
+    return c.json({
+      success: true,
+      query,
+      results: unique,
+    });
+  } catch (error) {
+    return c.json(
+      {
+        success: false,
+        error: (error as Error).message,
+      },
+      500
+    );
+  }
+});
+
+// ============================================================================
+// Delete Documents
+// ============================================================================
+
+app.delete('/documents/:id', async (c) => {
+  try {
+    const id = c.req.param('id');
+
+    // Delete from Vectorize
+    await c.env.VECTORIZE.deleteByIds([id]);
+
+    return c.json({
+      success: true,
+      message: 'Document deleted',
+      id,
+    });
+  } catch (error) {
+    return c.json(
+      {
+        success: false,
+        error: (error as Error).message,
+      },
+      500
+    );
+  }
+});
+
+// ============================================================================
+// Health Check
+// ============================================================================
+
+app.get('/health', (c) => {
+  return c.json({
+    status: 'ok',
+    timestamp: new Date().toISOString(),
+  });
+});
+
+export default app;