/**
* Complete RAG Implementation with Gemini Embeddings + Cloudflare Vectorize
*
 * Demonstrates end-to-end RAG (Retrieval-Augmented Generation):
* 1. Document ingestion (chunking + embedding + storage)
* 2. Query processing (embedding + vector search)
* 3. Response generation (context + LLM)
*
* Setup:
* 1. Create Vectorize index:
* npx wrangler vectorize create gemini-embeddings --dimensions 768 --metric cosine
*
* 2. Add to wrangler.jsonc:
 * {
 *   "vectorize": [{
 *     "binding": "VECTORIZE",
 *     "index_name": "gemini-embeddings"
 *   }]
 * }
*
* 3. Set secret:
* npx wrangler secret put GEMINI_API_KEY
*
* 4. Deploy:
* npx wrangler deploy
*
* Usage:
* POST /ingest - Upload documents
* POST /query - Ask questions
* GET /health - Check status
*/
interface Env {
GEMINI_API_KEY: string;
VECTORIZE: VectorizeIndex;
}
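/**
 * Minimal local typings for the Vectorize binding used below.
 * Assumption: if @cloudflare/workers-types is installed, its built-in
 * Vectorize types can be used instead and these interfaces removed.
 */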
interface VectorizeVector {
id: string;
values: number[];
metadata?: Record<string, any>;
}
interface VectorizeMatch {
id: string;
score: number;
metadata?: Record<string, any>;
}
interface VectorizeIndex {
insert(vectors: VectorizeVector[]): Promise<{ count: number }>;
query(
vector: number[],
options: { topK: number; returnMetadata?: boolean }
): Promise<{ matches: VectorizeMatch[] }>;
getByIds(ids: string[]): Promise<VectorizeVector[]>;
deleteByIds(ids: string[]): Promise<{ count: number }>;
}
/**
* Document chunking for better retrieval
*/
function chunkDocument(
text: string,
chunkSize: number = 500,
overlap: number = 50
): string[] {
const words = text.split(/\s+/);
const chunks: string[] = [];
for (let i = 0; i < words.length; i += chunkSize - overlap) {
const chunk = words.slice(i, i + chunkSize).join(' ');
if (chunk.trim().length > 0) {
chunks.push(chunk.trim());
}
}
return chunks;
}
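// Example (chunkDocument above): with chunkSize = 500 and overlap = 50 the loop
// advances 450 words per step, so a 1,200-word document yields chunks covering
// words 0-499, 450-949, and 900-1199 (the final chunk is simply shorter).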
/**
* Generate embedding using Gemini API
*/
async function generateEmbedding(
text: string,
apiKey: string,
taskType: string = 'RETRIEVAL_DOCUMENT'
): Promise<number[]> {
const response = await fetch(
'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:embedContent',
{
method: 'POST',
headers: {
'x-goog-api-key': apiKey,
'Content-Type': 'application/json'
},
body: JSON.stringify({
content: { parts: [{ text }] },
taskType,
outputDimensionality: 768 // MUST match Vectorize index dimensions
})
}
);
if (!response.ok) {
const error = await response.text();
throw new Error(`Embedding API error: ${response.status} - ${error}`);
}
const data = await response.json<{ embedding: { values: number[] } }>();
return data.embedding.values;
}
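/**
 * Optional helper (sketch, not called above): L2-normalize an embedding.
 * Assumption: Gemini embeddings returned at a reduced outputDimensionality
 * (such as 768) may not be unit-length; normalizing keeps cosine similarity
 * scores in Vectorize comparable. Skip if your vectors are already normalized.
 */
function normalizeEmbedding(values: number[]): number[] {
  const norm = Math.sqrt(values.reduce((sum, v) => sum + v * v, 0));
  return norm > 0 ? values.map(v => v / norm) : values;
}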
/**
* Generate response using Gemini API
*/
async function generateResponse(
context: string,
query: string,
apiKey: string
): Promise<string> {
const response = await fetch(
'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
{
method: 'POST',
headers: {
'x-goog-api-key': apiKey,
'Content-Type': 'application/json'
},
body: JSON.stringify({
contents: [{
parts: [{
text: `You are a helpful assistant. Answer the question based ONLY on the provided context.
Context:
${context}
Question: ${query}
Answer:`
}]
}]
})
}
);
if (!response.ok) {
const error = await response.text();
throw new Error(`Generation API error: ${response.status} - ${error}`);
}
const data = await response.json<{
candidates: Array<{
content: { parts: Array<{ text: string }> };
}>;
}>();
return data.candidates?.[0]?.content?.parts?.[0]?.text || 'No response generated';
}
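/**
 * Note (assumption): the generateContent request above can also include an
 * optional generationConfig block, e.g.
 *   generationConfig: { temperature: 0.2, maxOutputTokens: 1024 }
 * to make answers more deterministic and bound their length.
 */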
export default {
async fetch(request: Request, env: Env): Promise<Response> {
const url = new URL(request.url);
const corsHeaders = {
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
'Access-Control-Allow-Headers': 'Content-Type'
};
if (request.method === 'OPTIONS') {
return new Response(null, { headers: corsHeaders });
}
try {
// Health check
if (url.pathname === '/health') {
return new Response(JSON.stringify({
status: 'ok',
vectorize: 'connected',
gemini: 'ready'
}), {
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
// Document ingestion
if (url.pathname === '/ingest' && request.method === 'POST') {
const { documents } = await request.json<{ documents: Array<{ id: string; text: string }> }>();
if (!documents || !Array.isArray(documents)) {
return new Response(JSON.stringify({ error: 'Invalid request: documents array required' }), {
status: 400,
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
console.log(`📥 Ingesting ${documents.length} documents...`);
const vectors: VectorizeVector[] = [];
for (const doc of documents) {
// Chunk document
const chunks = chunkDocument(doc.text, 500, 50);
console.log(`📄 Document ${doc.id}: ${chunks.length} chunks`);
// Generate embeddings for each chunk
for (let i = 0; i < chunks.length; i++) {
const embedding = await generateEmbedding(
chunks[i],
env.GEMINI_API_KEY,
'RETRIEVAL_DOCUMENT' // ← Documents for indexing
);
vectors.push({
id: `${doc.id}-chunk-${i}`,
values: embedding,
metadata: {
documentId: doc.id,
chunkIndex: i,
text: chunks[i],
timestamp: Date.now()
}
});
}
}
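        // Note (assumption): Vectorize limits how many vectors a single insert()
        // call accepts, so very large ingests should split `vectors` into smaller
        // batches and insert them sequentially.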
// Insert into Vectorize
const result = await env.VECTORIZE.insert(vectors);
console.log(`✅ Ingested ${result.count} vectors`);
return new Response(JSON.stringify({
success: true,
documentsProcessed: documents.length,
chunksCreated: vectors.length,
vectorsInserted: result.count
}), {
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
// Query processing (RAG)
if (url.pathname === '/query' && request.method === 'POST') {
const { query, topK = 5 } = await request.json<{ query: string; topK?: number }>();
if (!query) {
return new Response(JSON.stringify({ error: 'Invalid request: query required' }), {
status: 400,
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
console.log(`🔍 Query: "${query}"`);
// 1. Generate query embedding
const queryEmbedding = await generateEmbedding(
query,
env.GEMINI_API_KEY,
'RETRIEVAL_QUERY' // ← Query, not document
);
// 2. Search Vectorize for similar chunks
const results = await env.VECTORIZE.query(queryEmbedding, {
topK,
returnMetadata: true
});
if (results.matches.length === 0) {
return new Response(JSON.stringify({
success: true,
answer: 'No relevant information found in the knowledge base.',
sources: []
}), {
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
console.log(`📚 Found ${results.matches.length} relevant chunks`);
// 3. Extract context from top matches
const context = results.matches
.map((match, i) => `[${i + 1}] ${match.metadata?.text || ''}`)
.join('\n\n');
// 4. Generate response with context
const answer = await generateResponse(context, query, env.GEMINI_API_KEY);
return new Response(JSON.stringify({
success: true,
query,
answer,
sources: results.matches.map(match => ({
documentId: match.metadata?.documentId,
chunkIndex: match.metadata?.chunkIndex,
similarity: match.score,
text: match.metadata?.text
}))
}, null, 2), {
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
// 404 for unknown routes
return new Response(JSON.stringify({
error: 'Not found',
routes: {
'POST /ingest': 'Upload documents',
'POST /query': 'Ask questions',
'GET /health': 'Health check'
}
}), {
status: 404,
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
} catch (error: any) {
console.error('❌ Error:', error.message);
return new Response(JSON.stringify({
success: false,
error: error.message
}), {
status: 500,
headers: { 'Content-Type': 'application/json', ...corsHeaders }
});
}
}
};
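/**
 * Sketch (not wired into the routes above): deleting a document's chunks.
 * Assumes the `${documentId}-chunk-${i}` ID convention used in /ingest and
 * that the caller knows how many chunks were created for the document.
 */
async function deleteDocumentChunks(
  env: Env,
  documentId: string,
  chunkCount: number
): Promise<number> {
  const ids = Array.from({ length: chunkCount }, (_, i) => `${documentId}-chunk-${i}`);
  const result = await env.VECTORIZE.deleteByIds(ids);
  return result.count;
}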
/**
* Example requests:
*
* 1. Ingest documents:
* curl -X POST https://your-worker.workers.dev/ingest \
* -H "Content-Type: application/json" \
* -d '{
* "documents": [
* {
* "id": "doc1",
* "text": "Paris is the capital of France. It is known for the Eiffel Tower..."
* },
* {
* "id": "doc2",
* "text": "Machine learning is a subset of artificial intelligence..."
* }
* ]
* }'
*
* 2. Query:
* curl -X POST https://your-worker.workers.dev/query \
* -H "Content-Type: application/json" \
* -d '{
* "query": "What is the capital of France?",
* "topK": 5
* }'
*
* 3. Health check:
* curl https://your-worker.workers.dev/health
*/