/**
 * Complete RAG Implementation with Gemini Embeddings + Cloudflare Vectorize
 *
 * Demonstrates end-to-end RAG (Retrieval Augmented Generation):
 * 1. Document ingestion (chunking + embedding + storage)
 * 2. Query processing (embedding + vector search)
 * 3. Response generation (context + LLM)
 *
 * Setup:
 * 1. Create Vectorize index:
 *    npx wrangler vectorize create gemini-embeddings --dimensions 768 --metric cosine
 *
 * 2. Add to wrangler.jsonc:
 *    {
 *      "vectorize": [
 *        {
 *          "binding": "VECTORIZE",
 *          "index_name": "gemini-embeddings"
 *        }
 *      ]
 *    }
 *
 * 3. Set secret:
 *    npx wrangler secret put GEMINI_API_KEY
 *
 * 4. Deploy:
 *    npx wrangler deploy
 *
 * Usage:
 *   POST /ingest - Upload documents
 *   POST /query  - Ask questions
 *   GET  /health - Check status
 */

/**
 * Bindings and secrets the Worker's `fetch` handler receives as its `env` argument.
 */
interface Env {
  /** Gemini API key; sent as the `x-goog-api-key` header on Gemini requests. */
  GEMINI_API_KEY: string;
  /** Vectorize index binding used to store and search chunk embeddings. */
  VECTORIZE: VectorizeIndex;
}

/**
 * A single vector record as passed to `VectorizeIndex.insert` and returned by
 * `VectorizeIndex.getByIds`.
 */
interface VectorizeVector {
  /** Identifier of the vector within the index. */
  id: string;
  /** Embedding values. */
  values: number[];
  /** Optional free-form metadata stored alongside the vector. */
  metadata?: Record<string, any>;
}

/**
 * One entry of the `matches` array returned by `VectorizeIndex.query`.
 */
interface VectorizeMatch {
  /** Identifier of the matched vector. */
  id: string;
  /** Score reported by the index for this match (surfaced to clients as `similarity`). */
  score: number;
  /** Metadata of the matched vector, when present in the query result. */
  metadata?: Record<string, any>;
}

/**
 * Local structural description of the Vectorize binding as this file uses it.
 *
 * NOTE(review): this mirrors the platform binding by hand; confirm the result
 * shapes against the Vectorize version the index was created with.
 */
interface VectorizeIndex {
  /** Stores the given vectors; resolves with the number reported by the index. */
  insert(vectors: VectorizeVector[]): Promise<{ count: number }>;

  /**
   * Searches for the vectors closest to `vector`.
   *
   * @param vector - Query embedding.
   * @param options - `topK` limits the number of matches; `returnMetadata`
   *   asks for stored metadata to be included in each match.
   */
  query(
    vector: number[],
    options: { topK: number; returnMetadata?: boolean }
  ): Promise<{ matches: VectorizeMatch[] }>;

  /** Fetches stored vectors by identifier. */
  getByIds(ids: string[]): Promise<VectorizeVector[]>;

  /** Deletes stored vectors by identifier; resolves with the number reported by the index. */
  deleteByIds(ids: string[]): Promise<{ count: number }>;
}

/**
 * Splits a document into overlapping windows of whitespace-delimited words.
 *
 * Consecutive chunks share `overlap` words so that text cut at a chunk
 * boundary still appears intact in one of the chunks. Chunking stops as soon
 * as a window reaches the last word, so no chunk is a pure subset of the
 * previous one.
 *
 * @param text - Raw document text.
 * @param chunkSize - Maximum number of words per chunk; must be a positive integer.
 * @param overlap - Number of words repeated at the start of the next chunk;
 *   must be an integer with `0 <= overlap < chunkSize`.
 * @returns The chunks in document order; empty when `text` contains no words.
 * @throws RangeError when `chunkSize` or `overlap` would prevent the window
 *   from advancing.
 */
function chunkDocument(
  text: string,
  chunkSize: number = 500,
  overlap: number = 50
): string[] {
  if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
    throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
  }
  // A step of zero or less would never advance and loop forever.
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= chunkSize) {
    throw new RangeError(
      `overlap must be an integer between 0 and chunkSize - 1, got ${overlap}`
    );
  }

  // Trim + filter so leading/trailing whitespace does not count as a word.
  const words = text
    .trim()
    .split(/\s+/)
    .filter((word) => word.length > 0);
  const chunks: string[] = [];
  const step = chunkSize - overlap;

  for (let start = 0; start < words.length; start += step) {
    const end = start + chunkSize;
    chunks.push(words.slice(start, end).join(' '));

    // This window already reached the final word; another pass would only
    // re-emit words contained in this chunk.
    if (end >= words.length) {
      break;
    }
  }

  return chunks;
}

/**
 * Generates an embedding for `text` with the Gemini `gemini-embedding-001` model.
 *
 * @param text - Text to embed.
 * @param apiKey - Gemini API key, sent as the `x-goog-api-key` header.
 * @param taskType - Gemini embedding task type; this file uses
 *   `'RETRIEVAL_DOCUMENT'` when indexing and `'RETRIEVAL_QUERY'` when searching.
 * @returns The embedding values from the API response.
 * @throws Error when the API answers with a non-2xx status or when the
 *   response body does not contain a non-empty numeric `embedding.values` array.
 */
async function generateEmbedding(
  text: string,
  apiKey: string,
  taskType: string = 'RETRIEVAL_DOCUMENT'
): Promise<number[]> {
  const response = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:embedContent',
    {
      method: 'POST',
      headers: {
        'x-goog-api-key': apiKey,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        content: { parts: [{ text }] },
        taskType,
        outputDimensionality: 768 // MUST match Vectorize index dimensions
      })
    }
  );

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Embedding API error: ${response.status} - ${error}`);
  }

  // The body is external input: validate it instead of trusting its shape, so
  // a malformed payload fails here with a clear message rather than as an
  // opaque TypeError (or a bad vector) further down the pipeline.
  const data = (await response.json()) as { embedding?: { values?: unknown } } | null;
  const values = data?.embedding?.values;

  if (
    !Array.isArray(values) ||
    values.length === 0 ||
    !values.every((value) => typeof value === 'number')
  ) {
    throw new Error('Embedding API error: response did not contain embedding values');
  }

  return values as number[];
}

/**
 * Generates an answer to `query` with the Gemini `gemini-2.5-flash` model,
 * instructing it to rely only on the supplied context.
 *
 * @param context - Retrieved text passages to place in the prompt.
 * @param query - The user's question.
 * @param apiKey - Gemini API key, sent as the `x-goog-api-key` header.
 * @returns The text of the first part of the first candidate, or
 *   `'No response generated'` when the response carries no such text.
 * @throws Error when the API answers with a non-2xx status.
 */
async function generateResponse(
  context: string,
  query: string,
  apiKey: string
): Promise<string> {
  const response = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
    {
      method: 'POST',
      headers: {
        'x-goog-api-key': apiKey,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        contents: [{
          parts: [{
            // Template lines start at column 0 on purpose: any indentation
            // here would become part of the prompt.
            text: `You are a helpful assistant. Answer the question based ONLY on the provided context.

Context:
${context}

Question: ${query}

Answer:`
          }]
        }]
      })
    }
  );

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Generation API error: ${response.status} - ${error}`);
  }

  // Every level is optional: a response may come back without `candidates`
  // (or without `content.parts`), and indexing into a missing array would
  // throw instead of reaching the fallback text.
  const data = (await response.json()) as {
    candidates?: Array<{
      content?: { parts?: Array<{ text?: string }> };
    }>;
  } | null;

  return data?.candidates?.[0]?.content?.parts?.[0]?.text || 'No response generated';
}

/** CORS headers attached to every response so browser clients on any origin can call the API. */
const CORS_HEADERS = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
  'Access-Control-Allow-Headers': 'Content-Type'
};

/** Shape of one entry in the `/ingest` request's `documents` array. */
interface IngestDocument {
  id: string;
  text: string;
}

/** Builds a JSON response with the CORS headers; `pretty` indents the body by two spaces. */
function jsonResponse(payload: unknown, status: number = 200, pretty: boolean = false): Response {
  const body = pretty ? JSON.stringify(payload, null, 2) : JSON.stringify(payload);
  return new Response(body, {
    status,
    headers: { 'Content-Type': 'application/json', ...CORS_HEADERS }
  });
}

/** Parses the request body as a JSON object; returns `null` for malformed JSON or non-object values. */
async function readJsonObject(request: Request): Promise<Record<string, unknown> | null> {
  try {
    const body: unknown = await request.json();
    if (typeof body !== 'object' || body === null || Array.isArray(body)) {
      return null;
    }
    return body as Record<string, unknown>;
  } catch {
    return null;
  }
}

/** Type guard: `value` is an object with a non-empty string `id` and a string `text`. */
function isIngestDocument(value: unknown): value is IngestDocument {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.id === 'string' &&
    candidate.id.length > 0 &&
    typeof candidate.text === 'string'
  );
}

/** Handles `POST /ingest`: chunks each document, embeds every chunk and inserts the vectors. */
async function handleIngest(request: Request, env: Env): Promise<Response> {
  const body = await readJsonObject(request);
  if (body === null) {
    return jsonResponse({ error: 'Invalid request: JSON object body required' }, 400);
  }

  const rawDocuments = body.documents;
  if (!Array.isArray(rawDocuments)) {
    return jsonResponse({ error: 'Invalid request: documents array required' }, 400);
  }

  const documents = rawDocuments.filter(isIngestDocument);
  if (documents.length !== rawDocuments.length) {
    return jsonResponse(
      { error: 'Invalid request: each document requires a non-empty string "id" and a string "text"' },
      400
    );
  }

  console.log(`📥 Ingesting ${documents.length} documents...`);

  const vectors: VectorizeVector[] = [];

  for (const doc of documents) {
    const chunks = chunkDocument(doc.text, 500, 50);
    console.log(`📄 Document ${doc.id}: ${chunks.length} chunks`);

    // Embeddings are requested one at a time, in order, as in the original flow.
    for (const [chunkIndex, chunkText] of chunks.entries()) {
      const embedding = await generateEmbedding(
        chunkText,
        env.GEMINI_API_KEY,
        'RETRIEVAL_DOCUMENT' // Documents for indexing
      );

      vectors.push({
        id: `${doc.id}-chunk-${chunkIndex}`,
        values: embedding,
        metadata: {
          documentId: doc.id,
          chunkIndex,
          text: chunkText,
          timestamp: Date.now()
        }
      });
    }
  }

  // Nothing to store (no documents, or only blank text): report it instead of
  // sending an empty insert to the index.
  if (vectors.length === 0) {
    return jsonResponse({ error: 'Invalid request: documents contain no text to ingest' }, 400);
  }

  const result = await env.VECTORIZE.insert(vectors);

  console.log(`✅ Ingested ${result.count} vectors`);

  return jsonResponse({
    success: true,
    documentsProcessed: documents.length,
    chunksCreated: vectors.length,
    vectorsInserted: result.count
  });
}

/** Handles `POST /query`: embeds the question, retrieves the closest chunks and generates an answer. */
async function handleQuery(request: Request, env: Env): Promise<Response> {
  const body = await readJsonObject(request);
  if (body === null) {
    return jsonResponse({ error: 'Invalid request: JSON object body required' }, 400);
  }

  const query = body.query;
  if (typeof query !== 'string' || query.trim().length === 0) {
    return jsonResponse({ error: 'Invalid request: query required' }, 400);
  }

  // topK defaults to 5 only when the field is absent; anything else must be a
  // positive integer. NOTE(review): Vectorize also enforces an upper bound on
  // topK; confirm the current limit before adding a cap here.
  const topK = body.topK === undefined ? 5 : body.topK;
  if (typeof topK !== 'number' || !Number.isInteger(topK) || topK < 1) {
    return jsonResponse({ error: 'Invalid request: topK must be a positive integer' }, 400);
  }

  console.log(`🔍 Query: "${query}"`);

  // 1. Generate query embedding
  const queryEmbedding = await generateEmbedding(
    query,
    env.GEMINI_API_KEY,
    'RETRIEVAL_QUERY' // Query, not document
  );

  // 2. Search Vectorize for similar chunks
  const results = await env.VECTORIZE.query(queryEmbedding, {
    topK,
    returnMetadata: true
  });

  if (results.matches.length === 0) {
    return jsonResponse({
      success: true,
      answer: 'No relevant information found in the knowledge base.',
      sources: []
    });
  }

  console.log(`📚 Found ${results.matches.length} relevant chunks`);

  // 3. Build a numbered context block from the matched chunks
  const context = results.matches
    .map((match, i) => `[${i + 1}] ${match.metadata?.text || ''}`)
    .join('\n\n');

  // 4. Generate response with context
  const answer = await generateResponse(context, query, env.GEMINI_API_KEY);

  return jsonResponse(
    {
      success: true,
      query,
      answer,
      sources: results.matches.map((match) => ({
        documentId: match.metadata?.documentId,
        chunkIndex: match.metadata?.chunkIndex,
        similarity: match.score,
        text: match.metadata?.text
      }))
    },
    200,
    true
  );
}

/**
 * Worker entry point.
 *
 * Routes:
 * - `OPTIONS *`     - CORS preflight.
 * - `* /health`     - static status payload.
 * - `POST /ingest`  - chunk, embed and store documents.
 * - `POST /query`   - retrieve context and generate an answer.
 * - anything else   - 404 with a route listing.
 *
 * Invalid client input yields 400; any error thrown while handling a request
 * is logged and returned as a 500 with `{ success: false, error }`.
 */
const worker = {
  async fetch(request: Request, env: Env): Promise<Response> {
    const url = new URL(request.url);

    if (request.method === 'OPTIONS') {
      return new Response(null, { headers: CORS_HEADERS });
    }

    try {
      // Health check
      if (url.pathname === '/health') {
        return jsonResponse({
          status: 'ok',
          vectorize: 'connected',
          gemini: 'ready'
        });
      }

      // Document ingestion
      if (url.pathname === '/ingest' && request.method === 'POST') {
        return await handleIngest(request, env);
      }

      // Query processing (RAG)
      if (url.pathname === '/query' && request.method === 'POST') {
        return await handleQuery(request, env);
      }

      // 404 for unknown routes
      return jsonResponse(
        {
          error: 'Not found',
          routes: {
            'POST /ingest': 'Upload documents',
            'POST /query': 'Ask questions',
            'GET /health': 'Health check'
          }
        },
        404
      );
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error('❌ Error:', message);

      return jsonResponse({ success: false, error: message }, 500);
    }
  }
};

export default worker;

/**
 * Example requests:
 *
 * 1. Ingest documents:
 *    curl -X POST https://your-worker.workers.dev/ingest \
 *      -H "Content-Type: application/json" \
 *      -d '{
 *        "documents": [
 *          {
 *            "id": "doc1",
 *            "text": "Paris is the capital of France. It is known for the Eiffel Tower..."
 *          },
 *          {
 *            "id": "doc2",
 *            "text": "Machine learning is a subset of artificial intelligence..."
 *          }
 *        ]
 *      }'
 *
 * 2. Query:
 *    curl -X POST https://your-worker.workers.dev/query \
 *      -H "Content-Type: application/json" \
 *      -d '{
 *        "query": "What is the capital of France?",
 *        "topK": 5
 *      }'
 *
 * 3. Health check:
 *    curl https://your-worker.workers.dev/health
 */