Initial commit
This commit is contained in:
361
templates/rag-with-vectorize.ts
Normal file
361
templates/rag-with-vectorize.ts
Normal file
@@ -0,0 +1,361 @@
|
||||
/**
 * Complete RAG Implementation with Gemini Embeddings + Cloudflare Vectorize
 *
 * Demonstrates end-to-end RAG (Retrieval-Augmented Generation):
 * 1. Document ingestion (chunking + embedding + storage)
 * 2. Query processing (embedding + vector search)
 * 3. Response generation (context + LLM)
 *
 * Setup:
 * 1. Create Vectorize index:
 *    npx wrangler vectorize create gemini-embeddings --dimensions 768 --metric cosine
 *
 * 2. Add to wrangler.jsonc (the "vectorize" key takes an array of bindings):
 *    {
 *      "vectorize": [
 *        {
 *          "binding": "VECTORIZE",
 *          "index_name": "gemini-embeddings"
 *        }
 *      ]
 *    }
 *
 * 3. Set secret:
 *    npx wrangler secret put GEMINI_API_KEY
 *
 * 4. Deploy:
 *    npx wrangler deploy
 *
 * Usage:
 *   POST /ingest - Upload documents
 *   POST /query  - Ask questions
 *   GET  /health - Check status
 */

/**
 * Bindings made available to the Worker through the `env` argument of `fetch`.
 */
interface Env {
  /** Gemini API key; sent as the `x-goog-api-key` header on every Gemini request. */
  GEMINI_API_KEY: string;
  /** Vectorize index binding used to store and search chunk embeddings. */
  VECTORIZE: VectorizeIndex;
}

/**
 * A single vector as written to (or read back from) the Vectorize index.
 */
interface VectorizeVector {
  /** Unique identifier; the ingest route builds it as `<documentId>-chunk-<index>`. */
  id: string;
  /** Embedding components. */
  values: number[];
  /**
   * Optional payload stored alongside the vector. Values are `unknown` so
   * readers must narrow them before use instead of trusting stored data.
   */
  metadata?: Record<string, unknown>;
}

/**
 * One entry of a similarity-search result.
 */
interface VectorizeMatch {
  /** Identifier of the matched vector. */
  id: string;
  /** Similarity score reported by the index for this match. */
  score: number;
  /**
   * Metadata stored with the matched vector, when the query asked for it.
   * Values are `unknown` so readers must narrow them before use.
   */
  metadata?: Record<string, unknown>;
}

/**
 * Hand-written description of the Vectorize binding methods declared for
 * this file.
 *
 * NOTE(review): this is a local subset, not the platform's own typings —
 * confirm the return shapes (for example whether `insert` reports a `count`)
 * against the binding version the Worker is deployed with.
 */
interface VectorizeIndex {
  /** Add vectors to the index. */
  insert(vectors: VectorizeVector[]): Promise<{ count: number }>;

  /** Find the `topK` stored vectors most similar to `vector`. */
  query(
    vector: number[],
    options: { topK: number; returnMetadata?: boolean }
  ): Promise<{ matches: VectorizeMatch[] }>;

  /** Fetch stored vectors by identifier. */
  getByIds(ids: string[]): Promise<VectorizeVector[]>;

  /** Remove stored vectors by identifier. */
  deleteByIds(ids: string[]): Promise<{ count: number }>;
}

/**
 * Split a document into overlapping, word-based chunks for retrieval.
 *
 * Words are the whitespace-delimited tokens of `text`. Consecutive chunks
 * start `chunkSize - overlap` words apart, so each chunk repeats the last
 * `overlap` words of its predecessor.
 *
 * @param text - Raw document text.
 * @param chunkSize - Maximum number of words per chunk; a positive integer.
 * @param overlap - Number of words shared by neighbouring chunks; a
 *   non-negative integer strictly smaller than `chunkSize`.
 * @returns The chunks in document order; an empty array for blank input.
 * @throws RangeError when `chunkSize` or `overlap` is out of range. Without
 *   this guard a zero or negative stride would never terminate.
 */
function chunkDocument(
  text: string,
  chunkSize: number = 500,
  overlap: number = 50
): string[] {
  if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
    throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
  }
  if (!Number.isInteger(overlap) || overlap < 0 || overlap >= chunkSize) {
    throw new RangeError(
      `overlap must be an integer in [0, chunkSize), got ${overlap} (chunkSize ${chunkSize})`
    );
  }

  // Trim first: splitting text with leading whitespace would otherwise yield
  // an empty first "word" that eats a slot in the first chunk.
  const trimmed = text.trim();
  if (trimmed.length === 0) {
    return [];
  }

  const words = trimmed.split(/\s+/);
  const stride = chunkSize - overlap;
  const chunks: string[] = [];

  for (let start = 0; start < words.length; start += stride) {
    const end = start + chunkSize;
    chunks.push(words.slice(start, end).join(' '));

    // Once a chunk reaches the final word, any further chunk would only be a
    // suffix of this one — stop so no redundant chunk gets embedded.
    if (end >= words.length) {
      break;
    }
  }

  return chunks;
}

/**
 * Request an embedding for `text` from the Gemini `embedContent` endpoint.
 *
 * @param text - Text to embed.
 * @param apiKey - Gemini API key, sent in the `x-goog-api-key` header.
 * @param taskType - Task type forwarded to the API; this file uses
 *   `RETRIEVAL_DOCUMENT` when indexing and `RETRIEVAL_QUERY` when searching.
 * @param outputDimensionality - Requested vector length. It MUST match the
 *   dimensions the Vectorize index was created with (768 in the setup notes).
 * @returns The embedding values.
 * @throws Error when the HTTP call fails, or when the response body does not
 *   contain a numeric vector of the requested length.
 */
async function generateEmbedding(
  text: string,
  apiKey: string,
  taskType: string = 'RETRIEVAL_DOCUMENT',
  outputDimensionality: number = 768
): Promise<number[]> {
  const response = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:embedContent',
    {
      method: 'POST',
      headers: {
        'x-goog-api-key': apiKey,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        content: { parts: [{ text }] },
        taskType,
        outputDimensionality
      })
    }
  );

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Embedding API error: ${response.status} - ${error}`);
  }

  // Treat the body as untrusted: a cast (rather than a typed `json<T>()`
  // call) compiles on any fetch typings, and the checks below do the
  // actual validation.
  const data = (await response.json()) as { embedding?: { values?: unknown } } | null;
  const values = data?.embedding?.values;

  if (!Array.isArray(values) || !values.every((v) => typeof v === 'number')) {
    throw new Error('Embedding API error: response did not contain embedding values');
  }
  if (values.length !== outputDimensionality) {
    // Fail here with a clear message instead of letting the vector store
    // reject a vector of the wrong size later.
    throw new Error(
      `Embedding API error: expected ${outputDimensionality} dimensions, received ${values.length}`
    );
  }

  return values as number[];
}

/**
 * Ask Gemini to answer `query` using only the supplied `context`.
 *
 * @param context - Retrieved text passages, already formatted for the prompt.
 * @param query - The user's question.
 * @param apiKey - Gemini API key, sent in the `x-goog-api-key` header.
 * @returns The generated answer (all text parts of the first candidate,
 *   concatenated), or `'No response generated'` when the response carries
 *   no text — for example when it contains no candidates at all.
 * @throws Error when the HTTP call fails.
 */
async function generateResponse(
  context: string,
  query: string,
  apiKey: string
): Promise<string> {
  // Assembled line by line so the prompt text does not depend on how this
  // function happens to be indented.
  const prompt = [
    'You are a helpful assistant. Answer the question based ONLY on the provided context.',
    '',
    'Context:',
    context,
    '',
    `Question: ${query}`,
    '',
    'Answer:'
  ].join('\n');

  const response = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent',
    {
      method: 'POST',
      headers: {
        'x-goog-api-key': apiKey,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        contents: [{ parts: [{ text: prompt }] }]
      })
    }
  );

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`Generation API error: ${response.status} - ${error}`);
  }

  // Every level is optional: a response without `candidates` must fall
  // through to the fallback text instead of throwing a TypeError.
  const data = (await response.json()) as {
    candidates?: Array<{
      content?: { parts?: Array<{ text?: unknown }> };
    }>;
  } | null;

  const parts = data?.candidates?.[0]?.content?.parts;
  const answer = Array.isArray(parts)
    ? parts.map((part) => (typeof part?.text === 'string' ? part.text : '')).join('')
    : '';

  // `||` on purpose: an empty answer is reported the same way as a missing one.
  return answer || 'No response generated';
}

/** CORS headers attached to every response, including preflight replies. */
const CORS_HEADERS = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
  'Access-Control-Allow-Headers': 'Content-Type'
};

/** Number of matches requested when the client does not send `topK`. */
const DEFAULT_TOP_K = 5;

/**
 * Upper bound applied to client-supplied `topK`.
 * NOTE(review): chosen to match the ceiling Vectorize documents for queries
 * that return metadata — confirm against the current platform limits.
 */
const MAX_TOP_K = 20;

/** Build a JSON response that carries the CORS headers. */
function jsonResponse(payload: unknown, status: number = 200, pretty: boolean = false): Response {
  const body = pretty ? JSON.stringify(payload, null, 2) : JSON.stringify(payload);
  return new Response(body, {
    status,
    headers: { 'Content-Type': 'application/json', ...CORS_HEADERS }
  });
}

/** True for non-null, non-array objects, i.e. values whose keys can be read safely. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Parse the request body as JSON; yields `undefined` when it is not valid JSON. */
async function parseJsonBody(request: Request): Promise<unknown> {
  try {
    return await request.json();
  } catch {
    return undefined;
  }
}

/** POST /ingest — chunk each document, embed every chunk, store the vectors. */
async function handleIngest(request: Request, env: Env): Promise<Response> {
  const body = await parseJsonBody(request);
  const rawDocuments = isRecord(body) ? body.documents : undefined;

  if (!Array.isArray(rawDocuments)) {
    return jsonResponse({ error: 'Invalid request: documents array required' }, 400);
  }

  // Validate every entry up front so a malformed document is a client error
  // (400) rather than a crash half-way through embedding.
  const documents: Array<{ id: string; text: string }> = [];
  for (const raw of rawDocuments) {
    if (!isRecord(raw) || typeof raw.id !== 'string' || raw.id === '' || typeof raw.text !== 'string') {
      return jsonResponse(
        { error: 'Invalid request: each document needs a non-empty string id and a string text' },
        400
      );
    }
    documents.push({ id: raw.id, text: raw.text });
  }

  console.log(`📥 Ingesting ${documents.length} documents...`);

  const vectors: VectorizeVector[] = [];

  for (const doc of documents) {
    const chunks = chunkDocument(doc.text, 500, 50);
    console.log(`📄 Document ${doc.id}: ${chunks.length} chunks`);

    // Chunks are embedded one at a time, in order.
    for (const [chunkIndex, chunkText] of chunks.entries()) {
      const embedding = await generateEmbedding(
        chunkText,
        env.GEMINI_API_KEY,
        'RETRIEVAL_DOCUMENT' // documents being indexed, not queries
      );

      vectors.push({
        id: `${doc.id}-chunk-${chunkIndex}`,
        values: embedding,
        metadata: {
          documentId: doc.id,
          chunkIndex,
          text: chunkText,
          timestamp: Date.now()
        }
      });
    }
  }

  // Nothing to store (no documents, or only blank ones): skip the index call.
  // NOTE(review): Vectorize documents `insert` as not overwriting existing
  // ids — re-ingesting a known document id may call for `upsert`; confirm.
  const vectorsInserted = vectors.length === 0
    ? 0
    : (await env.VECTORIZE.insert(vectors)).count;

  console.log(`✅ Ingested ${vectorsInserted} vectors`);

  return jsonResponse({
    success: true,
    documentsProcessed: documents.length,
    chunksCreated: vectors.length,
    vectorsInserted
  });
}

/** POST /query — embed the question, retrieve similar chunks, generate an answer. */
async function handleQuery(request: Request, env: Env): Promise<Response> {
  const body = await parseJsonBody(request);
  const query = isRecord(body) ? body.query : undefined;

  if (typeof query !== 'string' || query.trim() === '') {
    return jsonResponse({ error: 'Invalid request: query required' }, 400);
  }

  const requestedTopK = (isRecord(body) ? body.topK : undefined) ?? DEFAULT_TOP_K;
  if (typeof requestedTopK !== 'number' || !Number.isInteger(requestedTopK) || requestedTopK < 1) {
    return jsonResponse({ error: 'Invalid request: topK must be a positive integer' }, 400);
  }
  const topK = Math.min(requestedTopK, MAX_TOP_K);

  console.log(`🔍 Query: "${query}"`);

  // 1. Embed the question with the query task type (not the document one).
  const queryEmbedding = await generateEmbedding(query, env.GEMINI_API_KEY, 'RETRIEVAL_QUERY');

  // 2. Retrieve the most similar stored chunks together with their metadata.
  const { matches } = await env.VECTORIZE.query(queryEmbedding, {
    topK,
    returnMetadata: true
  });

  if (matches.length === 0) {
    return jsonResponse({
      success: true,
      answer: 'No relevant information found in the knowledge base.',
      sources: []
    });
  }

  console.log(`📚 Found ${matches.length} relevant chunks`);

  // 3. Number the retrieved passages and join them into the prompt context.
  const context = matches
    .map((match, index) => {
      const text = match.metadata?.text;
      return `[${index + 1}] ${typeof text === 'string' ? text : ''}`;
    })
    .join('\n\n');

  // 4. Generate the answer from the retrieved context.
  const answer = await generateResponse(context, query, env.GEMINI_API_KEY);

  return jsonResponse(
    {
      success: true,
      query,
      answer,
      sources: matches.map((match) => ({
        documentId: match.metadata?.documentId,
        chunkIndex: match.metadata?.chunkIndex,
        similarity: match.score,
        text: match.metadata?.text
      }))
    },
    200,
    true
  );
}

/**
 * Worker entry point.
 *
 * Routes:
 * - `OPTIONS *`     — CORS preflight.
 * - `* /health`     — static status payload; it does not probe Vectorize or Gemini.
 * - `POST /ingest`  — chunk, embed and store documents.
 * - `POST /query`   — retrieve context and generate an answer.
 * - anything else   — 404 with the list of routes.
 *
 * Errors thrown while handling a route are logged and returned as a 500 JSON
 * body of the form `{ success: false, error }`.
 */
export default {
  async fetch(request: Request, env: Env): Promise<Response> {
    const url = new URL(request.url);

    if (request.method === 'OPTIONS') {
      return new Response(null, { headers: CORS_HEADERS });
    }

    try {
      if (url.pathname === '/health') {
        return jsonResponse({
          status: 'ok',
          vectorize: 'connected',
          gemini: 'ready'
        });
      }

      if (url.pathname === '/ingest' && request.method === 'POST') {
        return await handleIngest(request, env);
      }

      if (url.pathname === '/query' && request.method === 'POST') {
        return await handleQuery(request, env);
      }

      return jsonResponse(
        {
          error: 'Not found',
          routes: {
            'POST /ingest': 'Upload documents',
            'POST /query': 'Ask questions',
            'GET /health': 'Health check'
          }
        },
        404
      );
    } catch (error: unknown) {
      // Anything can be thrown; only Error instances have a trustworthy message.
      const message = error instanceof Error ? error.message : String(error);
      console.error('❌ Error:', message);

      return jsonResponse({ success: false, error: message }, 500);
    }
  }
};

/**
 * Example requests:
 *
 * 1. Ingest documents:
 *    curl -X POST https://your-worker.workers.dev/ingest \
 *      -H "Content-Type: application/json" \
 *      -d '{
 *        "documents": [
 *          {
 *            "id": "doc1",
 *            "text": "Paris is the capital of France. It is known for the Eiffel Tower..."
 *          },
 *          {
 *            "id": "doc2",
 *            "text": "Machine learning is a subset of artificial intelligence..."
 *          }
 *        ]
 *      }'
 *
 * 2. Query:
 *    curl -X POST https://your-worker.workers.dev/query \
 *      -H "Content-Type: application/json" \
 *      -d '{
 *        "query": "What is the capital of France?",
 *        "topK": 5
 *      }'
 *
 * 3. Health check:
 *    curl https://your-worker.workers.dev/health
 */
Reference in New Issue
Block a user