/**
 * Batch Embeddings with Rate Limiting
 *
 * Demonstrates processing multiple texts with proper rate limiting
 * and exponential backoff for production use.
 *
 * Setup:
 * 1. npm install @google/genai@^1.27.0
 * 2. export GEMINI_API_KEY="your-api-key"
 *
 * Usage:
 * npx tsx batch-embeddings.ts
 */

import { GoogleGenAI } from "@google/genai";

/** Tunables for client-side throttling and retry behaviour. */
interface RateLimitConfig {
  /** Maximum number of requests started within any rolling 60-second window. */
  requestsPerMinute: number;
  /** Total number of attempts made per request before giving up. */
  maxRetries: number;
  /** Delay before the first retry; doubled on every subsequent retry. */
  initialDelayMs: number;
}

/** Per-call embedding options shared by all embedding methods. */
interface EmbedOptions {
  taskType?: string;
  outputDimensionality?: number;
}

/** Options for the sequential batch method, adding a progress callback. */
interface EmbedBatchOptions extends EmbedOptions {
  onProgress?: (current: number, total: number) => void;
}

const EMBEDDING_MODEL = 'gemini-embedding-001';
const DEFAULT_DIMENSIONS = 768;
const RATE_WINDOW_MS = 60000;
const RATE_BUFFER_MS = 100;
const HTTP_TOO_MANY_REQUESTS = 429;

const DEFAULT_CONFIG: RateLimitConfig = {
  requestsPerMinute: 100, // Free tier limit
  maxRetries: 3,
  initialDelayMs: 1000
};

/** Resolve after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => setTimeout(resolve, ms));
}

/** Return `value` when it is a finite number greater than zero, else `fallback`. */
function positiveOr(value: number | undefined, fallback: number): number {
  return value !== undefined && Number.isFinite(value) && value > 0 ? value : fallback;
}

/** Return `value` when it is a finite number >= 0 (zero is allowed), else `fallback`. */
function nonNegativeOr(value: number | undefined, fallback: number): number {
  return value !== undefined && Number.isFinite(value) && value >= 0 ? value : fallback;
}

/** Extract a numeric `status` property from an unknown thrown value, if it has one. */
function getErrorStatus(error: unknown): number | undefined {
  if (typeof error !== 'object' || error === null || !('status' in error)) {
    return undefined;
  }
  const status = (error as { status?: unknown }).status;
  return typeof status === 'number' ? status : undefined;
}

/** Items per second, or 0 when no measurable time has elapsed (avoids Infinity/NaN in logs). */
function perSecond(count: number, seconds: number): number {
  return seconds > 0 ? count / seconds : 0;
}

/**
 * Thin wrapper around the Gemini embedding endpoint that throttles requests
 * to a configured requests-per-minute budget and retries HTTP 429 responses
 * with exponential backoff.
 */
class EmbeddingService {
  private readonly ai: GoogleGenAI;
  private readonly config: RateLimitConfig;
  /** Start timestamps (ms since epoch) of requests inside the current window, oldest first. */
  private requestTimes: number[] = [];

  /**
   * @param apiKey Gemini API key.
   * @param config Optional overrides. Missing, non-finite or non-positive values
   *   fall back to the defaults (100 RPM, 3 attempts, 1000 ms); `initialDelayMs`
   *   additionally accepts 0 to retry without waiting.
   */
  constructor(apiKey: string, config?: Partial<RateLimitConfig>) {
    this.ai = new GoogleGenAI({ apiKey });
    this.config = {
      requestsPerMinute: positiveOr(config?.requestsPerMinute, DEFAULT_CONFIG.requestsPerMinute),
      maxRetries: positiveOr(config?.maxRetries, DEFAULT_CONFIG.maxRetries),
      initialDelayMs: nonNegativeOr(config?.initialDelayMs, DEFAULT_CONFIG.initialDelayMs)
    };
  }

  /** Drop recorded request times that fell out of the rolling window ending at `now`. */
  private pruneRequestTimes(now: number): void {
    const windowStart = now - RATE_WINDOW_MS;
    this.requestTimes = this.requestTimes.filter(time => time > windowStart);
  }

  /**
   * Wait if needed to respect rate limits, then record the start of a new request.
   */
  private async enforceRateLimit(): Promise<void> {
    const now = Date.now();
    this.pruneRequestTimes(now);

    const oldestRequest = this.requestTimes[0];
    if (oldestRequest !== undefined && this.requestTimes.length >= this.config.requestsPerMinute) {
      // Wait until the oldest request leaves the window, plus a small buffer.
      const waitTime = RATE_WINDOW_MS - (now - oldestRequest) + RATE_BUFFER_MS;
      if (waitTime > 0) {
        console.log(`⏳ Rate limit reached. Waiting ${(waitTime / 1000).toFixed(1)}s...`);
        await sleep(waitTime);
      }
      // Time has passed; discard entries that expired while we were waiting.
      this.pruneRequestTimes(Date.now());
    }

    this.requestTimes.push(Date.now());
  }

  /**
   * Run `operation` under the rate limiter, retrying with exponential backoff
   * when it fails with HTTP status 429. Any other error, or a 429 on the final
   * attempt, is rethrown unchanged.
   */
  private async withRetry<T>(operation: () => Promise<T>): Promise<T> {
    const { maxRetries, initialDelayMs } = this.config;

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        await this.enforceRateLimit();
        return await operation();
      } catch (error: unknown) {
        const isLastAttempt = attempt === maxRetries - 1;
        if (getErrorStatus(error) !== HTTP_TOO_MANY_REQUESTS || isLastAttempt) {
          throw error;
        }

        const delay = initialDelayMs * Math.pow(2, attempt);
        console.log(`⚠️ Rate limit error. Retrying in ${delay / 1000}s... (attempt ${attempt + 1}/${maxRetries})`);
        await sleep(delay);
      }
    }

    throw new Error(`Failed after ${maxRetries} retries`);
  }

  /**
   * Generate an embedding for a single text, with rate limiting and retry.
   *
   * @param text Text to embed.
   * @param options `taskType` defaults to `SEMANTIC_SIMILARITY`,
   *   `outputDimensionality` to 768.
   * @returns The embedding vector.
   * @throws The underlying API error, or an `Error` if the response carries no vector.
   */
  async embedText(text: string, options: EmbedOptions = {}): Promise<number[]> {
    const { taskType = 'SEMANTIC_SIMILARITY', outputDimensionality = DEFAULT_DIMENSIONS } = options;

    return this.withRetry(async () => {
      const response = await this.ai.models.embedContent({
        model: EMBEDDING_MODEL,
        contents: text,
        config: { taskType, outputDimensionality }
      });

      // The SDK returns results as an `embeddings` array, even for a single input.
      const values = response.embeddings?.[0]?.values;
      if (!values) {
        throw new Error('Embedding response did not contain any values');
      }
      return values;
    });
  }

  /**
   * Embed multiple texts one request at a time, logging progress every ten
   * items and at the end.
   *
   * @param texts Texts to embed, in order.
   * @param options `taskType` defaults to `RETRIEVAL_DOCUMENT`,
   *   `outputDimensionality` to 768; `onProgress` is invoked after each item.
   * @returns One embedding per input text, in input order.
   */
  async embedBatch(texts: string[], options: EmbedBatchOptions = {}): Promise<number[][]> {
    const {
      taskType = 'RETRIEVAL_DOCUMENT',
      outputDimensionality = DEFAULT_DIMENSIONS,
      onProgress
    } = options;

    console.log(`\n📊 Embedding ${texts.length} texts...`);
    console.log(`Rate limit: ${this.config.requestsPerMinute} RPM\n`);

    const total = texts.length;
    const embeddings: number[][] = [];
    const startTime = Date.now();

    for (const [index, text] of texts.entries()) {
      embeddings.push(await this.embedText(text, { taskType, outputDimensionality }));

      const done = index + 1;
      onProgress?.(done, total);

      // Progress logging
      if (done % 10 === 0 || done === total) {
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = perSecond(done, elapsed);
        const eta = rate > 0 ? (total - done) / rate : 0;
        console.log(`✅ ${done}/${total} (${rate.toFixed(1)} texts/sec, ETA: ${eta.toFixed(1)}s)`);
      }
    }

    const totalTime = (Date.now() - startTime) / 1000;
    console.log(`\n✨ Completed in ${totalTime.toFixed(1)}s (avg: ${perSecond(total, totalTime).toFixed(1)} texts/sec)\n`);

    return embeddings;
  }

  /**
   * Use batch API for multiple texts at once (more efficient): all texts are
   * sent in a single request, which counts as one request for rate limiting.
   *
   * @param texts Texts to embed; an empty array returns `[]` without calling the API.
   * @param options `taskType` defaults to `RETRIEVAL_DOCUMENT`,
   *   `outputDimensionality` to 768.
   * @returns One embedding per input text, in input order.
   * @throws The underlying API error, or an `Error` if the response does not
   *   contain exactly one vector per input.
   */
  async embedBatchAPI(texts: string[], options: EmbedOptions = {}): Promise<number[][]> {
    const { taskType = 'RETRIEVAL_DOCUMENT', outputDimensionality = DEFAULT_DIMENSIONS } = options;

    if (texts.length === 0) {
      return [];
    }

    return this.withRetry(async () => {
      const response = await this.ai.models.embedContent({
        model: EMBEDDING_MODEL,
        contents: texts, // Array of strings
        config: { taskType, outputDimensionality }
      });

      const embeddings = response.embeddings ?? [];
      if (embeddings.length !== texts.length) {
        throw new Error(`Expected ${texts.length} embeddings but received ${embeddings.length}`);
      }

      return embeddings.map((embedding, index) => {
        if (!embedding.values) {
          throw new Error(`Embedding ${index} did not contain any values`);
        }
        return embedding.values;
      });
    });
  }
}

// Example usage
async function main(): Promise<void> {
  try {
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) {
      throw new Error('GEMINI_API_KEY environment variable not set');
    }

    const service = new EmbeddingService(apiKey, {
      requestsPerMinute: 100, // Free tier
      maxRetries: 3
    });

    // Sample documents
    const documents = [
      "What is the meaning of life?",
      "How does photosynthesis work?",
      "Explain quantum mechanics in simple terms",
      "What is the history of artificial intelligence?",
      "How do neural networks learn?",
      "What is the difference between machine learning and deep learning?",
      "Explain the theory of relativity",
      "What is climate change?",
      "How does the human brain work?",
      "What is the future of technology?"
    ];

    console.log('🚀 Method 1: Sequential with rate limiting');
    await service.embedBatch(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768,
      onProgress: (_current, _total) => {
        // Optional: Update progress bar, database, etc.
      }
    });

    console.log('\n🚀 Method 2: Batch API (single request)');
    const startTime = Date.now();
    const embeddings2 = await service.embedBatchAPI(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768
    });
    const elapsed = (Date.now() - startTime) / 1000;
    console.log(`✨ Completed in ${elapsed.toFixed(1)}s (${documents.length} texts in 1 request)\n`);

    // Verify results
    const dimensions = embeddings2[0]?.length ?? 0;
    console.log('📈 Results:');
    console.log(`Embeddings generated: ${embeddings2.length}`);
    console.log(`Dimensions per embedding: ${dimensions}`);
    console.log(`Total vectors: ${embeddings2.length * dimensions}`);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error('❌ Error:', message);
    process.exit(1);
  }
}

void main();