/**
 * Batch Embeddings with Rate Limiting
 *
 * Demonstrates processing multiple texts with proper rate limiting
 * and exponential backoff for production use.
 *
 * Setup:
 *   1. npm install @google/genai@^1.27.0
 *   2. export GEMINI_API_KEY="your-api-key"
 *
 * Usage:
 *   npx tsx batch-embeddings.ts
 */

import { GoogleGenAI } from "@google/genai";

interface RateLimitConfig {
  requestsPerMinute: number;
  maxRetries: number;
  initialDelayMs: number;
}

class EmbeddingService {
  private ai: GoogleGenAI;
  private config: RateLimitConfig;
  private requestTimes: number[] = [];

  constructor(apiKey: string, config?: Partial<RateLimitConfig>) {
    this.ai = new GoogleGenAI({ apiKey });
    // Use ?? rather than || so an explicit 0 is not silently replaced
    this.config = {
      requestsPerMinute: config?.requestsPerMinute ?? 100, // Free tier limit
      maxRetries: config?.maxRetries ?? 3,
      initialDelayMs: config?.initialDelayMs ?? 1000
    };
  }

  /**
   * Wait if needed to respect rate limits (sliding one-minute window)
   */
  private async enforceRateLimit(): Promise<void> {
    const now = Date.now();
    const oneMinuteAgo = now - 60000;

    // Remove requests older than 1 minute
    this.requestTimes = this.requestTimes.filter(time => time > oneMinuteAgo);

    // If at the limit, wait until the oldest request leaves the window
    if (this.requestTimes.length >= this.config.requestsPerMinute) {
      const oldestRequest = this.requestTimes[0];
      const waitTime = 60000 - (now - oldestRequest) + 100; // +100ms buffer

      if (waitTime > 0) {
        console.log(`⏳ Rate limit reached. Waiting ${(waitTime / 1000).toFixed(1)}s...`);
        await new Promise(resolve => setTimeout(resolve, waitTime));
      }
    }

    this.requestTimes.push(Date.now());
  }

  /**
   * Generate a single embedding with retry and exponential backoff
   */
  async embedText(
    text: string,
    options: {
      taskType?: string;
      outputDimensionality?: number;
    } = {}
  ): Promise<number[]> {
    const {
      taskType = 'SEMANTIC_SIMILARITY',
      outputDimensionality = 768
    } = options;

    for (let attempt = 0; attempt < this.config.maxRetries; attempt++) {
      try {
        await this.enforceRateLimit();

        const response = await this.ai.models.embedContent({
          model: 'gemini-embedding-001',
          contents: text, // the SDK expects `contents`, not `content`
          config: { taskType, outputDimensionality }
        });

        // The response carries an `embeddings` array, even for a single input
        const values = response.embeddings?.[0]?.values;
        if (!values) {
          throw new Error('Response contained no embedding values');
        }
        return values;

      } catch (error: any) {
        const isLastAttempt = attempt === this.config.maxRetries - 1;

        // Retry with exponential backoff on rate limit errors
        if (error.status === 429 && !isLastAttempt) {
          const delay = this.config.initialDelayMs * Math.pow(2, attempt);
          console.log(`⚠️ Rate limit error. Retrying in ${delay / 1000}s... (attempt ${attempt + 1}/${this.config.maxRetries})`);
          await new Promise(resolve => setTimeout(resolve, delay));
          continue;
        }

        throw error;
      }
    }

    throw new Error(`Failed after ${this.config.maxRetries} retries`);
  }
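
  // Production note (an addition beyond the original example): adding random
  // jitter to the backoff delay avoids synchronized retry stampedes when many
  // workers hit the limit at once, e.g.
  //   const delay = this.config.initialDelayMs * 2 ** attempt * (0.5 + Math.random());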

  /**
   * Batch embed multiple texts, one request per text, with rate limiting
   */
  async embedBatch(
    texts: string[],
    options: {
      taskType?: string;
      outputDimensionality?: number;
      onProgress?: (current: number, total: number) => void;
    } = {}
  ): Promise<number[][]> {
    const {
      taskType = 'RETRIEVAL_DOCUMENT',
      outputDimensionality = 768,
      onProgress
    } = options;

    console.log(`\n📊 Embedding ${texts.length} texts...`);
    console.log(`Rate limit: ${this.config.requestsPerMinute} RPM\n`);

    const embeddings: number[][] = [];
    const startTime = Date.now();

    for (let i = 0; i < texts.length; i++) {
      const text = texts[i];
      const embedding = await this.embedText(text, { taskType, outputDimensionality });
      embeddings.push(embedding);

      if (onProgress) {
        onProgress(i + 1, texts.length);
      }

      // Progress logging every 10 texts and on the final text
      if ((i + 1) % 10 === 0 || i === texts.length - 1) {
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = (i + 1) / elapsed;
        const remaining = texts.length - (i + 1);
        const eta = remaining / rate;

        console.log(`✅ ${i + 1}/${texts.length} (${rate.toFixed(1)} texts/sec, ETA: ${eta.toFixed(1)}s)`);
      }
    }

    const totalTime = (Date.now() - startTime) / 1000;
    console.log(`\n✨ Completed in ${totalTime.toFixed(1)}s (avg: ${(texts.length / totalTime).toFixed(1)} texts/sec)\n`);

    return embeddings;
  }

  /**
   * Embed multiple texts in a single request (more efficient)
   */
  async embedBatchAPI(
    texts: string[],
    options: {
      taskType?: string;
      outputDimensionality?: number;
    } = {}
  ): Promise<number[][]> {
    const {
      taskType = 'RETRIEVAL_DOCUMENT',
      outputDimensionality = 768
    } = options;

    await this.enforceRateLimit();

    const response = await this.ai.models.embedContent({
      model: 'gemini-embedding-001',
      contents: texts, // An array of strings is embedded in one request
      config: { taskType, outputDimensionality }
    });

    return (response.embeddings ?? []).map(e => {
      if (!e.values) {
        throw new Error('Response contained an embedding without values');
      }
      return e.values;
    });
  }
}
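
// Helper (an addition, based on Gemini's documented behavior): embeddings from
// gemini-embedding-001 are unit-normalized only at the full 3072 dimensions.
// When truncating (e.g. outputDimensionality: 768), re-normalize before using
// dot-product or cosine similarity. A minimal sketch:
function normalize(vector: number[]): number[] {
  const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
  return norm === 0 ? vector : vector.map(v => v / norm);
}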

// Example usage
async function main() {
  try {
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) {
      throw new Error('GEMINI_API_KEY environment variable not set');
    }

    const service = new EmbeddingService(apiKey, {
      requestsPerMinute: 100, // Free tier
      maxRetries: 3
    });

    // Sample documents
    const documents = [
      "What is the meaning of life?",
      "How does photosynthesis work?",
      "Explain quantum mechanics in simple terms",
      "What is the history of artificial intelligence?",
      "How do neural networks learn?",
      "What is the difference between machine learning and deep learning?",
      "Explain the theory of relativity",
      "What is climate change?",
      "How does the human brain work?",
      "What is the future of technology?"
    ];

    console.log('🚀 Method 1: Sequential with rate limiting');
    const embeddings1 = await service.embedBatch(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768,
      onProgress: (current, total) => {
        // Optional: update a progress bar, database, etc.
      }
    });
    console.log(`Sequential method returned ${embeddings1.length} embeddings`);

    console.log('\n🚀 Method 2: Batch API (single request)');
    const startTime = Date.now();
    const embeddings2 = await service.embedBatchAPI(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768
    });
    const elapsed = (Date.now() - startTime) / 1000;

    console.log(`✨ Completed in ${elapsed.toFixed(1)}s (${documents.length} texts in 1 request)\n`);

    // Verify results
    console.log('📈 Results:');
    console.log(`Embeddings generated: ${embeddings2.length}`);
    console.log(`Dimensions per embedding: ${embeddings2[0].length}`);
    console.log(`Total values stored: ${embeddings2.length * embeddings2[0].length}`);

  } catch (error: any) {
    console.error('❌ Error:', error.message);
    process.exit(1);
  }
}

main();
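
// A common next step (sketch, not part of the original file): compare two
// embeddings with cosine similarity. For unit-normalized vectors this reduces
// to a plain dot product.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  // Guard against zero vectors, which would otherwise yield NaN
  return dot / (Math.sqrt(normA) * Math.sqrt(normB)) || 0;
}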