Initial commit

templates/batch-embeddings.ts (new file)

@@ -0,0 +1,240 @@
/**
 * Batch Embeddings with Rate Limiting
 *
 * Demonstrates processing multiple texts with proper rate limiting
 * and exponential backoff for production use.
 *
 * Setup:
 * 1. npm install @google/genai@^1.27.0
 * 2. export GEMINI_API_KEY="your-api-key"
 *
 * Usage:
 * npx tsx batch-embeddings.ts
 */

import { GoogleGenAI } from "@google/genai";

interface RateLimitConfig {
  requestsPerMinute: number;
  maxRetries: number;
  initialDelayMs: number;
}

class EmbeddingService {
  private ai: GoogleGenAI;
  private config: RateLimitConfig;
  private requestTimes: number[] = [];

  constructor(apiKey: string, config?: Partial<RateLimitConfig>) {
    this.ai = new GoogleGenAI({ apiKey });
    this.config = {
      requestsPerMinute: config?.requestsPerMinute ?? 100, // Free tier limit
      maxRetries: config?.maxRetries ?? 3,
      initialDelayMs: config?.initialDelayMs ?? 1000
    };
  }

  /**
   * Wait if needed to respect rate limits
   */
  private async enforceRateLimit(): Promise<void> {
    const now = Date.now();
    const oneMinuteAgo = now - 60000;

    // Remove requests older than 1 minute
    this.requestTimes = this.requestTimes.filter(time => time > oneMinuteAgo);

    // If at limit, wait until oldest request expires
    if (this.requestTimes.length >= this.config.requestsPerMinute) {
      const oldestRequest = this.requestTimes[0];
      const waitTime = 60000 - (now - oldestRequest) + 100; // +100ms buffer

      if (waitTime > 0) {
        console.log(`⏳ Rate limit reached. Waiting ${(waitTime / 1000).toFixed(1)}s...`);
        await new Promise(resolve => setTimeout(resolve, waitTime));
      }
    }

    this.requestTimes.push(Date.now());
  }
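
  // For example, with requestsPerMinute = 100: the 101st call inside one
  // minute sees 100 timestamps in the sliding window, so it sleeps until
  // roughly 60.1s after the first request (the 100ms buffer absorbs timer
  // drift) before recording its own timestamp.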

  /**
   * Generate embedding with retry logic
   */
  async embedText(
    text: string,
    options: {
      taskType?: string;
      outputDimensionality?: number;
    } = {}
  ): Promise<number[]> {
    const {
      taskType = 'SEMANTIC_SIMILARITY',
      outputDimensionality = 768
    } = options;

    for (let attempt = 0; attempt < this.config.maxRetries; attempt++) {
      try {
        await this.enforceRateLimit();

        const response = await this.ai.models.embedContent({
          model: 'gemini-embedding-001',
          contents: text,
          config: { taskType, outputDimensionality }
        });

        const values = response.embeddings?.[0]?.values;
        if (!values) {
          throw new Error('API returned no embedding values');
        }
        return values;

      } catch (error: any) {
        const isLastAttempt = attempt === this.config.maxRetries - 1;

        // Retry on rate limit errors
        if (error.status === 429 && !isLastAttempt) {
          const delay = this.config.initialDelayMs * Math.pow(2, attempt);
          console.log(`⚠️ Rate limit error. Retrying in ${delay / 1000}s... (attempt ${attempt + 1}/${this.config.maxRetries})`);
          await new Promise(resolve => setTimeout(resolve, delay));
          continue;
        }

        throw error;
      }
    }

    throw new Error(`Failed after ${this.config.maxRetries} retries`);
  }
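
  // With the defaults (initialDelayMs = 1000, maxRetries = 3), the backoff
  // schedule on repeated 429s is: first retry after 1s, second retry after
  // 2s, and the third failure is rethrown instead of waiting again.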

  /**
   * Batch embed multiple texts
   */
  async embedBatch(
    texts: string[],
    options: {
      taskType?: string;
      outputDimensionality?: number;
      onProgress?: (current: number, total: number) => void;
    } = {}
  ): Promise<number[][]> {
    const {
      taskType = 'RETRIEVAL_DOCUMENT',
      outputDimensionality = 768,
      onProgress
    } = options;

    console.log(`\n📊 Embedding ${texts.length} texts...`);
    console.log(`Rate limit: ${this.config.requestsPerMinute} RPM\n`);

    const embeddings: number[][] = [];
    const startTime = Date.now();

    for (let i = 0; i < texts.length; i++) {
      const text = texts[i];
      const embedding = await this.embedText(text, { taskType, outputDimensionality });
      embeddings.push(embedding);

      if (onProgress) {
        onProgress(i + 1, texts.length);
      }

      // Progress logging
      if ((i + 1) % 10 === 0 || i === texts.length - 1) {
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = (i + 1) / elapsed;
        const remaining = texts.length - (i + 1);
        const eta = remaining / rate;

        console.log(`✅ ${i + 1}/${texts.length} (${rate.toFixed(1)} texts/sec, ETA: ${eta.toFixed(1)}s)`);
      }
    }

    const totalTime = (Date.now() - startTime) / 1000;
    console.log(`\n✨ Completed in ${totalTime.toFixed(1)}s (avg: ${(texts.length / totalTime).toFixed(1)} texts/sec)\n`);

    return embeddings;
  }

  /**
   * Use batch API for multiple texts at once (more efficient)
   */
  async embedBatchAPI(
    texts: string[],
    options: {
      taskType?: string;
      outputDimensionality?: number;
    } = {}
  ): Promise<number[][]> {
    const {
      taskType = 'RETRIEVAL_DOCUMENT',
      outputDimensionality = 768
    } = options;

    await this.enforceRateLimit();

    const response = await this.ai.models.embedContent({
      model: 'gemini-embedding-001',
      contents: texts, // Array of strings
      config: { taskType, outputDimensionality }
    });

    if (!response.embeddings) {
      throw new Error('API returned no embeddings');
    }
    return response.embeddings.map(e => e.values ?? []);
  }
}
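
// Reference helper: cosine similarity over two returned vectors. A minimal
// sketch, not part of the template's API; note that reduced-dimension
// outputs (such as the 768 used above) are generally not unit-normalized,
// so both magnitudes are divided out rather than relying on a dot product.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}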

// Example usage
async function main() {
  try {
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) {
      throw new Error('GEMINI_API_KEY environment variable not set');
    }

    const service = new EmbeddingService(apiKey, {
      requestsPerMinute: 100, // Free tier
      maxRetries: 3
    });

    // Sample documents
    const documents = [
      "What is the meaning of life?",
      "How does photosynthesis work?",
      "Explain quantum mechanics in simple terms",
      "What is the history of artificial intelligence?",
      "How do neural networks learn?",
      "What is the difference between machine learning and deep learning?",
      "Explain the theory of relativity",
      "What is climate change?",
      "How does the human brain work?",
      "What is the future of technology?"
    ];

    console.log('🚀 Method 1: Sequential with rate limiting');
    const embeddings1 = await service.embedBatch(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768,
      onProgress: (current, total) => {
        // Optional: update a progress bar, database, etc.
      }
    });

    console.log('\n🚀 Method 2: Batch API (single request)');
    const startTime = Date.now();
    const embeddings2 = await service.embedBatchAPI(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768
    });
    const elapsed = (Date.now() - startTime) / 1000;

    console.log(`✨ Completed in ${elapsed.toFixed(1)}s (${documents.length} texts in 1 request)\n`);

    // Verify results
    console.log('📈 Results:');
    console.log(`Embeddings generated: ${embeddings2.length}`);
    console.log(`Dimensions per embedding: ${embeddings2[0].length}`);
    console.log(`Total values: ${embeddings2.length * embeddings2[0].length}`);

  } catch (error: any) {
    console.error('❌ Error:', error.message);
    process.exit(1);
  }
}

main();
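
// Assumed extension, not exercised above: queries searched against documents
// embedded with RETRIEVAL_DOCUMENT should use the matching RETRIEVAL_QUERY
// task type, e.g.:
//
//   const queryVector = await service.embedText("how do plants make food?", {
//     taskType: 'RETRIEVAL_QUERY',
//     outputDimensionality: 768
//   });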