Initial commit

templates/batch-embeddings.ts (new file)

@@ -0,0 +1,240 @@
/**
 * Batch Embeddings with Rate Limiting
 *
 * Demonstrates processing multiple texts with proper rate limiting
 * and exponential backoff for production use.
 *
 * Setup:
 * 1. npm install @google/genai@^1.27.0
 * 2. export GEMINI_API_KEY="your-api-key"
 *
 * Usage:
 * npx tsx batch-embeddings.ts
 */

import { GoogleGenAI } from "@google/genai";

interface RateLimitConfig {
  requestsPerMinute: number;
  maxRetries: number;
  initialDelayMs: number;
}

class EmbeddingService {
  private ai: GoogleGenAI;
  private config: RateLimitConfig;
  private requestTimes: number[] = [];

  constructor(apiKey: string, config?: Partial<RateLimitConfig>) {
    this.ai = new GoogleGenAI({ apiKey });
    this.config = {
      requestsPerMinute: config?.requestsPerMinute ?? 100, // Free tier limit
      maxRetries: config?.maxRetries ?? 3,
      initialDelayMs: config?.initialDelayMs ?? 1000
    };
  }

  /**
   * Wait if needed to respect rate limits
   */
  private async enforceRateLimit(): Promise<void> {
    const now = Date.now();
    const oneMinuteAgo = now - 60000;

    // Remove requests older than 1 minute
    this.requestTimes = this.requestTimes.filter(time => time > oneMinuteAgo);

    // If at limit, wait until oldest request expires
    if (this.requestTimes.length >= this.config.requestsPerMinute) {
      const oldestRequest = this.requestTimes[0];
      const waitTime = 60000 - (now - oldestRequest) + 100; // +100ms buffer

      if (waitTime > 0) {
        console.log(`⏳ Rate limit reached. Waiting ${(waitTime / 1000).toFixed(1)}s...`);
        await new Promise(resolve => setTimeout(resolve, waitTime));
      }
    }

    this.requestTimes.push(Date.now());
  }
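
  // For example, with requestsPerMinute = 100: the 101st call inside one
  // minute sees 100 timestamps in the sliding window, so it sleeps until
  // roughly 60.1s after the first request (the 100ms buffer absorbs timer
  // drift) before recording its own timestamp.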

  /**
   * Generate embedding with retry logic
   */
  async embedText(
    text: string,
    options: {
      taskType?: string;
      outputDimensionality?: number;
    } = {}
  ): Promise<number[]> {
    const {
      taskType = 'SEMANTIC_SIMILARITY',
      outputDimensionality = 768
    } = options;

    for (let attempt = 0; attempt < this.config.maxRetries; attempt++) {
      try {
        await this.enforceRateLimit();

        const response = await this.ai.models.embedContent({
          model: 'gemini-embedding-001',
          contents: text,
          config: { taskType, outputDimensionality }
        });

        const values = response.embeddings?.[0]?.values;
        if (!values) {
          throw new Error('API returned no embedding values');
        }
        return values;

      } catch (error: any) {
        const isLastAttempt = attempt === this.config.maxRetries - 1;

        // Retry on rate limit errors
        if (error.status === 429 && !isLastAttempt) {
          const delay = this.config.initialDelayMs * Math.pow(2, attempt);
          console.log(`⚠️ Rate limit error. Retrying in ${delay / 1000}s... (attempt ${attempt + 1}/${this.config.maxRetries})`);
          await new Promise(resolve => setTimeout(resolve, delay));
          continue;
        }

        throw error;
      }
    }

    throw new Error(`Failed after ${this.config.maxRetries} retries`);
  }
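
  // With the defaults (initialDelayMs = 1000, maxRetries = 3), the backoff
  // schedule on repeated 429s is: first retry after 1s, second retry after
  // 2s, and the third failure is rethrown instead of waiting again.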

  /**
   * Batch embed multiple texts
   */
  async embedBatch(
    texts: string[],
    options: {
      taskType?: string;
      outputDimensionality?: number;
      onProgress?: (current: number, total: number) => void;
    } = {}
  ): Promise<number[][]> {
    const {
      taskType = 'RETRIEVAL_DOCUMENT',
      outputDimensionality = 768,
      onProgress
    } = options;

    console.log(`\n📊 Embedding ${texts.length} texts...`);
    console.log(`Rate limit: ${this.config.requestsPerMinute} RPM\n`);

    const embeddings: number[][] = [];
    const startTime = Date.now();

    for (let i = 0; i < texts.length; i++) {
      const text = texts[i];
      const embedding = await this.embedText(text, { taskType, outputDimensionality });
      embeddings.push(embedding);

      if (onProgress) {
        onProgress(i + 1, texts.length);
      }

      // Progress logging
      if ((i + 1) % 10 === 0 || i === texts.length - 1) {
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = (i + 1) / elapsed;
        const remaining = texts.length - (i + 1);
        const eta = remaining / rate;

        console.log(`✅ ${i + 1}/${texts.length} (${rate.toFixed(1)} texts/sec, ETA: ${eta.toFixed(1)}s)`);
      }
    }

    const totalTime = (Date.now() - startTime) / 1000;
    console.log(`\n✨ Completed in ${totalTime.toFixed(1)}s (avg: ${(texts.length / totalTime).toFixed(1)} texts/sec)\n`);

    return embeddings;
  }

  /**
   * Use batch API for multiple texts at once (more efficient)
   */
  async embedBatchAPI(
    texts: string[],
    options: {
      taskType?: string;
      outputDimensionality?: number;
    } = {}
  ): Promise<number[][]> {
    const {
      taskType = 'RETRIEVAL_DOCUMENT',
      outputDimensionality = 768
    } = options;

    await this.enforceRateLimit();

    const response = await this.ai.models.embedContent({
      model: 'gemini-embedding-001',
      contents: texts, // Array of strings
      config: { taskType, outputDimensionality }
    });

    if (!response.embeddings) {
      throw new Error('API returned no embeddings');
    }
    return response.embeddings.map(e => e.values ?? []);
  }
}
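
// Reference helper: cosine similarity over two returned vectors. A minimal
// sketch, not part of the template's API; note that reduced-dimension
// outputs (such as the 768 used above) are generally not unit-normalized,
// so both magnitudes are divided out rather than relying on a dot product.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}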

// Example usage
async function main() {
  try {
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) {
      throw new Error('GEMINI_API_KEY environment variable not set');
    }

    const service = new EmbeddingService(apiKey, {
      requestsPerMinute: 100, // Free tier
      maxRetries: 3
    });

    // Sample documents
    const documents = [
      "What is the meaning of life?",
      "How does photosynthesis work?",
      "Explain quantum mechanics in simple terms",
      "What is the history of artificial intelligence?",
      "How do neural networks learn?",
      "What is the difference between machine learning and deep learning?",
      "Explain the theory of relativity",
      "What is climate change?",
      "How does the human brain work?",
      "What is the future of technology?"
    ];

    console.log('🚀 Method 1: Sequential with rate limiting');
    const embeddings1 = await service.embedBatch(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768,
      onProgress: (current, total) => {
        // Optional: update a progress bar, database, etc.
      }
    });

    console.log('\n🚀 Method 2: Batch API (single request)');
    const startTime = Date.now();
    const embeddings2 = await service.embedBatchAPI(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768
    });
    const elapsed = (Date.now() - startTime) / 1000;

    console.log(`✨ Completed in ${elapsed.toFixed(1)}s (${documents.length} texts in 1 request)\n`);

    // Verify results
    console.log('📈 Results:');
    console.log(`Embeddings generated: ${embeddings2.length}`);
    console.log(`Dimensions per embedding: ${embeddings2[0].length}`);
    console.log(`Total values: ${embeddings2.length * embeddings2[0].length}`);

  } catch (error: any) {
    console.error('❌ Error:', error.message);
    process.exit(1);
  }
}

main();
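
// Assumed extension, not exercised above: queries searched against documents
// embedded with RETRIEVAL_DOCUMENT should use the matching RETRIEVAL_QUERY
// task type, e.g.:
//
//   const queryVector = await service.embedText("how do plants make food?", {
//     taskType: 'RETRIEVAL_QUERY',
//     outputDimensionality: 768
//   });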