/**
 * Batch Embeddings with Rate Limiting
 *
 * Demonstrates processing multiple texts with proper rate limiting
 * and exponential backoff for production use.
 *
 * Setup:
 *   1. npm install @google/genai@^1.27.0
 *   2. export GEMINI_API_KEY="your-api-key"
 *
 * Usage:
 *   npx tsx batch-embeddings.ts
 */

import { GoogleGenAI } from "@google/genai";

interface RateLimitConfig {
  requestsPerMinute: number;
  maxRetries: number;
  initialDelayMs: number;
}

class EmbeddingService {
  private ai: GoogleGenAI;
  private config: RateLimitConfig;
  private requestTimes: number[] = [];

  constructor(apiKey: string, config?: Partial<RateLimitConfig>) {
    this.ai = new GoogleGenAI({ apiKey });
    // Use ?? rather than || so an explicit 0 is not silently replaced
    this.config = {
      requestsPerMinute: config?.requestsPerMinute ?? 100, // Free tier limit
      maxRetries: config?.maxRetries ?? 3,
      initialDelayMs: config?.initialDelayMs ?? 1000
    };
  }

  /**
   * Wait if needed to respect rate limits (sliding one-minute window)
   */
  private async enforceRateLimit(): Promise<void> {
    const now = Date.now();
    const oneMinuteAgo = now - 60000;

    // Remove requests older than 1 minute
    this.requestTimes = this.requestTimes.filter(time => time > oneMinuteAgo);

    // If at the limit, wait until the oldest request leaves the window
    if (this.requestTimes.length >= this.config.requestsPerMinute) {
      const oldestRequest = this.requestTimes[0];
      const waitTime = 60000 - (now - oldestRequest) + 100; // +100ms buffer

      if (waitTime > 0) {
        console.log(`⏳ Rate limit reached. Waiting ${(waitTime / 1000).toFixed(1)}s...`);
        await new Promise(resolve => setTimeout(resolve, waitTime));
      }
    }

    this.requestTimes.push(Date.now());
  }

  /**
   * Generate a single embedding with retry and exponential backoff
   */
  async embedText(
    text: string,
    options: {
      taskType?: string;
      outputDimensionality?: number;
    } = {}
  ): Promise<number[]> {
    const {
      taskType = 'SEMANTIC_SIMILARITY',
      outputDimensionality = 768
    } = options;

    for (let attempt = 0; attempt < this.config.maxRetries; attempt++) {
      try {
        await this.enforceRateLimit();

        const response = await this.ai.models.embedContent({
          model: 'gemini-embedding-001',
          contents: text, // the SDK expects `contents`, not `content`
          config: { taskType, outputDimensionality }
        });

        // The response carries an `embeddings` array, even for a single input
        const values = response.embeddings?.[0]?.values;
        if (!values) {
          throw new Error('Response contained no embedding values');
        }
        return values;

      } catch (error: any) {
        const isLastAttempt = attempt === this.config.maxRetries - 1;

        // Retry with exponential backoff on rate limit errors
        if (error.status === 429 && !isLastAttempt) {
          const delay = this.config.initialDelayMs * Math.pow(2, attempt);
          console.log(`⚠️ Rate limit error. Retrying in ${delay / 1000}s... (attempt ${attempt + 1}/${this.config.maxRetries})`);
          await new Promise(resolve => setTimeout(resolve, delay));
          continue;
        }

        throw error;
      }
    }

    throw new Error(`Failed after ${this.config.maxRetries} retries`);
  }
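
  // Production note (an addition beyond the original example): adding random
  // jitter to the backoff delay avoids synchronized retry stampedes when many
  // workers hit the limit at once, e.g.
  //   const delay = this.config.initialDelayMs * 2 ** attempt * (0.5 + Math.random());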

  /**
   * Batch embed multiple texts, one request per text, with rate limiting
   */
  async embedBatch(
    texts: string[],
    options: {
      taskType?: string;
      outputDimensionality?: number;
      onProgress?: (current: number, total: number) => void;
    } = {}
  ): Promise<number[][]> {
    const {
      taskType = 'RETRIEVAL_DOCUMENT',
      outputDimensionality = 768,
      onProgress
    } = options;

    console.log(`\n📊 Embedding ${texts.length} texts...`);
    console.log(`Rate limit: ${this.config.requestsPerMinute} RPM\n`);

    const embeddings: number[][] = [];
    const startTime = Date.now();

    for (let i = 0; i < texts.length; i++) {
      const text = texts[i];
      const embedding = await this.embedText(text, { taskType, outputDimensionality });
      embeddings.push(embedding);

      if (onProgress) {
        onProgress(i + 1, texts.length);
      }

      // Progress logging every 10 texts and on the final text
      if ((i + 1) % 10 === 0 || i === texts.length - 1) {
        const elapsed = (Date.now() - startTime) / 1000;
        const rate = (i + 1) / elapsed;
        const remaining = texts.length - (i + 1);
        const eta = remaining / rate;

        console.log(`✅ ${i + 1}/${texts.length} (${rate.toFixed(1)} texts/sec, ETA: ${eta.toFixed(1)}s)`);
      }
    }

    const totalTime = (Date.now() - startTime) / 1000;
    console.log(`\n✨ Completed in ${totalTime.toFixed(1)}s (avg: ${(texts.length / totalTime).toFixed(1)} texts/sec)\n`);

    return embeddings;
  }

  /**
   * Embed multiple texts in a single request (more efficient)
   */
  async embedBatchAPI(
    texts: string[],
    options: {
      taskType?: string;
      outputDimensionality?: number;
    } = {}
  ): Promise<number[][]> {
    const {
      taskType = 'RETRIEVAL_DOCUMENT',
      outputDimensionality = 768
    } = options;

    await this.enforceRateLimit();

    const response = await this.ai.models.embedContent({
      model: 'gemini-embedding-001',
      contents: texts, // An array of strings is embedded in one request
      config: { taskType, outputDimensionality }
    });

    return (response.embeddings ?? []).map(e => {
      if (!e.values) {
        throw new Error('Response contained an embedding without values');
      }
      return e.values;
    });
  }
}
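
// Helper (an addition, based on Gemini's documented behavior): embeddings from
// gemini-embedding-001 are unit-normalized only at the full 3072 dimensions.
// When truncating (e.g. outputDimensionality: 768), re-normalize before using
// dot-product or cosine similarity. A minimal sketch:
function normalize(vector: number[]): number[] {
  const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
  return norm === 0 ? vector : vector.map(v => v / norm);
}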

// Example usage
async function main() {
  try {
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) {
      throw new Error('GEMINI_API_KEY environment variable not set');
    }

    const service = new EmbeddingService(apiKey, {
      requestsPerMinute: 100, // Free tier
      maxRetries: 3
    });

    // Sample documents
    const documents = [
      "What is the meaning of life?",
      "How does photosynthesis work?",
      "Explain quantum mechanics in simple terms",
      "What is the history of artificial intelligence?",
      "How do neural networks learn?",
      "What is the difference between machine learning and deep learning?",
      "Explain the theory of relativity",
      "What is climate change?",
      "How does the human brain work?",
      "What is the future of technology?"
    ];

    console.log('🚀 Method 1: Sequential with rate limiting');
    const embeddings1 = await service.embedBatch(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768,
      onProgress: (current, total) => {
        // Optional: update a progress bar, database, etc.
      }
    });
    console.log(`Sequential method returned ${embeddings1.length} embeddings`);

    console.log('\n🚀 Method 2: Batch API (single request)');
    const startTime = Date.now();
    const embeddings2 = await service.embedBatchAPI(documents, {
      taskType: 'RETRIEVAL_DOCUMENT',
      outputDimensionality: 768
    });
    const elapsed = (Date.now() - startTime) / 1000;

    console.log(`✨ Completed in ${elapsed.toFixed(1)}s (${documents.length} texts in 1 request)\n`);

    // Verify results
    console.log('📈 Results:');
    console.log(`Embeddings generated: ${embeddings2.length}`);
    console.log(`Dimensions per embedding: ${embeddings2[0].length}`);
    console.log(`Total values stored: ${embeddings2.length * embeddings2[0].length}`);

  } catch (error: any) {
    console.error('❌ Error:', error.message);
    process.exit(1);
  }
}

main();
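
// A common next step (sketch, not part of the original file): compare two
// embeddings with cosine similarity. For unit-normalized vectors this reduces
// to a plain dot product.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  // Guard against zero vectors, which would otherwise yield NaN
  return dot / (Math.sqrt(normA) * Math.sqrt(normB)) || 0;
}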