// gh-jezweb-claude-skills-ski…/templates/multimodal-image.ts

/**
 * Multimodal Image Understanding with Gemini API
 *
 * Demonstrates:
 * - Image analysis with vision capabilities
 * - Base64 encoding of images
 * - Combining text and image inputs
 * - Multiple images in one request
 *
 * Prerequisites:
 * - npm install @google/genai@1.27.0
 * - export GEMINI_API_KEY="..."
 */

import { GoogleGenAI } from '@google/genai';
import fs from 'fs';

/**
 * MIME types keyed by lowercase file extension for the image formats these
 * examples are intended to be used with.
 */
const IMAGE_MIME_TYPES: ReadonlyMap<string, string> = new Map([
  ['jpg', 'image/jpeg'],
  ['jpeg', 'image/jpeg'],
  ['png', 'image/png'],
  ['webp', 'image/webp'],
  ['heic', 'image/heic'],
  ['heif', 'image/heif'],
]);

/** MIME type used when a path has no recognised image extension. */
const DEFAULT_IMAGE_MIME_TYPE = 'image/jpeg';

/**
 * Picks the MIME type for an image from its file extension (case-insensitive).
 *
 * @param imagePath - Path or file name of the image.
 * @returns The matching MIME type, or `image/jpeg` when the extension is
 *   missing or not recognised.
 */
function mimeTypeForImagePath(imagePath: string): string {
  // Only the final path segment may contribute an extension, hence no
  // separators or further dots are allowed after the matched dot.
  const match = /\.([^./\\]+)$/.exec(imagePath);
  const extension = match?.[1]?.toLowerCase() ?? '';
  return IMAGE_MIME_TYPES.get(extension) ?? DEFAULT_IMAGE_MIME_TYPE;
}

/**
 * Reads an image from disk and wraps it as an inline-data content part.
 *
 * @param imagePath - Path of the image file to read.
 * @returns A part carrying the base64-encoded file contents and a MIME type
 *   derived from the file extension.
 * @throws Whatever `fs.readFileSync` throws, e.g. an `ENOENT` error when the
 *   file does not exist.
 */
function loadInlineImagePart(imagePath: string): {
  inlineData: { data: string; mimeType: string };
} {
  return {
    inlineData: {
      data: fs.readFileSync(imagePath).toString('base64'),
      mimeType: mimeTypeForImagePath(imagePath),
    },
  };
}

/**
 * Produces a printable message for any thrown value.
 *
 * @param error - The value caught by a `catch` clause.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Tells whether a thrown value represents a "file not found" failure.
 *
 * @param error - The value caught by a `catch` clause.
 * @returns `true` when the value carries `code === 'ENOENT'` or is an `Error`
 *   whose message mentions `ENOENT`; `false` otherwise.
 */
function isMissingFileError(error: unknown): boolean {
  if (
    typeof error === 'object' &&
    error !== null &&
    'code' in error &&
    error.code === 'ENOENT'
  ) {
    return true;
  }
  // Fallback for errors that were re-wrapped and only kept the message text.
  return error instanceof Error && error.message.includes('ENOENT');
}

/**
 * Runs three image-understanding examples against the Gemini API:
 * describing a single image, comparing two images, and asking specific
 * questions about an image.
 *
 * The API key is read from the `GEMINI_API_KEY` environment variable. Any
 * failure is reported on stderr instead of being re-thrown; a missing image
 * file additionally prints a hint about fixing the path.
 */
async function main(): Promise<void> {
  const model = 'gemini-2.5-flash';

  try {
    // Built inside the try so a client setup failure is reported like any
    // other error rather than escaping as an unhandled rejection.
    const ai = new GoogleGenAI({
      apiKey: process.env.GEMINI_API_KEY,
    });

    // Example 1: Analyze a single image
    console.log('Example 1: Analyze Single Image\n');

    const imagePath = '/path/to/image.jpg'; // Replace with actual path
    const imagePart = loadInlineImagePart(imagePath);

    const response1 = await ai.models.generateContent({
      model,
      contents: [
        {
          parts: [
            { text: 'Describe this image in detail. What objects, people, or scenes do you see?' },
            imagePart,
          ],
        },
      ],
    });

    console.log(response1.text);
    console.log('\n---\n');

    // Example 2: Compare two images
    console.log('Example 2: Compare Two Images\n');

    // The second image is loaded only now, so Example 1 still runs when this
    // file is missing.
    const imagePath2 = '/path/to/image2.jpg'; // Replace with actual path
    const imagePart2 = loadInlineImagePart(imagePath2);

    const response2 = await ai.models.generateContent({
      model,
      contents: [
        {
          parts: [
            { text: 'Compare these two images. What are the similarities and differences?' },
            imagePart,
            imagePart2,
          ],
        },
      ],
    });

    console.log(response2.text);
    console.log('\n---\n');

    // Example 3: Specific questions about image
    console.log('Example 3: Specific Questions\n');

    const response3 = await ai.models.generateContent({
      model,
      contents: [
        {
          parts: [
            { text: 'How many people are in this image? What are they wearing?' },
            imagePart,
          ],
        },
      ],
    });

    console.log(response3.text);
  } catch (error: unknown) {
    console.error('Error:', describeError(error));

    if (isMissingFileError(error)) {
      console.error('\n⚠️ Image file not found. Update the imagePath variable with a valid path.');
    }
  }
}

/**
 * Supported image formats:
 * - JPEG (.jpg, .jpeg)
 * - PNG (.png)
 * - WebP (.webp)
 * - HEIC (.heic)
 * - HEIF (.heif)
 *
 * Max size: 20MB total request size for inline image data (use the Files API for larger images)
 *
 * Tips:
 * - Use specific, detailed prompts for better results
 * - You can analyze multiple images in one request
 * - gemini-2.5-flash and gemini-2.5-pro both support vision
 */

// Script entry point: start the examples. `void` marks the returned promise
// as intentionally not awaited.
void main();