Initial commit

2025-11-30 08:24:51 +08:00
commit 8aebb293cd
31 changed files with 7386 additions and 0 deletions
--- a/templates/multimodal-image.ts
+++ b/templates/multimodal-image.ts
@@ -0,0 +1,117 @@
+/**
+ * Multimodal Image Understanding with Gemini API
+ *
+ * Demonstrates:
+ * - Image analysis with vision capabilities
+ * - Base64 encoding of images
+ * - Combining text and image inputs
+ * - Multiple images in one request
+ *
+ * Prerequisites:
+ * - npm install @google/genai@1.27.0
+ * - export GEMINI_API_KEY="..."
+ */
+
+import { GoogleGenAI } from '@google/genai';
+import fs from 'fs';
+
+async function main() {
+  const ai = new GoogleGenAI({
+    apiKey: process.env.GEMINI_API_KEY,
+  });
+
+  try {
+    // Example 1: Analyze a single image
+    console.log('Example 1: Analyze Single Image\n');
+
+    // Load image from file
+    const imagePath = '/path/to/image.jpg'; // Replace with actual path
+    const imageData = fs.readFileSync(imagePath);
+    const base64Image = imageData.toString('base64');
+
+    const response1 = await ai.models.generateContent({
+      model: 'gemini-2.5-flash',
+      contents: [
+        {
+          parts: [
+            { text: 'Describe this image in detail. What objects, people, or scenes do you see?' },
+            {
+              inlineData: {
+                data: base64Image,
+                mimeType: 'image/jpeg' // or 'image/png', 'image/webp', etc.
+              }
+            }
+          ]
+        }
+      ]
+    });
+
+    console.log(response1.text);
+    console.log('\n---\n');
+
+    // Example 2: Compare two images
+    console.log('Example 2: Compare Two Images\n');
+
+    const imagePath2 = '/path/to/image2.jpg'; // Replace with actual path
+    const imageData2 = fs.readFileSync(imagePath2);
+    const base64Image2 = imageData2.toString('base64');
+
+    const response2 = await ai.models.generateContent({
+      model: 'gemini-2.5-flash',
+      contents: [
+        {
+          parts: [
+            { text: 'Compare these two images. What are the similarities and differences?' },
+            { inlineData: { data: base64Image, mimeType: 'image/jpeg' } },
+            { inlineData: { data: base64Image2, mimeType: 'image/jpeg' } }
+          ]
+        }
+      ]
+    });
+
+    console.log(response2.text);
+    console.log('\n---\n');
+
+    // Example 3: Specific questions about image
+    console.log('Example 3: Specific Questions\n');
+
+    const response3 = await ai.models.generateContent({
+      model: 'gemini-2.5-flash',
+      contents: [
+        {
+          parts: [
+            { text: 'How many people are in this image? What are they wearing?' },
+            { inlineData: { data: base64Image, mimeType: 'image/jpeg' } }
+          ]
+        }
+      ]
+    });
+
+    console.log(response3.text);
+
+  } catch (error: any) {
+    console.error('Error:', error.message);
+
+    if (error.message.includes('ENOENT')) {
+      console.error('\n⚠️ Image file not found. Update the imagePath variable with a valid path.');
+    }
+  }
+}
+
+/**
+ * Supported image formats:
+ * - JPEG (.jpg, .jpeg)
+ * - PNG (.png)
+ * - WebP (.webp)
+ * - HEIC (.heic)
+ * - HEIF (.heif)
+ *
+ * Max size: 20MB per image
+ *
+ * Tips:
+ * - Use specific, detailed prompts for better results
+ * - You can analyze multiple images in one request
+ * - gemini-2.5-flash and gemini-2.5-pro both support vision
+ */
+
+main();