Files
gh-jezweb-claude-skills-ski…/templates/multimodal-image.ts
2025-11-30 08:24:51 +08:00

118 lines
3.0 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Multimodal Image Understanding with Gemini API
*
* Demonstrates:
* - Image analysis with vision capabilities
* - Base64 encoding of images
* - Combining text and image inputs
* - Multiple images in one request
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
import fs from 'fs';
/**
 * Runs three image-understanding examples against `gemini-2.5-flash`:
 *   1. describe a single image,
 *   2. compare two images sent in the same request,
 *   3. ask specific questions about the first image.
 *
 * Images are read synchronously from disk, base64-encoded, and sent as
 * `inlineData` parts next to a text prompt. Each response's `text` is printed
 * to stdout.
 *
 * Failures (missing files, API errors, ...) are caught and reported on stderr
 * rather than rethrown; a missing-file error additionally prints a hint about
 * updating the placeholder path.
 */
async function main(): Promise<void> {
  const ai = new GoogleGenAI({
    apiKey: process.env.GEMINI_API_KEY,
  });

  try {
    // Example 1: Analyze a single image
    console.log('Example 1: Analyze Single Image\n');

    // Load image from file
    const imagePath = '/path/to/image.jpg'; // Replace with actual path
    const imageData = fs.readFileSync(imagePath);
    const base64Image = imageData.toString('base64');

    const response1 = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: [
        {
          parts: [
            { text: 'Describe this image in detail. What objects, people, or scenes do you see?' },
            {
              inlineData: {
                data: base64Image,
                mimeType: 'image/jpeg' // or 'image/png', 'image/webp', etc.
              }
            }
          ]
        }
      ]
    });
    console.log(response1.text);
    console.log('\n---\n');

    // Example 2: Compare two images
    console.log('Example 2: Compare Two Images\n');

    const imagePath2 = '/path/to/image2.jpg'; // Replace with actual path
    const imageData2 = fs.readFileSync(imagePath2);
    const base64Image2 = imageData2.toString('base64');

    // Both images travel in the same `parts` array, after the text prompt.
    const response2 = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: [
        {
          parts: [
            { text: 'Compare these two images. What are the similarities and differences?' },
            { inlineData: { data: base64Image, mimeType: 'image/jpeg' } },
            { inlineData: { data: base64Image2, mimeType: 'image/jpeg' } }
          ]
        }
      ]
    });
    console.log(response2.text);
    console.log('\n---\n');

    // Example 3: Specific questions about image (reuses the first image)
    console.log('Example 3: Specific Questions\n');

    const response3 = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: [
        {
          parts: [
            { text: 'How many people are in this image? What are they wearing?' },
            { inlineData: { data: base64Image, mimeType: 'image/jpeg' } }
          ]
        }
      ]
    });
    console.log(response3.text);
  } catch (error: unknown) {
    // Any value can be thrown, so narrow before reading `.message`; otherwise a
    // non-Error throwable would make this handler itself throw a TypeError.
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);

    // The fs error raised by readFileSync for a missing file carries "ENOENT" in its message.
    if (message.includes('ENOENT')) {
      console.error('\n⚠ Image file not found. Update the imagePath variable with a valid path.');
    }
  }
}
/**
* Supported image formats:
* - JPEG (.jpg, .jpeg)
* - PNG (.png)
* - WebP (.webp)
* - HEIC (.heic)
* - HEIF (.heif)
*
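* Max size: 20MB total per request when sending inline data (text prompt plus all images combined); use the Files API for larger inputs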
*
* Tips:
* - Use specific, detailed prompts for better results
* - You can analyze multiple images in one request
* - gemini-2.5-flash and gemini-2.5-pro both support vision
*/
// Script entry point. main() reports expected failures itself; this catch is a
// last-resort guard so that an unexpected rejection is logged and reflected in
// the process exit code instead of being left as a floating promise.
main().catch((error: unknown) => {
  console.error('Unexpected error:', error);
  process.exitCode = 1;
});