118 lines
3.0 KiB
TypeScript
118 lines
3.0 KiB
TypeScript
/**
 * Multimodal Image Understanding with Gemini API
 *
 * Demonstrates:
 * - Image analysis with vision capabilities
 * - Base64 encoding of images
 * - Combining text and image inputs
 * - Multiple images in one request
 *
 * Prerequisites:
 * - npm install @google/genai@1.27.0
 * - export GEMINI_API_KEY="..."
 */
|
||
|
||
import { GoogleGenAI } from '@google/genai';
|
||
import fs from 'fs';
|
||
|
||
/**
 * Reads an image from disk and wraps it as an inline-data part that can be
 * placed in the `parts` array of a `generateContent` request.
 *
 * @param imagePath - Path of the image file to read (read synchronously).
 * @param mimeType - MIME type sent alongside the bytes, e.g. 'image/jpeg',
 *   'image/png' or 'image/webp'.
 * @returns A part object of the shape `{ inlineData: { data, mimeType } }`
 *   where `data` is the file content encoded as base64.
 * @throws Whatever `fs.readFileSync` throws, e.g. an ENOENT error when the
 *   file does not exist.
 */
function loadInlineImagePart(imagePath: string, mimeType = 'image/jpeg') {
  return {
    inlineData: {
      data: fs.readFileSync(imagePath).toString('base64'),
      mimeType,
    },
  };
}

/**
 * Tells whether a caught value represents a "file not found" failure.
 *
 * Prefers the `code` field that Node attaches to file-system errors and falls
 * back to searching the message text, which is what this script checked
 * before.
 */
function isFileNotFoundError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  if ('code' in error && error.code === 'ENOENT') {
    return true;
  }
  return error.message.includes('ENOENT');
}

/**
 * Runs three image-understanding examples against `gemini-2.5-flash`:
 *   1. describe a single image,
 *   2. compare two images sent in one request,
 *   3. ask specific questions about the first image.
 *
 * Each response's text is printed to stdout. Any failure is reported on
 * stderr (with an extra hint when an image file is missing) and the process
 * exit code is set to 1; the returned promise itself does not reject for
 * those failures.
 */
async function main(): Promise<void> {
  const model = 'gemini-2.5-flash';

  try {
    // Constructed inside the try block so that a failure while creating the
    // client is reported through the same path as request failures.
    const ai = new GoogleGenAI({
      apiKey: process.env.GEMINI_API_KEY,
    });

    // Example 1: Analyze a single image
    console.log('Example 1: Analyze Single Image\n');

    const imagePath = '/path/to/image.jpg'; // Replace with actual path
    // Encoded once and reused by examples 2 and 3.
    const firstImage = loadInlineImagePart(imagePath, 'image/jpeg'); // or 'image/png', 'image/webp', etc.

    const response1 = await ai.models.generateContent({
      model,
      contents: [
        {
          parts: [
            { text: 'Describe this image in detail. What objects, people, or scenes do you see?' },
            firstImage,
          ],
        },
      ],
    });

    console.log(response1.text);
    console.log('\n---\n');

    // Example 2: Compare two images
    console.log('Example 2: Compare Two Images\n');

    const imagePath2 = '/path/to/image2.jpg'; // Replace with actual path
    const secondImage = loadInlineImagePart(imagePath2, 'image/jpeg');

    const response2 = await ai.models.generateContent({
      model,
      contents: [
        {
          parts: [
            { text: 'Compare these two images. What are the similarities and differences?' },
            firstImage,
            secondImage,
          ],
        },
      ],
    });

    console.log(response2.text);
    console.log('\n---\n');

    // Example 3: Specific questions about image
    console.log('Example 3: Specific Questions\n');

    const response3 = await ai.models.generateContent({
      model,
      contents: [
        {
          parts: [
            { text: 'How many people are in this image? What are they wearing?' },
            firstImage,
          ],
        },
      ],
    });

    console.log(response3.text);
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error, so narrow before
    // reading `.message` instead of assuming its shape.
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);

    if (isFileNotFoundError(error)) {
      console.error('\n⚠️ Image file not found. Update the imagePath variable with a valid path.');
    }

    // Let shells and CI see that the demo did not complete.
    process.exitCode = 1;
  }
}
|
||
|
||
/**
 * Supported image formats:
 * - JPEG (.jpg, .jpeg)
 * - PNG (.png)
 * - WebP (.webp)
 * - HEIC (.heic)
 * - HEIF (.heif)
 *
 * Max size: 20MB total request size when images are sent inline as base64
 * (the prompt text and every image count toward the limit); use the Files API
 * for larger images.
 *
 * Tips:
 * - Use specific, detailed prompts for better results
 * - You can analyze multiple images in one request
 * - gemini-2.5-flash and gemini-2.5-pro both support vision
 */
|
||
|
||
// Run the demo. main() reports the failures it anticipates on its own; this
// handler is a last resort so that a rejection escaping main() is logged and
// reflected in the exit code instead of surfacing as an unhandled rejection.
main().catch((error: unknown) => {
  console.error('Unexpected error:', error);
  process.exitCode = 1;
});
|