Initial commit

templates/multimodal-image.ts (new file, 117 lines added)
@@ -0,0 +1,117 @@
/**
 * Multimodal Image Understanding with Gemini API
 *
 * Demonstrates:
 * - Image analysis with vision capabilities
 * - Base64 encoding of images
 * - Combining text and image inputs
 * - Multiple images in one request
 *
 * Prerequisites:
 * - npm install @google/genai@1.27.0
 * - export GEMINI_API_KEY="..."
 */

import { GoogleGenAI } from '@google/genai';
import fs from 'fs';

async function main() {
  const ai = new GoogleGenAI({
    apiKey: process.env.GEMINI_API_KEY,
  });

  try {
    // Example 1: Analyze a single image
    console.log('Example 1: Analyze Single Image\n');

    // Load image from file
    const imagePath = '/path/to/image.jpg'; // Replace with actual path
    const imageData = fs.readFileSync(imagePath);
    const base64Image = imageData.toString('base64');

    const response1 = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: [
        {
          parts: [
            { text: 'Describe this image in detail. What objects, people, or scenes do you see?' },
            {
              inlineData: {
                data: base64Image,
                mimeType: 'image/jpeg' // or 'image/png', 'image/webp', etc.
              }
            }
          ]
        }
      ]
    });

    console.log(response1.text);
    console.log('\n---\n');

    // Example 2: Compare two images
    console.log('Example 2: Compare Two Images\n');

    const imagePath2 = '/path/to/image2.jpg'; // Replace with actual path
    const imageData2 = fs.readFileSync(imagePath2);
    const base64Image2 = imageData2.toString('base64');

    const response2 = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: [
        {
          parts: [
            { text: 'Compare these two images. What are the similarities and differences?' },
            { inlineData: { data: base64Image, mimeType: 'image/jpeg' } },
            { inlineData: { data: base64Image2, mimeType: 'image/jpeg' } }
          ]
        }
      ]
    });

    console.log(response2.text);
    console.log('\n---\n');

    // Example 3: Specific questions about image
    console.log('Example 3: Specific Questions\n');

    const response3 = await ai.models.generateContent({
      model: 'gemini-2.5-flash',
      contents: [
        {
          parts: [
            { text: 'How many people are in this image? What are they wearing?' },
            { inlineData: { data: base64Image, mimeType: 'image/jpeg' } }
          ]
        }
      ]
    });

    console.log(response3.text);

  } catch (error: any) {
    console.error('Error:', error.message);

    if (error.message.includes('ENOENT')) {
      console.error('\n⚠️ Image file not found. Update the imagePath variable with a valid path.');
    }
  }
}

/**
 * Supported image formats:
 * - JPEG (.jpg, .jpeg)
 * - PNG (.png)
 * - WebP (.webp)
 * - HEIC (.heic)
 * - HEIF (.heif)
 *
 * Max size: 20MB per image
 *
 * Tips:
 * - Use specific, detailed prompts for better results
 * - You can analyze multiple images in one request
 * - gemini-2.5-flash and gemini-2.5-pro both support vision
 */
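
// Optional helper (illustrative sketch; the names below are examples, not part of the
// @google/genai SDK): derives a mimeType from the file extension using the supported
// formats listed above and checks the 20MB inline limit before base64-encoding.
const SUPPORTED_IMAGE_MIME_TYPES: Record<string, string> = {
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  png: 'image/png',
  webp: 'image/webp',
  heic: 'image/heic',
  heif: 'image/heif',
};

const MAX_INLINE_IMAGE_BYTES = 20 * 1024 * 1024; // 20MB per image

function loadImageAsInlineData(filePath: string): { data: string; mimeType: string } {
  const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
  const mimeType = SUPPORTED_IMAGE_MIME_TYPES[ext];
  if (!mimeType) {
    throw new Error(`Unsupported image format: .${ext}`);
  }

  const buffer = fs.readFileSync(filePath);
  if (buffer.length > MAX_INLINE_IMAGE_BYTES) {
    throw new Error(`Image exceeds the 20MB inline limit: ${filePath}`);
  }

  return { data: buffer.toString('base64'), mimeType };
}

// Usage (equivalent to the manual readFileSync + toString('base64') steps above):
//   const part = { inlineData: loadImageAsInlineData('/path/to/image.png') };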

main();