Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:24:51 +08:00
commit 8aebb293cd
31 changed files with 7386 additions and 0 deletions

View File

@@ -0,0 +1,117 @@
/**
* Multimodal Image Understanding with Gemini API
*
* Demonstrates:
* - Image analysis with vision capabilities
* - Base64 encoding of images
* - Combining text and image inputs
* - Multiple images in one request
*
* Prerequisites:
* - npm install @google/genai@1.27.0
* - export GEMINI_API_KEY="..."
*/
import { GoogleGenAI } from '@google/genai';
import fs from 'fs';
/**
 * Reads an image file from disk and wraps it as a base64 `inlineData` part
 * in the shape passed to `generateContent` below.
 *
 * @param filePath - Path of the image file to read (read synchronously).
 * @param mimeType - MIME type reported for the image; defaults to JPEG.
 * @returns A content part carrying the base64-encoded file bytes.
 * @throws Whatever `fs.readFileSync` throws, e.g. ENOENT when the file is missing.
 */
function loadInlineImage(
  filePath: string,
  mimeType = 'image/jpeg', // or 'image/png', 'image/webp', etc.
): { inlineData: { data: string; mimeType: string } } {
  const data = fs.readFileSync(filePath).toString('base64');
  return { inlineData: { data, mimeType } };
}

/**
 * Runs three image-understanding examples against `gemini-2.5-flash`:
 * describing one image, comparing two images, and asking specific questions
 * about an image. Each model reply is printed to stdout.
 *
 * Any failure (client construction, missing image file, API error) is logged
 * to stderr and marks the process as failed via `process.exitCode`; the
 * returned promise itself does not reject for those failures.
 */
async function main(): Promise<void> {
  try {
    // Constructed inside the try so a client-construction failure is reported
    // through the same path as every other error.
    const ai = new GoogleGenAI({
      apiKey: process.env.GEMINI_API_KEY,
    });
    const model = 'gemini-2.5-flash';

    // Example 1: Analyze a single image
    console.log('Example 1: Analyze Single Image\n');
    const imagePath = '/path/to/image.jpg'; // Replace with actual path
    const firstImage = loadInlineImage(imagePath);
    const response1 = await ai.models.generateContent({
      model,
      contents: [
        {
          parts: [
            { text: 'Describe this image in detail. What objects, people, or scenes do you see?' },
            firstImage,
          ],
        },
      ],
    });
    console.log(response1.text);
    console.log('\n---\n');

    // Example 2: Compare two images
    console.log('Example 2: Compare Two Images\n');
    const imagePath2 = '/path/to/image2.jpg'; // Replace with actual path
    const secondImage = loadInlineImage(imagePath2);
    const response2 = await ai.models.generateContent({
      model,
      contents: [
        {
          parts: [
            { text: 'Compare these two images. What are the similarities and differences?' },
            firstImage,
            secondImage,
          ],
        },
      ],
    });
    console.log(response2.text);
    console.log('\n---\n');

    // Example 3: Specific questions about image
    console.log('Example 3: Specific Questions\n');
    const response3 = await ai.models.generateContent({
      model,
      contents: [
        {
          parts: [
            { text: 'How many people are in this image? What are they wearing?' },
            firstImage,
          ],
        },
      ],
    });
    console.log(response3.text);
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before
    // reading `.message` so the handler itself cannot throw.
    const message = error instanceof Error ? error.message : String(error);
    console.error('Error:', message);
    if (message.includes('ENOENT')) {
      console.error('\n⚠ Image file not found. Update the imagePath variable with a valid path.');
    }
    // Signal failure to the shell without cutting off pending output.
    process.exitCode = 1;
  }
}
/**
* Supported image formats:
* - JPEG (.jpg, .jpeg)
* - PNG (.png)
* - WebP (.webp)
* - HEIC (.heic)
* - HEIF (.heif)
*
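* Max size: inline image data counts toward the 20MB total request limit (prompt text included); use the Files API for larger images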
*
* Tips:
* - Use specific, detailed prompts for better results
* - You can analyze multiple images in one request
* - gemini-2.5-flash and gemini-2.5-pro both support vision
*/
// Entry point. The promise is not left floating: anything that escapes main()
// is logged and the process is marked as failed.
main().catch((error: unknown) => {
  console.error('Error:', error instanceof Error ? error.message : String(error));
  process.exitCode = 1;
});