444 lines
12 KiB
TypeScript
444 lines
12 KiB
TypeScript
/**
 * OpenAI Vision API - GPT-4o Image Understanding
 *
 * This template demonstrates:
 * - Image via URL
 * - Image via base64
 * - Multiple images in one request
 * - Detailed image analysis
 * - OCR / text extraction
 * - Object detection
 */
|
|
|
|
import OpenAI from 'openai';
|
|
import fs from 'fs';
|
|
|
|
// Shared SDK client used by every example below; the API key is read from the
// OPENAI_API_KEY environment variable.
const apiKey = process.env.OPENAI_API_KEY;
const openai = new OpenAI({ apiKey });
|
|
|
|
// =============================================================================
|
|
// IMAGE VIA URL
|
|
// =============================================================================
|
|
|
|
/**
 * Asks GPT-4o what a remotely hosted image shows.
 *
 * Sends a single user message made of a text prompt and an `image_url` part
 * pointing at a fixed Wikimedia URL, logs the first choice's content and
 * returns it.
 *
 * @returns The message content of the first choice in the API response.
 */
async function imageViaUrl() {
  const sampleImageUrl =
    'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg';

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'What is in this image?' },
          { type: 'image_url', image_url: { url: sampleImageUrl } },
        ],
      },
    ],
  });

  const description = response.choices[0].message.content;
  console.log('Image description:', description);
  return description;
}
|
|
|
|
// =============================================================================
|
|
// IMAGE VIA BASE64
|
|
// =============================================================================
|
|
|
|
/**
 * Sends a local image to GPT-4o as a base64 data URL and asks for a detailed
 * description.
 *
 * @param imagePath - Path of the image file to read. Defaults to `./image.jpg`,
 *   the path this example originally hard-coded.
 * @param mimeType - MIME type placed in the data URL. Must match the file's
 *   actual format; defaults to `image/jpeg`.
 * @returns The message content of the first choice in the API response.
 * @throws Rejects if the file cannot be read or the API request fails.
 */
async function imageViaBase64(imagePath = './image.jpg', mimeType = 'image/jpeg') {
  // Read asynchronously so the event loop is not blocked inside an async
  // function; asking for the 'base64' encoding yields the encoded string directly.
  const base64Image = await fs.promises.readFile(imagePath, { encoding: 'base64' });

  const completion = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'Describe this image in detail' },
          {
            type: 'image_url',
            image_url: {
              url: `data:${mimeType};base64,${base64Image}`,
            },
          },
        ],
      },
    ],
  });

  const description = completion.choices[0].message.content;
  console.log('Description:', description);
  return description;
}
|
|
|
|
// =============================================================================
|
|
// MULTIPLE IMAGES
|
|
// =============================================================================
|
|
|
|
/**
 * Sends two images in one request and asks GPT-4o to compare them.
 *
 * The user message contains the text prompt followed by one `image_url` part
 * per URL, in the order listed. The first choice's content is logged and
 * returned.
 *
 * @returns The message content of the first choice in the API response.
 */
async function multipleImages() {
  const imageUrls = ['https://example.com/image1.jpg', 'https://example.com/image2.jpg'];

  // `as const` keeps the discriminant as the literal 'image_url' instead of
  // widening it to `string`.
  const imageParts = imageUrls.map((url) => ({
    type: 'image_url' as const,
    image_url: { url },
  }));

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'Compare these two images. What are the differences?' },
          ...imageParts,
        ],
      },
    ],
  });

  const comparison = response.choices[0].message.content;
  console.log('Comparison:', comparison);
  return comparison;
}
|
|
|
|
// =============================================================================
|
|
// DETAILED IMAGE ANALYSIS
|
|
// =============================================================================
|
|
|
|
/**
 * Requests a structured, multi-point analysis of an image.
 *
 * A system message sets the "expert image analyst" persona; the user message
 * carries a six-item checklist prompt together with the image.
 *
 * @param imageUrl - URL placed in the `image_url` part of the request.
 * @returns The message content of the first choice in the API response.
 */
async function detailedAnalysis(imageUrl: string) {
  const systemPrompt =
    'You are an expert image analyst. Provide detailed, structured analysis of images.';

  // Multi-line prompt; the line breaks are part of the text sent to the model.
  const analysisPrompt = `Analyze this image in detail. Include:
1. Main subject/objects
2. Colors and composition
3. Lighting and mood
4. Background elements
5. Any text visible
6. Estimated context/setting`;

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: systemPrompt },
      {
        role: 'user',
        content: [
          { type: 'text', text: analysisPrompt },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
  });

  const analysis = response.choices[0].message.content;
  console.log('Detailed analysis:', analysis);
  return analysis;
}
|
|
|
|
// =============================================================================
|
|
// OCR / TEXT EXTRACTION
|
|
// =============================================================================
|
|
|
|
/**
 * OCR-style helper: asks GPT-4o to extract all text visible in an image.
 *
 * @param imageUrl - URL placed in the `image_url` part of the request.
 * @returns The message content of the first choice in the API response.
 */
async function extractText(imageUrl: string) {
  const prompt = 'Extract all text visible in this image';

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: prompt },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
  });

  const extracted = response.choices[0].message.content;
  console.log('Extracted text:', extracted);
  return extracted;
}
|
|
|
|
// =============================================================================
|
|
// OBJECT DETECTION
|
|
// =============================================================================
|
|
|
|
/**
 * Asks GPT-4o to list the objects visible in an image along with their
 * approximate locations.
 *
 * @param imageUrl - URL placed in the `image_url` part of the request.
 * @returns The message content of the first choice in the API response.
 */
async function detectObjects(imageUrl: string) {
  const prompt = 'List all objects visible in this image with their approximate locations';

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: prompt },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
  });

  const detected = response.choices[0].message.content;
  console.log('Objects detected:', detected);
  return detected;
}
|
|
|
|
// =============================================================================
|
|
// IMAGE CLASSIFICATION
|
|
// =============================================================================
|
|
|
|
/**
 * Asks GPT-4o to classify an image into one of the categories named in the
 * prompt (nature, urban, people, objects, abstract, other).
 *
 * The category list exists only in the prompt text; the reply is free-form and
 * is not validated here.
 *
 * @param imageUrl - URL placed in the `image_url` part of the request.
 * @returns The message content of the first choice in the API response.
 */
async function classifyImage(imageUrl: string) {
  const prompt =
    'Classify this image into categories: nature, urban, people, objects, abstract, other';

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: prompt },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
  });

  const classification = response.choices[0].message.content;
  console.log('Classification:', classification);
  return classification;
}
|
|
|
|
// =============================================================================
|
|
// STRUCTURED OUTPUT WITH VISION
|
|
// =============================================================================
|
|
|
|
/**
 * Analyzes an image and returns the result as a parsed JSON object.
 *
 * The request uses a strict `json_schema` response format named
 * `image_analysis` with the required fields `main_subject`, `objects`,
 * `colors`, `mood`, `setting` and `has_text`.
 *
 * @param imageUrl - URL placed in the `image_url` part of the request.
 * @returns The parsed JSON object produced by the model.
 * @throws Error if the response carries no text content (for example when the
 *   model refuses), or a SyntaxError if the content is not valid JSON.
 */
async function structuredVisionOutput(imageUrl: string) {
  const completion = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'Analyze this image and return structured data' },
          {
            type: 'image_url',
            image_url: { url: imageUrl },
          },
        ],
      },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: {
        name: 'image_analysis',
        strict: true,
        schema: {
          type: 'object',
          properties: {
            main_subject: { type: 'string' },
            objects: {
              type: 'array',
              items: { type: 'string' },
            },
            colors: {
              type: 'array',
              items: { type: 'string' },
            },
            mood: { type: 'string' },
            setting: { type: 'string' },
            has_text: { type: 'boolean' },
          },
          required: ['main_subject', 'objects', 'colors', 'mood', 'setting', 'has_text'],
          additionalProperties: false,
        },
      },
    },
  });

  const message = completion.choices[0].message;
  const content = message.content;

  // Guard instead of a non-null assertion: JSON.parse(null) would coerce to the
  // string "null" and silently return null, hiding the missing content.
  if (typeof content !== 'string') {
    const reason = message.refusal ? `: ${message.refusal}` : '';
    throw new Error(`Vision API returned no content to parse${reason}`);
  }

  const analysis = JSON.parse(content);
  console.log('Structured analysis:', JSON.stringify(analysis, null, 2));

  return analysis;
}
|
|
|
|
// =============================================================================
|
|
// MULTI-TURN CONVERSATION WITH VISION
|
|
// =============================================================================
|
|
|
|
/**
 * Runs a two-turn conversation about a single image.
 *
 * Turn 1 sends the image with a question. The assistant message from that
 * response is appended to the history, followed by a text-only follow-up
 * question, and the whole history is sent again for turn 2.
 *
 * @returns The message history: the initial user message, the first assistant
 *   reply and the follow-up user message. The second reply is logged but not
 *   appended.
 */
async function conversationWithVision() {
  const openingMessage = {
    role: 'user',
    content: [
      { type: 'text', text: 'What is in this image?' },
      { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } },
    ],
  };
  const messages: any[] = [openingMessage];

  // First turn
  const firstReply = await openai.chat.completions.create({ model: 'gpt-4o', messages });
  const assistantMessage = firstReply.choices[0].message;
  console.log('Turn 1:', assistantMessage.content);

  // Keep the assistant's answer in the history, then ask the follow-up.
  messages.push(assistantMessage, {
    role: 'user',
    content: 'Can you describe the colors in more detail?',
  });

  // Second turn, sent with the full history
  const secondReply = await openai.chat.completions.create({ model: 'gpt-4o', messages });
  console.log('Turn 2:', secondReply.choices[0].message.content);

  return messages;
}
|
|
|
|
// =============================================================================
|
|
// BATCH IMAGE ANALYSIS
|
|
// =============================================================================
|
|
|
|
/**
 * Describes a list of images one at a time, pausing between requests.
 *
 * Requests are issued sequentially. If any request rejects, the rejection
 * propagates and the results collected so far are not returned.
 *
 * @param imageUrls - URLs of the images to describe, processed in order.
 * @param delayMs - Pause between consecutive requests, in milliseconds, as
 *   simple rate-limit protection. Defaults to 1000. No pause follows the final
 *   request.
 * @returns One `{ url, description }` entry per input URL, in input order.
 */
async function batchAnalysis(imageUrls: string[], delayMs = 1000) {
  const results: Array<{ url: string; description: string | null }> = [];

  for (const [index, url] of imageUrls.entries()) {
    console.log(`Analyzing: ${url}`);

    const completion = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'Briefly describe this image' },
            { type: 'image_url', image_url: { url } },
          ],
        },
      ],
    });

    results.push({
      url,
      description: completion.choices[0].message.content,
    });

    // Rate limit protection: only wait when another request will follow, so the
    // caller is not held up after the last image.
    const isLast = index === imageUrls.length - 1;
    if (!isLast && delayMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }

  console.log(`Analyzed ${results.length} images`);
  return results;
}
|
|
|
|
// =============================================================================
|
|
// ERROR HANDLING
|
|
// =============================================================================
|
|
|
|
/**
 * Describes an image and logs a categorized diagnostic if the request fails.
 *
 * Failures are classified by message text ("invalid image", "base64") or by an
 * HTTP-style `status` of 429, logged, and then rethrown unchanged so callers
 * can still handle them.
 *
 * @param imageUrl - URL placed in the `image_url` part of the request.
 * @returns The message content of the first choice in the API response.
 * @throws Rethrows whatever the API call rejected with.
 */
async function withErrorHandling(imageUrl: string) {
  try {
    const completion = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'What is in this image?' },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });

    return completion.choices[0].message.content;
  } catch (error: unknown) {
    // Narrow before use: a rejection is not guaranteed to be an Error, and
    // reading `.message.includes` on such a value would throw a TypeError that
    // masks the original failure.
    const message = error instanceof Error ? error.message : String(error);
    const status =
      typeof error === 'object' && error !== null && 'status' in error
        ? (error as { status?: unknown }).status
        : undefined;

    if (message.includes('invalid image')) {
      console.error('Image URL is invalid or inaccessible');
    } else if (message.includes('base64')) {
      console.error('Base64 encoding error');
    } else if (status === 429) {
      console.error('Rate limit exceeded');
    } else {
      console.error('Vision API error:', message);
    }

    throw error;
  }
}
|
|
|
|
// =============================================================================
|
|
// MAIN EXECUTION
|
|
// =============================================================================
|
|
|
|
/**
 * Runs the enabled examples in order, printing a title before each one and a
 * blank line after it.
 */
async function main() {
  console.log('=== OpenAI Vision (GPT-4o) Examples ===\n');

  const examples: Array<{ title: string; run: () => Promise<unknown> }> = [
    // Example 1: Image via URL
    { title: '1. Image via URL:', run: () => imageViaUrl() },

    // Example 2: Image via base64 (uncomment when you have image.jpg)
    // { title: '2. Image via Base64:', run: () => imageViaBase64() },

    // Example 3: Multiple images
    // { title: '3. Multiple Images:', run: () => multipleImages() },
  ];

  for (const { title, run } of examples) {
    console.log(title);
    await run();
    console.log();
  }
}
|
|
|
|
// Run if executed directly
|
|
// Only run the examples when this file is the process entry point, not when it
// is imported for its exports.
if (require.main === module) {
  main().catch((error: unknown) => {
    console.error(error);
    // Signal failure to the shell or CI; previously the error was logged but
    // the process still exited with status 0.
    process.exitCode = 1;
  });
}
|
|
|
|
// Public surface of this template: every example function, so each can be
// imported and run individually.
export {
  imageViaUrl, imageViaBase64, multipleImages,
  detailedAnalysis, extractText, detectObjects, classifyImage,
  structuredVisionOutput, conversationWithVision,
  batchAnalysis, withErrorHandling,
};
|