Files
gh-jezweb-claude-skills-ski…/templates/vision-gpt4o.ts
2025-11-30 08:25:12 +08:00

444 lines
12 KiB
TypeScript

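/**
 * OpenAI Vision API - GPT-4o Image Understanding
 *
 * This template demonstrates:
 * - Image via URL
 * - Image via base64
 * - Multiple images in one request
 * - Detailed image analysis
 * - OCR / text extraction
 * - Object detection
 * - Image classification
 * - Structured (JSON schema) output with vision
 * - Multi-turn conversation with vision
 * - Batch image analysis
 * - Error handling
 */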
import OpenAI from 'openai';
import fs from 'fs';
// Shared OpenAI client used by the examples below. The API key is read from
// the OPENAI_API_KEY environment variable when this module is loaded.
const openai = new OpenAI({ apiKey: process.env['OPENAI_API_KEY'] });
// =============================================================================
// IMAGE VIA URL
// =============================================================================
/**
 * Sends one user message containing a text question and a remote image URL to
 * `gpt-4o`, logs the reply, and returns it.
 *
 * @returns The `content` of the first choice's message.
 */
async function imageViaUrl() {
  const question = 'What is in this image?';
  const photoUrl =
    'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg';

  // The request literal stays inline so its `role` / `type` literals are
  // checked against the SDK's message types.
  const { choices } = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: question },
          { type: 'image_url', image_url: { url: photoUrl } },
        ],
      },
    ],
  });

  const description = choices[0].message.content;
  console.log('Image description:', description);
  return description;
}
// =============================================================================
// IMAGE VIA BASE64
// =============================================================================
/**
 * Reads a local image file, embeds it in the request as a base64 data URL, and
 * asks `gpt-4o` to describe it in detail. The reply is logged and returned.
 *
 * @param imagePath - Path of the image file to send. Defaults to
 *   `./image.jpg`, the path this example originally hard-coded.
 * @returns The `content` of the first choice's message.
 * @throws Rejects with the underlying file-system error when the file cannot
 *   be read, or with whatever the API call rejects with.
 */
async function imageViaBase64(imagePath = './image.jpg') {
  // Media type for the data URL, keyed by lower-cased file extension. A Map is
  // used so that odd "extensions" such as `constructor` cannot hit inherited
  // object properties.
  const mediaTypes = new Map<string, string>([
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
  ]);
  const extension = imagePath.slice(imagePath.lastIndexOf('.') + 1).toLowerCase();
  // Unknown or missing extensions keep the original behaviour (JPEG).
  const mediaType = mediaTypes.get(extension) ?? 'image/jpeg';

  // Non-blocking read: this function is already async, so there is no reason
  // to stall the event loop while the file is loaded.
  const imageBuffer = await fs.promises.readFile(imagePath);
  const base64Image = imageBuffer.toString('base64');

  const completion = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'Describe this image in detail' },
          {
            type: 'image_url',
            image_url: {
              url: `data:${mediaType};base64,${base64Image}`,
            },
          },
        ],
      },
    ],
  });

  const description = completion.choices[0].message.content;
  console.log('Description:', description);
  return description;
}
// =============================================================================
// MULTIPLE IMAGES
// =============================================================================
/**
 * Sends two image URLs in a single user message and asks `gpt-4o` to compare
 * them. The reply is logged and returned.
 *
 * @returns The `content` of the first choice's message.
 */
async function multipleImages() {
  const instruction = 'Compare these two images. What are the differences?';
  const firstImageUrl = 'https://example.com/image1.jpg';
  const secondImageUrl = 'https://example.com/image2.jpg';

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        // One text part followed by one `image_url` part per image.
        content: [
          { type: 'text', text: instruction },
          { type: 'image_url', image_url: { url: firstImageUrl } },
          { type: 'image_url', image_url: { url: secondImageUrl } },
        ],
      },
    ],
  });

  const comparison = response.choices[0].message.content;
  console.log('Comparison:', comparison);
  return comparison;
}
// =============================================================================
// DETAILED IMAGE ANALYSIS
// =============================================================================
/**
 * Requests a six-point analysis of an image from `gpt-4o`, using a system
 * message to set an "expert image analyst" persona. The reply is logged and
 * returned.
 *
 * @param imageUrl - URL placed in the request's `image_url` part.
 * @returns The `content` of the first choice's message.
 */
async function detailedAnalysis(imageUrl: string) {
  const systemPrompt =
    'You are an expert image analyst. Provide detailed, structured analysis of images.';

  // Newline-joined so the prompt text carries no source-code indentation.
  const analysisPrompt = [
    'Analyze this image in detail. Include:',
    '1. Main subject/objects',
    '2. Colors and composition',
    '3. Lighting and mood',
    '4. Background elements',
    '5. Any text visible',
    '6. Estimated context/setting',
  ].join('\n');

  const { choices } = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: systemPrompt },
      {
        role: 'user',
        content: [
          { type: 'text', text: analysisPrompt },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
  });

  const analysis = choices[0].message.content;
  console.log('Detailed analysis:', analysis);
  return analysis;
}
// =============================================================================
// OCR / TEXT EXTRACTION
// =============================================================================
/**
 * Asks `gpt-4o` to extract all text visible in an image (OCR-style usage).
 * The reply is logged and returned.
 *
 * @param imageUrl - URL placed in the request's `image_url` part.
 * @returns The `content` of the first choice's message.
 */
async function extractText(imageUrl: string) {
  const instruction = 'Extract all text visible in this image';

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: instruction },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
  });

  const extracted = response.choices[0].message.content;
  console.log('Extracted text:', extracted);
  return extracted;
}
// =============================================================================
// OBJECT DETECTION
// =============================================================================
/**
 * Asks `gpt-4o` to list the objects visible in an image together with their
 * approximate locations. The reply is logged and returned.
 *
 * @param imageUrl - URL placed in the request's `image_url` part.
 * @returns The `content` of the first choice's message (free-form text; no
 *   structured format is requested).
 */
async function detectObjects(imageUrl: string) {
  const instruction =
    'List all objects visible in this image with their approximate locations';

  const { choices } = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: instruction },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
  });

  const objectList = choices[0].message.content;
  console.log('Objects detected:', objectList);
  return objectList;
}
// =============================================================================
// IMAGE CLASSIFICATION
// =============================================================================
/**
 * Asks `gpt-4o` to classify an image into one of the categories named in the
 * prompt (nature, urban, people, objects, abstract, other). The reply is
 * logged and returned.
 *
 * @param imageUrl - URL placed in the request's `image_url` part.
 * @returns The `content` of the first choice's message (free-form text; the
 *   reply is not validated against the category list).
 */
async function classifyImage(imageUrl: string) {
  const instruction =
    'Classify this image into categories: nature, urban, people, objects, abstract, other';

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: instruction },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
  });

  const label = response.choices[0].message.content;
  console.log('Classification:', label);
  return label;
}
// =============================================================================
// STRUCTURED OUTPUT WITH VISION
// =============================================================================
/**
 * Asks `gpt-4o` to analyze an image and reply with JSON that conforms to a
 * strict JSON schema, then parses and returns that JSON.
 *
 * The schema requires the fields `main_subject`, `objects`, `colors`, `mood`,
 * `setting`, and `has_text`, and forbids additional properties.
 *
 * @param imageUrl - URL placed in the request's `image_url` part.
 * @returns The parsed JSON object from the first choice's message.
 * @throws Error when the message carries a refusal or has no content to parse.
 * @throws SyntaxError when the content is not valid JSON.
 */
async function structuredVisionOutput(imageUrl: string) {
  const analysisSchema = {
    type: 'object',
    properties: {
      main_subject: { type: 'string' },
      objects: {
        type: 'array',
        items: { type: 'string' },
      },
      colors: {
        type: 'array',
        items: { type: 'string' },
      },
      mood: { type: 'string' },
      setting: { type: 'string' },
      has_text: { type: 'boolean' },
    },
    required: ['main_subject', 'objects', 'colors', 'mood', 'setting', 'has_text'],
    additionalProperties: false,
  };

  const completion = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'Analyze this image and return structured data' },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
    response_format: {
      type: 'json_schema',
      json_schema: {
        name: 'image_analysis',
        strict: true,
        schema: analysisSchema,
      },
    },
  });

  const message = completion.choices[0].message;

  // A refusal arrives in `refusal` with `content` set to null. Parsing that
  // null would silently yield `null`, so surface the problem explicitly.
  if (message.refusal) {
    throw new Error(`Model refused to analyze the image: ${message.refusal}`);
  }
  if (message.content == null) {
    throw new Error('Vision API returned no content to parse');
  }

  const analysis = JSON.parse(message.content);
  console.log('Structured analysis:', JSON.stringify(analysis, null, 2));
  return analysis;
}
// =============================================================================
// MULTI-TURN CONVERSATION WITH VISION
// =============================================================================
/**
 * Runs a two-turn conversation about one image: the first turn asks what the
 * image shows, the second asks a follow-up about its colors. The assistant's
 * first reply is appended to the history before the follow-up so the second
 * request carries the full context.
 *
 * @returns The message history: the initial user message, the assistant's
 *   first reply, and the follow-up user message. The second reply is logged
 *   but not appended.
 */
async function conversationWithVision() {
  const history: any[] = [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        { type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } },
      ],
    },
  ];

  // Turn 1: the image question.
  const firstReply = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: history,
  });
  const assistantMessage = firstReply.choices[0].message;
  console.log('Turn 1:', assistantMessage.content);

  // Record the assistant's answer, then queue the follow-up question.
  history.push(assistantMessage, {
    role: 'user',
    content: 'Can you describe the colors in more detail?',
  });

  // Turn 2: same history array, now three messages long.
  const secondReply = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: history,
  });
  console.log('Turn 2:', secondReply.choices[0].message.content);

  return history;
}
// =============================================================================
// BATCH IMAGE ANALYSIS
// =============================================================================
/**
 * Describes each image in turn with `gpt-4o`, one request per URL, pausing
 * between requests as simple rate-limit protection.
 *
 * Requests run sequentially; a failed request rejects the whole call and
 * discards the results gathered so far.
 *
 * @param imageUrls - Image URLs to analyze, in order.
 * @param delayMs - Pause between consecutive requests, in milliseconds.
 *   Defaults to 1000. Values of 0 or less disable the pause.
 * @returns One `{ url, description }` entry per input URL, in input order.
 */
async function batchAnalysis(imageUrls: string[], delayMs = 1000) {
  const results: { url: string; description: string | null }[] = [];

  for (const [index, url] of imageUrls.entries()) {
    console.log(`Analyzing: ${url}`);

    const completion = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'Briefly describe this image' },
            { type: 'image_url', image_url: { url } },
          ],
        },
      ],
    });

    results.push({
      url,
      description: completion.choices[0].message.content,
    });

    // Rate limit protection: wait only when another request follows, so the
    // caller is not held up after the final image.
    const hasMore = index < imageUrls.length - 1;
    if (hasMore && delayMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }

  console.log(`Analyzed ${results.length} images`);
  return results;
}
// =============================================================================
// ERROR HANDLING
// =============================================================================
/**
 * Asks `gpt-4o` what an image shows, logging a short diagnostic for common
 * failure categories before re-throwing the original error.
 *
 * Categories are checked in this order: message mentions "invalid image",
 * message mentions "base64", `status` equals 429, anything else.
 *
 * @param imageUrl - URL placed in the request's `image_url` part.
 * @returns The `content` of the first choice's message.
 * @throws Re-throws whatever the request threw, unchanged.
 */
async function withErrorHandling(imageUrl: string) {
  try {
    const completion = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'What is in this image?' },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    return completion.choices[0].message.content;
  } catch (error: unknown) {
    // The thrown value is not guaranteed to be an Error with a string
    // `message`. Read both fields defensively so classification can never
    // throw a TypeError that would mask the original failure.
    const details =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown; status?: unknown })
        : undefined;
    const message = typeof details?.message === 'string' ? details.message : undefined;
    const status = details?.status;

    if (message?.includes('invalid image')) {
      console.error('Image URL is invalid or inaccessible');
    } else if (message?.includes('base64')) {
      console.error('Base64 encoding error');
    } else if (status === 429) {
      console.error('Rate limit exceeded');
    } else {
      // Fall back to the raw thrown value when there is no message text.
      console.error('Vision API error:', message ?? error);
    }
    throw error;
  }
}
// =============================================================================
// MAIN EXECUTION
// =============================================================================
/**
 * Runs the enabled examples in order, printing a banner first and, for each
 * example, its heading before the call and a blank line after it.
 *
 * Only the URL example is enabled; the base64 and multi-image examples are
 * left commented out.
 */
async function main() {
  console.log('=== OpenAI Vision (GPT-4o) Examples ===\n');

  // Heading / runner pairs. Uncomment an entry to enable that example.
  const examples: Array<[string, () => Promise<unknown>]> = [
    // Example 1: Image via URL
    ['1. Image via URL:', imageViaUrl],
    // Example 2: Image via base64 (uncomment when you have image.jpg)
    // ['2. Image via Base64:', imageViaBase64],
    // Example 3: Multiple images
    // ['3. Multiple Images:', multipleImages],
  ];

  for (const [heading, runExample] of examples) {
    console.log(heading);
    await runExample();
    console.log();
  }
}
// Run the examples only when this file is executed directly, not when it is
// imported. A failure is logged and also reported through a non-zero exit
// code, so shells and CI do not treat a failed run as a success.
if (require.main === module) {
  main().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
  });
}
// Public API: the individual example functions, exported so other modules can
// call them directly. `main` is not part of this list.
export {
imageViaUrl,
imageViaBase64,
multipleImages,
detailedAnalysis,
extractText,
detectObjects,
classifyImage,
structuredVisionOutput,
conversationWithVision,
batchAnalysis,
withErrorHandling,
};