/**
 * Cloudflare Workers AI - Vision Models Examples
 *
 * This template demonstrates:
 * - Llama 3.2 11B Vision Instruct for image understanding
 * - Image captioning and description
 * - Visual question answering
 * - Base64 image encoding
 * - Combining vision + text prompts
 *
 * Response conventions used by every route:
 * - 200: `{ success: true, ... }`
 * - 400: `{ success: false, error }` for malformed or missing input
 * - 500: `{ success: false, error }` for unexpected failures (e.g. the model call)
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Shared helpers
// ============================================================================

/** Prompt used when the caller does not supply a question. */
const DEFAULT_QUESTION = 'What is in this image?';

/** Error text returned when the request body is not a JSON object. */
const INVALID_BODY_MESSAGE = 'Request body must be a JSON object';

/**
 * Number of bytes converted per `String.fromCharCode` call. Spreading a whole
 * image into a single call passes one argument per byte and overflows the
 * engine's argument limit, so the conversion is done in slices.
 */
const BASE64_CHUNK_SIZE = 0x8000;

/** Extracts a printable message from whatever was thrown. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Narrows an untrusted JSON value to a non-empty string. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === 'string' && value.length > 0;
}

/** Narrows an untrusted JSON value to an array containing only strings. */
function isStringArray(value: unknown): value is string[] {
  return Array.isArray(value) && value.every((item) => typeof item === 'string');
}

/**
 * Reads the request body as a JSON object.
 *
 * @param req - Anything exposing a `json()` reader (e.g. `c.req`).
 * @returns The parsed object, or `undefined` when the body is not valid JSON
 *   or is not a plain object.
 */
async function readJsonObject(
  req: { json(): Promise<unknown> }
): Promise<Record<string, unknown> | undefined> {
  try {
    const body = await req.json();
    if (typeof body !== 'object' || body === null || Array.isArray(body)) {
      return undefined;
    }
    return body as Record<string, unknown>;
  } catch {
    // Malformed JSON is a client error; the caller turns this into a 400.
    return undefined;
  }
}

/**
 * Ensures an image string is a data URL.
 *
 * @param image - Either a full `data:` URL or a bare base64 payload.
 * @returns `image` unchanged if it already starts with `data:`, otherwise the
 *   payload prefixed with `data:image/png;base64,`.
 */
function toDataUrl(image: string): string {
  return image.startsWith('data:') ? image : `data:image/png;base64,${image}`;
}

/**
 * Base64-encodes raw bytes without exceeding the argument limit of
 * `String.fromCharCode`.
 */
function bytesToBase64(bytes: Uint8Array): string {
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += BASE64_CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + BASE64_CHUNK_SIZE));
  }
  return btoa(binary);
}

/**
 * Sends one text prompt plus one image to Llama 3.2 11B Vision Instruct.
 *
 * @param ai - The Workers AI binding.
 * @param prompt - Text part of the user message.
 * @param imageUrl - Image as a data URL.
 * @returns The `response` field of the model output.
 */
async function askVision(ai: Ai, prompt: string, imageUrl: string): Promise<string | undefined> {
  const response = await ai.run('@cf/meta/llama-3.2-11b-vision-instruct', {
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: prompt },
          { type: 'image_url', image_url: { url: imageUrl } },
        ],
      },
    ],
  });
  return response.response;
}

/**
 * Best-effort JSON extraction from model output.
 *
 * Tries, in order: the trimmed text, the body of a Markdown code fence that
 * wraps the whole text, and the span from the first `{` to the last `}`.
 *
 * @returns The parsed value, or `undefined` when nothing parses
 *   (`JSON.parse` itself never yields `undefined`, so it is a safe sentinel).
 */
function parseModelJson(text: string | undefined): unknown {
  if (!text) {
    return undefined;
  }

  const trimmed = text.trim();
  const candidates = [trimmed];

  const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
  const fencedBody = fenced?.[1];
  if (fencedBody !== undefined) {
    candidates.push(fencedBody);
  }

  const firstBrace = trimmed.indexOf('{');
  const lastBrace = trimmed.lastIndexOf('}');
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    candidates.push(trimmed.slice(firstBrace, lastBrace + 1));
  }

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch {
      // Not valid JSON in this form; fall through to the next candidate.
    }
  }
  return undefined;
}

// ============================================================================
// Image Understanding
// ============================================================================

/**
 * Llama 3.2 11B Vision Instruct
 * - Understands images and answers questions
 * - Accepts base64-encoded images
 * - Rate limit: 720/min
 *
 * Body: `{ image: string; question?: string }` where `image` is a base64 data
 * URL or a bare base64 string.
 */
app.post('/vision/understand', async (c) => {
  try {
    const body = await readJsonObject(c.req);
    if (!body) {
      return c.json({ success: false, error: INVALID_BODY_MESSAGE }, 400);
    }

    const { image, question: requestedQuestion } = body;
    if (!isNonEmptyString(image)) {
      return c.json({ success: false, error: 'image is required' }, 400);
    }

    const question = isNonEmptyString(requestedQuestion) ? requestedQuestion : DEFAULT_QUESTION;
    const answer = await askVision(c.env.AI, question, toDataUrl(image));

    return c.json({
      success: true,
      question,
      answer,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: errorMessage(error),
      },
      500
    );
  }
});

// ============================================================================
// Image Captioning
// ============================================================================

/**
 * Generates a detailed caption for one image.
 *
 * Body: `{ image: string }`.
 */
app.post('/vision/caption', async (c) => {
  try {
    const body = await readJsonObject(c.req);
    if (!body) {
      return c.json({ success: false, error: INVALID_BODY_MESSAGE }, 400);
    }

    const { image } = body;
    if (!isNonEmptyString(image)) {
      return c.json({ success: false, error: 'image is required' }, 400);
    }

    const caption = await askVision(
      c.env.AI,
      'Generate a detailed caption for this image. Describe what you see, including objects, people, setting, mood, and any notable details.',
      toDataUrl(image)
    );

    return c.json({
      success: true,
      caption,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: errorMessage(error),
      },
      500
    );
  }
});

// ============================================================================
// Visual Question Answering
// ============================================================================

/**
 * Answers several questions about the same image.
 *
 * Body: `{ image: string; questions: string[] }`. One model call is issued
 * per question and all calls run concurrently.
 */
app.post('/vision/qa', async (c) => {
  try {
    const body = await readJsonObject(c.req);
    if (!body) {
      return c.json({ success: false, error: INVALID_BODY_MESSAGE }, 400);
    }

    const { image, questions } = body;
    if (!Array.isArray(questions) || questions.length === 0) {
      return c.json({ success: false, error: 'questions array is required' }, 400);
    }
    if (!isStringArray(questions)) {
      return c.json({ success: false, error: 'questions must be an array of strings' }, 400);
    }
    if (!isNonEmptyString(image)) {
      return c.json({ success: false, error: 'image is required' }, 400);
    }

    const imageUrl = toDataUrl(image);

    // Answer all questions
    const answers = await Promise.all(
      questions.map(async (question) => ({
        question,
        answer: await askVision(c.env.AI, question, imageUrl),
      }))
    );

    return c.json({
      success: true,
      count: answers.length,
      results: answers,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: errorMessage(error),
      },
      500
    );
  }
});

// ============================================================================
// Image Analysis (Structured Output)
// ============================================================================

/**
 * Asks the model for a JSON description of the image.
 *
 * Body: `{ image: string }`. Responds with `{ success, analysis }` when the
 * model output can be parsed as JSON, otherwise `{ success, raw }` carrying
 * the unparsed model text.
 */
app.post('/vision/analyze', async (c) => {
  try {
    const body = await readJsonObject(c.req);
    if (!body) {
      return c.json({ success: false, error: INVALID_BODY_MESSAGE }, 400);
    }

    const { image } = body;
    if (!isNonEmptyString(image)) {
      return c.json({ success: false, error: 'image is required' }, 400);
    }

    const prompt = [
      'Analyze this image and return a JSON object with:',
      '- objects: array of objects detected',
      '- scene: description of the setting',
      '- mood: emotional tone',
      '- colors: dominant colors',
      '- text: any visible text',
      '',
      'Return ONLY valid JSON, no explanations.',
    ].join('\n');

    const raw = await askVision(c.env.AI, prompt, toDataUrl(image));

    // Parse JSON response; models frequently wrap it in fences or prose.
    const analysis = parseModelJson(raw);
    if (analysis === undefined) {
      return c.json({
        success: true,
        raw,
      });
    }

    return c.json({
      success: true,
      analysis,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: errorMessage(error),
      },
      500
    );
  }
});

// ============================================================================
// Image Comparison
// ============================================================================

/**
 * Compares two images by describing each with the vision model and then
 * asking a text model to compare the two descriptions.
 *
 * Body: `{ image1: string; image2: string; question?: string }`.
 */
app.post('/vision/compare', async (c) => {
  try {
    const body = await readJsonObject(c.req);
    if (!body) {
      return c.json({ success: false, error: INVALID_BODY_MESSAGE }, 400);
    }

    const { image1, image2, question: requestedQuestion } = body;
    if (!isNonEmptyString(image1) || !isNonEmptyString(image2)) {
      return c.json({ success: false, error: 'image1 and image2 are required' }, 400);
    }

    const question = isNonEmptyString(requestedQuestion)
      ? requestedQuestion
      : 'What are the differences between these images?';

    // The two descriptions are independent, so request them concurrently.
    const describePrompt = 'Describe this image in detail.';
    const [image1Description, image2Description] = await Promise.all([
      askVision(c.env.AI, describePrompt, toDataUrl(image1)),
      askVision(c.env.AI, describePrompt, toDataUrl(image2)),
    ]);

    // Compare using text generation
    const comparison = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            'Compare these two images based on their descriptions:',
            '',
            `Image 1: ${image1Description}`,
            '',
            `Image 2: ${image2Description}`,
            '',
            `Question: ${question}`,
          ].join('\n'),
        },
      ],
    });

    return c.json({
      success: true,
      image1Description,
      image2Description,
      comparison: comparison.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: errorMessage(error),
      },
      500
    );
  }
});

// ============================================================================
// Image Upload from URL
// ============================================================================

/**
 * Fetch image from URL, convert to base64, and analyze
 *
 * Body: `{ url: string; question?: string }`.
 *
 * NOTE(review): `url` is caller-controlled and is fetched by this Worker.
 * Only http(s) URLs are accepted here; add a host allow-list if this is
 * exposed to untrusted callers.
 */
app.post('/vision/url', async (c) => {
  try {
    const body = await readJsonObject(c.req);
    if (!body) {
      return c.json({ success: false, error: INVALID_BODY_MESSAGE }, 400);
    }

    const { url, question: requestedQuestion } = body;
    if (!isNonEmptyString(url)) {
      return c.json({ success: false, error: 'url is required' }, 400);
    }

    let target: URL;
    try {
      target = new URL(url);
    } catch {
      return c.json({ success: false, error: 'url is not a valid URL' }, 400);
    }
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      return c.json({ success: false, error: 'url must use http or https' }, 400);
    }

    const question = isNonEmptyString(requestedQuestion) ? requestedQuestion : DEFAULT_QUESTION;

    // Fetch image
    const imageResponse = await fetch(target.toString());
    if (!imageResponse.ok) {
      return c.json({ success: false, error: 'Failed to fetch image' }, 400);
    }

    // Convert to base64 (chunked: spreading the whole image into one
    // String.fromCharCode call throws RangeError on realistic image sizes).
    const imageBytes = new Uint8Array(await imageResponse.arrayBuffer());
    const base64 = bytesToBase64(imageBytes);

    // Drop any parameters (e.g. "; charset=...") so the data URL stays well-formed.
    const headerType = imageResponse.headers.get('content-type')?.split(';')[0]?.trim();
    const contentType = headerType || 'image/png';
    const imageUrl = `data:${contentType};base64,${base64}`;

    // Analyze image
    const answer = await askVision(c.env.AI, question, imageUrl);

    return c.json({
      success: true,
      sourceUrl: url,
      question,
      answer,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: errorMessage(error),
      },
      500
    );
  }
});

// ============================================================================
// Accessibility: Alt Text Generation
// ============================================================================

/**
 * Generates short alt text for one image.
 *
 * Body: `{ image: string }`. The 125-character limit is requested in the
 * prompt only; the returned text is trimmed but not truncated.
 */
app.post('/vision/alt-text', async (c) => {
  try {
    const body = await readJsonObject(c.req);
    if (!body) {
      return c.json({ success: false, error: INVALID_BODY_MESSAGE }, 400);
    }

    const { image } = body;
    if (!isNonEmptyString(image)) {
      return c.json({ success: false, error: 'image is required' }, 400);
    }

    const answer = await askVision(
      c.env.AI,
      'Generate a concise, descriptive alt text for this image for accessibility purposes. Keep it under 125 characters.',
      toDataUrl(image)
    );

    return c.json({
      success: true,
      // Guard against a missing model response instead of throwing on `.trim()`.
      altText: (answer ?? '').trim(),
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: errorMessage(error),
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

/** Liveness probe; reports `ok` and the current time in ISO-8601 form. */
app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;