Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:24:38 +08:00
commit b41966ed51
12 changed files with 3508 additions and 0 deletions

View File

@@ -0,0 +1,491 @@
/**
* Cloudflare Workers AI - Embeddings & RAG Examples
*
* This template demonstrates:
* - Generating text embeddings with BGE models
* - Storing embeddings in Vectorize
* - Semantic search with vector similarity
* - Complete RAG (Retrieval Augmented Generation) pattern
* - Document chunking strategies
*/
import { Hono } from 'hono';
// Worker environment bindings (configured in wrangler.toml).
type Bindings = {
  AI: Ai;               // Workers AI binding for model inference
  VECTORIZE: Vectorize; // Vectorize index for storing/searching embeddings
  DB?: D1Database;      // optional D1 database — only used by /search/hybrid
};

// Hono app typed with the Worker bindings so c.env is fully typed.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Generate Embeddings
// ============================================================================
/**
* Generate embeddings for text
* BGE-base: 768 dimensions, good balance
* BGE-large: 1024 dimensions, higher accuracy
* BGE-small: 384 dimensions, faster/smaller
*/
/**
 * Generate embeddings for a single text or an array of texts.
 *
 * POST body: { text: string | string[] }
 * Returns the raw embedding shape ([batch_size, dimensions]) and vectors.
 */
app.post('/embeddings', async (c) => {
  try {
    const { text } = await c.req.json<{ text: string | string[] }>();
    // Validate before calling the model: a missing/empty `text` would
    // otherwise be sent to the model as [undefined] and fail opaquely as 500.
    if (text === undefined || text === null || (Array.isArray(text) && text.length === 0)) {
      return c.json({ success: false, error: 'text is required' }, 400);
    }
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: Array.isArray(text) ? text : [text],
    });
    return c.json({
      success: true,
      shape: embeddings.shape, // [batch_size, dimensions]
      data: embeddings.data, // Array of vectors
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Batch Embeddings
// ============================================================================
/**
 * Generate embeddings for multiple texts in one request.
 * More efficient than individual requests — one inference call per batch.
 *
 * POST body: { texts: string[] }
 */
app.post('/embeddings/batch', async (c) => {
  try {
    const { texts } = await c.req.json<{ texts: string[] }>();
    // Reject empty input before spending an inference call.
    if (!texts || texts.length === 0) {
      return c.json({ error: 'texts array is required' }, 400);
    }
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: texts,
    });
    return c.json({
      success: true,
      count: texts.length,
      shape: embeddings.shape, // [batch_size, dimensions]
      embeddings: embeddings.data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Store Embeddings in Vectorize
// ============================================================================
/**
 * Index a single document: embed its text and upsert the vector.
 * The original text is kept in vector metadata so search results can
 * return it without a second lookup.
 *
 * POST body: { id: string, text: string, metadata?: object }
 */
app.post('/documents', async (c) => {
  try {
    const { id, text, metadata } = await c.req.json<{
      id: string;
      text: string;
      metadata?: Record<string, any>;
    }>();
    // Generate embedding (single-item batch; first row is the vector)
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [text],
    });
    const vector = embeddings.data[0];
    // Store in Vectorize — upsert overwrites any existing vector with this id
    await c.env.VECTORIZE.upsert([
      {
        id,
        values: vector,
        metadata: {
          text,
          ...metadata, // caller metadata may NOT override `text` (spread order), but can add fields
          createdAt: Date.now(),
        },
      },
    ]);
    return c.json({
      success: true,
      message: 'Document indexed',
      id,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Semantic Search
// ============================================================================
/**
 * Vector-similarity search: embed the query, then find the topK nearest
 * stored vectors in Vectorize.
 *
 * POST body: { query: string, topK?: number } (topK defaults to 5)
 */
app.post('/search', async (c) => {
  try {
    const { query, topK = 5 } = await c.req.json<{
      query: string;
      topK?: number;
    }>();
    // Convert query to embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [query],
    });
    const vector = embeddings.data[0];
    // Search Vectorize; returnMetadata gives us the stored document text back
    const results = await c.env.VECTORIZE.query(vector, {
      topK,
      returnMetadata: true,
    });
    return c.json({
      success: true,
      query,
      results: results.matches.map((match) => ({
        id: match.id,
        score: match.score, // similarity score — higher is closer
        text: match.metadata?.text,
        metadata: match.metadata,
      })),
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// RAG Pattern: Query with Context
// ============================================================================
/**
 * Full RAG flow: embed the question, retrieve the topK most relevant
 * documents, inline them into the system prompt, and stream the LLM answer.
 * The retrieved source ids/scores are exposed in the `x-sources` header.
 *
 * POST body: { question: string, topK?: number } (topK defaults to 3)
 */
app.post('/rag/ask', async (c) => {
  try {
    const { question, topK = 3 } = await c.req.json<{
      question: string;
      topK?: number;
    }>();
    // Step 1: Convert question to embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [question],
    });
    const vector = embeddings.data[0];
    // Step 2: Find relevant documents
    const results = await c.env.VECTORIZE.query(vector, {
      topK,
      returnMetadata: true,
    });
    // Step 3: Build context from matches (skip matches with no stored text)
    const context = results.matches
      .map((match) => match.metadata?.text)
      .filter(Boolean)
      .join('\n\n');
    // Step 4: Generate answer with context, streamed to the client
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: `Answer the question using ONLY the following context. If the context doesn't contain relevant information, say "I don't have enough information to answer that."\n\nContext:\n${context}`,
        },
        {
          role: 'user',
          content: question,
        },
      ],
      stream: true,
    });
    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        // Let the client attribute the answer without parsing the stream
        'x-sources': JSON.stringify(
          results.matches.map((m) => ({ id: m.id, score: m.score }))
        ),
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Document Chunking
// ============================================================================
/**
 * Split a long document into overlapping word-based chunks for embedding.
 * Recommended: 200-500 tokens per chunk.
 *
 * @param text      Document to split.
 * @param chunkSize Maximum words per chunk.
 * @param overlap   Words shared between consecutive chunks (context carry-over).
 * @returns Chunks in document order; [] for empty/whitespace-only input.
 */
function chunkText(text: string, chunkSize = 500, overlap = 50): string[] {
  // filter(Boolean) drops the empty tokens split() produces for
  // leading/trailing whitespace (previously an empty text yielded
  // one empty chunk, which would get embedded and indexed).
  const words = text.split(/\s+/).filter(Boolean);
  if (words.length === 0) return [];
  // Clamp the step to >= 1 so overlap >= chunkSize cannot loop forever.
  const step = Math.max(1, chunkSize - overlap);
  const chunks: string[] = [];
  for (let i = 0; i < words.length; i += step) {
    chunks.push(words.slice(i, i + chunkSize).join(' '));
  }
  return chunks;
}
/**
 * Index a long document: split it into chunks, embed all chunks in one
 * batch call, and upsert one vector per chunk. Chunk ids are derived as
 * `${id}-chunk-${index}` so the whole document can be found/deleted later.
 *
 * POST body: { id: string, text: string, chunkSize?: number }
 * NOTE(review): all chunks are embedded in a single AI.run batch — confirm
 * the model's max batch size before indexing very long documents.
 */
app.post('/documents/long', async (c) => {
  try {
    const { id, text, chunkSize = 500 } = await c.req.json<{
      id: string;
      text: string;
      chunkSize?: number;
    }>();
    // Split into chunks (default overlap from chunkText applies)
    const chunks = chunkText(text, chunkSize);
    // Generate embeddings for all chunks in one batch request
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: chunks,
    });
    // Store each chunk in Vectorize, keeping enough metadata to
    // reconstruct the parent document and chunk ordering
    const vectors = chunks.map((chunk, index) => ({
      id: `${id}-chunk-${index}`,
      values: embeddings.data[index],
      metadata: {
        documentId: id,
        chunkIndex: index,
        text: chunk,
        totalChunks: chunks.length,
      },
    }));
    await c.env.VECTORIZE.upsert(vectors);
    return c.json({
      success: true,
      message: 'Document indexed with chunks',
      documentId: id,
      chunks: chunks.length,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// RAG with Citations
// ============================================================================
/**
 * RAG variant that numbers retrieved sources and instructs the model to
 * cite them as [Source N]. Returns a non-streamed answer plus the source
 * list so the client can render citations.
 *
 * POST body: { question: string }
 */
app.post('/rag/ask-with-citations', async (c) => {
  try {
    const { question } = await c.req.json<{ question: string }>();
    // Find relevant chunks
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [question],
    });
    const results = await c.env.VECTORIZE.query(embeddings.data[0], {
      topK: 5,
      returnMetadata: true,
    });
    // Build context with numbered citations and relevance percentages
    const context = results.matches
      .map(
        (match, i) =>
          `[Source ${i + 1}] ${match.metadata?.text} (Relevance: ${(match.score * 100).toFixed(1)}%)`
      )
      .join('\n\n');
    // Generate answer (non-streaming: the whole response is needed for JSON)
    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: `Answer the question using the provided sources. Cite sources using [Source N] format.
${context}`,
        },
        {
          role: 'user',
          content: question,
        },
      ],
    });
    return c.json({
      success: true,
      answer: response.response,
      // Source numbering matches the [Source N] tags used in the prompt
      sources: results.matches.map((m, i) => ({
        id: i + 1,
        documentId: m.metadata?.documentId,
        text: m.metadata?.text,
        score: m.score,
      })),
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Hybrid Search (Keyword + Semantic)
// ============================================================================
/**
 * Combine keyword search (D1 LIKE query) with semantic search (Vectorize)
 * for better recall. Keyword search is skipped when no D1 binding exists.
 *
 * POST body: { query: string }
 */
app.post('/search/hybrid', async (c) => {
  try {
    const { query } = await c.req.json<{ query: string }>();
    // Semantic search
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [query],
    });
    const vectorResults = await c.env.VECTORIZE.query(embeddings.data[0], {
      topK: 5,
      returnMetadata: true,
    });
    // Keyword search (if D1 available); parameterized via bind() so the
    // user query cannot inject SQL
    let keywordResults: any[] = [];
    if (c.env.DB) {
      const { results } = await c.env.DB.prepare(
        'SELECT id, text FROM documents WHERE text LIKE ? LIMIT 5'
      )
        .bind(`%${query}%`)
        .all();
      keywordResults = results || [];
    }
    // Combine both result sets; keyword hits get a fixed score of 1.0
    // since LIKE matches have no similarity measure
    const combined = [
      ...vectorResults.matches.map((m) => ({
        id: m.id,
        text: m.metadata?.text,
        score: m.score,
        source: 'vector',
      })),
      ...keywordResults.map((r) => ({
        id: r.id,
        text: r.text,
        score: 1.0,
        source: 'keyword',
      })),
    ];
    // Deduplicate by ID — Map keeps the LAST entry, so a keyword hit
    // overrides a vector hit with the same id
    const unique = Array.from(new Map(combined.map((item) => [item.id, item])).values());
    return c.json({
      success: true,
      query,
      results: unique,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Delete Documents
// ============================================================================
/**
 * Remove a single document's vector from the index by its ID.
 * Note: chunked documents use `${id}-chunk-${n}` ids and are not covered.
 */
app.delete('/documents/:id', async (c) => {
  const docId = c.req.param('id');
  try {
    await c.env.VECTORIZE.deleteByIds([docId]);
    return c.json({ success: true, message: 'Document deleted', id: docId });
  } catch (error) {
    const message = (error as Error).message;
    return c.json({ success: false, error: message }, 500);
  }
});

// ============================================================================
// Health Check
// ============================================================================
/** Liveness probe — reports service status and current time. */
app.get('/health', (c) =>
  c.json({ status: 'ok', timestamp: new Date().toISOString() })
);

export default app;

View File

@@ -0,0 +1,432 @@
/**
* Cloudflare AI Gateway - Integration Examples
*
* This template demonstrates:
* - AI Gateway setup and configuration
* - Caching AI responses
* - Logging and analytics
* - Cost tracking
* - Rate limiting
* - Feedback collection
*/
import { Hono } from 'hono';
// Worker environment bindings (configured in wrangler.toml).
type Bindings = {
  AI: Ai; // Workers AI binding — all requests are routed through AI Gateway
};

// Hono app typed with the Worker bindings so c.env is fully typed.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Basic AI Gateway Usage
// ============================================================================
/**
* Create a gateway at: https://dash.cloudflare.com/ai/ai-gateway
* Use the gateway ID in your requests
*/
/**
 * Basic gateway-routed inference. The third argument to AI.run routes the
 * request through the named AI Gateway, which records it for analytics.
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/basic', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway', // Your gateway ID
        },
      }
    );
    // Access log ID for analytics — set by the binding after the call above
    const logId = c.env.AI.aiGatewayLogId;
    return c.json({
      success: true,
      response: response.response,
      logId, // Use for tracking and feedback
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// AI Gateway with Caching
// ============================================================================
/**
 * Gateway-cached inference: identical prompts are served from the gateway
 * cache without a new inference call (no inference cost).
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/cached', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
          skipCache: false, // Use cache (default)
        },
      }
    );
    return c.json({
      success: true,
      response: response.response,
      logId: c.env.AI.aiGatewayLogId,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Skip Cache for Dynamic Content
// ============================================================================
/**
 * Same as above but with skipCache: true — use when responses must not be
 * reused (time-sensitive or personalized content).
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/no-cache', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
          skipCache: true, // Always fetch fresh response
        },
      }
    );
    return c.json({
      success: true,
      response: response.response,
      logId: c.env.AI.aiGatewayLogId,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Send Feedback to AI Gateway
// ============================================================================
/**
 * Attach user feedback to a previously logged gateway request.
 * Helps optimize prompts and model selection.
 *
 * POST body: { logId: string, rating: number (1-5), comment?: string }
 * Returns 400 when logId is missing or rating is outside 1-5 — previously
 * invalid input was forwarded to the gateway and surfaced as a 500.
 */
app.post('/gateway/feedback', async (c) => {
  try {
    const { logId, rating, comment } = await c.req.json<{
      logId: string;
      rating: number; // 1-5
      comment?: string;
    }>();
    // Validate before calling the gateway API
    if (!logId) {
      return c.json({ success: false, error: 'logId is required' }, 400);
    }
    if (!Number.isInteger(rating) || rating < 1 || rating > 5) {
      return c.json({ success: false, error: 'rating must be an integer from 1 to 5' }, 400);
    }
    const gateway = c.env.AI.gateway('my-gateway');
    await gateway.patchLog(logId, {
      feedback: {
        rating,
        comment,
      },
    });
    return c.json({
      success: true,
      message: 'Feedback recorded',
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Track Cost Per Request
// ============================================================================
/**
 * Measure wall-clock latency per request and return the gateway log ID.
 * The AI Gateway dashboard shows the neurons (cost) breakdown per log.
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/track-cost', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const start = Date.now();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );
    const duration = Date.now() - start;
    const logId = c.env.AI.aiGatewayLogId;
    return c.json({
      success: true,
      response: response.response,
      metrics: {
        logId,
        duration, // milliseconds, measured around the AI.run call only
        // Check AI Gateway dashboard for neurons usage
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Multi-Model Gateway
// ============================================================================
/**
 * Use different models through the same gateway to compare performance
 * and costs.
 *
 * POST body: { prompt: string, model?: string }
 * NOTE(review): `model` comes straight from the request body — consider
 * validating it against an allowlist to prevent cost abuse.
 */
app.post('/gateway/multi-model', async (c) => {
  try {
    const { prompt, model = '@cf/meta/llama-3.1-8b-instruct' } = await c.req.json<{
      prompt: string;
      model?: string;
    }>();
    const response = await c.env.AI.run(
      model,
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );
    return c.json({
      success: true,
      model,
      response: response.response,
      logId: c.env.AI.aiGatewayLogId,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Streaming with AI Gateway
// ============================================================================
/**
 * Stream tokens through the gateway as server-sent events. The gateway
 * log ID is exposed in a response header so clients can submit feedback.
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const stream = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
        stream: true,
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );
    // Log ID available after streaming starts; may be unset, hence the fallback
    const logId = c.env.AI.aiGatewayLogId;
    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'x-ai-gateway-log-id': logId || '',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Request Analytics Middleware
// ============================================================================
/**
 * Log every /ai/* request (path, duration, status, gateway log ID) after
 * the downstream handler finishes. Replace console.log with a real
 * analytics sink in production.
 */
app.use('/ai/*', async (c, next) => {
  const start = Date.now();
  const path = c.req.path;
  await next();
  const duration = Date.now() - start;
  const logId = c.env.AI.aiGatewayLogId;
  // Log to console (or send to analytics service)
  console.log({
    timestamp: new Date().toISOString(),
    path,
    duration,
    logId,
    status: c.res.status,
  });
});
/**
 * Chat endpoint covered by the analytics middleware above.
 *
 * POST body: { prompt: string }
 * Wrapped in try/catch for consistency with every other handler in this
 * file — previously a malformed JSON body escaped as an unhandled error
 * instead of the standard { success: false, error } envelope.
 */
app.post('/ai/chat', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: { id: 'my-gateway' },
      }
    );
    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Rate Limit Protection
// ============================================================================
/**
 * Demonstrates handling gateway-enforced rate limits: 429-ish failures are
 * mapped to a 429 response with a retry hint instead of a generic 500.
 * Configure limits in the dashboard: https://dash.cloudflare.com/ai/ai-gateway
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/rate-limited', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );
    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    const message = (error as Error).message;
    // Check for rate limit error — matched by substring since the binding
    // surfaces these as plain Error messages
    if (message.includes('429') || message.includes('rate limit')) {
      return c.json(
        {
          success: false,
          error: 'Rate limit exceeded. Please try again later.',
          retryAfter: 60, // seconds
        },
        429
      );
    }
    return c.json(
      {
        success: false,
        error: message,
      },
      500
    );
  }
});
// ============================================================================
// Health Check
// ============================================================================
/** Liveness probe — reports service status and current time. */
app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});
export default app;

View File

@@ -0,0 +1,391 @@
/**
* Cloudflare Workers AI - Image Generation Examples
*
* This template demonstrates:
* - Text-to-image with Flux models (highest quality)
* - Stable Diffusion XL
* - Image storage in R2
* - Base64 and binary responses
* - Custom prompts and parameters
*/
import { Hono } from 'hono';
// Worker environment bindings (configured in wrangler.toml).
type Bindings = {
  AI: Ai;           // Workers AI binding for image/text model inference
  BUCKET?: R2Bucket; // optional R2 bucket — only needed by /generate/save and /images
};

// Hono app typed with the Worker bindings so c.env is fully typed.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Flux - Text-to-Image (Highest Quality)
// ============================================================================
/**
 * Flux 1 Schnell - Fast, high-quality image generation.
 * Best for: Photorealistic images, detailed artwork.
 * Rate limit: 720/min.
 *
 * POST body: { prompt: string } — responds with raw PNG bytes.
 */
app.post('/generate/flux', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt,
    });
    // Stream the image straight through — no buffering in the Worker
    return new Response(imageStream, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Stable Diffusion XL
// ============================================================================
/**
 * SDXL with tunable sampling parameters.
 *
 * POST body: { prompt: string, num_steps?: number, guidance?: number }
 * NOTE(review): num_steps/guidance are passed through unvalidated — confirm
 * the model's accepted ranges before exposing this publicly.
 */
app.post('/generate/sdxl', async (c) => {
  try {
    const { prompt, num_steps = 20, guidance = 7.5 } = await c.req.json<{
      prompt: string;
      num_steps?: number;
      guidance?: number;
    }>();
    const imageStream = await c.env.AI.run(
      '@cf/stabilityai/stable-diffusion-xl-base-1.0',
      {
        prompt,
        num_steps, // More steps = higher quality, slower
        guidance, // CFG scale: higher = more prompt adherence
      }
    );
    return new Response(imageStream, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// DreamShaper (Artistic/Stylized)
// ============================================================================
/**
 * DreamShaper 8 LCM — stylized/artistic output with default parameters.
 *
 * POST body: { prompt: string } — responds with raw PNG bytes.
 */
app.post('/generate/dreamshaper', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const imageStream = await c.env.AI.run('@cf/lykon/dreamshaper-8-lcm', {
      prompt,
    });
    return new Response(imageStream, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Generate and Store in R2
// ============================================================================
/**
 * Generate an image and persist it to R2 with the prompt recorded in
 * custom metadata.
 *
 * POST body: { prompt: string, filename?: string }
 */
app.post('/generate/save', async (c) => {
  try {
    const { prompt, filename } = await c.req.json<{
      prompt: string;
      filename?: string;
    }>();
    // Fail fast if the optional R2 binding is not configured
    if (!c.env.BUCKET) {
      return c.json({ error: 'R2 bucket not configured' }, 500);
    }
    // Generate image
    const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt,
    });
    // Buffer the full image — R2 put needs the complete body here
    const imageBytes = await new Response(imageStream).bytes();
    // Generate filename (timestamp-based default under images/)
    const key = filename || `images/${Date.now()}.png`;
    // Store in R2
    await c.env.BUCKET.put(key, imageBytes, {
      httpMetadata: {
        contentType: 'image/png',
      },
      customMetadata: {
        prompt,
        generatedAt: new Date().toISOString(),
      },
    });
    return c.json({
      success: true,
      message: 'Image generated and saved',
      key,
      url: `https://your-domain.com/${key}`, // Update with your R2 public URL
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Return Base64 Encoded Image
// ============================================================================
/**
 * Generate an image and return it as a base64 data URL (usable directly
 * in an <img src="..."> tag).
 *
 * POST body: { prompt: string }
 */
app.post('/generate/base64', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt,
    });
    const imageBytes = await new Response(imageStream).bytes();
    // Encode in 32KB slices: spreading the whole byte array into a single
    // String.fromCharCode(...) call overflows the call stack for images
    // larger than the engine's argument limit (~100KB).
    let binary = '';
    for (let i = 0; i < imageBytes.length; i += 0x8000) {
      binary += String.fromCharCode(...imageBytes.subarray(i, i + 0x8000));
    }
    const base64 = btoa(binary);
    return c.json({
      success: true,
      image: `data:image/png;base64,${base64}`,
      prompt,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Image-to-Image (Stable Diffusion)
// ============================================================================
/**
 * Transform an existing image guided by a prompt.
 *
 * POST body: { prompt: string, image: string (base64, with or without a
 * data-URL prefix), strength?: number (0.0-1.0, higher = more change) }
 * Responds with raw PNG bytes.
 */
app.post('/generate/img2img', async (c) => {
  try {
    const { prompt, image, strength = 0.8 } = await c.req.json<{
      prompt: string;
      image: string; // Base64 encoded
      strength?: number; // 0.0-1.0, higher = more transformation
    }>();
    // Decode base64 image to bytes; the regex strips an optional
    // "data:image/...;base64," prefix first
    const imageData = Uint8Array.from(atob(image.replace(/^data:image\/\w+;base64,/, '')), (c) =>
      c.charCodeAt(0)
    );
    const result = await c.env.AI.run('@cf/runwayml/stable-diffusion-v1-5-img2img', {
      prompt,
      image: Array.from(imageData), // model input expects a plain number array
      strength,
    });
    return new Response(result, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Batch Generation
// ============================================================================
/**
 * Generate up to 5 images in parallel and return them as base64 data URLs.
 *
 * POST body: { prompts: string[] } (1-5 prompts)
 */
app.post('/generate/batch', async (c) => {
  try {
    const { prompts } = await c.req.json<{ prompts: string[] }>();
    if (!prompts || prompts.length === 0) {
      return c.json({ error: 'prompts array is required' }, 400);
    }
    // Cap the batch to bound cost and Worker memory (5 buffered PNGs)
    if (prompts.length > 5) {
      return c.json({ error: 'Maximum 5 prompts per batch' }, 400);
    }
    const images = await Promise.all(
      prompts.map(async (prompt) => {
        const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
          prompt,
        });
        const imageBytes = await new Response(imageStream).bytes();
        // Encode in 32KB slices: spreading the whole byte array into one
        // String.fromCharCode(...) call overflows the stack on large images.
        let binary = '';
        for (let i = 0; i < imageBytes.length; i += 0x8000) {
          binary += String.fromCharCode(...imageBytes.subarray(i, i + 0x8000));
        }
        return {
          prompt,
          image: `data:image/png;base64,${btoa(binary)}`,
        };
      })
    );
    return c.json({
      success: true,
      count: images.length,
      images,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Prompt Enhancement
// ============================================================================
/**
 * Use an LLM to enrich the user's prompt (style, lighting, composition)
 * before image generation, and return both prompts with the image.
 *
 * POST body: { userPrompt: string }
 */
app.post('/generate/enhanced', async (c) => {
  try {
    const { userPrompt } = await c.req.json<{ userPrompt: string }>();
    // Step 1: Enhance prompt with LLM
    const enhancement = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a Stable Diffusion prompt expert. Enhance the user prompt for image generation. Add details about style, lighting, quality, composition. Return ONLY the enhanced prompt, no explanations.',
        },
        {
          role: 'user',
          content: userPrompt,
        },
      ],
    });
    // Fall back to the original prompt if the LLM returns nothing —
    // previously `.trim()` on an undefined response threw a 500.
    const enhancedPrompt = (enhancement.response ?? '').trim() || userPrompt;
    // Step 2: Generate image with enhanced prompt
    const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt: enhancedPrompt,
    });
    const imageBytes = await new Response(imageStream).bytes();
    // Encode in 32KB slices: spreading the whole byte array into one
    // String.fromCharCode(...) call overflows the stack on large images.
    let binary = '';
    for (let i = 0; i < imageBytes.length; i += 0x8000) {
      binary += String.fromCharCode(...imageBytes.subarray(i, i + 0x8000));
    }
    const base64 = btoa(binary);
    return c.json({
      success: true,
      originalPrompt: userPrompt,
      enhancedPrompt,
      image: `data:image/png;base64,${base64}`,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// List Generated Images (from R2)
// ============================================================================
/**
 * List up to 100 stored images under the images/ prefix.
 * Returns key, size, upload time, and a public URL per object.
 */
app.get('/images', async (c) => {
  try {
    if (!c.env.BUCKET) {
      return c.json({ error: 'R2 bucket not configured' }, 500);
    }
    const listed = await c.env.BUCKET.list({
      prefix: 'images/',
      limit: 100, // single page; no pagination cursor handling here
    });
    const images = listed.objects.map((obj) => ({
      key: obj.key,
      size: obj.size,
      uploaded: obj.uploaded,
      url: `https://your-domain.com/${obj.key}`, // update with your R2 public URL
    }));
    return c.json({
      success: true,
      count: images.length,
      images,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Health Check
// ============================================================================
/** Liveness probe — reports service status and current time. */
app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});
export default app;

View File

@@ -0,0 +1,437 @@
/**
* Cloudflare Workers AI - Text Generation Examples
*
* This template demonstrates:
* - Basic text generation (prompt and messages)
* - Streaming responses (RECOMMENDED for production)
* - Chat completions with conversation history
* - Structured output with JSON
* - Error handling and retry logic
* - Rate limit management
*/
import { Hono } from 'hono';
// Worker environment bindings (configured in wrangler.toml).
type Bindings = {
  AI: Ai; // Workers AI binding for text-generation models
};

// Hono app typed with the Worker bindings so c.env is fully typed.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Basic Text Generation
// ============================================================================
/**
 * Simple prompt-style generation (deprecated pattern, use messages instead).
 * Buffers the whole response — prefer /stream for production.
 *
 * POST body: { prompt: string }
 */
app.post('/simple', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      prompt,
    });
    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Streaming Text Generation (RECOMMENDED)
// ============================================================================
/**
 * Streaming is ESSENTIAL for production:
 * - Prevents buffering large responses in memory
 * - Faster time-to-first-token
 * - Better user experience
 * - Avoids Worker timeout issues
 *
 * POST body: { prompt: string } — responds with server-sent events.
 */
app.post('/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
      stream: true, // Enable streaming
    });
    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'cache-control': 'no-cache',
        connection: 'keep-alive',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Chat Completions with History
// ============================================================================
/**
 * Streamed chat completion over a full conversation history supplied by
 * the client (system/user/assistant messages, oldest first).
 *
 * POST body: { messages: Array<{ role, content }> }
 */
app.post('/chat', async (c) => {
  try {
    const { messages } = await c.req.json<{
      messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>;
    }>();
    // Validate messages before spending an inference call
    if (!messages || messages.length === 0) {
      return c.json({ error: 'Messages array is required' }, 400);
    }
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages,
      stream: true,
      max_tokens: 512, // Limit response length
    });
    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Streaming with Custom Parameters
// ============================================================================
/**
 * Streamed generation with caller-tunable sampling parameters.
 *
 * POST body: { prompt: string, temperature?: number, max_tokens?: number }
 * (temperature defaults to 0.7, max_tokens to 512)
 */
app.post('/stream/custom', async (c) => {
  try {
    const { prompt, temperature = 0.7, max_tokens = 512 } = await c.req.json<{
      prompt: string;
      temperature?: number;
      max_tokens?: number;
    }>();
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: 'You are a helpful AI assistant.',
        },
        {
          role: 'user',
          content: prompt,
        },
      ],
      stream: true,
      max_tokens,
      temperature, // Controls randomness (0.0-1.0)
    });
    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Structured Output (JSON)
// ============================================================================
/**
 * Generate structured JSON output (e.g. for data extraction).
 *
 * POST body: { topic: string }
 * LLMs frequently wrap JSON in markdown code fences despite instructions,
 * so fences are stripped before parsing — previously those responses
 * failed JSON.parse and surfaced as a generic 500.
 */
app.post('/structured', async (c) => {
  try {
    const { topic } = await c.req.json<{ topic: string }>();
    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a helpful assistant that ONLY returns valid JSON. Never include explanations or markdown, just raw JSON.',
        },
        {
          role: 'user',
          content: `Generate a recipe for ${topic}. Return JSON with keys: name, ingredients (array), instructions (array), prepTime (number in minutes)`,
        },
      ],
      max_tokens: 1024,
    });
    // Strip optional ```json ... ``` fencing, then parse
    const raw = (response.response ?? '')
      .replace(/^\s*```(?:json)?\s*/i, '')
      .replace(/```\s*$/, '')
      .trim();
    const data = JSON.parse(raw);
    return c.json({
      success: true,
      data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Model Comparison
// ============================================================================
/**
 * Run the same prompt against several models in parallel and report each
 * model's response and wall-clock latency.
 *
 * POST body: { prompt: string }
 */
app.post('/compare', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const models = [
      '@cf/meta/llama-3.1-8b-instruct', // Balanced
      '@cf/meta/llama-3.2-1b-instruct', // Fast
      '@cf/qwen/qwen1.5-14b-chat-awq', // High quality
    ];
    // Promise.all keeps the three inference calls concurrent; one model
    // failing rejects the whole comparison (handled by the catch below)
    const results = await Promise.all(
      models.map(async (model) => {
        const start = Date.now();
        const response = await c.env.AI.run(model, {
          messages: [{ role: 'user', content: prompt }],
          max_tokens: 256,
        });
        const duration = Date.now() - start;
        return {
          model,
          response: response.response,
          duration, // milliseconds for this model's call
        };
      })
    );
    return c.json({
      success: true,
      results,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Error Handling with Retry
// ============================================================================
/**
* Retry logic for rate limits and transient errors
*/
/**
 * Run a Workers AI model with retry logic for transient failures.
 *
 * - Rate-limit errors (429): retried with exponential backoff (1s, 2s, 4s...).
 * - "model unavailable" on the first attempt: retried against a smaller
 *   fallback model.
 * - Any other error is thrown immediately.
 *
 * @param ai Workers AI binding (env.AI)
 * @param model Model identifier, e.g. '@cf/meta/llama-3.1-8b-instruct'
 * @param inputs Model inputs, passed straight through to ai.run()
 * @param maxRetries Maximum number of attempts; must be >= 1
 * @returns Result of the first successful ai.run() call
 * @throws The last encountered error once attempts are exhausted
 */
async function runWithRetry(
  ai: Ai,
  model: string,
  inputs: any,
  maxRetries = 3
): Promise<any> {
  // Guard: with maxRetries <= 0 the original loop never ran and
  // `throw lastError!` threw `undefined`. Fail loudly instead.
  if (maxRetries < 1) {
    throw new Error('maxRetries must be at least 1');
  }
  let lastError: Error | undefined;
  for (let i = 0; i < maxRetries; i++) {
    try {
      return await ai.run(model, inputs);
    } catch (error) {
      lastError = error as Error;
      const message = lastError.message.toLowerCase();
      // Rate limit (429) - retry with exponential backoff
      if (message.includes('429') || message.includes('rate limit')) {
        if (i < maxRetries - 1) {
          const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s
          console.log(`Rate limited. Retrying in ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          continue;
        }
      }
      // Model unavailable - try a faster fallback model (first attempt only,
      // so a broken fallback cannot loop forever)
      if (message.includes('model') && message.includes('unavailable')) {
        if (i === 0) {
          console.log('Model unavailable, trying fallback...');
          model = '@cf/meta/llama-3.2-1b-instruct'; // Faster fallback
          continue;
        }
      }
      // Other errors - throw immediately
      throw error;
    }
  }
  // Reachable only when the final attempt ended in `continue`
  // (e.g. maxRetries === 1 with an unavailable model).
  throw lastError!;
}
/**
 * POST /reliable
 * Chat completion backed by runWithRetry, so transient rate-limit and
 * availability errors are retried before the request fails.
 */
app.post('/reliable', async (c) => {
  try {
    const body = await c.req.json<{ prompt: string }>();
    const result = await runWithRetry(c.env.AI, '@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: body.prompt }],
    });
    return c.json({ success: true, response: result.response });
  } catch (error) {
    const message = (error as Error).message;
    return c.json({ success: false, error: message }, 500);
  }
});
// ============================================================================
// Token Length Validation
// ============================================================================
/**
* Validate input length to prevent token limit errors
* Approximate: 1 token ≈ 4 characters
*/
/**
 * Rough token-count estimate for a piece of text, using the common
 * heuristic of ~4 characters per token (rounded up).
 */
const estimateTokens = (text: string): number => Math.ceil(text.length / 4);
/**
 * POST /validate
 * Reject oversized prompts before hitting the model, using a cheap
 * character-based token estimate; otherwise run the chat completion.
 */
app.post('/validate', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const maxInputTokens = 2048; // Most models support 2K-128K
    const estimatedTokens = estimateTokens(prompt);
    // Guard clause: fail fast with a 400 instead of a model-side error.
    if (estimatedTokens > maxInputTokens) {
      const error = `Input too long: ${estimatedTokens} tokens (max: ${maxInputTokens})`;
      return c.json({ success: false, error }, 400);
    }
    const result = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });
    return c.json({ success: true, response: result.response, estimatedTokens });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// System Prompts & Personas
// ============================================================================
/**
 * System prompts that give the assistant a distinct voice.
 * `as const` makes the table readonly and keeps keys/values as literal
 * types, so `keyof typeof PERSONAS` stays a precise union for routing.
 */
const PERSONAS = {
  helpful: 'You are a helpful AI assistant.',
  concise: 'You are a concise AI assistant. Keep responses brief.',
  technical: 'You are a technical AI assistant. Provide detailed, accurate information.',
  creative: 'You are a creative AI assistant. Be imaginative and original.',
} as const;
app.post('/persona/:persona', async (c) => {
try {
const persona = c.req.param('persona') as keyof typeof PERSONAS;
const { prompt } = await c.req.json<{ prompt: string }>();
if (!PERSONAS[persona]) {
return c.json({ error: 'Invalid persona' }, 400);
}
const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
messages: [
{ role: 'system', content: PERSONAS[persona] },
{ role: 'user', content: prompt },
],
stream: true,
});
return new Response(stream, {
headers: { 'content-type': 'text/event-stream' },
});
} catch (error) {
return c.json(
{
success: false,
error: (error as Error).message,
},
500
);
}
});
// ============================================================================
// Health Check
// ============================================================================
// Liveness probe for monitoring: always 200 with the current server time.
app.get('/health', (c) =>
  c.json({ status: 'ok', timestamp: new Date().toISOString() })
);
export default app;

View File

@@ -0,0 +1,417 @@
/**
* Cloudflare Workers AI - Vision Models Examples
*
* This template demonstrates:
* - Llama 3.2 11B Vision Instruct for image understanding
* - Image captioning and description
* - Visual question answering
* - Base64 image encoding
* - Combining vision + text prompts
*/
import { Hono } from 'hono';
// Worker bindings exposed to handlers via c.env.
type Bindings = {
  AI: Ai; // Workers AI binding (configured under "ai" in wrangler.jsonc)
};
// Hono app typed with this Worker's bindings.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Image Understanding
// ============================================================================
/**
* Llama 3.2 11B Vision Instruct
* - Understands images and answers questions
* - Accepts base64-encoded images
* - Rate limit: 720/min
*/
/**
 * POST /vision/understand
 * Answer a free-form question about a single base64-encoded image using
 * Llama 3.2 11B Vision Instruct.
 */
app.post('/vision/understand', async (c) => {
  try {
    const { image, question = 'What is in this image?' } = await c.req.json<{
      image: string; // Base64 data URL or bare base64 string
      question?: string;
    }>();
    // Accept either a full data URL or a bare base64 payload.
    let imageUrl = image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    const result = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: question },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    return c.json({ success: true, question, answer: result.response });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Image Captioning
// ============================================================================
/**
 * POST /vision/caption
 * Produce a rich natural-language caption for a base64-encoded image.
 */
app.post('/vision/caption', async (c) => {
  try {
    const { image } = await c.req.json<{ image: string }>();
    // Normalize to a data URL if a bare base64 string was supplied.
    let imageUrl = image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    const result = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: 'Generate a detailed caption for this image. Describe what you see, including objects, people, setting, mood, and any notable details.',
            },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    return c.json({ success: true, caption: result.response });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Visual Question Answering
// ============================================================================
/**
 * POST /vision/qa
 * Answer a batch of questions about a single image. Each question is a
 * separate vision-model request; all requests run in parallel.
 */
app.post('/vision/qa', async (c) => {
  try {
    const { image, questions } = await c.req.json<{
      image: string;
      questions: string[];
    }>();
    if (!questions || questions.length === 0) {
      return c.json({ error: 'questions array is required' }, 400);
    }
    // Normalize to a data URL if a bare base64 string was supplied.
    let imageUrl = image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    // One vision call per question, fanned out concurrently.
    const ask = async (question: string) => {
      const reply = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: question },
              { type: 'image_url', image_url: { url: imageUrl } },
            ],
          },
        ],
      });
      return { question, answer: reply.response };
    };
    const answers = await Promise.all(questions.map((q) => ask(q)));
    return c.json({ success: true, count: answers.length, results: answers });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Image Analysis (Structured Output)
// ============================================================================
/**
 * POST /vision/analyze
 * Ask the vision model for a structured JSON analysis of an image. If the
 * model's reply is not parseable JSON, the raw text is returned instead.
 */
app.post('/vision/analyze', async (c) => {
  try {
    const body = await c.req.json<{ image: string }>();
    // Normalize to a data URL if a bare base64 string was supplied.
    let imageUrl = body.image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: `Analyze this image and return a JSON object with:
- objects: array of objects detected
- scene: description of the setting
- mood: emotional tone
- colors: dominant colors
- text: any visible text
Return ONLY valid JSON, no explanations.`,
            },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    // Best-effort parse: fall back to the raw text when the model strays
    // from pure JSON instead of failing the request.
    let parsed: unknown;
    try {
      parsed = JSON.parse(response.response);
    } catch {
      return c.json({ success: true, raw: response.response });
    }
    return c.json({ success: true, analysis: parsed });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Image Comparison
// ============================================================================
/**
 * POST /vision/compare
 * Compare two images: describe each with the vision model, then ask a text
 * model to contrast the two descriptions with respect to the question.
 */
app.post('/vision/compare', async (c) => {
  try {
    const { image1, image2, question = 'What are the differences between these images?' } =
      await c.req.json<{
        image1: string;
        image2: string;
        question?: string;
      }>();
    const imageUrl1 = image1.startsWith('data:')
      ? image1
      : `data:image/png;base64,${image1}`;
    const imageUrl2 = image2.startsWith('data:')
      ? image2
      : `data:image/png;base64,${image2}`;
    // Local helper: one vision call describing a single image.
    const describe = (url: string) =>
      c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: 'Describe this image in detail.' },
              { type: 'image_url', image_url: { url } },
            ],
          },
        ],
      });
    // The two descriptions are independent - run them in parallel instead of
    // sequentially, roughly halving the end-to-end latency.
    const [analysis1, analysis2] = await Promise.all([
      describe(imageUrl1),
      describe(imageUrl2),
    ]);
    // Compare using text generation
    const comparison = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'user',
          content: `Compare these two images based on their descriptions:
Image 1: ${analysis1.response}
Image 2: ${analysis2.response}
Question: ${question}`,
        },
      ],
    });
    return c.json({
      success: true,
      image1Description: analysis1.response,
      image2Description: analysis2.response,
      comparison: comparison.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Image Upload from URL
// ============================================================================
/**
* Fetch image from URL, convert to base64, and analyze
*/
/**
 * POST /vision/url
 * Fetch an image from a URL, inline it as a base64 data URL, and ask the
 * vision model a question about it.
 */
app.post('/vision/url', async (c) => {
  try {
    const { url, question = 'What is in this image?' } = await c.req.json<{
      url: string;
      question?: string;
    }>();
    // Fetch image
    const imageResponse = await fetch(url);
    if (!imageResponse.ok) {
      return c.json({ error: 'Failed to fetch image' }, 400);
    }
    // Convert to base64 in fixed-size chunks. Spreading the whole byte
    // array (String.fromCharCode(...bytes)) passes one argument per byte
    // and overflows the call stack for images beyond ~100KB.
    const imageBytes = await imageResponse.bytes();
    const CHUNK_SIZE = 0x8000; // 32K args per fromCharCode call is safe
    let binary = '';
    for (let offset = 0; offset < imageBytes.length; offset += CHUNK_SIZE) {
      binary += String.fromCharCode(...imageBytes.subarray(offset, offset + CHUNK_SIZE));
    }
    const base64 = btoa(binary);
    const contentType = imageResponse.headers.get('content-type') || 'image/png';
    const imageUrl = `data:${contentType};base64,${base64}`;
    // Analyze image
    const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: question },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    return c.json({
      success: true,
      sourceUrl: url,
      question,
      answer: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Accessibility: Alt Text Generation
// ============================================================================
/**
 * POST /vision/alt-text
 * Generate short accessibility alt text (target: under 125 characters)
 * for a base64-encoded image.
 */
app.post('/vision/alt-text', async (c) => {
  try {
    const body = await c.req.json<{ image: string }>();
    // Normalize to a data URL if a bare base64 string was supplied.
    let imageUrl = body.image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    const result = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: 'Generate a concise, descriptive alt text for this image for accessibility purposes. Keep it under 125 characters.',
            },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    return c.json({ success: true, altText: result.response.trim() });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Health Check
// ============================================================================
// Simple liveness endpoint used by monitoring.
app.get('/health', (c) => {
  const payload = { status: 'ok', timestamp: new Date().toISOString() };
  return c.json(payload);
});
export default app;

View File

@@ -0,0 +1,138 @@
/**
* Cloudflare Workers AI - Wrangler Configuration
*
* This configuration file sets up Workers AI binding for your Worker.
* Place this in your project root as wrangler.jsonc
*/
{
  // Worker name as it appears in the Cloudflare dashboard / wrangler deploys.
  "name": "my-ai-worker",
  // Entry point module for the Worker.
  "main": "src/index.ts",
  // Pins runtime behavior to this date's compatibility flags.
  "compatibility_date": "2025-10-21",
  /**
   * AI Binding
   * Provides access to Workers AI models via env.AI
   */
  "ai": {
    "binding": "AI" // Available in your Worker as env.AI
  },
  /**
   * Optional: AI Gateway Integration
   * Provides caching, logging, and analytics for AI requests
   * Create a gateway at: https://dash.cloudflare.com/ai/ai-gateway
   */
  // Note: AI Gateway is configured per-request, not in wrangler.jsonc
  // Use the gateway option in env.AI.run():
  //   env.AI.run(model, inputs, { gateway: { id: 'my-gateway' } })
  /**
   * Optional: Vectorize Binding (for RAG patterns)
   * Store and search vector embeddings
   */
  "vectorize": [
    {
      "binding": "VECTORIZE",
      "index_name": "my-embeddings-index" // Must match an existing Vectorize index
    }
  ],
  /**
   * Optional: D1 Database (for RAG document storage)
   */
  "d1_databases": [
    {
      "binding": "DB",
      "database_name": "my-database",
      "database_id": "YOUR_DATABASE_ID" // Placeholder - replace with your real D1 id
    }
  ],
  /**
   * Optional: R2 Bucket (for image storage)
   */
  "r2_buckets": [
    {
      "binding": "BUCKET",
      "bucket_name": "ai-generated-images"
    }
  ],
  /**
   * Optional: KV Namespace (for caching AI responses)
   */
  "kv_namespaces": [
    {
      "binding": "CACHE",
      "id": "YOUR_KV_NAMESPACE_ID" // Placeholder - replace with your real KV id
    }
  ],
  /**
   * Environment Variables
   * Plain-text configuration exposed on env; use secrets for anything sensitive
   */
  "vars": {
    "ENVIRONMENT": "production"
  },
  /**
   * Secrets (use: wrangler secret put SECRET_NAME)
   * - CLOUDFLARE_API_KEY
   * - CLOUDFLARE_ACCOUNT_ID
   */
  /**
   * Workers Configuration
   */
  "limits": {
    "cpu_ms": 30000 // 30 seconds (increase for long AI operations)
  },
  /**
   * Local Development
   * Run: npx wrangler dev
   */
  "dev": {
    "port": 8787 // Local dev server port
  }
}
/**
* TypeScript Types
*
* Add to your src/index.ts:
*
* export interface Env {
* AI: Ai;
* VECTORIZE?: Vectorize;
* DB?: D1Database;
* BUCKET?: R2Bucket;
* CACHE?: KVNamespace;
* CLOUDFLARE_API_KEY?: string;
* CLOUDFLARE_ACCOUNT_ID?: string;
* }
*/
/**
* Usage Examples
*
* Basic AI inference:
* const response = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
* prompt: 'Hello!',
* });
*
* With AI Gateway:
* const response = await env.AI.run(
* '@cf/meta/llama-3.1-8b-instruct',
* { prompt: 'Hello!' },
* { gateway: { id: 'my-gateway' } }
* );
*
* Streaming:
* const stream = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
* messages: [{ role: 'user', content: 'Hello!' }],
* stream: true,
* });
*/