Initial commit
templates/ai-text-generation.ts (new file, 437 lines)
@@ -0,0 +1,437 @@
/**
 * Cloudflare Workers AI - Text Generation Examples
 *
 * This template demonstrates:
 * - Basic text generation (prompt and messages)
 * - Streaming responses (RECOMMENDED for production)
 * - Chat completions with conversation history
 * - Structured output with JSON
 * - Error handling and retry logic
 * - Rate limit management
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Basic Text Generation
// ============================================================================

// Simple prompt (deprecated pattern, use messages instead)
app.post('/simple', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      prompt,
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Streaming Text Generation (RECOMMENDED)
// ============================================================================

/**
 * Streaming is ESSENTIAL for production:
 * - Prevents buffering large responses in memory
 * - Faster time-to-first-token
 * - Better user experience
 * - Avoids Worker timeout issues
 *
 * A client-side consumption sketch follows this route.
 */

app.post('/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
      stream: true, // Enable streaming
    });

    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'cache-control': 'no-cache',
        connection: 'keep-alive',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
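
// ----------------------------------------------------------------------------
// Client-side consumption sketch (illustrative, not part of the Worker routes)
// ----------------------------------------------------------------------------
// A minimal sketch of how a client could read the /stream endpoint above. It
// assumes the Workers AI SSE format: lines of `data: {"response":"..."}`
// terminated by `data: [DONE]`. Runs anywhere with fetch + ReadableStream
// (browsers, Node 18+, other Workers). The function name is hypothetical.

async function consumeTextStream(url: string, prompt: string): Promise<string> {
  const res = await fetch(url, {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ prompt }),
  });

  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let text = '';

  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // SSE events are newline-delimited; keep any partial line in the buffer
    const lines = buffer.split('\n');
    buffer = lines.pop() ?? '';

    for (const line of lines) {
      if (!line.startsWith('data: ')) continue;
      const payload = line.slice('data: '.length);
      if (payload === '[DONE]') return text;
      try {
        text += JSON.parse(payload).response ?? '';
      } catch {
        // Ignore malformed or partial chunks
      }
    }
  }
  return text;
}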

// ============================================================================
// Chat Completions with History
// ============================================================================

app.post('/chat', async (c) => {
  try {
    const { messages } = await c.req.json<{
      messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>;
    }>();

    // Validate messages
    if (!messages || messages.length === 0) {
      return c.json({ error: 'Messages array is required' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages,
      stream: true,
      max_tokens: 512, // Limit response length
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
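
// Example request body for /chat (illustrative). The Worker is stateless, so
// the client resends the full conversation history on every turn:
//
//   {
//     "messages": [
//       { "role": "system", "content": "You are a helpful assistant." },
//       { "role": "user", "content": "What is a Cloudflare Worker?" },
//       { "role": "assistant", "content": "A serverless function that runs at the edge." },
//       { "role": "user", "content": "How do I deploy one?" }
//     ]
//   }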

// ============================================================================
// Streaming with Custom Parameters
// ============================================================================

app.post('/stream/custom', async (c) => {
  try {
    const { prompt, temperature = 0.7, max_tokens = 512 } = await c.req.json<{
      prompt: string;
      temperature?: number;
      max_tokens?: number;
    }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: 'You are a helpful AI assistant.',
        },
        {
          role: 'user',
          content: prompt,
        },
      ],
      stream: true,
      max_tokens,
      temperature, // Controls randomness (0 = deterministic, higher = more varied)
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
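
// Rough guide (illustrative, not a model-specific guarantee): temperatures
// near 0 give focused, repeatable output (extraction, classification), while
// values around 0.7-1.0 give more varied output (brainstorming, creative
// writing). Accepted ranges vary by model; check the model listing before
// sending values above 1.0.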

// ============================================================================
// Structured Output (JSON)
// ============================================================================

/**
 * Generate structured JSON output
 * Useful for extracting data, generating schemas, etc.
 */

app.post('/structured', async (c) => {
  try {
    const { topic } = await c.req.json<{ topic: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a helpful assistant that ONLY returns valid JSON. Never include explanations or markdown, just raw JSON.',
        },
        {
          role: 'user',
          content: `Generate a recipe for ${topic}. Return JSON with keys: name, ingredients (array), instructions (array), prepTime (number in minutes)`,
        },
      ],
      max_tokens: 1024,
    });

    // Parse JSON response (throws if the model returns extra text; a more
    // defensive helper follows this route)
    const data = JSON.parse(response.response);

    return c.json({
      success: true,
      data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
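
// Defensive JSON extraction (illustrative). Despite the system prompt, models
// occasionally wrap output in markdown fences; stripping them before parsing
// makes the /structured route more robust. The fence pattern is an assumption
// about common model behavior, not an API guarantee.
function extractJson(raw: string): unknown {
  const cleaned = raw
    .replace(/^```(?:json)?\s*/i, '') // strip a leading ``` or ```json fence
    .replace(/\s*```\s*$/, '') // strip a trailing fence
    .trim();
  return JSON.parse(cleaned);
}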

// ============================================================================
// Model Comparison
// ============================================================================

/**
 * Compare different models side-by-side
 */

app.post('/compare', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const models = [
      '@cf/meta/llama-3.1-8b-instruct', // Balanced
      '@cf/meta/llama-3.2-1b-instruct', // Fast
      '@cf/qwen/qwen1.5-14b-chat-awq', // High quality
    ];

    const results = await Promise.all(
      models.map(async (model) => {
        const start = Date.now();
        const response = await c.env.AI.run(model, {
          messages: [{ role: 'user', content: prompt }],
          max_tokens: 256,
        });
        const duration = Date.now() - start;

        return {
          model,
          response: response.response,
          duration,
        };
      })
    );

    return c.json({
      success: true,
      results,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
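
// Note (illustrative): Promise.all above rejects the whole comparison if any
// single model errors. A Promise.allSettled variant keeps partial results,
// e.g. with the per-model call factored into a hypothetical runOne helper:
//
//   const settled = await Promise.allSettled(models.map(runOne));
//   const ok = settled.flatMap((r) => (r.status === 'fulfilled' ? [r.value] : []));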

// ============================================================================
// Error Handling with Retry
// ============================================================================

/**
 * Retry logic for rate limits and transient errors
 */

async function runWithRetry(
  ai: Ai,
  model: string,
  inputs: any,
  maxRetries = 3
): Promise<any> {
  let lastError: Error | undefined;

  for (let i = 0; i < maxRetries; i++) {
    try {
      // Cast needed because Ai.run expects a known model name, not a string
      return await ai.run(model as keyof AiModels, inputs);
    } catch (error) {
      lastError = error as Error;
      const message = lastError.message.toLowerCase();

      // Rate limit (429) - retry with exponential backoff
      if (message.includes('429') || message.includes('rate limit')) {
        if (i < maxRetries - 1) {
          const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s
          console.log(`Rate limited. Retrying in ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          continue;
        }
      }

      // Model unavailable - try fallback model
      if (message.includes('model') && message.includes('unavailable')) {
        if (i === 0) {
          console.log('Model unavailable, trying fallback...');
          model = '@cf/meta/llama-3.2-1b-instruct'; // Faster fallback
          continue;
        }
      }

      // Other errors - throw immediately
      throw error;
    }
  }

  throw lastError!;
}
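
// Full-jitter backoff (illustrative alternative): randomizing the delay avoids
// many isolates retrying in lockstep after a shared rate limit. This helper is
// hypothetical and not used by runWithRetry above; swap it in where the delay
// is computed.
function backoffWithJitter(attempt: number, baseMs = 1000): number {
  return Math.random() * Math.pow(2, attempt) * baseMs; // 0..1s, 0..2s, 0..4s
}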

app.post('/reliable', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await runWithRetry(c.env.AI, '@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Token Length Validation
// ============================================================================

/**
 * Validate input length to prevent token limit errors
 * Rough approximation for English text: 1 token ≈ 4 characters
 */

function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}
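
// Worked example (illustrative): a 10,000-character prompt estimates to
// Math.ceil(10000 / 4) = 2500 tokens, which the /validate route below would
// reject against its 2048-token input budget.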

app.post('/validate', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const estimatedTokens = estimateTokens(prompt);
    const maxInputTokens = 2048; // Conservative budget; context windows vary by model

    if (estimatedTokens > maxInputTokens) {
      return c.json(
        {
          success: false,
          error: `Input too long: ${estimatedTokens} tokens (max: ${maxInputTokens})`,
        },
        400
      );
    }

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
      estimatedTokens,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// System Prompts & Personas
// ============================================================================

const PERSONAS = {
  helpful: 'You are a helpful AI assistant.',
  concise: 'You are a concise AI assistant. Keep responses brief.',
  technical: 'You are a technical AI assistant. Provide detailed, accurate information.',
  creative: 'You are a creative AI assistant. Be imaginative and original.',
};

app.post('/persona/:persona', async (c) => {
  try {
    const persona = c.req.param('persona') as keyof typeof PERSONAS;
    const { prompt } = await c.req.json<{ prompt: string }>();

    if (!PERSONAS[persona]) {
      return c.json({ error: 'Invalid persona' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        { role: 'system', content: PERSONAS[persona] },
        { role: 'user', content: prompt },
      ],
      stream: true,
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;
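
/*
 * Example requests (illustrative; assumes a local `wrangler dev` server on
 * the default port 8787, so adjust host and port to your deployment):
 *
 *   curl -X POST http://localhost:8787/simple \
 *     -H 'content-type: application/json' \
 *     -d '{"prompt":"Write a haiku about edge computing"}'
 *
 *   curl -N -X POST http://localhost:8787/stream \
 *     -H 'content-type: application/json' \
 *     -d '{"prompt":"Explain streaming responses in one paragraph"}'
 */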