/**
 * Cloudflare Workers AI - Text Generation Examples
 *
 * This template demonstrates:
 * - Basic text generation (prompt and messages)
 * - Streaming responses (RECOMMENDED for production)
 * - Chat completions with conversation history
 * - Structured output with JSON
 * - Error handling and retry logic
 * - Rate limit management
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Basic Text Generation
// ============================================================================

// Simple prompt (deprecated pattern, use messages instead)
app.post('/simple', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      prompt,
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Streaming Text Generation (RECOMMENDED)
// ============================================================================

/**
 * Streaming is ESSENTIAL for production:
 * - Prevents buffering large responses in memory
 * - Faster time-to-first-token
 * - Better user experience
 * - Avoids Worker timeout issues
 */
app.post('/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
      stream: true, // Enable streaming
    });

    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'cache-control': 'no-cache',
        connection: 'keep-alive',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Chat Completions with History
// ============================================================================

app.post('/chat', async (c) => {
  try {
    const { messages } = await c.req.json<{
      messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>;
    }>();

    // Validate messages
    if (!messages || messages.length === 0) {
      return c.json({ error: 'Messages array is required' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages,
      stream: true,
      max_tokens: 512, // Limit response length
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Streaming with Custom Parameters
// ============================================================================

app.post('/stream/custom', async (c) => {
  try {
    const { prompt, temperature = 0.7, max_tokens = 512 } = await c.req.json<{
      prompt: string;
      temperature?: number;
      max_tokens?: number;
    }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: 'You are a helpful AI assistant.',
        },
        {
          role: 'user',
          content: prompt,
        },
      ],
      stream: true,
      max_tokens,
      temperature, // Controls randomness (0.0-1.0)
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
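
// ============================================================================
// Client-Side Stream Consumption (illustrative)
// ============================================================================

/**
 * A minimal sketch of how a browser client could consume the SSE stream
 * returned by the endpoints above. It assumes the Workers AI SSE framing of
 * `data: <json>` lines terminated by a `data: [DONE]` sentinel; adjust the
 * parsing if your model's event format differs. The function name is
 * hypothetical and the snippet is not wired into this app -- reference only.
 */
async function consumeStreamExample(prompt: string): Promise<string> {
  const res = await fetch('/stream', {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ prompt }),
  });

  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let full = '';
  let buffer = '';

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // SSE events are newline-delimited; keep any partial line in the buffer
    const lines = buffer.split('\n');
    buffer = lines.pop() ?? '';

    for (const line of lines) {
      if (!line.startsWith('data: ')) continue;
      const payload = line.slice('data: '.length);
      if (payload === '[DONE]') return full;
      try {
        full += JSON.parse(payload).response ?? '';
      } catch {
        // Ignore partial or non-JSON keep-alive lines
      }
    }
  }
  return full;
}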
// ============================================================================
// Structured Output (JSON)
// ============================================================================

/**
 * Generate structured JSON output
 * Useful for extracting data, generating schemas, etc.
 */
app.post('/structured', async (c) => {
  try {
    const { topic } = await c.req.json<{ topic: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a helpful assistant that ONLY returns valid JSON. Never include explanations or markdown, just raw JSON.',
        },
        {
          role: 'user',
          content: `Generate a recipe for ${topic}. Return JSON with keys: name, ingredients (array), instructions (array), prepTime (number in minutes)`,
        },
      ],
      max_tokens: 1024,
    });

    // Parse JSON response (throws if the model returns non-JSON despite the
    // system prompt; a more tolerant variant is sketched below)
    const data = JSON.parse(response.response);

    return c.json({
      success: true,
      data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Model Comparison
// ============================================================================

/**
 * Compare different models side-by-side
 */
app.post('/compare', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const models = [
      '@cf/meta/llama-3.1-8b-instruct', // Balanced
      '@cf/meta/llama-3.2-1b-instruct', // Fast
      '@cf/qwen/qwen1.5-14b-chat-awq', // High quality
    ];

    const results = await Promise.all(
      models.map(async (model) => {
        const start = Date.now();
        const response = await c.env.AI.run(model, {
          messages: [{ role: 'user', content: prompt }],
          max_tokens: 256,
        });
        const duration = Date.now() - start;

        return {
          model,
          response: response.response,
          duration,
        };
      })
    );

    return c.json({
      success: true,
      results,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
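
/**
 * Models often wrap JSON in markdown fences despite instructions, which makes
 * the bare JSON.parse in /structured throw. This hypothetical helper (not
 * part of the original template) is a minimal sketch that strips a leading
 * and trailing ``` fence before parsing.
 */
function parseModelJson(raw: string): unknown {
  const trimmed = raw.trim();
  // Match an optional ```json ... ``` wrapper and capture the inner payload
  const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/);
  return JSON.parse(fenced ? fenced[1] : trimmed);
}

// Usage inside /structured, replacing the bare JSON.parse:
//   const data = parseModelJson(response.response);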
// ============================================================================
// Error Handling with Retry
// ============================================================================

/**
 * Retry logic for rate limits and transient errors
 */
async function runWithRetry(
  ai: Ai,
  model: string,
  inputs: any,
  maxRetries = 3
): Promise<any> {
  let lastError: Error;

  for (let i = 0; i < maxRetries; i++) {
    try {
      // Cast: `model` stays a plain string so it can be reassigned to a fallback
      return await ai.run(model as any, inputs);
    } catch (error) {
      lastError = error as Error;
      const message = lastError.message.toLowerCase();

      // Rate limit (429) - retry with exponential backoff
      if (message.includes('429') || message.includes('rate limit')) {
        if (i < maxRetries - 1) {
          const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s
          console.log(`Rate limited. Retrying in ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          continue;
        }
      }

      // Model unavailable - try fallback model
      if (message.includes('model') && message.includes('unavailable')) {
        if (i === 0) {
          console.log('Model unavailable, trying fallback...');
          model = '@cf/meta/llama-3.2-1b-instruct'; // Faster fallback
          continue;
        }
      }

      // Other errors - throw immediately
      throw error;
    }
  }

  throw lastError!;
}

app.post('/reliable', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await runWithRetry(c.env.AI, '@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Token Length Validation
// ============================================================================

/**
 * Validate input length to prevent token limit errors
 * Approximate: 1 token ≈ 4 characters
 */
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}

app.post('/validate', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const estimatedTokens = estimateTokens(prompt);
    const maxInputTokens = 2048; // Conservative limit; context windows vary by model (2K-128K)

    if (estimatedTokens > maxInputTokens) {
      return c.json(
        {
          success: false,
          error: `Input too long: ${estimatedTokens} tokens (max: ${maxInputTokens})`,
        },
        400
      );
    }

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
      estimatedTokens,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// System Prompts & Personas
// ============================================================================

const PERSONAS = {
  helpful: 'You are a helpful AI assistant.',
  concise: 'You are a concise AI assistant. Keep responses brief.',
  technical: 'You are a technical AI assistant. Provide detailed, accurate information.',
  creative: 'You are a creative AI assistant. Be imaginative and original.',
};

app.post('/persona/:persona', async (c) => {
  try {
    const persona = c.req.param('persona') as keyof typeof PERSONAS;
    const { prompt } = await c.req.json<{ prompt: string }>();

    if (!PERSONAS[persona]) {
      return c.json({ error: 'Invalid persona' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        { role: 'system', content: PERSONAS[persona] },
        { role: 'user', content: prompt },
      ],
      stream: true,
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;
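
/**
 * Deployment note: the `AI` binding used throughout this file must be
 * configured in wrangler.toml. A minimal sketch, assuming the default
 * binding name used above:
 *
 *   [ai]
 *   binding = "AI"
 *
 * The global `Ai` type comes from @cloudflare/workers-types (or from
 * running `wrangler types`).
 */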