AI SDK Core - Production Patterns

Best practices for deploying AI SDK Core in production environments.


Performance Optimization

1. Streaming for Long-Form Content

Always use streaming for user-facing long-form content:

// ✅ GOOD: User-facing (better perceived performance)
app.post('/chat', async (req, res) => {
  const stream = streamText({
    model: openai('gpt-4'),
    prompt: req.body.message,
  });

  // Pipe the data stream into the Node/Express response
  stream.pipeDataStreamToResponse(res);
});

// ❌ BAD: User waits for entire response
app.post('/chat', async (req, res) => {
  const result = await generateText({
    model: openai('gpt-4'),
    prompt: req.body.message,
  });

  return res.json({ response: result.text });
});

// ✅ GOOD: Background tasks (no user waiting)
async function processDocument(doc: string) {
  const result = await generateText({
    model: openai('gpt-4'),
    prompt: `Analyze: ${doc}`,
  });

  await saveToDatabase(result.text);
}

2. Set Appropriate maxOutputTokens

// ✅ GOOD: Limit token usage based on use case
const shortSummary = await generateText({
  model: openai('gpt-4'),
  prompt: 'Summarize in 2 sentences',
  maxOutputTokens: 100,  // Prevents over-generation
});

const article = await generateText({
  model: openai('gpt-4'),
  prompt: 'Write article',
  maxOutputTokens: 2000,  // Appropriate for long-form
});

// ❌ BAD: No limit (can waste tokens/money)
const unlimited = await generateText({
  model: openai('gpt-4'),
  prompt: 'Write something',
  // No maxOutputTokens
});

3. Cache Provider Instances

// ✅ GOOD: Reuse provider instances
const gpt4 = openai('gpt-4-turbo');
const claude = anthropic('claude-3-5-sonnet-20241022');

app.post('/chat', async (req, res) => {
  const result = await generateText({
    model: gpt4,  // Reuse
    prompt: req.body.message,
  });
  return res.json({ response: result.text });
});

// ❌ BAD: Create new instance every time
app.post('/chat', async (req, res) => {
  const result = await generateText({
    model: openai('gpt-4-turbo'),  // New instance each call
    prompt: req.body.message,
  });
});

4. Optimize Zod Schemas (Especially in Workers)

// ❌ BAD: Complex schema at top level (slow startup)
const ComplexSchema = z.object({
  // 50+ fields with deep nesting
});

// ✅ GOOD: Define schemas inside functions
function generateStructuredData() {
  const schema = z.object({
    // Schema definition here
  });

  return generateObject({ model: openai('gpt-4'), schema, prompt: '...' });
}

// ✅ GOOD: Split into smaller reusable schemas
const AddressSchema = z.object({ street: z.string(), city: z.string() });
const PersonSchema = z.object({ name: z.string(), address: AddressSchema });
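
A hedged sketch combining both ideas above (small reusable schemas, composed inside the function that needs them); the field names and prompt are illustrative:

import { generateObject } from 'ai';
import { openai } from '@ai-sdk/openai';
import { z } from 'zod';

async function extractPerson(text: string) {
  // Build the composed schema inside the function (keeps Worker startup fast)
  const AddressSchema = z.object({ street: z.string(), city: z.string() });
  const PersonSchema = z.object({ name: z.string(), address: AddressSchema });

  const { object } = await generateObject({
    model: openai('gpt-4'),
    schema: PersonSchema,
    prompt: `Extract the person and their address from: ${text}`,
  });

  return object;  // Typed as { name: string; address: { street: string; city: string } }
}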

Error Handling

1. Wrap All AI Calls in Try-Catch

async function generateSafely(prompt: string) {
  try {
    const result = await generateText({
      model: openai('gpt-4'),
      prompt,
    });

    return { success: true, data: result.text };
  } catch (error) {
    if (error instanceof AI_APICallError) {
      console.error('API call failed:', error.statusCode, error.message);
      return { success: false, error: 'AI service temporarily unavailable' };
    } else if (error instanceof AI_NoContentGeneratedError) {
      console.error('No content generated');
      return { success: false, error: 'Unable to generate response' };
    } else {
      console.error('Unknown error:', error);
      return { success: false, error: 'An error occurred' };
    }
  }
}

2. Handle Specific Error Types

import {
  AI_APICallError,
  AI_NoContentGeneratedError,
  AI_NoObjectGeneratedError,
  AI_RetryError,
  AI_TypeValidationError,
} from 'ai';

async function robustGeneration(prompt: string) {
  try {
    return await generateText({ model: openai('gpt-4'), prompt });
  } catch (error) {
    switch (error.constructor) {
      case AI_APICallError:
        if (error.statusCode === 429) {
          // Rate limit - wait and retry (wait/retry are placeholder helpers, see below)
          await wait(5000);
          return retry();
        } else if (error.statusCode >= 500) {
          // Provider issue - try fallback
          return generateText({ model: anthropic('claude-3-5-sonnet-20241022'), prompt });
        }
        throw error;  // Other API errors: rethrow

      case AI_RetryError:
        // All retries failed - use fallback provider
        return generateText({ model: google('gemini-2.5-pro'), prompt });

      case AI_NoContentGeneratedError:
        // Content filtered - return safe message
        return { text: 'Unable to generate response for this input.' };

      default:
        throw error;
    }
  }
}
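
The wait and retry helpers in the rate-limit branch above are placeholders; wait can be a one-line delay, and retry would call robustGeneration again (or the retry wrapper from the next pattern):

// Simple delay helper used in the rate-limit branch above
const wait = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));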

3. Implement Retry Logic

async function generateWithRetry(prompt: string, maxRetries = 3) {
  for (let i = 0; i < maxRetries; i++) {
    try {
      return await generateText({
        model: openai('gpt-4'),
        prompt,
        maxRetries: 2,  // Built-in retry
      });
    } catch (error) {
      if (i === maxRetries - 1) throw error; // Last attempt failed

      // Exponential backoff
      const delay = Math.pow(2, i) * 1000;
      console.log(`Retry ${i + 1}/${maxRetries} after ${delay}ms`);
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }
}

4. Log Errors Properly

function logAIError(error: any, context: Record<string, any>) {
  const errorLog = {
    timestamp: new Date().toISOString(),
    type: error.constructor.name,
    message: error.message,
    statusCode: error.statusCode,
    responseBody: error.responseBody,
    context,
    stack: error.stack,
  };

  // Send to monitoring service (e.g., Sentry, Datadog)
  console.error('AI SDK Error:', JSON.stringify(errorLog));

  // Track metrics
  metrics.increment('ai.error', {
    type: error.constructor.name,
    statusCode: error.statusCode,
  });
}

try {
  const result = await generateText({ model: openai('gpt-4'), prompt });
} catch (error) {
  logAIError(error, { prompt, model: 'gpt-4' });
  throw error;
}

Cost Optimization

1. Choose Appropriate Models

// Model selection based on task complexity
async function generateWithCostOptimization(prompt: string, complexity: 'simple' | 'medium' | 'complex') {
  const models = {
    simple: openai('gpt-3.5-turbo'),     // $0.50 / 1M tokens
    medium: openai('gpt-4-turbo'),       // $10 / 1M tokens
    complex: openai('gpt-4'),            // $30 / 1M tokens
  };

  return generateText({
    model: models[complexity],
    prompt,
  });
}

// Usage
await generateWithCostOptimization('Translate to Spanish', 'simple');
await generateWithCostOptimization('Analyze sentiment', 'medium');
await generateWithCostOptimization('Complex reasoning task', 'complex');

2. Set Token Limits

// Prevent runaway costs
const result = await generateText({
  model: openai('gpt-4'),
  prompt: 'Write essay',
  maxOutputTokens: 500,  // Hard limit
});

// Adjust limits per use case
const limits = {
  chatMessage: 200,
  summary: 300,
  article: 2000,
  analysis: 1000,
};
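
A minimal sketch of applying the per-use-case limits above; the contentType parameter and generateFor helper are illustrative, not part of the SDK:

import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';

type ContentType = keyof typeof limits;

async function generateFor(contentType: ContentType, prompt: string) {
  return generateText({
    model: openai('gpt-4'),
    prompt,
    maxOutputTokens: limits[contentType],  // e.g. 200 for a chat message
  });
}

// Usage
await generateFor('summary', 'Summarize this meeting transcript: ...');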

3. Cache Results

import { LRUCache } from 'lru-cache';

const cache = new LRUCache<string, string>({
  max: 1000,  // Max 1000 items
  ttl: 1000 * 60 * 60,  // 1 hour TTL
});

async function generateWithCache(prompt: string) {
  const cacheKey = `ai:${hash(prompt)}`;

  // Check cache
  const cached = cache.get(cacheKey);
  if (cached) {
    console.log('Cache hit');
    return { text: cached, cached: true };
  }

  // Generate
  const result = await generateText({
    model: openai('gpt-4'),
    prompt,
  });

  // Store in cache
  cache.set(cacheKey, result.text);

  return { text: result.text, cached: false };
}
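
The hash helper above is assumed rather than provided by the SDK; a minimal sketch using Node's built-in crypto module:

import { createHash } from 'node:crypto';

// Any deterministic hash works - identical prompts must map to identical cache keys
function hash(input: string): string {
  return createHash('sha256').update(input).digest('hex');
}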

4. Monitor Usage

// Track token usage
let totalTokensUsed = 0;
let totalCost = 0;

async function generateWithTracking(prompt: string) {
  const result = await generateText({
    model: openai('gpt-4'),
    prompt,
  });

  // Track tokens
  totalTokensUsed += result.usage.totalTokens;

  // Estimate cost (GPT-4: $30/1M tokens)
  const cost = (result.usage.totalTokens / 1_000_000) * 30;
  totalCost += cost;

  console.log(`Tokens: ${result.usage.totalTokens}, Cost: $${cost.toFixed(4)}`);
  console.log(`Total tokens: ${totalTokensUsed}, Total cost: $${totalCost.toFixed(2)}`);

  return result;
}
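
To avoid hard-coding a single $30/1M rate, the price can be looked up per model. A sketch using the approximate figures from the model-selection example above (verify against your provider's current pricing):

// Approximate USD per 1M tokens - assumed figures, check the provider's pricing page
const PRICE_PER_MILLION: Record<string, number> = {
  'gpt-3.5-turbo': 0.5,
  'gpt-4-turbo': 10,
  'gpt-4': 30,
};

function estimateCost(modelId: string, totalTokens: number): number {
  const rate = PRICE_PER_MILLION[modelId] ?? 0;
  return (totalTokens / 1_000_000) * rate;
}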

Cloudflare Workers Best Practices

1. Lazy Initialization

// ✅ GOOD: Import inside handler
export default {
  async fetch(request, env) {
    const { generateText } = await import('ai');
    const { createWorkersAI } = await import('workers-ai-provider');

    const workersai = createWorkersAI({ binding: env.AI });

    const result = await generateText({
      model: workersai('@cf/meta/llama-3.1-8b-instruct'),
      prompt: 'Hello',
    });

    return new Response(result.text);
  }
};

// ❌ BAD: Top-level imports (startup overhead)
import { generateText } from 'ai';
const workersai = createWorkersAI({ binding: env.AI }); // Runs at startup - and env is not available at module scope!

2. Monitor Startup Time

# Wrangler reports startup time
npx wrangler deploy

# Output shows:
# Startup Time: 287ms (must be <400ms)

3. Handle Streaming Properly

app.post('/chat/stream', async (c) => {
  const workersai = createWorkersAI({ binding: c.env.AI });

  const stream = streamText({
    model: workersai('@cf/meta/llama-3.1-8b-instruct'),
    prompt: 'Hello',
  });

  // Return a streaming text Response for Workers
  return stream.toTextStreamResponse({
    headers: {
      'Content-Type': 'text/plain; charset=utf-8',
      'X-Content-Type-Options': 'nosniff',
    },
  });
});

Next.js / Vercel Best Practices

1. Server Actions for Mutations

// app/actions.ts
'use server';

import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai';

export async function generateContent(input: string) {
  const result = await generateText({
    model: openai('gpt-4'),
    prompt: input,
    maxOutputTokens: 500,
  });

  return result.text;
}

// app/page.tsx (Client Component)
'use client';

import { useState } from 'react';
import { generateContent } from './actions';

export default function Page() {
  const [loading, setLoading] = useState(false);
  const [output, setOutput] = useState('');

  async function handleSubmit(formData: FormData) {
    setLoading(true);
    setOutput(await generateContent(formData.get('input') as string));
    setLoading(false);
  }

  return <form action={handleSubmit}>...</form>;
}

2. Server Components for Initial Loads

// app/page.tsx (Server Component)
export default async function Page() {
  // Generate on server
  const result = await generateText({
    model: openai('gpt-4'),
    prompt: 'Welcome message',
  });

  // No loading state needed
  return <div>{result.text}</div>;
}

3. API Routes for Streaming

// app/api/chat/route.ts
import { streamText } from 'ai';
import { openai } from '@ai-sdk/openai';

export async function POST(request: Request) {
  const { messages } = await request.json();

  const stream = streamText({
    model: openai('gpt-4-turbo'),
    messages,
  });

  return stream.toDataStreamResponse();
}

Monitoring and Logging

1. Track Key Metrics

// Token usage
metrics.gauge('ai.tokens.total', result.usage.totalTokens);
metrics.gauge('ai.tokens.prompt', result.usage.promptTokens);
metrics.gauge('ai.tokens.completion', result.usage.completionTokens);

// Response time
const startTime = Date.now();
const result = await generateText({ model: openai('gpt-4'), prompt });
metrics.timing('ai.response_time', Date.now() - startTime);

// Error rate
metrics.increment('ai.errors', { type: error.constructor.name });
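
The metrics object used here (and in the error-logging example earlier) stands in for whatever monitoring client you use (Datadog, StatsD, etc.); a minimal in-memory sketch with the same shape, purely for illustration:

// Hypothetical stand-in for a real metrics client (Datadog, StatsD, ...)
const metrics = {
  gauge(name: string, value: number, tags?: Record<string, unknown>) {
    console.log('gauge', name, value, tags ?? {});
  },
  timing(name: string, ms: number, tags?: Record<string, unknown>) {
    console.log('timing', name, ms, tags ?? {});
  },
  increment(name: string, tags?: Record<string, unknown>) {
    console.log('increment', name, tags ?? {});
  },
};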

2. Structured Logging

import winston from 'winston';

const logger = winston.createLogger({
  format: winston.format.json(),
  transports: [new winston.transports.Console()],
});

const startTime = Date.now();

logger.info('AI generation started', {
  model: 'gpt-4',
  promptLength: prompt.length,
  userId: user.id,
});

const result = await generateText({ model: openai('gpt-4'), prompt });

logger.info('AI generation completed', {
  model: 'gpt-4',
  tokensUsed: result.usage.totalTokens,
  responseLength: result.text.length,
  duration: Date.now() - startTime,
});

Rate Limiting

1. Queue Requests

import PQueue from 'p-queue';

// Limit: 50 requests per minute
const queue = new PQueue({
  concurrency: 5,
  interval: 60000,
  intervalCap: 50,
});

async function generateQueued(prompt: string) {
  return queue.add(() =>
    generateText({ model: openai('gpt-4'), prompt })
  );
}

2. Monitor Rate Limits

async function generateWithRateCheck(prompt: string) {
  const result = await generateText({
    model: openai('gpt-4'),
    prompt,
  });

  // Check rate limit headers returned by the provider (header names are provider-specific)
  const headers = result.response?.headers ?? {};
  console.log('Remaining requests:', headers['x-ratelimit-remaining']);
  console.log('Resets at:', headers['x-ratelimit-reset']);

  return result;
}

Security

1. Sanitize User Inputs

function sanitizePrompt(userInput: string): string {
  // Remove potential prompt injections
  return userInput
    .replace(/system:/gi, '')
    .replace(/ignore previous/gi, '')
    .slice(0, 1000);  // Limit length
}

const result = await generateText({
  model: openai('gpt-4'),
  prompt: sanitizePrompt(req.body.message),
});
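
String scrubbing like this is best-effort. It also helps to keep user text out of the instruction channel entirely by passing your instructions as a system message and the user input as the prompt (both are standard generateText options). A brief sketch:

const result = await generateText({
  model: openai('gpt-4'),
  system: 'You are a support assistant. Answer only questions about the product.',
  prompt: sanitizePrompt(req.body.message),  // User input stays in the user message
});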

2. Validate API Keys

// Startup validation
function validateEnv() {
  const required = ['OPENAI_API_KEY', 'ANTHROPIC_API_KEY'];

  for (const key of required) {
    if (!process.env[key]) {
      throw new Error(`Missing: ${key}`);
    }

    if (!process.env[key].match(/^sk-/)) {
      throw new Error(`Invalid format: ${key}`);
    }
  }
}

validateEnv();
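
On Cloudflare Workers there is no process.env by default; secrets are exposed on the env object passed to the handler, so the same check can run there. A hedged sketch, assuming the secrets are named as below:

// Assumed secret names - set via `wrangler secret put OPENAI_API_KEY` etc.
interface Env {
  OPENAI_API_KEY?: string;
  ANTHROPIC_API_KEY?: string;
}

function validateWorkerEnv(env: Env) {
  for (const key of ['OPENAI_API_KEY', 'ANTHROPIC_API_KEY'] as const) {
    const value = env[key];
    if (!value) throw new Error(`Missing: ${key}`);
    if (!value.startsWith('sk-')) throw new Error(`Invalid format: ${key}`);
  }
}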

Deployment

See Vercel's official deployment documentation: https://vercel.com/docs/functions

For Cloudflare Workers: https://developers.cloudflare.com/workers/


Last Updated: 2025-10-21