/**
 * Cloudflare Workers AI - Text Generation Examples
 *
 * This template demonstrates:
 * - Basic text generation (prompt and messages)
 * - Streaming responses (RECOMMENDED for production)
 * - Chat completions with conversation history
 * - Structured output with JSON
 * - Error handling and retry logic
 * - Rate limit management
 */
import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
};

const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Basic Text Generation
// ============================================================================

// Simple prompt (deprecated pattern, use messages instead)
app.post('/simple', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      prompt,
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Streaming Text Generation (RECOMMENDED)
// ============================================================================

/**
 * Streaming is ESSENTIAL for production:
 * - Prevents buffering large responses in memory
 * - Faster time-to-first-token
 * - Better user experience
 * - Avoids Worker timeout issues
 */
app.post('/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
      stream: true, // Enable streaming (the call resolves to a ReadableStream)
    });

    // Cast needed: the run() return type is a union of object and stream
    return new Response(stream as ReadableStream, {
      headers: {
        'content-type': 'text/event-stream',
        'cache-control': 'no-cache',
        connection: 'keep-alive',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
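
// ============================================================================
// Consuming the Stream (sketch)
// ============================================================================

// A minimal sketch of reading the streamed response back into a string, e.g.
// for server-side logging before forwarding. It assumes Workers AI emits SSE
// lines of the form `data: {"response":"..."}` terminated by `data: [DONE]`;
// verify against the current Workers AI docs before relying on this format.
async function collectStreamedText(stream: ReadableStream): Promise<string> {
  const reader = stream.pipeThrough(new TextDecoderStream()).getReader();
  let buffer = '';
  let text = '';

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += value;

    const lines = buffer.split('\n');
    buffer = lines.pop() ?? ''; // Keep any partial line for the next chunk

    for (const line of lines) {
      if (!line.startsWith('data: ')) continue;
      const payload = line.slice(6).trim();
      if (payload === '[DONE]') continue;
      try {
        text += JSON.parse(payload).response ?? '';
      } catch {
        // Skip malformed or partial JSON chunks
      }
    }
  }

  return text;
}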
// ============================================================================
// Chat Completions with History
// ============================================================================

app.post('/chat', async (c) => {
  try {
    const { messages } = await c.req.json<{
      messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>;
    }>();

    // Validate messages
    if (!messages || messages.length === 0) {
      return c.json({ error: 'Messages array is required' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages,
      stream: true,
      max_tokens: 512, // Limit response length
    });

    return new Response(stream as ReadableStream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
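
// A minimal sketch of capping conversation history before sending it to the
// model: drop the oldest non-system messages until the estimated token count
// fits a budget (rough 4-characters-per-token heuristic; the helper name and
// 2048 default are illustrative, not part of the Workers AI API).
type ChatMessage = { role: 'system' | 'user' | 'assistant'; content: string };

function trimHistory(messages: ChatMessage[], maxTokens = 2048): ChatMessage[] {
  const systemMessages = messages.filter((m) => m.role === 'system');
  const rest = messages.filter((m) => m.role !== 'system');
  const tokens = (m: ChatMessage) => Math.ceil(m.content.length / 4);

  let total = [...systemMessages, ...rest].reduce((sum, m) => sum + tokens(m), 0);

  // Remove oldest non-system messages first, always keeping the latest one
  while (total > maxTokens && rest.length > 1) {
    total -= tokens(rest.shift()!);
  }

  return [...systemMessages, ...rest];
}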
// ============================================================================
// Streaming with Custom Parameters
// ============================================================================

app.post('/stream/custom', async (c) => {
  try {
    const { prompt, temperature = 0.7, max_tokens = 512 } = await c.req.json<{
      prompt: string;
      temperature?: number;
      max_tokens?: number;
    }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: 'You are a helpful AI assistant.',
        },
        {
          role: 'user',
          content: prompt,
        },
      ],
      stream: true,
      max_tokens,
      temperature, // Controls randomness; lower values are more deterministic
    });

    return new Response(stream as ReadableStream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Structured Output (JSON)
// ============================================================================

/**
 * Generate structured JSON output
 * Useful for extracting data, generating schemas, etc.
 */
app.post('/structured', async (c) => {
  try {
    const { topic } = await c.req.json<{ topic: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a helpful assistant that ONLY returns valid JSON. Never include explanations or markdown, just raw JSON.',
        },
        {
          role: 'user',
          content: `Generate a recipe for ${topic}. Return JSON with keys: name, ingredients (array), instructions (array), prepTime (number in minutes)`,
        },
      ],
      max_tokens: 1024,
    });

    // Parse the JSON response; strip Markdown code fences in case the model
    // ignores the system prompt and wraps its output anyway
    const raw = (response.response ?? '').replace(/```(?:json)?/g, '').trim();
    const data = JSON.parse(raw);

    return c.json({
      success: true,
      data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
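
// Optional hardening for the /structured route above: validate the parsed
// shape before trusting it. The `Recipe` interface and guard are illustrative
// (they mirror the keys requested in the prompt), not part of any API.
interface Recipe {
  name: string;
  ingredients: string[];
  instructions: string[];
  prepTime: number;
}

function isRecipe(value: unknown): value is Recipe {
  const v = value as Recipe;
  return (
    typeof v === 'object' &&
    v !== null &&
    typeof v.name === 'string' &&
    Array.isArray(v.ingredients) &&
    Array.isArray(v.instructions) &&
    typeof v.prepTime === 'number'
  );
}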
// ============================================================================
// Model Comparison
// ============================================================================

/**
 * Compare different models side-by-side
 */
app.post('/compare', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const models = [
      '@cf/meta/llama-3.1-8b-instruct', // Balanced
      '@cf/meta/llama-3.2-1b-instruct', // Fast
      '@cf/qwen/qwen1.5-14b-chat-awq', // High quality
    ] as const;

    // Note: Promise.all rejects if any single model call fails; use
    // Promise.allSettled if partial results are acceptable
    const results = await Promise.all(
      models.map(async (model) => {
        const start = Date.now();
        const response = await c.env.AI.run(model, {
          messages: [{ role: 'user', content: prompt }],
          max_tokens: 256,
        });
        const duration = Date.now() - start;

        return {
          model,
          response: response.response,
          duration,
        };
      })
    );

    return c.json({
      success: true,
      results,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Error Handling with Retry
// ============================================================================

/**
 * Retry logic for rate limits and transient errors
 */
async function runWithRetry(
  ai: Ai,
  model: string,
  inputs: any,
  maxRetries = 3
): Promise<any> {
  let lastError: Error | undefined;

  for (let i = 0; i < maxRetries; i++) {
    try {
      return await ai.run(model, inputs);
    } catch (error) {
      lastError = error as Error;
      const message = lastError.message.toLowerCase();

      // Rate limit (429) - retry with exponential backoff
      if (message.includes('429') || message.includes('rate limit')) {
        if (i < maxRetries - 1) {
          const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s
          console.log(`Rate limited. Retrying in ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          continue;
        }
      }

      // Model unavailable - try fallback model
      if (message.includes('model') && message.includes('unavailable')) {
        if (i === 0) {
          console.log('Model unavailable, trying fallback...');
          model = '@cf/meta/llama-3.2-1b-instruct'; // Faster fallback
          continue;
        }
      }

      // Other errors - throw immediately
      throw error;
    }
  }

  throw lastError!;
}

app.post('/reliable', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await runWithRetry(c.env.AI, '@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Token Length Validation
// ============================================================================

/**
 * Validate input length to prevent token limit errors
 * Approximate: 1 token ≈ 4 characters
 */
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}
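
// A complementary sketch: instead of rejecting oversized input outright (as
// the /validate route below does), truncate it to the token budget using the
// same 4-characters-per-token heuristic. Illustrative helper, not an API.
function truncateToTokens(text: string, maxTokens: number): string {
  const maxChars = maxTokens * 4;
  return text.length <= maxChars ? text : text.slice(0, maxChars);
}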
app.post('/validate', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const estimatedTokens = estimateTokens(prompt);
    const maxInputTokens = 2048; // Conservative budget; check your model's actual context window

    if (estimatedTokens > maxInputTokens) {
      return c.json(
        {
          success: false,
          error: `Input too long: ${estimatedTokens} tokens (max: ${maxInputTokens})`,
        },
        400
      );
    }

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
      estimatedTokens,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// System Prompts & Personas
// ============================================================================

const PERSONAS = {
  helpful: 'You are a helpful AI assistant.',
  concise: 'You are a concise AI assistant. Keep responses brief.',
  technical: 'You are a technical AI assistant. Provide detailed, accurate information.',
  creative: 'You are a creative AI assistant. Be imaginative and original.',
};

app.post('/persona/:persona', async (c) => {
  try {
    const persona = c.req.param('persona') as keyof typeof PERSONAS;
    const { prompt } = await c.req.json<{ prompt: string }>();

    if (!PERSONAS[persona]) {
      return c.json({ error: 'Invalid persona' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        { role: 'system', content: PERSONAS[persona] },
        { role: 'user', content: prompt },
      ],
      stream: true,
    });

    return new Response(stream as ReadableStream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;