/**
 * Cloudflare Workers AI - Text Generation Examples
 *
 * This template demonstrates:
 * - Basic text generation (prompt and messages)
 * - Streaming responses (RECOMMENDED for production)
 * - Chat completions with conversation history
 * - Structured output with JSON
 * - Error handling and retry logic
 * - Rate limit management
 */

import { Hono } from 'hono';

// The `Ai` binding type is provided globally by @cloudflare/workers-types.
type Bindings = {
  AI: Ai;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Basic Text Generation
// ============================================================================

// Simple prompt (deprecated pattern, use messages instead)
app.post('/simple', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      prompt,
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Streaming Text Generation (RECOMMENDED)
// ============================================================================

/**
 * Streaming is ESSENTIAL for production:
 * - Prevents buffering large responses in memory
 * - Faster time-to-first-token
 * - Better user experience
 * - Avoids Worker timeout issues
 */

app.post('/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
      stream: true, // Enable streaming
    });

    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'cache-control': 'no-cache',
        'connection': 'keep-alive',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

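/*
 * A minimal sketch of consuming the /stream endpoint from a client.
 * It assumes the SSE framing Workers AI emits ("data: {json}" lines,
 * terminated by "data: [DONE]"); adapt to your client environment:
 *
 *   const res = await fetch('/stream', {
 *     method: 'POST',
 *     headers: { 'content-type': 'application/json' },
 *     body: JSON.stringify({ prompt: 'Hello!' }),
 *   });
 *   const reader = res.body!.pipeThrough(new TextDecoderStream()).getReader();
 *   let buffer = '';
 *   while (true) {
 *     const { done, value } = await reader.read();
 *     if (done) break;
 *     buffer += value;
 *     // Process complete SSE lines; keep any partial line in the buffer
 *     const lines = buffer.split('\n');
 *     buffer = lines.pop() ?? '';
 *     for (const line of lines) {
 *       if (!line.startsWith('data: ')) continue;
 *       const payload = line.slice(6);
 *       if (payload === '[DONE]') continue;
 *       const { response } = JSON.parse(payload); // incremental token text
 *       console.log(response);
 *     }
 *   }
 */
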
// ============================================================================
// Chat Completions with History
// ============================================================================

app.post('/chat', async (c) => {
  try {
    const { messages } = await c.req.json<{
      messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>;
    }>();

    // Validate messages
    if (!messages || messages.length === 0) {
      return c.json({ error: 'Messages array is required' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages,
      stream: true,
      max_tokens: 512, // Limit response length
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

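/*
 * The Worker is stateless: conversation history lives with the caller.
 * A hypothetical client keeps appending turns and resends the full array,
 * e.g. the request body for a second turn might look like:
 *
 *   {
 *     "messages": [
 *       { "role": "system", "content": "You are a helpful AI assistant." },
 *       { "role": "user", "content": "What is a Worker?" },
 *       { "role": "assistant", "content": "<first reply, accumulated from the stream>" },
 *       { "role": "user", "content": "How do I deploy one?" }
 *     ]
 *   }
 */
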
// ============================================================================
// Streaming with Custom Parameters
// ============================================================================

app.post('/stream/custom', async (c) => {
  try {
    const { prompt, temperature = 0.7, max_tokens = 512 } = await c.req.json<{
      prompt: string;
      temperature?: number;
      max_tokens?: number;
    }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: 'You are a helpful AI assistant.',
        },
        {
          role: 'user',
          content: prompt,
        },
      ],
      stream: true,
      max_tokens,
      temperature, // Controls randomness; higher values give more varied output
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Structured Output (JSON)
// ============================================================================

/**
 * Generate structured JSON output
 * Useful for extracting data, generating schemas, etc.
 */

app.post('/structured', async (c) => {
  try {
    const { topic } = await c.req.json<{ topic: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a helpful assistant that ONLY returns valid JSON. Never include explanations or markdown, just raw JSON.',
        },
        {
          role: 'user',
          content: `Generate a recipe for ${topic}. Return JSON with keys: name, ingredients (array), instructions (array), prepTime (number in minutes)`,
        },
      ],
      max_tokens: 1024,
    });

    // Parse JSON response. This throws (and returns 500 below) if the model
    // ignores the system prompt; see the extractJson sketch after this route.
    const data = JSON.parse(response.response);

    return c.json({
      success: true,
      data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

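/*
 * Models sometimes wrap JSON in markdown fences or add prose despite the
 * system prompt. A defensive parsing sketch (not wired into the route above;
 * swap it in for JSON.parse if you hit parse failures): it strips fences and
 * falls back to the outermost {...} span.
 */
function extractJson(raw: string): unknown {
  // Remove ```json ... ``` or ``` ... ``` fences if present
  const unfenced = raw.replace(/```(?:json)?\s*([\s\S]*?)\s*```/, '$1').trim();
  try {
    return JSON.parse(unfenced);
  } catch {
    // Fall back to the outermost brace-delimited span
    const start = unfenced.indexOf('{');
    const end = unfenced.lastIndexOf('}');
    if (start === -1 || end <= start) {
      throw new Error('No JSON object found in model output');
    }
    return JSON.parse(unfenced.slice(start, end + 1));
  }
}
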
// ============================================================================
// Model Comparison
// ============================================================================

/**
 * Compare different models side-by-side
 */

app.post('/compare', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const models = [
      '@cf/meta/llama-3.1-8b-instruct', // Balanced
      '@cf/meta/llama-3.2-1b-instruct', // Fast
      '@cf/qwen/qwen1.5-14b-chat-awq', // High quality
    ];

    const results = await Promise.all(
      models.map(async (model) => {
        const start = Date.now();
        const response = await c.env.AI.run(model, {
          messages: [{ role: 'user', content: prompt }],
          max_tokens: 256,
        });
        const duration = Date.now() - start; // elapsed time in ms

        return {
          model,
          response: response.response,
          duration,
        };
      })
    );

    return c.json({
      success: true,
      results,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Error Handling with Retry
// ============================================================================

/**
 * Retry logic for rate limits and transient errors
 */

async function runWithRetry(
  ai: Ai,
  model: string,
  inputs: any,
  maxRetries = 3
): Promise<any> {
  let lastError: Error;

  for (let i = 0; i < maxRetries; i++) {
    try {
      return await ai.run(model, inputs);
    } catch (error) {
      lastError = error as Error;
      const message = lastError.message.toLowerCase();

      // Rate limit (429) - retry with exponential backoff
      if (message.includes('429') || message.includes('rate limit')) {
        if (i < maxRetries - 1) {
          const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s
          console.log(`Rate limited. Retrying in ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          continue;
        }
      }

      // Model unavailable - try fallback model
      if (message.includes('model') && message.includes('unavailable')) {
        if (i === 0) {
          console.log('Model unavailable, trying fallback...');
          model = '@cf/meta/llama-3.2-1b-instruct'; // Faster fallback
          continue;
        }
      }

      // Other errors - throw immediately
      throw error;
    }
  }

  throw lastError!;
}

app.post('/reliable', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await runWithRetry(c.env.AI, '@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Token Length Validation
// ============================================================================

/**
 * Validate input length to prevent token limit errors
 * Rough heuristic for English text: 1 token ≈ 4 characters
 * (use a real tokenizer when you need accurate counts)
 */

function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}

app.post('/validate', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const estimatedTokens = estimateTokens(prompt);
    const maxInputTokens = 2048; // Context windows vary by model (roughly 2K-128K); check the model card

    if (estimatedTokens > maxInputTokens) {
      return c.json(
        {
          success: false,
          error: `Input too long: ${estimatedTokens} tokens (max: ${maxInputTokens})`,
        },
        400
      );
    }

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
      estimatedTokens,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// System Prompts & Personas
// ============================================================================

const PERSONAS = {
  helpful: 'You are a helpful AI assistant.',
  concise: 'You are a concise AI assistant. Keep responses brief.',
  technical: 'You are a technical AI assistant. Provide detailed, accurate information.',
  creative: 'You are a creative AI assistant. Be imaginative and original.',
};

app.post('/persona/:persona', async (c) => {
  try {
    const persona = c.req.param('persona') as keyof typeof PERSONAS;
    const { prompt } = await c.req.json<{ prompt: string }>();

    if (!PERSONAS[persona]) {
      return c.json({ error: 'Invalid persona' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        { role: 'system', content: PERSONAS[persona] },
        { role: 'user', content: prompt },
      ],
      stream: true,
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;