Initial commit
templates/ai-text-generation.ts (new file, 437 lines)
@@ -0,0 +1,437 @@
/**
 * Cloudflare Workers AI - Text Generation Examples
 *
 * This template demonstrates:
 * - Basic text generation (prompt and messages)
 * - Streaming responses (RECOMMENDED for production)
 * - Chat completions with conversation history
 * - Structured output with JSON
 * - Error handling and retry logic
 * - Rate limit management
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Basic Text Generation
// ============================================================================

// Simple prompt (deprecated pattern, use messages instead)
app.post('/simple', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      prompt,
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Streaming Text Generation (RECOMMENDED)
// ============================================================================

/**
 * Streaming is ESSENTIAL for production:
 * - Prevents buffering large responses in memory
 * - Faster time-to-first-token
 * - Better user experience
 * - Avoids Worker timeout issues
 *
 * A client-side consumption sketch follows this route.
 */

app.post('/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
      stream: true, // Enable streaming
    });

    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'cache-control': 'no-cache',
        connection: 'keep-alive',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
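
// ----------------------------------------------------------------------------
// Client-side consumption sketch (illustrative, not part of the Worker routes)
// ----------------------------------------------------------------------------
// A minimal sketch of how a client could read the /stream endpoint above. It
// assumes the Workers AI SSE format: lines of `data: {"response":"..."}`
// terminated by `data: [DONE]`. Runs anywhere with fetch + ReadableStream
// (browsers, Node 18+, other Workers). The function name is hypothetical.

async function consumeTextStream(url: string, prompt: string): Promise<string> {
  const res = await fetch(url, {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify({ prompt }),
  });

  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let text = '';

  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // SSE events are newline-delimited; keep any partial line in the buffer
    const lines = buffer.split('\n');
    buffer = lines.pop() ?? '';

    for (const line of lines) {
      if (!line.startsWith('data: ')) continue;
      const payload = line.slice('data: '.length);
      if (payload === '[DONE]') return text;
      try {
        text += JSON.parse(payload).response ?? '';
      } catch {
        // Ignore malformed or partial chunks
      }
    }
  }
  return text;
}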

// ============================================================================
// Chat Completions with History
// ============================================================================

app.post('/chat', async (c) => {
  try {
    const { messages } = await c.req.json<{
      messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>;
    }>();

    // Validate messages
    if (!messages || messages.length === 0) {
      return c.json({ error: 'Messages array is required' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages,
      stream: true,
      max_tokens: 512, // Limit response length
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
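
// Example request body for /chat (illustrative). The Worker is stateless, so
// the client resends the full conversation history on every turn:
//
//   {
//     "messages": [
//       { "role": "system", "content": "You are a helpful assistant." },
//       { "role": "user", "content": "What is a Cloudflare Worker?" },
//       { "role": "assistant", "content": "A serverless function that runs at the edge." },
//       { "role": "user", "content": "How do I deploy one?" }
//     ]
//   }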

// ============================================================================
// Streaming with Custom Parameters
// ============================================================================

app.post('/stream/custom', async (c) => {
  try {
    const { prompt, temperature = 0.7, max_tokens = 512 } = await c.req.json<{
      prompt: string;
      temperature?: number;
      max_tokens?: number;
    }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: 'You are a helpful AI assistant.',
        },
        {
          role: 'user',
          content: prompt,
        },
      ],
      stream: true,
      max_tokens,
      temperature, // Controls randomness (0 = deterministic, higher = more varied)
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
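
// Rough guide (illustrative, not a model-specific guarantee): temperatures
// near 0 give focused, repeatable output (extraction, classification), while
// values around 0.7-1.0 give more varied output (brainstorming, creative
// writing). Accepted ranges vary by model; check the model listing before
// sending values above 1.0.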

// ============================================================================
// Structured Output (JSON)
// ============================================================================

/**
 * Generate structured JSON output
 * Useful for extracting data, generating schemas, etc.
 */

app.post('/structured', async (c) => {
  try {
    const { topic } = await c.req.json<{ topic: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a helpful assistant that ONLY returns valid JSON. Never include explanations or markdown, just raw JSON.',
        },
        {
          role: 'user',
          content: `Generate a recipe for ${topic}. Return JSON with keys: name, ingredients (array), instructions (array), prepTime (number in minutes)`,
        },
      ],
      max_tokens: 1024,
    });

    // Parse JSON response (throws if the model returns extra text; a more
    // defensive helper follows this route)
    const data = JSON.parse(response.response);

    return c.json({
      success: true,
      data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
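
// Defensive JSON extraction (illustrative). Despite the system prompt, models
// occasionally wrap output in markdown fences; stripping them before parsing
// makes the /structured route more robust. The fence pattern is an assumption
// about common model behavior, not an API guarantee.
function extractJson(raw: string): unknown {
  const cleaned = raw
    .replace(/^```(?:json)?\s*/i, '') // strip a leading ``` or ```json fence
    .replace(/\s*```\s*$/, '') // strip a trailing fence
    .trim();
  return JSON.parse(cleaned);
}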

// ============================================================================
// Model Comparison
// ============================================================================

/**
 * Compare different models side-by-side
 */

app.post('/compare', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const models = [
      '@cf/meta/llama-3.1-8b-instruct', // Balanced
      '@cf/meta/llama-3.2-1b-instruct', // Fast
      '@cf/qwen/qwen1.5-14b-chat-awq', // High quality
    ];

    const results = await Promise.all(
      models.map(async (model) => {
        const start = Date.now();
        const response = await c.env.AI.run(model, {
          messages: [{ role: 'user', content: prompt }],
          max_tokens: 256,
        });
        const duration = Date.now() - start;

        return {
          model,
          response: response.response,
          duration,
        };
      })
    );

    return c.json({
      success: true,
      results,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
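
// Note (illustrative): Promise.all above rejects the whole comparison if any
// single model errors. A Promise.allSettled variant keeps partial results,
// e.g. with the per-model call factored into a hypothetical runOne helper:
//
//   const settled = await Promise.allSettled(models.map(runOne));
//   const ok = settled.flatMap((r) => (r.status === 'fulfilled' ? [r.value] : []));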

// ============================================================================
// Error Handling with Retry
// ============================================================================

/**
 * Retry logic for rate limits and transient errors
 */

async function runWithRetry(
  ai: Ai,
  model: string,
  inputs: any,
  maxRetries = 3
): Promise<any> {
  let lastError: Error | undefined;

  for (let i = 0; i < maxRetries; i++) {
    try {
      // Cast needed because Ai.run expects a known model name, not a string
      return await ai.run(model as keyof AiModels, inputs);
    } catch (error) {
      lastError = error as Error;
      const message = lastError.message.toLowerCase();

      // Rate limit (429) - retry with exponential backoff
      if (message.includes('429') || message.includes('rate limit')) {
        if (i < maxRetries - 1) {
          const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s
          console.log(`Rate limited. Retrying in ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          continue;
        }
      }

      // Model unavailable - try fallback model
      if (message.includes('model') && message.includes('unavailable')) {
        if (i === 0) {
          console.log('Model unavailable, trying fallback...');
          model = '@cf/meta/llama-3.2-1b-instruct'; // Faster fallback
          continue;
        }
      }

      // Other errors - throw immediately
      throw error;
    }
  }

  throw lastError!;
}
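
// Full-jitter backoff (illustrative alternative): randomizing the delay avoids
// many isolates retrying in lockstep after a shared rate limit. This helper is
// hypothetical and not used by runWithRetry above; swap it in where the delay
// is computed.
function backoffWithJitter(attempt: number, baseMs = 1000): number {
  return Math.random() * Math.pow(2, attempt) * baseMs; // 0..1s, 0..2s, 0..4s
}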

app.post('/reliable', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await runWithRetry(c.env.AI, '@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Token Length Validation
// ============================================================================

/**
 * Validate input length to prevent token limit errors
 * Rough approximation for English text: 1 token ≈ 4 characters
 */

function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}
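
// Worked example (illustrative): a 10,000-character prompt estimates to
// Math.ceil(10000 / 4) = 2500 tokens, which the /validate route below would
// reject against its 2048-token input budget.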

app.post('/validate', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const estimatedTokens = estimateTokens(prompt);
    const maxInputTokens = 2048; // Conservative budget; context windows vary by model

    if (estimatedTokens > maxInputTokens) {
      return c.json(
        {
          success: false,
          error: `Input too long: ${estimatedTokens} tokens (max: ${maxInputTokens})`,
        },
        400
      );
    }

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
      estimatedTokens,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// System Prompts & Personas
// ============================================================================

const PERSONAS = {
  helpful: 'You are a helpful AI assistant.',
  concise: 'You are a concise AI assistant. Keep responses brief.',
  technical: 'You are a technical AI assistant. Provide detailed, accurate information.',
  creative: 'You are a creative AI assistant. Be imaginative and original.',
};

app.post('/persona/:persona', async (c) => {
  try {
    const persona = c.req.param('persona') as keyof typeof PERSONAS;
    const { prompt } = await c.req.json<{ prompt: string }>();

    if (!PERSONAS[persona]) {
      return c.json({ error: 'Invalid persona' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        { role: 'system', content: PERSONAS[persona] },
        { role: 'user', content: prompt },
      ],
      stream: true,
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;
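
/*
 * Example requests (illustrative; assumes a local `wrangler dev` server on
 * the default port 8787, so adjust host and port to your deployment):
 *
 *   curl -X POST http://localhost:8787/simple \
 *     -H 'content-type: application/json' \
 *     -d '{"prompt":"Write a haiku about edge computing"}'
 *
 *   curl -N -X POST http://localhost:8787/stream \
 *     -H 'content-type: application/json' \
 *     -d '{"prompt":"Explain streaming responses in one paragraph"}'
 */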