Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:24:38 +08:00
commit b41966ed51
12 changed files with 3508 additions and 0 deletions

View File

@@ -0,0 +1,491 @@
/**
* Cloudflare Workers AI - Embeddings & RAG Examples
*
* This template demonstrates:
* - Generating text embeddings with BGE models
* - Storing embeddings in Vectorize
* - Semantic search with vector similarity
* - Complete RAG (Retrieval Augmented Generation) pattern
* - Document chunking strategies
*/
import { Hono } from 'hono';
// Worker environment bindings (configured in wrangler.toml).
type Bindings = {
  AI: Ai;               // Workers AI binding for model inference
  VECTORIZE: Vectorize; // Vectorize index for storing/searching embeddings
  DB?: D1Database;      // optional D1 database — only used by /search/hybrid
};

// Hono app typed with the Worker bindings so c.env is fully typed.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Generate Embeddings
// ============================================================================
/**
* Generate embeddings for text
* BGE-base: 768 dimensions, good balance
* BGE-large: 1024 dimensions, higher accuracy
* BGE-small: 384 dimensions, faster/smaller
*/
/**
 * Generate embeddings for a single text or an array of texts.
 *
 * POST body: { text: string | string[] }
 * Returns the raw embedding shape ([batch_size, dimensions]) and vectors.
 */
app.post('/embeddings', async (c) => {
  try {
    const { text } = await c.req.json<{ text: string | string[] }>();
    // Validate before calling the model: a missing/empty `text` would
    // otherwise be sent to the model as [undefined] and fail opaquely as 500.
    if (text === undefined || text === null || (Array.isArray(text) && text.length === 0)) {
      return c.json({ success: false, error: 'text is required' }, 400);
    }
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: Array.isArray(text) ? text : [text],
    });
    return c.json({
      success: true,
      shape: embeddings.shape, // [batch_size, dimensions]
      data: embeddings.data, // Array of vectors
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Batch Embeddings
// ============================================================================
/**
 * Generate embeddings for multiple texts in one request.
 * More efficient than individual requests — one inference call per batch.
 *
 * POST body: { texts: string[] }
 */
app.post('/embeddings/batch', async (c) => {
  try {
    const { texts } = await c.req.json<{ texts: string[] }>();
    // Reject empty input before spending an inference call.
    if (!texts || texts.length === 0) {
      return c.json({ error: 'texts array is required' }, 400);
    }
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: texts,
    });
    return c.json({
      success: true,
      count: texts.length,
      shape: embeddings.shape, // [batch_size, dimensions]
      embeddings: embeddings.data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Store Embeddings in Vectorize
// ============================================================================
/**
 * Index a single document: embed its text and upsert the vector.
 * The original text is kept in vector metadata so search results can
 * return it without a second lookup.
 *
 * POST body: { id: string, text: string, metadata?: object }
 */
app.post('/documents', async (c) => {
  try {
    const { id, text, metadata } = await c.req.json<{
      id: string;
      text: string;
      metadata?: Record<string, any>;
    }>();
    // Generate embedding (single-item batch; first row is the vector)
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [text],
    });
    const vector = embeddings.data[0];
    // Store in Vectorize — upsert overwrites any existing vector with this id
    await c.env.VECTORIZE.upsert([
      {
        id,
        values: vector,
        metadata: {
          text,
          ...metadata, // caller metadata may NOT override `text` (spread order), but can add fields
          createdAt: Date.now(),
        },
      },
    ]);
    return c.json({
      success: true,
      message: 'Document indexed',
      id,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Semantic Search
// ============================================================================
/**
 * Vector-similarity search: embed the query, then find the topK nearest
 * stored vectors in Vectorize.
 *
 * POST body: { query: string, topK?: number } (topK defaults to 5)
 */
app.post('/search', async (c) => {
  try {
    const { query, topK = 5 } = await c.req.json<{
      query: string;
      topK?: number;
    }>();
    // Convert query to embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [query],
    });
    const vector = embeddings.data[0];
    // Search Vectorize; returnMetadata gives us the stored document text back
    const results = await c.env.VECTORIZE.query(vector, {
      topK,
      returnMetadata: true,
    });
    return c.json({
      success: true,
      query,
      results: results.matches.map((match) => ({
        id: match.id,
        score: match.score, // similarity score — higher is closer
        text: match.metadata?.text,
        metadata: match.metadata,
      })),
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// RAG Pattern: Query with Context
// ============================================================================
/**
 * Full RAG flow: embed the question, retrieve the topK most relevant
 * documents, inline them into the system prompt, and stream the LLM answer.
 * The retrieved source ids/scores are exposed in the `x-sources` header.
 *
 * POST body: { question: string, topK?: number } (topK defaults to 3)
 */
app.post('/rag/ask', async (c) => {
  try {
    const { question, topK = 3 } = await c.req.json<{
      question: string;
      topK?: number;
    }>();
    // Step 1: Convert question to embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [question],
    });
    const vector = embeddings.data[0];
    // Step 2: Find relevant documents
    const results = await c.env.VECTORIZE.query(vector, {
      topK,
      returnMetadata: true,
    });
    // Step 3: Build context from matches (skip matches with no stored text)
    const context = results.matches
      .map((match) => match.metadata?.text)
      .filter(Boolean)
      .join('\n\n');
    // Step 4: Generate answer with context, streamed to the client
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: `Answer the question using ONLY the following context. If the context doesn't contain relevant information, say "I don't have enough information to answer that."\n\nContext:\n${context}`,
        },
        {
          role: 'user',
          content: question,
        },
      ],
      stream: true,
    });
    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        // Let the client attribute the answer without parsing the stream
        'x-sources': JSON.stringify(
          results.matches.map((m) => ({ id: m.id, score: m.score }))
        ),
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Document Chunking
// ============================================================================
/**
 * Split a long document into overlapping word-based chunks for embedding.
 * Recommended: 200-500 tokens per chunk.
 *
 * @param text      Document to split.
 * @param chunkSize Maximum words per chunk.
 * @param overlap   Words shared between consecutive chunks (context carry-over).
 * @returns Chunks in document order; [] for empty/whitespace-only input.
 */
function chunkText(text: string, chunkSize = 500, overlap = 50): string[] {
  // filter(Boolean) drops the empty tokens split() produces for
  // leading/trailing whitespace (previously an empty text yielded
  // one empty chunk, which would get embedded and indexed).
  const words = text.split(/\s+/).filter(Boolean);
  if (words.length === 0) return [];
  // Clamp the step to >= 1 so overlap >= chunkSize cannot loop forever.
  const step = Math.max(1, chunkSize - overlap);
  const chunks: string[] = [];
  for (let i = 0; i < words.length; i += step) {
    chunks.push(words.slice(i, i + chunkSize).join(' '));
  }
  return chunks;
}
/**
 * Index a long document: split it into chunks, embed all chunks in one
 * batch call, and upsert one vector per chunk. Chunk ids are derived as
 * `${id}-chunk-${index}` so the whole document can be found/deleted later.
 *
 * POST body: { id: string, text: string, chunkSize?: number }
 * NOTE(review): all chunks are embedded in a single AI.run batch — confirm
 * the model's max batch size before indexing very long documents.
 */
app.post('/documents/long', async (c) => {
  try {
    const { id, text, chunkSize = 500 } = await c.req.json<{
      id: string;
      text: string;
      chunkSize?: number;
    }>();
    // Split into chunks (default overlap from chunkText applies)
    const chunks = chunkText(text, chunkSize);
    // Generate embeddings for all chunks in one batch request
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: chunks,
    });
    // Store each chunk in Vectorize, keeping enough metadata to
    // reconstruct the parent document and chunk ordering
    const vectors = chunks.map((chunk, index) => ({
      id: `${id}-chunk-${index}`,
      values: embeddings.data[index],
      metadata: {
        documentId: id,
        chunkIndex: index,
        text: chunk,
        totalChunks: chunks.length,
      },
    }));
    await c.env.VECTORIZE.upsert(vectors);
    return c.json({
      success: true,
      message: 'Document indexed with chunks',
      documentId: id,
      chunks: chunks.length,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// RAG with Citations
// ============================================================================
/**
 * RAG variant that numbers retrieved sources and instructs the model to
 * cite them as [Source N]. Returns a non-streamed answer plus the source
 * list so the client can render citations.
 *
 * POST body: { question: string }
 */
app.post('/rag/ask-with-citations', async (c) => {
  try {
    const { question } = await c.req.json<{ question: string }>();
    // Find relevant chunks
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [question],
    });
    const results = await c.env.VECTORIZE.query(embeddings.data[0], {
      topK: 5,
      returnMetadata: true,
    });
    // Build context with numbered citations and relevance percentages
    const context = results.matches
      .map(
        (match, i) =>
          `[Source ${i + 1}] ${match.metadata?.text} (Relevance: ${(match.score * 100).toFixed(1)}%)`
      )
      .join('\n\n');
    // Generate answer (non-streaming: the whole response is needed for JSON)
    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: `Answer the question using the provided sources. Cite sources using [Source N] format.
${context}`,
        },
        {
          role: 'user',
          content: question,
        },
      ],
    });
    return c.json({
      success: true,
      answer: response.response,
      // Source numbering matches the [Source N] tags used in the prompt
      sources: results.matches.map((m, i) => ({
        id: i + 1,
        documentId: m.metadata?.documentId,
        text: m.metadata?.text,
        score: m.score,
      })),
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Hybrid Search (Keyword + Semantic)
// ============================================================================
/**
 * Combine keyword search (D1 LIKE query) with semantic search (Vectorize)
 * for better recall. Keyword search is skipped when no D1 binding exists.
 *
 * POST body: { query: string }
 */
app.post('/search/hybrid', async (c) => {
  try {
    const { query } = await c.req.json<{ query: string }>();
    // Semantic search
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [query],
    });
    const vectorResults = await c.env.VECTORIZE.query(embeddings.data[0], {
      topK: 5,
      returnMetadata: true,
    });
    // Keyword search (if D1 available); parameterized via bind() so the
    // user query cannot inject SQL
    let keywordResults: any[] = [];
    if (c.env.DB) {
      const { results } = await c.env.DB.prepare(
        'SELECT id, text FROM documents WHERE text LIKE ? LIMIT 5'
      )
        .bind(`%${query}%`)
        .all();
      keywordResults = results || [];
    }
    // Combine both result sets; keyword hits get a fixed score of 1.0
    // since LIKE matches have no similarity measure
    const combined = [
      ...vectorResults.matches.map((m) => ({
        id: m.id,
        text: m.metadata?.text,
        score: m.score,
        source: 'vector',
      })),
      ...keywordResults.map((r) => ({
        id: r.id,
        text: r.text,
        score: 1.0,
        source: 'keyword',
      })),
    ];
    // Deduplicate by ID — Map keeps the LAST entry, so a keyword hit
    // overrides a vector hit with the same id
    const unique = Array.from(new Map(combined.map((item) => [item.id, item])).values());
    return c.json({
      success: true,
      query,
      results: unique,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Delete Documents
// ============================================================================
/**
 * Remove a single document's vector from the index by its ID.
 * Note: chunked documents use `${id}-chunk-${n}` ids and are not covered.
 */
app.delete('/documents/:id', async (c) => {
  const docId = c.req.param('id');
  try {
    await c.env.VECTORIZE.deleteByIds([docId]);
    return c.json({ success: true, message: 'Document deleted', id: docId });
  } catch (error) {
    const message = (error as Error).message;
    return c.json({ success: false, error: message }, 500);
  }
});

// ============================================================================
// Health Check
// ============================================================================
/** Liveness probe — reports service status and current time. */
app.get('/health', (c) =>
  c.json({ status: 'ok', timestamp: new Date().toISOString() })
);

export default app;

View File

@@ -0,0 +1,432 @@
/**
* Cloudflare AI Gateway - Integration Examples
*
* This template demonstrates:
* - AI Gateway setup and configuration
* - Caching AI responses
* - Logging and analytics
* - Cost tracking
* - Rate limiting
* - Feedback collection
*/
import { Hono } from 'hono';
// Worker environment bindings (configured in wrangler.toml).
type Bindings = {
  AI: Ai; // Workers AI binding — all requests are routed through AI Gateway
};

// Hono app typed with the Worker bindings so c.env is fully typed.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Basic AI Gateway Usage
// ============================================================================
/**
* Create a gateway at: https://dash.cloudflare.com/ai/ai-gateway
* Use the gateway ID in your requests
*/
/**
 * Basic gateway-routed inference. The third argument to AI.run routes the
 * request through the named AI Gateway, which records it for analytics.
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/basic', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway', // Your gateway ID
        },
      }
    );
    // Access log ID for analytics — set by the binding after the call above
    const logId = c.env.AI.aiGatewayLogId;
    return c.json({
      success: true,
      response: response.response,
      logId, // Use for tracking and feedback
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// AI Gateway with Caching
// ============================================================================
/**
 * Gateway-cached inference: identical prompts are served from the gateway
 * cache without a new inference call (no inference cost).
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/cached', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
          skipCache: false, // Use cache (default)
        },
      }
    );
    return c.json({
      success: true,
      response: response.response,
      logId: c.env.AI.aiGatewayLogId,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Skip Cache for Dynamic Content
// ============================================================================
/**
 * Same as above but with skipCache: true — use when responses must not be
 * reused (time-sensitive or personalized content).
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/no-cache', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
          skipCache: true, // Always fetch fresh response
        },
      }
    );
    return c.json({
      success: true,
      response: response.response,
      logId: c.env.AI.aiGatewayLogId,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Send Feedback to AI Gateway
// ============================================================================
/**
 * Attach user feedback to a previously logged gateway request.
 * Helps optimize prompts and model selection.
 *
 * POST body: { logId: string, rating: number (1-5), comment?: string }
 * Returns 400 when logId is missing or rating is outside 1-5 — previously
 * invalid input was forwarded to the gateway and surfaced as a 500.
 */
app.post('/gateway/feedback', async (c) => {
  try {
    const { logId, rating, comment } = await c.req.json<{
      logId: string;
      rating: number; // 1-5
      comment?: string;
    }>();
    // Validate before calling the gateway API
    if (!logId) {
      return c.json({ success: false, error: 'logId is required' }, 400);
    }
    if (!Number.isInteger(rating) || rating < 1 || rating > 5) {
      return c.json({ success: false, error: 'rating must be an integer from 1 to 5' }, 400);
    }
    const gateway = c.env.AI.gateway('my-gateway');
    await gateway.patchLog(logId, {
      feedback: {
        rating,
        comment,
      },
    });
    return c.json({
      success: true,
      message: 'Feedback recorded',
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Track Cost Per Request
// ============================================================================
/**
 * Measure wall-clock latency per request and return the gateway log ID.
 * The AI Gateway dashboard shows the neurons (cost) breakdown per log.
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/track-cost', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const start = Date.now();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );
    const duration = Date.now() - start;
    const logId = c.env.AI.aiGatewayLogId;
    return c.json({
      success: true,
      response: response.response,
      metrics: {
        logId,
        duration, // milliseconds, measured around the AI.run call only
        // Check AI Gateway dashboard for neurons usage
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Multi-Model Gateway
// ============================================================================
/**
 * Use different models through the same gateway to compare performance
 * and costs.
 *
 * POST body: { prompt: string, model?: string }
 * NOTE(review): `model` comes straight from the request body — consider
 * validating it against an allowlist to prevent cost abuse.
 */
app.post('/gateway/multi-model', async (c) => {
  try {
    const { prompt, model = '@cf/meta/llama-3.1-8b-instruct' } = await c.req.json<{
      prompt: string;
      model?: string;
    }>();
    const response = await c.env.AI.run(
      model,
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );
    return c.json({
      success: true,
      model,
      response: response.response,
      logId: c.env.AI.aiGatewayLogId,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Streaming with AI Gateway
// ============================================================================
/**
 * Stream tokens through the gateway as server-sent events. The gateway
 * log ID is exposed in a response header so clients can submit feedback.
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const stream = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
        stream: true,
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );
    // Log ID available after streaming starts; may be unset, hence the fallback
    const logId = c.env.AI.aiGatewayLogId;
    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'x-ai-gateway-log-id': logId || '',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Request Analytics Middleware
// ============================================================================
/**
 * Log every /ai/* request (path, duration, status, gateway log ID) after
 * the downstream handler finishes. Replace console.log with a real
 * analytics sink in production.
 */
app.use('/ai/*', async (c, next) => {
  const start = Date.now();
  const path = c.req.path;
  await next();
  const duration = Date.now() - start;
  const logId = c.env.AI.aiGatewayLogId;
  // Log to console (or send to analytics service)
  console.log({
    timestamp: new Date().toISOString(),
    path,
    duration,
    logId,
    status: c.res.status,
  });
});
/**
 * Chat endpoint covered by the analytics middleware above.
 *
 * POST body: { prompt: string }
 * Wrapped in try/catch for consistency with every other handler in this
 * file — previously a malformed JSON body escaped as an unhandled error
 * instead of the standard { success: false, error } envelope.
 */
app.post('/ai/chat', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: { id: 'my-gateway' },
      }
    );
    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Rate Limit Protection
// ============================================================================
/**
 * Demonstrates handling gateway-enforced rate limits: 429-ish failures are
 * mapped to a 429 response with a retry hint instead of a generic 500.
 * Configure limits in the dashboard: https://dash.cloudflare.com/ai/ai-gateway
 *
 * POST body: { prompt: string }
 */
app.post('/gateway/rate-limited', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );
    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    const message = (error as Error).message;
    // Check for rate limit error — matched by substring since the binding
    // surfaces these as plain Error messages
    if (message.includes('429') || message.includes('rate limit')) {
      return c.json(
        {
          success: false,
          error: 'Rate limit exceeded. Please try again later.',
          retryAfter: 60, // seconds
        },
        429
      );
    }
    return c.json(
      {
        success: false,
        error: message,
      },
      500
    );
  }
});
// ============================================================================
// Health Check
// ============================================================================
/** Liveness probe — reports service status and current time. */
app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});
export default app;

View File

@@ -0,0 +1,391 @@
/**
* Cloudflare Workers AI - Image Generation Examples
*
* This template demonstrates:
* - Text-to-image with Flux models (highest quality)
* - Stable Diffusion XL
* - Image storage in R2
* - Base64 and binary responses
* - Custom prompts and parameters
*/
import { Hono } from 'hono';
// Worker environment bindings (configured in wrangler.toml).
type Bindings = {
  AI: Ai;           // Workers AI binding for image/text model inference
  BUCKET?: R2Bucket; // optional R2 bucket — only needed by /generate/save and /images
};

// Hono app typed with the Worker bindings so c.env is fully typed.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Flux - Text-to-Image (Highest Quality)
// ============================================================================
/**
 * Flux 1 Schnell - Fast, high-quality image generation.
 * Best for: Photorealistic images, detailed artwork.
 * Rate limit: 720/min.
 *
 * POST body: { prompt: string } — responds with raw PNG bytes.
 */
app.post('/generate/flux', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt,
    });
    // Stream the image straight through — no buffering in the Worker
    return new Response(imageStream, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Stable Diffusion XL
// ============================================================================
/**
 * SDXL with tunable sampling parameters.
 *
 * POST body: { prompt: string, num_steps?: number, guidance?: number }
 * NOTE(review): num_steps/guidance are passed through unvalidated — confirm
 * the model's accepted ranges before exposing this publicly.
 */
app.post('/generate/sdxl', async (c) => {
  try {
    const { prompt, num_steps = 20, guidance = 7.5 } = await c.req.json<{
      prompt: string;
      num_steps?: number;
      guidance?: number;
    }>();
    const imageStream = await c.env.AI.run(
      '@cf/stabilityai/stable-diffusion-xl-base-1.0',
      {
        prompt,
        num_steps, // More steps = higher quality, slower
        guidance, // CFG scale: higher = more prompt adherence
      }
    );
    return new Response(imageStream, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// DreamShaper (Artistic/Stylized)
// ============================================================================
/**
 * DreamShaper 8 LCM — stylized/artistic output with default parameters.
 *
 * POST body: { prompt: string } — responds with raw PNG bytes.
 */
app.post('/generate/dreamshaper', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const imageStream = await c.env.AI.run('@cf/lykon/dreamshaper-8-lcm', {
      prompt,
    });
    return new Response(imageStream, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Generate and Store in R2
// ============================================================================
/**
 * Generate an image and persist it to R2 with the prompt recorded in
 * custom metadata.
 *
 * POST body: { prompt: string, filename?: string }
 */
app.post('/generate/save', async (c) => {
  try {
    const { prompt, filename } = await c.req.json<{
      prompt: string;
      filename?: string;
    }>();
    // Fail fast if the optional R2 binding is not configured
    if (!c.env.BUCKET) {
      return c.json({ error: 'R2 bucket not configured' }, 500);
    }
    // Generate image
    const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt,
    });
    // Buffer the full image — R2 put needs the complete body here
    const imageBytes = await new Response(imageStream).bytes();
    // Generate filename (timestamp-based default under images/)
    const key = filename || `images/${Date.now()}.png`;
    // Store in R2
    await c.env.BUCKET.put(key, imageBytes, {
      httpMetadata: {
        contentType: 'image/png',
      },
      customMetadata: {
        prompt,
        generatedAt: new Date().toISOString(),
      },
    });
    return c.json({
      success: true,
      message: 'Image generated and saved',
      key,
      url: `https://your-domain.com/${key}`, // Update with your R2 public URL
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Return Base64 Encoded Image
// ============================================================================
/**
 * Generate an image and return it as a base64 data URL (usable directly
 * in an <img src="..."> tag).
 *
 * POST body: { prompt: string }
 */
app.post('/generate/base64', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt,
    });
    const imageBytes = await new Response(imageStream).bytes();
    // Encode in 32KB slices: spreading the whole byte array into a single
    // String.fromCharCode(...) call overflows the call stack for images
    // larger than the engine's argument limit (~100KB).
    let binary = '';
    for (let i = 0; i < imageBytes.length; i += 0x8000) {
      binary += String.fromCharCode(...imageBytes.subarray(i, i + 0x8000));
    }
    const base64 = btoa(binary);
    return c.json({
      success: true,
      image: `data:image/png;base64,${base64}`,
      prompt,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Image-to-Image (Stable Diffusion)
// ============================================================================
/**
 * Transform an existing image guided by a prompt.
 *
 * POST body: { prompt: string, image: string (base64, with or without a
 * data-URL prefix), strength?: number (0.0-1.0, higher = more change) }
 * Responds with raw PNG bytes.
 */
app.post('/generate/img2img', async (c) => {
  try {
    const { prompt, image, strength = 0.8 } = await c.req.json<{
      prompt: string;
      image: string; // Base64 encoded
      strength?: number; // 0.0-1.0, higher = more transformation
    }>();
    // Decode base64 image to bytes; the regex strips an optional
    // "data:image/...;base64," prefix first
    const imageData = Uint8Array.from(atob(image.replace(/^data:image\/\w+;base64,/, '')), (c) =>
      c.charCodeAt(0)
    );
    const result = await c.env.AI.run('@cf/runwayml/stable-diffusion-v1-5-img2img', {
      prompt,
      image: Array.from(imageData), // model input expects a plain number array
      strength,
    });
    return new Response(result, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Batch Generation
// ============================================================================
/**
 * Generate up to 5 images in parallel and return them as base64 data URLs.
 *
 * POST body: { prompts: string[] } (1-5 prompts)
 */
app.post('/generate/batch', async (c) => {
  try {
    const { prompts } = await c.req.json<{ prompts: string[] }>();
    if (!prompts || prompts.length === 0) {
      return c.json({ error: 'prompts array is required' }, 400);
    }
    // Cap the batch to bound cost and Worker memory (5 buffered PNGs)
    if (prompts.length > 5) {
      return c.json({ error: 'Maximum 5 prompts per batch' }, 400);
    }
    const images = await Promise.all(
      prompts.map(async (prompt) => {
        const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
          prompt,
        });
        const imageBytes = await new Response(imageStream).bytes();
        // Encode in 32KB slices: spreading the whole byte array into one
        // String.fromCharCode(...) call overflows the stack on large images.
        let binary = '';
        for (let i = 0; i < imageBytes.length; i += 0x8000) {
          binary += String.fromCharCode(...imageBytes.subarray(i, i + 0x8000));
        }
        return {
          prompt,
          image: `data:image/png;base64,${btoa(binary)}`,
        };
      })
    );
    return c.json({
      success: true,
      count: images.length,
      images,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Prompt Enhancement
// ============================================================================
/**
 * Use an LLM to enrich the user's prompt (style, lighting, composition)
 * before image generation, and return both prompts with the image.
 *
 * POST body: { userPrompt: string }
 */
app.post('/generate/enhanced', async (c) => {
  try {
    const { userPrompt } = await c.req.json<{ userPrompt: string }>();
    // Step 1: Enhance prompt with LLM
    const enhancement = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a Stable Diffusion prompt expert. Enhance the user prompt for image generation. Add details about style, lighting, quality, composition. Return ONLY the enhanced prompt, no explanations.',
        },
        {
          role: 'user',
          content: userPrompt,
        },
      ],
    });
    // Fall back to the original prompt if the LLM returns nothing —
    // previously `.trim()` on an undefined response threw a 500.
    const enhancedPrompt = (enhancement.response ?? '').trim() || userPrompt;
    // Step 2: Generate image with enhanced prompt
    const imageStream = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt: enhancedPrompt,
    });
    const imageBytes = await new Response(imageStream).bytes();
    // Encode in 32KB slices: spreading the whole byte array into one
    // String.fromCharCode(...) call overflows the stack on large images.
    let binary = '';
    for (let i = 0; i < imageBytes.length; i += 0x8000) {
      binary += String.fromCharCode(...imageBytes.subarray(i, i + 0x8000));
    }
    const base64 = btoa(binary);
    return c.json({
      success: true,
      originalPrompt: userPrompt,
      enhancedPrompt,
      image: `data:image/png;base64,${base64}`,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// List Generated Images (from R2)
// ============================================================================
/**
 * List up to 100 stored images under the images/ prefix.
 * Returns key, size, upload time, and a public URL per object.
 */
app.get('/images', async (c) => {
  try {
    if (!c.env.BUCKET) {
      return c.json({ error: 'R2 bucket not configured' }, 500);
    }
    const listed = await c.env.BUCKET.list({
      prefix: 'images/',
      limit: 100, // single page; no pagination cursor handling here
    });
    const images = listed.objects.map((obj) => ({
      key: obj.key,
      size: obj.size,
      uploaded: obj.uploaded,
      url: `https://your-domain.com/${obj.key}`, // update with your R2 public URL
    }));
    return c.json({
      success: true,
      count: images.length,
      images,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Health Check
// ============================================================================
/** Liveness probe — reports service status and current time. */
app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});
export default app;

View File

@@ -0,0 +1,437 @@
/**
* Cloudflare Workers AI - Text Generation Examples
*
* This template demonstrates:
* - Basic text generation (prompt and messages)
* - Streaming responses (RECOMMENDED for production)
* - Chat completions with conversation history
* - Structured output with JSON
* - Error handling and retry logic
* - Rate limit management
*/
import { Hono } from 'hono';
// Worker environment bindings (configured in wrangler.toml).
type Bindings = {
  AI: Ai; // Workers AI binding for text-generation models
};

// Hono app typed with the Worker bindings so c.env is fully typed.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Basic Text Generation
// ============================================================================
/**
 * Simple prompt-style generation (deprecated pattern, use messages instead).
 * Buffers the whole response — prefer /stream for production.
 *
 * POST body: { prompt: string }
 */
app.post('/simple', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      prompt,
    });
    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Streaming Text Generation (RECOMMENDED)
// ============================================================================
/**
 * Streaming is ESSENTIAL for production:
 * - Prevents buffering large responses in memory
 * - Faster time-to-first-token
 * - Better user experience
 * - Avoids Worker timeout issues
 *
 * POST body: { prompt: string } — responds with server-sent events.
 */
app.post('/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
      stream: true, // Enable streaming
    });
    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'cache-control': 'no-cache',
        connection: 'keep-alive',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Chat Completions with History
// ============================================================================
/**
 * Streamed chat completion over a full conversation history supplied by
 * the client (system/user/assistant messages, oldest first).
 *
 * POST body: { messages: Array<{ role, content }> }
 */
app.post('/chat', async (c) => {
  try {
    const { messages } = await c.req.json<{
      messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>;
    }>();
    // Validate messages before spending an inference call
    if (!messages || messages.length === 0) {
      return c.json({ error: 'Messages array is required' }, 400);
    }
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages,
      stream: true,
      max_tokens: 512, // Limit response length
    });
    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Streaming with Custom Parameters
// ============================================================================
/**
 * Streamed generation with caller-tunable sampling parameters.
 *
 * POST body: { prompt: string, temperature?: number, max_tokens?: number }
 * (temperature defaults to 0.7, max_tokens to 512)
 */
app.post('/stream/custom', async (c) => {
  try {
    const { prompt, temperature = 0.7, max_tokens = 512 } = await c.req.json<{
      prompt: string;
      temperature?: number;
      max_tokens?: number;
    }>();
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: 'You are a helpful AI assistant.',
        },
        {
          role: 'user',
          content: prompt,
        },
      ],
      stream: true,
      max_tokens,
      temperature, // Controls randomness (0.0-1.0)
    });
    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Structured Output (JSON)
// ============================================================================
/**
 * Generate structured JSON output (e.g. for data extraction).
 *
 * POST body: { topic: string }
 * LLMs frequently wrap JSON in markdown code fences despite instructions,
 * so fences are stripped before parsing — previously those responses
 * failed JSON.parse and surfaced as a generic 500.
 */
app.post('/structured', async (c) => {
  try {
    const { topic } = await c.req.json<{ topic: string }>();
    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a helpful assistant that ONLY returns valid JSON. Never include explanations or markdown, just raw JSON.',
        },
        {
          role: 'user',
          content: `Generate a recipe for ${topic}. Return JSON with keys: name, ingredients (array), instructions (array), prepTime (number in minutes)`,
        },
      ],
      max_tokens: 1024,
    });
    // Strip optional ```json ... ``` fencing, then parse
    const raw = (response.response ?? '')
      .replace(/^\s*```(?:json)?\s*/i, '')
      .replace(/```\s*$/, '')
      .trim();
    const data = JSON.parse(raw);
    return c.json({
      success: true,
      data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Model Comparison
// ============================================================================
/**
 * Run the same prompt against several models in parallel and report each
 * model's response and wall-clock latency.
 *
 * POST body: { prompt: string }
 */
app.post('/compare', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const models = [
      '@cf/meta/llama-3.1-8b-instruct', // Balanced
      '@cf/meta/llama-3.2-1b-instruct', // Fast
      '@cf/qwen/qwen1.5-14b-chat-awq', // High quality
    ];
    // Promise.all keeps the three inference calls concurrent; one model
    // failing rejects the whole comparison (handled by the catch below)
    const results = await Promise.all(
      models.map(async (model) => {
        const start = Date.now();
        const response = await c.env.AI.run(model, {
          messages: [{ role: 'user', content: prompt }],
          max_tokens: 256,
        });
        const duration = Date.now() - start;
        return {
          model,
          response: response.response,
          duration, // milliseconds for this model's call
        };
      })
    );
    return c.json({
      success: true,
      results,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Error Handling with Retry
// ============================================================================
/**
* Retry logic for rate limits and transient errors
*/
/**
 * Run a Workers AI model with retry logic for transient failures.
 *
 * - Rate-limit errors (429): retried with exponential backoff (1s, 2s, 4s...).
 * - "model unavailable" on the first attempt: retried against a smaller
 *   fallback model.
 * - Any other error is thrown immediately.
 *
 * @param ai Workers AI binding (env.AI)
 * @param model Model identifier, e.g. '@cf/meta/llama-3.1-8b-instruct'
 * @param inputs Model inputs, passed straight through to ai.run()
 * @param maxRetries Maximum number of attempts; must be >= 1
 * @returns Result of the first successful ai.run() call
 * @throws The last encountered error once attempts are exhausted
 */
async function runWithRetry(
  ai: Ai,
  model: string,
  inputs: any,
  maxRetries = 3
): Promise<any> {
  // Guard: with maxRetries <= 0 the original loop never ran and
  // `throw lastError!` threw `undefined`. Fail loudly instead.
  if (maxRetries < 1) {
    throw new Error('maxRetries must be at least 1');
  }
  let lastError: Error | undefined;
  for (let i = 0; i < maxRetries; i++) {
    try {
      return await ai.run(model, inputs);
    } catch (error) {
      lastError = error as Error;
      const message = lastError.message.toLowerCase();
      // Rate limit (429) - retry with exponential backoff
      if (message.includes('429') || message.includes('rate limit')) {
        if (i < maxRetries - 1) {
          const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s
          console.log(`Rate limited. Retrying in ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          continue;
        }
      }
      // Model unavailable - try a faster fallback model (first attempt only,
      // so a broken fallback cannot loop forever)
      if (message.includes('model') && message.includes('unavailable')) {
        if (i === 0) {
          console.log('Model unavailable, trying fallback...');
          model = '@cf/meta/llama-3.2-1b-instruct'; // Faster fallback
          continue;
        }
      }
      // Other errors - throw immediately
      throw error;
    }
  }
  // Reachable only when the final attempt ended in `continue`
  // (e.g. maxRetries === 1 with an unavailable model).
  throw lastError!;
}
/**
 * POST /reliable
 * Chat completion backed by runWithRetry, so transient rate-limit and
 * availability errors are retried before the request fails.
 */
app.post('/reliable', async (c) => {
  try {
    const body = await c.req.json<{ prompt: string }>();
    const result = await runWithRetry(c.env.AI, '@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: body.prompt }],
    });
    return c.json({ success: true, response: result.response });
  } catch (error) {
    const message = (error as Error).message;
    return c.json({ success: false, error: message }, 500);
  }
});
// ============================================================================
// Token Length Validation
// ============================================================================
/**
* Validate input length to prevent token limit errors
* Approximate: 1 token ≈ 4 characters
*/
/**
 * Rough token-count estimate for a piece of text, using the common
 * heuristic of ~4 characters per token (rounded up).
 */
const estimateTokens = (text: string): number => Math.ceil(text.length / 4);
/**
 * POST /validate
 * Reject oversized prompts before hitting the model, using a cheap
 * character-based token estimate; otherwise run the chat completion.
 */
app.post('/validate', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();
    const maxInputTokens = 2048; // Most models support 2K-128K
    const estimatedTokens = estimateTokens(prompt);
    // Guard clause: fail fast with a 400 instead of a model-side error.
    if (estimatedTokens > maxInputTokens) {
      const error = `Input too long: ${estimatedTokens} tokens (max: ${maxInputTokens})`;
      return c.json({ success: false, error }, 400);
    }
    const result = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });
    return c.json({ success: true, response: result.response, estimatedTokens });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// System Prompts & Personas
// ============================================================================
/**
 * System prompts that give the assistant a distinct voice.
 * `as const` makes the table readonly and keeps keys/values as literal
 * types, so `keyof typeof PERSONAS` stays a precise union for routing.
 */
const PERSONAS = {
  helpful: 'You are a helpful AI assistant.',
  concise: 'You are a concise AI assistant. Keep responses brief.',
  technical: 'You are a technical AI assistant. Provide detailed, accurate information.',
  creative: 'You are a creative AI assistant. Be imaginative and original.',
} as const;
app.post('/persona/:persona', async (c) => {
try {
const persona = c.req.param('persona') as keyof typeof PERSONAS;
const { prompt } = await c.req.json<{ prompt: string }>();
if (!PERSONAS[persona]) {
return c.json({ error: 'Invalid persona' }, 400);
}
const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
messages: [
{ role: 'system', content: PERSONAS[persona] },
{ role: 'user', content: prompt },
],
stream: true,
});
return new Response(stream, {
headers: { 'content-type': 'text/event-stream' },
});
} catch (error) {
return c.json(
{
success: false,
error: (error as Error).message,
},
500
);
}
});
// ============================================================================
// Health Check
// ============================================================================
// Liveness probe for monitoring: always 200 with the current server time.
app.get('/health', (c) =>
  c.json({ status: 'ok', timestamp: new Date().toISOString() })
);
export default app;

View File

@@ -0,0 +1,417 @@
/**
* Cloudflare Workers AI - Vision Models Examples
*
* This template demonstrates:
* - Llama 3.2 11B Vision Instruct for image understanding
* - Image captioning and description
* - Visual question answering
* - Base64 image encoding
* - Combining vision + text prompts
*/
import { Hono } from 'hono';
// Worker bindings exposed to handlers via c.env.
type Bindings = {
  AI: Ai; // Workers AI binding (configured under "ai" in wrangler.jsonc)
};
// Hono app typed with this Worker's bindings.
const app = new Hono<{ Bindings: Bindings }>();
// ============================================================================
// Image Understanding
// ============================================================================
/**
* Llama 3.2 11B Vision Instruct
* - Understands images and answers questions
* - Accepts base64-encoded images
* - Rate limit: 720/min
*/
/**
 * POST /vision/understand
 * Answer a free-form question about a single base64-encoded image using
 * Llama 3.2 11B Vision Instruct.
 */
app.post('/vision/understand', async (c) => {
  try {
    const { image, question = 'What is in this image?' } = await c.req.json<{
      image: string; // Base64 data URL or bare base64 string
      question?: string;
    }>();
    // Accept either a full data URL or a bare base64 payload.
    let imageUrl = image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    const result = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: question },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    return c.json({ success: true, question, answer: result.response });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Image Captioning
// ============================================================================
/**
 * POST /vision/caption
 * Produce a rich natural-language caption for a base64-encoded image.
 */
app.post('/vision/caption', async (c) => {
  try {
    const { image } = await c.req.json<{ image: string }>();
    // Normalize to a data URL if a bare base64 string was supplied.
    let imageUrl = image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    const result = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: 'Generate a detailed caption for this image. Describe what you see, including objects, people, setting, mood, and any notable details.',
            },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    return c.json({ success: true, caption: result.response });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Visual Question Answering
// ============================================================================
/**
 * POST /vision/qa
 * Answer a batch of questions about a single image. Each question is a
 * separate vision-model request; all requests run in parallel.
 */
app.post('/vision/qa', async (c) => {
  try {
    const { image, questions } = await c.req.json<{
      image: string;
      questions: string[];
    }>();
    if (!questions || questions.length === 0) {
      return c.json({ error: 'questions array is required' }, 400);
    }
    // Normalize to a data URL if a bare base64 string was supplied.
    let imageUrl = image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    // One vision call per question, fanned out concurrently.
    const ask = async (question: string) => {
      const reply = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: question },
              { type: 'image_url', image_url: { url: imageUrl } },
            ],
          },
        ],
      });
      return { question, answer: reply.response };
    };
    const answers = await Promise.all(questions.map((q) => ask(q)));
    return c.json({ success: true, count: answers.length, results: answers });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Image Analysis (Structured Output)
// ============================================================================
/**
 * POST /vision/analyze
 * Ask the vision model for a structured JSON analysis of an image. If the
 * model's reply is not parseable JSON, the raw text is returned instead.
 */
app.post('/vision/analyze', async (c) => {
  try {
    const body = await c.req.json<{ image: string }>();
    // Normalize to a data URL if a bare base64 string was supplied.
    let imageUrl = body.image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: `Analyze this image and return a JSON object with:
- objects: array of objects detected
- scene: description of the setting
- mood: emotional tone
- colors: dominant colors
- text: any visible text
Return ONLY valid JSON, no explanations.`,
            },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    // Best-effort parse: fall back to the raw text when the model strays
    // from pure JSON instead of failing the request.
    let parsed: unknown;
    try {
      parsed = JSON.parse(response.response);
    } catch {
      return c.json({ success: true, raw: response.response });
    }
    return c.json({ success: true, analysis: parsed });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Image Comparison
// ============================================================================
/**
 * POST /vision/compare
 * Compare two images: describe each with the vision model, then ask a text
 * model to contrast the two descriptions with respect to the question.
 */
app.post('/vision/compare', async (c) => {
  try {
    const { image1, image2, question = 'What are the differences between these images?' } =
      await c.req.json<{
        image1: string;
        image2: string;
        question?: string;
      }>();
    const imageUrl1 = image1.startsWith('data:')
      ? image1
      : `data:image/png;base64,${image1}`;
    const imageUrl2 = image2.startsWith('data:')
      ? image2
      : `data:image/png;base64,${image2}`;
    // Local helper: one vision call describing a single image.
    const describe = (url: string) =>
      c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: 'Describe this image in detail.' },
              { type: 'image_url', image_url: { url } },
            ],
          },
        ],
      });
    // The two descriptions are independent - run them in parallel instead of
    // sequentially, roughly halving the end-to-end latency.
    const [analysis1, analysis2] = await Promise.all([
      describe(imageUrl1),
      describe(imageUrl2),
    ]);
    // Compare using text generation
    const comparison = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'user',
          content: `Compare these two images based on their descriptions:
Image 1: ${analysis1.response}
Image 2: ${analysis2.response}
Question: ${question}`,
        },
      ],
    });
    return c.json({
      success: true,
      image1Description: analysis1.response,
      image2Description: analysis2.response,
      comparison: comparison.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Image Upload from URL
// ============================================================================
/**
* Fetch image from URL, convert to base64, and analyze
*/
/**
 * POST /vision/url
 * Fetch an image from a URL, inline it as a base64 data URL, and ask the
 * vision model a question about it.
 */
app.post('/vision/url', async (c) => {
  try {
    const { url, question = 'What is in this image?' } = await c.req.json<{
      url: string;
      question?: string;
    }>();
    // Fetch image
    const imageResponse = await fetch(url);
    if (!imageResponse.ok) {
      return c.json({ error: 'Failed to fetch image' }, 400);
    }
    // Convert to base64 in fixed-size chunks. Spreading the whole byte
    // array (String.fromCharCode(...bytes)) passes one argument per byte
    // and overflows the call stack for images beyond ~100KB.
    const imageBytes = await imageResponse.bytes();
    const CHUNK_SIZE = 0x8000; // 32K args per fromCharCode call is safe
    let binary = '';
    for (let offset = 0; offset < imageBytes.length; offset += CHUNK_SIZE) {
      binary += String.fromCharCode(...imageBytes.subarray(offset, offset + CHUNK_SIZE));
    }
    const base64 = btoa(binary);
    const contentType = imageResponse.headers.get('content-type') || 'image/png';
    const imageUrl = `data:${contentType};base64,${base64}`;
    // Analyze image
    const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: question },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    return c.json({
      success: true,
      sourceUrl: url,
      question,
      answer: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
// ============================================================================
// Accessibility: Alt Text Generation
// ============================================================================
/**
 * POST /vision/alt-text
 * Generate short accessibility alt text (target: under 125 characters)
 * for a base64-encoded image.
 */
app.post('/vision/alt-text', async (c) => {
  try {
    const body = await c.req.json<{ image: string }>();
    // Normalize to a data URL if a bare base64 string was supplied.
    let imageUrl = body.image;
    if (!imageUrl.startsWith('data:')) {
      imageUrl = `data:image/png;base64,${imageUrl}`;
    }
    const result = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: 'Generate a concise, descriptive alt text for this image for accessibility purposes. Keep it under 125 characters.',
            },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });
    return c.json({ success: true, altText: result.response.trim() });
  } catch (error) {
    return c.json({ success: false, error: (error as Error).message }, 500);
  }
});
// ============================================================================
// Health Check
// ============================================================================
// Simple liveness endpoint used by monitoring.
app.get('/health', (c) => {
  const payload = { status: 'ok', timestamp: new Date().toISOString() };
  return c.json(payload);
});
export default app;

View File

@@ -0,0 +1,138 @@
/**
* Cloudflare Workers AI - Wrangler Configuration
*
* This configuration file sets up Workers AI binding for your Worker.
* Place this in your project root as wrangler.jsonc
*/
{
  // Worker name as it appears in the Cloudflare dashboard / wrangler deploys.
  "name": "my-ai-worker",
  // Entry point module for the Worker.
  "main": "src/index.ts",
  // Pins runtime behavior to this date's compatibility flags.
  "compatibility_date": "2025-10-21",
  /**
   * AI Binding
   * Provides access to Workers AI models via env.AI
   */
  "ai": {
    "binding": "AI" // Available in your Worker as env.AI
  },
  /**
   * Optional: AI Gateway Integration
   * Provides caching, logging, and analytics for AI requests
   * Create a gateway at: https://dash.cloudflare.com/ai/ai-gateway
   */
  // Note: AI Gateway is configured per-request, not in wrangler.jsonc
  // Use the gateway option in env.AI.run():
  //   env.AI.run(model, inputs, { gateway: { id: 'my-gateway' } })
  /**
   * Optional: Vectorize Binding (for RAG patterns)
   * Store and search vector embeddings
   */
  "vectorize": [
    {
      "binding": "VECTORIZE",
      "index_name": "my-embeddings-index" // Must match an existing Vectorize index
    }
  ],
  /**
   * Optional: D1 Database (for RAG document storage)
   */
  "d1_databases": [
    {
      "binding": "DB",
      "database_name": "my-database",
      "database_id": "YOUR_DATABASE_ID" // Placeholder - replace with your real D1 id
    }
  ],
  /**
   * Optional: R2 Bucket (for image storage)
   */
  "r2_buckets": [
    {
      "binding": "BUCKET",
      "bucket_name": "ai-generated-images"
    }
  ],
  /**
   * Optional: KV Namespace (for caching AI responses)
   */
  "kv_namespaces": [
    {
      "binding": "CACHE",
      "id": "YOUR_KV_NAMESPACE_ID" // Placeholder - replace with your real KV id
    }
  ],
  /**
   * Environment Variables
   * Plain-text configuration exposed on env; use secrets for anything sensitive
   */
  "vars": {
    "ENVIRONMENT": "production"
  },
  /**
   * Secrets (use: wrangler secret put SECRET_NAME)
   * - CLOUDFLARE_API_KEY
   * - CLOUDFLARE_ACCOUNT_ID
   */
  /**
   * Workers Configuration
   */
  "limits": {
    "cpu_ms": 30000 // 30 seconds (increase for long AI operations)
  },
  /**
   * Local Development
   * Run: npx wrangler dev
   */
  "dev": {
    "port": 8787 // Local dev server port
  }
}
/**
* TypeScript Types
*
* Add to your src/index.ts:
*
* export interface Env {
* AI: Ai;
* VECTORIZE?: Vectorize;
* DB?: D1Database;
* BUCKET?: R2Bucket;
* CACHE?: KVNamespace;
* CLOUDFLARE_API_KEY?: string;
* CLOUDFLARE_ACCOUNT_ID?: string;
* }
*/
/**
* Usage Examples
*
* Basic AI inference:
* const response = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
* prompt: 'Hello!',
* });
*
* With AI Gateway:
* const response = await env.AI.run(
* '@cf/meta/llama-3.1-8b-instruct',
* { prompt: 'Hello!' },
* { gateway: { id: 'my-gateway' } }
* );
*
* Streaming:
* const stream = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
* messages: [{ role: 'user', content: 'Hello!' }],
* stream: true,
* });
*/