Initial commit

templates/ai-embeddings-rag.ts (new file, 491 lines)
@@ -0,0 +1,491 @@

/**
 * Cloudflare Workers AI - Embeddings & RAG Examples
 *
 * This template demonstrates:
 * - Generating text embeddings with BGE models
 * - Storing embeddings in Vectorize
 * - Semantic search with vector similarity
 * - Complete RAG (Retrieval Augmented Generation) pattern
 * - Document chunking strategies
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
  VECTORIZE: Vectorize;
  DB?: D1Database;
};
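
// Matching wrangler.toml (sketch; the binding and index names here are
// assumptions, adjust to your project):
//   [ai]
//   binding = "AI"
//   [[vectorize]]
//   binding = "VECTORIZE"
//   index_name = "my-index"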

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Generate Embeddings
// ============================================================================

/**
 * Generate embeddings for text
 * BGE-base: 768 dimensions, good balance
 * BGE-large: 1024 dimensions, higher accuracy
 * BGE-small: 384 dimensions, faster/smaller
 */
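
// NOTE: your Vectorize index must be created with dimensions matching the
// chosen model, e.g. (assuming current wrangler syntax):
//   npx wrangler vectorize create my-index --dimensions=768 --metric=cosine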

app.post('/embeddings', async (c) => {
  try {
    const { text } = await c.req.json<{ text: string | string[] }>();

    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: Array.isArray(text) ? text : [text],
    });

    return c.json({
      success: true,
      shape: embeddings.shape, // [batch_size, dimensions]
      data: embeddings.data, // Array of vectors
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Batch Embeddings
// ============================================================================

/**
 * Generate embeddings for multiple texts in one request
 * More efficient than individual requests
 */

app.post('/embeddings/batch', async (c) => {
  try {
    const { texts } = await c.req.json<{ texts: string[] }>();

    if (!texts || texts.length === 0) {
      return c.json({ error: 'texts array is required' }, 400);
    }

    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: texts,
    });

    return c.json({
      success: true,
      count: texts.length,
      shape: embeddings.shape,
      embeddings: embeddings.data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Store Embeddings in Vectorize
// ============================================================================

app.post('/documents', async (c) => {
  try {
    const { id, text, metadata } = await c.req.json<{
      id: string;
      text: string;
      metadata?: Record<string, any>;
    }>();

    // Generate embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [text],
    });

    const vector = embeddings.data[0];

    // Store in Vectorize
    await c.env.VECTORIZE.upsert([
      {
        id,
        values: vector,
        metadata: {
          text,
          ...metadata,
          createdAt: Date.now(),
        },
      },
    ]);

    return c.json({
      success: true,
      message: 'Document indexed',
      id,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Semantic Search
// ============================================================================

app.post('/search', async (c) => {
  try {
    const { query, topK = 5 } = await c.req.json<{
      query: string;
      topK?: number;
    }>();

    // Convert query to embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [query],
    });

    const vector = embeddings.data[0];

    // Search Vectorize
    const results = await c.env.VECTORIZE.query(vector, {
      topK,
      returnMetadata: true,
    });

    return c.json({
      success: true,
      query,
      results: results.matches.map((match) => ({
        id: match.id,
        score: match.score,
        text: match.metadata?.text,
        metadata: match.metadata,
      })),
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
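
// NOTE: the meaning of `score` depends on the distance metric the index was
// created with; for a cosine index, scores closer to 1 indicate closer matches.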

// ============================================================================
// RAG Pattern: Query with Context
// ============================================================================

app.post('/rag/ask', async (c) => {
  try {
    const { question, topK = 3 } = await c.req.json<{
      question: string;
      topK?: number;
    }>();

    // Step 1: Convert question to embedding
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [question],
    });

    const vector = embeddings.data[0];

    // Step 2: Find relevant documents
    const results = await c.env.VECTORIZE.query(vector, {
      topK,
      returnMetadata: true,
    });

    // Step 3: Build context from matches
    const context = results.matches
      .map((match) => match.metadata?.text)
      .filter(Boolean)
      .join('\n\n');

    // Step 4: Generate answer with context
    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: `Answer the question using ONLY the following context. If the context doesn't contain relevant information, say "I don't have enough information to answer that."\n\nContext:\n${context}`,
        },
        {
          role: 'user',
          content: question,
        },
      ],
      stream: true,
    });

    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'x-sources': JSON.stringify(
          results.matches.map((m) => ({ id: m.id, score: m.score }))
        ),
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Document Chunking
// ============================================================================

/**
 * Split long documents into chunks for better embedding quality
 * Recommended: roughly 200-500 tokens per chunk; this helper approximates
 * tokens by splitting on whitespace and counting words
 */

function chunkText(text: string, chunkSize = 500, overlap = 50): string[] {
  const words = text.split(/\s+/);
  const chunks: string[] = [];

  for (let i = 0; i < words.length; i += chunkSize - overlap) {
    const chunk = words.slice(i, i + chunkSize).join(' ');
    chunks.push(chunk);
  }

  return chunks;
}
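
// Example (sketch): with chunkSize = 500 and overlap = 50, a 1,000-word text
// yields chunks covering words [0, 500), [450, 950), and [900, 1000); each
// consecutive pair shares 50 words, so sentences at chunk boundaries are
// not lost. Note this assumes overlap < chunkSize.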

app.post('/documents/long', async (c) => {
  try {
    const { id, text, chunkSize = 500 } = await c.req.json<{
      id: string;
      text: string;
      chunkSize?: number;
    }>();

    // Split into chunks
    const chunks = chunkText(text, chunkSize);

    // Generate embeddings for all chunks
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: chunks,
    });

    // Store each chunk in Vectorize
    const vectors = chunks.map((chunk, index) => ({
      id: `${id}-chunk-${index}`,
      values: embeddings.data[index],
      metadata: {
        documentId: id,
        chunkIndex: index,
        text: chunk,
        totalChunks: chunks.length,
      },
    }));

    await c.env.VECTORIZE.upsert(vectors);

    return c.json({
      success: true,
      message: 'Document indexed with chunks',
      documentId: id,
      chunks: chunks.length,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// RAG with Citations
// ============================================================================

app.post('/rag/ask-with-citations', async (c) => {
  try {
    const { question } = await c.req.json<{ question: string }>();

    // Find relevant chunks
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [question],
    });

    const results = await c.env.VECTORIZE.query(embeddings.data[0], {
      topK: 5,
      returnMetadata: true,
    });

    // Build context with citations
    const context = results.matches
      .map(
        (match, i) =>
          `[Source ${i + 1}] ${match.metadata?.text} (Relevance: ${(match.score * 100).toFixed(1)}%)`
      )
      .join('\n\n');

    // Generate answer
    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: `Answer the question using the provided sources. Cite sources using [Source N] format.

${context}`,
        },
        {
          role: 'user',
          content: question,
        },
      ],
    });

    return c.json({
      success: true,
      answer: response.response,
      sources: results.matches.map((m, i) => ({
        id: i + 1,
        documentId: m.metadata?.documentId,
        text: m.metadata?.text,
        score: m.score,
      })),
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Hybrid Search (Keyword + Semantic)
// ============================================================================

/**
 * Combine keyword search (D1) with semantic search (Vectorize)
 * for better recall
 */

app.post('/search/hybrid', async (c) => {
  try {
    const { query } = await c.req.json<{ query: string }>();

    // Semantic search
    const embeddings = await c.env.AI.run('@cf/baai/bge-base-en-v1.5', {
      text: [query],
    });

    const vectorResults = await c.env.VECTORIZE.query(embeddings.data[0], {
      topK: 5,
      returnMetadata: true,
    });

    // Keyword search (if D1 available)
    let keywordResults: any[] = [];
    if (c.env.DB) {
      const { results } = await c.env.DB.prepare(
        'SELECT id, text FROM documents WHERE text LIKE ? LIMIT 5'
      )
        .bind(`%${query}%`)
        .all();

      keywordResults = results || [];
    }

    // Combine and deduplicate
    const combined = [
      ...vectorResults.matches.map((m) => ({
        id: m.id,
        text: m.metadata?.text,
        score: m.score,
        source: 'vector',
      })),
      ...keywordResults.map((r) => ({
        id: r.id,
        text: r.text,
        score: 1.0,
        source: 'keyword',
      })),
    ];

    // Deduplicate by ID
    const unique = Array.from(new Map(combined.map((item) => [item.id, item])).values());

    return c.json({
      success: true,
      query,
      results: unique,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
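
// NOTE (assumption): `LIKE '%...%'` is a simple substring match with no
// relevance ranking; for real keyword search over D1, an FTS5 virtual table
// (if available in your D1 database) or an external search index would give
// better recall and ranking.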

// ============================================================================
// Delete Documents
// ============================================================================

app.delete('/documents/:id', async (c) => {
  try {
    const id = c.req.param('id');

    // Delete from Vectorize
    await c.env.VECTORIZE.deleteByIds([id]);

    return c.json({
      success: true,
      message: 'Document deleted',
      id,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;

templates/ai-gateway-integration.ts (new file, 432 lines)
@@ -0,0 +1,432 @@

/**
 * Cloudflare AI Gateway - Integration Examples
 *
 * This template demonstrates:
 * - AI Gateway setup and configuration
 * - Caching AI responses
 * - Logging and analytics
 * - Cost tracking
 * - Rate limiting
 * - Feedback collection
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Basic AI Gateway Usage
// ============================================================================

/**
 * Create a gateway at: https://dash.cloudflare.com/ai/ai-gateway
 * Use the gateway ID in your requests
 */

app.post('/gateway/basic', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway', // Your gateway ID
        },
      }
    );

    // Access log ID for analytics
    const logId = c.env.AI.aiGatewayLogId;

    return c.json({
      success: true,
      response: response.response,
      logId, // Use for tracking and feedback
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// AI Gateway with Caching
// ============================================================================

/**
 * AI Gateway can cache responses to reduce costs
 * Same prompt = cached response (no inference cost)
 */

app.post('/gateway/cached', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
          skipCache: false, // Use cache (default)
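          // cacheTtl: 3600, // assumption: optional max cache age in seconds; verify against current AI Gateway docs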
        },
      }
    );

    return c.json({
      success: true,
      response: response.response,
      logId: c.env.AI.aiGatewayLogId,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Skip Cache for Dynamic Content
// ============================================================================

app.post('/gateway/no-cache', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
          skipCache: true, // Always fetch fresh response
        },
      }
    );

    return c.json({
      success: true,
      response: response.response,
      logId: c.env.AI.aiGatewayLogId,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Send Feedback to AI Gateway
// ============================================================================

/**
 * Track user satisfaction with AI responses
 * Helps optimize prompts and model selection
 */
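
// NOTE (assumption): the exact patchLog payload shape has changed across
// releases; current docs describe fields such as feedback (-1 | 1), score,
// and metadata. Verify the nested shape used below against the AI Gateway
// docs before relying on it.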

app.post('/gateway/feedback', async (c) => {
  try {
    const { logId, rating, comment } = await c.req.json<{
      logId: string;
      rating: number; // 1-5
      comment?: string;
    }>();

    const gateway = c.env.AI.gateway('my-gateway');

    await gateway.patchLog(logId, {
      feedback: {
        rating,
        comment,
      },
    });

    return c.json({
      success: true,
      message: 'Feedback recorded',
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Track Cost Per Request
// ============================================================================

/**
 * Monitor neurons usage per request
 * AI Gateway logs show cost breakdown
 */

app.post('/gateway/track-cost', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const start = Date.now();

    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );

    const duration = Date.now() - start;
    const logId = c.env.AI.aiGatewayLogId;

    return c.json({
      success: true,
      response: response.response,
      metrics: {
        logId,
        duration,
        // Check AI Gateway dashboard for neurons usage
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Multi-Model Gateway
// ============================================================================

/**
 * Use different models through the same gateway
 * Compare performance and costs
 */

app.post('/gateway/multi-model', async (c) => {
  try {
    const { prompt, model = '@cf/meta/llama-3.1-8b-instruct' } = await c.req.json<{
      prompt: string;
      model?: string;
    }>();

    const response = await c.env.AI.run(
      model,
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );

    return c.json({
      success: true,
      model,
      response: response.response,
      logId: c.env.AI.aiGatewayLogId,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Streaming with AI Gateway
// ============================================================================

app.post('/gateway/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const stream = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
        stream: true,
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );

    // Log ID available after streaming starts
    const logId = c.env.AI.aiGatewayLogId;

    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'x-ai-gateway-log-id': logId || '',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Request Analytics Middleware
// ============================================================================

/**
 * Log all AI requests for analytics
 */

app.use('/ai/*', async (c, next) => {
  const start = Date.now();
  const path = c.req.path;

  await next();

  const duration = Date.now() - start;
  const logId = c.env.AI.aiGatewayLogId;

  // Log to console (or send to analytics service)
  console.log({
    timestamp: new Date().toISOString(),
    path,
    duration,
    logId,
    status: c.res.status,
  });
});

app.post('/ai/chat', async (c) => {
  const { prompt } = await c.req.json<{ prompt: string }>();

  const response = await c.env.AI.run(
    '@cf/meta/llama-3.1-8b-instruct',
    {
      messages: [{ role: 'user', content: prompt }],
    },
    {
      gateway: { id: 'my-gateway' },
    }
  );

  return c.json({
    success: true,
    response: response.response,
  });
});

// ============================================================================
// Rate Limit Protection
// ============================================================================

/**
 * AI Gateway provides additional rate limiting
 * Configure in dashboard: https://dash.cloudflare.com/ai/ai-gateway
 */

app.post('/gateway/rate-limited', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run(
      '@cf/meta/llama-3.1-8b-instruct',
      {
        messages: [{ role: 'user', content: prompt }],
      },
      {
        gateway: {
          id: 'my-gateway',
        },
      }
    );

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    const message = (error as Error).message;

    // Check for rate limit error
    if (message.includes('429') || message.includes('rate limit')) {
      return c.json(
        {
          success: false,
          error: 'Rate limit exceeded. Please try again later.',
          retryAfter: 60, // seconds
        },
        429
      );
    }

    return c.json(
      {
        success: false,
        error: message,
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;

templates/ai-image-generation.ts (new file, 391 lines)
@@ -0,0 +1,391 @@

/**
 * Cloudflare Workers AI - Image Generation Examples
 *
 * This template demonstrates:
 * - Text-to-image with Flux models (highest quality)
 * - Stable Diffusion XL
 * - Image storage in R2
 * - Base64 and binary responses
 * - Custom prompts and parameters
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
  BUCKET?: R2Bucket;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Flux - Text-to-Image (Highest Quality)
// ============================================================================

/**
 * Flux 1 Schnell - Fast, high-quality image generation
 * Best for: Photorealistic images, detailed artwork
 * Rate limit: 720/min
 */
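
// NOTE (assumption, based on the model's documented output schema):
// flux-1-schnell returns JSON of the form { image: '<base64 JPEG>' } rather
// than a raw byte stream, so the flux-based handlers below decode the base64
// payload before returning binary image data.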

app.post('/generate/flux', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const result = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt,
    });

    // Decode the base64 payload (see note above) into bytes
    const imageBytes = Uint8Array.from(atob(result.image), (ch) => ch.charCodeAt(0));

    return new Response(imageBytes, {
      headers: { 'content-type': 'image/jpeg' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Stable Diffusion XL
// ============================================================================

app.post('/generate/sdxl', async (c) => {
  try {
    const { prompt, num_steps = 20, guidance = 7.5 } = await c.req.json<{
      prompt: string;
      num_steps?: number;
      guidance?: number;
    }>();

    const imageStream = await c.env.AI.run(
      '@cf/stabilityai/stable-diffusion-xl-base-1.0',
      {
        prompt,
        num_steps, // More steps = higher quality, slower
        guidance, // CFG scale: higher = more prompt adherence
      }
    );

    return new Response(imageStream, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// DreamShaper (Artistic/Stylized)
// ============================================================================

app.post('/generate/dreamshaper', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const imageStream = await c.env.AI.run('@cf/lykon/dreamshaper-8-lcm', {
      prompt,
    });

    return new Response(imageStream, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Generate and Store in R2
// ============================================================================

app.post('/generate/save', async (c) => {
  try {
    const { prompt, filename } = await c.req.json<{
      prompt: string;
      filename?: string;
    }>();

    if (!c.env.BUCKET) {
      return c.json({ error: 'R2 bucket not configured' }, 500);
    }

    // Generate image (flux returns { image: '<base64>' }, see note above)
    const result = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt,
    });

    const imageBytes = Uint8Array.from(atob(result.image), (ch) => ch.charCodeAt(0));

    // Generate filename
    const key = filename || `images/${Date.now()}.jpg`;

    // Store in R2
    await c.env.BUCKET.put(key, imageBytes, {
      httpMetadata: {
        contentType: 'image/jpeg',
      },
      customMetadata: {
        prompt,
        generatedAt: new Date().toISOString(),
      },
    });

    return c.json({
      success: true,
      message: 'Image generated and saved',
      key,
      url: `https://your-domain.com/${key}`, // Update with your R2 public URL
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Return Base64 Encoded Image
// ============================================================================

app.post('/generate/base64', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const result = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt,
    });

    // flux already returns base64 (see note above), so no byte round-trip needed
    return c.json({
      success: true,
      image: `data:image/jpeg;base64,${result.image}`,
      prompt,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Image-to-Image (Stable Diffusion)
// ============================================================================

/**
 * Transform existing images based on prompts
 * Requires base64-encoded input image
 */

app.post('/generate/img2img', async (c) => {
  try {
    const { prompt, image, strength = 0.8 } = await c.req.json<{
      prompt: string;
      image: string; // Base64 encoded
      strength?: number; // 0.0-1.0, higher = more transformation
    }>();

    // Decode base64 image to bytes (using `ch` to avoid shadowing the Hono context `c`)
    const imageData = Uint8Array.from(
      atob(image.replace(/^data:image\/\w+;base64,/, '')),
      (ch) => ch.charCodeAt(0)
    );

    const result = await c.env.AI.run('@cf/runwayml/stable-diffusion-v1-5-img2img', {
      prompt,
      image: Array.from(imageData),
      strength,
    });

    return new Response(result, {
      headers: { 'content-type': 'image/png' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Batch Generation
// ============================================================================

app.post('/generate/batch', async (c) => {
  try {
    const { prompts } = await c.req.json<{ prompts: string[] }>();

    if (!prompts || prompts.length === 0) {
      return c.json({ error: 'prompts array is required' }, 400);
    }

    if (prompts.length > 5) {
      return c.json({ error: 'Maximum 5 prompts per batch' }, 400);
    }

    const images = await Promise.all(
      prompts.map(async (prompt) => {
        // flux returns { image: '<base64>' } (see note above)
        const result = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
          prompt,
        });

        return {
          prompt,
          image: `data:image/jpeg;base64,${result.image}`,
        };
      })
    );

    return c.json({
      success: true,
      count: images.length,
      images,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Prompt Enhancement
// ============================================================================

/**
 * Use an LLM to enhance user prompts for better image quality
 */

app.post('/generate/enhanced', async (c) => {
  try {
    const { userPrompt } = await c.req.json<{ userPrompt: string }>();

    // Step 1: Enhance prompt with LLM
    const enhancement = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a Stable Diffusion prompt expert. Enhance the user prompt for image generation. Add details about style, lighting, quality, composition. Return ONLY the enhanced prompt, no explanations.',
        },
        {
          role: 'user',
          content: userPrompt,
        },
      ],
    });

    const enhancedPrompt = enhancement.response.trim();

    // Step 2: Generate image with enhanced prompt (flux returns base64, see note above)
    const result = await c.env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
      prompt: enhancedPrompt,
    });

    return c.json({
      success: true,
      originalPrompt: userPrompt,
      enhancedPrompt,
      image: `data:image/jpeg;base64,${result.image}`,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// List Generated Images (from R2)
// ============================================================================

app.get('/images', async (c) => {
  try {
    if (!c.env.BUCKET) {
      return c.json({ error: 'R2 bucket not configured' }, 500);
    }

    const listed = await c.env.BUCKET.list({
      prefix: 'images/',
      limit: 100,
    });

    const images = listed.objects.map((obj) => ({
      key: obj.key,
      size: obj.size,
      uploaded: obj.uploaded,
      url: `https://your-domain.com/${obj.key}`,
    }));

    return c.json({
      success: true,
      count: images.length,
      images,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;

templates/ai-text-generation.ts (new file, 437 lines)
@@ -0,0 +1,437 @@

/**
 * Cloudflare Workers AI - Text Generation Examples
 *
 * This template demonstrates:
 * - Basic text generation (prompt and messages)
 * - Streaming responses (RECOMMENDED for production)
 * - Chat completions with conversation history
 * - Structured output with JSON
 * - Error handling and retry logic
 * - Rate limit management
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Basic Text Generation
// ============================================================================

// Simple prompt (deprecated pattern, use messages instead)
app.post('/simple', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      prompt,
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Streaming Text Generation (RECOMMENDED)
// ============================================================================

/**
 * Streaming is ESSENTIAL for production:
 * - Prevents buffering large responses in memory
 * - Faster time-to-first-token
 * - Better user experience
 * - Avoids Worker timeout issues
 */

app.post('/stream', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
      stream: true, // Enable streaming
    });

    return new Response(stream, {
      headers: {
        'content-type': 'text/event-stream',
        'cache-control': 'no-cache',
        connection: 'keep-alive',
      },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
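
// Client-side consumption (sketch, browser fetch API; the endpoint path and
// SSE payload shape are assumptions to verify against current docs):
//   const res = await fetch('/stream', {
//     method: 'POST',
//     body: JSON.stringify({ prompt: 'Hello' }),
//   });
//   const reader = res.body!.getReader();
//   const decoder = new TextDecoder();
//   while (true) {
//     const { done, value } = await reader.read();
//     if (done) break;
//     // Chunks arrive as SSE lines like: data: {"response":"<token>"}
//     console.log(decoder.decode(value));
//   }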

// ============================================================================
// Chat Completions with History
// ============================================================================

app.post('/chat', async (c) => {
  try {
    const { messages } = await c.req.json<{
      messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>;
    }>();

    // Validate messages
    if (!messages || messages.length === 0) {
      return c.json({ error: 'Messages array is required' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages,
      stream: true,
      max_tokens: 512, // Limit response length
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Streaming with Custom Parameters
// ============================================================================

app.post('/stream/custom', async (c) => {
  try {
    const { prompt, temperature = 0.7, max_tokens = 512 } = await c.req.json<{
      prompt: string;
      temperature?: number;
      max_tokens?: number;
    }>();

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content: 'You are a helpful AI assistant.',
        },
        {
          role: 'user',
          content: prompt,
        },
      ],
      stream: true,
      max_tokens,
      temperature, // Controls randomness (0.0-1.0)
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Structured Output (JSON)
// ============================================================================

/**
 * Generate structured JSON output
 * Useful for extracting data, generating schemas, etc.
 */

app.post('/structured', async (c) => {
  try {
    const { topic } = await c.req.json<{ topic: string }>();

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'system',
          content:
            'You are a helpful assistant that ONLY returns valid JSON. Never include explanations or markdown, just raw JSON.',
        },
        {
          role: 'user',
          content: `Generate a recipe for ${topic}. Return JSON with keys: name, ingredients (array), instructions (array), prepTime (number in minutes)`,
        },
      ],
      max_tokens: 1024,
    });

    // Parse JSON response
    const data = JSON.parse(response.response);

    return c.json({
      success: true,
      data,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
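
// NOTE (assumption): some Workers AI models also accept a structured
// `response_format` option (JSON mode / JSON schema), which is more reliable
// than prompt-only JSON like the handler above; check the current model docs
// before depending on it.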

// ============================================================================
// Model Comparison
// ============================================================================

/**
 * Compare different models side-by-side
 */

app.post('/compare', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const models = [
      '@cf/meta/llama-3.1-8b-instruct', // Balanced
      '@cf/meta/llama-3.2-1b-instruct', // Fast
      '@cf/qwen/qwen1.5-14b-chat-awq', // High quality
    ];

    const results = await Promise.all(
      models.map(async (model) => {
        const start = Date.now();
        const response = await c.env.AI.run(model, {
          messages: [{ role: 'user', content: prompt }],
          max_tokens: 256,
        });
        const duration = Date.now() - start;

        return {
          model,
          response: response.response,
          duration,
        };
      })
    );

    return c.json({
      success: true,
      results,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Error Handling with Retry
// ============================================================================

/**
 * Retry logic for rate limits and transient errors
 */

async function runWithRetry(
  ai: Ai,
  model: string,
  inputs: any,
  maxRetries = 3
): Promise<any> {
  // Typed as possibly undefined so TypeScript accepts the use after the loop
  let lastError: Error | undefined;

  for (let i = 0; i < maxRetries; i++) {
    try {
      return await ai.run(model, inputs);
    } catch (error) {
      lastError = error as Error;
      const message = lastError.message.toLowerCase();

      // Rate limit (429) - retry with exponential backoff
      if (message.includes('429') || message.includes('rate limit')) {
        if (i < maxRetries - 1) {
          const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s
          console.log(`Rate limited. Retrying in ${delay}ms...`);
          await new Promise((resolve) => setTimeout(resolve, delay));
          continue;
        }
      }

      // Model unavailable - try fallback model
      if (message.includes('model') && message.includes('unavailable')) {
        if (i === 0) {
          console.log('Model unavailable, trying fallback...');
          model = '@cf/meta/llama-3.2-1b-instruct'; // Faster fallback
          continue;
        }
      }

      // Other errors - throw immediately
      throw error;
    }
  }

  throw lastError!;
}

app.post('/reliable', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const response = await runWithRetry(c.env.AI, '@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Token Length Validation
// ============================================================================

/**
 * Validate input length to prevent token limit errors
 * Approximate: 1 token ≈ 4 characters
 */

function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}
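
// Example (sketch): estimateTokens('Hello world') === 3 (11 chars / 4, rounded
// up). This is only a rough heuristic; real tokenizers vary by model.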

app.post('/validate', async (c) => {
  try {
    const { prompt } = await c.req.json<{ prompt: string }>();

    const estimatedTokens = estimateTokens(prompt);
    const maxInputTokens = 2048; // Most models support 2K-128K

    if (estimatedTokens > maxInputTokens) {
      return c.json(
        {
          success: false,
          error: `Input too long: ${estimatedTokens} tokens (max: ${maxInputTokens})`,
        },
        400
      );
    }

    const response = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: prompt }],
    });

    return c.json({
      success: true,
      response: response.response,
      estimatedTokens,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// System Prompts & Personas
// ============================================================================

const PERSONAS = {
  helpful: 'You are a helpful AI assistant.',
  concise: 'You are a concise AI assistant. Keep responses brief.',
  technical: 'You are a technical AI assistant. Provide detailed, accurate information.',
  creative: 'You are a creative AI assistant. Be imaginative and original.',
};

app.post('/persona/:persona', async (c) => {
  try {
    const persona = c.req.param('persona') as keyof typeof PERSONAS;
    const { prompt } = await c.req.json<{ prompt: string }>();

    if (!PERSONAS[persona]) {
      return c.json({ error: 'Invalid persona' }, 400);
    }

    const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        { role: 'system', content: PERSONAS[persona] },
        { role: 'user', content: prompt },
      ],
      stream: true,
    });

    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});

// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;

templates/ai-vision-models.ts (new file, 417 lines)
@@ -0,0 +1,417 @@

/**
 * Cloudflare Workers AI - Vision Models Examples
 *
 * This template demonstrates:
 * - Llama 3.2 11B Vision Instruct for image understanding
 * - Image captioning and description
 * - Visual question answering
 * - Base64 image encoding
 * - Combining vision + text prompts
 */

import { Hono } from 'hono';

type Bindings = {
  AI: Ai;
};

const app = new Hono<{ Bindings: Bindings }>();

// ============================================================================
// Image Understanding
// ============================================================================

/**
 * Llama 3.2 11B Vision Instruct
 * - Understands images and answers questions
 * - Accepts base64-encoded images
 * - Rate limit: 720/min
 */
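
// NOTE (assumption): the OpenAI-style multimodal `messages` shape used below
// (content as an array of text and image_url parts) should be verified against
// the current Workers AI input schema for this model, which has also accepted
// a raw `image` byte array in other versions.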
|
||||
|
||||
app.post('/vision/understand', async (c) => {
|
||||
try {
|
||||
const { image, question = 'What is in this image?' } = await c.req.json<{
|
||||
image: string; // Base64 data URL or base64 string
|
||||
question?: string;
|
||||
}>();
|
||||
|
||||
// Ensure image has proper data URL prefix
|
||||
const imageUrl = image.startsWith('data:')
|
||||
? image
|
||||
: `data:image/png;base64,${image}`;
|
||||
|
||||
const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: question },
|
||||
{ type: 'image_url', image_url: { url: imageUrl } },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
return c.json({
|
||||
success: true,
|
||||
question,
|
||||
answer: response.response,
|
||||
});
|
||||
} catch (error) {
|
||||
return c.json(
|
||||
{
|
||||
success: false,
|
||||
error: (error as Error).message,
|
||||
},
|
||||
500
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// Image Captioning
|
||||
// ============================================================================
|
||||
|
||||
app.post('/vision/caption', async (c) => {
|
||||
try {
|
||||
const { image } = await c.req.json<{ image: string }>();
|
||||
|
||||
const imageUrl = image.startsWith('data:')
|
||||
? image
|
||||
: `data:image/png;base64,${image}`;
|
||||
|
||||
const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: 'Generate a detailed caption for this image. Describe what you see, including objects, people, setting, mood, and any notable details.',
|
||||
},
|
||||
{ type: 'image_url', image_url: { url: imageUrl } },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
return c.json({
|
||||
success: true,
|
||||
caption: response.response,
|
||||
});
|
||||
} catch (error) {
|
||||
return c.json(
|
||||
{
|
||||
success: false,
|
||||
error: (error as Error).message,
|
||||
},
|
||||
500
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// Visual Question Answering
|
||||
// ============================================================================
|
||||
|
||||
app.post('/vision/qa', async (c) => {
|
||||
try {
|
||||
const { image, questions } = await c.req.json<{
|
||||
image: string;
|
||||
questions: string[];
|
||||
}>();
|
||||
|
||||
if (!questions || questions.length === 0) {
|
||||
return c.json({ error: 'questions array is required' }, 400);
|
||||
}
|
||||
|
||||
const imageUrl = image.startsWith('data:')
|
||||
? image
|
||||
: `data:image/png;base64,${image}`;
|
||||
|
||||
// Answer all questions
|
||||
const answers = await Promise.all(
|
||||
questions.map(async (question) => {
|
||||
const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: question },
|
||||
{ type: 'image_url', image_url: { url: imageUrl } },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
return {
|
||||
question,
|
||||
answer: response.response,
|
||||
};
|
||||
})
|
||||
);
|
||||
|
||||
return c.json({
|
||||
success: true,
|
||||
count: answers.length,
|
||||
results: answers,
|
||||
});
|
||||
} catch (error) {
|
||||
return c.json(
|
||||
{
|
||||
success: false,
|
||||
error: (error as Error).message,
|
||||
},
|
||||
500
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// Image Analysis (Structured Output)
|
||||
// ============================================================================
|
||||
|
||||
app.post('/vision/analyze', async (c) => {
|
||||
try {
|
||||
const { image } = await c.req.json<{ image: string }>();
|
||||
|
||||
const imageUrl = image.startsWith('data:')
|
||||
? image
|
||||
: `data:image/png;base64,${image}`;
|
||||
|
||||
const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: `Analyze this image and return a JSON object with:
|
||||
- objects: array of objects detected
|
||||
- scene: description of the setting
|
||||
- mood: emotional tone
|
||||
- colors: dominant colors
|
||||
- text: any visible text
|
||||
|
||||
Return ONLY valid JSON, no explanations.`,
|
||||
},
|
||||
{ type: 'image_url', image_url: { url: imageUrl } },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
// Parse JSON response
|
||||
try {
|
||||
const analysis = JSON.parse(response.response);
|
||||
return c.json({
|
||||
success: true,
|
||||
analysis,
|
||||
});
|
||||
} catch {
|
||||
return c.json({
|
||||
success: true,
|
||||
raw: response.response,
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
return c.json(
|
||||
{
|
||||
success: false,
|
||||
error: (error as Error).message,
|
||||
},
|
||||
500
|
||||
);
|
||||
}
|
||||
});

// ============================================================================
// Image Comparison
// ============================================================================

app.post('/vision/compare', async (c) => {
  try {
    const { image1, image2, question = 'What are the differences between these images?' } =
      await c.req.json<{
        image1: string;
        image2: string;
        question?: string;
      }>();

    const imageUrl1 = image1.startsWith('data:')
      ? image1
      : `data:image/png;base64,${image1}`;
    const imageUrl2 = image2.startsWith('data:')
      ? image2
      : `data:image/png;base64,${image2}`;

    // Analyze first image
    const analysis1 = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'Describe this image in detail.' },
            { type: 'image_url', image_url: { url: imageUrl1 } },
          ],
        },
      ],
    });

    // Analyze second image
    const analysis2 = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'Describe this image in detail.' },
            { type: 'image_url', image_url: { url: imageUrl2 } },
          ],
        },
      ],
    });

    // Compare using text generation
    const comparison = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [
        {
          role: 'user',
          content: `Compare these two images based on their descriptions:

Image 1: ${analysis1.response}

Image 2: ${analysis2.response}

Question: ${question}`,
        },
      ],
    });

    return c.json({
      success: true,
      image1Description: analysis1.response,
      image2Description: analysis2.response,
      comparison: comparison.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
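
/**
 * Hedged note: the two describe calls above are independent, so they could
 * run concurrently instead of sequentially. A sketch of the same step with
 * Promise.all (same model and message shape as above):
 *
 *   const [analysis1, analysis2] = await Promise.all(
 *     [imageUrl1, imageUrl2].map((url) =>
 *       c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
 *         messages: [
 *           {
 *             role: 'user',
 *             content: [
 *               { type: 'text', text: 'Describe this image in detail.' },
 *               { type: 'image_url', image_url: { url } },
 *             ],
 *           },
 *         ],
 *       })
 *     )
 *   );
 */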

// ============================================================================
// Image Upload from URL
// ============================================================================

/**
 * Fetch image from URL, convert to base64, and analyze
 */

app.post('/vision/url', async (c) => {
  try {
    const { url, question = 'What is in this image?' } = await c.req.json<{
      url: string;
      question?: string;
    }>();

    // Fetch image
    const imageResponse = await fetch(url);
    if (!imageResponse.ok) {
      return c.json({ error: 'Failed to fetch image' }, 400);
    }

    // Convert to base64 in chunks: spreading a large Uint8Array straight into
    // String.fromCharCode(...) can exceed the engine's argument limit
    const imageBytes = await imageResponse.bytes();
    let binary = '';
    const chunkSize = 8192;
    for (let i = 0; i < imageBytes.length; i += chunkSize) {
      binary += String.fromCharCode(...imageBytes.subarray(i, i + chunkSize));
    }
    const base64 = btoa(binary);
    const contentType = imageResponse.headers.get('content-type') || 'image/png';
    const imageUrl = `data:${contentType};base64,${base64}`;

    // Analyze image
    const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: question },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });

    return c.json({
      success: true,
      sourceUrl: url,
      question,
      answer: response.response,
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
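
/**
 * Usage sketch (hedged: assumes local dev via `npx wrangler dev` on the
 * default port 8787; the example.com URL is a placeholder):
 *
 *   const res = await fetch('http://localhost:8787/vision/url', {
 *     method: 'POST',
 *     headers: { 'Content-Type': 'application/json' },
 *     body: JSON.stringify({ url: 'https://example.com/photo.jpg' }),
 *   });
 *   const { answer } = await res.json();
 */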

// ============================================================================
// Accessibility: Alt Text Generation
// ============================================================================

app.post('/vision/alt-text', async (c) => {
  try {
    const { image } = await c.req.json<{ image: string }>();

    const imageUrl = image.startsWith('data:')
      ? image
      : `data:image/png;base64,${image}`;

    const response = await c.env.AI.run('@cf/meta/llama-3.2-11b-vision-instruct', {
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: 'Generate a concise, descriptive alt text for this image for accessibility purposes. Keep it under 125 characters.',
            },
            { type: 'image_url', image_url: { url: imageUrl } },
          ],
        },
      ],
    });

    return c.json({
      success: true,
      altText: response.response.trim(),
    });
  } catch (error) {
    return c.json(
      {
        success: false,
        error: (error as Error).message,
      },
      500
    );
  }
});
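
/**
 * Hedged helper (an addition: models do not reliably respect the
 * 125-character instruction in the prompt above, so callers may want to
 * enforce the limit before writing the alt text into markup):
 */
function clampAltText(text: string, maxLength = 125): string {
  const trimmed = text.trim();
  return trimmed.length <= maxLength
    ? trimmed
    : `${trimmed.slice(0, maxLength - 1).trimEnd()}…`; // truncate with ellipsis
}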

// ============================================================================
// Health Check
// ============================================================================

app.get('/health', (c) => {
  return c.json({
    status: 'ok',
    timestamp: new Date().toISOString(),
  });
});

export default app;

138
templates/wrangler-ai-config.jsonc
Normal file
@@ -0,0 +1,138 @@
/**
 * Cloudflare Workers AI - Wrangler Configuration
 *
 * This configuration file sets up the Workers AI binding for your Worker.
 * Place it in your project root as wrangler.jsonc
 */

{
  "name": "my-ai-worker",
  "main": "src/index.ts",
  "compatibility_date": "2025-10-21",

  /**
   * AI Binding
   * Provides access to Workers AI models via env.AI
   */
  "ai": {
    "binding": "AI" // Available in your Worker as env.AI
  },

  /**
   * Optional: AI Gateway Integration
   * Provides caching, logging, and analytics for AI requests
   * Create a gateway at: https://dash.cloudflare.com/ai/ai-gateway
   */
  // Note: AI Gateway is configured per-request, not in wrangler.jsonc
  // Use the gateway option in env.AI.run():
  // env.AI.run(model, inputs, { gateway: { id: 'my-gateway' } })

  /**
   * Optional: Vectorize Binding (for RAG patterns)
   * Store and search vector embeddings
   */
  "vectorize": [
    {
      "binding": "VECTORIZE",
      "index_name": "my-embeddings-index"
    }
  ],
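
  // The index must exist before deploying. For bge-base embeddings
  // (768 dimensions, per the embeddings template) it can be created with:
  //   npx wrangler vectorize create my-embeddings-index --dimensions=768 --metric=cosine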

  /**
   * Optional: D1 Database (for RAG document storage)
   */
  "d1_databases": [
    {
      "binding": "DB",
      "database_name": "my-database",
      "database_id": "YOUR_DATABASE_ID"
    }
  ],

  /**
   * Optional: R2 Bucket (for image storage)
   */
  "r2_buckets": [
    {
      "binding": "BUCKET",
      "bucket_name": "ai-generated-images"
    }
  ],

  /**
   * Optional: KV Namespace (for caching AI responses)
   */
  "kv_namespaces": [
    {
      "binding": "CACHE",
      "id": "YOUR_KV_NAMESPACE_ID"
    }
  ],

  /**
   * Environment Variables
   * Store API keys and configuration
   */
  "vars": {
    "ENVIRONMENT": "production"
  },

  /**
   * Secrets (use: wrangler secret put SECRET_NAME)
   * - CLOUDFLARE_API_KEY
   * - CLOUDFLARE_ACCOUNT_ID
   */

  /**
   * Workers Configuration
   */
  "limits": {
    "cpu_ms": 30000 // 30 seconds (increase for long AI operations)
  },

  /**
   * Local Development
   * Run: npx wrangler dev
   */
  "dev": {
    "port": 8787
  }
}

/**
 * TypeScript Types
 *
 * Add to your src/index.ts:
 *
 * export interface Env {
 *   AI: Ai;
 *   VECTORIZE?: Vectorize;
 *   DB?: D1Database;
 *   BUCKET?: R2Bucket;
 *   CACHE?: KVNamespace;
 *   CLOUDFLARE_API_KEY?: string;
 *   CLOUDFLARE_ACCOUNT_ID?: string;
 * }
 */

/**
 * Usage Examples
 *
 * Basic AI inference:
 * const response = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
 *   prompt: 'Hello!',
 * });
 *
 * With AI Gateway:
 * const response = await env.AI.run(
 *   '@cf/meta/llama-3.1-8b-instruct',
 *   { prompt: 'Hello!' },
 *   { gateway: { id: 'my-gateway' } }
 * );
 *
 * Streaming:
 * const stream = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
 *   messages: [{ role: 'user', content: 'Hello!' }],
 *   stream: true,
 * });
 */
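
/**
 * Caching with the CACHE binding above (hedged sketch; the key scheme and TTL
 * are assumptions, not a Workers AI convention - note KV keys are capped at
 * 512 bytes, so hash long prompts before using them as keys):
 *
 * const cached = await env.CACHE?.get(`ai:${prompt}`);
 * if (cached) return cached;
 * const result = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', { prompt });
 * await env.CACHE?.put(`ai:${prompt}`, result.response, { expirationTtl: 3600 });
 * return result.response;
 */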