Files
gh-jezweb-claude-skills-ski…/templates/realtime-agents/realtime-agent-basic.ts
2025-11-30 08:25:09 +08:00

188 lines
5.1 KiB
TypeScript

/**
* Basic Realtime Voice Agent
*
* Demonstrates:
* - Creating a realtime voice agent
* - Defining tools for voice agents
* - Configuring voice and instructions
* - Understanding WebRTC vs WebSocket transports
*
* NOTE: This runs in the browser or in a Node.js environment with WebRTC support
*/
import { z } from 'zod';
import { RealtimeAgent, tool } from '@openai/agents-realtime';
// ========================================
// Tools for Voice Agent
// ========================================
// Note: Tools for realtime agents execute in the client environment
// For sensitive operations, make HTTP requests to your backend
const checkWeatherTool = tool({
name: 'check_weather',
description: 'Check current weather for a city',
parameters: z.object({
city: z.string().describe('City name'),
units: z.enum(['celsius', 'fahrenheit']).optional().default('celsius'),
}),
execute: async ({ city, units }) => {
// In production, call a real weather API
const temp = Math.floor(Math.random() * 30) + 10;
return `The weather in ${city} is sunny and ${temp}°${units === 'celsius' ? 'C' : 'F'}`;
},
});
const setReminderTool = tool({
name: 'set_reminder',
description: 'Set a reminder for the user',
parameters: z.object({
message: z.string(),
timeMinutes: z.number().describe('Minutes from now'),
}),
execute: async ({ message, timeMinutes }) => {
// In production, save to database via API call
console.log(`Reminder set: "${message}" in ${timeMinutes} minutes`);
return `I'll remind you about "${message}" in ${timeMinutes} minutes`;
},
});
const searchDocsTool = tool({
name: 'search_docs',
description: 'Search documentation',
parameters: z.object({
query: z.string(),
}),
execute: async ({ query }) => {
// In production, call your search API
return `Found documentation about: ${query}`;
},
});
// ========================================
// Create Realtime Voice Agent
// ========================================
const voiceAssistant = new RealtimeAgent({
name: 'Voice Assistant',
// Instructions for the agent's behavior
instructions: `You are a friendly and helpful voice assistant.
- Keep responses concise and conversational
- Use natural speech patterns
- When using tools, explain what you're doing
- Be proactive in offering help`,
// Tools available to the agent
tools: [checkWeatherTool, setReminderTool, searchDocsTool],
// Voice configuration (OpenAI voice options)
voice: 'alloy', // Options: alloy, echo, fable, onyx, nova, shimmer
// Model (realtime API uses specific models)
model: 'gpt-4o-realtime-preview', // Default for realtime
// Turn detection (when to consider user done speaking)
turnDetection: {
type: 'server_vad', // Voice Activity Detection on server
threshold: 0.5, // Sensitivity (0-1)
prefix_padding_ms: 300, // Audio before speech starts
silence_duration_ms: 500, // Silence to end turn
},
// Additional configuration
temperature: 0.7, // Response creativity (0-1)
maxOutputTokens: 4096, // Maximum response length
});
// ========================================
// Example: Create Session (Node.js)
// ========================================
/**
* For Node.js environments, you need to manually manage the session.
* See realtime-session-browser.tsx for browser usage.
*/
async function createNodeSession() {
// Note: WebRTC transport requires browser environment
// For Node.js, use WebSocket transport
const { OpenAIRealtimeWebSocket } = await import('@openai/agents-realtime');
const transport = new OpenAIRealtimeWebSocket({
apiKey: process.env.OPENAI_API_KEY,
});
// Create session
const session = await voiceAssistant.createSession({
transport,
});
// Handle events
session.on('connected', () => {
console.log('✅ Voice session connected');
});
session.on('disconnected', () => {
console.log('🔌 Voice session disconnected');
});
session.on('error', (error) => {
console.error('❌ Session error:', error);
});
// Audio transcription events
session.on('audio.transcription.completed', (event) => {
console.log('User said:', event.transcript);
});
session.on('agent.audio.done', (event) => {
console.log('Agent said:', event.transcript);
});
// Tool call events
session.on('tool.call', (event) => {
console.log('Tool called:', event.name, event.arguments);
});
session.on('tool.result', (event) => {
console.log('Tool result:', event.result);
});
// Connect to start session
await session.connect();
// To disconnect later
// await session.disconnect();
return session;
}
// ========================================
// Transport Options
// ========================================
/**
* WebRTC Transport (recommended for browser)
* - Lower latency
* - Better for real-time voice
* - Requires browser environment
*
* WebSocket Transport
* - Works in Node.js
* - Slightly higher latency
* - Simpler setup
*/
// Uncomment to run in Node.js
// createNodeSession().catch(console.error);
export {
voiceAssistant,
checkWeatherTool,
setReminderTool,
searchDocsTool,
createNodeSession,
};