Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:25:09 +08:00
commit 9475095985
30 changed files with 5609 additions and 0 deletions

View File

@@ -0,0 +1,187 @@
/**
* Basic Realtime Voice Agent
*
* Demonstrates:
* - Creating a realtime voice agent
* - Defining tools for voice agents
* - Configuring voice and instructions
* - Understanding WebRTC vs WebSocket transports
*
* NOTE: This runs in the browser or in a Node.js environment with WebRTC support
*/
import { z } from 'zod';
import { RealtimeAgent, tool } from '@openai/agents-realtime';
// ========================================
// Tools for Voice Agent
// ========================================
// Note: Tools for realtime agents execute in the client environment
// For sensitive operations, make HTTP requests to your backend
const checkWeatherTool = tool({
  name: 'check_weather',
  description: 'Check current weather for a city',
  parameters: z.object({
    city: z.string().describe('City name'),
    units: z.enum(['celsius', 'fahrenheit']).optional().default('celsius'),
  }),
  // Returns a mock weather report; in production, call a real weather API.
  execute: async ({ city, units }) => {
    // Mock temperature is generated in Celsius (10–39 °C).
    const tempC = Math.floor(Math.random() * 30) + 10;
    const isCelsius = units === 'celsius';
    // FIX: previously the Celsius-range number was reported with a °F suffix
    // when fahrenheit was requested; now convert before formatting.
    const temp = isCelsius ? tempC : Math.round((tempC * 9) / 5 + 32);
    return `The weather in ${city} is sunny and ${temp}°${isCelsius ? 'C' : 'F'}`;
  },
});
const setReminderTool = tool({
  name: 'set_reminder',
  description: 'Set a reminder for the user',
  parameters: z.object({
    message: z.string(),
    timeMinutes: z.number().describe('Minutes from now'),
  }),
  // Mock implementation: logs the reminder. In production, persist it
  // to a database via an API call.
  execute: async (input) => {
    const { message, timeMinutes } = input;
    console.log(`Reminder set: "${message}" in ${timeMinutes} minutes`);
    return `I'll remind you about "${message}" in ${timeMinutes} minutes`;
  },
});
const searchDocsTool = tool({
  name: 'search_docs',
  description: 'Search documentation',
  parameters: z.object({
    query: z.string(),
  }),
  // Stub result; in production, call your real search API.
  execute: async ({ query }) => `Found documentation about: ${query}`,
});
// ========================================
// Create Realtime Voice Agent
// ========================================
/**
 * Primary realtime voice agent: bundles instructions, tools, voice,
 * model, and turn-detection configuration in one place.
 */
const voiceAssistant = new RealtimeAgent({
  name: 'Voice Assistant',
  instructions: `You are a friendly and helpful voice assistant.
- Keep responses concise and conversational
- Use natural speech patterns
- When using tools, explain what you're doing
- Be proactive in offering help`,
  tools: [checkWeatherTool, setReminderTool, searchDocsTool],
  // OpenAI voice preset; alternatives: echo, fable, onyx, nova, shimmer.
  voice: 'alloy',
  // The realtime API requires a realtime-capable model.
  model: 'gpt-4o-realtime-preview',
  // Server-side voice activity detection decides when the user's turn ends.
  turnDetection: {
    type: 'server_vad',
    threshold: 0.5,           // VAD sensitivity (0-1)
    prefix_padding_ms: 300,   // audio kept from before speech was detected
    silence_duration_ms: 500, // trailing silence that closes the turn
  },
  temperature: 0.7,       // response creativity
  maxOutputTokens: 4096,  // cap on response length
});
// ========================================
// Example: Create Session (Node.js)
// ========================================
/**
* For Node.js environments, you need to manually manage the session.
* See realtime-session-browser.tsx for browser usage.
*/
/**
 * Creates and connects a voice session for Node.js environments.
 * WebRTC transport is browser-only, so WebSocket transport is used here;
 * see realtime-session-browser.tsx for the browser flow.
 *
 * @returns the connected session; call session.disconnect() to end it.
 */
async function createNodeSession() {
  const { OpenAIRealtimeWebSocket } = await import('@openai/agents-realtime');
  const wsTransport = new OpenAIRealtimeWebSocket({
    apiKey: process.env.OPENAI_API_KEY,
  });
  const session = await voiceAssistant.createSession({ transport: wsTransport });

  // Lifecycle events
  session.on('connected', () => console.log('✅ Voice session connected'));
  session.on('disconnected', () => console.log('🔌 Voice session disconnected'));
  session.on('error', (error) => console.error('❌ Session error:', error));

  // Transcription events
  session.on('audio.transcription.completed', (event) => {
    console.log('User said:', event.transcript);
  });
  session.on('agent.audio.done', (event) => {
    console.log('Agent said:', event.transcript);
  });

  // Tool-call events
  session.on('tool.call', (event) => {
    console.log('Tool called:', event.name, event.arguments);
  });
  session.on('tool.result', (event) => {
    console.log('Tool result:', event.result);
  });

  // Start streaming; disconnect later via session.disconnect().
  await session.connect();
  return session;
}
// ========================================
// Transport Options
// ========================================
/**
* WebRTC Transport (recommended for browser)
* - Lower latency
* - Better for real-time voice
* - Requires browser environment
*
* WebSocket Transport
* - Works in Node.js
* - Slightly higher latency
* - Simpler setup
*/
// Uncomment to run in Node.js
// createNodeSession().catch(console.error);
export {
voiceAssistant,
checkWeatherTool,
setReminderTool,
searchDocsTool,
createNodeSession,
};

View File

@@ -0,0 +1,215 @@
/**
* Realtime Agent Handoffs (Voice)
*
* Demonstrates:
* - Multi-agent voice workflows
* - Handoffs between voice agents
* - Automatic conversation history passing
* - Voice/model constraints during handoffs
*
* IMPORTANT: Unlike text agents, realtime agent handoffs have constraints:
* - Cannot change voice during handoff
* - Cannot change model during handoff
* - Conversation history automatically passed
*/
import { z } from 'zod';
import { RealtimeAgent, tool } from '@openai/agents-realtime';
// ========================================
// Specialized Agent Tools
// ========================================
const checkAccountTool = tool({
  name: 'check_account',
  description: 'Look up account information',
  parameters: z.object({
    accountId: z.string(),
  }),
  // Mock lookup: returns a canned account summary.
  execute: async (input) => {
    const { accountId } = input;
    return `Account ${accountId}: Premium tier, billing current, last login: 2025-10-20`;
  },
});
const processPaymentTool = tool({
  name: 'process_payment',
  description: 'Process a payment',
  parameters: z.object({
    accountId: z.string(),
    amount: z.number(),
  }),
  // Mock processor: echoes a confirmation string.
  execute: async ({ accountId, amount }) =>
    `Payment of $${amount} processed for account ${accountId}`,
});
const checkSystemTool = tool({
  name: 'check_system',
  description: 'Check system status',
  // A status check takes no inputs.
  parameters: z.object({}),
  execute: async () => 'All systems operational: API ✅, Database ✅, CDN ✅',
});
const createTicketTool = tool({
  name: 'create_ticket',
  description: 'Create support ticket',
  parameters: z.object({
    title: z.string(),
    priority: z.enum(['low', 'medium', 'high']),
  }),
  // Generates a pseudo-random ticket id; persist to a real tracker in production.
  execute: async ({ title, priority }) => {
    const ticketId = `TICKET-${Math.floor(Math.random() * 10000)}`;
    return `Created ${priority} priority ticket ${ticketId}: ${title}`;
  },
});
// ========================================
// Specialized Voice Agents
// ========================================
/**
 * Billing specialist — reached via handoff from the triage agent.
 * Its voice must match the triage agent's (realtime handoff constraint).
 */
const billingAgent = new RealtimeAgent({
  name: 'Billing Specialist',
  instructions: `You handle billing and payment questions.
- Be professional and empathetic
- Explain charges clearly
- Process payments when requested
- Keep responses concise for voice`,
  handoffDescription: 'Transfer for billing, payments, or account questions',
  tools: [checkAccountTool, processPaymentTool],
  voice: 'nova',
});
/**
 * Technical-support specialist — reached via handoff from the triage agent.
 * Its voice must match the triage agent's (realtime handoff constraint).
 */
const technicalAgent = new RealtimeAgent({
  name: 'Technical Support',
  instructions: `You handle technical issues and system problems.
- Diagnose issues systematically
- Provide clear troubleshooting steps
- Create tickets for complex issues
- Use simple language for voice`,
  handoffDescription: 'Transfer for technical problems, bugs, or system issues',
  tools: [checkSystemTool, createTicketTool],
  voice: 'nova',
});
// ========================================
// Triage Agent (Entry Point)
// ========================================
/**
 * Entry-point agent: greets callers and routes them to a specialist.
 * The voice and model set here are used by every agent in the handoff chain.
 */
const triageVoiceAgent = new RealtimeAgent({
  name: 'Customer Service',
  instructions: `You are the first point of contact.
- Greet customers warmly
- Understand their issue
- Route to the right specialist
- Explain the transfer before handing off`,
  handoffs: [billingAgent, technicalAgent],
  voice: 'nova',
  model: 'gpt-4o-realtime-preview',
});
// ========================================
// Important Notes about Voice Handoffs
// ========================================
/**
* KEY DIFFERENCES from text agent handoffs:
*
* 1. VOICE CONSTRAINT
* - All agents in a handoff chain must use the same voice
* - Voice is set by the initial agent
* - Cannot change voice during handoff
*
* 2. MODEL CONSTRAINT
* - All agents must use the same model
* - Model is set by the initial agent
* - Cannot change model during handoff
*
* 3. AUTOMATIC HISTORY
* - Conversation history automatically passed to delegated agent
* - No need to manually manage context
* - Specialist agents can see full conversation
*
* 4. SEAMLESS AUDIO
* - Audio stream continues during handoff
* - User doesn't need to reconnect
* - Tools execute in same session
*/
// ========================================
// Example: Create Session with Handoffs
// ========================================
/**
 * Connects a voice session rooted at the triage agent and logs handoffs,
 * transcripts, and tool activity as they occur.
 *
 * @returns the connected session.
 */
async function createVoiceSessionWithHandoffs() {
  const { OpenAIRealtimeWebSocket } = await import('@openai/agents-realtime');
  const wsTransport = new OpenAIRealtimeWebSocket({
    apiKey: process.env.OPENAI_API_KEY,
  });
  const session = await triageVoiceAgent.createSession({ transport: wsTransport });

  // Name of whichever agent currently owns the conversation.
  let activeAgent = 'Customer Service';

  session.on('connected', () => {
    console.log('✅ Voice session connected');
    console.log('🎙️ Current agent:', activeAgent);
  });
  // Fires when an LLM-routed handoff switches the active agent.
  session.on('agent.changed', (event: any) => {
    activeAgent = event.agent.name;
    console.log('\n🔄 HANDOFF to:', activeAgent);
  });
  session.on('audio.transcription.completed', (event) => {
    console.log(`👤 User: ${event.transcript}`);
  });
  session.on('agent.audio.done', (event) => {
    console.log(`🤖 ${activeAgent}: ${event.transcript}`);
  });
  session.on('tool.call', (event) => {
    console.log(`\n🛠 Tool: ${event.name}`);
    console.log(` Arguments:`, event.arguments);
  });
  session.on('tool.result', (event) => {
    console.log(`✅ Result:`, event.result, '\n');
  });

  await session.connect();

  console.log('\n💡 Try saying:');
  console.log(' - "I have a question about my bill"');
  console.log(' - "The API is returning errors"');
  console.log(' - "I need to update my payment method"');
  console.log('\n');
  return session;
}
// ========================================
// Example: Manual Handoff Triggering
// ========================================
/**
* While handoffs usually happen automatically via LLM routing,
* you can also programmatically trigger them if needed via
* backend delegation patterns (see agent-patterns.md reference).
*/
// Uncomment to run
// createVoiceSessionWithHandoffs().catch(console.error);
export {
triageVoiceAgent,
billingAgent,
technicalAgent,
createVoiceSessionWithHandoffs,
};

View File

@@ -0,0 +1,369 @@
/**
* Realtime Voice Session - React Browser Client
*
* Demonstrates:
* - Creating a voice session in the browser
* - Using WebRTC transport for low latency
* - Handling audio I/O automatically
* - Managing session lifecycle
* - Displaying transcripts and tool calls
*
* IMPORTANT: Generate ephemeral API keys server-side, never expose your main API key
*/
import React, { useState, useEffect, useRef } from 'react';
import { RealtimeSession, RealtimeAgent } from '@openai/agents-realtime';
import { z } from 'zod';
// ========================================
// Voice Agent Definition
// ========================================
import { tool } from '@openai/agents-realtime';
const weatherTool = tool({
  name: 'get_weather',
  description: 'Get weather for a city',
  parameters: z.object({
    city: z.string(),
  }),
  // Delegates to the backend so API credentials never reach the client.
  execute: async ({ city }) => {
    // FIX: encode the city so spaces, '&', '#', etc. cannot corrupt the
    // query string (previously interpolated raw).
    const response = await fetch(`/api/weather?city=${encodeURIComponent(city)}`);
    // FIX: surface HTTP failures instead of parsing an error body as JSON.
    if (!response.ok) {
      throw new Error(`Weather request failed with status ${response.status}`);
    }
    const data = await response.json();
    return data.weather;
  },
});
// Browser-side voice agent; audio I/O is handled by the session transport.
const voiceAgent = new RealtimeAgent({
  name: 'Voice Assistant',
  instructions: 'You are a helpful voice assistant. Keep responses concise and friendly.',
  voice: 'alloy',
  tools: [weatherTool],
});
// ========================================
// React Component
// ========================================
// One entry in the rendered conversation transcript.
interface Message {
  role: 'user' | 'assistant'; // who produced the utterance
  content: string;            // transcript text from the session event
  timestamp: Date;            // when the transcript was recorded client-side
}
// Record of one tool invocation surfaced in the UI.
interface ToolCall {
  name: string;                   // tool name reported by the session
  arguments: Record<string, any>; // arguments the model supplied
  result?: any;                   // filled in when the tool.result event arrives
}
/**
 * Voice assistant UI: owns the realtime session lifecycle and renders
 * connection status, the running transcript, tool activity, and controls.
 * The session is created once on mount and torn down on unmount.
 */
export function VoiceAssistant() {
  const [isConnected, setIsConnected] = useState(false);
  const [isListening, setIsListening] = useState(false);
  const [messages, setMessages] = useState<Message[]>([]);
  const [toolCalls, setToolCalls] = useState<ToolCall[]>([]);
  const [error, setError] = useState<string | null>(null);
  // Keeps the live session reachable from button handlers without re-rendering.
  const sessionRef = useRef<RealtimeSession | null>(null);

  // ========================================
  // Initialize Session
  // ========================================
  useEffect(() => {
    let session: RealtimeSession;
    async function initSession() {
      try {
        // Get an ephemeral API key from the backend — never expose the main key.
        const response = await fetch('/api/generate-session-key');
        const { apiKey } = await response.json();
        // WebRTC transport gives the lowest latency in the browser.
        session = new RealtimeSession(voiceAgent, {
          apiKey,
          transport: 'webrtc', // or 'websocket'
        });
        sessionRef.current = session;

        // ---- Session lifecycle events ----
        session.on('connected', () => {
          console.log('✅ Connected to voice session');
          setIsConnected(true);
          setError(null);
        });
        session.on('disconnected', () => {
          console.log('🔌 Disconnected from voice session');
          setIsConnected(false);
          setIsListening(false);
        });
        session.on('error', (err) => {
          console.error('❌ Session error:', err);
          setError(err.message);
        });

        // ---- Transcription events ----
        session.on('audio.transcription.completed', (event) => {
          // User finished speaking
          setMessages(prev => [...prev, {
            role: 'user',
            content: event.transcript,
            timestamp: new Date(),
          }]);
          setIsListening(false);
        });
        session.on('audio.transcription.started', () => {
          // User started speaking
          setIsListening(true);
        });
        session.on('agent.audio.done', (event) => {
          // Agent finished speaking
          setMessages(prev => [...prev, {
            role: 'assistant',
            content: event.transcript,
            timestamp: new Date(),
          }]);
        });

        // ---- Tool call events ----
        session.on('tool.call', (event) => {
          console.log('🛠️ Tool call:', event.name, event.arguments);
          setToolCalls(prev => [...prev, {
            name: event.name,
            arguments: event.arguments,
          }]);
        });
        session.on('tool.result', (event) => {
          console.log('✅ Tool result:', event.result);
          // FIX: attach the result to the most recent unresolved call only.
          // Mapping on name alone overwrote every call with the same tool name,
          // including earlier calls that already had results.
          setToolCalls(prev => {
            const next = [...prev];
            for (let i = next.length - 1; i >= 0; i--) {
              if (next[i].name === event.name && next[i].result === undefined) {
                next[i] = { ...next[i], result: event.result };
                break;
              }
            }
            return next;
          });
        });

        // Connect to start the session
        await session.connect();
      } catch (err: any) {
        console.error('Failed to initialize session:', err);
        setError(err.message);
      }
    }
    initSession();
    // Cleanup on unmount
    return () => {
      if (session) {
        session.disconnect();
      }
    };
  }, []);

  // ========================================
  // Manual Control Functions
  // ========================================
  // Cut the agent off mid-utterance.
  const handleInterrupt = () => {
    if (sessionRef.current) {
      sessionRef.current.interrupt();
    }
  };
  // End the session entirely.
  const handleDisconnect = () => {
    if (sessionRef.current) {
      sessionRef.current.disconnect();
    }
  };

  // ========================================
  // Render UI
  // ========================================
  return (
    <div className="voice-assistant">
      <div className="status-bar">
        <div className={`status ${isConnected ? 'connected' : 'disconnected'}`}>
          {isConnected ? '🟢 Connected' : '🔴 Disconnected'}
        </div>
        {isListening && <div className="listening">🎤 Listening...</div>}
      </div>
      {error && (
        <div className="error">
          Error: {error}
        </div>
      )}
      <div className="messages">
        {messages.map((msg, i) => (
          <div key={i} className={`message ${msg.role}`}>
            <div className="role">{msg.role === 'user' ? '👤' : '🤖'}</div>
            <div className="content">
              <p>{msg.content}</p>
              <span className="timestamp">
                {msg.timestamp.toLocaleTimeString()}
              </span>
            </div>
          </div>
        ))}
      </div>
      {toolCalls.length > 0 && (
        <div className="tool-calls">
          <h3>🛠 Tool Calls</h3>
          {toolCalls.map((tc, i) => (
            <div key={i} className="tool-call">
              <strong>{tc.name}</strong>
              <pre>{JSON.stringify(tc.arguments, null, 2)}</pre>
              {tc.result && (
                <div className="result">
                  Result: {JSON.stringify(tc.result)}
                </div>
              )}
            </div>
          ))}
        </div>
      )}
      <div className="controls">
        <button
          onClick={handleInterrupt}
          disabled={!isConnected}
        >
          Interrupt
        </button>
        <button
          onClick={handleDisconnect}
          disabled={!isConnected}
        >
          🔌 Disconnect
        </button>
      </div>
      <style jsx>{`
        .voice-assistant {
          max-width: 600px;
          margin: 0 auto;
          padding: 20px;
        }
        .status-bar {
          display: flex;
          gap: 20px;
          margin-bottom: 20px;
        }
        .status {
          padding: 8px 16px;
          border-radius: 20px;
          font-size: 14px;
        }
        .status.connected {
          background: #d4edda;
          color: #155724;
        }
        .status.disconnected {
          background: #f8d7da;
          color: #721c24;
        }
        .listening {
          padding: 8px 16px;
          background: #fff3cd;
          color: #856404;
          border-radius: 20px;
          font-size: 14px;
        }
        .error {
          padding: 12px;
          background: #f8d7da;
          color: #721c24;
          border-radius: 8px;
          margin-bottom: 20px;
        }
        .messages {
          height: 400px;
          overflow-y: auto;
          border: 1px solid #ddd;
          border-radius: 8px;
          padding: 16px;
          margin-bottom: 20px;
        }
        .message {
          display: flex;
          gap: 12px;
          margin-bottom: 16px;
        }
        .message.user {
          justify-content: flex-end;
        }
        .content {
          max-width: 70%;
          padding: 12px;
          border-radius: 12px;
        }
        .message.user .content {
          background: #007bff;
          color: white;
        }
        .message.assistant .content {
          background: #f1f3f4;
          color: #000;
        }
        .timestamp {
          font-size: 11px;
          opacity: 0.6;
        }
        .tool-calls {
          margin-bottom: 20px;
          padding: 12px;
          background: #f8f9fa;
          border-radius: 8px;
        }
        .tool-call {
          margin: 8px 0;
          padding: 8px;
          background: white;
          border-radius: 4px;
        }
        .controls {
          display: flex;
          gap: 12px;
        }
        button {
          flex: 1;
          padding: 12px;
          border: none;
          border-radius: 8px;
          background: #007bff;
          color: white;
          cursor: pointer;
        }
        button:disabled {
          background: #ccc;
          cursor: not-allowed;
        }
        button:hover:not(:disabled) {
          background: #0056b3;
        }
      `}</style>
    </div>
  );
}
export default VoiceAssistant;