/** * OpenAI Audio API - Text-to-Speech Examples * * This template demonstrates: * - Basic TTS with all 11 voices * - Different models (tts-1, tts-1-hd, gpt-4o-mini-tts) * - Voice instructions (gpt-4o-mini-tts only) * - Speed control * - Different audio formats * - Streaming TTS */ import OpenAI from 'openai'; import fs from 'fs'; const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, }); // ============================================================================= // BASIC TTS // ============================================================================= async function basicTTS() { const mp3 = await openai.audio.speech.create({ model: 'tts-1', voice: 'alloy', input: 'The quick brown fox jumped over the lazy dog.', }); const buffer = Buffer.from(await mp3.arrayBuffer()); fs.writeFileSync('speech.mp3', buffer); console.log('Speech saved to: speech.mp3'); } // ============================================================================= // ALL 11 VOICES // ============================================================================= async function allVoices() { const voices = [ 'alloy', // Neutral, balanced 'ash', // Clear, professional 'ballad', // Warm, storytelling 'coral', // Soft, friendly 'echo', // Calm, measured 'fable', // Expressive, narrative 'onyx', // Deep, authoritative 'nova', // Bright, energetic 'sage', // Wise, thoughtful 'shimmer', // Gentle, soothing 'verse', // Poetic, rhythmic ] as const; const text = 'Hello, this is a voice sample.'; for (const voice of voices) { console.log(`Generating ${voice} voice...`); const mp3 = await openai.audio.speech.create({ model: 'tts-1', voice, input: text, }); const buffer = Buffer.from(await mp3.arrayBuffer()); fs.writeFileSync(`speech-${voice}.mp3`, buffer); // Wait 500ms between requests await new Promise(resolve => setTimeout(resolve, 500)); } console.log('All voice samples generated!'); } // ============================================================================= // MODEL COMPARISON // ============================================================================= async function modelComparison() { const text = 'This is a test of different TTS models.'; // tts-1 (standard quality, fastest) console.log('Generating with tts-1...'); const tts1 = await openai.audio.speech.create({ model: 'tts-1', voice: 'nova', input: text, }); const buffer1 = Buffer.from(await tts1.arrayBuffer()); fs.writeFileSync('tts-1-output.mp3', buffer1); // tts-1-hd (high quality) console.log('Generating with tts-1-hd...'); const tts1Hd = await openai.audio.speech.create({ model: 'tts-1-hd', voice: 'nova', input: text, }); const buffer2 = Buffer.from(await tts1Hd.arrayBuffer()); fs.writeFileSync('tts-1-hd-output.mp3', buffer2); console.log('Model comparison complete!'); console.log('tts-1 file size:', buffer1.length, 'bytes'); console.log('tts-1-hd file size:', buffer2.length, 'bytes'); } // ============================================================================= // VOICE INSTRUCTIONS (gpt-4o-mini-tts) // ============================================================================= async function voiceInstructions() { // Example 1: Calm and professional const professional = await openai.audio.speech.create({ model: 'gpt-4o-mini-tts', voice: 'nova', input: 'Welcome to our customer support line. How can I help you today?', instructions: 'Speak in a calm, professional, and friendly tone suitable for customer service.', }); const buffer1 = Buffer.from(await professional.arrayBuffer()); fs.writeFileSync('professional-tone.mp3', buffer1); // Example 2: Energetic and enthusiastic const energetic = await openai.audio.speech.create({ model: 'gpt-4o-mini-tts', voice: 'nova', input: 'Get ready for the biggest sale of the year! Don\'t miss out!', instructions: 'Use an enthusiastic, energetic tone perfect for marketing and advertisements.', }); const buffer2 = Buffer.from(await energetic.arrayBuffer()); fs.writeFileSync('energetic-tone.mp3', buffer2); // Example 3: Calm and soothing const soothing = await openai.audio.speech.create({ model: 'gpt-4o-mini-tts', voice: 'shimmer', input: 'Take a deep breath. Relax your shoulders. Let all tension fade away.', instructions: 'Adopt a calm, soothing voice suitable for meditation and relaxation guidance.', }); const buffer3 = Buffer.from(await soothing.arrayBuffer()); fs.writeFileSync('soothing-tone.mp3', buffer3); console.log('Voice instruction examples generated!'); } // ============================================================================= // SPEED CONTROL // ============================================================================= async function speedControl() { const text = 'This sentence will be spoken at different speeds.'; const speeds = [0.5, 0.75, 1.0, 1.25, 1.5, 2.0]; for (const speed of speeds) { console.log(`Generating at ${speed}x speed...`); const mp3 = await openai.audio.speech.create({ model: 'tts-1', voice: 'alloy', input: text, speed, }); const buffer = Buffer.from(await mp3.arrayBuffer()); fs.writeFileSync(`speech-${speed}x.mp3`, buffer); await new Promise(resolve => setTimeout(resolve, 500)); } console.log('Speed variations generated!'); } // ============================================================================= // DIFFERENT AUDIO FORMATS // ============================================================================= async function differentFormats() { const text = 'Testing different audio formats.'; const formats = ['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'] as const; for (const format of formats) { console.log(`Generating ${format} format...`); const audio = await openai.audio.speech.create({ model: 'tts-1', voice: 'alloy', input: text, response_format: format, }); const buffer = Buffer.from(await audio.arrayBuffer()); const extension = format === 'pcm' ? 'raw' : format; fs.writeFileSync(`speech.${extension}`, buffer); console.log(` ${format}: ${buffer.length} bytes`); await new Promise(resolve => setTimeout(resolve, 500)); } console.log('All format examples generated!'); } // ============================================================================= // LONG TEXT HANDLING // ============================================================================= async function longText() { const longText = ` This is a longer piece of text that demonstrates how TTS handles extended content. The model can process up to 4096 characters in a single request. You can use this for narrating articles, generating audiobooks, or creating voice-overs. The speech will maintain natural pacing and intonation throughout. `.trim(); const mp3 = await openai.audio.speech.create({ model: 'tts-1-hd', voice: 'fable', // Good for narration input: longText, }); const buffer = Buffer.from(await mp3.arrayBuffer()); fs.writeFileSync('long-narration.mp3', buffer); console.log('Long narration generated!'); console.log('Text length:', longText.length, 'characters'); console.log('Audio size:', buffer.length, 'bytes'); } // ============================================================================= // STREAMING TTS (Server-Sent Events) // ============================================================================= async function streamingTTS() { const response = await fetch('https://api.openai.com/v1/audio/speech', { method: 'POST', headers: { 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`, 'Content-Type': 'application/json', }, body: JSON.stringify({ model: 'gpt-4o-mini-tts', voice: 'nova', input: 'This is a streaming audio example. The audio will be generated and delivered in chunks.', stream_format: 'sse', // Server-Sent Events }), }); console.log('Streaming TTS...'); const reader = response.body?.getReader(); const chunks: Uint8Array[] = []; while (true) { const { done, value } = await reader!.read(); if (done) break; chunks.push(value); console.log('Received chunk:', value.length, 'bytes'); } // Combine chunks const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0); const combined = new Uint8Array(totalLength); let offset = 0; for (const chunk of chunks) { combined.set(chunk, offset); offset += chunk.length; } fs.writeFileSync('streaming-output.mp3', Buffer.from(combined)); console.log('Streaming TTS saved to: streaming-output.mp3'); } // ============================================================================= // ERROR HANDLING // ============================================================================= async function withErrorHandling() { try { const mp3 = await openai.audio.speech.create({ model: 'tts-1', voice: 'alloy', input: 'Hello world', }); const buffer = Buffer.from(await mp3.arrayBuffer()); fs.writeFileSync('output.mp3', buffer); return 'output.mp3'; } catch (error: any) { if (error.message.includes('input too long')) { console.error('Text exceeds 4096 character limit'); } else if (error.message.includes('invalid voice')) { console.error('Voice not recognized - use one of the 11 supported voices'); } else if (error.status === 429) { console.error('Rate limit exceeded - wait and retry'); } else { console.error('TTS error:', error.message); } throw error; } } // ============================================================================= // MAIN EXECUTION // ============================================================================= async function main() { console.log('=== OpenAI Text-to-Speech Examples ===\n'); // Example 1: Basic TTS console.log('1. Basic TTS:'); await basicTTS(); console.log(); // Example 2: All voices (uncomment to generate all) // console.log('2. All 11 Voices:'); // await allVoices(); // console.log(); // Example 3: Model comparison console.log('3. Model Comparison:'); await modelComparison(); console.log(); // Example 4: Voice instructions console.log('4. Voice Instructions (gpt-4o-mini-tts):'); await voiceInstructions(); console.log(); // Example 5: Speed control console.log('5. Speed Control:'); await speedControl(); console.log(); // Example 6: Different formats console.log('6. Different Audio Formats:'); await differentFormats(); console.log(); // Example 7: Long text console.log('7. Long Text Narration:'); await longText(); console.log(); } // Run if executed directly if (require.main === module) { main().catch(console.error); } export { basicTTS, allVoices, modelComparison, voiceInstructions, speedControl, differentFormats, longText, streamingTTS, withErrorHandling, };