Initial commit

templates/text-to-speech.ts (new file, 372 lines)
@@ -0,0 +1,372 @@
/**
 * OpenAI Audio API - Text-to-Speech Examples
 *
 * This template demonstrates:
 * - Basic TTS with all 11 voices
 * - Different models (tts-1, tts-1-hd, gpt-4o-mini-tts)
 * - Voice instructions (gpt-4o-mini-tts only)
 * - Speed control
 * - Different audio formats
 * - Streaming TTS
 */

import OpenAI from 'openai';
import fs from 'fs';

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

// =============================================================================
// BASIC TTS
// =============================================================================

async function basicTTS() {
  const mp3 = await openai.audio.speech.create({
    model: 'tts-1',
    voice: 'alloy',
    input: 'The quick brown fox jumped over the lazy dog.',
  });

  const buffer = Buffer.from(await mp3.arrayBuffer());
  fs.writeFileSync('speech.mp3', buffer);

  console.log('Speech saved to: speech.mp3');
}
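
// For large outputs you can avoid buffering the whole file in memory by piping
// the response body to disk. This is a sketch, not part of the original
// template, assuming the SDK returns a fetch-style Response whose body is a
// web ReadableStream (openai v4 on Node 18+).
// These imports would normally sit at the top of the file.
import { Readable } from 'stream';
import { pipeline } from 'stream/promises';

async function basicTTSToFile() {
  const response = await openai.audio.speech.create({
    model: 'tts-1',
    voice: 'alloy',
    input: 'The quick brown fox jumped over the lazy dog.',
  });

  if (!response.body) throw new Error('No response body to stream');

  // Convert the web stream to a Node stream and write chunks as they arrive.
  await pipeline(Readable.fromWeb(response.body as any), fs.createWriteStream('speech-streamed.mp3'));
  console.log('Speech streamed to: speech-streamed.mp3');
}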

// =============================================================================
// ALL 11 VOICES
// =============================================================================

async function allVoices() {
  const voices = [
    'alloy',    // Neutral, balanced
    'ash',      // Clear, professional
    'ballad',   // Warm, storytelling
    'coral',    // Soft, friendly
    'echo',     // Calm, measured
    'fable',    // Expressive, narrative
    'onyx',     // Deep, authoritative
    'nova',     // Bright, energetic
    'sage',     // Wise, thoughtful
    'shimmer',  // Gentle, soothing
    'verse',    // Poetic, rhythmic
  ] as const;

  const text = 'Hello, this is a voice sample.';

  for (const voice of voices) {
    console.log(`Generating ${voice} voice...`);

    const mp3 = await openai.audio.speech.create({
      model: 'tts-1',
      voice,
      input: text,
    });

    const buffer = Buffer.from(await mp3.arrayBuffer());
    fs.writeFileSync(`speech-${voice}.mp3`, buffer);

    // Wait 500ms between requests
    await new Promise(resolve => setTimeout(resolve, 500));
  }

  console.log('All voice samples generated!');
}
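
// Note (added here, not in the original template): tts-1 / tts-1-hd
// historically exposed six voices (alloy, echo, fable, onyx, nova, shimmer);
// ash, ballad, coral, sage, and verse arrived with the gpt-4o audio models.
// If a newer voice is rejected for tts-1, retry it with gpt-4o-mini-tts or
// check the current API reference for the per-model voice list.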

// =============================================================================
// MODEL COMPARISON
// =============================================================================

async function modelComparison() {
  const text = 'This is a test of different TTS models.';

  // tts-1 (standard quality, fastest)
  console.log('Generating with tts-1...');
  const tts1 = await openai.audio.speech.create({
    model: 'tts-1',
    voice: 'nova',
    input: text,
  });

  const buffer1 = Buffer.from(await tts1.arrayBuffer());
  fs.writeFileSync('tts-1-output.mp3', buffer1);

  // tts-1-hd (high quality)
  console.log('Generating with tts-1-hd...');
  const tts1Hd = await openai.audio.speech.create({
    model: 'tts-1-hd',
    voice: 'nova',
    input: text,
  });

  const buffer2 = Buffer.from(await tts1Hd.arrayBuffer());
  fs.writeFileSync('tts-1-hd-output.mp3', buffer2);

  console.log('Model comparison complete!');
  console.log('tts-1 file size:', buffer1.length, 'bytes');
  console.log('tts-1-hd file size:', buffer2.length, 'bytes');
}
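
// The file's docstring also lists gpt-4o-mini-tts. A small sketch (not in the
// original template) to include it in the comparison with the same text:
async function gpt4oMiniTtsSample() {
  const speech = await openai.audio.speech.create({
    model: 'gpt-4o-mini-tts',
    voice: 'nova',
    input: 'This is a test of different TTS models.',
  });

  const buffer = Buffer.from(await speech.arrayBuffer());
  fs.writeFileSync('gpt-4o-mini-tts-output.mp3', buffer);
  console.log('gpt-4o-mini-tts file size:', buffer.length, 'bytes');
}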

// =============================================================================
// VOICE INSTRUCTIONS (gpt-4o-mini-tts)
// =============================================================================

async function voiceInstructions() {
  // Example 1: Calm and professional
  const professional = await openai.audio.speech.create({
    model: 'gpt-4o-mini-tts',
    voice: 'nova',
    input: 'Welcome to our customer support line. How can I help you today?',
    instructions: 'Speak in a calm, professional, and friendly tone suitable for customer service.',
  });

  const buffer1 = Buffer.from(await professional.arrayBuffer());
  fs.writeFileSync('professional-tone.mp3', buffer1);

  // Example 2: Energetic and enthusiastic
  const energetic = await openai.audio.speech.create({
    model: 'gpt-4o-mini-tts',
    voice: 'nova',
    input: 'Get ready for the biggest sale of the year! Don\'t miss out!',
    instructions: 'Use an enthusiastic, energetic tone perfect for marketing and advertisements.',
  });

  const buffer2 = Buffer.from(await energetic.arrayBuffer());
  fs.writeFileSync('energetic-tone.mp3', buffer2);

  // Example 3: Calm and soothing
  const soothing = await openai.audio.speech.create({
    model: 'gpt-4o-mini-tts',
    voice: 'shimmer',
    input: 'Take a deep breath. Relax your shoulders. Let all tension fade away.',
    instructions: 'Adopt a calm, soothing voice suitable for meditation and relaxation guidance.',
  });

  const buffer3 = Buffer.from(await soothing.arrayBuffer());
  fs.writeFileSync('soothing-tone.mp3', buffer3);

  console.log('Voice instruction examples generated!');
}

// =============================================================================
// SPEED CONTROL
// =============================================================================

async function speedControl() {
  const text = 'This sentence will be spoken at different speeds.';

  const speeds = [0.5, 0.75, 1.0, 1.25, 1.5, 2.0];

  for (const speed of speeds) {
    console.log(`Generating at ${speed}x speed...`);

    const mp3 = await openai.audio.speech.create({
      model: 'tts-1',
      voice: 'alloy',
      input: text,
      speed,
    });

    const buffer = Buffer.from(await mp3.arrayBuffer());
    fs.writeFileSync(`speech-${speed}x.mp3`, buffer);

    await new Promise(resolve => setTimeout(resolve, 500));
  }

  console.log('Speed variations generated!');
}
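
// Note (added here, not in the original template): per the API reference,
// `speed` accepts values from 0.25 to 4.0, with 1.0 as the default; values
// outside that range are rejected by the endpoint.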

// =============================================================================
// DIFFERENT AUDIO FORMATS
// =============================================================================

async function differentFormats() {
  const text = 'Testing different audio formats.';

  const formats = ['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'] as const;

  for (const format of formats) {
    console.log(`Generating ${format} format...`);

    const audio = await openai.audio.speech.create({
      model: 'tts-1',
      voice: 'alloy',
      input: text,
      response_format: format,
    });

    const buffer = Buffer.from(await audio.arrayBuffer());
    const extension = format === 'pcm' ? 'raw' : format;
    fs.writeFileSync(`speech.${extension}`, buffer);

    console.log(`  ${format}: ${buffer.length} bytes`);

    await new Promise(resolve => setTimeout(resolve, 500));
  }

  console.log('All format examples generated!');
}
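
// The raw PCM output has no container header. The API reference describes it
// as 24kHz, 16-bit signed little-endian, mono; the helper below is a sketch
// built on that assumption (not part of the original template) that prepends
// a minimal 44-byte WAV header so the file can be played directly.
function pcmToWav(pcm: Buffer, sampleRate = 24000, channels = 1, bitDepth = 16): Buffer {
  const byteRate = sampleRate * channels * (bitDepth / 8);
  const blockAlign = channels * (bitDepth / 8);
  const header = Buffer.alloc(44);

  header.write('RIFF', 0);
  header.writeUInt32LE(36 + pcm.length, 4);  // total file size minus 8 bytes
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16);              // fmt chunk size
  header.writeUInt16LE(1, 20);               // audio format: PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitDepth, 34);
  header.write('data', 36);
  header.writeUInt32LE(pcm.length, 40);      // data chunk size

  return Buffer.concat([header, pcm]);
}

// Usage: fs.writeFileSync('speech-from-pcm.wav', pcmToWav(fs.readFileSync('speech.raw')));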

// =============================================================================
// LONG TEXT HANDLING
// =============================================================================

async function longText() {
  const longText = `
    This is a longer piece of text that demonstrates how TTS handles extended content.
    The model can process up to 4096 characters in a single request.
    You can use this for narrating articles, generating audiobooks, or creating voice-overs.
    The speech will maintain natural pacing and intonation throughout.
  `.trim();

  const mp3 = await openai.audio.speech.create({
    model: 'tts-1-hd',
    voice: 'fable', // Good for narration
    input: longText,
  });

  const buffer = Buffer.from(await mp3.arrayBuffer());
  fs.writeFileSync('long-narration.mp3', buffer);

  console.log('Long narration generated!');
  console.log('Text length:', longText.length, 'characters');
  console.log('Audio size:', buffer.length, 'bytes');
}
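
// The endpoint caps input at 4096 characters. For longer scripts, one option
// (a sketch, not in the original template) is to split on sentence boundaries,
// synthesize each chunk, and concatenate the MP3 buffers. Naive concatenation
// of MP3 frames usually plays back fine, but a tool like ffmpeg gives cleaner
// joins if gapless audio matters.
async function narrateLongText(text: string, maxChars = 4000): Promise<Buffer> {
  const sentences = text.split(/(?<=[.!?])\s+/);
  const chunks: string[] = [];
  let current = '';

  for (const sentence of sentences) {
    if (current && (current + ' ' + sentence).length > maxChars) {
      chunks.push(current);
      current = '';
    }
    current = current ? `${current} ${sentence}` : sentence;
  }
  if (current) chunks.push(current);

  const buffers: Buffer[] = [];
  for (const chunk of chunks) {
    const mp3 = await openai.audio.speech.create({
      model: 'tts-1-hd',
      voice: 'fable',
      input: chunk,
    });
    buffers.push(Buffer.from(await mp3.arrayBuffer()));
  }

  return Buffer.concat(buffers);
}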

// =============================================================================
// STREAMING TTS (chunked HTTP response)
// =============================================================================

async function streamingTTS() {
  // The speech endpoint streams the encoded audio back as the HTTP response
  // body, so chunks can be processed as they arrive instead of waiting for the
  // whole file. (gpt-4o-mini-tts also supports `stream_format: 'sse'`, which
  // wraps the audio in server-sent events carrying base64 deltas rather than
  // raw bytes; that path needs an SSE parser and is not shown here.)
  const response = await fetch('https://api.openai.com/v1/audio/speech', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'gpt-4o-mini-tts',
      voice: 'nova',
      input: 'This is a streaming audio example. The audio will be generated and delivered in chunks.',
      response_format: 'mp3',
    }),
  });

  if (!response.ok || !response.body) {
    throw new Error(`TTS request failed: ${response.status} ${await response.text()}`);
  }

  console.log('Streaming TTS...');

  const reader = response.body.getReader();
  const chunks: Uint8Array[] = [];

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    chunks.push(value);
    console.log('Received chunk:', value.length, 'bytes');
  }

  // Combine chunks
  const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
  const combined = new Uint8Array(totalLength);
  let offset = 0;

  for (const chunk of chunks) {
    combined.set(chunk, offset);
    offset += chunk.length;
  }

  fs.writeFileSync('streaming-output.mp3', Buffer.from(combined));
  console.log('Streaming TTS saved to: streaming-output.mp3');
}

// =============================================================================
// ERROR HANDLING
// =============================================================================

async function withErrorHandling() {
  try {
    const mp3 = await openai.audio.speech.create({
      model: 'tts-1',
      voice: 'alloy',
      input: 'Hello world',
    });

    const buffer = Buffer.from(await mp3.arrayBuffer());
    fs.writeFileSync('output.mp3', buffer);

    return 'output.mp3';
  } catch (error: any) {
    if (error.message.includes('input too long')) {
      console.error('Text exceeds 4096 character limit');
    } else if (error.message.includes('invalid voice')) {
      console.error('Voice not recognized - use one of the 11 supported voices');
    } else if (error.status === 429) {
      console.error('Rate limit exceeded - wait and retry');
    } else {
      console.error('TTS error:', error.message);
    }

    throw error;
  }
}
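
// Rate-limit errors (429) are usually transient. A simple retry wrapper with
// exponential backoff, sketched here (not part of the original template):
async function withRetry<T>(fn: () => Promise<T>, maxRetries = 3): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error: any) {
      if (error?.status !== 429 || attempt >= maxRetries) throw error;
      const delayMs = 1000 * 2 ** attempt; // 1s, 2s, 4s, ...
      console.warn(`Rate limited, retrying in ${delayMs}ms...`);
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
}

// Usage: const path = await withRetry(() => withErrorHandling());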

// =============================================================================
// MAIN EXECUTION
// =============================================================================

async function main() {
  console.log('=== OpenAI Text-to-Speech Examples ===\n');

  // Example 1: Basic TTS
  console.log('1. Basic TTS:');
  await basicTTS();
  console.log();

  // Example 2: All voices (uncomment to generate all)
  // console.log('2. All 11 Voices:');
  // await allVoices();
  // console.log();

  // Example 3: Model comparison
  console.log('3. Model Comparison:');
  await modelComparison();
  console.log();

  // Example 4: Voice instructions
  console.log('4. Voice Instructions (gpt-4o-mini-tts):');
  await voiceInstructions();
  console.log();

  // Example 5: Speed control
  console.log('5. Speed Control:');
  await speedControl();
  console.log();

  // Example 6: Different formats
  console.log('6. Different Audio Formats:');
  await differentFormats();
  console.log();

  // Example 7: Long text
  console.log('7. Long Text Narration:');
  await longText();
  console.log();
}

// Run if executed directly
if (require.main === module) {
  main().catch(console.error);
}

export {
  basicTTS,
  allVoices,
  modelComparison,
  voiceInstructions,
  speedControl,
  differentFormats,
  longText,
  streamingTTS,
  withErrorHandling,
};