Initial commit

templates/text-to-speech.ts (new file, 372 lines)
@@ -0,0 +1,372 @@
/**
 * OpenAI Audio API - Text-to-Speech Examples
 *
 * This template demonstrates:
 * - Basic TTS with all 11 voices
 * - Different models (tts-1, tts-1-hd, gpt-4o-mini-tts)
 * - Voice instructions (gpt-4o-mini-tts only)
 * - Speed control
 * - Different audio formats
 * - Streaming TTS
 */

import OpenAI from 'openai';
import fs from 'fs';

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

// =============================================================================
// BASIC TTS
// =============================================================================

async function basicTTS() {
  const mp3 = await openai.audio.speech.create({
    model: 'tts-1',
    voice: 'alloy',
    input: 'The quick brown fox jumped over the lazy dog.',
  });

  const buffer = Buffer.from(await mp3.arrayBuffer());
  fs.writeFileSync('speech.mp3', buffer);

  console.log('Speech saved to: speech.mp3');
}
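
// For large outputs you can avoid buffering the whole file in memory by piping
// the response body to disk. This is a sketch, not part of the original
// template, assuming the SDK returns a fetch-style Response whose body is a
// web ReadableStream (openai v4 on Node 18+).
// These imports would normally sit at the top of the file.
import { Readable } from 'stream';
import { pipeline } from 'stream/promises';

async function basicTTSToFile() {
  const response = await openai.audio.speech.create({
    model: 'tts-1',
    voice: 'alloy',
    input: 'The quick brown fox jumped over the lazy dog.',
  });

  if (!response.body) throw new Error('No response body to stream');

  // Convert the web stream to a Node stream and write chunks as they arrive.
  await pipeline(Readable.fromWeb(response.body as any), fs.createWriteStream('speech-streamed.mp3'));
  console.log('Speech streamed to: speech-streamed.mp3');
}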

// =============================================================================
// ALL 11 VOICES
// =============================================================================

async function allVoices() {
  const voices = [
    'alloy',    // Neutral, balanced
    'ash',      // Clear, professional
    'ballad',   // Warm, storytelling
    'coral',    // Soft, friendly
    'echo',     // Calm, measured
    'fable',    // Expressive, narrative
    'onyx',     // Deep, authoritative
    'nova',     // Bright, energetic
    'sage',     // Wise, thoughtful
    'shimmer',  // Gentle, soothing
    'verse',    // Poetic, rhythmic
  ] as const;

  const text = 'Hello, this is a voice sample.';

  for (const voice of voices) {
    console.log(`Generating ${voice} voice...`);

    const mp3 = await openai.audio.speech.create({
      model: 'tts-1',
      voice,
      input: text,
    });

    const buffer = Buffer.from(await mp3.arrayBuffer());
    fs.writeFileSync(`speech-${voice}.mp3`, buffer);

    // Wait 500ms between requests
    await new Promise(resolve => setTimeout(resolve, 500));
  }

  console.log('All voice samples generated!');
}
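
// Note (added here, not in the original template): tts-1 / tts-1-hd
// historically exposed six voices (alloy, echo, fable, onyx, nova, shimmer);
// ash, ballad, coral, sage, and verse arrived with the gpt-4o audio models.
// If a newer voice is rejected for tts-1, retry it with gpt-4o-mini-tts or
// check the current API reference for the per-model voice list.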

// =============================================================================
// MODEL COMPARISON
// =============================================================================

async function modelComparison() {
  const text = 'This is a test of different TTS models.';

  // tts-1 (standard quality, fastest)
  console.log('Generating with tts-1...');
  const tts1 = await openai.audio.speech.create({
    model: 'tts-1',
    voice: 'nova',
    input: text,
  });

  const buffer1 = Buffer.from(await tts1.arrayBuffer());
  fs.writeFileSync('tts-1-output.mp3', buffer1);

  // tts-1-hd (high quality)
  console.log('Generating with tts-1-hd...');
  const tts1Hd = await openai.audio.speech.create({
    model: 'tts-1-hd',
    voice: 'nova',
    input: text,
  });

  const buffer2 = Buffer.from(await tts1Hd.arrayBuffer());
  fs.writeFileSync('tts-1-hd-output.mp3', buffer2);

  console.log('Model comparison complete!');
  console.log('tts-1 file size:', buffer1.length, 'bytes');
  console.log('tts-1-hd file size:', buffer2.length, 'bytes');
}
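
// The file's docstring also lists gpt-4o-mini-tts. A small sketch (not in the
// original template) to include it in the comparison with the same text:
async function gpt4oMiniTtsSample() {
  const speech = await openai.audio.speech.create({
    model: 'gpt-4o-mini-tts',
    voice: 'nova',
    input: 'This is a test of different TTS models.',
  });

  const buffer = Buffer.from(await speech.arrayBuffer());
  fs.writeFileSync('gpt-4o-mini-tts-output.mp3', buffer);
  console.log('gpt-4o-mini-tts file size:', buffer.length, 'bytes');
}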

// =============================================================================
// VOICE INSTRUCTIONS (gpt-4o-mini-tts)
// =============================================================================

async function voiceInstructions() {
  // Example 1: Calm and professional
  const professional = await openai.audio.speech.create({
    model: 'gpt-4o-mini-tts',
    voice: 'nova',
    input: 'Welcome to our customer support line. How can I help you today?',
    instructions: 'Speak in a calm, professional, and friendly tone suitable for customer service.',
  });

  const buffer1 = Buffer.from(await professional.arrayBuffer());
  fs.writeFileSync('professional-tone.mp3', buffer1);

  // Example 2: Energetic and enthusiastic
  const energetic = await openai.audio.speech.create({
    model: 'gpt-4o-mini-tts',
    voice: 'nova',
    input: 'Get ready for the biggest sale of the year! Don\'t miss out!',
    instructions: 'Use an enthusiastic, energetic tone perfect for marketing and advertisements.',
  });

  const buffer2 = Buffer.from(await energetic.arrayBuffer());
  fs.writeFileSync('energetic-tone.mp3', buffer2);

  // Example 3: Calm and soothing
  const soothing = await openai.audio.speech.create({
    model: 'gpt-4o-mini-tts',
    voice: 'shimmer',
    input: 'Take a deep breath. Relax your shoulders. Let all tension fade away.',
    instructions: 'Adopt a calm, soothing voice suitable for meditation and relaxation guidance.',
  });

  const buffer3 = Buffer.from(await soothing.arrayBuffer());
  fs.writeFileSync('soothing-tone.mp3', buffer3);

  console.log('Voice instruction examples generated!');
}

// =============================================================================
// SPEED CONTROL
// =============================================================================

async function speedControl() {
  const text = 'This sentence will be spoken at different speeds.';

  const speeds = [0.5, 0.75, 1.0, 1.25, 1.5, 2.0];

  for (const speed of speeds) {
    console.log(`Generating at ${speed}x speed...`);

    const mp3 = await openai.audio.speech.create({
      model: 'tts-1',
      voice: 'alloy',
      input: text,
      speed,
    });

    const buffer = Buffer.from(await mp3.arrayBuffer());
    fs.writeFileSync(`speech-${speed}x.mp3`, buffer);

    await new Promise(resolve => setTimeout(resolve, 500));
  }

  console.log('Speed variations generated!');
}
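
// Note (added here, not in the original template): per the API reference,
// `speed` accepts values from 0.25 to 4.0, with 1.0 as the default; values
// outside that range are rejected by the endpoint.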

// =============================================================================
// DIFFERENT AUDIO FORMATS
// =============================================================================

async function differentFormats() {
  const text = 'Testing different audio formats.';

  const formats = ['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'] as const;

  for (const format of formats) {
    console.log(`Generating ${format} format...`);

    const audio = await openai.audio.speech.create({
      model: 'tts-1',
      voice: 'alloy',
      input: text,
      response_format: format,
    });

    const buffer = Buffer.from(await audio.arrayBuffer());
    const extension = format === 'pcm' ? 'raw' : format;
    fs.writeFileSync(`speech.${extension}`, buffer);

    console.log(`  ${format}: ${buffer.length} bytes`);

    await new Promise(resolve => setTimeout(resolve, 500));
  }

  console.log('All format examples generated!');
}
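
// The raw PCM output has no container header. The API reference describes it
// as 24kHz, 16-bit signed little-endian, mono; the helper below is a sketch
// built on that assumption (not part of the original template) that prepends
// a minimal 44-byte WAV header so the file can be played directly.
function pcmToWav(pcm: Buffer, sampleRate = 24000, channels = 1, bitDepth = 16): Buffer {
  const byteRate = sampleRate * channels * (bitDepth / 8);
  const blockAlign = channels * (bitDepth / 8);
  const header = Buffer.alloc(44);

  header.write('RIFF', 0);
  header.writeUInt32LE(36 + pcm.length, 4);  // total file size minus 8 bytes
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16);              // fmt chunk size
  header.writeUInt16LE(1, 20);               // audio format: PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitDepth, 34);
  header.write('data', 36);
  header.writeUInt32LE(pcm.length, 40);      // data chunk size

  return Buffer.concat([header, pcm]);
}

// Usage: fs.writeFileSync('speech-from-pcm.wav', pcmToWav(fs.readFileSync('speech.raw')));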

// =============================================================================
// LONG TEXT HANDLING
// =============================================================================

async function longText() {
  const longText = `
    This is a longer piece of text that demonstrates how TTS handles extended content.
    The model can process up to 4096 characters in a single request.
    You can use this for narrating articles, generating audiobooks, or creating voice-overs.
    The speech will maintain natural pacing and intonation throughout.
  `.trim();

  const mp3 = await openai.audio.speech.create({
    model: 'tts-1-hd',
    voice: 'fable', // Good for narration
    input: longText,
  });

  const buffer = Buffer.from(await mp3.arrayBuffer());
  fs.writeFileSync('long-narration.mp3', buffer);

  console.log('Long narration generated!');
  console.log('Text length:', longText.length, 'characters');
  console.log('Audio size:', buffer.length, 'bytes');
}
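
// The endpoint caps input at 4096 characters. For longer scripts, one option
// (a sketch, not in the original template) is to split on sentence boundaries,
// synthesize each chunk, and concatenate the MP3 buffers. Naive concatenation
// of MP3 frames usually plays back fine, but a tool like ffmpeg gives cleaner
// joins if gapless audio matters.
async function narrateLongText(text: string, maxChars = 4000): Promise<Buffer> {
  const sentences = text.split(/(?<=[.!?])\s+/);
  const chunks: string[] = [];
  let current = '';

  for (const sentence of sentences) {
    if (current && (current + ' ' + sentence).length > maxChars) {
      chunks.push(current);
      current = '';
    }
    current = current ? `${current} ${sentence}` : sentence;
  }
  if (current) chunks.push(current);

  const buffers: Buffer[] = [];
  for (const chunk of chunks) {
    const mp3 = await openai.audio.speech.create({
      model: 'tts-1-hd',
      voice: 'fable',
      input: chunk,
    });
    buffers.push(Buffer.from(await mp3.arrayBuffer()));
  }

  return Buffer.concat(buffers);
}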

// =============================================================================
// STREAMING TTS (chunked HTTP response)
// =============================================================================

async function streamingTTS() {
  // The speech endpoint streams the encoded audio back as the HTTP response
  // body, so chunks can be processed as they arrive instead of waiting for the
  // whole file. (gpt-4o-mini-tts also supports `stream_format: 'sse'`, which
  // wraps the audio in server-sent events carrying base64 deltas rather than
  // raw bytes; that path needs an SSE parser and is not shown here.)
  const response = await fetch('https://api.openai.com/v1/audio/speech', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'gpt-4o-mini-tts',
      voice: 'nova',
      input: 'This is a streaming audio example. The audio will be generated and delivered in chunks.',
      response_format: 'mp3',
    }),
  });

  if (!response.ok || !response.body) {
    throw new Error(`TTS request failed: ${response.status} ${await response.text()}`);
  }

  console.log('Streaming TTS...');

  const reader = response.body.getReader();
  const chunks: Uint8Array[] = [];

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    chunks.push(value);
    console.log('Received chunk:', value.length, 'bytes');
  }

  // Combine chunks
  const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
  const combined = new Uint8Array(totalLength);
  let offset = 0;

  for (const chunk of chunks) {
    combined.set(chunk, offset);
    offset += chunk.length;
  }

  fs.writeFileSync('streaming-output.mp3', Buffer.from(combined));
  console.log('Streaming TTS saved to: streaming-output.mp3');
}

// =============================================================================
// ERROR HANDLING
// =============================================================================

async function withErrorHandling() {
  try {
    const mp3 = await openai.audio.speech.create({
      model: 'tts-1',
      voice: 'alloy',
      input: 'Hello world',
    });

    const buffer = Buffer.from(await mp3.arrayBuffer());
    fs.writeFileSync('output.mp3', buffer);

    return 'output.mp3';
  } catch (error: any) {
    if (error.message.includes('input too long')) {
      console.error('Text exceeds 4096 character limit');
    } else if (error.message.includes('invalid voice')) {
      console.error('Voice not recognized - use one of the 11 supported voices');
    } else if (error.status === 429) {
      console.error('Rate limit exceeded - wait and retry');
    } else {
      console.error('TTS error:', error.message);
    }

    throw error;
  }
}
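
// Rate-limit errors (429) are usually transient. A simple retry wrapper with
// exponential backoff, sketched here (not part of the original template):
async function withRetry<T>(fn: () => Promise<T>, maxRetries = 3): Promise<T> {
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error: any) {
      if (error?.status !== 429 || attempt >= maxRetries) throw error;
      const delayMs = 1000 * 2 ** attempt; // 1s, 2s, 4s, ...
      console.warn(`Rate limited, retrying in ${delayMs}ms...`);
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
}

// Usage: const path = await withRetry(() => withErrorHandling());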

// =============================================================================
// MAIN EXECUTION
// =============================================================================

async function main() {
  console.log('=== OpenAI Text-to-Speech Examples ===\n');

  // Example 1: Basic TTS
  console.log('1. Basic TTS:');
  await basicTTS();
  console.log();

  // Example 2: All voices (uncomment to generate all)
  // console.log('2. All 11 Voices:');
  // await allVoices();
  // console.log();

  // Example 3: Model comparison
  console.log('3. Model Comparison:');
  await modelComparison();
  console.log();

  // Example 4: Voice instructions
  console.log('4. Voice Instructions (gpt-4o-mini-tts):');
  await voiceInstructions();
  console.log();

  // Example 5: Speed control
  console.log('5. Speed Control:');
  await speedControl();
  console.log();

  // Example 6: Different formats
  console.log('6. Different Audio Formats:');
  await differentFormats();
  console.log();

  // Example 7: Long text
  console.log('7. Long Text Narration:');
  await longText();
  console.log();
}

// Run if executed directly
if (require.main === module) {
  main().catch(console.error);
}

export {
  basicTTS,
  allVoices,
  modelComparison,
  voiceInstructions,
  speedControl,
  differentFormats,
  longText,
  streamingTTS,
  withErrorHandling,
};