/**
 * OpenAI Audio API - Whisper Transcription Examples
 *
 * This template demonstrates:
 * - Basic audio transcription
 * - Supported audio formats
 * - Both SDK and fetch approaches
 * - Error handling
 */

import OpenAI from 'openai';
import fs from 'fs';

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

// =============================================================================
// BASIC TRANSCRIPTION (SDK)
// =============================================================================

async function basicTranscription() {
  const transcription = await openai.audio.transcriptions.create({
    file: fs.createReadStream('./audio.mp3'),
    model: 'whisper-1',
  });

  console.log('Transcription:', transcription.text);
  return transcription.text;
}

// =============================================================================
// TRANSCRIPTION WITH FETCH
// =============================================================================

// Uses the global fetch/FormData/Blob available in Node 18+. The `form-data`
// package is not needed: native fetch sets the multipart boundary itself, so
// no Content-Type header is added manually.
async function transcriptionFetch() {
  const formData = new FormData();
  formData.append('file', new Blob([fs.readFileSync('./audio.mp3')]), 'audio.mp3');
  formData.append('model', 'whisper-1');

  const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: formData,
  });

  if (!response.ok) {
    throw new Error(`Transcription request failed: ${response.status} ${response.statusText}`);
  }

  const data: any = await response.json();
  console.log('Transcription:', data.text);
  return data.text;
}

// =============================================================================
// MULTIPLE AUDIO FORMATS
// =============================================================================

async function multipleFormats() {
  const formats = ['mp3', 'wav', 'm4a', 'webm'];

  for (const format of formats) {
    const filename = `./audio.${format}`;

    if (fs.existsSync(filename)) {
      console.log(`Transcribing ${format}...`);
      const transcription = await openai.audio.transcriptions.create({
        file: fs.createReadStream(filename),
        model: 'whisper-1',
      });
      console.log(`${format.toUpperCase()}: ${transcription.text}`);
    } else {
      console.log(`${filename} not found, skipping...`);
    }
  }
}

// =============================================================================
// ERROR HANDLING
// =============================================================================

async function withErrorHandling(audioFilePath: string) {
  try {
    // Check if file exists
    if (!fs.existsSync(audioFilePath)) {
      throw new Error(`Audio file not found: ${audioFilePath}`);
    }

    // Check file size (the API rejects files over 25 MB)
    const stats = fs.statSync(audioFilePath);
    const fileSizeMB = stats.size / (1024 * 1024);
    console.log(`File size: ${fileSizeMB.toFixed(2)} MB`);

    if (fileSizeMB > 25) {
      console.warn('Warning: File larger than 25MB may be rejected');
    }

    // Transcribe
    const transcription = await openai.audio.transcriptions.create({
      file: fs.createReadStream(audioFilePath),
      model: 'whisper-1',
    });

    return transcription.text;
  } catch (error: any) {
    if (error.message.includes('file not found')) {
      console.error('Audio file not found');
    } else if (error.message.includes('file too large')) {
      console.error('Audio file exceeds size limit');
    } else if (error.message.includes('unsupported format')) {
      console.error('Audio format not supported');
    } else {
      console.error('Transcription error:', error.message);
    }
    throw error;
  }
}
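// =============================================================================
// TRANSCRIPTION WITH OPTIONS (SKETCH)
// =============================================================================

// A minimal sketch of the optional parameters the transcriptions endpoint
// accepts alongside `file` and `model`: `language`, `prompt`,
// `response_format`, and `temperature`. The './audio.mp3' path is a
// placeholder like the ones above; the prompt text is illustrative only.
async function transcriptionWithOptions() {
  const transcription = await openai.audio.transcriptions.create({
    file: fs.createReadStream('./audio.mp3'),
    model: 'whisper-1',
    language: 'en', // ISO-639-1 hint for the spoken language
    prompt: 'OpenAI, Whisper', // vocabulary hint to guide spelling
    response_format: 'json', // also: 'text', 'srt', 'verbose_json', 'vtt'
    temperature: 0, // 0 = most deterministic output
  });

  console.log('Transcription:', transcription.text);
  return transcription.text;
}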
// =============================================================================
// BATCH TRANSCRIPTION
// =============================================================================

async function batchTranscription(audioFiles: string[]) {
  const results: Array<{ file: string; text?: string; error?: string; success: boolean }> = [];

  for (const filePath of audioFiles) {
    console.log(`Transcribing: ${filePath}`);

    try {
      const transcription = await openai.audio.transcriptions.create({
        file: fs.createReadStream(filePath),
        model: 'whisper-1',
      });

      results.push({
        file: filePath,
        text: transcription.text,
        success: true,
      });
      console.log(`✓ ${filePath}: ${transcription.text.substring(0, 50)}...`);
    } catch (error: any) {
      results.push({
        file: filePath,
        error: error.message,
        success: false,
      });
      console.error(`✗ ${filePath}: ${error.message}`);
    }

    // Wait 1 second between requests to avoid rate limits
    await new Promise(resolve => setTimeout(resolve, 1000));
  }

  console.log(`\nCompleted: ${results.filter(r => r.success).length}/${results.length}`);
  return results;
}

// =============================================================================
// SAVE TRANSCRIPTION TO FILE
// =============================================================================

async function transcribeAndSave(audioFilePath: string, outputFilePath: string) {
  const transcription = await openai.audio.transcriptions.create({
    file: fs.createReadStream(audioFilePath),
    model: 'whisper-1',
  });

  fs.writeFileSync(outputFilePath, transcription.text);
  console.log(`Transcription saved to: ${outputFilePath}`);
  console.log(`Content: ${transcription.text}`);
  return transcription.text;
}

// =============================================================================
// MAIN EXECUTION
// =============================================================================

async function main() {
  console.log('=== OpenAI Whisper Transcription Examples ===\n');
  console.log('Note: This script requires audio files to run.');
  console.log('Supported formats: mp3, mp4, mpeg, mpga, m4a, wav, webm\n');

  // Example 1: Basic transcription (uncomment when you have audio.mp3)
  // console.log('1. Basic Transcription:');
  // await basicTranscription();
  // console.log();

  // Example 2: Transcription with fetch
  // console.log('2. Transcription with Fetch:');
  // await transcriptionFetch();
  // console.log();

  // Example 3: Multiple formats
  // console.log('3. Multiple Formats:');
  // await multipleFormats();
  // console.log();

  // Example 4: Save to file
  // console.log('4. Transcribe and Save:');
  // await transcribeAndSave('./audio.mp3', './transcription.txt');
  // console.log();
}

// Run if executed directly
if (require.main === module) {
  main().catch(console.error);
}

export {
  basicTranscription,
  transcriptionFetch,
  multipleFormats,
  withErrorHandling,
  batchTranscription,
  transcribeAndSave,
};
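// =============================================================================
// RELATED: AUDIO TRANSLATION (SKETCH)
// =============================================================================
// Whisper can also translate non-English audio directly into English text via
// openai.audio.translations.create. A minimal sketch, assuming a hypothetical
// ./audio_french.mp3 exists (not wired into main() above):
//
//   const translation = await openai.audio.translations.create({
//     file: fs.createReadStream('./audio_french.mp3'),
//     model: 'whisper-1',
//   });
//   console.log('Translation:', translation.text);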