230 lines
6.5 KiB
TypeScript
230 lines
6.5 KiB
TypeScript
/**
 * OpenAI Audio API - Whisper Transcription Examples
 *
 * This template demonstrates:
 * - Basic audio transcription
 * - Supported audio formats
 * - Both SDK and fetch approaches
 * - Error handling
 */
|
|
|
|
import OpenAI from 'openai';
|
|
import fs from 'fs';
|
|
import FormData from 'form-data';
|
|
|
|
// SDK client used by the SDK-based examples in this file; the API key is read
// from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
|
|
|
|
// =============================================================================
// BASIC TRANSCRIPTION (SDK)
// =============================================================================
|
|
|
|
/**
 * Transcribes `./audio.mp3` with the `whisper-1` model through the SDK client
 * and logs the resulting transcript to the console.
 *
 * @returns The transcribed text.
 */
async function basicTranscription() {
  const audioStream = fs.createReadStream('./audio.mp3');

  const { text } = await openai.audio.transcriptions.create({
    file: audioStream,
    model: 'whisper-1',
  });

  console.log('Transcription:', text);
  return text;
}
|
|
|
|
// =============================================================================
// TRANSCRIPTION WITH FETCH
// =============================================================================
|
|
|
|
/**
 * Transcribes `./audio.mp3` by POSTing a multipart form directly to the
 * `/v1/audio/transcriptions` REST endpoint instead of going through the SDK.
 *
 * The bearer token is read from the OPENAI_API_KEY environment variable.
 * The transcript is logged to the console before being returned.
 *
 * @returns The transcribed text.
 * @throws Error if the API answers with a non-2xx status (the status line and
 *   response body are included in the message), or if a successful response
 *   does not contain a string `text` field.
 */
async function transcriptionFetch(): Promise<string> {
  const formData = new FormData();
  formData.append('file', fs.createReadStream('./audio.mp3'));
  formData.append('model', 'whisper-1');

  // NOTE(review): `form-data` produces a Node stream body. Confirm the `fetch`
  // available at runtime accepts it (node-fetch does; the fetch built into
  // Node presumably expects a web FormData/Blob instead) — TODO verify.
  const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      // Supplies the multipart Content-Type header including the boundary.
      ...formData.getHeaders(),
    },
    body: formData,
  });

  // Surface HTTP failures explicitly; otherwise an error payload would be
  // parsed as if it were a transcript and `undefined` silently returned.
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Transcription request failed (${response.status} ${response.statusText}): ${detail}`,
    );
  }

  const data: unknown = await response.json();
  const text =
    typeof data === 'object' && data !== null ? (data as { text?: unknown }).text : undefined;
  if (typeof text !== 'string') {
    throw new Error('Unexpected transcription response: missing "text" field');
  }

  console.log('Transcription:', text);
  return text;
}
|
|
|
|
// =============================================================================
// MULTIPLE AUDIO FORMATS
// =============================================================================
|
|
|
|
/**
 * Tries to transcribe `./audio.mp3`, `./audio.wav`, `./audio.m4a` and
 * `./audio.webm`, one after another, with the `whisper-1` model.
 *
 * A file that does not exist is reported on the console and skipped; each
 * transcript that is produced is logged, prefixed with the upper-cased format.
 */
async function multipleFormats() {
  for (const ext of ['mp3', 'wav', 'm4a', 'webm']) {
    const audioPath = `./audio.${ext}`;

    // Guard clause: nothing to transcribe when the sample file is absent.
    if (!fs.existsSync(audioPath)) {
      console.log(`${audioPath} not found, skipping...`);
      continue;
    }

    console.log(`Transcribing ${ext}...`);

    const { text } = await openai.audio.transcriptions.create({
      file: fs.createReadStream(audioPath),
      model: 'whisper-1',
    });

    console.log(`${ext.toUpperCase()}: ${text}`);
  }
}
|
|
|
|
// =============================================================================
// ERROR HANDLING
// =============================================================================
|
|
|
|
/**
 * Transcribes the audio file at `audioFilePath` with pre-flight checks and
 * categorised error logging.
 *
 * Before uploading, the function verifies that the file exists, logs its size
 * in MB, and warns (without aborting) when it is larger than 25 MB.
 *
 * Any failure is logged with `console.error` — using a short category message
 * when the error text mentions a missing file, an oversized file or an
 * unsupported format — and is then rethrown unchanged.
 *
 * @param audioFilePath Path of the audio file to transcribe.
 * @returns The transcribed text.
 * @throws Error with message `Audio file not found: <path>` if the file does
 *   not exist; otherwise whatever the file-system call or the SDK raised.
 */
async function withErrorHandling(audioFilePath: string) {
  try {
    // Check if file exists
    if (!fs.existsSync(audioFilePath)) {
      throw new Error(`Audio file not found: ${audioFilePath}`);
    }

    // Check file size (Whisper has limits)
    const fileSizeMB = fs.statSync(audioFilePath).size / (1024 * 1024);
    console.log(`File size: ${fileSizeMB.toFixed(2)} MB`);

    if (fileSizeMB > 25) {
      console.warn('Warning: File larger than 25MB may be rejected');
    }

    // Transcribe
    const transcription = await openai.audio.transcriptions.create({
      file: fs.createReadStream(audioFilePath),
      model: 'whisper-1',
    });

    return transcription.text;
  } catch (error: unknown) {
    // A rejection is not guaranteed to be an Error instance; narrow before
    // reading `.message` so the logging below can never throw and mask the
    // original failure.
    const message = error instanceof Error ? error.message : String(error);
    // Match case-insensitively so differently-capitalised messages still hit
    // the right category.
    const normalized = message.toLowerCase();

    if (normalized.includes('file not found')) {
      console.error('Audio file not found');
    } else if (normalized.includes('file too large')) {
      console.error('Audio file exceeds size limit');
    } else if (normalized.includes('unsupported format')) {
      console.error('Audio format not supported');
    } else {
      console.error('Transcription error:', message);
    }

    throw error;
  }
}
|
|
|
|
// =============================================================================
// BATCH TRANSCRIPTION
// =============================================================================
|
|
|
|
/**
 * Transcribes several audio files sequentially with the `whisper-1` model.
 *
 * A failure on one file is logged and recorded in the results; it does not
 * stop the remaining files from being processed. To stay clear of rate
 * limits, the function waits one second between consecutive requests (there
 * is no wait before the first request or after the last one).
 *
 * @param audioFiles Paths of the audio files to transcribe, in order.
 * @returns One entry per input file, in input order: `{ file, text,
 *   success: true }` on success or `{ file, error, success: false }` on
 *   failure, where `error` is the failure message.
 */
async function batchTranscription(audioFiles: string[]) {
  const REQUEST_DELAY_MS = 1000;

  const results: Array<
    | { file: string; text: string; success: true }
    | { file: string; error: string; success: false }
  > = [];

  for (const [index, filePath] of audioFiles.entries()) {
    // Wait between requests to avoid rate limits — only *between* them, so a
    // batch never ends with a pointless pause.
    if (index > 0) {
      await new Promise((resolve) => setTimeout(resolve, REQUEST_DELAY_MS));
    }

    console.log(`Transcribing: ${filePath}`);

    try {
      const transcription = await openai.audio.transcriptions.create({
        file: fs.createReadStream(filePath),
        model: 'whisper-1',
      });

      results.push({
        file: filePath,
        text: transcription.text,
        success: true,
      });

      // Preview only the first 50 characters of the transcript.
      console.log(`✓ ${filePath}: ${transcription.text.substring(0, 50)}...`);
    } catch (error: unknown) {
      // Rejections are not guaranteed to be Error instances.
      const message = error instanceof Error ? error.message : String(error);

      results.push({
        file: filePath,
        error: message,
        success: false,
      });

      console.error(`✗ ${filePath}: ${message}`);
    }
  }

  console.log(`\nCompleted: ${results.filter((r) => r.success).length}/${results.length}`);

  return results;
}
|
|
|
|
// =============================================================================
// SAVE TRANSCRIPTION TO FILE
// =============================================================================
|
|
|
|
/**
 * Transcribes an audio file with the `whisper-1` model and writes the
 * transcript to a file.
 *
 * The output path and the transcript are both logged to the console.
 *
 * @param audioFilePath Path of the audio file to transcribe.
 * @param outputFilePath Path of the file the transcript is written to.
 * @returns The transcribed text.
 */
async function transcribeAndSave(audioFilePath: string, outputFilePath: string) {
  const audioStream = fs.createReadStream(audioFilePath);

  const { text } = await openai.audio.transcriptions.create({
    file: audioStream,
    model: 'whisper-1',
  });

  fs.writeFileSync(outputFilePath, text);

  console.log(`Transcription saved to: ${outputFilePath}`);
  console.log(`Content: ${text}`);

  return text;
}
|
|
|
|
// =============================================================================
// MAIN EXECUTION
// =============================================================================
|
|
|
|
/**
 * Script entry point: prints the intro banner and the list of supported
 * formats.
 *
 * Every example call is commented out because each one needs a local audio
 * file; uncomment the ones you want to run.
 */
async function main() {
  const introLines = [
    '=== OpenAI Whisper Transcription Examples ===\n',
    'Note: This script requires audio files to run.',
    'Supported formats: mp3, mp4, mpeg, mpga, m4a, wav, webm\n',
  ];
  for (const line of introLines) {
    console.log(line);
  }

  // Example 1: Basic transcription (uncomment when you have audio.mp3)
  // console.log('1. Basic Transcription:');
  // await basicTranscription();
  // console.log();

  // Example 2: Transcription with fetch
  // console.log('2. Transcription with Fetch:');
  // await transcriptionFetch();
  // console.log();

  // Example 3: Multiple formats
  // console.log('3. Multiple Formats:');
  // await multipleFormats();
  // console.log();

  // Example 4: Save to file
  // console.log('4. Transcribe and Save:');
  // await transcribeAndSave('./audio.mp3', './transcription.txt');
  // console.log();
}
|
|
|
|
// Run the examples only when this file is the program's entry module, not
// when it is loaded by another module. A rejection from main() is printed
// with console.error.
if (require.main === module) {
  main().catch((err: unknown) => console.error(err));
}
|
|
|
|
// Each example function is exported so it can be imported and run on its own.
export {
  basicTranscription,
  transcriptionFetch,
  multipleFormats,
  withErrorHandling,
  batchTranscription,
  transcribeAndSave,
};
|