/**
 * OpenAI Moderation API - Content Safety Examples
 *
 * This template demonstrates:
 * - Basic content moderation
 * - All 11 safety categories
 * - Category scoring and thresholds
 * - Batch moderation
 * - Production patterns for UGC
 */

import OpenAI from 'openai';

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

// =============================================================================
// BASIC MODERATION
// =============================================================================

async function basicModeration() {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: 'I want to hurt someone.',
  });

  const result = moderation.results[0];

  console.log('Flagged:', result.flagged);
  console.log('Categories flagged:');
  Object.entries(result.categories).forEach(([category, flagged]) => {
    if (flagged) {
      // Cast allows string indexing under strict TypeScript settings
      const score = (result.category_scores as Record<string, number>)[category];
      console.log(`  - ${category}: ${score.toFixed(4)}`);
    }
  });

  return result;
}

// =============================================================================
// ALL SAFETY CATEGORIES
// =============================================================================

async function allCategories() {
  const examples = {
    sexual: 'Explicit sexual content example',
    hate: 'Hateful speech based on identity',
    harassment: 'Bullying and intimidation example',
    'self-harm': 'Content promoting self-harm',
    'sexual/minors': 'Any sexualization of minors',
    'hate/threatening': 'Violent threats based on identity',
    'violence/graphic': 'Graphic violence description',
    'self-harm/intent': 'Active suicidal ideation',
    'self-harm/instructions': 'How to self-harm instructions',
    'harassment/threatening': 'Violent threat toward individual',
    violence: 'General violent content',
  };

  console.log('Safety Categories:\n');
  for (const [category, description] of Object.entries(examples)) {
    console.log(`${category}:`);
    console.log(`  Description: ${description}`);
  }

  console.log('\nNote: Use actual content for real moderation, not descriptions');
}

// =============================================================================
// CATEGORY SCORES AND THRESHOLDS
// =============================================================================

interface Thresholds {
  [key: string]: number;
}

const recommendedThresholds: Thresholds = {
  sexual: 0.5,
  hate: 0.4,
  harassment: 0.5,
  'self-harm': 0.3,
  'sexual/minors': 0.1, // Lower threshold for child safety
  'hate/threatening': 0.3,
  'violence/graphic': 0.5,
  'self-harm/intent': 0.2,
  'self-harm/instructions': 0.2,
  'harassment/threatening': 0.3,
  violence: 0.5,
};

function checkThresholds(result: any, thresholds: Thresholds): boolean {
  return Object.entries(result.category_scores).some(
    ([category, score]) => score > (thresholds[category] || 0.5)
  );
}

async function withCustomThresholds(text: string) {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: text,
  });

  const result = moderation.results[0];
  const isFlagged = checkThresholds(result, recommendedThresholds);

  console.log('Content:', text);
  console.log('API flagged:', result.flagged);
  console.log('Custom thresholds flagged:', isFlagged);

  if (isFlagged) {
    console.log('Flagged categories:');
    Object.entries(result.category_scores).forEach(([category, score]) => {
      const threshold = recommendedThresholds[category] || 0.5;
      if (score > threshold) {
        console.log(`  - ${category}: ${score.toFixed(4)} (threshold: ${threshold})`);
      }
    });
  }

  return { result, isFlagged };
}
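
// =============================================================================
// PER-SURFACE THRESHOLDS (ILLUSTRATIVE SKETCH)
// =============================================================================
// Not part of the original examples above: a minimal sketch of one way to
// reuse checkThresholds() with stricter or looser limits per product surface.
// The surface names and override values below are illustrative assumptions,
// not recommendations.

const surfaceOverrides: Record<string, Thresholds> = {
  'kids-chat': { violence: 0.2, 'violence/graphic': 0.1, sexual: 0.1 },
  'adult-forum': { violence: 0.7 },
};

function thresholdsForSurface(surface: string): Thresholds {
  // Unknown surfaces fall back to the recommended defaults
  return { ...recommendedThresholds, ...(surfaceOverrides[surface] ?? {}) };
}

async function moderateForSurface(text: string, surface: string): Promise<boolean> {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: text,
  });

  // true means the content exceeded at least one threshold for this surface
  return checkThresholds(moderation.results[0], thresholdsForSurface(surface));
}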
// =============================================================================
// BATCH MODERATION
// =============================================================================

async function batchModeration() {
  const texts = [
    'This is a normal, safe comment',
    'Potentially harmful content example',
    'Another safe piece of text',
  ];

  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: texts,
  });

  moderation.results.forEach((result, index) => {
    console.log(`\nInput ${index + 1}: "${texts[index]}"`);
    console.log('Flagged:', result.flagged);

    if (result.flagged) {
      const flaggedCategories = Object.keys(result.categories).filter(
        cat => (result.categories as Record<string, boolean>)[cat]
      );
      console.log('Categories:', flaggedCategories.join(', '));
    }
  });

  return moderation.results;
}

// =============================================================================
// PRODUCTION PATTERN - UGC MODERATION
// =============================================================================

interface ModerationDecision {
  allowed: boolean;
  reason?: string;
  severity?: 'low' | 'medium' | 'high' | 'error';
  scores?: any;
}

async function moderateUserContent(userInput: string): Promise<ModerationDecision> {
  try {
    const moderation = await openai.moderations.create({
      model: 'omni-moderation-latest',
      input: userInput,
    });

    const result = moderation.results[0];

    // Immediate block for severe categories
    const severeCategories = [
      'sexual/minors',
      'self-harm/intent',
      'hate/threatening',
      'harassment/threatening',
    ];

    for (const category of severeCategories) {
      if ((result.categories as Record<string, boolean>)[category]) {
        return {
          allowed: false,
          reason: `Content violates policy: ${category}`,
          severity: 'high',
        };
      }
    }

    // High-confidence violence check
    if (result.category_scores.violence > 0.8) {
      return {
        allowed: false,
        reason: 'High-confidence violence detected',
        severity: 'medium',
      };
    }

    // Self-harm content requires human review
    if (result.categories['self-harm']) {
      return {
        allowed: false,
        reason: 'Content flagged for human review',
        severity: 'medium',
      };
    }

    // Allow content
    return {
      allowed: true,
      scores: result.category_scores,
    };
  } catch (error: any) {
    console.error('Moderation error:', error);

    // Fail closed: block on error
    return {
      allowed: false,
      reason: 'Moderation service unavailable',
      severity: 'error',
    };
  }
}

// =============================================================================
// CATEGORY-SPECIFIC FILTERING
// =============================================================================

async function filterByCategory(text: string, categoriesToCheck: string[]) {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: text,
  });

  const result = moderation.results[0];
  const violations = categoriesToCheck.filter(
    category => (result.categories as Record<string, boolean>)[category]
  );

  if (violations.length > 0) {
    console.log('Content violates:', violations.join(', '));
    return false;
  }

  console.log('Content passed specified category checks');
  return true;
}
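
// =============================================================================
// USAGE SKETCH - WIRING MODERATION INTO A SUBMISSION FLOW (ILLUSTRATIVE)
// =============================================================================
// Not part of the original examples above: a minimal sketch of how
// moderateUserContent() might gate a user submission. The SubmissionResult
// shape and the handleCommentSubmission() name are assumptions made for
// illustration only.

interface SubmissionResult {
  status: 'published' | 'rejected';
  message: string;
}

async function handleCommentSubmission(comment: string): Promise<SubmissionResult> {
  const decision = await moderateUserContent(comment);

  if (!decision.allowed) {
    // Log the policy reason internally; show the user a generic message
    console.warn('Submission blocked:', decision.reason);
    return {
      status: 'rejected',
      message: 'Your comment could not be posted. Please review our community guidelines.',
    };
  }

  // In a real application, persist the comment here before responding
  return { status: 'published', message: 'Comment posted' };
}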
// =============================================================================
// LOGGING AND AUDIT TRAIL
// =============================================================================

interface ModerationLog {
  timestamp: string;
  content: string;
  flagged: boolean;
  categories: string[];
  scores: any;
  action: 'allowed' | 'blocked' | 'review';
}

async function moderateWithLogging(content: string): Promise<ModerationLog> {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: content,
  });

  const result = moderation.results[0];
  const flaggedCategories = Object.keys(result.categories).filter(
    cat => (result.categories as Record<string, boolean>)[cat]
  );

  const log: ModerationLog = {
    timestamp: new Date().toISOString(),
    content: content.substring(0, 100), // Truncate for logging
    flagged: result.flagged,
    categories: flaggedCategories,
    scores: result.category_scores,
    action: result.flagged ? 'blocked' : 'allowed',
  };

  // In production: save to database or logging service
  console.log('Moderation log:', JSON.stringify(log, null, 2));

  return log;
}

// =============================================================================
// USER FEEDBACK PATTERN
// =============================================================================

function getUserFriendlyMessage(result: any): string {
  if (!result.flagged) {
    return 'Content approved';
  }

  const flaggedCategories = Object.keys(result.categories).filter(
    cat => result.categories[cat]
  );

  // Don't reveal exact detection details
  if (flaggedCategories.some(cat => cat.includes('harm'))) {
    return 'Your content appears to contain concerning material. Please review our community guidelines.';
  }

  if (flaggedCategories.includes('harassment') || flaggedCategories.includes('hate')) {
    return 'Your content may be disrespectful or harmful to others. Please rephrase.';
  }

  if (flaggedCategories.includes('violence')) {
    return 'Your content contains violent themes that violate our policies.';
  }

  return 'Your content doesn\'t meet our community guidelines. Please revise and try again.';
}

// =============================================================================
// ERROR HANDLING
// =============================================================================

async function withErrorHandling(text: string) {
  try {
    const moderation = await openai.moderations.create({
      model: 'omni-moderation-latest',
      input: text,
    });

    return moderation.results[0];
  } catch (error: any) {
    if (error.status === 401) {
      console.error('Invalid API key');
    } else if (error.status === 429) {
      console.error('Rate limit exceeded - implement retry logic');
    } else if (error.status === 500) {
      console.error('OpenAI service error - fail closed and block content');
    } else {
      console.error('Unexpected error:', error.message);
    }

    throw error;
  }
}

// =============================================================================
// MAIN EXECUTION
// =============================================================================

async function main() {
  console.log('=== OpenAI Moderation API Examples ===\n');

  // Example 1: Basic moderation
  console.log('1. Basic Moderation:');
  await basicModeration();
  console.log();

  // Example 2: All categories
  console.log('2. All Safety Categories:');
  await allCategories();
  console.log();

  // Example 3: Custom thresholds
  console.log('3. Custom Thresholds:');
  await withCustomThresholds('This is a test message');
  console.log();

  // Example 4: Batch moderation
  console.log('4. Batch Moderation:');
  await batchModeration();
  console.log();

  // Example 5: Production pattern
  console.log('5. Production UGC Moderation:');
  const decision = await moderateUserContent('Safe user comment');
  console.log('Decision:', decision);
  console.log();
}

// Run if executed directly
if (require.main === module) {
  main().catch(console.error);
}

export {
  basicModeration,
  allCategories,
  withCustomThresholds,
  batchModeration,
  moderateUserContent,
  filterByCategory,
  moderateWithLogging,
  getUserFriendlyMessage,
  withErrorHandling,
};