Initial commit

Zhongwei Li
2025-11-30 08:25:12 +08:00
commit 7a35a34caa
30 changed files with 8396 additions and 0 deletions

templates/moderation.ts (new file, 399 lines)

@@ -0,0 +1,399 @@
/**
 * OpenAI Moderation API - Content Safety Examples
 *
 * This template demonstrates:
 * - Basic content moderation
 * - All 13 safety categories (the omni models add illicit and illicit/violent)
 * - Category scoring and thresholds
 * - Batch moderation
 * - Production patterns for UGC
 */
import OpenAI from 'openai';

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});
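
// Note: OpenAI documents the moderation endpoint as free to use, so it is
// practical to run every piece of user content through it (verify against
// the current pricing page before relying on this).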
// =============================================================================
// BASIC MODERATION
// =============================================================================
async function basicModeration() {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: 'I want to hurt someone.',
  });

  const result = moderation.results[0];
  console.log('Flagged:', result.flagged);
  console.log('Categories flagged:');

  // Cast to plain records so dynamic string keys type-check under strict mode.
  const categories = result.categories as unknown as Record<string, boolean>;
  const scores = result.category_scores as unknown as Record<string, number>;
  Object.entries(categories).forEach(([category, flagged]) => {
    if (flagged) {
      console.log(`  - ${category}: ${scores[category].toFixed(4)}`);
    }
  });

  return result;
}
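
// For reference, each entry in moderation.results has (abridged):
//   flagged: boolean                          -- true if any category triggered
//   categories: { sexual: boolean, ... }      -- per-category verdicts
//   category_scores: { sexual: number, ... }  -- model confidence in [0, 1]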
// =============================================================================
// ALL SAFETY CATEGORIES
// =============================================================================
async function allCategories() {
  // The omni moderation models report 13 categories, including the two
  // illicit categories that the older text-only models do not return.
  const examples = {
    sexual: 'Explicit sexual content example',
    'sexual/minors': 'Any sexualization of minors',
    hate: 'Hateful speech based on identity',
    'hate/threatening': 'Violent threats based on identity',
    harassment: 'Bullying and intimidation example',
    'harassment/threatening': 'Violent threat toward an individual',
    'self-harm': 'Content promoting self-harm',
    'self-harm/intent': 'Active suicidal ideation',
    'self-harm/instructions': 'Instructions for self-harm',
    violence: 'General violent content',
    'violence/graphic': 'Graphic violence description',
    illicit: 'Advice or instructions for non-violent wrongdoing',
    'illicit/violent': 'Advice or instructions for violent wrongdoing',
  };

  console.log('Safety Categories:\n');
  for (const [category, description] of Object.entries(examples)) {
    console.log(`${category}:`);
    console.log(`  Description: ${description}`);
  }
  console.log('\nNote: use actual content for real moderation, not these descriptions');
}
// =============================================================================
// CATEGORY SCORES AND THRESHOLDS
// =============================================================================
interface Thresholds {
  [key: string]: number;
}

// Example starting points -- tune against your own traffic. Categories not
// listed here (e.g. the illicit pair) fall back to 0.5 in checkThresholds.
const recommendedThresholds: Thresholds = {
  sexual: 0.5,
  'sexual/minors': 0.1, // Lower threshold for child safety
  hate: 0.4,
  'hate/threatening': 0.3,
  harassment: 0.5,
  'harassment/threatening': 0.3,
  'self-harm': 0.3,
  'self-harm/intent': 0.2,
  'self-harm/instructions': 0.2,
  violence: 0.5,
  'violence/graphic': 0.5,
};

// Flag if any category score exceeds its threshold (defaulting to 0.5).
function checkThresholds(result: any, thresholds: Thresholds): boolean {
  return Object.entries(result.category_scores).some(
    ([category, score]) => score > (thresholds[category] ?? 0.5)
  );
}
async function withCustomThresholds(text: string) {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: text,
  });

  const result = moderation.results[0];
  const isFlagged = checkThresholds(result, recommendedThresholds);

  console.log('Content:', text);
  console.log('API flagged:', result.flagged);
  console.log('Custom thresholds flagged:', isFlagged);

  if (isFlagged) {
    console.log('Flagged categories:');
    Object.entries(result.category_scores).forEach(([category, score]) => {
      const threshold = recommendedThresholds[category] ?? 0.5;
      if (score > threshold) {
        console.log(`  - ${category}: ${score.toFixed(4)} (threshold: ${threshold})`);
      }
    });
  }

  return { result, isFlagged };
}
// =============================================================================
// BATCH MODERATION
// =============================================================================
async function batchModeration() {
  const texts = [
    'This is a normal, safe comment',
    'Potentially harmful content example',
    'Another safe piece of text',
  ];

  // The endpoint accepts an array of inputs; each gets its own result,
  // returned in the same order as the inputs.
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: texts,
  });

  moderation.results.forEach((result, index) => {
    console.log(`\nInput ${index + 1}: "${texts[index]}"`);
    console.log('Flagged:', result.flagged);
    if (result.flagged) {
      const categories = result.categories as unknown as Record<string, boolean>;
      const flaggedCategories = Object.keys(categories).filter(cat => categories[cat]);
      console.log('Categories:', flaggedCategories.join(', '));
    }
  });

  return moderation.results;
}
// =============================================================================
// PRODUCTION PATTERN - UGC MODERATION
// =============================================================================
interface ModerationDecision {
  allowed: boolean;
  reason?: string;
  severity?: 'low' | 'medium' | 'high' | 'error';
  scores?: any;
}

async function moderateUserContent(userInput: string): Promise<ModerationDecision> {
  try {
    const moderation = await openai.moderations.create({
      model: 'omni-moderation-latest',
      input: userInput,
    });
    const result = moderation.results[0];

    // Immediate block for severe categories
    const severeCategories = [
      'sexual/minors',
      'self-harm/intent',
      'hate/threatening',
      'harassment/threatening',
    ] as const;
    for (const category of severeCategories) {
      if (result.categories[category]) {
        return {
          allowed: false,
          reason: `Content violates policy: ${category}`,
          severity: 'high',
        };
      }
    }

    // High-confidence violence check
    if (result.category_scores.violence > 0.8) {
      return {
        allowed: false,
        reason: 'High-confidence violence detected',
        severity: 'medium',
      };
    }

    // Self-harm content requires human review
    if (result.categories['self-harm']) {
      return {
        allowed: false,
        reason: 'Content flagged for human review',
        severity: 'medium',
      };
    }

    // Allow content
    return {
      allowed: true,
      scores: result.category_scores,
    };
  } catch (error: any) {
    console.error('Moderation error:', error);
    // Fail closed: block on error
    return {
      allowed: false,
      reason: 'Moderation service unavailable',
      severity: 'error',
    };
  }
}
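
// Example wiring (sketch only -- assumes a hypothetical Express-style app;
// `app`, `req`, and `res` are not defined in this template):
//
//   app.post('/comments', async (req, res) => {
//     const decision = await moderateUserContent(req.body.text);
//     if (!decision.allowed) {
//       return res.status(422).json({ error: decision.reason });
//     }
//     // ...persist the comment...
//   });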
// =============================================================================
// CATEGORY-SPECIFIC FILTERING
// =============================================================================
async function filterByCategory(text: string, categoriesToCheck: string[]) {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: text,
  });

  const result = moderation.results[0];
  const categories = result.categories as unknown as Record<string, boolean>;
  const violations = categoriesToCheck.filter(category => categories[category]);

  if (violations.length > 0) {
    console.log('Content violates:', violations.join(', '));
    return false;
  }
  console.log('Content passed specified category checks');
  return true;
}
// =============================================================================
// LOGGING AND AUDIT TRAIL
// =============================================================================
interface ModerationLog {
  timestamp: string;
  content: string;
  flagged: boolean;
  categories: string[];
  scores: any;
  action: 'allowed' | 'blocked' | 'review';
}

async function moderateWithLogging(content: string): Promise<ModerationLog> {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: content,
  });
  const result = moderation.results[0];

  const categories = result.categories as unknown as Record<string, boolean>;
  const flaggedCategories = Object.keys(categories).filter(cat => categories[cat]);

  const log: ModerationLog = {
    timestamp: new Date().toISOString(),
    content: content.substring(0, 100), // Truncate for logging
    flagged: result.flagged,
    categories: flaggedCategories,
    scores: result.category_scores,
    action: result.flagged ? 'blocked' : 'allowed',
  };

  // In production: save to database or logging service
  console.log('Moderation log:', JSON.stringify(log, null, 2));
  return log;
}
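
// Note: moderation logs can contain sensitive user content. In production,
// consider redacting or hashing the stored text and setting a retention
// policy; the 100-character truncation above is only a minimal safeguard.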
// =============================================================================
// USER FEEDBACK PATTERN
// =============================================================================
function getUserFriendlyMessage(result: any): string {
  if (!result.flagged) {
    return 'Content approved';
  }

  const flaggedCategories = Object.keys(result.categories).filter(
    cat => result.categories[cat]
  );

  // Don't reveal exact detection details
  if (flaggedCategories.some(cat => cat.includes('harm'))) {
    return 'Your content appears to contain concerning material. Please review our community guidelines.';
  }
  if (flaggedCategories.includes('harassment') || flaggedCategories.includes('hate')) {
    return 'Your content may be disrespectful or harmful to others. Please rephrase.';
  }
  if (flaggedCategories.includes('violence')) {
    return 'Your content contains violent themes that violate our policies.';
  }
  return 'Your content doesn\'t meet our community guidelines. Please revise and try again.';
}
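
// moderateAndExplain is a small convenience sketch: it runs moderation via
// withErrorHandling (defined below; function declarations hoist) and maps
// the raw result to the user-facing message above.
async function moderateAndExplain(text: string): Promise<string> {
  const result = await withErrorHandling(text);
  return getUserFriendlyMessage(result);
}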
// =============================================================================
// ERROR HANDLING
// =============================================================================
async function withErrorHandling(text: string) {
  try {
    const moderation = await openai.moderations.create({
      model: 'omni-moderation-latest',
      input: text,
    });
    return moderation.results[0];
  } catch (error: any) {
    if (error.status === 401) {
      console.error('Invalid API key');
    } else if (error.status === 429) {
      console.error('Rate limit exceeded - implement retry logic');
    } else if (error.status === 500) {
      console.error('OpenAI service error - fail closed and block content');
    } else {
      console.error('Unexpected error:', error.message);
    }
    throw error;
  }
}
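
// A minimal retry sketch for rate limits, assuming exponential backoff is
// acceptable for your workload; the delay values are illustrative, not
// OpenAI recommendations.
async function moderateWithRetry(text: string, maxRetries = 3) {
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      const moderation = await openai.moderations.create({
        model: 'omni-moderation-latest',
        input: text,
      });
      return moderation.results[0];
    } catch (error: any) {
      if (error.status !== 429 || attempt === maxRetries) throw error;
      const delayMs = 500 * 2 ** attempt; // 500ms, 1s, 2s, ...
      console.warn(`Rate limited; retrying in ${delayMs}ms`);
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
  // Unreachable, but satisfies TypeScript's return-path analysis.
  throw new Error('unreachable');
}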
// =============================================================================
// MAIN EXECUTION
// =============================================================================
async function main() {
  console.log('=== OpenAI Moderation API Examples ===\n');

  // Example 1: Basic moderation
  console.log('1. Basic Moderation:');
  await basicModeration();
  console.log();

  // Example 2: All categories
  console.log('2. All Safety Categories:');
  await allCategories();
  console.log();

  // Example 3: Custom thresholds
  console.log('3. Custom Thresholds:');
  await withCustomThresholds('This is a test message');
  console.log();

  // Example 4: Batch moderation
  console.log('4. Batch Moderation:');
  await batchModeration();
  console.log();

  // Example 5: Production pattern
  console.log('5. Production UGC Moderation:');
  const decision = await moderateUserContent('Safe user comment');
  console.log('Decision:', decision);
  console.log();
}

// Run if executed directly
if (require.main === module) {
  main().catch(console.error);
}
export {
  basicModeration,
  allCategories,
  withCustomThresholds,
  batchModeration,
  moderateUserContent,
  filterByCategory,
  moderateWithLogging,
  getUserFriendlyMessage,
  moderateAndExplain,
  withErrorHandling,
  moderateWithRetry,
};