Initial commit

Zhongwei Li
2025-11-30 08:25:12 +08:00
commit 7a35a34caa
30 changed files with 8396 additions and 0 deletions

templates/moderation.ts (new file, 399 lines)

@@ -0,0 +1,399 @@
/**
 * OpenAI Moderation API - Content Safety Examples
 *
 * This template demonstrates:
 * - Basic content moderation
 * - All 13 safety categories (the omni models add illicit and illicit/violent)
 * - Category scoring and thresholds
 * - Batch moderation
 * - Production patterns for UGC
 */
import OpenAI from 'openai';

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});
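
// Note: OpenAI documents the moderation endpoint as free to use, so it is
// practical to run every piece of user content through it (verify against
// the current pricing page before relying on this).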
// =============================================================================
// BASIC MODERATION
// =============================================================================
async function basicModeration() {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: 'I want to hurt someone.',
  });

  const result = moderation.results[0];
  console.log('Flagged:', result.flagged);
  console.log('Categories flagged:');

  // Cast to plain records so dynamic string keys type-check under strict mode.
  const categories = result.categories as unknown as Record<string, boolean>;
  const scores = result.category_scores as unknown as Record<string, number>;
  Object.entries(categories).forEach(([category, flagged]) => {
    if (flagged) {
      console.log(`  - ${category}: ${scores[category].toFixed(4)}`);
    }
  });

  return result;
}
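
// For reference, each entry in moderation.results has (abridged):
//   flagged: boolean                          -- true if any category triggered
//   categories: { sexual: boolean, ... }      -- per-category verdicts
//   category_scores: { sexual: number, ... }  -- model confidence in [0, 1]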
// =============================================================================
// ALL SAFETY CATEGORIES
// =============================================================================
async function allCategories() {
  // The omni moderation models report 13 categories, including the two
  // illicit categories that the older text-only models do not return.
  const examples = {
    sexual: 'Explicit sexual content example',
    'sexual/minors': 'Any sexualization of minors',
    hate: 'Hateful speech based on identity',
    'hate/threatening': 'Violent threats based on identity',
    harassment: 'Bullying and intimidation example',
    'harassment/threatening': 'Violent threat toward an individual',
    'self-harm': 'Content promoting self-harm',
    'self-harm/intent': 'Active suicidal ideation',
    'self-harm/instructions': 'Instructions for self-harm',
    violence: 'General violent content',
    'violence/graphic': 'Graphic violence description',
    illicit: 'Advice or instructions for non-violent wrongdoing',
    'illicit/violent': 'Advice or instructions for violent wrongdoing',
  };

  console.log('Safety Categories:\n');
  for (const [category, description] of Object.entries(examples)) {
    console.log(`${category}:`);
    console.log(`  Description: ${description}`);
  }
  console.log('\nNote: use actual content for real moderation, not these descriptions');
}
// =============================================================================
// CATEGORY SCORES AND THRESHOLDS
// =============================================================================
interface Thresholds {
  [key: string]: number;
}

// Example starting points -- tune against your own traffic. Categories not
// listed here (e.g. the illicit pair) fall back to 0.5 in checkThresholds.
const recommendedThresholds: Thresholds = {
  sexual: 0.5,
  'sexual/minors': 0.1, // Lower threshold for child safety
  hate: 0.4,
  'hate/threatening': 0.3,
  harassment: 0.5,
  'harassment/threatening': 0.3,
  'self-harm': 0.3,
  'self-harm/intent': 0.2,
  'self-harm/instructions': 0.2,
  violence: 0.5,
  'violence/graphic': 0.5,
};

// Flag if any category score exceeds its threshold (defaulting to 0.5).
function checkThresholds(result: any, thresholds: Thresholds): boolean {
  return Object.entries(result.category_scores).some(
    ([category, score]) => score > (thresholds[category] ?? 0.5)
  );
}
async function withCustomThresholds(text: string) {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: text,
  });

  const result = moderation.results[0];
  const isFlagged = checkThresholds(result, recommendedThresholds);

  console.log('Content:', text);
  console.log('API flagged:', result.flagged);
  console.log('Custom thresholds flagged:', isFlagged);

  if (isFlagged) {
    console.log('Flagged categories:');
    Object.entries(result.category_scores).forEach(([category, score]) => {
      const threshold = recommendedThresholds[category] ?? 0.5;
      if (score > threshold) {
        console.log(`  - ${category}: ${score.toFixed(4)} (threshold: ${threshold})`);
      }
    });
  }

  return { result, isFlagged };
}
// =============================================================================
// BATCH MODERATION
// =============================================================================
async function batchModeration() {
  const texts = [
    'This is a normal, safe comment',
    'Potentially harmful content example',
    'Another safe piece of text',
  ];

  // The endpoint accepts an array of inputs; each gets its own result,
  // returned in the same order as the inputs.
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: texts,
  });

  moderation.results.forEach((result, index) => {
    console.log(`\nInput ${index + 1}: "${texts[index]}"`);
    console.log('Flagged:', result.flagged);
    if (result.flagged) {
      const categories = result.categories as unknown as Record<string, boolean>;
      const flaggedCategories = Object.keys(categories).filter(cat => categories[cat]);
      console.log('Categories:', flaggedCategories.join(', '));
    }
  });

  return moderation.results;
}
// =============================================================================
// PRODUCTION PATTERN - UGC MODERATION
// =============================================================================
interface ModerationDecision {
  allowed: boolean;
  reason?: string;
  severity?: 'low' | 'medium' | 'high' | 'error';
  scores?: any;
}

async function moderateUserContent(userInput: string): Promise<ModerationDecision> {
  try {
    const moderation = await openai.moderations.create({
      model: 'omni-moderation-latest',
      input: userInput,
    });
    const result = moderation.results[0];

    // Immediate block for severe categories
    const severeCategories = [
      'sexual/minors',
      'self-harm/intent',
      'hate/threatening',
      'harassment/threatening',
    ] as const;
    for (const category of severeCategories) {
      if (result.categories[category]) {
        return {
          allowed: false,
          reason: `Content violates policy: ${category}`,
          severity: 'high',
        };
      }
    }

    // High-confidence violence check
    if (result.category_scores.violence > 0.8) {
      return {
        allowed: false,
        reason: 'High-confidence violence detected',
        severity: 'medium',
      };
    }

    // Self-harm content requires human review
    if (result.categories['self-harm']) {
      return {
        allowed: false,
        reason: 'Content flagged for human review',
        severity: 'medium',
      };
    }

    // Allow content
    return {
      allowed: true,
      scores: result.category_scores,
    };
  } catch (error: any) {
    console.error('Moderation error:', error);
    // Fail closed: block on error
    return {
      allowed: false,
      reason: 'Moderation service unavailable',
      severity: 'error',
    };
  }
}
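
// Example wiring (sketch only -- assumes a hypothetical Express-style app;
// `app`, `req`, and `res` are not defined in this template):
//
//   app.post('/comments', async (req, res) => {
//     const decision = await moderateUserContent(req.body.text);
//     if (!decision.allowed) {
//       return res.status(422).json({ error: decision.reason });
//     }
//     // ...persist the comment...
//   });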
// =============================================================================
// CATEGORY-SPECIFIC FILTERING
// =============================================================================
async function filterByCategory(text: string, categoriesToCheck: string[]) {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: text,
  });

  const result = moderation.results[0];
  const categories = result.categories as unknown as Record<string, boolean>;
  const violations = categoriesToCheck.filter(category => categories[category]);

  if (violations.length > 0) {
    console.log('Content violates:', violations.join(', '));
    return false;
  }
  console.log('Content passed specified category checks');
  return true;
}
// =============================================================================
// LOGGING AND AUDIT TRAIL
// =============================================================================
interface ModerationLog {
  timestamp: string;
  content: string;
  flagged: boolean;
  categories: string[];
  scores: any;
  action: 'allowed' | 'blocked' | 'review';
}

async function moderateWithLogging(content: string): Promise<ModerationLog> {
  const moderation = await openai.moderations.create({
    model: 'omni-moderation-latest',
    input: content,
  });
  const result = moderation.results[0];

  const categories = result.categories as unknown as Record<string, boolean>;
  const flaggedCategories = Object.keys(categories).filter(cat => categories[cat]);

  const log: ModerationLog = {
    timestamp: new Date().toISOString(),
    content: content.substring(0, 100), // Truncate for logging
    flagged: result.flagged,
    categories: flaggedCategories,
    scores: result.category_scores,
    action: result.flagged ? 'blocked' : 'allowed',
  };

  // In production: save to database or logging service
  console.log('Moderation log:', JSON.stringify(log, null, 2));
  return log;
}
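
// Note: moderation logs can contain sensitive user content. In production,
// consider redacting or hashing the stored text and setting a retention
// policy; the 100-character truncation above is only a minimal safeguard.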
// =============================================================================
// USER FEEDBACK PATTERN
// =============================================================================
function getUserFriendlyMessage(result: any): string {
  if (!result.flagged) {
    return 'Content approved';
  }

  const flaggedCategories = Object.keys(result.categories).filter(
    cat => result.categories[cat]
  );

  // Don't reveal exact detection details
  if (flaggedCategories.some(cat => cat.includes('harm'))) {
    return 'Your content appears to contain concerning material. Please review our community guidelines.';
  }
  if (flaggedCategories.includes('harassment') || flaggedCategories.includes('hate')) {
    return 'Your content may be disrespectful or harmful to others. Please rephrase.';
  }
  if (flaggedCategories.includes('violence')) {
    return 'Your content contains violent themes that violate our policies.';
  }
  return 'Your content doesn\'t meet our community guidelines. Please revise and try again.';
}
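
// moderateAndExplain is a small convenience sketch: it runs moderation via
// withErrorHandling (defined below; function declarations hoist) and maps
// the raw result to the user-facing message above.
async function moderateAndExplain(text: string): Promise<string> {
  const result = await withErrorHandling(text);
  return getUserFriendlyMessage(result);
}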
// =============================================================================
// ERROR HANDLING
// =============================================================================
async function withErrorHandling(text: string) {
  try {
    const moderation = await openai.moderations.create({
      model: 'omni-moderation-latest',
      input: text,
    });
    return moderation.results[0];
  } catch (error: any) {
    if (error.status === 401) {
      console.error('Invalid API key');
    } else if (error.status === 429) {
      console.error('Rate limit exceeded - implement retry logic');
    } else if (error.status === 500) {
      console.error('OpenAI service error - fail closed and block content');
    } else {
      console.error('Unexpected error:', error.message);
    }
    throw error;
  }
}
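
// A minimal retry sketch for rate limits, assuming exponential backoff is
// acceptable for your workload; the delay values are illustrative, not
// OpenAI recommendations.
async function moderateWithRetry(text: string, maxRetries = 3) {
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      const moderation = await openai.moderations.create({
        model: 'omni-moderation-latest',
        input: text,
      });
      return moderation.results[0];
    } catch (error: any) {
      if (error.status !== 429 || attempt === maxRetries) throw error;
      const delayMs = 500 * 2 ** attempt; // 500ms, 1s, 2s, ...
      console.warn(`Rate limited; retrying in ${delayMs}ms`);
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }
  // Unreachable, but satisfies TypeScript's return-path analysis.
  throw new Error('unreachable');
}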
// =============================================================================
// MAIN EXECUTION
// =============================================================================
async function main() {
  console.log('=== OpenAI Moderation API Examples ===\n');

  // Example 1: Basic moderation
  console.log('1. Basic Moderation:');
  await basicModeration();
  console.log();

  // Example 2: All categories
  console.log('2. All Safety Categories:');
  await allCategories();
  console.log();

  // Example 3: Custom thresholds
  console.log('3. Custom Thresholds:');
  await withCustomThresholds('This is a test message');
  console.log();

  // Example 4: Batch moderation
  console.log('4. Batch Moderation:');
  await batchModeration();
  console.log();

  // Example 5: Production pattern
  console.log('5. Production UGC Moderation:');
  const decision = await moderateUserContent('Safe user comment');
  console.log('Decision:', decision);
  console.log();
}

// Run if executed directly
if (require.main === module) {
  main().catch(console.error);
}
export {
  basicModeration,
  allCategories,
  withCustomThresholds,
  batchModeration,
  moderateUserContent,
  filterByCategory,
  moderateWithLogging,
  getUserFriendlyMessage,
  moderateAndExplain,
  withErrorHandling,
  moderateWithRetry,
};