Files
gh-hopeoverture-worldbuildi…/skills/markdown-editor-integrator/references/sanitization.md
2025-11-29 18:46:28 +08:00

14 KiB

Markdown Sanitization Security Guide

Why Sanitization is Critical

Markdown editors that render user-supplied content can be exploited for XSS and related injection attacks through:

  • Malicious JavaScript in HTML tags
  • Script injection via event handlers
  • Data exfiltration through image sources
  • Link-based phishing attacks
  • Iframe injection

Client-Side Sanitization

Using rehype-sanitize

npm install rehype-sanitize
import MDEditor from '@uiw/react-md-editor'
import rehypeSanitize from 'rehype-sanitize'

// Editor whose preview is rendered with rehype-sanitize in its rehype plugin
// list. No schema argument is passed to the plugin here.
<MDEditor
  value={value}
  onChange={onChange}
  previewOptions={{
    rehypePlugins: [[rehypeSanitize]],
  }}
/>

Custom Sanitization Schema

import rehypeSanitize, { defaultSchema } from 'rehype-sanitize'
import { deepmerge } from 'deepmerge-ts'

// Project-specific additions that are merged into rehype-sanitize's
// `defaultSchema` below.
const schemaOverrides = {
  attributes: {
    // Allow className on all elements
    '*': ['className'],
    a: ['href', 'title', 'target', 'rel'],
    img: ['src', 'alt', 'title', 'width', 'height'],
    // For syntax highlighting
    code: ['className'],
  },
  tagNames: [
    // Standard markdown
    'p', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'strong', 'em', 'u', 's', 'del', 'ins',
    'ul', 'ol', 'li',
    'blockquote', 'code', 'pre',
    'a', 'img',
    'table', 'thead', 'tbody', 'tr', 'th', 'td',
    'hr', 'div', 'span',
    // Additional if needed
    'kbd', 'mark', 'abbr',
  ],
  protocols: {
    href: ['http', 'https', 'mailto'],
    src: ['http', 'https'],
  },
}

// Schema handed to rehype-sanitize: the library defaults deep-merged with the
// overrides above.
// NOTE(review): deepmerge-ts presumably concatenates arrays, which would make
// these lists extend the defaults rather than replace them - confirm before
// relying on this as a strict allowlist.
const customSchema = deepmerge(defaultSchema, schemaOverrides)

// Editor preview configured with rehype-sanitize and `customSchema` as the
// plugin's option.
<MDEditor
  previewOptions={{
    rehypePlugins: [[rehypeSanitize, customSchema]],
  }}
/>

Strict Sanitization (No HTML)

import rehypeSanitize from 'rehype-sanitize'

// Heading levels permitted by the strict schema.
const strictHeadingTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

// Minimal allowlist schema: a small set of text-formatting tags, `href` on
// links, `className` on code, and https as the only listed link protocol.
// Unlike the custom schema, this object is not merged with `defaultSchema`.
const strictSchema = {
  tagNames: [
    'p',
    'br',
    ...strictHeadingTags,
    'strong',
    'em',
    'code',
    'pre',
    'ul',
    'ol',
    'li',
    'blockquote',
    'a',
    'hr',
  ],
  attributes: {
    a: ['href'],
    code: ['className'],
  },
  protocols: {
    // Only HTTPS links
    href: ['https'],
  },
}

// Editor preview configured with rehype-sanitize and `strictSchema` as the
// plugin's option.
<MDEditor
  previewOptions={{
    rehypePlugins: [[rehypeSanitize, strictSchema]],
  }}
/>

Server-Side Sanitization

Using DOMPurify

npm install isomorphic-dompurify
// lib/sanitize-markdown.ts
import DOMPurify from 'isomorphic-dompurify'

/**
 * Passes `markdown` through DOMPurify with a fixed tag/attribute allowlist and
 * returns the sanitized string.
 *
 * URLs must start with `http:`, `https:` or `mailto:` (case-insensitive) to
 * satisfy `ALLOWED_URI_REGEXP`.
 *
 * NOTE(review): DOMPurify treats its input as HTML, so Markdown-syntax links
 * such as `[x](javascript:...)` are presumably not inspected here - confirm
 * that the rendering step sanitizes its output as well.
 *
 * @param markdown - Untrusted Markdown that may contain inline HTML.
 * @returns The sanitized string produced by DOMPurify.
 */
export function sanitizeMarkdown(markdown: string): string {
  const allowedTags = [
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'p', 'br', 'strong', 'em', 'u', 's',
    'ul', 'ol', 'li',
    'blockquote', 'code', 'pre',
    'a', 'img',
    'table', 'thead', 'tbody', 'tr', 'th', 'td',
    'hr', 'div', 'span',
  ]
  const allowedAttributes = [
    'href', 'src', 'alt', 'title',
    'class', 'id',
    'width', 'height',
  ]

  // The config stays an inline literal so the string-returning call signature
  // is selected.
  return DOMPurify.sanitize(markdown, {
    ALLOWED_TAGS: allowedTags,
    ALLOWED_ATTR: allowedAttributes,
    ALLOWED_URI_REGEXP: /^(?:https?:|mailto:)/i,
    KEEP_CONTENT: true,
    RETURN_DOM: false,
    RETURN_DOM_FRAGMENT: false,
    RETURN_DOM_IMPORT: false,
  })
}

Server Action with Sanitization

'use server'

import { sanitizeMarkdown } from '@/lib/sanitize-markdown'
import { db } from '@/lib/db'
import { auth } from '@/lib/auth'

/**
 * Server action that sanitizes a character biography and writes it to the
 * database.
 *
 * NOTE(review): only the presence of a signed-in user is checked; nothing
 * visible here verifies that the user may edit `characterId` - confirm.
 *
 * @param characterId - Id of the character row to update.
 * @param biography - Untrusted Markdown submitted by the client.
 * @returns `{ success: true }` on success, otherwise `{ success: false, message }`.
 */
export async function saveCharacterBio(
  characterId: string,
  biography: string
) {
  const currentSession = await auth()
  if (!currentSession?.user) {
    return { success: false, message: 'Unauthorized' }
  }

  // Sanitization happens outside the try block, so an error thrown by the
  // sanitizer propagates to the caller instead of being reported as a save
  // failure.
  const cleanBiography = sanitizeMarkdown(biography)
  const updateArgs = {
    where: { id: characterId },
    data: { biography: cleanBiography },
  }

  try {
    await db.character.update(updateArgs)
  } catch (error) {
    console.error('Failed to save biography:', error)
    return { success: false, message: 'Failed to save' }
  }

  return { success: true }
}

API Route with Sanitization

// app/api/entities/[id]/description/route.ts
import { NextRequest, NextResponse } from 'next/server'
import { sanitizeMarkdown } from '@/lib/sanitize-markdown'
import { db } from '@/lib/db'
import { getServerSession } from 'next-auth'

/**
 * PATCH handler that stores a sanitized description for the entity in
 * `params.id`.
 *
 * Responses:
 * - 401 when there is no signed-in user.
 * - 400 when the body is not valid JSON, or `description` is missing, empty,
 *   or not a string.
 * - 500 when the database update fails.
 * - `{ success: true }` otherwise.
 *
 * NOTE(review): only the presence of a session is checked; nothing visible
 * here verifies that the user may edit this entity - confirm.
 */
export async function PATCH(
  request: NextRequest,
  { params }: { params: { id: string } }
) {
  const session = await getServerSession()

  if (!session?.user) {
    return NextResponse.json(
      { error: 'Unauthorized' },
      { status: 401 }
    )
  }

  // request.json() rejects on a malformed body; report that as a client error
  // instead of letting it surface as an unhandled 500.
  let body: unknown
  try {
    body = await request.json()
  } catch {
    return NextResponse.json(
      { error: 'Invalid JSON body' },
      { status: 400 }
    )
  }

  // Valid JSON can still be null, a primitive, or an array, so read the field
  // defensively rather than destructuring (which throws on null).
  const description =
    typeof body === 'object' && body !== null
      ? (body as { description?: unknown }).description
      : undefined

  // Validate input
  if (typeof description !== 'string' || !description) {
    return NextResponse.json(
      { error: 'Invalid description' },
      { status: 400 }
    )
  }

  // Sanitize
  const sanitized = sanitizeMarkdown(description)

  // Save to database; failures are logged and reported as a generic 500 so
  // internal error details are not leaked to the client.
  try {
    await db.entity.update({
      where: { id: params.id },
      data: { description: sanitized },
    })
  } catch (error) {
    console.error('Failed to save description:', error)
    return NextResponse.json(
      { error: 'Failed to save' },
      { status: 500 }
    )
  }

  return NextResponse.json({ success: true })
}

Advanced Sanitization Patterns

Markdown-to-HTML with Sanitization

import { remark } from 'remark'
import remarkHtml from 'remark-html'
import DOMPurify from 'isomorphic-dompurify'

/**
 * Renders Markdown to HTML with remark and then sanitizes that HTML with
 * DOMPurify.
 *
 * remark-html's own `sanitize` option is turned off because DOMPurify is the
 * single sanitization step in this pipeline.
 *
 * @param markdown - Untrusted Markdown source.
 * @returns HTML restricted to the tag and attribute allowlists below.
 */
export async function markdownToSafeHtml(markdown: string): Promise<string> {
  const processor = remark().use(remarkHtml, { sanitize: false }) // Don't double-sanitize
  const rendered = await processor.process(markdown)

  // Sanitize the rendered HTML, not the Markdown source.
  return DOMPurify.sanitize(rendered.toString(), {
    ALLOWED_TAGS: [
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
      'p', 'br', 'strong', 'em', 'code', 'pre',
      'ul', 'ol', 'li',
      'blockquote', 'a', 'img',
      'table', 'thead', 'tbody', 'tr', 'th', 'td',
      'hr',
    ],
    ALLOWED_ATTR: ['href', 'src', 'alt', 'title'],
  })
}

// Usage in server action
/**
 * Persists both the original Markdown and its sanitized HTML rendering for
 * the entity with the given id.
 *
 * @param id - Id of the entity row to update.
 * @param markdown - Markdown source as submitted.
 * @returns `{ success: true }` once the update resolves.
 */
export async function saveDescription(id: string, markdown: string) {
  const descriptionHtml = await markdownToSafeHtml(markdown)

  const data = {
    descriptionMarkdown: markdown, // Store original
    descriptionHtml,               // Store sanitized HTML
  }
  await db.entity.update({ where: { id }, data })

  return { success: true }
}

Link Validation

import DOMPurify from 'isomorphic-dompurify'

/**
 * Sanitizes `markdown` with DOMPurify and additionally validates every link.
 *
 * - Only absolute http(s) URLs matching `ALLOWED_URI_REGEXP` keep their href.
 * - Links whose host is a blocked domain (or a subdomain of one) lose their
 *   href, as do links whose href cannot be parsed as a URL.
 * - Links that keep their href are forced to open in a new tab with
 *   `rel="noopener noreferrer"`.
 *
 * The hook is registered through `DOMPurify.addHook`, because hooks are not a
 * `sanitize()` config option. It is removed again in `finally` so it does not
 * leak into other callers of the shared DOMPurify instance.
 *
 * @param markdown - Untrusted Markdown that may contain inline HTML.
 * @returns The sanitized string.
 */
export function sanitizeWithLinkValidation(markdown: string): string {
  // Block certain domains
  const blockedDomains = ['malicious.com', 'phishing.com']

  DOMPurify.addHook('afterSanitizeAttributes', (node) => {
    if (node.tagName !== 'A') {
      return
    }

    const href = node.getAttribute('href')
    if (!href) {
      return
    }

    let hostname: string
    try {
      hostname = new URL(href).hostname.toLowerCase()
    } catch {
      // Unparseable URL: drop the link target instead of throwing mid-sanitize.
      node.removeAttribute('href')
      return
    }

    // Match the domain itself or any subdomain. A plain substring test would
    // also hit unrelated hosts such as "malicious.community".
    const isBlocked = blockedDomains.some(
      (domain) => hostname === domain || hostname.endsWith(`.${domain}`)
    )
    if (isBlocked) {
      node.removeAttribute('href')
      return
    }

    // Force external links to open in new tab
    node.setAttribute('target', '_blank')
    node.setAttribute('rel', 'noopener noreferrer')
  })

  try {
    return DOMPurify.sanitize(markdown, {
      ALLOWED_TAGS: ['a', 'p', 'h1', 'h2', 'h3', 'strong', 'em', 'code'],
      ALLOWED_ATTR: ['href'],
      ALLOWED_URI_REGEXP: /^https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$/,
    })
  } finally {
    // Removes the most recently added hook for this entry point, i.e. ours.
    DOMPurify.removeHook('afterSanitizeAttributes')
  }
}

Image Source Validation

import DOMPurify from 'isomorphic-dompurify'

/**
 * Sanitizes `markdown` with DOMPurify and strips the `src` of every image
 * that is not hosted on a trusted domain.
 *
 * - An `src` whose hostname is not exactly one of the trusted domains is
 *   removed and the `alt` text is replaced with a "blocked" notice.
 * - An `src` that cannot be parsed as an absolute URL is removed.
 *
 * The hook is registered through `DOMPurify.addHook`, because hooks are not a
 * `sanitize()` config option. It is removed again in `finally` so it does not
 * leak into other callers of the shared DOMPurify instance.
 *
 * NOTE(review): `storage.googleapis.com` presumably serves buckets owned by
 * arbitrary parties - confirm it is narrow enough to count as "trusted".
 *
 * @param markdown - Untrusted Markdown that may contain inline HTML.
 * @returns The sanitized string.
 */
export function sanitizeWithImageValidation(markdown: string): string {
  // Only allow images from trusted domains
  const trustedDomains = [
    'images.example.com',
    'cdn.example.com',
    'storage.googleapis.com',
  ]

  DOMPurify.addHook('afterSanitizeAttributes', (node) => {
    if (node.tagName !== 'IMG') {
      return
    }

    const src = node.getAttribute('src')
    if (!src) {
      return
    }

    let hostname: string
    try {
      hostname = new URL(src).hostname
    } catch {
      // Invalid URL, remove src
      node.removeAttribute('src')
      return
    }

    if (!trustedDomains.includes(hostname)) {
      node.removeAttribute('src')
      node.setAttribute('alt', 'Image blocked: untrusted source')
    }
  })

  try {
    return DOMPurify.sanitize(markdown, {
      ALLOWED_TAGS: ['img', 'p', 'h1', 'h2', 'strong', 'em'],
      ALLOWED_ATTR: ['src', 'alt', 'title', 'width', 'height'],
    })
  } finally {
    // Removes the most recently added hook for this entry point, i.e. ours.
    DOMPurify.removeHook('afterSanitizeAttributes')
  }
}

Security Best Practices

1. Always Sanitize on Server

Never trust client-side sanitization alone:

// [ERROR] BAD: Only client-side sanitization
/**
 * Anti-pattern example: forwards `data.description` to `saveDescription`
 * exactly as received from the form.
 *
 * Declared `async` because the body uses `await`.
 */
async function onSubmit(data: FormValues) {
  await saveDescription(data.description) // Raw user input
}

// [OK] GOOD: Server-side sanitization
'use server'
/**
 * Sanitizes the submitted description before handing it to the database.
 *
 * @param description - Untrusted Markdown from the client.
 */
export async function saveDescription(description: string) {
  const safeDescription = sanitizeMarkdown(description)

  await db.save(safeDescription)
}

2. Validate Input Length

// Upper bound on `description.length`, which counts UTF-16 code units
// (about 50 KB for ASCII-only text).
const MAX_MARKDOWN_LENGTH = 50_000

/**
 * Rejects over-long descriptions, then sanitizes and saves the rest.
 *
 * @param description - Untrusted Markdown from the client.
 * @throws Error with message "Description too long" when the length limit is
 *   exceeded. Nothing is sanitized or saved in that case.
 */
export async function saveDescription(description: string) {
  const exceedsLimit = description.length > MAX_MARKDOWN_LENGTH
  if (exceedsLimit) {
    throw new Error('Description too long')
  }

  await db.save(sanitizeMarkdown(description))
}

3. Store Both Raw and Sanitized

/**
 * Stores the raw description next to its sanitized form and stamps the
 * update time.
 *
 * @param entityId - Id of the entity row to update.
 * @param description - Markdown as submitted by the client.
 */
export async function saveDescription(
  entityId: string,
  description: string
) {
  const safeDescription = sanitizeMarkdown(description)

  const data = {
    descriptionRaw: description,     // Original for editing
    description: safeDescription,    // Sanitized for display
    descriptionUpdatedAt: new Date(),
  }

  await db.entity.update({ where: { id: entityId }, data })
}

4. Implement Rate Limiting

import { Ratelimit } from '@upstash/ratelimit'
import { Redis } from '@upstash/redis'

// Shared limiter backed by the Redis instance configured through environment
// variables: a sliding window of 10 requests per minute.
const ratelimit = new Ratelimit({
  redis: Redis.fromEnv(),
  limiter: Ratelimit.slidingWindow(10, '1 m'),
})

/**
 * Applies the per-user rate limit, then sanitizes and saves the description.
 *
 * @param userId - Key the rate limit is tracked under.
 * @param description - Untrusted Markdown from the client.
 * @throws Error with message "Rate limit exceeded" when the limiter rejects
 *   the call. Nothing is sanitized or saved in that case.
 */
export async function saveDescription(
  userId: string,
  description: string
) {
  const verdict = await ratelimit.limit(userId)
  if (!verdict.success) {
    throw new Error('Rate limit exceeded')
  }

  await db.save(sanitizeMarkdown(description))
}

5. Log Suspicious Content

/**
 * Sanitizes `markdown` with DOMPurify's default configuration and records a
 * security event whenever the output differs from the input.
 *
 * NOTE(review): DOMPurify re-serializes its input, so harmless text
 * (for example a bare `&`) presumably also changes and gets reported -
 * confirm the false-positive rate is acceptable. `removed` is a plain length
 * difference and may be negative.
 *
 * @param markdown - Untrusted Markdown that may contain inline HTML.
 * @param userId - Id attached to the log entries.
 * @returns The sanitized string.
 */
export function sanitizeAndLog(
  markdown: string,
  userId: string
): string {
  const sanitized = DOMPurify.sanitize(markdown)

  // Unchanged input needs no logging.
  if (sanitized === markdown) {
    return sanitized
  }

  console.warn('Suspicious content detected:', {
    userId,
    timestamp: new Date(),
    removed: markdown.length - sanitized.length,
  })

  // Optionally store for security review (first 1000 characters of each)
  logSecurityEvent({
    type: 'SUSPICIOUS_CONTENT',
    userId,
    original: markdown.substring(0, 1000),
    sanitized: sanitized.substring(0, 1000),
  })

  return sanitized
}

Testing Sanitization

Unit Tests

import { describe, it, expect } from 'vitest'
import { sanitizeMarkdown } from './sanitize-markdown'

// Table-driven XSS regression suite for sanitizeMarkdown. Each case lists the
// payload, substrings that must be absent from the output, and substrings
// that must survive.
describe('sanitizeMarkdown', () => {
  const cases: Array<{
    name: string
    input: string
    forbidden: string[]
    required: string[]
  }> = [
    {
      name: 'removes script tags',
      input: '<script>alert("XSS")</script>Hello',
      forbidden: ['<script>'],
      required: ['Hello'],
    },
    {
      name: 'removes event handlers',
      input: '<img src="x" onerror="alert(1)">',
      forbidden: ['onerror'],
      required: [],
    },
    {
      name: 'allows safe HTML',
      input: '<p><strong>Bold</strong> and <em>italic</em></p>',
      forbidden: [],
      required: ['<strong>', '<em>'],
    },
    {
      name: 'sanitizes links',
      input: '<a href="javascript:alert(1)">Click</a>',
      forbidden: ['javascript:'],
      required: [],
    },
    {
      name: 'handles nested tags',
      input: '<div><script>alert(1)</script><p>Safe</p></div>',
      forbidden: ['<script>'],
      required: ['Safe'],
    },
  ]

  for (const { name, input, forbidden, required } of cases) {
    it(name, () => {
      const result = sanitizeMarkdown(input)

      for (const fragment of forbidden) {
        expect(result).not.toContain(fragment)
      }
      for (const fragment of required) {
        expect(result).toContain(fragment)
      }
    })
  }
})

Integration Tests

import { test, expect } from '@playwright/test'

// End-to-end check that a script payload typed into the editor neither
// executes nor reaches the rendered preview as markup.
test.describe('Markdown Editor Security', () => {
  test('prevents XSS through markdown', async ({ page }) => {
    await page.goto('/editor')

    const xssPayload = '<script>window.xssExecuted = true</script>Hello'

    await page.fill('[role="textbox"]', xssPayload)
    await page.click('button:has-text("Save")')

    // Wait for the preview to render via an auto-retrying assertion instead
    // of a fixed sleep, which is both slower and flaky.
    const preview = page.locator('.preview')
    await expect(preview).toContainText('Hello')

    // Check that script did not execute (read only after the content rendered)
    const xssExecuted = await page.evaluate(() => {
      return (window as Window & { xssExecuted?: boolean }).xssExecuted
    })
    expect(xssExecuted).toBeUndefined()

    // Check that content was sanitized: no literal tag text, and no <script>
    // element in the preview DOM (textContent alone cannot reveal one).
    await expect(preview).not.toContainText('<script>')
    await expect(preview.locator('script')).toHaveCount(0)
  })
})

Common Attack Vectors

Script Injection

<!-- Attack -->
<script>alert('XSS')</script>
<img src=x onerror="alert('XSS')">
<svg onload="alert('XSS')">

<!-- Sanitized -->
<!-- Scripts and event handlers removed -->

Link-Based Attacks

<!-- Attack -->
<a href="javascript:alert('XSS')">Click</a>
<a href="data:text/html,<script>alert('XSS')</script>">Click</a>

<!-- Sanitized -->
<a>Click</a> <!-- href removed -->

Iframe Injection

<!-- Attack -->
<iframe src="https://malicious.com"></iframe>

<!-- Sanitized -->
<!-- iframe tag removed completely -->

Sanitization Checklist

  • Client-side sanitization with rehype-sanitize configured
  • Server-side sanitization with DOMPurify
  • Whitelist approach (allowed tags/attributes)
  • Protocol restrictions (http/https/mailto allowlist; https only in strict mode)
  • Input length validation
  • Rate limiting on save endpoints
  • Logging of content modified by sanitization (possible attack attempts)
  • Both raw and sanitized versions stored
  • External links open in new tab with rel="noopener noreferrer"
  • Image sources validated against trusted domains
  • Unit tests for XSS prevention
  • Integration tests with malicious payloads
  • Security headers configured (CSP)
  • Regular dependency updates for security patches