zhongwei/gh-hopeoverture-worldbuilding-app-skills-plugins-markdown-editor-integrator

Fork 0

Files

Zhongwei Li 280d358cb6 Initial commit

2025-11-29 18:46:28 +08:00

14 KiB

Raw Blame History

Markdown Sanitization Security Guide

Why Sanitization is Critical

Markdown editors can be exploited for XSS attacks through:

Malicious JavaScript in HTML tags
Script injection via event handlers
Data exfiltration through image sources
Link-based phishing attacks
Iframe injection

Client-Side Sanitization

Using rehype-sanitize

npm install rehype-sanitize

import MDEditor from '@uiw/react-md-editor'
import rehypeSanitize from 'rehype-sanitize'

// Editor whose preview output is passed through rehype-sanitize.
// No custom schema is supplied here, so the plugin runs with whatever
// schema it uses by default.
<MDEditor
  value={value}
  onChange={onChange}
  previewOptions={{
    rehypePlugins: [[rehypeSanitize]],
  }}
/>

Custom Sanitization Schema

import rehypeSanitize, { defaultSchema } from 'rehype-sanitize'
import { deepmerge } from 'deepmerge-ts'

// Project-specific allowlist layered on top of rehype-sanitize's defaultSchema.
// NOTE(review): deepmerge-ts concatenates arrays by default, so these lists
// presumably extend the defaults rather than replace them — confirm that is
// the intended result.
const schemaOverrides = {
  attributes: {
    '*': ['className'], // className is accepted on every element
    a: ['href', 'title', 'target', 'rel'],
    img: ['src', 'alt', 'title', 'width', 'height'],
    code: ['className'], // needed by syntax highlighters
  },
  tagNames: [
    // Elements produced by standard markdown
    'p', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'strong', 'em', 'u', 's', 'del', 'ins',
    'ul', 'ol', 'li',
    'blockquote', 'code', 'pre',
    'a', 'img',
    'table', 'thead', 'tbody', 'tr', 'th', 'td',
    'hr', 'div', 'span',
    // Optional extras
    'kbd', 'mark', 'abbr',
  ],
  protocols: {
    href: ['http', 'https', 'mailto'],
    src: ['http', 'https'],
  },
}

const customSchema = deepmerge(defaultSchema, schemaOverrides)

// Preview rendered through rehype-sanitize configured with customSchema.
<MDEditor
  previewOptions={{
    rehypePlugins: [[rehypeSanitize, customSchema]],
  }}
/>

Strict Sanitization (No HTML)

import rehypeSanitize from 'rehype-sanitize'

/**
 * Minimal rehype-sanitize schema: only basic markdown elements survive,
 * links may carry just `href`, and only `https` link targets are accepted.
 */
const strictSchema = {
  tagNames: [
    'p', 'br', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'strong', 'em', 'code', 'pre',
    'ul', 'ol', 'li',
    'blockquote',
    'a',
    'hr',
  ],
  attributes: {
    a: ['href'],
    code: ['className'],
  },
  protocols: {
    href: ['https'], // Only HTTPS links
  },
  // Elements missing from tagNames are replaced by their children, which would
  // leave the body of a <script> visible as plain text. Listing the tag here
  // drops the element together with its content.
  strip: ['script'],
}

// Preview rendered through rehype-sanitize configured with strictSchema.
<MDEditor
  previewOptions={{
    rehypePlugins: [[rehypeSanitize, strictSchema]],
  }}
/>

Server-Side Sanitization

Using DOMPurify

npm install isomorphic-dompurify

// lib/sanitize-markdown.ts
import DOMPurify from 'isomorphic-dompurify'

/**
 * Passes the given text through DOMPurify using a fixed allowlist of tags,
 * attributes and URI schemes, and returns the cleaned string.
 *
 * NOTE(review): DOMPurify treats its input as HTML, so Markdown syntax such as
 * a leading `>` may come back entity-encoded — confirm this is acceptable for
 * content that is stored and later re-edited as Markdown.
 *
 * @param markdown - Untrusted user-supplied text.
 * @returns The sanitized string.
 */
export function sanitizeMarkdown(markdown: string): string {
  const permittedTags = [
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'p', 'br', 'strong', 'em', 'u', 's',
    'ul', 'ol', 'li',
    'blockquote', 'code', 'pre',
    'a', 'img',
    'table', 'thead', 'tbody', 'tr', 'th', 'td',
    'hr', 'div', 'span',
  ]
  const permittedAttributes = [
    'href', 'src', 'alt', 'title',
    'class', 'id',
    'width', 'height',
  ]

  // The options object stays inline so the literal `false` flags keep
  // selecting the string-returning form of sanitize().
  const cleaned = DOMPurify.sanitize(markdown, {
    ALLOWED_TAGS: permittedTags,
    ALLOWED_ATTR: permittedAttributes,
    ALLOWED_URI_REGEXP: /^(?:https?:|mailto:)/i,
    KEEP_CONTENT: true,
    RETURN_DOM: false,
    RETURN_DOM_FRAGMENT: false,
    RETURN_DOM_IMPORT: false,
  })
  return cleaned
}

Server Action with Sanitization

'use server'

import { sanitizeMarkdown } from '@/lib/sanitize-markdown'
import { db } from '@/lib/db'
import { auth } from '@/lib/auth'

/**
 * Server action that stores a sanitized biography for a character.
 *
 * NOTE(review): only the presence of a signed-in user is checked; whether that
 * user may edit this particular character is not verified here — confirm it is
 * enforced elsewhere.
 *
 * @param characterId - Id of the character row to update.
 * @param biography - Untrusted biography text from the client.
 * @returns `{ success: true }`, or `{ success: false, message }` on failure.
 */
export async function saveCharacterBio(
  characterId: string,
  biography: string
) {
  const currentSession = await auth()
  const signedIn = Boolean(currentSession?.user)

  if (!signedIn) {
    return { success: false, message: 'Unauthorized' }
  }

  // Clean the text before it gets anywhere near the database
  const cleanBiography = sanitizeMarkdown(biography)

  try {
    await db.character.update({
      where: { id: characterId },
      data: { biography: cleanBiography },
    })

    return { success: true }
  } catch (err) {
    console.error('Failed to save biography:', err)
    return { success: false, message: 'Failed to save' }
  }
}

API Route with Sanitization

// app/api/entities/[id]/description/route.ts
import { NextRequest, NextResponse } from 'next/server'
import { sanitizeMarkdown } from '@/lib/sanitize-markdown'
import { db } from '@/lib/db'
import { getServerSession } from 'next-auth'

/**
 * PATCH handler that replaces an entity's description with a sanitized copy.
 *
 * Responses:
 * - 401 when there is no signed-in user
 * - 400 when the body is not valid JSON or `description` is not a non-empty string
 * - 500 when the database update fails
 * - 200 `{ success: true }` otherwise
 *
 * NOTE(review): only the presence of a signed-in user is checked; whether that
 * user may edit this entity is not verified here — confirm it is enforced
 * elsewhere.
 */
export async function PATCH(
  request: NextRequest,
  { params }: { params: { id: string } }
) {
  const session = await getServerSession()

  if (!session?.user) {
    return NextResponse.json(
      { error: 'Unauthorized' },
      { status: 401 }
    )
  }

  // request.json() rejects on malformed JSON; report that as a client error
  // instead of letting it surface as an unhandled 500.
  let body: unknown
  try {
    body = await request.json()
  } catch {
    return NextResponse.json(
      { error: 'Invalid JSON body' },
      { status: 400 }
    )
  }

  // A JSON body of `null` or a primitive has no properties to read, so guard
  // before touching `description`.
  const description =
    typeof body === 'object' && body !== null
      ? (body as { description?: unknown }).description
      : undefined

  // Validate input
  if (typeof description !== 'string' || description.length === 0) {
    return NextResponse.json(
      { error: 'Invalid description' },
      { status: 400 }
    )
  }

  // Sanitize
  const sanitized = sanitizeMarkdown(description)

  // Save to database
  try {
    await db.entity.update({
      where: { id: params.id },
      data: { description: sanitized },
    })
  } catch (error) {
    console.error('Failed to save description:', error)
    return NextResponse.json(
      { error: 'Failed to save' },
      { status: 500 }
    )
  }

  return NextResponse.json({ success: true })
}

Advanced Sanitization Patterns

Markdown-to-HTML with Sanitization

import { remark } from 'remark'
import remarkHtml from 'remark-html'
import DOMPurify from 'isomorphic-dompurify'

/**
 * Renders Markdown to HTML with remark, then cleans the resulting HTML with
 * DOMPurify using a fixed tag/attribute allowlist.
 *
 * @param markdown - Untrusted Markdown source.
 * @returns The sanitized HTML string.
 */
export async function markdownToSafeHtml(markdown: string): Promise<string> {
  // remark-html's own sanitizer is switched off; DOMPurify does the cleanup below
  const rendered = await remark()
    .use(remarkHtml, { sanitize: false })
    .process(markdown)

  const unsafeHtml = rendered.toString()

  const permittedTags = [
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'p', 'br', 'strong', 'em', 'code', 'pre',
    'ul', 'ol', 'li',
    'blockquote', 'a', 'img',
    'table', 'thead', 'tbody', 'tr', 'th', 'td',
    'hr',
  ]
  const permittedAttributes = ['href', 'src', 'alt', 'title']

  return DOMPurify.sanitize(unsafeHtml, {
    ALLOWED_TAGS: permittedTags,
    ALLOWED_ATTR: permittedAttributes,
  })
}

// Usage in server action
/**
 * Persists both the original Markdown and its sanitized HTML rendering for
 * the entity with the given id.
 *
 * @param id - Id of the entity row to update.
 * @param markdown - Markdown source as submitted.
 * @returns `{ success: true }` once the update has completed.
 */
export async function saveDescription(id: string, markdown: string) {
  const descriptionHtml = await markdownToSafeHtml(markdown)

  const data = {
    descriptionMarkdown: markdown, // untouched source, kept for later editing
    descriptionHtml,               // sanitized HTML used for display
  }

  await db.entity.update({ where: { id }, data })

  return { success: true }
}

Link Validation

import DOMPurify from 'isomorphic-dompurify'

/**
 * Sanitizes the input down to a small tag allowlist and applies extra rules
 * to every surviving link:
 * - links whose `href` cannot be parsed as an absolute URL lose the `href`
 * - links to a blocked domain (or any of its subdomains) lose the `href`
 * - all remaining links get `target="_blank"` and `rel="noopener noreferrer"`
 *
 * @param markdown - Untrusted user-supplied text.
 * @returns The sanitized string.
 */
export function sanitizeWithLinkValidation(markdown: string): string {
  const blockedDomains = ['malicious.com', 'phishing.com']

  // Match the domain itself or a subdomain of it. A plain substring test would
  // also hit unrelated hosts such as "notmalicious.com".
  const isBlockedHost = (hostname: string): boolean =>
    blockedDomains.some(
      (domain) => hostname === domain || hostname.endsWith(`.${domain}`)
    )

  const validateLink = (node: Element): void => {
    if (node.tagName !== 'A') return

    const href = node.getAttribute('href')
    if (!href) return

    let url: URL
    try {
      url = new URL(href)
    } catch {
      // Not an absolute URL: drop it rather than let the hook throw
      node.removeAttribute('href')
      return
    }

    if (isBlockedHost(url.hostname)) {
      node.removeAttribute('href')
      return
    }

    // Force external links to open in a new tab without leaking the opener
    node.setAttribute('target', '_blank')
    node.setAttribute('rel', 'noopener noreferrer')
  }

  // DOMPurify hooks are registered through addHook(); they are not part of the
  // sanitize() options object. Register for this call only and always remove
  // the hook afterwards so it does not affect other sanitize() callers.
  DOMPurify.addHook('afterSanitizeAttributes', validateLink)
  try {
    return DOMPurify.sanitize(markdown, {
      ALLOWED_TAGS: ['a', 'p', 'h1', 'h2', 'h3', 'strong', 'em', 'code'],
      ALLOWED_ATTR: ['href'],
      ALLOWED_URI_REGEXP: /^https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$/,
    })
  } finally {
    DOMPurify.removeHook('afterSanitizeAttributes')
  }
}

Image Source Validation

import DOMPurify from 'isomorphic-dompurify'

/**
 * Sanitizes the input down to a small tag allowlist and only keeps image
 * sources that point at an explicitly trusted host.
 *
 * Images whose `src` is not a parseable absolute URL lose the `src`; images
 * from any other host lose the `src` and get an explanatory `alt` text.
 *
 * @param markdown - Untrusted user-supplied text.
 * @returns The sanitized string.
 */
export function sanitizeWithImageValidation(markdown: string): string {
  // Only images served from these exact hosts are allowed through
  const trustedDomains = [
    'images.example.com',
    'cdn.example.com',
    'storage.googleapis.com',
  ]

  const validateImage = (node: Element): void => {
    if (node.tagName !== 'IMG') return

    const src = node.getAttribute('src')
    if (!src) return

    let url: URL
    try {
      url = new URL(src)
    } catch {
      // Invalid URL, remove src
      node.removeAttribute('src')
      return
    }

    if (!trustedDomains.includes(url.hostname)) {
      node.removeAttribute('src')
      node.setAttribute('alt', 'Image blocked: untrusted source')
    }
  }

  // DOMPurify hooks are registered through addHook(); they are not part of the
  // sanitize() options object. Register for this call only and always remove
  // the hook afterwards so it does not affect other sanitize() callers.
  DOMPurify.addHook('afterSanitizeAttributes', validateImage)
  try {
    return DOMPurify.sanitize(markdown, {
      ALLOWED_TAGS: ['img', 'p', 'h1', 'h2', 'strong', 'em'],
      ALLOWED_ATTR: ['src', 'alt', 'title', 'width', 'height'],
    })
  } finally {
    DOMPurify.removeHook('afterSanitizeAttributes')
  }
}

Security Best Practices

1. Always Sanitize on Server

Never trust client-side sanitization alone:

// [ERROR] BAD: Only client-side sanitization
// Deliberately insecure example: the form value is forwarded exactly as the
// user typed it. The function must be async for the `await` below to be valid.
async function onSubmit(data: FormValues) {
  await saveDescription(data.description) // Raw user input
}

// [OK] GOOD: Server-side sanitization
'use server'
/** Sanitizes the description on the server and then hands it to the database. */
export async function saveDescription(description: string) {
  const cleanDescription = sanitizeMarkdown(description)
  await db.save(cleanDescription)
}

2. Validate Input Length

// Upper bound on description size, compared against String#length, i.e.
// counted in UTF-16 code units (about 50 KB for plain ASCII text).
const MAX_MARKDOWN_LENGTH = 50000

/**
 * Rejects over-long descriptions, then sanitizes and saves the rest.
 *
 * @throws Error with message 'Description too long' when the limit is exceeded.
 */
export async function saveDescription(description: string) {
  const withinLimit = description.length <= MAX_MARKDOWN_LENGTH
  if (!withinLimit) {
    throw new Error('Description too long')
  }

  await db.save(sanitizeMarkdown(description))
}

3. Store Both Raw and Sanitized

/**
 * Stores the submitted description twice — as received and sanitized —
 * together with the time of the update.
 *
 * @param entityId - Id of the entity row to update.
 * @param description - Description text as submitted.
 */
export async function saveDescription(
  entityId: string,
  description: string
) {
  const cleanDescription = sanitizeMarkdown(description)

  const data = {
    descriptionRaw: description,       // what the user typed, for re-editing
    description: cleanDescription,     // what gets rendered
    descriptionUpdatedAt: new Date(),
  }

  await db.entity.update({ where: { id: entityId }, data })
}

4. Implement Rate Limiting

import { Ratelimit } from '@upstash/ratelimit'
import { Redis } from '@upstash/redis'

// Sliding-window limiter whose Redis client is built from environment settings.
const ratelimit = new Ratelimit({
  redis: Redis.fromEnv(),
  limiter: Ratelimit.slidingWindow(10, '1 m'), // at most 10 calls per minute
})

/**
 * Sanitizes and saves a description, provided the user is under the rate limit.
 *
 * @param userId - Key the rate limit is tracked against.
 * @param description - Description text as submitted.
 * @throws Error with message 'Rate limit exceeded' when the limiter refuses.
 */
export async function saveDescription(
  userId: string,
  description: string
) {
  const verdict = await ratelimit.limit(userId)

  if (!verdict.success) {
    throw new Error('Rate limit exceeded')
  }

  await db.save(sanitizeMarkdown(description))
}

5. Log Suspicious Content

/**
 * Sanitizes the input with DOMPurify's default configuration and records a
 * security event whenever the sanitizer actually removed something.
 *
 * Detection relies on `DOMPurify.removed`, which lists what the most recent
 * sanitize() call removed. Comparing input and output strings is unreliable
 * because harmless text can change during re-serialization (for example `&`
 * becoming `&amp;`). The list is used for logging only, never for deciding
 * what to allow.
 *
 * @param markdown - Untrusted user-supplied text.
 * @param userId - Id of the submitting user, attached to the log entries.
 * @returns The sanitized string.
 */
export function sanitizeAndLog(
  markdown: string,
  userId: string
): string {
  const sanitized = DOMPurify.sanitize(markdown)

  // Read immediately: the list is replaced on the next sanitize() call
  const removedCount = DOMPurify.removed.length

  if (removedCount > 0) {
    console.warn('Suspicious content detected:', {
      userId,
      timestamp: new Date(),
      removed: removedCount, // number of removed elements/attributes
    })

    // Optionally store for security review
    logSecurityEvent({
      type: 'SUSPICIOUS_CONTENT',
      userId,
      original: markdown.substring(0, 1000), // first 1000 characters
      sanitized: sanitized.substring(0, 1000),
    })
  }

  return sanitized
}

Testing Sanitization

Unit Tests

import { describe, it, expect } from 'vitest'
import { sanitizeMarkdown } from './sanitize-markdown'

// Unit tests: each case feeds one hostile or benign HTML fragment through
// sanitizeMarkdown and checks what is left in the returned string.
describe('sanitizeMarkdown', () => {
  it('removes script tags', () => {
    const dirty = '<script>alert("XSS")</script>Hello'
    const clean = sanitizeMarkdown(dirty)
    expect(clean).not.toContain('<script>')
    expect(clean).toContain('Hello')
  })

  it('removes event handlers', () => {
    const dirty = '<img src="x" onerror="alert(1)">'
    const clean = sanitizeMarkdown(dirty)
    expect(clean).not.toContain('onerror')
  })

  it('allows safe HTML', () => {
    const harmless = '<p><strong>Bold</strong> and <em>italic</em></p>'
    const clean = sanitizeMarkdown(harmless)
    expect(clean).toContain('<strong>')
    expect(clean).toContain('<em>')
  })

  it('sanitizes links', () => {
    const dirty = '<a href="javascript:alert(1)">Click</a>'
    const clean = sanitizeMarkdown(dirty)
    expect(clean).not.toContain('javascript:')
  })

  it('handles nested tags', () => {
    const dirty = '<div><script>alert(1)</script><p>Safe</p></div>'
    const clean = sanitizeMarkdown(dirty)
    expect(clean).not.toContain('<script>')
    expect(clean).toContain('Safe')
  })
})

Integration Tests

import { test, expect } from '@playwright/test'

// End-to-end check: a script payload typed into the editor must neither run
// nor show up as literal markup in the rendered preview.
test.describe('Markdown Editor Security', () => {
  test('prevents XSS through markdown', async ({ page }) => {
    await page.goto('/editor')

    const xssPayload = '<script>window.xssExecuted = true</script>Hello'

    await page.fill('[role="textbox"]', xssPayload)
    await page.click('button:has-text("Save")')

    // Locator assertions retry until they pass, so no fixed sleep is needed:
    // once the preview shows the harmless text, rendering has finished.
    const preview = page.locator('.preview')
    await expect(preview).toContainText('Hello')

    // Check that script did not execute
    const xssExecuted = await page.evaluate(() => {
      return (window as Window & { xssExecuted?: boolean }).xssExecuted
    })
    expect(xssExecuted).toBeUndefined()

    // Check that content was sanitized
    await expect(preview).not.toContainText('<script>')
  })
})

Common Attack Vectors

Script Injection

<!-- Attack -->
<script>alert('XSS')</script>
<img src=x onerror="alert('XSS')">
<svg onload="alert('XSS')">

<!-- Sanitized -->
<!-- Scripts and event handlers removed -->

Link-based Attacks

<!-- Attack -->
<a href="javascript:alert('XSS')">Click</a>
<a href="data:text/html,<script>alert('XSS')</script>">Click</a>

<!-- Sanitized -->
<a>Click</a> <!-- href removed -->

Iframe Injection

<!-- Attack -->
<iframe src="https://malicious.com"></iframe>

<!-- Sanitized -->
<!-- iframe tag removed completely -->

Sanitization Checklist

Client-side sanitization with rehype-sanitize configured
Server-side sanitization with DOMPurify
Whitelist approach (allowed tags/attributes)
Protocol restrictions (http/https/mailto allowlist; https only in strict mode)
Input length validation
Rate limiting on save endpoints
Logging of content that was altered by sanitization
Both raw and sanitized versions stored
External links open in new tab with rel="noopener noreferrer"
Image sources validated against trusted domains
Unit tests for XSS prevention
Integration tests with malicious payloads
Security headers configured (CSP)
Regular dependency updates for security patches

14 KiB Raw Blame History

Markdown Sanitization Security Guide

Why Sanitization is Critical

Client-Side Sanitization

Using rehype-sanitize

Custom Sanitization Schema

Strict Sanitization (No HTML)

Server-Side Sanitization

Using DOMPurify

Server Action with Sanitization

API Route with Sanitization

Advanced Sanitization Patterns

Markdown-to-HTML with Sanitization

Link Validation

Image Source Validation

Security Best Practices

1. Always Sanitize on Server

2. Validate Input Length

3. Store Both Raw and Sanitized

4. Implement Rate Limiting

5. Log Suspicious Content

Testing Sanitization

Unit Tests

Integration Tests

Common Attack Vectors

Script Injection

Link-based Attacks

Iframe Injection

Sanitization Checklist

14 KiB

Raw Blame History