From ed84e15471903bfe35e0c0ab952ddf9f7a16f36d Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sat, 29 Nov 2025 18:18:23 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 12 + README.md | 3 + plugin.lock.json | 52 +++ skills/markdown-tools/SKILL.md | 146 ++++++++ .../references/conversion-examples.md | 346 ++++++++++++++++++ skills/markdown-tools/scripts/convert_path.py | 61 +++ 6 files changed, 620 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 plugin.lock.json create mode 100644 skills/markdown-tools/SKILL.md create mode 100644 skills/markdown-tools/references/conversion-examples.md create mode 100644 skills/markdown-tools/scripts/convert_path.py diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..fe80335 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "markdown-tools", + "description": "Convert documents (PDFs, Word, PowerPoint, Confluence exports) to markdown with Windows/WSL path handling support", + "version": "0.0.0-2025.11.28", + "author": { + "name": "daymade", + "email": "daymadev89@gmail.com" + }, + "skills": [ + "./skills/markdown-tools" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..cada0cc --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# markdown-tools + +Convert documents (PDFs, Word, PowerPoint, Confluence exports) to markdown with Windows/WSL path handling support diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..85d4604 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,52 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:daymade/claude-code-skills:markdown-tools", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "62cf537b7bdb5ed19d740e664111a5912fbdfa95", + "treeHash": "17aa71a1268617ab036dc7319909a3154d7456e0f0f527921cdcc271c71a11fb", + 
"generatedAt": "2025-11-28T10:16:11.418407Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "markdown-tools", + "description": "Convert documents (PDFs, Word, PowerPoint, Confluence exports) to markdown with Windows/WSL path handling support" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "17f4045f916f7ad75a0224052fe5392b994d2779af4d7062989feedd7b7d0d5e" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "ae25054ec70038a07c517c230298edd2ab7e4ab5d69cacab5100ba9976f11971" + }, + { + "path": "skills/markdown-tools/SKILL.md", + "sha256": "e6e9aa2e4f75b4824d8ab664cbe004e4b3ec6849db43cef5b858940633d0fb4f" + }, + { + "path": "skills/markdown-tools/references/conversion-examples.md", + "sha256": "184b501c4e277f7df9db9fc09df8f0d1190c12071730ccd2d16d8fbcbffdaf71" + }, + { + "path": "skills/markdown-tools/scripts/convert_path.py", + "sha256": "09e22a4b8cce524c1f9b496e16378bde7db58b4825cf3a7599a4b2b4c5f9a853" + } + ], + "dirSha256": "17aa71a1268617ab036dc7319909a3154d7456e0f0f527921cdcc271c71a11fb" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/markdown-tools/SKILL.md b/skills/markdown-tools/SKILL.md new file mode 100644 index 0000000..1cbbe37 --- /dev/null +++ b/skills/markdown-tools/SKILL.md @@ -0,0 +1,146 @@ +--- +name: markdown-tools +description: Converts documents to markdown (PDFs, Word docs, PowerPoint, Confluence exports) with Windows/WSL path handling. Activates when converting .doc/.docx/PDF/PPTX files to markdown, processing Confluence exports, handling Windows/WSL path conversions, or working with markitdown utility. 
+--- + +# Markdown Tools + +## Overview + +This skill provides document conversion to markdown with Windows/WSL path handling support. It helps convert various document formats to markdown and handles path conversions between Windows and WSL environments. + +## Core Capabilities + +### 1. Markdown Conversion +Convert documents to markdown format with automatic Windows/WSL path handling. + +### 2. Confluence Export Processing +Handle Confluence .doc exports with special characters for knowledge base integration. + +## Quick Start + +### Convert Any Document to Markdown + +```bash +# Basic conversion +markitdown "path/to/document.pdf" > output.md + +# WSL path example +markitdown "/mnt/c/Users/username/Documents/file.docx" > output.md +``` + +See `references/conversion-examples.md` for detailed examples of various conversion scenarios. + +### Convert Confluence Export + +```bash +# Direct conversion for simple exports +markitdown "confluence-export.doc" > output.md + +# For exports with special characters, see references/ +``` + +## Path Conversion + +### Windows to WSL Path Format + +Windows paths must be converted to WSL format before use in bash commands. + +**Conversion rules:** +- Replace `C:\` with `/mnt/c/` +- Replace `\` with `/` +- Preserve spaces and special characters +- Use quotes for paths with spaces + +**Example conversions:** +```bash +# Windows path +C:\Users\username\Documents\file.doc + +# WSL path +/mnt/c/Users/username/Documents/file.doc +``` + +**Helper script:** Use `scripts/convert_path.py` to automate conversion: + +```bash +python scripts/convert_path.py "C:\Users\username\Downloads\document.doc" +``` + +See `references/conversion-examples.md` for detailed path conversion examples. + +## Document Conversion Workflows + +### Workflow 1: Simple Markdown Conversion + +For straightforward document conversions (PDF, .docx without special characters): + +1. Convert Windows path to WSL format (if needed) +2. Run markitdown +3. 
Redirect output to .md file + +See `references/conversion-examples.md` for detailed examples. + +### Workflow 2: Confluence Export with Special Characters + +For Confluence .doc exports that contain special characters or complex formatting: + +1. Save .doc file to accessible location +2. Use appropriate conversion method (see references) +3. Verify output formatting + +See `references/conversion-examples.md` for step-by-step command examples. + +## Error Handling + +### Common Issues and Solutions + +**markitdown not found:** +```bash +# Install markitdown via pip +pip install markitdown + +# Or via uv tools +uv tool install markitdown +``` + +**Path not found:** +```bash +# Verify path exists +ls -la "/mnt/c/Users/username/Documents/file.doc" + +# Use convert_path.py helper +python scripts/convert_path.py "C:\Users\username\Documents\file.doc" +``` + +**Encoding issues:** +- Ensure files are UTF-8 encoded +- Check for special characters in filenames +- Use quotes around paths with spaces + +## Resources + +### references/conversion-examples.md +Comprehensive examples for all conversion scenarios including: +- Simple document conversions (PDF, Word, PowerPoint) +- Confluence export handling +- Path conversion examples for Windows/WSL +- Batch conversion operations +- Error recovery and troubleshooting examples + +Load this reference when users need specific command examples or encounter conversion issues. + +### scripts/convert_path.py +Python script to automate Windows to WSL path conversion. Handles: +- Drive letter conversion (C:\ → /mnt/c/) +- Backslash to forward slash +- Special characters and spaces + +## Best Practices + +1. **Convert Windows paths to WSL format** before bash operations +2. **Verify paths exist** before operations using ls or test commands +3. **Check output quality** after conversion +4. **Use markitdown directly** for simple conversions +5. **Test incrementally** - Verify each conversion step before proceeding +6. 
**Preserve directory structure** when doing batch conversions diff --git a/skills/markdown-tools/references/conversion-examples.md b/skills/markdown-tools/references/conversion-examples.md new file mode 100644 index 0000000..d340ce5 --- /dev/null +++ b/skills/markdown-tools/references/conversion-examples.md @@ -0,0 +1,346 @@ +# Document Conversion Examples + +Comprehensive examples for converting various document formats to markdown. + +## Basic Document Conversions + +### PDF to Markdown + +```bash +# Simple PDF conversion +markitdown "document.pdf" > output.md + +# WSL path example +markitdown "/mnt/c/Users/username/Documents/report.pdf" > report.md + +# With explicit output +markitdown "slides.pdf" > "slides.md" +``` + +### Word Documents to Markdown + +```bash +# Modern Word document (.docx) +markitdown "document.docx" > output.md + +# Legacy Word document (.doc) +markitdown "legacy-doc.doc" > output.md + +# Preserve directory structure +markitdown "/path/to/docs/file.docx" > "/path/to/output/file.md" +``` + +### PowerPoint to Markdown + +```bash +# Convert presentation +markitdown "presentation.pptx" > slides.md + +# WSL path +markitdown "/mnt/c/Users/username/Desktop/slides.pptx" > slides.md +``` + +--- + +## Windows/WSL Path Conversion + +### Basic Path Conversion Rules + +```bash +# Windows path +C:\Users\username\Documents\file.doc + +# WSL equivalent +/mnt/c/Users/username/Documents/file.doc +``` + +### Conversion Examples + +```bash +# Single backslash to forward slash +C:\folder\file.txt +→ /mnt/c/folder/file.txt + +# Path with spaces (must use quotes) +C:\Users\John Doe\Documents\report.pdf +→ "/mnt/c/Users/John Doe/Documents/report.pdf" + +# OneDrive path +C:\Users\username\OneDrive\Documents\file.doc +→ "/mnt/c/Users/username/OneDrive/Documents/file.doc" + +# Different drive letters +D:\Projects\document.docx +→ /mnt/d/Projects/document.docx +``` + +### Using convert_path.py Helper + +```bash +# Automatic conversion +python scripts/convert_path.py 
"C:\Users\username\Downloads\document.doc" +# Output: /mnt/c/Users/username/Downloads/document.doc + +# Use in conversion command +wsl_path=$(python scripts/convert_path.py "C:\Users\username\file.docx") +markitdown "$wsl_path" > output.md +``` + +--- + +## Batch Conversions + +### Convert Multiple Files + +```bash +# Convert all PDFs in a directory +for pdf in /path/to/pdfs/*.pdf; do + filename=$(basename "$pdf" .pdf) + markitdown "$pdf" > "/path/to/output/${filename}.md" +done + +# Convert all Word documents +for doc in /path/to/docs/*.docx; do + filename=$(basename "$doc" .docx) + markitdown "$doc" > "/path/to/output/${filename}.md" +done +``` + +### Batch Conversion with Path Conversion + +```bash +# Windows batch (PowerShell) +Get-ChildItem "C:\Documents\*.pdf" | ForEach-Object { + $wslPath = "/mnt/c/Documents/$($_.Name)" + $outFile = "/mnt/c/Output/$($_.BaseName).md" + wsl markitdown $wslPath > $outFile +} +``` + +--- + +## Confluence Export Handling + +### Simple Confluence Export + +```bash +# Direct conversion for exports without special characters +markitdown "confluence-export.doc" > output.md +``` + +### Export with Special Characters + +For Confluence exports containing special characters: + +1. Save the .doc file to an accessible location +2. Try direct conversion first: + ```bash + markitdown "confluence-export.doc" > output.md + ``` + +3. 
If special characters cause issues: + - Open in Word and save as .docx + - Or use LibreOffice to convert: `libreoffice --headless --convert-to docx export.doc` + - Then convert the .docx file + +### Handling Encoding Issues + +```bash +# Check file encoding +file -i "document.doc" + +# Convert if needed (using iconv) +iconv -f ISO-8859-1 -t UTF-8 input.md > output.md +``` + +--- + +## Advanced Conversion Scenarios + +### Preserving Directory Structure + +```bash +# Mirror directory structure +src_dir="/mnt/c/Users/username/Documents" +out_dir="/path/to/output" + +find "$src_dir" -name "*.docx" | while read file; do + # Get relative path + rel_path="${file#$src_dir/}" + out_file="$out_dir/${rel_path%.docx}.md" + + # Create output directory + mkdir -p "$(dirname "$out_file")" + + # Convert + markitdown "$file" > "$out_file" +done +``` + +### Conversion with Metadata + +```bash +# Add frontmatter to converted file +{ + echo "---" + echo "title: $(basename "$file" .pdf)" + echo "converted: $(date -I)" + echo "source: $file" + echo "---" + echo "" + markitdown "$file" +} > output.md +``` + +--- + +## Error Recovery + +### Handling Failed Conversions + +```bash +# Check if markitdown succeeded +if markitdown "document.pdf" > output.md 2> error.log; then + echo "Conversion successful" +else + echo "Conversion failed, check error.log" +fi +``` + +### Retry Logic + +```bash +# Retry failed conversions +for file in *.pdf; do + output="${file%.pdf}.md" + if ! [ -f "$output" ]; then + echo "Converting $file..." + markitdown "$file" > "$output" || echo "Failed: $file" >> failed.txt + fi +done +``` + +--- + +## Quality Verification + +### Check Conversion Quality + +```bash +# Compare line counts +wc -l document.pdf.md + +# Check for common issues +grep "TODO\|ERROR\|MISSING" output.md + +# Preview first/last lines +head -n 20 output.md +tail -n 20 output.md +``` + +### Validate Output + +```bash +# Check for empty files +if [ ! 
-s output.md ]; then + echo "Warning: Output file is empty" +fi + +# Verify markdown syntax +# Use a markdown linter if available +markdownlint output.md +``` + +--- + +## Best Practices + +### 1. Path Handling +- Always quote paths with spaces +- Verify paths exist before conversion +- Use absolute paths for scripts + +### 2. Batch Processing +- Log conversions for audit trail +- Handle errors gracefully +- Preserve original files + +### 3. Output Organization +- Mirror source directory structure +- Use consistent naming conventions +- Separate by document type or date + +### 4. Quality Assurance +- Spot-check random conversions +- Validate critical documents manually +- Keep conversion logs + +### 5. Performance +- Use parallel processing for large batches +- Skip already converted files +- Clean up temporary files + +--- + +## Common Patterns + +### Pattern: Convert and Review + +```bash +#!/bin/bash +file="$1" +output="${file%.*}.md" + +# Convert +markitdown "$file" > "$output" + +# Open in editor for review +${EDITOR:-vim} "$output" +``` + +### Pattern: Safe Conversion + +```bash +#!/bin/bash +file="$1" +backup="${file}.backup" +output="${file%.*}.md" + +# Backup original +cp "$file" "$backup" + +# Convert with error handling +if markitdown "$file" > "$output" 2> conversion.log; then + echo "Success: $output" + rm "$backup" +else + echo "Failed: Check conversion.log" + mv "$backup" "$file" +fi +``` + +### Pattern: Metadata Preservation + +```bash +#!/bin/bash +# Extract and preserve document metadata + +file="$1" +output="${file%.*}.md" + +# Get file metadata +created=$(stat -c %w "$file" 2>/dev/null || stat -f %SB "$file") +modified=$(stat -c %y "$file" 2>/dev/null || stat -f %Sm "$file") + +# Convert with metadata +{ + echo "---" + echo "original_file: $(basename "$file")" + echo "created: $created" + echo "modified: $modified" + echo "converted: $(date -I)" + echo "---" + echo "" + markitdown "$file" +} > "$output" +``` diff --git 
#!/usr/bin/env python3
# File: skills/markdown-tools/scripts/convert_path.py
"""
Convert Windows paths to WSL format.

Usage:
    python convert_path.py "C:\\Users\\username\\Downloads\\file.doc"

Output:
    /mnt/c/Users/username/Downloads/file.doc
"""

import re
import sys

# Leading drive specifier such as "C:\" or "c:/"; group 1 captures the letter.
_DRIVE_PATTERN = re.compile(r'^([A-Za-z]):[\\/]')

# Help text shown when the script is invoked without a path argument.
USAGE = (
    "Usage: python convert_path.py <windows_path>\n"
    'Example: python convert_path.py "C:\\Users\\username\\Downloads\\file.doc"'
)


def convert_windows_to_wsl(windows_path: str) -> str:
    """
    Convert a Windows path to WSL format.

    Surrounding double quotes and then single quotes are stripped first.
    If the result starts with a drive specifier (a letter, a colon and
    either a backslash or a forward slash), the drive letter is lowercased
    and mapped to ``/mnt/<letter>/`` and every remaining backslash becomes
    a forward slash. Anything else (an existing WSL path, a relative path)
    is returned with only the quotes removed.

    Args:
        windows_path: Windows path (e.g., "C:\\Users\\username\\file.doc")

    Returns:
        WSL path (e.g., "/mnt/c/Users/username/file.doc"), or the unquoted
        input when it does not begin with a drive specifier.
    """
    # Remove quotes if present (e.g. a path pasted with its shell quoting).
    path = windows_path.strip('"').strip("'")

    match = _DRIVE_PATTERN.match(path)
    if match is None:
        # No drive specifier: already a WSL path or a relative path.
        return path

    drive_letter = match.group(1).lower()

    # Everything after the "X:\" prefix, with Windows separators normalised.
    remainder = path[match.end():].replace('\\', '/')

    return f"/mnt/{drive_letter}/{remainder}"


def main():
    """
    Command-line entry point.

    Reads the Windows path from the first command-line argument and prints
    the converted path to stdout. When no argument is given, the usage text
    is written to stderr (so that ``$(python convert_path.py ...)`` never
    captures it as if it were a path) and the process exits with status 1.
    """
    if len(sys.argv) < 2:
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    print(convert_windows_to_wsl(sys.argv[1]))


if __name__ == "__main__":
    main()