Initial commit
This commit is contained in:
12
.claude-plugin/plugin.json
Normal file
12
.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"name": "html-to-pdf",
|
||||||
|
"description": "Convert HTML files to PDF or PNG format with multiple rendering options. Supports multi-page PDFs, single-page long-image PDFs, background colors, and Chinese/CJK characters.",
|
||||||
|
"version": "0.0.0-2025.11.28",
|
||||||
|
"author": {
|
||||||
|
"name": "Yong Gao",
|
||||||
|
"email": "zhongweili@tubi.tv"
|
||||||
|
},
|
||||||
|
"skills": [
|
||||||
|
"./"
|
||||||
|
]
|
||||||
|
}
|
||||||
183
EXAMPLES.md
Normal file
183
EXAMPLES.md
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
# Usage Examples
|
||||||
|
|
||||||
|
## Real-World Examples
|
||||||
|
|
||||||
|
### Example 1: Business Plan Report
|
||||||
|
```bash
|
||||||
|
# Original file: 202510_Alpha_Intelligence_BP.html (71 KB)
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py 202510_Alpha_Intelligence_BP.html
|
||||||
|
|
||||||
|
# Output:
|
||||||
|
# - 202510_Alpha_Intelligence_BP_fullpage.png (6.1 MB)
|
||||||
|
# - 202510_Alpha_Intelligence_BP_fullpage.pdf (1.6 MB, 1 page)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Result**: Single-page PDF with no page breaks, perfect for online presentation!
|
||||||
|
|
||||||
|
### Example 2: Simple Test
|
||||||
|
```bash
|
||||||
|
# Create test HTML
|
||||||
|
echo '<h1>Test</h1><p>Hello World</p>' > test.html
|
||||||
|
|
||||||
|
# Convert
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py test.html
|
||||||
|
|
||||||
|
# Output:
|
||||||
|
# - test_fullpage.png (12 KB)
|
||||||
|
# - test_fullpage.pdf (20 KB)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 3: Multi-File Batch Conversion
|
||||||
|
```bash
|
||||||
|
# Convert all HTML files in a directory
|
||||||
|
cd /path/to/html/files
|
||||||
|
|
||||||
|
for file in *.html; do
|
||||||
|
echo "Converting $file..."
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py "$file"
|
||||||
|
done
|
||||||
|
|
||||||
|
# Check results
|
||||||
|
ls -lh *_fullpage.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 4: With Custom Path
|
||||||
|
```bash
|
||||||
|
# Specify full paths
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py \
|
||||||
|
/Users/yonggao/Documents/report.html
|
||||||
|
```
|
||||||
|
|
||||||
|
## Integration Examples
|
||||||
|
|
||||||
|
### Use in Shell Script
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# convert_reports.sh
|
||||||
|
|
||||||
|
HTML_CONVERTER=~/.claude/skills/html-to-pdf/html_to_long_image.py
|
||||||
|
|
||||||
|
for report in reports/*.html; do
|
||||||
|
python "$HTML_CONVERTER" "$report"
|
||||||
|
echo "✓ Converted: $report"
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "All reports converted!"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use in Makefile
|
||||||
|
```makefile
|
||||||
|
# Makefile
CONVERTER = python ~/.claude/skills/html-to-pdf/html_to_long_image.py

.PHONY: all clean

all: report_fullpage.pdf presentation_fullpage.pdf

# The converter writes <name>_fullpage.pdf (and <name>_fullpage.png) next to
# <name>.html, so the target must carry that name; a plain "%.pdf" target
# would never be created and make would re-run the conversion every time.
%_fullpage.pdf: %.html
	$(CONVERTER) $<

clean:
	rm -f *_fullpage.pdf *_fullpage.png
|
||||||
|
```
|
||||||
|
|
||||||
|
### Use in Python Script
|
||||||
|
```python
|
||||||
|
import os
import subprocess
import sys


def convert_html_to_pdf(html_file):
    """Convert HTML to PDF using the skill.

    Args:
        html_file: Path to the HTML file to convert.

    Returns:
        True if the converter process exited with status 0, False otherwise.
    """
    # subprocess.run() with an argument list does not go through a shell,
    # so "~" has to be expanded explicitly or the script is never found.
    converter = os.path.expanduser(
        "~/.claude/skills/html-to-pdf/html_to_long_image.py"
    )
    result = subprocess.run(
        [sys.executable, converter, html_file],
        capture_output=True,
        text=True,
    )
    return result.returncode == 0


# Usage
if convert_html_to_pdf("report.html"):
    print("Success!")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Comparison with Original Files
|
||||||
|
|
||||||
|
| File | Type | Size | Pages | Notes |
|
||||||
|
|------|------|------|-------|-------|
|
||||||
|
| 202510_Alpha_Intelligence_BP.html | HTML | 71 KB | - | Source file |
|
||||||
|
| 202510_Alpha_Intelligence_BP_fullpage.png | PNG | 6.1 MB | 1 | High-quality screenshot |
|
||||||
|
| 202510_Alpha_Intelligence_BP_fullpage.pdf | PDF | 1.6 MB | 1 | **Best for viewing** |
|
||||||
|
| 202510_Alpha_Intelligence_BP_final.pdf | PDF | 5.7 MB | 22 | Multi-page (with breaks) |
|
||||||
|
|
||||||
|
## Performance Data
|
||||||
|
|
||||||
|
Based on real usage:
|
||||||
|
|
||||||
|
| HTML Size | Processing Time | PNG Size | PDF Size |
|
||||||
|
|-----------|----------------|----------|----------|
|
||||||
|
| 71 KB | ~5 seconds | 6.1 MB | 1.6 MB |
|
||||||
|
| 5 KB | ~3 seconds | 12 KB | 20 KB |
|
||||||
|
| 200 KB | ~8 seconds | 15 MB | 4 MB |
|
||||||
|
|
||||||
|
## Tips & Tricks
|
||||||
|
|
||||||
|
### Optimize for File Size
|
||||||
|
For typical pages the PNG is much larger than the PDF (very small pages can be the exception — see Performance Data above). If you only need the PDF:
|
||||||
|
```bash
|
||||||
|
# Generate both, then delete PNG
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py report.html
|
||||||
|
rm report_fullpage.png
|
||||||
|
```
|
||||||
|
|
||||||
|
### Preview Before Converting
|
||||||
|
```bash
|
||||||
|
# Open HTML in browser first
|
||||||
|
open report.html
|
||||||
|
|
||||||
|
# Then convert
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py report.html
|
||||||
|
```
|
||||||
|
|
||||||
|
### Auto-open Result
|
||||||
|
```bash
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py report.html && \
|
||||||
|
open report_fullpage.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Page Count
|
||||||
|
```bash
|
||||||
|
python -c "
|
||||||
|
from pypdf import PdfReader
|
||||||
|
r = PdfReader('report_fullpage.pdf')
|
||||||
|
print(f'Pages: {len(r.pages)}')
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting Examples
|
||||||
|
|
||||||
|
### Problem: Script not found
|
||||||
|
```bash
|
||||||
|
# Solution: Use full path
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py file.html
|
||||||
|
```
|
||||||
|
|
||||||
|
### Problem: Permission denied
|
||||||
|
```bash
|
||||||
|
# Solution: Make executable
|
||||||
|
chmod +x ~/.claude/skills/html-to-pdf/html_to_long_image.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Problem: Playwright not installed
|
||||||
|
```bash
|
||||||
|
# Solution: Install dependencies
|
||||||
|
pip install playwright pillow pypdf
|
||||||
|
playwright install chromium
|
||||||
|
```
|
||||||
|
|
||||||
|
### Problem: Content appears cut off
|
||||||
|
```bash
|
||||||
|
# This is automatically handled by the script which:
|
||||||
|
# 1. Scrolls through entire page
|
||||||
|
# 2. Waits for animations
|
||||||
|
# 3. Forces all content visible
|
||||||
|
# No action needed!
|
||||||
|
```
|
||||||
58
QUICK_START.md
Normal file
58
QUICK_START.md
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# Quick Start Guide
|
||||||
|
|
||||||
|
## One-Line Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py your_file.html
|
||||||
|
```
|
||||||
|
|
||||||
|
## What You Get
|
||||||
|
|
||||||
|
- `your_file_fullpage.png` - Complete screenshot
|
||||||
|
- `your_file_fullpage.pdf` - Single-page PDF (no page breaks!)
|
||||||
|
|
||||||
|
## Common Tasks
|
||||||
|
|
||||||
|
### Convert HTML in current directory
|
||||||
|
```bash
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py report.html
|
||||||
|
```
|
||||||
|
|
||||||
|
### Convert and open immediately
|
||||||
|
```bash
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py report.html && open report_fullpage.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
### Batch convert all HTML files
|
||||||
|
```bash
|
||||||
|
for file in *.html; do
|
||||||
|
python ~/.claude/skills/html-to-pdf/html_to_long_image.py "$file"
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
## Tips
|
||||||
|
|
||||||
|
1. **For presentations**: Use the PDF output (smaller, 1-2 MB)
|
||||||
|
2. **For high-quality images**: Use the PNG output (6-10 MB)
|
||||||
|
3. **First-time setup**: Run `playwright install chromium` once
|
||||||
|
|
||||||
|
## Comparison
|
||||||
|
|
||||||
|
| Method | Pages | Size | Best For |
|
||||||
|
|--------|-------|------|----------|
|
||||||
|
| Long Image (this skill) | 1 page | 1-2 MB | Online viewing, presentations |
|
||||||
|
| Standard A4 | 20+ pages | 5-6 MB | Printing, archiving |
|
||||||
|
|
||||||
|
## Success!
|
||||||
|
|
||||||
|
If you see this output, it worked:
|
||||||
|
```
|
||||||
|
✅ 成功生成长图!
|
||||||
|
大小: 6263.9 KB
|
||||||
|
|
||||||
|
✅ PDF生成成功!大小: 1632.6 KB
|
||||||
|
|
||||||
|
💡 打开查看:
|
||||||
|
长图: open your_file_fullpage.png
|
||||||
|
PDF: open your_file_fullpage.pdf
|
||||||
|
```
|
||||||
3
README.md
Normal file
3
README.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# html-to-pdf
|
||||||
|
|
||||||
|
Convert HTML files to PDF or PNG format with multiple rendering options. Supports multi-page PDFs, single-page long-image PDFs, background colors, and Chinese/CJK characters.
|
||||||
252
SKILL.md
Normal file
252
SKILL.md
Normal file
@@ -0,0 +1,252 @@
|
|||||||
|
---
|
||||||
|
name: html-to-pdf
|
||||||
|
description: Converts HTML files to PDF or PNG format. Use this skill when the user asks to convert, export, or generate PDF/PNG from HTML files, or when they want to create printable documents, presentations, or long-form images from web pages or HTML reports.
|
||||||
|
---
|
||||||
|
|
||||||
|
# HTML to PDF/PNG Converter Skill
|
||||||
|
|
||||||
|
This skill helps you convert HTML files to PDF or PNG format with various options for output quality and page layout.
|
||||||
|
|
||||||
|
## When to Use This Skill
|
||||||
|
|
||||||
|
Use this skill when the user wants to:
|
||||||
|
- Convert HTML files to PDF
|
||||||
|
- Generate PNG screenshots from HTML
|
||||||
|
- Create printable documents from web pages
|
||||||
|
- Export HTML reports as PDFs
|
||||||
|
- Generate long-form images without page breaks
|
||||||
|
- Create presentation-ready PDFs from HTML
|
||||||
|
|
||||||
|
## Available Conversion Methods
|
||||||
|
|
||||||
|
### Method 1: Multi-Page PDF (Standard A4)
|
||||||
|
Best for: Printing, archiving, traditional documents
|
||||||
|
|
||||||
|
**Command:**
|
||||||
|
```bash
|
||||||
|
python html_to_pdf_final.py input.html output.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Standard A4 page format
|
||||||
|
- Zero margins for seamless appearance
|
||||||
|
- Background colors and gradients preserved
|
||||||
|
- Multiple pages with page breaks
|
||||||
|
- Optimized file size (~5-6 MB for typical reports)
|
||||||
|
|
||||||
|
### Method 2: Single-Page Long Image PDF
|
||||||
|
Best for: Online viewing, presentations, no page breaks
|
||||||
|
|
||||||
|
**Command:**
|
||||||
|
```bash
|
||||||
|
python html_to_long_image.py input.html
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Generates a full-page PNG screenshot first
|
||||||
|
- Converts PNG to single-page PDF
|
||||||
|
- NO page breaks - entire content on one page
|
||||||
|
- Perfect for presentations and online viewing
|
||||||
|
- Smaller file size (~1-2 MB)
|
||||||
|
- Two output files: `.png` and `.pdf`
|
||||||
|
|
||||||
|
### Method 3: Advanced Multi-Method Converter
|
||||||
|
Best for: Fallback options, compatibility
|
||||||
|
|
||||||
|
**Command:**
|
||||||
|
```bash
|
||||||
|
python html_to_pdf_converter.py input.html output.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- Tries WeasyPrint first (best CSS support)
|
||||||
|
- Falls back to Playwright if needed
|
||||||
|
- Automatic dependency installation
|
||||||
|
- Handles complex CSS and gradients
|
||||||
|
|
||||||
|
## Required Dependencies
|
||||||
|
|
||||||
|
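The skill requires Playwright and Pillow (pypdf is only needed for the optional page-count check). The scripts try to pip-install missing Python packages automatically, but the Chromium browser must be installed once by hand: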
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install playwright pillow pypdf
|
||||||
|
playwright install chromium
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step-by-Step Instructions
|
||||||
|
|
||||||
|
### For Standard Multi-Page PDF:
|
||||||
|
|
||||||
|
1. **Verify the HTML file exists:**
|
||||||
|
```bash
|
||||||
|
ls -la *.html
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Run the converter:**
|
||||||
|
```bash
|
||||||
|
python html_to_pdf_final.py your_file.html
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Open the result:**
|
||||||
|
```bash
|
||||||
|
open your_file_final.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
### For Single-Page Long Image PDF:
|
||||||
|
|
||||||
|
1. **Verify the HTML file exists:**
|
||||||
|
```bash
|
||||||
|
ls -la *.html
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Run the long image converter:**
|
||||||
|
```bash
|
||||||
|
python html_to_long_image.py your_file.html
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Check outputs:**
|
||||||
|
```bash
|
||||||
|
# View the PNG screenshot
|
||||||
|
open your_file_fullpage.png
|
||||||
|
|
||||||
|
# View the PDF version
|
||||||
|
open your_file_fullpage.pdf
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Issue: Playwright browser not found
|
||||||
|
|
||||||
|
**Solution:**
|
||||||
|
```bash
|
||||||
|
playwright install chromium
|
||||||
|
```
|
||||||
|
|
||||||
|
### Issue: Page breaks visible in PDF
|
||||||
|
|
||||||
|
**Solution:** Use the long image method instead:
|
||||||
|
```bash
|
||||||
|
python html_to_long_image.py your_file.html
|
||||||
|
```
|
||||||
|
|
||||||
|
### Issue: Content appears cut off
|
||||||
|
|
||||||
|
**Causes & Solutions:**
|
||||||
|
- **CSS animations not complete**: `html_to_long_image.py` disables CSS animations and transitions and forces `.section`/`.cover` elements visible before capturing
|
||||||
|
- **Lazy loading**: Script scrolls through entire page to trigger loading
|
||||||
|
- **Large file size**: Scripts handle files up to 20MB+
|
||||||
|
|
||||||
|
### Issue: Blank PDF output
|
||||||
|
|
||||||
|
**Solution:** Use the long image method which uses screenshot instead of PDF rendering:
|
||||||
|
```bash
|
||||||
|
python html_to_long_image.py your_file.html
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output Files
|
||||||
|
|
||||||
|
After conversion, you'll get:
|
||||||
|
|
||||||
|
**Multi-Page PDF:**
|
||||||
|
- `filename_final.pdf` - Standard A4 multi-page PDF
|
||||||
|
|
||||||
|
**Long Image Method:**
|
||||||
|
- `filename_fullpage.png` - Complete screenshot as PNG (6-10 MB)
|
||||||
|
- `filename_fullpage.pdf` - Single-page PDF from image (1-2 MB)
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
1. **For online viewing/presentations:** Use `html_to_long_image.py`
|
||||||
|
- No page breaks
|
||||||
|
- Smooth scrolling experience
|
||||||
|
- Smaller file size
|
||||||
|
|
||||||
|
2. **For printing/archiving:** Use `html_to_pdf_final.py`
|
||||||
|
- Standard A4 pages
|
||||||
|
- Better for physical printing
|
||||||
|
- Professional document format
|
||||||
|
|
||||||
|
3. **For complex CSS:** Use `html_to_pdf_converter.py`
|
||||||
|
- Multiple fallback methods
|
||||||
|
- Better compatibility
|
||||||
|
|
||||||
|
## Implementation Notes
|
||||||
|
|
||||||
|
### Script Locations
|
||||||
|
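All scripts should be in the project directory (note: this package currently ships only `html_to_long_image.py`; the other two scripts must be added separately):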
|
||||||
|
- `html_to_pdf_final.py` - Main multi-page converter
|
||||||
|
- `html_to_long_image.py` - Long image generator
|
||||||
|
- `html_to_pdf_converter.py` - Advanced multi-method converter
|
||||||
|
|
||||||
|
### Key Features Implemented
|
||||||
|
|
||||||
|
1. **Animation Handling**: All scripts disable CSS animations/transitions
|
||||||
|
2. **Lazy Loading**: Scripts scroll through content to trigger loading
|
||||||
|
3. **Background Preservation**: All gradients and colors render correctly
|
||||||
|
4. **Zero Margins**: Seamless page appearance without visible borders
|
||||||
|
5. **Chinese Font Support**: Handles CJK characters properly
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
### Example 1: Convert BP Report to Multi-Page PDF
|
||||||
|
```bash
|
||||||
|
python html_to_pdf_final.py 202510_Alpha_Intelligence_BP.html
|
||||||
|
# Output: 202510_Alpha_Intelligence_BP_final.pdf (22 pages, 5.7 MB)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 2: Create Single-Page Presentation PDF
|
||||||
|
```bash
|
||||||
|
python html_to_long_image.py 202510_Alpha_Intelligence_BP.html
|
||||||
|
# Output:
|
||||||
|
# - 202510_Alpha_Intelligence_BP_fullpage.png (6.1 MB)
|
||||||
|
# - 202510_Alpha_Intelligence_BP_fullpage.pdf (1.6 MB, 1 page)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 3: Batch Convert Multiple Files
|
||||||
|
```bash
|
||||||
|
for file in *.html; do
|
||||||
|
python html_to_long_image.py "$file"
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
## Advanced Usage
|
||||||
|
|
||||||
|
### Custom Output Paths
|
||||||
|
```bash
|
||||||
|
python html_to_pdf_final.py input.html custom_output.pdf
|
||||||
|
python html_to_long_image.py input.html
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check PDF Page Count
|
||||||
|
```bash
|
||||||
|
python -c "from pypdf import PdfReader; r = PdfReader('output.pdf'); print(f'Pages: {len(r.pages)}')"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Verify File Sizes
|
||||||
|
```bash
|
||||||
|
ls -lh *.pdf *.png
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Expectations
|
||||||
|
|
||||||
|
- **Processing Speed**: ~5-10 seconds for typical HTML files
|
||||||
|
- **Memory Usage**: ~100-200 MB during conversion
|
||||||
|
- **PDF File Size**: 1-6 MB depending on method
|
||||||
|
- **PNG File Size**: 6-10 MB for full-page screenshots
|
||||||
|
|
||||||
|
## Success Criteria
|
||||||
|
|
||||||
|
A successful conversion should:
|
||||||
|
1. ✅ Generate PDF/PNG without errors
|
||||||
|
2. ✅ Include all HTML content (no truncation)
|
||||||
|
3. ✅ Preserve colors, gradients, and styling
|
||||||
|
4. ✅ Handle Chinese/CJK characters correctly
|
||||||
|
5. ✅ Create reasonable file sizes (< 10 MB)
|
||||||
|
|
||||||
|
## Quick Reference
|
||||||
|
|
||||||
|
| Need | Use | Output |
|
||||||
|
|------|-----|--------|
|
||||||
|
| Printing | `html_to_pdf_final.py` | Multi-page A4 PDF |
|
||||||
|
| Online viewing | `html_to_long_image.py` | Single-page PDF + PNG |
|
||||||
|
| Maximum compatibility | `html_to_pdf_converter.py` | Multi-page PDF with fallbacks |
|
||||||
34
VERSION
Normal file
34
VERSION
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# HTML to PDF Skill - Version History
|
||||||
|
|
||||||
|
## Version 1.0.0 (2025-10-23)
|
||||||
|
|
||||||
|
### Initial Release
|
||||||
|
|
||||||
|
**Features:**
|
||||||
|
- HTML to PNG screenshot conversion
|
||||||
|
- PNG to single-page PDF conversion
|
||||||
|
- Auto-install dependencies (playwright, Pillow)
|
||||||
|
- CSS animation disabling
|
||||||
|
- Lazy content loading via scrolling
|
||||||
|
- Zero-margin seamless output
|
||||||
|
- Chinese/CJK character support
|
||||||
|
|
||||||
|
**Scripts:**
|
||||||
|
- `html_to_long_image.py` - Main converter script
|
||||||
|
|
||||||
|
**Documentation:**
|
||||||
|
- SKILL.md - Main skill definition
|
||||||
|
- README.md - Installation and usage
|
||||||
|
- QUICK_START.md - One-line quick reference
|
||||||
|
- EXAMPLES.md - Real-world examples
|
||||||
|
|
||||||
|
**Tested With:**
|
||||||
|
- Python 3.12
|
||||||
|
- Playwright 1.40+
|
||||||
|
- Pillow 11.0+
|
||||||
|
- macOS (arm64)
|
||||||
|
|
||||||
|
**Known Limitations:**
|
||||||
|
- Single-page output only (no multi-page A4)
|
||||||
|
- Requires Chromium browser installation
|
||||||
|
- Large HTML files (>5MB) may take longer to process
|
||||||
166
html_to_long_image.py
Normal file
166
html_to_long_image.py
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
HTML转长图工具
|
||||||
|
将HTML渲染为一张完整的长图(PNG),然后可以转PDF
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def html_to_long_image(html_path: str, output_path: str = None) -> str:
    """
    Render an HTML file to a single full-page PNG screenshot.

    Args:
        html_path: Path to the HTML file to render.
        output_path: Destination PNG path (optional). Defaults to
            "<html stem>_fullpage.png" next to the HTML file.

    Returns:
        Path of the generated PNG, as a string.

    Raises:
        FileNotFoundError: If html_path does not exist.
    """
    # Fail fast on a bad path, before importing (or pip-installing) Playwright.
    if not os.path.exists(html_path):
        raise FileNotFoundError(f"HTML文件不存在: {html_path}")

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        # Best-effort install of the Python package only; the Chromium
        # browser itself is not installed here.
        print("正在安装 Playwright...")
        import subprocess
        subprocess.check_call([sys.executable, "-m", "pip", "install", "playwright", "-q"])
        from playwright.sync_api import sync_playwright

    html_file = Path(html_path)
    if output_path is None:
        output_path = str(html_file.parent / f"{html_file.stem}_fullpage.png")

    print(f"\n📄 转换HTML为完整长图")
    print(f" 输入: {Path(html_path).name}")
    print(f" 输出: {Path(output_path).name}\n")

    # Path.as_uri() percent-encodes spaces, '#', '?' and non-ASCII characters
    # and yields a valid file URL on Windows too; a hand-built
    # f'file://{path}' string passes all of those through unescaped.
    html_url = html_file.absolute().as_uri()

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page(viewport={'width': 1200, 'height': 800})

            # Load the HTML
            print("⏳ 加载HTML...")
            page.goto(html_url, wait_until='networkidle')

            # Disable all animations
            print("🎨 禁用动画...")
            page.add_style_tag(content="""
                *, *::before, *::after {
                    animation: none !important;
                    transition: none !important;
                }
                .section, .cover {
                    opacity: 1 !important;
                    transform: none !important;
                }
            """)

            # Force all content to be visible (inline styles, in addition to
            # the stylesheet rule above)
            page.evaluate("""
                () => {
                    document.querySelectorAll('.section, .cover').forEach(el => {
                        el.style.opacity = '1';
                        el.style.transform = 'none';
                    });
                }
            """)

            # Scroll through the page in 1000px steps so content that loads
            # on scroll gets a chance to appear
            print("📜 加载所有内容...")
            total_height = page.evaluate("document.body.scrollHeight")
            for y in range(0, total_height, 1000):
                page.evaluate(f"window.scrollTo(0, {y})")
                time.sleep(0.1)

            # Return to the top and let the layout settle before capturing
            page.evaluate("window.scrollTo(0, 0)")
            time.sleep(0.5)

            # Capture the full page
            print("📸 截取完整页面...")
            page.screenshot(path=output_path, full_page=True)
        finally:
            # Close the browser even if loading or capturing raised
            browser.close()

    size_kb = os.path.getsize(output_path) / 1024
    print(f"\n✅ 成功生成长图!")
    print(f" 大小: {size_kb:.1f} KB\n")

    return str(output_path)
|
||||||
|
|
||||||
|
|
||||||
|
def image_to_pdf(image_path: str, pdf_path: str = None) -> str:
    """
    Convert an image file to a single-page PDF.

    Args:
        image_path: Path to the source image.
        pdf_path: Destination PDF path (optional). Defaults to image_path
            with its suffix replaced by ".pdf".

    Returns:
        Path of the generated PDF, as a string.
    """
    try:
        from PIL import Image
    except ImportError:
        # Best-effort install of Pillow when it is missing
        print("正在安装 Pillow...")
        import subprocess
        subprocess.check_call([sys.executable, "-m", "pip", "install", "Pillow", "-q"])
        from PIL import Image

    if pdf_path is None:
        pdf_path = str(Path(image_path).with_suffix('.pdf'))

    print(f"📄 转换图片为PDF: {Path(pdf_path).name}")

    # Open through a context manager so the image's file handle is released
    # as soon as the PDF has been written, even if saving fails.
    with Image.open(image_path) as image:
        # Normalise to RGB before writing the PDF (same as the original flow)
        pdf_image = image if image.mode == 'RGB' else image.convert('RGB')

        # Save as PDF
        pdf_image.save(pdf_path, 'PDF', resolution=100.0)

    size_kb = os.path.getsize(pdf_path) / 1024
    print(f"✅ PDF生成成功!大小: {size_kb:.1f} KB\n")

    return str(pdf_path)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    Command-line entry point: HTML -> full-page PNG -> single-page PDF.

    Usage:
        python html_to_long_image.py <file.html>

    The HTML path is taken from the first command-line argument. With no
    argument, the legacy default file name is used if it exists in the
    current directory; otherwise a usage message is printed and the process
    exits with status 1. Any conversion error is reported with a traceback
    and also exits with status 1.
    """
    default_html = "202510_Alpha_Intelligence_BP.html"
    if len(sys.argv) > 1:
        html_path = sys.argv[1]
    elif os.path.exists(default_html):
        # Legacy fallback kept for backward compatibility
        html_path = default_html
    else:
        # No input given: show usage instead of failing on a file name the
        # user never supplied.
        print("Usage: python html_to_long_image.py <file.html>", file=sys.stderr)
        sys.exit(1)

    print("\n" + "=" * 70)
    print("HTML转完整长图工具 - 无分页断开")
    print("=" * 70)

    try:
        # Generate the long image
        image_path = html_to_long_image(html_path)

        # Convert it to PDF
        pdf_path = image_to_pdf(image_path)

        print(f"💡 打开查看:")
        print(f" 长图: open {Path(image_path).name}")
        print(f" PDF: open {Path(pdf_path).name}\n")

    except Exception as e:
        print(f"\n❌ 错误: {e}\n")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||||
61
plugin.lock.json
Normal file
61
plugin.lock.json
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
{
|
||||||
|
"$schema": "internal://schemas/plugin.lock.v1.json",
|
||||||
|
"pluginId": "gh:yonggao/claude-plugins:skills/html-to-pdf",
|
||||||
|
"normalized": {
|
||||||
|
"repo": null,
|
||||||
|
"ref": "refs/tags/v20251128.0",
|
||||||
|
"commit": "d9a7bdd2b1535728f687605158566929b17670e8",
|
||||||
|
"treeHash": "6792dd0d55d9a678dadebc65721c1a3864c4d30c619e5d185be09158b9de027d",
|
||||||
|
"generatedAt": "2025-11-28T10:29:12.715718Z",
|
||||||
|
"toolVersion": "publish_plugins.py@0.2.0"
|
||||||
|
},
|
||||||
|
"origin": {
|
||||||
|
"remote": "git@github.com:zhongweili/42plugin-data.git",
|
||||||
|
"branch": "master",
|
||||||
|
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
|
||||||
|
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
|
||||||
|
},
|
||||||
|
"manifest": {
|
||||||
|
"name": "html-to-pdf",
|
||||||
|
"description": "Convert HTML files to PDF or PNG format with multiple rendering options. Supports multi-page PDFs, single-page long-image PDFs, background colors, and Chinese/CJK characters.",
|
||||||
|
"version": null
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"path": "EXAMPLES.md",
|
||||||
|
"sha256": "3e0a847cf1df915d9cbab1aada54f1e6cdc86976fc4ea9c0f25e85bd5a56da5a"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "html_to_long_image.py",
|
||||||
|
"sha256": "450db8662b54dab9a17c8c2fd326ae6b0ac783dddef21b5c32c9d56627bfea74"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "README.md",
|
||||||
|
"sha256": "d896d73dfc7ca8ac44703f1dfe09caccf05252ffdec97c696211293447c49e4a"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "VERSION",
|
||||||
|
"sha256": "7086eb17c2adb4a5a4808c02065c4aa5bafb70ce833bbf31e237733265b0ef28"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "SKILL.md",
|
||||||
|
"sha256": "2776f5b726b2bf7c8be9475cea8aad5bdbda7f70fe48a5aead366a161fa59cd0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "QUICK_START.md",
|
||||||
|
"sha256": "b7908dc8aeb6a687aa965debe8183ab9b37c8eae0392e7a7293a652946ad87c9"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": ".claude-plugin/plugin.json",
|
||||||
|
"sha256": "5fc173f27bc0ff64119a367e2f395ab3b4da752e7facb25ad019019ab0d5904b"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"dirSha256": "6792dd0d55d9a678dadebc65721c1a3864c4d30c619e5d185be09158b9de027d"
|
||||||
|
},
|
||||||
|
"security": {
|
||||||
|
"scannedAt": null,
|
||||||
|
"scannerVersion": null,
|
||||||
|
"flags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user