Files
gh-yonggao-claude-plugins-s…/html_to_long_image.py
2025-11-30 09:08:11 +08:00

167 lines
4.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
HTML-to-long-image tool.

Renders an HTML file into one complete full-page PNG, which can then be
converted to a PDF.
"""
import os
import sys
import time
from pathlib import Path
def html_to_long_image(html_path: str, output_path: str = None) -> str:
    """Render an HTML file to a single full-page PNG screenshot.

    The page is opened in headless Chromium through Playwright, CSS
    animations/transitions are disabled, the page is scrolled from top to
    bottom in 1000px steps, and a full-page screenshot is saved.

    Args:
        html_path: Path to the HTML file to render.
        output_path: Destination PNG path (optional). Defaults to
            ``<html stem>_fullpage.png`` in the same directory as the
            HTML file.

    Returns:
        The path of the generated image.

    Raises:
        FileNotFoundError: If ``html_path`` does not exist.
        subprocess.CalledProcessError: If Playwright is missing and the
            automatic installation fails.
    """
    # Validate the input first so a bad path fails fast, before any
    # dependency installation or browser start-up is attempted.
    if not os.path.exists(html_path):
        raise FileNotFoundError(f"HTML文件不存在: {html_path}")

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("正在安装 Playwright...")
        import importlib
        import subprocess
        subprocess.check_call([sys.executable, "-m", "pip", "install", "playwright", "-q"])
        # The pip package does not bundle browser binaries; without this
        # step the chromium.launch() below fails right after a fresh install.
        subprocess.check_call([sys.executable, "-m", "playwright", "install", "chromium"])
        # Make the freshly installed package visible to the import system
        # of the already-running interpreter.
        importlib.invalidate_caches()
        from playwright.sync_api import sync_playwright

    source = Path(html_path)
    if output_path is None:
        output_path = str(source.parent / f"{source.stem}_fullpage.png")

    print(f"\n📄 转换HTML为完整长图")
    print(f" 输入: {source.name}")
    print(f" 输出: {Path(output_path).name}\n")

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page(viewport={'width': 1200, 'height': 800})

            # Load the HTML. as_uri() percent-encodes spaces, '#', '?' etc.
            # and produces a correct file:/// URL on Windows, which plain
            # string concatenation does not.
            print("⏳ 加载HTML...")
            page.goto(source.absolute().as_uri(), wait_until='networkidle')

            # Disable all animations so elements are captured in their
            # final state rather than mid-transition.
            print("🎨 禁用动画...")
            page.add_style_tag(content="""
                *, *::before, *::after {
                    animation: none !important;
                    transition: none !important;
                }
                .section, .cover {
                    opacity: 1 !important;
                    transform: none !important;
                }
            """)

            # Force all content to be visible via inline styles as well.
            page.evaluate("""
                () => {
                    document.querySelectorAll('.section, .cover').forEach(el => {
                        el.style.opacity = '1';
                        el.style.transform = 'none';
                    });
                }
            """)

            # Scroll through the page so scroll-triggered content loads.
            # page.wait_for_timeout is used instead of time.sleep so the
            # Playwright event loop keeps running while waiting.
            print("📜 加载所有内容...")
            total_height = page.evaluate("document.body.scrollHeight")
            for y in range(0, total_height, 1000):
                page.evaluate(f"window.scrollTo(0, {y})")
                page.wait_for_timeout(100)
            page.evaluate("window.scrollTo(0, 0)")
            page.wait_for_timeout(500)

            # Capture the complete page.
            print("📸 截取完整页面...")
            page.screenshot(path=output_path, full_page=True)
        finally:
            # Always release the browser, even if loading or capture failed.
            browser.close()

    size_kb = os.path.getsize(output_path) / 1024
    print(f"\n✅ 成功生成长图!")
    print(f" 大小: {size_kb:.1f} KB\n")
    return str(output_path)
def image_to_pdf(image_path: str, pdf_path: str = None) -> str:
    """Convert an image file into a PDF.

    Args:
        image_path: Path of the image to convert.
        pdf_path: Destination PDF path (optional). Defaults to the image
            path with its suffix replaced by ``.pdf``.

    Returns:
        The path of the generated PDF.

    Raises:
        subprocess.CalledProcessError: If Pillow is missing and the
            automatic installation fails.
    """
    try:
        from PIL import Image
    except ImportError:
        print("正在安装 Pillow...")
        import importlib
        import subprocess
        subprocess.check_call([sys.executable, "-m", "pip", "install", "Pillow", "-q"])
        # Make the freshly installed package visible to the import system
        # of the already-running interpreter.
        importlib.invalidate_caches()
        from PIL import Image

    if pdf_path is None:
        pdf_path = str(Path(image_path).with_suffix('.pdf'))
    print(f"📄 转换图片为PDF: {Path(pdf_path).name}")

    # Open the image in a context manager so the underlying file handle is
    # closed even if conversion or saving fails.
    with Image.open(image_path) as image:
        # The PDF is written from an RGB image.
        if image.mode in ('RGBA', 'LA') or 'transparency' in image.info:
            # Flatten transparency onto white; a plain convert('RGB') just
            # drops the alpha band, which usually renders transparent
            # regions as black.
            rgba = image.convert('RGBA')
            page_image = Image.new('RGB', rgba.size, (255, 255, 255))
            page_image.paste(rgba, mask=rgba.split()[-1])
        elif image.mode != 'RGB':
            page_image = image.convert('RGB')
        else:
            page_image = image

        # Save as PDF.
        page_image.save(pdf_path, 'PDF', resolution=100.0)

    size_kb = os.path.getsize(pdf_path) / 1024
    print(f"✅ PDF生成成功大小: {size_kb:.1f} KB\n")
    return str(pdf_path)
def main():
    """Command-line entry point: render an HTML file to a PNG, then a PDF.

    The HTML path is taken from the first command-line argument; when no
    argument is given, a built-in default file name is used. Any exception
    raised during conversion is reported with a traceback and the process
    exits with status 1.
    """
    default_html = "202510_Alpha_Intelligence_BP.html"
    cli_args = sys.argv[1:]
    html_path = cli_args[0] if cli_args else default_html

    rule = "=" * 70
    print("\n" + rule)
    print("HTML转完整长图工具 - 无分页断开")
    print(rule)

    try:
        # Step 1: HTML -> long PNG. Step 2: PNG -> PDF.
        image_path = html_to_long_image(html_path)
        pdf_path = image_to_pdf(image_path)

        image_name = Path(image_path).name
        pdf_name = Path(pdf_path).name
        print("💡 打开查看:")
        print(f" 长图: open {image_name}")
        print(f" PDF: open {pdf_name}\n")
    except Exception as e:
        # Top-level boundary: report the failure and signal it to the shell.
        import traceback
        print(f"\n❌ 错误: {e}\n")
        traceback.print_exc()
        sys.exit(1)
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()