#!/usr/bin/env python3
"""
HTML-to-long-image tool.

Renders an HTML file into one full-page PNG ("long image") using Playwright,
and can then wrap that PNG into a single-page PDF using Pillow.
"""

import importlib
import os
import subprocess
import sys
import time
from pathlib import Path
from typing import Optional

# Stylesheet injected into the page so that nothing is captured mid-animation
# and elements with the `.section` / `.cover` classes are fully visible.
_DISABLE_ANIMATION_CSS = (
    " *, *::before, *::after { animation: none !important;"
    " transition: none !important; }"
    " .section, .cover { opacity: 1 !important; transform: none !important; } "
)

# Script that sets the same visibility properties as inline styles on the
# `.section` / `.cover` elements.
_FORCE_VISIBLE_JS = (
    " () => { document.querySelectorAll('.section, .cover').forEach(el => {"
    " el.style.opacity = '1'; el.style.transform = 'none'; }); } "
)


def _pip_install(package: str) -> None:
    """Install *package* with pip into the running interpreter's environment."""
    subprocess.check_call([sys.executable, "-m", "pip", "install", package, "-q"])
    # Packages installed while the program is running may be missed by the
    # import system's directory caches; refresh them before re-importing.
    importlib.invalidate_caches()


def html_to_long_image(html_path: str, output_path: Optional[str] = None) -> str:
    """
    Render an HTML file into a single full-page PNG.

    Args:
        html_path: Path to the HTML file to render.
        output_path: Where to write the PNG (optional). Defaults to
            ``<html stem>_fullpage.png`` next to the HTML file.

    Returns:
        The path of the generated image.

    Raises:
        FileNotFoundError: If ``html_path`` does not exist.
        subprocess.CalledProcessError: If Playwright is missing and the
            automatic installation fails.
    """
    # Validate the input first so a mistyped path fails immediately instead
    # of after a (possibly slow) dependency installation.
    if not os.path.exists(html_path):
        raise FileNotFoundError(f"HTML文件不存在: {html_path}")

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("正在安装 Playwright...")
        _pip_install("playwright")
        # pip only installs the Python package; the browser binary that
        # `chromium.launch()` needs is downloaded by a separate command.
        subprocess.check_call([sys.executable, "-m", "playwright", "install", "chromium"])
        from playwright.sync_api import sync_playwright

    html_file = Path(html_path)
    if output_path is None:
        output_path = str(html_file.parent / f"{html_file.stem}_fullpage.png")

    print("\n📄 转换HTML为完整长图")
    print(f" 输入: {html_file.name}")
    print(f" 输出: {Path(output_path).name}\n")

    # `as_uri()` percent-encodes spaces, '#', '?' and non-ASCII characters and
    # produces a valid file URL on Windows too; it requires an absolute path.
    page_url = html_file.resolve().as_uri()

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page(viewport={'width': 1200, 'height': 800})

            # Load the HTML.
            print("⏳ 加载HTML...")
            page.goto(page_url, wait_until='networkidle')

            # Disable all animations.
            print("🎨 禁用动画...")
            page.add_style_tag(content=_DISABLE_ANIMATION_CSS)

            # Force all content to be shown.
            page.evaluate(_FORCE_VISIBLE_JS)

            # Scroll through the page in 1000px steps, pausing briefly at
            # each one, then return to the top before taking the screenshot.
            print("📜 加载所有内容...")
            total_height = page.evaluate("document.body.scrollHeight")
            for y in range(0, total_height, 1000):
                page.evaluate(f"window.scrollTo(0, {y})")
                time.sleep(0.1)
            page.evaluate("window.scrollTo(0, 0)")
            time.sleep(0.5)

            # Capture the full page.
            print("📸 截取完整页面...")
            page.screenshot(path=output_path, full_page=True)
        finally:
            # Close the browser even when loading or capturing fails.
            browser.close()

    size_kb = os.path.getsize(output_path) / 1024
    print("\n✅ 成功生成长图!")
    print(f" 大小: {size_kb:.1f} KB\n")
    return str(output_path)


def image_to_pdf(image_path: str, pdf_path: Optional[str] = None) -> str:
    """
    Convert an image into a PDF.

    Args:
        image_path: Path to the source image.
        pdf_path: Where to write the PDF (optional). Defaults to the image
            path with its suffix replaced by ``.pdf``.

    Returns:
        The path of the generated PDF.

    Raises:
        subprocess.CalledProcessError: If Pillow is missing and the automatic
            installation fails.
    """
    try:
        from PIL import Image
    except ImportError:
        print("正在安装 Pillow...")
        _pip_install("Pillow")
        from PIL import Image

    if pdf_path is None:
        pdf_path = str(Path(image_path).with_suffix('.pdf'))

    print(f"📄 转换图片为PDF: {Path(pdf_path).name}")

    # Open the image in a context manager so the file handle is released
    # even if the conversion or the save fails.
    with Image.open(image_path) as image:
        if image.mode == 'RGB':
            image.save(pdf_path, 'PDF', resolution=100.0)
        else:
            # Convert to RGB first, as the original code does for any other
            # mode; the converted copy is a separate image, so close it too.
            with image.convert('RGB') as rgb_image:
                rgb_image.save(pdf_path, 'PDF', resolution=100.0)

    size_kb = os.path.getsize(pdf_path) / 1024
    print(f"✅ PDF生成成功!大小: {size_kb:.1f} KB\n")
    return str(pdf_path)


def main():
    """
    Command-line entry point: HTML -> long PNG -> PDF.

    Uses the first command-line argument as the HTML path, or a built-in
    default file name when none is given. Any failure is printed together
    with its traceback and the process exits with status 1.
    """
    default_html = "202510_Alpha_Intelligence_BP.html"
    html_path = sys.argv[1] if len(sys.argv) > 1 else default_html

    print("\n" + "=" * 70)
    print("HTML转完整长图工具 - 无分页断开")
    print("=" * 70)

    try:
        # Generate the long image.
        image_path = html_to_long_image(html_path)

        # Convert it to a PDF.
        pdf_path = image_to_pdf(image_path)

        print("💡 打开查看:")
        print(f" 长图: open {Path(image_path).name}")
        print(f" PDF: open {Path(pdf_path).name}\n")
    except Exception as e:
        # Top-level boundary: report the error with its traceback, then
        # signal failure to the shell.
        print(f"\n❌ 错误: {e}\n")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()