Initial commit

2025-11-30 09:08:11 +08:00
commit e9e441dcb1
8 changed files with 769 additions and 0 deletions
--- a/html_to_long_image.py
+++ b/html_to_long_image.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+"""
+HTML转长图工具
+将HTML渲染为一张完整的长图（PNG），然后可以转PDF
+"""
+
+import os
+import sys
+import time
+from pathlib import Path
+
+
+def html_to_long_image(html_path: str, output_path: str = None) -> str:
+    """
+    Render an HTML file into one full-page PNG screenshot.
+
+    The file is opened in headless Chromium through Playwright, CSS
+    animations/transitions are disabled, the page is scrolled through once
+    so scroll-triggered content gets a chance to render, and a full-page
+    screenshot is written to disk.
+
+    Args:
+        html_path: Path to the HTML file to render.
+        output_path: Destination PNG path (optional). Defaults to
+            ``<html stem>_fullpage.png`` next to the HTML file.
+
+    Returns:
+        Path of the generated image, as a string.
+
+    Raises:
+        FileNotFoundError: If ``html_path`` does not exist.
+    """
+    # Validate the input first so a bad path fails fast, before any package
+    # installation or browser start-up is attempted.
+    if not os.path.exists(html_path):
+        raise FileNotFoundError(f"HTML文件不存在: {html_path}")
+
+    try:
+        from playwright.sync_api import sync_playwright
+    except ImportError:
+        print("正在安装 Playwright...")
+        import subprocess
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "playwright", "-q"])
+        # The pip package does not bundle a browser; without this step
+        # chromium.launch() fails right after a fresh install.
+        subprocess.check_call([sys.executable, "-m", "playwright", "install", "chromium"])
+        from playwright.sync_api import sync_playwright
+
+    source = Path(html_path)
+    if output_path is None:
+        output_path = str(source.parent / f"{source.stem}_fullpage.png")
+
+    print("\n📄 转换HTML为完整长图")
+    print(f"   输入: {source.name}")
+    print(f"   输出: {Path(output_path).name}\n")
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch()
+        try:
+            page = browser.new_page(viewport={'width': 1200, 'height': 800})
+
+            # Load the HTML. as_uri() percent-encodes the path, so names
+            # containing spaces, '#', '?' or '%' (and Windows drive paths)
+            # still form a valid file:// URL.
+            print("⏳ 加载HTML...")
+            page.goto(source.absolute().as_uri(), wait_until='networkidle')
+
+            # Disable all animations so nothing is captured mid-transition.
+            print("🎨 禁用动画...")
+            page.add_style_tag(content="""
+            *, *::before, *::after {
+                animation: none !important;
+                transition: none !important;
+            }
+            .section, .cover {
+                opacity: 1 !important;
+                transform: none !important;
+            }
+        """)
+
+            # Force the .section / .cover elements to be visible via inline
+            # styles as well, in case inline styles set by page scripts
+            # would otherwise win.
+            page.evaluate("""
+            () => {
+                document.querySelectorAll('.section, .cover').forEach(el => {
+                    el.style.opacity = '1';
+                    el.style.transform = 'none';
+                });
+            }
+        """)
+
+            # Scroll through the page in 1000px steps to trigger any
+            # scroll-dependent content. wait_for_timeout is used instead of
+            # time.sleep so Playwright keeps processing browser events
+            # while waiting.
+            print("📜 加载所有内容...")
+            total_height = page.evaluate("document.body.scrollHeight")
+            for y in range(0, total_height, 1000):
+                page.evaluate("y => window.scrollTo(0, y)", y)
+                page.wait_for_timeout(100)
+
+            # Return to the top and let the layout settle before capturing.
+            page.evaluate("window.scrollTo(0, 0)")
+            page.wait_for_timeout(500)
+
+            # Capture the whole page, not just the viewport.
+            print("📸 截取完整页面...")
+            page.screenshot(path=output_path, full_page=True)
+        finally:
+            # Always shut the browser down, even if rendering failed.
+            browser.close()
+
+    size_kb = os.path.getsize(output_path) / 1024
+    print("\n✅ 成功生成长图！")
+    print(f"   大小: {size_kb:.1f} KB\n")
+
+    return str(output_path)
+
+
+def image_to_pdf(image_path: str, pdf_path: str = None) -> str:
+    """
+    Convert an image file into a single-page PDF.
+
+    Args:
+        image_path: Path of the image to convert.
+        pdf_path: Destination PDF path (optional). Defaults to the image
+            path with its suffix replaced by ``.pdf``.
+
+    Returns:
+        Path of the generated PDF, as a string.
+    """
+    try:
+        from PIL import Image
+    except ImportError:
+        print("正在安装 Pillow...")
+        import subprocess
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "Pillow", "-q"])
+        from PIL import Image
+
+    if pdf_path is None:
+        image_file = Path(image_path)
+        pdf_path = str(image_file.with_suffix('.pdf'))
+
+    print(f"📄 转换图片为PDF: {Path(pdf_path).name}")
+
+    # Open the image in a context manager so the underlying file handle is
+    # released even if conversion or saving fails.
+    with Image.open(image_path) as image:
+        # The PDF is written from RGB data, so other modes (e.g. RGBA from
+        # a PNG) are converted first.
+        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
+
+        # Save as PDF.
+        rgb_image.save(pdf_path, 'PDF', resolution=100.0)
+
+    size_kb = os.path.getsize(pdf_path) / 1024
+    print(f"✅ PDF生成成功！大小: {size_kb:.1f} KB\n")
+
+    return str(pdf_path)
+
+
+def main():
+    """
+    Command-line entry point: render an HTML file to a long PNG, then a PDF.
+
+    The HTML path is taken from the first command-line argument; when none
+    is given, a built-in default file name is used. Any exception is
+    reported together with a traceback and the process exits with status 1.
+    """
+    cli_args = sys.argv[1:]
+    html_path = cli_args[0] if cli_args else "202510_Alpha_Intelligence_BP.html"
+
+    rule = "=" * 70
+    print("\n" + rule)
+    print("HTML转完整长图工具 - 无分页断开")
+    print(rule)
+
+    try:
+        # Step 1: HTML -> full-page PNG. Step 2: PNG -> PDF.
+        image_path = html_to_long_image(html_path)
+        pdf_path = image_to_pdf(image_path)
+
+        # Tell the user how to open the two generated files.
+        print("💡 打开查看:")
+        print(f"   长图: open {Path(image_path).name}")
+        print(f"   PDF:  open {Path(pdf_path).name}\n")
+    except Exception as e:
+        print(f"\n❌ 错误: {e}\n")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
+
+# Run the converter only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()