167 lines
4.5 KiB
Python
167 lines
4.5 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
HTML转长图工具
|
||
将HTML渲染为一张完整的长图(PNG),然后可以转PDF
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import time
|
||
from pathlib import Path
|
||
|
||
|
||
def html_to_long_image(html_path: str, output_path: str = None) -> str:
    """Render an HTML file to a single full-page PNG screenshot.

    The page is opened in headless Chromium through Playwright, CSS
    animations and transitions are disabled, ``.section`` / ``.cover``
    elements are forced visible, the page is scrolled top to bottom once,
    and a ``full_page`` screenshot is written.

    Args:
        html_path: Path to the HTML file to render.
        output_path: Destination PNG path. When omitted, the image is
            written next to the HTML file as ``<stem>_fullpage.png``.

    Returns:
        The path of the generated PNG, as a string.

    Raises:
        FileNotFoundError: If ``html_path`` is not an existing file.
        subprocess.CalledProcessError: If Playwright is missing and the
            automatic installation fails.
    """
    source = Path(html_path)
    # Validate the input first so a bad path fails fast, before any package
    # installation or browser start-up is attempted.
    if not source.is_file():
        raise FileNotFoundError(f"HTML文件不存在: {html_path}")

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("正在安装 Playwright...")
        import importlib
        import subprocess

        subprocess.check_call([sys.executable, "-m", "pip", "install", "playwright", "-q"])
        # The pip package does not bundle browser binaries; without this step
        # chromium.launch() fails on a fresh installation.
        subprocess.check_call([sys.executable, "-m", "playwright", "install", "chromium"])
        # Packages installed after interpreter start-up may be invisible to
        # the import system until its finder caches are refreshed.
        importlib.invalidate_caches()
        from playwright.sync_api import sync_playwright

    if output_path is None:
        output_path = str(source.parent / f"{source.stem}_fullpage.png")

    print("\n📄 转换HTML为完整长图")
    print(f" 输入: {source.name}")
    print(f" 输出: {Path(output_path).name}\n")

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page(viewport={'width': 1200, 'height': 800})

            # Load the HTML. as_uri() yields a correctly percent-encoded
            # file:// URL on every platform (Windows drive letters, spaces,
            # '#', non-ASCII characters).
            print("⏳ 加载HTML...")
            page.goto(source.resolve().as_uri(), wait_until='networkidle')

            # Disable all animations so nothing is captured mid-transition.
            print("🎨 禁用动画...")
            page.add_style_tag(content="""
                *, *::before, *::after {
                    animation: none !important;
                    transition: none !important;
                }
                .section, .cover {
                    opacity: 1 !important;
                    transform: none !important;
                }
            """)

            # Force every section to be shown, overriding inline styles too.
            page.evaluate("""
                () => {
                    document.querySelectorAll('.section, .cover').forEach(el => {
                        el.style.opacity = '1';
                        el.style.transform = 'none';
                    });
                }
            """)

            # Scroll through the page once, then return to the top.
            print("📜 加载所有内容...")
            total_height = page.evaluate("document.body.scrollHeight")
            for y in range(0, total_height, 1000):
                page.evaluate(f"window.scrollTo(0, {y})")
                # wait_for_timeout keeps Playwright's event loop running,
                # unlike time.sleep in the sync API.
                page.wait_for_timeout(100)

            page.evaluate("window.scrollTo(0, 0)")
            page.wait_for_timeout(500)

            # Capture the whole page.
            print("📸 截取完整页面...")
            page.screenshot(path=output_path, full_page=True)
        finally:
            # Always shut the browser down, even when a step above raised.
            browser.close()

    size_kb = os.path.getsize(output_path) / 1024
    print("\n✅ 成功生成长图!")
    print(f" 大小: {size_kb:.1f} KB\n")

    return str(output_path)
|
||
|
||
|
||
def image_to_pdf(image_path: str, pdf_path: str = None) -> str:
    """Convert an image file into a single-page PDF using Pillow.

    Args:
        image_path: Path of the image to convert.
        pdf_path: Destination PDF path. When omitted, the PDF is written
            next to the image with the suffix replaced by ``.pdf``.

    Returns:
        The path of the generated PDF, as a string.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        subprocess.CalledProcessError: If Pillow is missing and the
            automatic installation fails.
    """
    image_file = Path(image_path)
    # Validate the input first so a bad path fails fast, before any package
    # installation is attempted.
    if not image_file.is_file():
        raise FileNotFoundError(f"图片文件不存在: {image_path}")

    try:
        from PIL import Image
    except ImportError:
        print("正在安装 Pillow...")
        import importlib
        import subprocess

        subprocess.check_call([sys.executable, "-m", "pip", "install", "Pillow", "-q"])
        # Packages installed after interpreter start-up may be invisible to
        # the import system until its finder caches are refreshed.
        importlib.invalidate_caches()
        from PIL import Image

    if pdf_path is None:
        pdf_path = str(image_file.with_suffix('.pdf'))

    print(f"📄 转换图片为PDF: {Path(pdf_path).name}")

    # Open the image inside a context manager so the underlying file handle
    # is released even when the conversion or the save fails.
    with Image.open(image_path) as image:
        # The original code converts every non-RGB image before saving.
        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
        rgb_image.save(pdf_path, 'PDF', resolution=100.0)

    size_kb = os.path.getsize(pdf_path) / 1024
    print(f"✅ PDF生成成功!大小: {size_kb:.1f} KB\n")

    return str(pdf_path)
|
||
|
||
|
||
def main():
    """Command-line entry point: render an HTML file to a PNG, then a PDF.

    The HTML path is taken from the first command-line argument, falling
    back to a built-in default file name. Any exception is reported with a
    traceback and the process exits with status 1.
    """
    fallback_html = "202510_Alpha_Intelligence_BP.html"
    cli_args = sys.argv[1:]
    html_path = cli_args[0] if cli_args else fallback_html

    rule = "=" * 70
    print("\n" + rule)
    print("HTML转完整长图工具 - 无分页断开")
    print(rule)

    try:
        # Step 1: HTML -> long PNG. Step 2: PNG -> PDF.
        png_file = html_to_long_image(html_path)
        pdf_file = image_to_pdf(png_file)

        print("💡 打开查看:")
        print(f" 长图: open {Path(png_file).name}")
        print(f" PDF: open {Path(pdf_file).name}\n")
    except Exception as err:
        # Top-level boundary: show the message and full traceback, then
        # signal failure to the shell.
        print(f"\n❌ 错误: {err}\n")
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|