Initial commit
This commit is contained in:
166
html_to_long_image.py
Normal file
166
html_to_long_image.py
Normal file
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
HTML转长图工具
|
||||
将HTML渲染为一张完整的长图(PNG),然后可以转PDF
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def html_to_long_image(html_path: str, output_path: str = None) -> str:
    """Render an HTML file to a single full-page PNG screenshot.

    The page is opened in Chromium through Playwright with a 1200x800
    viewport. CSS animations and transitions are disabled, elements matching
    ``.section`` and ``.cover`` are forced visible, the page is scrolled from
    top to bottom once, and a full-page screenshot is written.

    Args:
        html_path: Path to the HTML file to render.
        output_path: Destination PNG path (optional). Defaults to
            ``<html stem>_fullpage.png`` in the same directory as the HTML.

    Returns:
        The path of the generated image, as a string.

    Raises:
        FileNotFoundError: If ``html_path`` does not exist.
    """
    # Validate the input first so that a bad path fails immediately rather
    # than after an attempt to install Playwright.
    if not os.path.exists(html_path):
        raise FileNotFoundError(f"HTML文件不存在: {html_path}")

    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        # Best-effort self-install of the Python package.
        # NOTE(review): the browser binary may still need a separate
        # `playwright install chromium` step -- confirm for your environment.
        print("正在安装 Playwright...")
        import subprocess
        subprocess.check_call([sys.executable, "-m", "pip", "install", "playwright", "-q"])
        from playwright.sync_api import sync_playwright

    if output_path is None:
        html_file = Path(html_path)
        output_path = str(html_file.parent / f"{html_file.stem}_fullpage.png")

    print(f"\n📄 转换HTML为完整长图")
    print(f" 输入: {Path(html_path).name}")
    print(f" 输出: {Path(output_path).name}\n")

    # Build the URL with as_uri() instead of string concatenation so that
    # spaces, '#', '?', non-ASCII characters and Windows drive letters are
    # encoded into a valid file:// URL.
    html_url = Path(html_path).absolute().as_uri()

    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page(viewport={'width': 1200, 'height': 800})

            # Load the HTML
            print("⏳ 加载HTML...")
            page.goto(html_url, wait_until='networkidle')

            # Disable all animations
            print("🎨 禁用动画...")
            page.add_style_tag(content="""
                *, *::before, *::after {
                    animation: none !important;
                    transition: none !important;
                }
                .section, .cover {
                    opacity: 1 !important;
                    transform: none !important;
                }
            """)

            # Force all content to be visible (inline styles as well, in
            # addition to the stylesheet rule above).
            page.evaluate("""
                () => {
                    document.querySelectorAll('.section, .cover').forEach(el => {
                        el.style.opacity = '1';
                        el.style.transform = 'none';
                    });
                }
            """)

            # Scroll through the page in 1000px steps so that content loaded
            # on scroll gets a chance to appear.
            print("📜 加载所有内容...")
            total_height = page.evaluate("document.body.scrollHeight")
            for y in range(0, total_height, 1000):
                page.evaluate(f"window.scrollTo(0, {y})")
                # wait_for_timeout (milliseconds) is used instead of
                # time.sleep, as recommended for Playwright's sync API.
                page.wait_for_timeout(100)

            # Return to the top and give the page a moment to settle.
            page.evaluate("window.scrollTo(0, 0)")
            page.wait_for_timeout(500)

            # Capture the complete page
            print("📸 截取完整页面...")
            page.screenshot(path=output_path, full_page=True)
        finally:
            # Close the browser even when loading or capturing fails.
            browser.close()

    size_kb = os.path.getsize(output_path) / 1024
    print(f"\n✅ 成功生成长图!")
    print(f" 大小: {size_kb:.1f} KB\n")

    return str(output_path)
|
||||
|
||||
|
||||
def image_to_pdf(image_path: str, pdf_path: str = None) -> str:
    """Convert an image file to a PDF.

    Args:
        image_path: Path to the source image.
        pdf_path: Destination PDF path (optional). Defaults to the image
            path with its suffix replaced by ``.pdf``.

    Returns:
        The path of the generated PDF, as a string.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    # Fail fast on a missing input, before any attempt to install Pillow.
    # Opening a missing path would raise the same exception type later.
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"图片文件不存在: {image_path}")

    try:
        from PIL import Image
    except ImportError:
        # Best-effort self-install of the dependency, then retry the import.
        print("正在安装 Pillow...")
        import subprocess
        subprocess.check_call([sys.executable, "-m", "pip", "install", "Pillow", "-q"])
        from PIL import Image

    if pdf_path is None:
        image_file = Path(image_path)
        pdf_path = str(image_file.with_suffix('.pdf'))

    print(f"📄 转换图片为PDF: {Path(pdf_path).name}")

    # Open the image inside a context manager so the underlying file handle
    # is released even if conversion or saving fails.
    with Image.open(image_path) as image:
        # Convert to RGB (needed for PDF output)
        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

        # Save as PDF
        rgb_image.save(pdf_path, 'PDF', resolution=100.0)

    size_kb = os.path.getsize(pdf_path) / 1024
    print(f"✅ PDF生成成功!大小: {size_kb:.1f} KB\n")

    return str(pdf_path)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: HTML -> long PNG -> PDF.

    Uses the first command-line argument as the HTML path, falling back to a
    built-in default file name. Any exception is reported with its traceback
    and the process exits with status 1.
    """
    default_html = "202510_Alpha_Intelligence_BP.html"
    cli_args = sys.argv[1:]
    if cli_args:
        html_path = cli_args[0]
    else:
        html_path = default_html

    banner_rule = "=" * 70
    print("\n" + banner_rule)
    print("HTML转完整长图工具 - 无分页断开")
    print(banner_rule)

    try:
        # Step 1: render the long image
        image_path = html_to_long_image(html_path)

        # Step 2: wrap the image in a PDF
        pdf_path = image_to_pdf(image_path)

        print(f"💡 打开查看:")
        print(f" 长图: open {Path(image_path).name}")
        print(f" PDF: open {Path(pdf_path).name}\n")
    except Exception as e:
        # Top-level boundary: report the failure in full and signal it to
        # the shell through the exit status.
        print(f"\n❌ 错误: {e}\n")
        import traceback
        traceback.print_exc()
        sys.exit(1)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user