Files
gh-k-dense-ai-claude-scient…/skills/document-skills/pdf/scripts/convert_pdf_to_images.py
2025-11-30 08:30:14 +08:00

36 lines
1.1 KiB
Python

import os
import sys
from pdf2image import convert_from_path
# Converts each page of a PDF to a PNG image.
def convert(pdf_path, output_dir, max_dim=1000):
images = convert_from_path(pdf_path, dpi=200)
for i, image in enumerate(images):
# Scale image if needed to keep width/height under `max_dim`
width, height = image.size
if width > max_dim or height > max_dim:
scale_factor = min(max_dim / width, max_dim / height)
new_width = int(width * scale_factor)
new_height = int(height * scale_factor)
image = image.resize((new_width, new_height))
image_path = os.path.join(output_dir, f"page_{i+1}.png")
image.save(image_path)
print(f"Saved page {i+1} as {image_path} (size: {image.size})")
print(f"Converted {len(images)} pages to PNG images")
if __name__ == "__main__":
if len(sys.argv) != 3:
print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
sys.exit(1)
pdf_path = sys.argv[1]
output_directory = sys.argv[2]
convert(pdf_path, output_directory)