36 lines
1.1 KiB
Python
36 lines
1.1 KiB
Python
import os
|
|
import sys
|
|
|
|
from pdf2image import convert_from_path
|
|
|
|
|
|
# Converts each page of a PDF to a PNG image.
|
|
|
|
|
|
def convert(pdf_path, output_dir, max_dim=1000):
|
|
images = convert_from_path(pdf_path, dpi=200)
|
|
|
|
for i, image in enumerate(images):
|
|
# Scale image if needed to keep width/height under `max_dim`
|
|
width, height = image.size
|
|
if width > max_dim or height > max_dim:
|
|
scale_factor = min(max_dim / width, max_dim / height)
|
|
new_width = int(width * scale_factor)
|
|
new_height = int(height * scale_factor)
|
|
image = image.resize((new_width, new_height))
|
|
|
|
image_path = os.path.join(output_dir, f"page_{i+1}.png")
|
|
image.save(image_path)
|
|
print(f"Saved page {i+1} as {image_path} (size: {image.size})")
|
|
|
|
print(f"Converted {len(images)} pages to PNG images")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if len(sys.argv) != 3:
|
|
print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
|
|
sys.exit(1)
|
|
pdf_path = sys.argv[1]
|
|
output_directory = sys.argv[2]
|
|
convert(pdf_path, output_directory)
|