import os
import sys

from pdf2image import convert_from_path


def convert(pdf_path, output_dir, max_dim=1000):
    """Convert each page of a PDF to a PNG image.

    Every page is rendered at 200 DPI. A page whose width or height
    exceeds ``max_dim`` is scaled down, preserving its aspect ratio, so
    that both dimensions fit within ``max_dim``. Pages are written to
    ``output_dir`` as ``page_1.png``, ``page_2.png``, ... and a progress
    line is printed for each saved page.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory that receives the PNG files. It is created
            if it does not exist yet.
        max_dim: Maximum allowed width and height, in pixels, of each
            saved image.

    Raises:
        ValueError: If ``max_dim`` is not positive.
    """
    if max_dim <= 0:
        raise ValueError(f"max_dim must be positive, got {max_dim}")

    # Saving into a missing directory would fail, so create it up front.
    # An empty path means "current directory" for os.path.join below, and
    # os.makedirs("") would raise, hence the guard.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    images = convert_from_path(pdf_path, dpi=200)

    for page_number, image in enumerate(images, start=1):
        # Scale image if needed to keep width/height under `max_dim`
        width, height = image.size
        if width > max_dim or height > max_dim:
            scale_factor = min(max_dim / width, max_dim / height)
            # int() truncates, so a very thin or very flat page could end
            # up with a 0-pixel side; clamp each side to at least 1 pixel.
            new_width = max(1, int(width * scale_factor))
            new_height = max(1, int(height * scale_factor))
            image = image.resize((new_width, new_height))

        image_path = os.path.join(output_dir, f"page_{page_number}.png")
        image.save(image_path)
        print(f"Saved page {page_number} as {image_path} (size: {image.size})")

    print(f"Converted {len(images)} pages to PNG images")


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
        sys.exit(1)
    pdf_path = sys.argv[1]
    output_directory = sys.argv[2]
    convert(pdf_path, output_directory)