Initial commit
This commit is contained in:
108
skills/pdf/scripts/fill_pdf_form_with_annotations.py
Normal file
108
skills/pdf/scripts/fill_pdf_form_with_annotations.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
from pypdf.annotations import FreeText
|
||||
|
||||
|
||||
# Fills a PDF by adding text annotations defined in `fields.json`. See forms.md.
|
||||
|
||||
|
||||
def transform_coordinates(bbox, image_width, image_height, pdf_width, pdf_height):
    """Transform a bounding box from image coordinates to PDF coordinates.

    Image coordinates have their origin at the top-left with y increasing
    downward; PDF coordinates have their origin at the bottom-left with y
    increasing upward. The box is scaled by the PDF/image size ratio on each
    axis and then flipped vertically.

    Args:
        bbox: Sequence indexed as ``[left, top, right, bottom]`` in image
            coordinates.
        image_width: Width of the image the box was measured on.
        image_height: Height of the image the box was measured on.
        pdf_width: Width of the target PDF page.
        pdf_height: Height of the target PDF page.

    Returns:
        A ``(left, bottom, right, top)`` tuple in PDF coordinates.
    """
    x_scale = pdf_width / image_width
    y_scale = pdf_height / image_height

    left = bbox[0] * x_scale
    right = bbox[2] * x_scale

    # Flip the Y axis: the image "top" edge (smaller y) becomes the larger
    # PDF y value, and the image "bottom" edge becomes the smaller one.
    top = pdf_height - (bbox[1] * y_scale)
    bottom = pdf_height - (bbox[3] * y_scale)

    return left, bottom, right, top
|
||||
|
||||
|
||||
def fill_pdf_form(input_pdf_path, fields_json_path, output_pdf_path):
    """Fill a PDF by adding a FreeText annotation for each non-empty field.

    Loads the field descriptions from ``fields_json_path`` (format described
    in forms.md), copies every page of ``input_pdf_path`` into a new writer,
    adds one annotation per field that has non-empty text, and writes the
    result to ``output_pdf_path``. Prints a short summary when done.

    Args:
        input_pdf_path: Path of the PDF to read.
        fields_json_path: Path of the JSON file with ``"pages"`` and
            ``"form_fields"`` entries.
        output_pdf_path: Path the annotated PDF is written to.

    Raises:
        ValueError: If a field with text references a ``page_number`` that
            has no entry in the JSON ``"pages"`` list.
        KeyError: If a required key is missing from the JSON data, or a
            field's ``page_number`` does not exist in the PDF.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(fields_json_path, "r", encoding="utf-8") as f:
        fields_data = json.load(f)

    reader = PdfReader(input_pdf_path)
    writer = PdfWriter()

    # Copy all pages to the writer.
    writer.append(reader)

    # Page sizes keyed by 1-based page number, matching the JSON numbering.
    pdf_dimensions = {}
    for page_num, page in enumerate(reader.pages, start=1):
        mediabox = page.mediabox
        pdf_dimensions[page_num] = [mediabox.width, mediabox.height]

    # Index the page metadata once instead of scanning the list per field.
    # setdefault keeps the first entry when a page number is duplicated.
    page_info_by_number = {}
    for page_info in fields_data.get("pages", []):
        page_info_by_number.setdefault(page_info["page_number"], page_info)

    annotation_count = 0
    for field in fields_data["form_fields"]:
        # Skip empty fields before doing any lookups so that blank entries
        # do not need page metadata or a bounding box.
        entry_text = field.get("entry_text")
        if not entry_text or "text" not in entry_text:
            continue
        text = entry_text["text"]
        if not text:
            continue

        page_num = field["page_number"]
        page_info = page_info_by_number.get(page_num)
        if page_info is None:
            raise ValueError(
                f"No entry in 'pages' for page_number {page_num!r} "
                f"referenced by a form field"
            )
        pdf_width, pdf_height = pdf_dimensions[page_num]

        transformed_entry_box = transform_coordinates(
            field["entry_bounding_box"],
            page_info["image_width"],
            page_info["image_height"],
            pdf_width,
            pdf_height,
        )

        font_name = entry_text.get("font", "Arial")
        # The font size is passed as a string with a "pt" suffix.
        font_size = str(entry_text.get("font_size", 14)) + "pt"
        font_color = entry_text.get("font_color", "000000")

        # Font size/color seems to not work reliably across viewers:
        # https://github.com/py-pdf/pypdf/issues/2084
        annotation = FreeText(
            text=text,
            rect=transformed_entry_box,
            font=font_name,
            font_size=font_size,
            font_color=font_color,
            border_color=None,
            background_color=None,
        )
        # page_number is 0-based for pypdf.
        writer.add_annotation(page_number=page_num - 1, annotation=annotation)
        annotation_count += 1

    # Save the filled PDF.
    with open(output_pdf_path, "wb") as output:
        writer.write(output)

    print(f"Successfully filled PDF form and saved to {output_pdf_path}")
    print(f"Added {annotation_count} text annotations")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exactly three arguments are required after the script name.
    if len(sys.argv) != 4:
        # Usage errors go to stderr so they are not mistaken for output.
        print(
            "Usage: fill_pdf_form_with_annotations.py [input pdf] [fields.json] [output pdf]",
            file=sys.stderr,
        )
        sys.exit(1)
    input_pdf, fields_json, output_pdf = sys.argv[1:]

    fill_pdf_form(input_pdf, fields_json, output_pdf)
|
||||
Reference in New Issue
Block a user