Initial commit
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,28 @@
|
||||
# DO NOT add a docstring
|
||||
|
||||
from chatfield import chatfield
|
||||
|
||||
# The chatfield.cli module will import this `interview` object.
|
||||
# **CRITICAL** - Replace the commented examples below with the real data definition.
|
||||
interview = (
    chatfield()
    # .type(<form id, official name, or filename>)
    # .desc(<human-friendly form description>)

    # Define Alice's type plus at least one trait.
    # .alice()
    # .type(<primary role for the AI agent>)
    # .trait(<characteristic or behavior hint for the AI agent>)
    # # Optional additional .trait() calls

    # Define Bob's type plus at least one trait.
    # .bob()
    # .type(<primary role for the human user>)
    # .trait(<characteristic or guidance about conversing with the user>)
    # # Optional additional .trait() calls

    # Define one or more fields.
    # .field(<field id>)
    # .desc(<human-friendly field description>)

    .build()
)
|
||||
158
skills/filling-pdf-forms/scripts/extract_form_field_info.py
Normal file
158
skills/filling-pdf-forms/scripts/extract_form_field_info.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
from pypdf import PdfReader
|
||||
|
||||
|
||||
# Extracts data for the fillable form fields in a PDF and outputs JSON that
|
||||
# Claude uses to fill the fields. See forms.md.
|
||||
|
||||
|
||||
# Returns the annotation's fully qualified field name (ancestor names joined with "."). This matches the names used by `PdfReader.get_fields` and `PdfWriter.update_page_form_field_values`.
|
||||
def get_full_annotation_field_id(annotation):
    """Return the dotted, root-first field name for an annotation.

    Walks from ``annotation`` up its ``/Parent`` chain and collects every
    truthy ``/T`` entry along the way.

    Returns:
        The collected names joined with ".", outermost ancestor first, or
        None when no name was found.
    """
    name_parts = []
    node = annotation
    while node:
        part = node.get('/T')
        if part:
            # Prepend so the outermost ancestor ends up first.
            name_parts.insert(0, part)
        node = node.get('/Parent')
    if not name_parts:
        return None
    return ".".join(name_parts)
|
||||
|
||||
|
||||
def make_field_dict(field, field_id):
    """Build the JSON-serializable description of a single form field.

    Args:
        field: Field dictionary; read through ``.get`` for ``/FT``,
            ``/_States_`` and ``/TU``.
        field_id: Fully qualified field name, stored under ``"field_id"``.

    Returns:
        A dict with ``field_id`` and ``type`` ("text", "checkbox", "choice" or
        "unknown (<ft>)"), plus:
        - ``checked_value`` / ``unchecked_value`` for checkboxes that expose
          exactly two states,
        - ``choice_options`` (list of ``{"value", "text"}``) for choice fields,
        - ``tooltip`` when ``/TU`` is set.
    """
    field_dict = {"field_id": field_id}
    ft = field.get('/FT')
    if ft == "/Tx":
        field_dict["type"] = "text"
    elif ft == "/Btn":
        field_dict["type"] = "checkbox"  # radio groups handled separately
        states = field.get("/_States_", [])
        if len(states) == 2:
            # "/Off" seems to always be the unchecked value, as suggested by
            # https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf#page=448
            # It can be either first or second in the "/_States_" list.
            if "/Off" in states:
                field_dict["checked_value"] = states[0] if states[0] != "/Off" else states[1]
                field_dict["unchecked_value"] = "/Off"
            else:
                print(f"Unexpected state values for checkbox `{field_id}`. Its checked and unchecked values may not be correct; if you're trying to check it, visually verify the results.")
                field_dict["checked_value"] = states[0]
                field_dict["unchecked_value"] = states[1]
    elif ft == "/Ch":
        field_dict["type"] = "choice"
        states = field.get("/_States_", [])
        choice_options = []
        for state in states:
            if isinstance(state, str):
                # A plain-string option is both the export value and the
                # display text; indexing it would split it into characters.
                value = text = state
            else:
                # Two-element [export value, display text] pair.
                value, text = state[0], state[1]
            choice_options.append({"value": value, "text": text})
        field_dict["choice_options"] = choice_options
    else:
        field_dict["type"] = f"unknown ({ft})"

    # Extract tooltip (TU = tooltip/user-facing text)
    tooltip = field.get('/TU')
    if tooltip:
        field_dict["tooltip"] = tooltip

    return field_dict
|
||||
|
||||
|
||||
# Returns a list of fillable PDF fields:
|
||||
# [
|
||||
# {
|
||||
# "field_id": "name",
|
||||
# "page": 1,
|
||||
# "type": ("text", "checkbox", "radio_group", or "choice")
|
||||
# // Per-type additional fields described in forms.md
|
||||
# },
|
||||
# ]
|
||||
def get_field_info(reader: PdfReader):
    """Collect the located, fillable fields of a PDF in reading order.

    Args:
        reader: Object exposing ``get_fields()`` and ``pages``.

    Returns:
        A list of field dicts. Non-container fields come from
        ``make_field_dict`` and gain ``page`` (1-based) and ``rect`` from the
        matching page annotation; fields without an annotation are reported
        on stdout and dropped. Each ``/Btn`` container with kids whose
        annotations expose a single non-"/Off" appearance state becomes one
        ``radio_group`` dict with ``radio_options``. The list is sorted by
        page, then top-to-bottom, then left-to-right. Empty when the PDF has
        no form fields.
    """
    # get_fields() yields None (not an empty dict) when the PDF has no form.
    fields = reader.get_fields() or {}

    field_info_by_id = {}
    possible_radio_names = set()

    for field_id, field in fields.items():
        # Skip if this is a container field with children, except that it might be
        # a parent group for radio button options.
        if field.get("/Kids"):
            if field.get("/FT") == "/Btn":
                possible_radio_names.add(field_id)
            continue
        field_info_by_id[field_id] = make_field_dict(field, field_id)

    # Bounding rects are stored in annotations in page objects.

    # Radio button options have a separate annotation for each choice;
    # all choices have the same field name.
    # See https://westhealth.github.io/exploring-fillable-forms-with-pdfrw.html
    radio_fields_by_id = {}

    for page_index, page in enumerate(reader.pages):
        annotations = page.get('/Annots', [])
        for ann in annotations:
            field_id = get_full_annotation_field_id(ann)
            if field_id in field_info_by_id:
                field_info_by_id[field_id]["page"] = page_index + 1
                field_info_by_id[field_id]["rect"] = ann.get('/Rect')
            elif field_id in possible_radio_names:
                try:
                    # ann['/AP']['/N'] should have two items. One of them is '/Off',
                    # the other is the active value.
                    on_values = [v for v in ann["/AP"]["/N"] if v != "/Off"]
                except KeyError:
                    continue
                if len(on_values) == 1:
                    rect = ann.get("/Rect")
                    if field_id not in radio_fields_by_id:
                        radio_fields_by_id[field_id] = {
                            "field_id": field_id,
                            "type": "radio_group",
                            "page": page_index + 1,
                            "radio_options": [],
                        }
                    # Note: at least on macOS 15.7, Preview.app doesn't show selected
                    # radio buttons correctly. (It does if you remove the leading slash
                    # from the value, but that causes them not to appear correctly in
                    # Chrome/Firefox/Acrobat/etc).
                    radio_fields_by_id[field_id]["radio_options"].append({
                        "value": on_values[0],
                        "rect": rect,
                    })

    # Some PDFs have form field definitions without corresponding annotations,
    # so we can't tell where they are. Ignore these fields for now.
    fields_with_location = []
    for field_info in field_info_by_id.values():
        if "page" in field_info:
            fields_with_location.append(field_info)
        else:
            print(f"Unable to determine location for field id: {field_info.get('field_id')}, ignoring")

    # Sort by page number, then Y position (flipped in PDF coordinate system), then X.
    def sort_key(f):
        if "radio_options" in f:
            # A radio group is positioned by its first recorded option.
            rect = f["radio_options"][0]["rect"] or [0, 0, 0, 0]
        else:
            rect = f.get("rect") or [0, 0, 0, 0]
        adjusted_position = [-rect[1], rect[0]]
        return [f.get("page"), adjusted_position]

    sorted_fields = fields_with_location + list(radio_fields_by_id.values())
    sorted_fields.sort(key=sort_key)

    return sorted_fields
|
||||
|
||||
|
||||
def write_field_info(pdf_path: str, json_output_path: str):
    """Extract field info from the PDF at ``pdf_path`` and save it as JSON.

    The result of ``get_field_info`` is written to ``json_output_path`` with
    two-space indentation, and a one-line summary is printed to stdout.
    """
    extracted = get_field_info(PdfReader(pdf_path))
    with open(json_output_path, "w") as out_file:
        json.dump(extracted, out_file, indent=2)
    field_count = len(extracted)
    print(f"Wrote {field_count} fields to {json_output_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: input PDF, output JSON.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: extract_form_field_info.py [input pdf] [output json]")
        sys.exit(1)
    source_pdf, target_json = cli_args
    write_field_info(source_pdf, target_json)
|
||||
114
skills/filling-pdf-forms/scripts/fill_fillable_fields.py
Normal file
114
skills/filling-pdf-forms/scripts/fill_fillable_fields.py
Normal file
@@ -0,0 +1,114 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
from extract_form_field_info import get_field_info
|
||||
|
||||
|
||||
# Fills fillable form fields in a PDF. See forms.md.
|
||||
|
||||
|
||||
def fill_pdf_fields(input_pdf_path: str, fields_json_path: str, output_pdf_path: str):
    """Fill the fillable form fields of a PDF from a JSON list of field values.

    Args:
        input_pdf_path: PDF to read.
        fields_json_path: JSON file holding a list of dicts with ``field_id``,
            ``page`` and optionally ``value``.
        output_pdf_path: Where the filled PDF is written.

    Every entry is checked against ``get_field_info`` for the input PDF:
    unknown field IDs, wrong page numbers and invalid values are printed as
    ``ERROR: ...`` lines. If any error was found the process exits with
    status 1 and nothing is written. Entries without ``value`` are validated
    but not filled.
    """
    # JSON is UTF-8 by specification; don't depend on the platform default.
    with open(fields_json_path, encoding="utf-8") as f:
        fields = json.load(f)

    reader = PdfReader(input_pdf_path)
    field_info = get_field_info(reader)
    fields_by_ids = {info["field_id"]: info for info in field_info}

    has_error = False
    # Values to write, grouped by 1-based page number.
    fields_by_page = {}
    for field in fields:
        # Read keys with .get so a malformed entry is reported through the
        # normal error path rather than aborting with a KeyError.
        field_id = field.get("field_id")
        page = field.get("page")
        existing_field = fields_by_ids.get(field_id)
        if not existing_field:
            has_error = True
            print(f"ERROR: `{field_id}` is not a valid field ID")
            continue
        if page != existing_field["page"]:
            has_error = True
            print(f"ERROR: Incorrect page number for `{field_id}` (got {page}, expected {existing_field['page']})")
            continue
        if "value" not in field:
            continue
        err = validation_error_for_field_value(existing_field, field["value"])
        if err:
            print(err)
            has_error = True
            continue
        fields_by_page.setdefault(page, {})[field_id] = field["value"]

    if has_error:
        sys.exit(1)

    writer = PdfWriter(clone_from=reader)
    for page, field_values in fields_by_page.items():
        writer.update_page_form_field_values(writer.pages[page - 1], field_values, auto_regenerate=False)

    # This seems to be necessary for many PDF viewers to format the form values correctly.
    # It may cause the viewer to show a "save changes" dialog even if the user doesn't make any changes.
    writer.set_need_appearances_writer(True)

    with open(output_pdf_path, "wb") as f:
        writer.write(f)
|
||||
|
||||
|
||||
def validation_error_for_field_value(field_info, field_value):
    """Check a proposed value against the constraints of a field.

    Args:
        field_info: Field dict with at least ``type`` and ``field_id``, plus
            the per-type keys (``checked_value``/``unchecked_value``,
            ``radio_options`` or ``choice_options``).
        field_value: The value that is about to be written.

    Returns:
        An ``ERROR: ...`` message when the value is not allowed, otherwise
        None. Field types other than checkbox, radio_group and choice are
        never rejected.
    """
    field_type = field_info["type"]
    field_id = field_info["field_id"]
    if field_type == "checkbox":
        checked_val = field_info.get("checked_value")
        unchecked_val = field_info.get("unchecked_value")
        if checked_val is None or unchecked_val is None:
            # The field info carries no on/off states for this checkbox, so
            # there is nothing to validate against; accept the value.
            return None
        if field_value != checked_val and field_value != unchecked_val:
            return f'ERROR: Invalid value "{field_value}" for checkbox field "{field_id}". The checked value is "{checked_val}" and the unchecked value is "{unchecked_val}"'
    elif field_type == "radio_group":
        option_values = [opt["value"] for opt in field_info["radio_options"]]
        if field_value not in option_values:
            return f'ERROR: Invalid value "{field_value}" for radio group field "{field_id}". Valid values are: {option_values}'
    elif field_type == "choice":
        choice_values = [opt["value"] for opt in field_info["choice_options"]]
        if field_value not in choice_values:
            return f'ERROR: Invalid value "{field_value}" for choice field "{field_id}". Valid values are: {choice_values}'
    return None
|
||||
|
||||
|
||||
# pypdf (at least version 5.7.0) has a bug when setting the value for a selection list field.
|
||||
# In _writer.py around line 966:
|
||||
#
|
||||
# if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
|
||||
# txt = "\n".join(annotation.get_inherited(FA.Opt, []))
|
||||
#
|
||||
# The problem is that for selection lists, `get_inherited` returns a list of two-element lists like
|
||||
# [["value1", "Text 1"], ["value2", "Text 2"], ...]
|
||||
# This causes `join` to throw a TypeError because it expects an iterable of strings.
|
||||
# The horrible workaround is to patch `get_inherited` to return a list of the value strings.
|
||||
# We call the original method and adjust the return value only if the argument to `get_inherited`
|
||||
# is `FA.Opt` and if the return value is a list of two-element lists.
|
||||
def monkeypatch_pydpf_method():
    """Replace ``DictionaryObject.get_inherited`` with a wrapper for ``/Opt``.

    The wrapper calls the original method and returns its result unchanged,
    except when the key is ``FieldDictionaryAttributes.Opt`` and the result is
    a list made up solely of two-element lists; in that case only the first
    element of each pair is returned.
    """
    from pypdf.constants import FieldDictionaryAttributes
    from pypdf.generic import DictionaryObject

    unpatched = DictionaryObject.get_inherited

    def patched_get_inherited(self, key: str, default = None):
        inherited = unpatched(self, key, default)
        # Guard clauses: anything other than an /Opt list of pairs passes through.
        if key != FieldDictionaryAttributes.Opt:
            return inherited
        if not isinstance(inherited, list):
            return inherited
        if any(not isinstance(entry, list) or len(entry) != 2 for entry in inherited):
            return inherited
        return [entry[0] for entry in inherited]

    DictionaryObject.get_inherited = patched_get_inherited
|
||||
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("Usage: fill_fillable_fields.py [input pdf] [field_values.json] [output pdf]")
        sys.exit(1)
    # Install the get_inherited patch before any field value is written.
    monkeypatch_pydpf_method()
    input_pdf, fields_json, output_pdf = sys.argv[1:4]
    fill_pdf_fields(input_pdf, fields_json, output_pdf)
|
||||
134
skills/filling-pdf-forms/scripts/fill_nonfillable_fields.py
Normal file
134
skills/filling-pdf-forms/scripts/fill_nonfillable_fields.py
Normal file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fills a non-fillable PDF by adding text annotations.
|
||||
|
||||
This script reads:
|
||||
- .form.json (field definitions with bounding boxes in PDF coordinates)
|
||||
- .values.json (field values from the interview)
|
||||
|
||||
And creates an annotated PDF with the values placed at the specified locations.
|
||||
|
||||
Usage:
|
||||
python fill_nonfillable_fields.py <input.pdf> <basename>.chatfield/<basename>.values.json <output.pdf>
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
from pypdf.annotations import FreeText
|
||||
|
||||
|
||||
def fill_nonfillable_pdf(input_pdf_path, values_json_path, output_pdf_path):
    """
    Fill a non-fillable PDF with text annotations.

    Args:
        input_pdf_path: Path to the input PDF file
        values_json_path: Path to .values.json file with field values
        output_pdf_path: Path to write the filled PDF

    The matching ``.form.json`` (same directory, same base name) supplies the
    ``page`` and ``rect`` of each field. Fields with no value, an empty
    value, or no rect are skipped; a numeric ``0`` counts as a real value.

    Raises:
        ValueError: ``values_json_path`` does not end in ``.values.json``.
        FileNotFoundError: the ``.form.json`` file is missing.
    """
    values_suffix = '.values.json'

    # Derive .form.json path from .values.json path
    values_path = Path(values_json_path)
    if not values_path.name.endswith(values_suffix):
        raise ValueError(f"Expected .values.json file, got: {values_path.name}")

    # Swap only the trailing suffix; str.replace would also rewrite an earlier
    # ".values.json" occurring inside the base name.
    form_name = values_path.name[:-len(values_suffix)] + '.form.json'
    form_json_path = values_path.parent / form_name

    if not form_json_path.exists():
        raise FileNotFoundError(
            f"Form definition file not found: {form_json_path}\n"
            f"Expected to find .form.json alongside .values.json"
        )

    # Load field definitions (with bounding boxes in PDF coordinates).
    # JSON is UTF-8 by specification; don't depend on the platform default.
    with open(form_json_path, 'r', encoding='utf-8') as f:
        form_fields = json.load(f)

    # Load field values
    with open(values_json_path, 'r', encoding='utf-8') as f:
        values_data = json.load(f)

    # Create a lookup map: field_id -> value
    values_map = {field['field_id']: field['value'] for field in values_data['fields']}

    # Open the PDF and copy all pages to the writer
    reader = PdfReader(input_pdf_path)
    writer = PdfWriter()
    writer.append(reader)

    # Default font settings (identical for every annotation)
    # Note: Font size/color may not work reliably across all PDF viewers
    # https://github.com/py-pdf/pypdf/issues/2084
    font_name = "Arial"
    font_size = "12pt"
    font_color = "000000"  # Black

    # Process each form field
    annotations_added = 0

    for field_def in form_fields:
        field_id = field_def.get('field_id')

        # No value provided for this field, skip it
        if field_id not in values_map:
            continue

        value = values_map[field_id]

        # Skip empty values (None, "", False, empty containers), but keep a
        # numeric zero: it is a real answer, not a missing one.
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if not value and not is_number:
            continue

        # Get field properties
        page_num = field_def.get('page', 1)
        rect = field_def.get('rect')

        if not rect:
            print(f"Warning: Field {field_id} has no rect, skipping", file=sys.stderr)
            continue

        # Create the annotation
        annotation = FreeText(
            text=str(value),
            rect=rect,  # Already in PDF coordinates
            font=font_name,
            font_size=font_size,
            font_color=font_color,
            border_color=None,
            background_color=None,
        )

        # Add annotation to the appropriate page (pypdf uses 0-based indexing)
        writer.add_annotation(page_number=page_num - 1, annotation=annotation)
        annotations_added += 1

    # Save the filled PDF
    with open(output_pdf_path, 'wb') as output:
        writer.write(output)

    print(f"Successfully filled PDF and saved to {output_pdf_path}")
    print(f"Added {annotations_added} text annotations")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    if len(sys.argv) != 4:
        usage_lines = (
            "Usage: fill_nonfillable_fields.py <input.pdf> <basename>.values.json <output.pdf>",
            "",
            "Example:",
            " python fill_nonfillable_fields.py form.pdf form.chatfield/form.values.json form.done.pdf",
        )
        print("\n".join(usage_lines))
        sys.exit(1)

    input_pdf, values_json, output_pdf = sys.argv[1:]

    # Report any failure as a single line on stderr and a non-zero exit code.
    try:
        fill_nonfillable_pdf(input_pdf, values_json, output_pdf)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
Reference in New Issue
Block a user