Initial commit
This commit is contained in:
385
scripts/replace.py
Executable file
385
scripts/replace.py
Executable file
@@ -0,0 +1,385 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Apply text replacements to PowerPoint presentation.
|
||||
|
||||
Usage:
|
||||
python replace.py <input.pptx> <replacements.json> <output.pptx>
|
||||
|
||||
The replacements JSON should have the structure output by inventory.py.
|
||||
ALL text shapes identified by inventory.py will have their text cleared
|
||||
unless "paragraphs" is specified in the replacements for that shape.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from inventory import InventoryData, extract_text_inventory
|
||||
from pptx import Presentation
|
||||
from pptx.dml.color import RGBColor
|
||||
from pptx.enum.dml import MSO_THEME_COLOR
|
||||
from pptx.enum.text import PP_ALIGN
|
||||
from pptx.oxml.xmlchemy import OxmlElement
|
||||
from pptx.util import Pt
|
||||
|
||||
|
||||
def clear_paragraph_bullets(paragraph):
    """Strip existing bullet definitions from a paragraph's properties.

    Every direct child of the paragraph-properties element whose local name
    is ``buChar``, ``buNone``, ``buAutoNum``, ``buBlip`` or ``buFont`` is
    removed, so the caller can add exactly one fresh bullet choice afterwards.
    ``buBlip`` (picture bullet) is included because it is one of the
    alternatives to ``buChar``/``buNone``/``buAutoNum``; leaving it in place
    would give the paragraph two competing bullet definitions.

    Args:
        paragraph: Object exposing ``_element.get_or_add_pPr()``.

    Returns:
        The properties element returned by ``get_or_add_pPr()``, with the
        bullet children removed.
    """
    bullet_tags = {"buChar", "buNone", "buAutoNum", "buBlip", "buFont"}
    pPr = paragraph._element.get_or_add_pPr()

    # Iterate over a snapshot: removing from the live element while
    # iterating it would skip siblings.
    for child in list(pPr):
        tag = child.tag
        # Comment / processing-instruction nodes carry a non-string tag.
        if not isinstance(tag, str):
            continue
        # Compare the local name, ignoring any "{namespace}" prefix.
        if tag.rsplit("}", 1)[-1] in bullet_tags:
            pPr.remove(child)

    return pPr
|
||||
|
||||
|
||||
def _insert_bullet_element(pPr, element):
    """Insert a bullet element into ``pPr`` ahead of tabLst/defRPr/extLst."""
    # In DrawingML paragraph properties the bullet choice comes after the
    # spacing and bullet colour/size/font children and before these three.
    trailing = {"tabLst", "defRPr", "extLst"}
    for position, child in enumerate(pPr):
        tag = child.tag
        if isinstance(tag, str) and tag.rsplit("}", 1)[-1] in trailing:
            pPr.insert(position, element)
            return
    pPr.append(element)


def apply_paragraph_properties(paragraph, para_data: Dict[str, Any]):
    """Apply text and formatting from ``para_data`` to ``paragraph``.

    Keys read from ``para_data`` (all optional):
        text: Text for the paragraph's first run (default ``""``).
        bullet: Truthy to render a "•" bullet with a hanging indent.
        level: Indent level, used only for bullets (default 0).
        font_size: Drives the bullet indent (default 18.0) and is also
            forwarded to ``apply_font_properties``.
        alignment: One of "LEFT", "CENTER", "RIGHT", "JUSTIFY"; any other
            value is ignored.
        space_before, space_after, line_spacing: Passed through ``Pt``.
        Remaining font keys are handled by ``apply_font_properties``.

    Existing bullet definitions are removed first. The replacement
    ``buChar``/``buNone`` element is placed before any ``tabLst``/``defRPr``/
    ``extLst`` child rather than at a fixed index, so properties retained on
    a reused paragraph stay in schema order.
    """
    emu_per_point = 12700

    text = para_data.get("text", "")

    # Start from a properties element with no bullet definition.
    pPr = clear_paragraph_bullets(paragraph)

    if para_data.get("bullet", False):
        level = para_data.get("level", 0)
        paragraph.level = level

        # Indentation scales with the font size: the left margin grows by
        # 1.6x the font size per level, with a 0.8x hanging indent.
        font_size = para_data.get("font_size", 18.0)
        level_indent_emu = int((font_size * (1.6 + level * 1.6)) * emu_per_point)
        hanging_indent_emu = int(-font_size * 0.8 * emu_per_point)

        pPr.attrib["marL"] = str(level_indent_emu)
        pPr.attrib["indent"] = str(hanging_indent_emu)

        buChar = OxmlElement("a:buChar")
        buChar.set("char", "•")
        _insert_bullet_element(pPr, buChar)

        # Bullets default to left alignment unless the caller says otherwise.
        if "alignment" not in para_data:
            paragraph.alignment = PP_ALIGN.LEFT
    else:
        # Plain text: no indentation and an explicit "no bullet" marker.
        pPr.attrib["marL"] = "0"
        pPr.attrib["indent"] = "0"

        buNone = OxmlElement("a:buNone")
        _insert_bullet_element(pPr, buNone)

    if "alignment" in para_data:
        alignment_map = {
            "LEFT": PP_ALIGN.LEFT,
            "CENTER": PP_ALIGN.CENTER,
            "RIGHT": PP_ALIGN.RIGHT,
            "JUSTIFY": PP_ALIGN.JUSTIFY,
        }
        if para_data["alignment"] in alignment_map:
            paragraph.alignment = alignment_map[para_data["alignment"]]

    if "space_before" in para_data:
        paragraph.space_before = Pt(para_data["space_before"])
    if "space_after" in para_data:
        paragraph.space_after = Pt(para_data["space_after"])
    if "line_spacing" in para_data:
        paragraph.line_spacing = Pt(para_data["line_spacing"])

    # Put the text on the first run, creating it when the paragraph is empty.
    runs = paragraph.runs
    run = runs[0] if runs else paragraph.add_run()
    run.text = text

    apply_font_properties(run, para_data)
|
||||
|
||||
|
||||
def apply_font_properties(run, para_data: Dict[str, Any]):
    """Apply the font settings present in ``para_data`` to a text run.

    Keys read (all optional): ``bold``, ``italic``, ``underline``,
    ``font_size`` (wrapped in ``Pt``), ``font_name``, ``color`` and
    ``theme_color``.

    ``color`` is an "RRGGBB" hex string with an optional leading "#". When it
    is missing or malformed, ``theme_color`` (an ``MSO_THEME_COLOR`` member
    name such as "DARK_1" or "ACCENT_1") is used instead if given. Malformed
    colours and unknown theme names are reported with a printed warning and
    do not raise.

    ``run.font`` is only touched for keys that are actually present, so a
    run with no font settings is left unmodified.
    """
    for attr in ("bold", "italic", "underline"):
        if attr in para_data:
            setattr(run.font, attr, para_data[attr])
    if "font_size" in para_data:
        run.font.size = Pt(para_data["font_size"])
    if "font_name" in para_data:
        run.font.name = para_data["font_name"]

    # Prefer an explicit RGB colour; fall back to the theme colour.
    rgb = None
    if "color" in para_data:
        raw_color = para_data["color"]
        color_hex = raw_color.lstrip("#")
        try:
            if len(color_hex) != 6:
                raise ValueError(color_hex)
            rgb = tuple(int(color_hex[i : i + 2], 16) for i in (0, 2, 4))
        except ValueError:
            print(f" WARNING: Ignoring malformed color '{raw_color}' (expected 6 hex digits)")

    if rgb is not None:
        run.font.color.rgb = RGBColor(*rgb)
    elif "theme_color" in para_data:
        theme_name = para_data["theme_color"]
        try:
            run.font.color.theme_color = getattr(MSO_THEME_COLOR, theme_name)
        except AttributeError:
            print(f" WARNING: Unknown theme color name '{theme_name}'")
|
||||
|
||||
|
||||
def detect_frame_overflow(inventory: InventoryData) -> Dict[str, Dict[str, float]]:
    """Collect the shapes whose text runs past the bottom of their frame.

    A shape counts as overflowing when its ``frame_overflow_bottom`` is not
    ``None``. Slides with no such shape are left out of the result.

    Returns:
        Mapping of slide key -> shape key -> ``frame_overflow_bottom`` value
        (reported downstream as inches).
    """
    overflow_by_slide: Dict[str, Dict[str, float]] = {}

    for slide_id, shapes in inventory.items():
        overflowing = {
            shape_id: info.frame_overflow_bottom
            for shape_id, info in shapes.items()
            if info.frame_overflow_bottom is not None
        }
        if overflowing:
            overflow_by_slide[slide_id] = overflowing

    return overflow_by_slide
|
||||
|
||||
|
||||
def validate_replacements(inventory: InventoryData, replacements: Dict) -> List[str]:
    """Check that every slide and shape named in ``replacements`` exists.

    Top-level keys that do not start with "slide-" are skipped. For each
    unknown shape, the message also lists the inventory shapes on that slide
    that have no replacement entry, with a preview of up to 50 characters of
    their first paragraph, to help spot a mistyped key.

    Returns:
        A list of error messages; empty when everything matches.
    """
    problems: List[str] = []

    for slide_id, requested in replacements.items():
        if not slide_id.startswith("slide-"):
            continue

        if slide_id not in inventory:
            problems.append(f"Slide '{slide_id}' not found in inventory")
            continue

        available = inventory[slide_id]
        unknown = [name for name in requested.keys() if name not in available]
        if not unknown:
            continue

        # Describe the inventory shapes that were given no replacement; the
        # same hint applies to every unknown shape on this slide.
        candidates = []
        for name in available.keys():
            if name in requested:
                continue
            paragraphs = available[name].paragraphs
            if paragraphs and paragraphs[0].text:
                full_text = paragraphs[0].text
                preview = full_text[:50]
                if len(full_text) > 50:
                    preview += "..."
                candidates.append(f"{name} ('{preview}')")
            else:
                candidates.append(name)
        hint = ", ".join(sorted(candidates)) if candidates else "none"

        for name in unknown:
            problems.append(
                f"Shape '{name}' not found on '{slide_id}'. "
                f"Shapes without replacements: {hint}"
            )

    return problems
|
||||
|
||||
|
||||
def check_duplicate_keys(pairs):
    """Build a dict from key/value pairs, rejecting repeated keys.

    Intended as a ``json.load`` ``object_pairs_hook`` so that a duplicated
    key in a JSON object raises instead of silently keeping the last value.

    Raises:
        ValueError: If the same key appears more than once in ``pairs``.
    """
    unique = {}
    for name, payload in pairs:
        if name not in unique:
            unique[name] = payload
            continue
        raise ValueError(f"Duplicate key found in JSON: '{name}'")
    return unique
|
||||
|
||||
|
||||
def _find_worsened_overflow(original_overflow, updated_overflow):
    """Return one message per shape whose overflow grew after replacement."""
    messages = []
    for slide_key, shape_overflows in updated_overflow.items():
        for shape_key, new_overflow in shape_overflows.items():
            # Shapes that did not overflow before count as 0.
            original = original_overflow.get(slide_key, {}).get(shape_key, 0.0)

            if new_overflow > original + 0.01:  # Small tolerance for rounding
                increase = new_overflow - original
                messages.append(
                    f'{slide_key}/{shape_key}: overflow worsened by {increase:.2f}" '
                    f'(was {original:.2f}", now {new_overflow:.2f}")'
                )
    return messages


def _collect_shape_warnings(inventory):
    """Flatten per-shape warnings into 'slide/shape: warning' strings."""
    return [
        f"{slide_key}/{shape_key}: {warning}"
        for slide_key, shapes_dict in inventory.items()
        for shape_key, shape_data in shapes_dict.items()
        for warning in (shape_data.warnings or [])
    ]


def apply_replacements(pptx_file: str, json_file: str, output_file: str):
    """Apply text replacements from a JSON file to a PowerPoint presentation.

    Every text shape found by ``extract_text_inventory`` is cleared. Shapes
    whose entry in the replacements JSON has a "paragraphs" list are then
    refilled from it. The result is inventoried again and only written to
    ``output_file`` if no overflow got worse and no shape carries warnings.

    Args:
        pptx_file: Path of the presentation to read.
        json_file: Path of the replacements JSON, read as UTF-8.
        output_file: Path the updated presentation is saved to.

    Raises:
        ValueError: If the JSON has duplicate keys, references unknown
            slides or shapes, or if the result has worsened overflow or
            formatting warnings. Nothing is saved in those cases.
    """
    import tempfile

    prs = Presentation(pptx_file)

    # Inventory the same Presentation instance that is edited below, so the
    # shape references in the inventory point into ``prs``.
    inventory = extract_text_inventory(Path(pptx_file), prs)

    # Baseline overflow, so that only regressions are reported later.
    original_overflow = detect_frame_overflow(inventory)

    # JSON is UTF-8 by specification; relying on the locale default would
    # mis-decode non-ASCII slide text on platforms with a different default.
    with open(json_file, "r", encoding="utf-8") as f:
        replacements = json.load(f, object_pairs_hook=check_duplicate_keys)

    errors = validate_replacements(inventory, replacements)
    if errors:
        print("ERROR: Invalid shapes in replacement JSON:")
        for error in errors:
            print(f" - {error}")
        print("\nPlease check the inventory and update your replacement JSON.")
        print(
            "You can regenerate the inventory with: python inventory.py <input.pptx> <output.json>"
        )
        raise ValueError(f"Found {len(errors)} validation error(s)")

    shapes_processed = 0
    shapes_cleared = 0
    shapes_replaced = 0

    for slide_key, shapes_dict in inventory.items():
        if not slide_key.startswith("slide-"):
            continue

        slide_index = int(slide_key.split("-")[1])

        if slide_index >= len(prs.slides):
            print(f"Warning: Slide {slide_index} not found")
            continue

        for shape_key, shape_data in shapes_dict.items():
            shapes_processed += 1

            shape = shape_data.shape
            if not shape:
                print(f"Warning: {shape_key} has no shape reference")
                continue

            # ShapeData already validates text_frame in __init__
            text_frame = shape.text_frame  # type: ignore

            # Every inventoried shape is cleared, replaced or not.
            text_frame.clear()  # type: ignore
            shapes_cleared += 1

            replacement_shape_data = replacements.get(slide_key, {}).get(shape_key, {})
            if "paragraphs" not in replacement_shape_data:
                continue

            shapes_replaced += 1

            for i, para_data in enumerate(replacement_shape_data["paragraphs"]):
                # Reuse the paragraph that remains after clear() for the
                # first entry and append new ones for the rest.
                if i == 0:
                    p = text_frame.paragraphs[0]  # type: ignore
                else:
                    p = text_frame.add_paragraph()  # type: ignore

                apply_paragraph_properties(p, para_data)

    # Re-inventory from a saved copy rather than from ``prs`` itself
    # (extract_text_inventory accesses font.color which adds empty
    # <a:solidFill/> elements). Only the name is reserved here; the handle is
    # closed before saving, and the save sits inside the try so the temp
    # file is removed even when saving fails.
    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
        tmp_path = Path(tmp.name)

    try:
        prs.save(str(tmp_path))
        updated_inventory = extract_text_inventory(tmp_path)
        updated_overflow = detect_frame_overflow(updated_inventory)
    finally:
        tmp_path.unlink()  # Clean up temp file

    overflow_errors = _find_worsened_overflow(original_overflow, updated_overflow)
    warnings = _collect_shape_warnings(updated_inventory)

    if overflow_errors or warnings:
        print("\nERROR: Issues detected in replacement output:")
        if overflow_errors:
            print("\nText overflow worsened:")
            for error in overflow_errors:
                print(f" - {error}")
        if warnings:
            print("\nFormatting warnings:")
            for warning in warnings:
                print(f" - {warning}")
        print("\nPlease fix these issues before saving.")
        raise ValueError(
            f"Found {len(overflow_errors)} overflow error(s) and {len(warnings)} warning(s)"
        )

    prs.save(output_file)

    print(f"Saved updated presentation to: {output_file}")
    print(f"Processed {len(prs.slides)} slides")
    print(f" - Shapes processed: {shapes_processed}")
    print(f" - Shapes cleared: {shapes_cleared}")
    print(f" - Shapes replaced: {shapes_replaced}")
|
||||
|
||||
|
||||
def main():
    """Run the tool from the command line.

    Expects exactly three arguments: input .pptx, replacements JSON and
    output .pptx. Prints the module usage text on a wrong argument count and
    an error message when an input file is missing or the replacement fails.
    Each of those cases exits with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print(__doc__)
        sys.exit(1)

    input_pptx, replacements_json, output_pptx = (Path(arg) for arg in cli_args)

    # Both inputs must exist before any work starts; checked in order.
    required_inputs = (
        (input_pptx, f"Error: Input file '{input_pptx}' not found"),
        (replacements_json, f"Error: Replacements JSON file '{replacements_json}' not found"),
    )
    for candidate, complaint in required_inputs:
        if not candidate.exists():
            print(complaint)
            sys.exit(1)

    try:
        apply_replacements(str(input_pptx), str(replacements_json), str(output_pptx))
    except Exception as e:
        # Top-level boundary: report the failure with its traceback, then
        # exit with a non-zero status.
        print(f"Error applying replacements: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user