Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:01:18 +08:00
commit 5643131949
133 changed files with 54100 additions and 0 deletions

385
skills/pptx/scripts/replace.py Executable file
View File

@@ -0,0 +1,385 @@
#!/usr/bin/env python3
"""Apply text replacements to PowerPoint presentation.
Usage:
python replace.py <input.pptx> <replacements.json> <output.pptx>
The replacements JSON should have the structure output by inventory.py.
ALL text shapes identified by inventory.py will have their text cleared
unless "paragraphs" is specified in the replacements for that shape.
"""
import json
import sys
from pathlib import Path
from typing import Any, Dict, List
from inventory import InventoryData, extract_text_inventory
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.dml import MSO_THEME_COLOR
from pptx.enum.text import PP_ALIGN
from pptx.oxml.xmlchemy import OxmlElement
from pptx.util import Pt
def clear_paragraph_bullets(paragraph):
"""Clear bullet formatting from a paragraph."""
pPr = paragraph._element.get_or_add_pPr()
# Remove existing bullet elements
for child in list(pPr):
if (
child.tag.endswith("buChar")
or child.tag.endswith("buNone")
or child.tag.endswith("buAutoNum")
or child.tag.endswith("buFont")
):
pPr.remove(child)
return pPr
def apply_paragraph_properties(paragraph, para_data: Dict[str, Any]):
"""Apply formatting properties to a paragraph."""
# Get the text but don't set it on paragraph directly yet
text = para_data.get("text", "")
# Get or create paragraph properties
pPr = clear_paragraph_bullets(paragraph)
# Handle bullet formatting
if para_data.get("bullet", False):
level = para_data.get("level", 0)
paragraph.level = level
# Calculate font-proportional indentation
font_size = para_data.get("font_size", 18.0)
level_indent_emu = int((font_size * (1.6 + level * 1.6)) * 12700)
hanging_indent_emu = int(-font_size * 0.8 * 12700)
# Set indentation
pPr.attrib["marL"] = str(level_indent_emu)
pPr.attrib["indent"] = str(hanging_indent_emu)
# Add bullet character
buChar = OxmlElement("a:buChar")
buChar.set("char", "")
pPr.append(buChar)
# Default to left alignment for bullets if not specified
if "alignment" not in para_data:
paragraph.alignment = PP_ALIGN.LEFT
else:
# Remove indentation for non-bullet text
pPr.attrib["marL"] = "0"
pPr.attrib["indent"] = "0"
# Add buNone element
buNone = OxmlElement("a:buNone")
pPr.insert(0, buNone)
# Apply alignment
if "alignment" in para_data:
alignment_map = {
"LEFT": PP_ALIGN.LEFT,
"CENTER": PP_ALIGN.CENTER,
"RIGHT": PP_ALIGN.RIGHT,
"JUSTIFY": PP_ALIGN.JUSTIFY,
}
if para_data["alignment"] in alignment_map:
paragraph.alignment = alignment_map[para_data["alignment"]]
# Apply spacing
if "space_before" in para_data:
paragraph.space_before = Pt(para_data["space_before"])
if "space_after" in para_data:
paragraph.space_after = Pt(para_data["space_after"])
if "line_spacing" in para_data:
paragraph.line_spacing = Pt(para_data["line_spacing"])
# Apply run-level formatting
if not paragraph.runs:
run = paragraph.add_run()
run.text = text
else:
run = paragraph.runs[0]
run.text = text
# Apply font properties
apply_font_properties(run, para_data)
def apply_font_properties(run, para_data: Dict[str, Any]):
"""Apply font properties to a text run."""
if "bold" in para_data:
run.font.bold = para_data["bold"]
if "italic" in para_data:
run.font.italic = para_data["italic"]
if "underline" in para_data:
run.font.underline = para_data["underline"]
if "font_size" in para_data:
run.font.size = Pt(para_data["font_size"])
if "font_name" in para_data:
run.font.name = para_data["font_name"]
# Apply color - prefer RGB, fall back to theme_color
if "color" in para_data:
color_hex = para_data["color"].lstrip("#")
if len(color_hex) == 6:
r = int(color_hex[0:2], 16)
g = int(color_hex[2:4], 16)
b = int(color_hex[4:6], 16)
run.font.color.rgb = RGBColor(r, g, b)
elif "theme_color" in para_data:
# Get theme color by name (e.g., "DARK_1", "ACCENT_1")
theme_name = para_data["theme_color"]
try:
run.font.color.theme_color = getattr(MSO_THEME_COLOR, theme_name)
except AttributeError:
print(f" WARNING: Unknown theme color name '{theme_name}'")
def detect_frame_overflow(inventory: InventoryData) -> Dict[str, Dict[str, float]]:
"""Detect text overflow in shapes (text exceeding shape bounds).
Returns dict of slide_key -> shape_key -> overflow_inches.
Only includes shapes that have text overflow.
"""
overflow_map = {}
for slide_key, shapes_dict in inventory.items():
for shape_key, shape_data in shapes_dict.items():
# Check for frame overflow (text exceeding shape bounds)
if shape_data.frame_overflow_bottom is not None:
if slide_key not in overflow_map:
overflow_map[slide_key] = {}
overflow_map[slide_key][shape_key] = shape_data.frame_overflow_bottom
return overflow_map
def validate_replacements(inventory: InventoryData, replacements: Dict) -> List[str]:
"""Validate that all shapes in replacements exist in inventory.
Returns list of error messages.
"""
errors = []
for slide_key, shapes_data in replacements.items():
if not slide_key.startswith("slide-"):
continue
# Check if slide exists
if slide_key not in inventory:
errors.append(f"Slide '{slide_key}' not found in inventory")
continue
# Check each shape
for shape_key in shapes_data.keys():
if shape_key not in inventory[slide_key]:
# Find shapes without replacements defined and show their content
unused_with_content = []
for k in inventory[slide_key].keys():
if k not in shapes_data:
shape_data = inventory[slide_key][k]
# Get text from paragraphs as preview
paragraphs = shape_data.paragraphs
if paragraphs and paragraphs[0].text:
first_text = paragraphs[0].text[:50]
if len(paragraphs[0].text) > 50:
first_text += "..."
unused_with_content.append(f"{k} ('{first_text}')")
else:
unused_with_content.append(k)
errors.append(
f"Shape '{shape_key}' not found on '{slide_key}'. "
f"Shapes without replacements: {', '.join(sorted(unused_with_content)) if unused_with_content else 'none'}"
)
return errors
def check_duplicate_keys(pairs):
"""Check for duplicate keys when loading JSON."""
result = {}
for key, value in pairs:
if key in result:
raise ValueError(f"Duplicate key found in JSON: '{key}'")
result[key] = value
return result
def apply_replacements(pptx_file: str, json_file: str, output_file: str):
"""Apply text replacements from JSON to PowerPoint presentation."""
# Load presentation
prs = Presentation(pptx_file)
# Get inventory of all text shapes (returns ShapeData objects)
# Pass prs to use same Presentation instance
inventory = extract_text_inventory(Path(pptx_file), prs)
# Detect text overflow in original presentation
original_overflow = detect_frame_overflow(inventory)
# Load replacement data with duplicate key detection
with open(json_file, "r") as f:
replacements = json.load(f, object_pairs_hook=check_duplicate_keys)
# Validate replacements
errors = validate_replacements(inventory, replacements)
if errors:
print("ERROR: Invalid shapes in replacement JSON:")
for error in errors:
print(f" - {error}")
print("\nPlease check the inventory and update your replacement JSON.")
print(
"You can regenerate the inventory with: python inventory.py <input.pptx> <output.json>"
)
raise ValueError(f"Found {len(errors)} validation error(s)")
# Track statistics
shapes_processed = 0
shapes_cleared = 0
shapes_replaced = 0
# Process each slide from inventory
for slide_key, shapes_dict in inventory.items():
if not slide_key.startswith("slide-"):
continue
slide_index = int(slide_key.split("-")[1])
if slide_index >= len(prs.slides):
print(f"Warning: Slide {slide_index} not found")
continue
# Process each shape from inventory
for shape_key, shape_data in shapes_dict.items():
shapes_processed += 1
# Get the shape directly from ShapeData
shape = shape_data.shape
if not shape:
print(f"Warning: {shape_key} has no shape reference")
continue
# ShapeData already validates text_frame in __init__
text_frame = shape.text_frame # type: ignore
text_frame.clear() # type: ignore
shapes_cleared += 1
# Check for replacement paragraphs
replacement_shape_data = replacements.get(slide_key, {}).get(shape_key, {})
if "paragraphs" not in replacement_shape_data:
continue
shapes_replaced += 1
# Add replacement paragraphs
for i, para_data in enumerate(replacement_shape_data["paragraphs"]):
if i == 0:
p = text_frame.paragraphs[0] # type: ignore
else:
p = text_frame.add_paragraph() # type: ignore
apply_paragraph_properties(p, para_data)
# Check for issues after replacements
# Save to a temporary file and reload to avoid modifying the presentation during inventory
# (extract_text_inventory accesses font.color which adds empty <a:solidFill/> elements)
import tempfile
with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
tmp_path = Path(tmp.name)
prs.save(str(tmp_path))
try:
updated_inventory = extract_text_inventory(tmp_path)
updated_overflow = detect_frame_overflow(updated_inventory)
finally:
tmp_path.unlink() # Clean up temp file
# Check if any text overflow got worse
overflow_errors = []
for slide_key, shape_overflows in updated_overflow.items():
for shape_key, new_overflow in shape_overflows.items():
# Get original overflow (0 if there was no overflow before)
original = original_overflow.get(slide_key, {}).get(shape_key, 0.0)
# Error if overflow increased
if new_overflow > original + 0.01: # Small tolerance for rounding
increase = new_overflow - original
overflow_errors.append(
f'{slide_key}/{shape_key}: overflow worsened by {increase:.2f}" '
f'(was {original:.2f}", now {new_overflow:.2f}")'
)
# Collect warnings from updated shapes
warnings = []
for slide_key, shapes_dict in updated_inventory.items():
for shape_key, shape_data in shapes_dict.items():
if shape_data.warnings:
for warning in shape_data.warnings:
warnings.append(f"{slide_key}/{shape_key}: {warning}")
# Fail if there are any issues
if overflow_errors or warnings:
print("\nERROR: Issues detected in replacement output:")
if overflow_errors:
print("\nText overflow worsened:")
for error in overflow_errors:
print(f" - {error}")
if warnings:
print("\nFormatting warnings:")
for warning in warnings:
print(f" - {warning}")
print("\nPlease fix these issues before saving.")
raise ValueError(
f"Found {len(overflow_errors)} overflow error(s) and {len(warnings)} warning(s)"
)
# Save the presentation
prs.save(output_file)
# Report results
print(f"Saved updated presentation to: {output_file}")
print(f"Processed {len(prs.slides)} slides")
print(f" - Shapes processed: {shapes_processed}")
print(f" - Shapes cleared: {shapes_cleared}")
print(f" - Shapes replaced: {shapes_replaced}")
def main():
"""Main entry point for command-line usage."""
if len(sys.argv) != 4:
print(__doc__)
sys.exit(1)
input_pptx = Path(sys.argv[1])
replacements_json = Path(sys.argv[2])
output_pptx = Path(sys.argv[3])
if not input_pptx.exists():
print(f"Error: Input file '{input_pptx}' not found")
sys.exit(1)
if not replacements_json.exists():
print(f"Error: Replacements JSON file '{replacements_json}' not found")
sys.exit(1)
try:
apply_replacements(str(input_pptx), str(replacements_json), str(output_pptx))
except Exception as e:
print(f"Error applying replacements: {e}")
import traceback
traceback.print_exc()
sys.exit(1)
if __name__ == "__main__":
main()