Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 09:06:10 +08:00
commit 8b332b4007
64 changed files with 26839 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,231 @@
#!/usr/bin/env python3
"""
Rearrange PowerPoint slides based on a sequence of indices.
Usage:
python rearrange.py template.pptx output.pptx 0,34,34,50,52
This will create output.pptx using slides from template.pptx in the specified order.
Slides can be repeated (e.g., 34 appears twice).
"""
import argparse
import shutil
import sys
from copy import deepcopy
from pathlib import Path
import six
from pptx import Presentation
def main():
    """Command-line entry point for the slide rearranger.

    Reads ``template``, ``output`` and ``sequence`` from the command line,
    checks them, and hands the work to ``rearrange_presentation``. Each
    failure prints a one-line message and exits with status 1.
    """
    cli = argparse.ArgumentParser(
        description="Rearrange PowerPoint slides based on a sequence of indices.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python rearrange.py template.pptx output.pptx 0,34,34,50,52
Creates output.pptx using slides 0, 34 (twice), 50, and 52 from template.pptx
python rearrange.py template.pptx output.pptx 5,3,1,2,4
Creates output.pptx with slides reordered as specified
Note: Slide indices are 0-based (first slide is 0, second is 1, etc.)
""",
    )
    cli.add_argument("template", help="Path to template PPTX file")
    cli.add_argument("output", help="Path for output PPTX file")
    cli.add_argument(
        "sequence", help="Comma-separated sequence of slide indices (0-based)"
    )
    opts = cli.parse_args()

    # Convert "0, 3,3" into [0, 3, 3]; the first non-integer token aborts.
    slide_sequence = []
    for token in opts.sequence.split(","):
        try:
            slide_sequence.append(int(token.strip()))
        except ValueError:
            print(
                "Error: Invalid sequence format. Use comma-separated integers (e.g., 0,34,34,50,52)"
            )
            sys.exit(1)

    # The template must already exist on disk.
    template_path = Path(opts.template)
    if not template_path.exists():
        print(f"Error: Template file not found: {opts.template}")
        sys.exit(1)

    # Make sure the destination directory is there before writing into it.
    output_path = Path(opts.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        rearrange_presentation(template_path, output_path, slide_sequence)
    except Exception as exc:
        # ValueError carries a ready-made user message (e.g. index out of
        # range); everything else is reported as a processing failure.
        if isinstance(exc, ValueError):
            prefix = "Error"
        else:
            prefix = "Error processing presentation"
        print(f"{prefix}: {exc}")
        sys.exit(1)
def duplicate_slide(pres, index):
    """Add a copy of slide ``index`` to ``pres`` and return the new slide.

    The copy is created from the source slide's layout. The shapes that come
    with the freshly added slide are removed, then every shape of the source
    is deep-copied into it. Image and media relationships of the source slide
    part are re-created on the new slide part, and ``r:embed`` references
    inside the copied shapes are rewritten to the new relationship ids.

    Args:
        pres: Presentation object, modified in place.
        index: 0-based position of the slide to copy.

    Returns:
        The slide object returned by ``pres.slides.add_slide``.
    """
    embed_attr = (
        "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
    )

    source = pres.slides[index]
    # Use source's layout to preserve formatting
    new_slide = pres.slides.add_slide(source.slide_layout)

    # Collect all image and media relationships from the source slide.
    # Plain .items() is enough here; the Python 2 shim `six` is not needed.
    image_rels = {
        rel_id: rel
        for rel_id, rel in source.part.rels.items()
        if "image" in rel.reltype or "media" in rel.reltype
    }

    # CRITICAL: clear the shapes created from the layout to avoid duplicates.
    # Iterate over a snapshot so removal cannot disturb the iteration.
    for shape in list(new_slide.shapes):
        sp = shape.element
        sp.getparent().remove(sp)

    # Copy all shapes from source
    for shape in source.shapes:
        new_el = deepcopy(shape.element)
        new_slide.shapes._spTree.insert_element_before(new_el, "p:extLst")

        # Blips can sit in pictures or in fills of other shapes; remap every
        # embed reference that points at a collected image relationship.
        for blip in new_el.xpath(".//a:blip[@r:embed]"):
            old_rel = image_rels.get(blip.get(embed_attr))
            if old_rel is None:
                continue
            # get_or_add returns the rId to use on the destination part
            new_rId = new_slide.part.rels.get_or_add(
                old_rel.reltype, old_rel._target
            )
            blip.set(embed_attr, new_rId)

    # Best effort: also carry over image/media relationships that are not
    # referenced through a blip. Failures here are deliberately ignored.
    for rel in image_rels.values():
        try:
            new_slide.part.rels.get_or_add(rel.reltype, rel._target)
        except Exception:
            pass  # Relationship might already exist

    return new_slide
def delete_slide(pres, index):
    """Remove the slide at 0-based position ``index`` from ``pres``."""
    slide_ids = pres.slides._sldIdLst
    # Drop the presentation-level relationship first, then the id-list entry.
    pres.part.drop_rel(slide_ids[index].rId)
    del slide_ids[index]
def reorder_slides(pres, slide_index, target_index):
    """Move the slide at ``slide_index`` so that it sits at ``target_index``.

    Both indices are 0-based positions in the presentation's slide-id list;
    ``target_index`` is interpreted after the slide has been taken out.
    """
    id_list = pres.slides._sldIdLst
    moving = id_list[slide_index]
    # Detach the entry, then put it back at its new position.
    id_list.remove(moving)
    id_list.insert(target_index, moving)
def rearrange_presentation(template_path, output_path, slide_sequence):
    """Create a presentation containing template slides in the given order.

    The template is copied to ``output_path`` (unless both name the same
    file), repeated slides are duplicated, slides not mentioned in
    ``slide_sequence`` are deleted, and the remaining slides are moved into
    the requested order before the result is saved to ``output_path``.

    Args:
        template_path: Path to template PPTX file
        output_path: Path for output PPTX file
        slide_sequence: List of slide indices (0-based) to include; an index
            may appear more than once.

    Raises:
        ValueError: If an index in ``slide_sequence`` is outside the range of
            slides present in the template.
    """
    # Compare resolved paths so that two spellings of the same file
    # (e.g. "deck.pptx" and "sub/../deck.pptx") are recognised as identical
    # instead of making shutil.copy2 fail with SameFileError.
    same_file = Path(template_path).resolve() == Path(output_path).resolve()
    if same_file:
        prs = Presentation(template_path)
    else:
        # Copy template to preserve dimensions and theme
        shutil.copy2(template_path, output_path)
        prs = Presentation(output_path)

    total_slides = len(prs.slides)

    # Validate indices
    for idx in slide_sequence:
        if idx < 0 or idx >= total_slides:
            raise ValueError(f"Slide index {idx} out of range (0-{total_slides - 1})")

    # Count every index once up front instead of calling list.count()
    # repeatedly inside the loop below.
    occurrences = {}
    for idx in slide_sequence:
        occurrences[idx] = occurrences.get(idx, 0) + 1

    slide_map = []  # Actual slide index to use for each output position
    duplicated = {}  # original_idx -> indices of duplicates not yet used

    # Step 1: DUPLICATE repeated slides
    print(f"Processing {len(slide_sequence)} slides from template...")
    for i, template_idx in enumerate(slide_sequence):
        if duplicated.get(template_idx):
            # Already duplicated this slide, use the next unused duplicate
            slide_map.append(duplicated[template_idx].pop(0))
            print(f" [{i}] Using duplicate of slide {template_idx}")
        elif occurrences[template_idx] > 1 and template_idx not in duplicated:
            # First occurrence of a repeated slide - create the duplicates now
            slide_map.append(template_idx)
            count = occurrences[template_idx] - 1
            print(
                f" [{i}] Using original slide {template_idx}, creating {count} duplicate(s)"
            )
            duplicates = []
            for _ in range(count):
                duplicate_slide(prs, template_idx)
                # The new slide is taken to be the last one in the deck.
                duplicates.append(len(prs.slides) - 1)
            duplicated[template_idx] = duplicates
        else:
            # Slide that appears only once in the sequence, use original
            slide_map.append(template_idx)
            print(f" [{i}] Using original slide {template_idx}")

    # Step 2: DELETE unwanted slides. Walking backwards keeps the indices in
    # slides_to_keep valid for every position that is still to be visited.
    slides_to_keep = set(slide_map)
    print(f"\nDeleting {len(prs.slides) - len(slides_to_keep)} unused slides...")
    for i in range(len(prs.slides) - 1, -1, -1):
        if i not in slides_to_keep:
            delete_slide(prs, i)
            # Slides after the deleted one have moved up by one position
            slide_map = [idx - 1 if idx > i else idx for idx in slide_map]

    # Step 3: REORDER to final sequence
    print(f"Reordering {len(slide_map)} slides to final sequence...")
    for target_pos in range(len(slide_map)):
        # Find which slide should be at target_pos
        current_pos = slide_map[target_pos]
        if current_pos == target_pos:
            continue
        reorder_slides(prs, current_pos, target_pos)
        # The move shifts every slide lying between the two positions
        for pos, value in enumerate(slide_map):
            if current_pos < value <= target_pos:
                slide_map[pos] = value - 1
            elif target_pos <= value < current_pos:
                slide_map[pos] = value + 1
        slide_map[target_pos] = target_pos

    # Save the presentation
    prs.save(output_path)
    print(f"\nSaved rearranged presentation to: {output_path}")
    print(f"Final presentation has {len(prs.slides)} slides")
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,385 @@
#!/usr/bin/env python3
"""Apply text replacements to PowerPoint presentation.
Usage:
python replace.py <input.pptx> <replacements.json> <output.pptx>
The replacements JSON should have the structure output by inventory.py.
ALL text shapes identified by inventory.py will have their text cleared
unless "paragraphs" is specified in the replacements for that shape.
"""
import json
import sys
from pathlib import Path
from typing import Any, Dict, List
from inventory import InventoryData, extract_text_inventory
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.dml import MSO_THEME_COLOR
from pptx.enum.text import PP_ALIGN
from pptx.oxml.xmlchemy import OxmlElement
from pptx.util import Pt
def clear_paragraph_bullets(paragraph):
    """Strip bullet-related children from a paragraph's properties element.

    The properties element is fetched (or created) through
    ``paragraph._element.get_or_add_pPr()``. Every child whose tag ends in
    ``buChar``, ``buNone``, ``buAutoNum`` or ``buFont`` is removed.

    Returns:
        The paragraph properties element.
    """
    bullet_suffixes = ("buChar", "buNone", "buAutoNum", "buFont")
    props = paragraph._element.get_or_add_pPr()
    # Decide what to drop first so removal never disturbs the iteration.
    stale = [node for node in props if node.tag.endswith(bullet_suffixes)]
    for node in stale:
        props.remove(node)
    return props
def apply_paragraph_properties(paragraph, para_data: Dict[str, Any]):
    """Apply text and formatting from ``para_data`` to ``paragraph``.

    Existing bullet settings are removed first. A paragraph flagged with
    ``"bullet"`` gets a bullet character, a level and a hanging indent scaled
    from its font size; any other paragraph gets zero indentation and an
    explicit "no bullet" marker. Alignment, spacing, text and font settings
    are then applied from the optional keys of ``para_data``.

    Args:
        paragraph: Paragraph object to modify in place.
        para_data: Keys read here are ``text``, ``bullet``, ``level``,
            ``font_size``, ``alignment``, ``space_before``, ``space_after``
            and ``line_spacing``. The whole dict is also passed on to
            ``apply_font_properties``.
    """
    text = para_data.get("text", "")

    # Get or create paragraph properties, without any bullet children
    pPr = clear_paragraph_bullets(paragraph)

    if para_data.get("bullet", False):
        level = para_data.get("level", 0)
        paragraph.level = level

        # Font-proportional indentation; 12700 converts points to EMU.
        font_size = para_data.get("font_size", 18.0)
        level_indent_emu = int((font_size * (1.6 + level * 1.6)) * 12700)
        hanging_indent_emu = int(-font_size * 0.8 * 12700)
        pPr.attrib["marL"] = str(level_indent_emu)
        pPr.attrib["indent"] = str(hanging_indent_emu)

        # Add the bullet character. An empty "char" attribute renders no
        # glyph at all, so U+2022 (BULLET) is written explicitly.
        buChar = OxmlElement("a:buChar")
        buChar.set("char", "\u2022")
        pPr.append(buChar)

        # Default to left alignment for bullets if not specified
        if "alignment" not in para_data:
            paragraph.alignment = PP_ALIGN.LEFT
    else:
        # Remove indentation for non-bullet text
        pPr.attrib["marL"] = "0"
        pPr.attrib["indent"] = "0"
        # Mark the paragraph explicitly as having no bullet
        buNone = OxmlElement("a:buNone")
        pPr.insert(0, buNone)

    # Apply alignment; unknown names are ignored
    if "alignment" in para_data:
        alignment_map = {
            "LEFT": PP_ALIGN.LEFT,
            "CENTER": PP_ALIGN.CENTER,
            "RIGHT": PP_ALIGN.RIGHT,
            "JUSTIFY": PP_ALIGN.JUSTIFY,
        }
        if para_data["alignment"] in alignment_map:
            paragraph.alignment = alignment_map[para_data["alignment"]]

    # Apply spacing (values are given in points)
    if "space_before" in para_data:
        paragraph.space_before = Pt(para_data["space_before"])
    if "space_after" in para_data:
        paragraph.space_after = Pt(para_data["space_after"])
    if "line_spacing" in para_data:
        paragraph.line_spacing = Pt(para_data["line_spacing"])

    # The text goes into the first run; one is created if none exists
    run = paragraph.runs[0] if paragraph.runs else paragraph.add_run()
    run.text = text

    # Apply font properties
    apply_font_properties(run, para_data)
def apply_font_properties(run, para_data: Dict[str, Any]):
    """Copy the font settings present in ``para_data`` onto ``run``.

    Recognised keys: ``bold``, ``italic``, ``underline``, ``font_size``
    (wrapped in ``Pt``), ``font_name``, ``color`` (hex string, optional
    leading ``#``) and ``theme_color`` (attribute name on
    ``MSO_THEME_COLOR``). ``theme_color`` is only looked at when ``color``
    is absent. A ``color`` that is not six characters long after stripping
    ``#`` is ignored.
    """
    # Flags whose key and font attribute share the same name.
    for flag in ("bold", "italic", "underline"):
        if flag in para_data:
            setattr(run.font, flag, para_data[flag])

    if "font_size" in para_data:
        run.font.size = Pt(para_data["font_size"])
    if "font_name" in para_data:
        run.font.name = para_data["font_name"]

    # Colour: an explicit RGB value wins over a theme colour.
    if "color" in para_data:
        hex_digits = para_data["color"].lstrip("#")
        if len(hex_digits) == 6:
            red, green, blue = (
                int(hex_digits[start : start + 2], 16) for start in (0, 2, 4)
            )
            run.font.color.rgb = RGBColor(red, green, blue)
        return

    if "theme_color" in para_data:
        # Theme colours are given by name (e.g., "DARK_1", "ACCENT_1")
        theme_name = para_data["theme_color"]
        try:
            run.font.color.theme_color = getattr(MSO_THEME_COLOR, theme_name)
        except AttributeError:
            print(f" WARNING: Unknown theme color name '{theme_name}'")
def detect_frame_overflow(inventory: InventoryData) -> Dict[str, Dict[str, float]]:
    """Collect the shapes whose text runs past the bottom of their frame.

    Returns a mapping of slide_key -> shape_key -> ``frame_overflow_bottom``
    that contains only shapes for which that value is not ``None``. Slides
    without any such shape do not appear in the result.
    """
    found = {}
    for slide_key, shapes_dict in inventory.items():
        for shape_key, shape_data in shapes_dict.items():
            overflow = shape_data.frame_overflow_bottom
            if overflow is None:
                continue
            # Create the per-slide dict lazily, on the first overflowing shape.
            found.setdefault(slide_key, {})[shape_key] = overflow
    return found
def validate_replacements(inventory: InventoryData, replacements: Dict) -> List[str]:
    """Check that every slide and shape named in ``replacements`` exists.

    Only top-level keys starting with ``"slide-"`` are examined. For each
    unknown shape the message also lists the inventory shapes of that slide
    that have no replacement entry, each with a preview of up to 50
    characters of its first paragraph when that text is non-empty.

    Returns:
        A list of error messages; empty when everything matches.
    """
    problems = []
    for slide_key, requested in replacements.items():
        if not slide_key.startswith("slide-"):
            continue

        if slide_key not in inventory:
            problems.append(f"Slide '{slide_key}' not found in inventory")
            continue

        known = inventory[slide_key]
        for shape_key in requested.keys():
            if shape_key in known:
                continue

            # Suggest the shapes on this slide that nobody asked to replace.
            hints = []
            for name, data in known.items():
                if name in requested:
                    continue
                paras = data.paragraphs
                if paras and paras[0].text:
                    preview = paras[0].text[:50]
                    if len(paras[0].text) > 50:
                        preview += "..."
                    hints.append(f"{name} ('{preview}')")
                else:
                    hints.append(name)

            listing = ", ".join(sorted(hints)) if hints else "none"
            problems.append(
                f"Shape '{shape_key}' not found on '{slide_key}'. "
                f"Shapes without replacements: {listing}"
            )
    return problems
def check_duplicate_keys(pairs):
    """Build a dict from key/value pairs, rejecting repeated keys.

    Intended as an ``object_pairs_hook`` for ``json.load``.

    Raises:
        ValueError: Naming the first key that shows up a second time.
    """
    entries = list(pairs)
    merged = dict(entries)
    if len(merged) != len(entries):
        # Something collapsed; walk the pairs in order to find the first
        # key that repeats.
        seen = set()
        for key, _ in entries:
            if key in seen:
                raise ValueError(f"Duplicate key found in JSON: '{key}'")
            seen.add(key)
    return merged
def apply_replacements(pptx_file: str, json_file: str, output_file: str):
    """Apply text replacements from a JSON file to a presentation and save it.

    Every text shape reported by ``extract_text_inventory`` has its text
    frame cleared. Shapes that have a ``"paragraphs"`` entry in the
    replacement JSON are then refilled from it. Before ``output_file`` is
    written, the modified presentation is saved to a temporary file and
    inventoried again; the run is rejected when a shape overflows more than
    it did originally or when the new inventory carries warnings.

    Args:
        pptx_file: Path of the presentation to read.
        json_file: Path of the replacement JSON (read as UTF-8).
        output_file: Path the updated presentation is saved to.

    Raises:
        ValueError: If the JSON contains duplicate keys, refers to slides or
            shapes missing from the inventory, or the result has worsened
            overflow or formatting warnings.
    """
    import tempfile

    # Load presentation
    prs = Presentation(pptx_file)

    # Get inventory of all text shapes (returns ShapeData objects).
    # Pass prs so the inventory refers to this Presentation instance.
    inventory = extract_text_inventory(Path(pptx_file), prs)

    # Detect text overflow in original presentation
    original_overflow = detect_frame_overflow(inventory)

    # Load replacement data with duplicate key detection. JSON is read as
    # UTF-8 explicitly so non-ASCII text does not depend on the locale.
    with open(json_file, "r", encoding="utf-8") as f:
        replacements = json.load(f, object_pairs_hook=check_duplicate_keys)

    # Validate replacements
    errors = validate_replacements(inventory, replacements)
    if errors:
        print("ERROR: Invalid shapes in replacement JSON:")
        for error in errors:
            print(f" - {error}")
        print("\nPlease check the inventory and update your replacement JSON.")
        print(
            "You can regenerate the inventory with: python inventory.py <input.pptx> <output.json>"
        )
        raise ValueError(f"Found {len(errors)} validation error(s)")

    # Track statistics
    shapes_processed = 0
    shapes_cleared = 0
    shapes_replaced = 0

    # Process each slide from inventory
    for slide_key, shapes_dict in inventory.items():
        if not slide_key.startswith("slide-"):
            continue
        slide_index = int(slide_key.split("-")[1])
        if slide_index >= len(prs.slides):
            print(f"Warning: Slide {slide_index} not found")
            continue

        # Process each shape from inventory
        for shape_key, shape_data in shapes_dict.items():
            shapes_processed += 1

            # Get the shape directly from ShapeData
            shape = shape_data.shape
            if not shape:
                print(f"Warning: {shape_key} has no shape reference")
                continue

            # Shapes without a replacement entry are left cleared
            text_frame = shape.text_frame  # type: ignore
            text_frame.clear()  # type: ignore
            shapes_cleared += 1

            # Check for replacement paragraphs
            replacement_shape_data = replacements.get(slide_key, {}).get(shape_key, {})
            if "paragraphs" not in replacement_shape_data:
                continue
            shapes_replaced += 1

            # The first replacement reuses the paragraph that is still in
            # the frame after clear(); the rest are appended.
            for i, para_data in enumerate(replacement_shape_data["paragraphs"]):
                if i == 0:
                    p = text_frame.paragraphs[0]  # type: ignore
                else:
                    p = text_frame.add_paragraph()  # type: ignore
                apply_paragraph_properties(p, para_data)

    # Check for issues after replacements.
    # Save to a temporary file and reload to avoid modifying the presentation
    # during inventory (extract_text_inventory accesses font.color which adds
    # empty <a:solidFill/> elements).
    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
        tmp_path = Path(tmp.name)
    # The handle is closed before saving, and the save sits inside the try
    # so the temp file is removed even when saving or re-reading fails.
    try:
        prs.save(str(tmp_path))
        updated_inventory = extract_text_inventory(tmp_path)
        updated_overflow = detect_frame_overflow(updated_inventory)
    finally:
        tmp_path.unlink()  # Clean up temp file

    # Check if any text overflow got worse
    overflow_errors = []
    for slide_key, shape_overflows in updated_overflow.items():
        for shape_key, new_overflow in shape_overflows.items():
            # Get original overflow (0 if there was no overflow before)
            original = original_overflow.get(slide_key, {}).get(shape_key, 0.0)
            # Error if overflow increased
            if new_overflow > original + 0.01:  # Small tolerance for rounding
                increase = new_overflow - original
                overflow_errors.append(
                    f'{slide_key}/{shape_key}: overflow worsened by {increase:.2f}" '
                    f'(was {original:.2f}", now {new_overflow:.2f}")'
                )

    # Collect warnings from updated shapes
    warnings = []
    for slide_key, shapes_dict in updated_inventory.items():
        for shape_key, shape_data in shapes_dict.items():
            if shape_data.warnings:
                for warning in shape_data.warnings:
                    warnings.append(f"{slide_key}/{shape_key}: {warning}")

    # Fail if there are any issues
    if overflow_errors or warnings:
        print("\nERROR: Issues detected in replacement output:")
        if overflow_errors:
            print("\nText overflow worsened:")
            for error in overflow_errors:
                print(f" - {error}")
        if warnings:
            print("\nFormatting warnings:")
            for warning in warnings:
                print(f" - {warning}")
        print("\nPlease fix these issues before saving.")
        raise ValueError(
            f"Found {len(overflow_errors)} overflow error(s) and {len(warnings)} warning(s)"
        )

    # Save the presentation
    prs.save(output_file)

    # Report results
    print(f"Saved updated presentation to: {output_file}")
    print(f"Processed {len(prs.slides)} slides")
    print(f" - Shapes processed: {shapes_processed}")
    print(f" - Shapes cleared: {shapes_cleared}")
    print(f" - Shapes replaced: {shapes_replaced}")
def main():
    """Command-line entry point: validate arguments and apply replacements.

    Expects exactly three arguments (input PPTX, replacements JSON, output
    PPTX). Prints the module docstring on wrong usage and exits with status
    1 on any error.
    """
    if len(sys.argv) != 4:
        print(__doc__)
        sys.exit(1)

    input_pptx, replacements_json, output_pptx = (Path(arg) for arg in sys.argv[1:4])

    # Both input files must exist; the first missing one ends the run.
    required = (
        (input_pptx, f"Error: Input file '{input_pptx}' not found"),
        (
            replacements_json,
            f"Error: Replacements JSON file '{replacements_json}' not found",
        ),
    )
    for path, message in required:
        if not path.exists():
            print(message)
            sys.exit(1)

    try:
        apply_replacements(str(input_pptx), str(replacements_json), str(output_pptx))
    except Exception as exc:
        print(f"Error applying replacements: {exc}")
        import traceback

        # Full traceback to help locate the failing shape or paragraph.
        traceback.print_exc()
        sys.exit(1)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,450 @@
#!/usr/bin/env python3
"""
Create thumbnail grids from PowerPoint presentation slides.
Creates a grid layout of slide thumbnails with configurable columns (max 6).
Each grid contains up to cols×(cols+1) images. For presentations with more
slides, multiple numbered grid files are created automatically.
The program outputs the names of all files created.
Output:
- Single grid: {prefix}.jpg (if slides fit in one grid)
- Multiple grids: {prefix}-1.jpg, {prefix}-2.jpg, etc.
Grid limits by column count:
- 3 cols: max 12 slides per grid (3×4)
- 4 cols: max 20 slides per grid (4×5)
- 5 cols: max 30 slides per grid (5×6) [default]
- 6 cols: max 42 slides per grid (6×7)
Usage:
python thumbnail.py input.pptx [output_prefix] [--cols N] [--outline-placeholders]
Examples:
python thumbnail.py presentation.pptx
# Creates: thumbnails.jpg (using default prefix)
# Outputs:
# Created 1 grid(s):
# - thumbnails.jpg
python thumbnail.py large-deck.pptx grid --cols 4
# Creates: grid-1.jpg, grid-2.jpg, grid-3.jpg
# Outputs:
# Created 3 grid(s):
# - grid-1.jpg
# - grid-2.jpg
# - grid-3.jpg
python thumbnail.py template.pptx analysis --outline-placeholders
# Creates thumbnail grids with red outlines around text placeholders
"""
import argparse
import subprocess
import sys
import tempfile
from pathlib import Path
from inventory import extract_text_inventory
from PIL import Image, ImageDraw, ImageFont
from pptx import Presentation
# Constants
THUMBNAIL_WIDTH = 300 # Fixed thumbnail width in pixels (passed to create_grids)
CONVERSION_DPI = 100 # DPI for PDF to image conversion (pdftoppm -r)
MAX_COLS = 6 # Maximum number of columns; larger --cols values are clamped
DEFAULT_COLS = 5 # Default number of columns
JPEG_QUALITY = 95 # JPEG compression quality used when saving grids
# Grid layout constants
GRID_PADDING = 20 # Padding between thumbnails, in pixels
BORDER_WIDTH = 2 # Border width around thumbnails, in pixels (0 disables it)
FONT_SIZE_RATIO = 0.12 # Font size as fraction of thumbnail width
LABEL_PADDING_RATIO = 0.4 # Label padding as fraction of font size
def main():
    """Command-line entry point: render slides and write thumbnail grids.

    Parses the arguments, validates the column count and the input file,
    optionally collects text regions to outline, converts the slides to
    images and writes one or more grid images. The names of the created
    files are printed. Any failure prints a message and exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Create thumbnail grids from PowerPoint slides."
    )
    parser.add_argument("input", help="Input PowerPoint file (.pptx)")
    parser.add_argument(
        "output_prefix",
        nargs="?",
        default="thumbnails",
        help="Output prefix for image files (default: thumbnails, will create prefix.jpg or prefix-N.jpg)",
    )
    parser.add_argument(
        "--cols",
        type=int,
        default=DEFAULT_COLS,
        help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})",
    )
    parser.add_argument(
        "--outline-placeholders",
        action="store_true",
        help="Outline text placeholders with a colored border",
    )
    args = parser.parse_args()

    # Validate columns. Zero or negative values would otherwise surface
    # later as an opaque range() error or produce nonsensical grid sizes.
    if args.cols < 1:
        print(f"Error: --cols must be at least 1 (got {args.cols})")
        sys.exit(1)
    cols = min(args.cols, MAX_COLS)
    if args.cols > MAX_COLS:
        print(f"Warning: Columns limited to {MAX_COLS} (requested {args.cols})")

    # Validate input
    input_path = Path(args.input)
    if not input_path.exists() or input_path.suffix.lower() != ".pptx":
        print(f"Error: Invalid PowerPoint file: {args.input}")
        sys.exit(1)

    # Construct output path (always JPG)
    output_path = Path(f"{args.output_prefix}.jpg")
    print(f"Processing: {args.input}")

    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Get placeholder regions if outlining is enabled
            placeholder_regions = None
            slide_dimensions = None
            if args.outline_placeholders:
                print("Extracting placeholder regions...")
                placeholder_regions, slide_dimensions = get_placeholder_regions(
                    input_path
                )
                if placeholder_regions:
                    print(f"Found placeholders on {len(placeholder_regions)} slides")

            # Convert slides to images
            slide_images = convert_to_images(input_path, Path(temp_dir), CONVERSION_DPI)
            if not slide_images:
                print("Error: No slides found")
                # SystemExit is not an Exception, so it passes the handler below
                sys.exit(1)
            print(f"Found {len(slide_images)} slides")

            # Create grids (max cols×(cols+1) images per grid)
            grid_files = create_grids(
                slide_images,
                cols,
                THUMBNAIL_WIDTH,
                output_path,
                placeholder_regions,
                slide_dimensions,
            )

            # Print saved files
            print(f"Created {len(grid_files)} grid(s):")
            for grid_file in grid_files:
                print(f" - {grid_file}")
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)
def create_hidden_slide_placeholder(size):
    """Return a light-grey image of ``size`` crossed by two diagonal lines.

    Args:
        size: ``(width, height)`` of the image in pixels.
    """
    canvas = Image.new("RGB", size, color="#F0F0F0")
    pen = ImageDraw.Draw(canvas)
    # At least 5 px, growing with the shorter image side.
    stroke = max(5, min(size) // 100)
    diagonals = (
        [(0, 0), size],
        [(size[0], 0), (0, size[1])],
    )
    for endpoints in diagonals:
        pen.line(endpoints, fill="#CCCCCC", width=stroke)
    return canvas
def get_placeholder_regions(pptx_path):
    """Extract the bounding boxes of all inventoried text shapes.

    Returns a tuple of (placeholder_regions, slide_dimensions).
    placeholder_regions is a dict mapping slide indices to lists of regions;
    each region is a dict with 'left', 'top', 'width', 'height' taken from
    the inventory's shape data (expected to be inches). Slides without any
    region are omitted.
    slide_dimensions is a tuple of (width_inches, height_inches).
    """
    emu_per_inch = 914400.0

    prs = Presentation(str(pptx_path))
    inventory = extract_text_inventory(pptx_path, prs)

    # Slide size in inches; the fallbacks apply when the size is unset.
    dimensions = (
        (prs.slide_width or 9144000) / emu_per_inch,
        (prs.slide_height or 5143500) / emu_per_inch,
    )

    regions_by_slide = {}
    for slide_key, shapes in inventory.items():
        # Keys have the form "slide-N"
        slide_idx = int(slide_key.split("-")[1])
        # The inventory holds only text shapes, so every entry is outlined.
        boxes = [
            {
                "left": shape_data.left,
                "top": shape_data.top,
                "width": shape_data.width,
                "height": shape_data.height,
            }
            for shape_data in shapes.values()
        ]
        if boxes:
            regions_by_slide[slide_idx] = boxes

    return regions_by_slide, dimensions
def convert_to_images(pptx_path, temp_dir, dpi):
    """Convert a presentation to one JPEG per slide via PDF.

    The deck is converted to PDF with ``soffice`` and rasterised with
    ``pdftoppm``. Slides marked hidden (``show="0"``) are represented by a
    generated placeholder image, so the returned list has one entry per
    slide as long as enough rendered pages are available.

    Args:
        pptx_path: Path of the .pptx file.
        temp_dir: Directory (Path) that receives the PDF and the images.
        dpi: Resolution passed to ``pdftoppm``.

    Returns:
        List of image paths in slide order.

    Raises:
        RuntimeError: If either external conversion step fails; the tool's
            stderr output is appended to the message when there is any.
    """
    # Detect hidden slides
    print("Analyzing presentation...")
    prs = Presentation(str(pptx_path))
    total_slides = len(prs.slides)

    # Find hidden slides (1-based indexing for display)
    hidden_slides = {
        idx + 1
        for idx, slide in enumerate(prs.slides)
        if slide.element.get("show") == "0"
    }
    print(f"Total slides: {total_slides}")
    if hidden_slides:
        print(f"Hidden slides: {sorted(hidden_slides)}")

    pdf_path = temp_dir / f"{pptx_path.stem}.pdf"

    # Convert to PDF
    print("Converting to PDF...")
    result = subprocess.run(
        [
            "soffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(temp_dir),
            str(pptx_path),
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0 or not pdf_path.exists():
        # Surface the tool's own diagnostics instead of discarding them.
        detail = (result.stderr or "").strip()
        raise RuntimeError(
            f"PDF conversion failed: {detail}" if detail else "PDF conversion failed"
        )

    # Convert PDF to images
    print(f"Converting to images at {dpi} DPI...")
    result = subprocess.run(
        ["pdftoppm", "-jpeg", "-r", str(dpi), str(pdf_path), str(temp_dir / "slide")],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        detail = (result.stderr or "").strip()
        raise RuntimeError(
            f"Image conversion failed: {detail}" if detail else "Image conversion failed"
        )

    visible_images = sorted(temp_dir.glob("slide-*.jpg"))

    # Placeholder dimensions follow the first rendered slide, if any
    if visible_images:
        with Image.open(visible_images[0]) as img:
            placeholder_size = img.size
    else:
        placeholder_size = (1920, 1080)

    # Create full list with placeholders for hidden slides
    all_images = []
    visible_idx = 0
    for slide_num in range(1, total_slides + 1):
        if slide_num in hidden_slides:
            # Create placeholder image for hidden slide
            placeholder_path = temp_dir / f"hidden-{slide_num:03d}.jpg"
            placeholder_img = create_hidden_slide_placeholder(placeholder_size)
            placeholder_img.save(placeholder_path, "JPEG")
            all_images.append(placeholder_path)
        elif visible_idx < len(visible_images):
            # Use the next rendered page for this visible slide
            all_images.append(visible_images[visible_idx])
            visible_idx += 1
    return all_images
def create_grids(
    image_paths,
    cols,
    width,
    output_path,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Write the slide images as one or more thumbnail grid files.

    Each grid holds at most cols×(cols+1) images. When everything fits in a
    single grid it is saved as ``output_path``; otherwise the files are
    numbered ``{stem}-1{suffix}``, ``{stem}-2{suffix}``, and so on.

    Returns:
        List of the saved file names as strings.
    """
    per_grid = cols * (cols + 1)
    print(
        f"Creating grids with {cols} columns (max {per_grid} images per grid)"
    )

    fits_in_one = len(image_paths) <= per_grid
    saved = []
    for number, first in enumerate(range(0, len(image_paths), per_grid), start=1):
        # Slicing past the end simply yields the shorter final batch.
        batch = image_paths[first : first + per_grid]
        sheet = create_grid(
            batch, cols, width, first, placeholder_regions, slide_dimensions
        )

        if fits_in_one:
            # Single grid - use base filename without suffix
            target = output_path
        else:
            # Multiple grids - number goes before the extension
            target = (
                output_path.parent
                / f"{output_path.stem}-{number}{output_path.suffix}"
            )

        target.parent.mkdir(parents=True, exist_ok=True)
        sheet.save(str(target), quality=JPEG_QUALITY)
        saved.append(str(target))
    return saved
def _outline_text_regions(img, regions, slide_dimensions):
    """Return an RGB copy of ``img`` with a red outline around each region."""
    orig_w, orig_h = img.size

    # Work in RGBA so the overlay can be alpha-composited onto the slide
    if img.mode != "RGBA":
        img = img.convert("RGBA")

    # Calculate scale factors using actual slide dimensions
    if slide_dimensions:
        slide_width_inches, slide_height_inches = slide_dimensions
    else:
        # Fallback: estimate from image size at CONVERSION_DPI
        slide_width_inches = orig_w / CONVERSION_DPI
        slide_height_inches = orig_h / CONVERSION_DPI
    x_scale = orig_w / slide_width_inches
    y_scale = orig_h / slide_height_inches

    # Stroke width depends only on the image size, so compute it once
    stroke_width = max(5, min(orig_w, orig_h) // 150)

    overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
    overlay_draw = ImageDraw.Draw(overlay)
    for region in regions:
        # Convert from inches to pixels in the original image
        px_left = int(region["left"] * x_scale)
        px_top = int(region["top"] * y_scale)
        px_width = int(region["width"] * x_scale)
        px_height = int(region["height"] * y_scale)
        overlay_draw.rectangle(
            [(px_left, px_top), (px_left + px_width, px_top + px_height)],
            outline=(255, 0, 0, 255),  # Bright red, fully opaque
            width=stroke_width,
        )

    # Composite, then convert back to RGB for JPEG saving
    return Image.alpha_composite(img, overlay).convert("RGB")


def create_grid(
    image_paths,
    cols,
    width,
    start_slide_num=0,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Create one thumbnail grid image from slide images.

    Each cell consists of a centred label showing the slide index
    (``start_slide_num`` plus the position in ``image_paths``) and, below
    it, the slide scaled to fit ``width``. The aspect ratio of the first
    image determines the cell height for the whole grid.

    Args:
        image_paths: Non-empty sequence of slide image paths.
        cols: Number of columns in the grid.
        width: Thumbnail width in pixels.
        start_slide_num: Slide index of the first image in ``image_paths``.
        placeholder_regions: Optional dict of slide index -> list of region
            dicts ('left', 'top', 'width', 'height'); those regions are
            outlined in red on the matching slides.
        slide_dimensions: Optional (width_inches, height_inches) used to
            scale regions; estimated from the image size when omitted.

    Returns:
        The grid as an RGB image.
    """
    font_size = int(width * FONT_SIZE_RATIO)
    label_padding = int(font_size * LABEL_PADDING_RATIO)

    # Get dimensions from the first image
    with Image.open(image_paths[0]) as img:
        aspect = img.height / img.width
    height = int(width * aspect)

    # A cell is the thumbnail plus its label with padding above and below
    cell_height = height + font_size + label_padding * 2

    # Calculate grid size
    rows = (len(image_paths) + cols - 1) // cols
    grid_w = cols * width + (cols + 1) * GRID_PADDING
    grid_h = rows * cell_height + (rows + 1) * GRID_PADDING

    # Create grid
    grid = Image.new("RGB", (grid_w, grid_h), "white")
    draw = ImageDraw.Draw(grid)

    # Load font with size based on thumbnail width
    try:
        font = ImageFont.load_default(size=font_size)
    except Exception:
        # Fall back to basic default font if size parameter not supported
        font = ImageFont.load_default()

    # Place thumbnails
    for i, img_path in enumerate(image_paths):
        row, col = divmod(i, cols)
        slide_idx = start_slide_num + i
        x = col * width + (col + 1) * GRID_PADDING
        y_base = row * cell_height + (row + 1) * GRID_PADDING

        # Add label with actual slide number, centred over the thumbnail
        label = f"{slide_idx}"
        bbox = draw.textbbox((0, 0), label, font=font)
        text_w = bbox[2] - bbox[0]
        draw.text(
            (x + (width - text_w) // 2, y_base + label_padding),
            label,
            fill="black",
            font=font,
        )

        # Add thumbnail below label with proportional spacing
        y_thumbnail = y_base + label_padding + font_size + label_padding
        with Image.open(img_path) as source:
            # Outline text regions on the full-size image before scaling
            if placeholder_regions and slide_idx in placeholder_regions:
                thumb = _outline_text_regions(
                    source, placeholder_regions[slide_idx], slide_dimensions
                )
            else:
                thumb = source

            thumb.thumbnail((width, height), Image.Resampling.LANCZOS)
            w, h = thumb.size
            tx = x + (width - w) // 2
            ty = y_thumbnail + (height - h) // 2
            grid.paste(thumb, (tx, ty))

        # Add border
        if BORDER_WIDTH > 0:
            draw.rectangle(
                [
                    (tx - BORDER_WIDTH, ty - BORDER_WIDTH),
                    (tx + w + BORDER_WIDTH - 1, ty + h + BORDER_WIDTH - 1),
                ],
                outline="gray",
                width=BORDER_WIDTH,
            )
    return grid
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()