Initial commit
This commit is contained in:
979
skills/document-skills/pptx/scripts/html2pptx.js
Executable file
979
skills/document-skills/pptx/scripts/html2pptx.js
Executable file
@@ -0,0 +1,979 @@
|
||||
/**
|
||||
* html2pptx - Convert HTML slide to pptxgenjs slide with positioned elements
|
||||
*
|
||||
* USAGE:
|
||||
* const pptx = new pptxgen();
|
||||
* pptx.layout = 'LAYOUT_16x9'; // Must match HTML body dimensions
|
||||
*
|
||||
* const { slide, placeholders } = await html2pptx('slide.html', pptx);
|
||||
* slide.addChart(pptx.charts.LINE, data, placeholders[0]);
|
||||
*
|
||||
* await pptx.writeFile('output.pptx');
|
||||
*
|
||||
* FEATURES:
|
||||
* - Converts HTML to PowerPoint with accurate positioning
|
||||
* - Supports text, images, shapes, and bullet lists
|
||||
* - Extracts placeholder elements (class="placeholder") with positions
|
||||
* - Handles borders and margins; CSS gradients are rejected with a validation error (rasterize them to PNG first)
|
||||
*
|
||||
* VALIDATION:
|
||||
* - Uses body width/height from HTML for viewport sizing
|
||||
* - Throws error if HTML dimensions don't match presentation layout
|
||||
* - Throws error if content overflows body (with overflow details)
|
||||
*
|
||||
* RETURNS:
|
||||
* { slide, placeholders } where placeholders is an array of { id, x, y, w, h }
|
||||
*/
|
||||
|
||||
const { chromium } = require('playwright');
|
||||
const path = require('path');
|
||||
const sharp = require('sharp');
|
||||
|
||||
const PT_PER_PX = 0.75;
|
||||
const PX_PER_IN = 96;
|
||||
const EMU_PER_IN = 914400;
|
||||
|
||||
// Helper: Get body dimensions and check for overflow
|
||||
/**
 * Measure the rendered <body> and report content that spills outside it.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; the callback runs in
 *   the page and reads `document.body` and its computed style.
 * @returns {Promise<object>} The measured `width`, `height`, `scrollWidth`
 *   and `scrollHeight` (CSS px) plus an `errors` array that holds at most one
 *   overflow message.
 */
async function getBodyDimensions(page) {
  const measured = await page.evaluate(() => {
    const { body } = document;
    const css = window.getComputedStyle(body);

    return {
      width: parseFloat(css.width),
      height: parseFloat(css.height),
      scrollWidth: body.scrollWidth,
      scrollHeight: body.scrollHeight
    };
  });

  // One pixel of slack is subtracted before anything counts as overflow;
  // the remainder is converted from px to pt for the message.
  const overflowInPt = (scrollSize, boxSize) =>
    Math.max(0, scrollSize - boxSize - 1) * PT_PER_PX;

  const horizontalPt = overflowInPt(measured.scrollWidth, measured.width);
  const verticalPt = overflowInPt(measured.scrollHeight, measured.height);

  const overflowParts = [];
  if (horizontalPt > 0) {
    overflowParts.push(`${horizontalPt.toFixed(1)}pt horizontally`);
  }
  if (verticalPt > 0) {
    overflowParts.push(`${verticalPt.toFixed(1)}pt vertically`);
  }

  const errors = [];
  if (overflowParts.length > 0) {
    // The bottom-margin hint is only relevant when content runs off the bottom.
    const reminder = verticalPt > 0
      ? ' (Remember: leave 0.5" margin at bottom of slide)'
      : '';
    errors.push(`HTML content overflows body by ${overflowParts.join(' and ')}${reminder}`);
  }

  return { ...measured, errors };
}
|
||||
|
||||
// Helper: Validate dimensions match presentation layout
|
||||
/**
 * Check that the HTML body size matches the presentation's slide layout.
 *
 * @param {object} bodyDimensions - Body `width` and `height` in CSS px.
 * @param {object} pres - Presentation object; `pres.presLayout.width` and
 *   `pres.presLayout.height` are divided by EMU_PER_IN to obtain inches.
 * @returns {string[]} Empty when no layout is set or both dimensions are
 *   within 0.1"; otherwise a single mismatch message.
 */
function validateDimensions(bodyDimensions, pres) {
  const layout = pres.presLayout;
  if (!layout) return [];

  const htmlWidthIn = bodyDimensions.width / PX_PER_IN;
  const htmlHeightIn = bodyDimensions.height / PX_PER_IN;
  const slideWidthIn = layout.width / EMU_PER_IN;
  const slideHeightIn = layout.height / EMU_PER_IN;

  const widthIsOff = Math.abs(slideWidthIn - htmlWidthIn) > 0.1;
  const heightIsOff = Math.abs(slideHeightIn - htmlHeightIn) > 0.1;
  if (!(widthIsOff || heightIsOff)) return [];

  const htmlSize = `${htmlWidthIn.toFixed(1)}" × ${htmlHeightIn.toFixed(1)}"`;
  const slideSize = `${slideWidthIn.toFixed(1)}" × ${slideHeightIn.toFixed(1)}"`;
  return [`HTML dimensions (${htmlSize}) don't match presentation layout (${slideSize})`];
}
|
||||
|
||||
/**
 * Flag text boxes that end too close to the bottom edge of the slide.
 *
 * Only elements of type p, h1-h6 or list whose font size exceeds 12 are
 * checked; each must end at least 0.5" above the bottom of the body.
 *
 * @param {object} slideData - Holds `elements`, each with `type`, `position`
 *   (`y` and `h`, compared against the body height in inches) and optionally
 *   `style.fontSize`, `text` (string or array of runs) or `items`.
 * @param {object} bodyDimensions - Body `height` in CSS px.
 * @returns {string[]} One message per offending text box.
 */
function validateTextBoxPosition(slideData, bodyDimensions) {
  const TEXT_TYPES = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'list'];
  const minBottomMargin = 0.5; // inches
  const slideHeightIn = bodyDimensions.height / PX_PER_IN;

  // First non-empty piece of text carried by the element, used for the message.
  const firstText = (el) => {
    if (typeof el.text === 'string') return el.text;
    if (Array.isArray(el.text)) return el.text.find(run => run.text)?.text || '';
    if (Array.isArray(el.items)) return el.items.find(run => run.text)?.text || '';
    return '';
  };

  const errors = [];

  for (const el of slideData.elements) {
    if (!TEXT_TYPES.includes(el.type)) continue;

    const fontSize = el.style?.fontSize || 0;
    const gapToBottom = slideHeightIn - (el.position.y + el.position.h);
    if (!(fontSize > 12 && gapToBottom < minBottomMargin)) continue;

    const content = firstText(el);
    const preview = content.substring(0, 50) + (content.length > 50 ? '...' : '');

    errors.push(
      `Text box "${preview}" ends too close to bottom edge ` +
      `(${gapToBottom.toFixed(2)}" from bottom, minimum ${minBottomMargin}" required)`
    );
  }

  return errors;
}
|
||||
|
||||
// Helper: Add background to slide
|
||||
/**
 * Apply the extracted slide background to a slide.
 *
 * Sets `targetSlide.background` to `{ path }` for an image background or
 * `{ color }` for a colour background; leaves the slide untouched otherwise.
 *
 * @param {object} slideData - Holds `background` with `type` ('image' or
 *   'color') and the matching `path` or `value`.
 * @param {object} targetSlide - Slide whose `background` property is assigned.
 * @param {string} [tmpDir] - Not used by this function; kept for callers.
 * @returns {Promise<void>}
 */
async function addBackground(slideData, targetSlide, tmpDir) {
  const { background } = slideData;

  // Convert a file:// URL into a filesystem path. URLs are percent-encoded
  // (a space arrives as %20), so stripping the scheme alone would yield a
  // path that does not exist on disk.
  const toLocalPath = (src) => {
    if (!src.startsWith('file://')) return src;
    let localPath = src.slice('file://'.length);
    try {
      localPath = decodeURIComponent(localPath);
    } catch {
      // Malformed escape sequence: deliberately fall back to the raw path.
    }
    // On Windows "file:///C:/dir/x.png" leaves "/C:/dir/x.png"; drop the slash.
    if (process.platform === 'win32' && /^\/[A-Za-z]:/.test(localPath)) {
      localPath = localPath.slice(1);
    }
    return localPath;
  };

  if (background.type === 'image' && background.path) {
    targetSlide.background = { path: toLocalPath(background.path) };
  } else if (background.type === 'color' && background.value) {
    targetSlide.background = { color: background.value };
  }
}
|
||||
|
||||
// Helper: Add elements to slide
|
||||
/**
 * Add every extracted element to a slide.
 *
 * Dispatches on `el.type`:
 *  - 'image': `targetSlide.addImage` with the image path and position
 *  - 'line':  `targetSlide.addShape(pres.ShapeType.line, ...)` from x1/y1 to x2/y2
 *  - 'shape': `targetSlide.addText` with rect/roundRect shape options
 *  - 'list':  `targetSlide.addText(el.items, ...)`
 *  - anything else is treated as a text box: `targetSlide.addText(el.text, ...)`
 *
 * @param {object} slideData - Holds `elements`; positions are in inches,
 *   font size and line spacing in points.
 * @param {object} targetSlide - Slide exposing `addImage`, `addShape`, `addText`.
 * @param {object} pres - Presentation exposing `ShapeType.line`, `.rect`, `.roundRect`.
 * @returns {void}
 */
function addElements(slideData, targetSlide, pres) {
  const PT_PER_IN = 72;

  // Convert a file:// URL into a filesystem path. URLs are percent-encoded
  // (a space arrives as %20), so stripping the scheme alone would yield a
  // path that does not exist on disk.
  const toLocalPath = (src) => {
    if (!src.startsWith('file://')) return src;
    let localPath = src.slice('file://'.length);
    try {
      localPath = decodeURIComponent(localPath);
    } catch {
      // Malformed escape sequence: deliberately fall back to the raw path.
    }
    // On Windows "file:///C:/dir/x.png" leaves "/C:/dir/x.png"; drop the slash.
    if (process.platform === 'win32' && /^\/[A-Za-z]:/.test(localPath)) {
      localPath = localPath.slice(1);
    }
    return localPath;
  };

  for (const el of slideData.elements) {
    if (el.type === 'image') {
      targetSlide.addImage({
        path: toLocalPath(el.src),
        x: el.position.x,
        y: el.position.y,
        w: el.position.w,
        h: el.position.h
      });
    } else if (el.type === 'line') {
      targetSlide.addShape(pres.ShapeType.line, {
        x: el.x1,
        y: el.y1,
        w: el.x2 - el.x1,
        h: el.y2 - el.y1,
        line: { color: el.color, width: el.width }
      });
    } else if (el.type === 'shape') {
      const shapeOptions = {
        x: el.position.x,
        y: el.position.y,
        w: el.position.w,
        h: el.position.h,
        shape: el.shape.rectRadius > 0 ? pres.ShapeType.roundRect : pres.ShapeType.rect
      };

      if (el.shape.fill) {
        shapeOptions.fill = { color: el.shape.fill };
        if (el.shape.transparency != null) shapeOptions.fill.transparency = el.shape.transparency;
      }
      if (el.shape.line) shapeOptions.line = el.shape.line;
      if (el.shape.rectRadius > 0) shapeOptions.rectRadius = el.shape.rectRadius;
      if (el.shape.shadow) shapeOptions.shadow = el.shape.shadow;

      targetSlide.addText(el.text || '', shapeOptions);
    } else if (el.type === 'list') {
      targetSlide.addText(el.items, {
        x: el.position.x,
        y: el.position.y,
        w: el.position.w,
        h: el.position.h,
        fontSize: el.style.fontSize,
        fontFace: el.style.fontFace,
        color: el.style.color,
        align: el.style.align,
        valign: 'top',
        lineSpacing: el.style.lineSpacing,
        paraSpaceBefore: el.style.paraSpaceBefore,
        paraSpaceAfter: el.style.paraSpaceAfter,
        margin: el.style.margin
      });
    } else {
      // Decide whether the box holds a single line of text. The line height
      // is in points while the box height is in inches, so convert before
      // comparing; otherwise every box would look like a single line.
      const lineHeightPt = el.style.lineSpacing || el.style.fontSize * 1.2;
      const lineHeightIn = lineHeightPt / PT_PER_IN;
      const isSingleLine = el.position.h <= lineHeightIn * 1.5;

      let adjustedX = el.position.x;
      let adjustedW = el.position.w;

      // Make single-line text 2% wider to account for width underestimation,
      // growing the box away from the edge the text is aligned to.
      if (isSingleLine) {
        const widthIncrease = el.position.w * 0.02;
        adjustedW = el.position.w + widthIncrease;

        if (el.style.align === 'center') {
          adjustedX = el.position.x - (widthIncrease / 2); // expand both sides
        } else if (el.style.align === 'right') {
          adjustedX = el.position.x - widthIncrease;       // expand to the left
        }
        // Left (default): x stays put, box expands to the right.
      }

      const textOptions = {
        x: adjustedX,
        y: el.position.y,
        w: adjustedW,
        h: el.position.h,
        fontSize: el.style.fontSize,
        fontFace: el.style.fontFace,
        color: el.style.color,
        bold: el.style.bold,
        italic: el.style.italic,
        underline: el.style.underline,
        valign: 'top',
        lineSpacing: el.style.lineSpacing,
        paraSpaceBefore: el.style.paraSpaceBefore,
        paraSpaceAfter: el.style.paraSpaceAfter,
        inset: 0 // Remove default PowerPoint internal padding
      };

      if (el.style.align) textOptions.align = el.style.align;
      if (el.style.margin) textOptions.margin = el.style.margin;
      if (el.style.rotate !== undefined) textOptions.rotate = el.style.rotate;
      if (el.style.transparency != null) textOptions.transparency = el.style.transparency;

      targetSlide.addText(el.text, textOptions);
    }
  }
}
|
||||
|
||||
// Helper: Extract slide data from HTML page
|
||||
async function extractSlideData(page) {
|
||||
return await page.evaluate(() => {
|
||||
const PT_PER_PX = 0.75;
|
||||
const PX_PER_IN = 96;
|
||||
|
||||
// Fonts that are single-weight and should not have bold applied
|
||||
// (applying bold causes PowerPoint to use faux bold which makes text wider)
|
||||
const SINGLE_WEIGHT_FONTS = ['impact'];
|
||||
|
||||
// Helper: Check if a font should skip bold formatting
|
||||
const shouldSkipBold = (fontFamily) => {
|
||||
if (!fontFamily) return false;
|
||||
const normalizedFont = fontFamily.toLowerCase().replace(/['"]/g, '').split(',')[0].trim();
|
||||
return SINGLE_WEIGHT_FONTS.includes(normalizedFont);
|
||||
};
|
||||
|
||||
// Unit conversion helpers
|
||||
const pxToInch = (px) => px / PX_PER_IN;
|
||||
const pxToPoints = (pxStr) => parseFloat(pxStr) * PT_PER_PX;
|
||||
const rgbToHex = (rgbStr) => {
|
||||
// Handle transparent backgrounds by defaulting to white
|
||||
if (rgbStr === 'rgba(0, 0, 0, 0)' || rgbStr === 'transparent') return 'FFFFFF';
|
||||
|
||||
const match = rgbStr.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)/);
|
||||
if (!match) return 'FFFFFF';
|
||||
return match.slice(1).map(n => parseInt(n).toString(16).padStart(2, '0')).join('');
|
||||
};
|
||||
|
||||
const extractAlpha = (rgbStr) => {
|
||||
const match = rgbStr.match(/rgba\((\d+),\s*(\d+),\s*(\d+),\s*([\d.]+)\)/);
|
||||
if (!match || !match[4]) return null;
|
||||
const alpha = parseFloat(match[4]);
|
||||
return Math.round((1 - alpha) * 100);
|
||||
};
|
||||
|
||||
const applyTextTransform = (text, textTransform) => {
|
||||
if (textTransform === 'uppercase') return text.toUpperCase();
|
||||
if (textTransform === 'lowercase') return text.toLowerCase();
|
||||
if (textTransform === 'capitalize') {
|
||||
return text.replace(/\b\w/g, c => c.toUpperCase());
|
||||
}
|
||||
return text;
|
||||
};
|
||||
|
||||
// Extract rotation angle from CSS transform and writing-mode
|
||||
const getRotation = (transform, writingMode) => {
|
||||
let angle = 0;
|
||||
|
||||
// Handle writing-mode first
|
||||
// PowerPoint: 90° = text rotated 90° clockwise (reads top to bottom, letters upright)
|
||||
// PowerPoint: 270° = text rotated 270° clockwise (reads bottom to top, letters upright)
|
||||
if (writingMode === 'vertical-rl') {
|
||||
// vertical-rl alone = text reads top to bottom = 90° in PowerPoint
|
||||
angle = 90;
|
||||
} else if (writingMode === 'vertical-lr') {
|
||||
// vertical-lr alone = text reads bottom to top = 270° in PowerPoint
|
||||
angle = 270;
|
||||
}
|
||||
|
||||
// Then add any transform rotation
|
||||
if (transform && transform !== 'none') {
|
||||
// Try to match rotate() function
|
||||
const rotateMatch = transform.match(/rotate\((-?\d+(?:\.\d+)?)deg\)/);
|
||||
if (rotateMatch) {
|
||||
angle += parseFloat(rotateMatch[1]);
|
||||
} else {
|
||||
// Browser may compute as matrix - extract rotation from matrix
|
||||
const matrixMatch = transform.match(/matrix\(([^)]+)\)/);
|
||||
if (matrixMatch) {
|
||||
const values = matrixMatch[1].split(',').map(parseFloat);
|
||||
// matrix(a, b, c, d, e, f) where rotation = atan2(b, a)
|
||||
const matrixAngle = Math.atan2(values[1], values[0]) * (180 / Math.PI);
|
||||
angle += Math.round(matrixAngle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Normalize to 0-359 range
|
||||
angle = angle % 360;
|
||||
if (angle < 0) angle += 360;
|
||||
|
||||
return angle === 0 ? null : angle;
|
||||
};
|
||||
|
||||
// Get position/dimensions accounting for rotation
|
||||
const getPositionAndSize = (el, rect, rotation) => {
|
||||
if (rotation === null) {
|
||||
return { x: rect.left, y: rect.top, w: rect.width, h: rect.height };
|
||||
}
|
||||
|
||||
// For 90° or 270° rotations, swap width and height
|
||||
// because PowerPoint applies rotation to the original (unrotated) box
|
||||
const isVertical = rotation === 90 || rotation === 270;
|
||||
|
||||
if (isVertical) {
|
||||
// The browser shows us the rotated dimensions (tall box for vertical text)
|
||||
// But PowerPoint needs the pre-rotation dimensions (wide box that will be rotated)
|
||||
// So we swap: browser's height becomes PPT's width, browser's width becomes PPT's height
|
||||
const centerX = rect.left + rect.width / 2;
|
||||
const centerY = rect.top + rect.height / 2;
|
||||
|
||||
return {
|
||||
x: centerX - rect.height / 2,
|
||||
y: centerY - rect.width / 2,
|
||||
w: rect.height,
|
||||
h: rect.width
|
||||
};
|
||||
}
|
||||
|
||||
// For other rotations, use element's offset dimensions
|
||||
const centerX = rect.left + rect.width / 2;
|
||||
const centerY = rect.top + rect.height / 2;
|
||||
return {
|
||||
x: centerX - el.offsetWidth / 2,
|
||||
y: centerY - el.offsetHeight / 2,
|
||||
w: el.offsetWidth,
|
||||
h: el.offsetHeight
|
||||
};
|
||||
};
|
||||
|
||||
// Parse CSS box-shadow into PptxGenJS shadow properties
|
||||
const parseBoxShadow = (boxShadow) => {
|
||||
if (!boxShadow || boxShadow === 'none') return null;
|
||||
|
||||
// Browser computed style format: "rgba(0, 0, 0, 0.3) 2px 2px 8px 0px [inset]"
|
||||
// CSS format: "[inset] 2px 2px 8px 0px rgba(0, 0, 0, 0.3)"
|
||||
|
||||
const insetMatch = boxShadow.match(/inset/);
|
||||
|
||||
// IMPORTANT: PptxGenJS/PowerPoint doesn't properly support inset shadows
|
||||
// Only process outer shadows to avoid file corruption
|
||||
if (insetMatch) return null;
|
||||
|
||||
// Extract color first (rgba or rgb at start)
|
||||
const colorMatch = boxShadow.match(/rgba?\([^)]+\)/);
|
||||
|
||||
// Extract numeric values (handles both px and pt units)
|
||||
const parts = boxShadow.match(/([-\d.]+)(px|pt)/g);
|
||||
|
||||
if (!parts || parts.length < 2) return null;
|
||||
|
||||
const offsetX = parseFloat(parts[0]);
|
||||
const offsetY = parseFloat(parts[1]);
|
||||
const blur = parts.length > 2 ? parseFloat(parts[2]) : 0;
|
||||
|
||||
// Calculate angle from offsets (in degrees, 0 = right, 90 = down)
|
||||
let angle = 0;
|
||||
if (offsetX !== 0 || offsetY !== 0) {
|
||||
angle = Math.atan2(offsetY, offsetX) * (180 / Math.PI);
|
||||
if (angle < 0) angle += 360;
|
||||
}
|
||||
|
||||
// Calculate offset distance (hypotenuse)
|
||||
const offset = Math.sqrt(offsetX * offsetX + offsetY * offsetY) * PT_PER_PX;
|
||||
|
||||
// Extract opacity from rgba
|
||||
let opacity = 0.5;
|
||||
if (colorMatch) {
|
||||
const opacityMatch = colorMatch[0].match(/[\d.]+\)$/);
|
||||
if (opacityMatch) {
|
||||
opacity = parseFloat(opacityMatch[0].replace(')', ''));
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
type: 'outer',
|
||||
angle: Math.round(angle),
|
||||
blur: blur * 0.75, // Convert to points
|
||||
color: colorMatch ? rgbToHex(colorMatch[0]) : '000000',
|
||||
offset: offset,
|
||||
opacity
|
||||
};
|
||||
};
|
||||
|
||||
// Parse inline formatting tags (<b>, <i>, <u>, <strong>, <em>, <span>) into text runs
|
||||
const parseInlineFormatting = (element, baseOptions = {}, runs = [], baseTextTransform = (x) => x) => {
|
||||
let prevNodeIsText = false;
|
||||
|
||||
element.childNodes.forEach((node) => {
|
||||
let textTransform = baseTextTransform;
|
||||
|
||||
const isText = node.nodeType === Node.TEXT_NODE || node.tagName === 'BR';
|
||||
if (isText) {
|
||||
const text = node.tagName === 'BR' ? '\n' : textTransform(node.textContent.replace(/\s+/g, ' '));
|
||||
const prevRun = runs[runs.length - 1];
|
||||
if (prevNodeIsText && prevRun) {
|
||||
prevRun.text += text;
|
||||
} else {
|
||||
runs.push({ text, options: { ...baseOptions } });
|
||||
}
|
||||
|
||||
} else if (node.nodeType === Node.ELEMENT_NODE && node.textContent.trim()) {
|
||||
const options = { ...baseOptions };
|
||||
const computed = window.getComputedStyle(node);
|
||||
|
||||
// Handle inline elements with computed styles
|
||||
if (node.tagName === 'SPAN' || node.tagName === 'B' || node.tagName === 'STRONG' || node.tagName === 'I' || node.tagName === 'EM' || node.tagName === 'U') {
|
||||
const isBold = computed.fontWeight === 'bold' || parseInt(computed.fontWeight) >= 600;
|
||||
if (isBold && !shouldSkipBold(computed.fontFamily)) options.bold = true;
|
||||
if (computed.fontStyle === 'italic') options.italic = true;
|
||||
if (computed.textDecoration && computed.textDecoration.includes('underline')) options.underline = true;
|
||||
if (computed.color && computed.color !== 'rgb(0, 0, 0)') {
|
||||
options.color = rgbToHex(computed.color);
|
||||
const transparency = extractAlpha(computed.color);
|
||||
if (transparency !== null) options.transparency = transparency;
|
||||
}
|
||||
if (computed.fontSize) options.fontSize = pxToPoints(computed.fontSize);
|
||||
|
||||
// Apply text-transform on the span element itself
|
||||
if (computed.textTransform && computed.textTransform !== 'none') {
|
||||
const transformStr = computed.textTransform;
|
||||
textTransform = (text) => applyTextTransform(text, transformStr);
|
||||
}
|
||||
|
||||
// Validate: Check for margins on inline elements
|
||||
if (computed.marginLeft && parseFloat(computed.marginLeft) > 0) {
|
||||
errors.push(`Inline element <${node.tagName.toLowerCase()}> has margin-left which is not supported in PowerPoint. Remove margin from inline elements.`);
|
||||
}
|
||||
if (computed.marginRight && parseFloat(computed.marginRight) > 0) {
|
||||
errors.push(`Inline element <${node.tagName.toLowerCase()}> has margin-right which is not supported in PowerPoint. Remove margin from inline elements.`);
|
||||
}
|
||||
if (computed.marginTop && parseFloat(computed.marginTop) > 0) {
|
||||
errors.push(`Inline element <${node.tagName.toLowerCase()}> has margin-top which is not supported in PowerPoint. Remove margin from inline elements.`);
|
||||
}
|
||||
if (computed.marginBottom && parseFloat(computed.marginBottom) > 0) {
|
||||
errors.push(`Inline element <${node.tagName.toLowerCase()}> has margin-bottom which is not supported in PowerPoint. Remove margin from inline elements.`);
|
||||
}
|
||||
|
||||
// Recursively process the child node. This will flatten nested spans into multiple runs.
|
||||
parseInlineFormatting(node, options, runs, textTransform);
|
||||
}
|
||||
}
|
||||
|
||||
prevNodeIsText = isText;
|
||||
});
|
||||
|
||||
// Trim leading space from first run and trailing space from last run
|
||||
if (runs.length > 0) {
|
||||
runs[0].text = runs[0].text.replace(/^\s+/, '');
|
||||
runs[runs.length - 1].text = runs[runs.length - 1].text.replace(/\s+$/, '');
|
||||
}
|
||||
|
||||
return runs.filter(r => r.text.length > 0);
|
||||
};
|
||||
|
||||
// Extract background from body (image or color)
|
||||
const body = document.body;
|
||||
const bodyStyle = window.getComputedStyle(body);
|
||||
const bgImage = bodyStyle.backgroundImage;
|
||||
const bgColor = bodyStyle.backgroundColor;
|
||||
|
||||
// Collect validation errors
|
||||
const errors = [];
|
||||
|
||||
// Validate: Check for CSS gradients
|
||||
if (bgImage && (bgImage.includes('linear-gradient') || bgImage.includes('radial-gradient'))) {
|
||||
errors.push(
|
||||
'CSS gradients are not supported. Use Sharp to rasterize gradients as PNG images first, ' +
|
||||
'then reference with background-image: url(\'gradient.png\')'
|
||||
);
|
||||
}
|
||||
|
||||
let background;
|
||||
if (bgImage && bgImage !== 'none') {
|
||||
// Extract URL from url("...") or url(...)
|
||||
const urlMatch = bgImage.match(/url\(["']?([^"')]+)["']?\)/);
|
||||
if (urlMatch) {
|
||||
background = {
|
||||
type: 'image',
|
||||
path: urlMatch[1]
|
||||
};
|
||||
} else {
|
||||
background = {
|
||||
type: 'color',
|
||||
value: rgbToHex(bgColor)
|
||||
};
|
||||
}
|
||||
} else {
|
||||
background = {
|
||||
type: 'color',
|
||||
value: rgbToHex(bgColor)
|
||||
};
|
||||
}
|
||||
|
||||
// Process all elements
|
||||
const elements = [];
|
||||
const placeholders = [];
|
||||
const textTags = ['P', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'UL', 'OL', 'LI'];
|
||||
const processed = new Set();
|
||||
|
||||
document.querySelectorAll('*').forEach((el) => {
|
||||
if (processed.has(el)) return;
|
||||
|
||||
// Validate text elements don't have backgrounds, borders, or shadows
|
||||
if (textTags.includes(el.tagName)) {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const hasBg = computed.backgroundColor && computed.backgroundColor !== 'rgba(0, 0, 0, 0)';
|
||||
const hasBorder = (computed.borderWidth && parseFloat(computed.borderWidth) > 0) ||
|
||||
(computed.borderTopWidth && parseFloat(computed.borderTopWidth) > 0) ||
|
||||
(computed.borderRightWidth && parseFloat(computed.borderRightWidth) > 0) ||
|
||||
(computed.borderBottomWidth && parseFloat(computed.borderBottomWidth) > 0) ||
|
||||
(computed.borderLeftWidth && parseFloat(computed.borderLeftWidth) > 0);
|
||||
const hasShadow = computed.boxShadow && computed.boxShadow !== 'none';
|
||||
|
||||
if (hasBg || hasBorder || hasShadow) {
|
||||
errors.push(
|
||||
`Text element <${el.tagName.toLowerCase()}> has ${hasBg ? 'background' : hasBorder ? 'border' : 'shadow'}. ` +
|
||||
'Backgrounds, borders, and shadows are only supported on <div> elements, not text elements.'
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Extract placeholder elements (for charts, etc.)
|
||||
if (el.className && el.className.includes('placeholder')) {
|
||||
const rect = el.getBoundingClientRect();
|
||||
if (rect.width === 0 || rect.height === 0) {
|
||||
errors.push(
|
||||
`Placeholder "${el.id || 'unnamed'}" has ${rect.width === 0 ? 'width: 0' : 'height: 0'}. Check the layout CSS.`
|
||||
);
|
||||
} else {
|
||||
placeholders.push({
|
||||
id: el.id || `placeholder-${placeholders.length}`,
|
||||
x: pxToInch(rect.left),
|
||||
y: pxToInch(rect.top),
|
||||
w: pxToInch(rect.width),
|
||||
h: pxToInch(rect.height)
|
||||
});
|
||||
}
|
||||
processed.add(el);
|
||||
return;
|
||||
}
|
||||
|
||||
// Extract images
|
||||
if (el.tagName === 'IMG') {
|
||||
const rect = el.getBoundingClientRect();
|
||||
if (rect.width > 0 && rect.height > 0) {
|
||||
elements.push({
|
||||
type: 'image',
|
||||
src: el.src,
|
||||
position: {
|
||||
x: pxToInch(rect.left),
|
||||
y: pxToInch(rect.top),
|
||||
w: pxToInch(rect.width),
|
||||
h: pxToInch(rect.height)
|
||||
}
|
||||
});
|
||||
processed.add(el);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Extract DIVs with backgrounds/borders as shapes
|
||||
const isContainer = el.tagName === 'DIV' && !textTags.includes(el.tagName);
|
||||
if (isContainer) {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const hasBg = computed.backgroundColor && computed.backgroundColor !== 'rgba(0, 0, 0, 0)';
|
||||
|
||||
// Validate: Check for unwrapped text content in DIV
|
||||
for (const node of el.childNodes) {
|
||||
if (node.nodeType === Node.TEXT_NODE) {
|
||||
const text = node.textContent.trim();
|
||||
if (text) {
|
||||
errors.push(
|
||||
`DIV element contains unwrapped text "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}". ` +
|
||||
'All text must be wrapped in <p>, <h1>-<h6>, <ul>, or <ol> tags to appear in PowerPoint.'
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for background images on shapes
|
||||
const bgImage = computed.backgroundImage;
|
||||
if (bgImage && bgImage !== 'none') {
|
||||
errors.push(
|
||||
'Background images on DIV elements are not supported. ' +
|
||||
'Use solid colors or borders for shapes, or use slide.addImage() in PptxGenJS to layer images.'
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for borders - both uniform and partial
|
||||
const borderTop = computed.borderTopWidth;
|
||||
const borderRight = computed.borderRightWidth;
|
||||
const borderBottom = computed.borderBottomWidth;
|
||||
const borderLeft = computed.borderLeftWidth;
|
||||
const borders = [borderTop, borderRight, borderBottom, borderLeft].map(b => parseFloat(b) || 0);
|
||||
const hasBorder = borders.some(b => b > 0);
|
||||
const hasUniformBorder = hasBorder && borders.every(b => b === borders[0]);
|
||||
const borderLines = [];
|
||||
|
||||
if (hasBorder && !hasUniformBorder) {
|
||||
const rect = el.getBoundingClientRect();
|
||||
const x = pxToInch(rect.left);
|
||||
const y = pxToInch(rect.top);
|
||||
const w = pxToInch(rect.width);
|
||||
const h = pxToInch(rect.height);
|
||||
|
||||
// Collect lines to add after shape (inset by half the line width to center on edge)
|
||||
if (parseFloat(borderTop) > 0) {
|
||||
const widthPt = pxToPoints(borderTop);
|
||||
const inset = (widthPt / 72) / 2; // Convert points to inches, then half
|
||||
borderLines.push({
|
||||
type: 'line',
|
||||
x1: x, y1: y + inset, x2: x + w, y2: y + inset,
|
||||
width: widthPt,
|
||||
color: rgbToHex(computed.borderTopColor)
|
||||
});
|
||||
}
|
||||
if (parseFloat(borderRight) > 0) {
|
||||
const widthPt = pxToPoints(borderRight);
|
||||
const inset = (widthPt / 72) / 2;
|
||||
borderLines.push({
|
||||
type: 'line',
|
||||
x1: x + w - inset, y1: y, x2: x + w - inset, y2: y + h,
|
||||
width: widthPt,
|
||||
color: rgbToHex(computed.borderRightColor)
|
||||
});
|
||||
}
|
||||
if (parseFloat(borderBottom) > 0) {
|
||||
const widthPt = pxToPoints(borderBottom);
|
||||
const inset = (widthPt / 72) / 2;
|
||||
borderLines.push({
|
||||
type: 'line',
|
||||
x1: x, y1: y + h - inset, x2: x + w, y2: y + h - inset,
|
||||
width: widthPt,
|
||||
color: rgbToHex(computed.borderBottomColor)
|
||||
});
|
||||
}
|
||||
if (parseFloat(borderLeft) > 0) {
|
||||
const widthPt = pxToPoints(borderLeft);
|
||||
const inset = (widthPt / 72) / 2;
|
||||
borderLines.push({
|
||||
type: 'line',
|
||||
x1: x + inset, y1: y, x2: x + inset, y2: y + h,
|
||||
width: widthPt,
|
||||
color: rgbToHex(computed.borderLeftColor)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (hasBg || hasBorder) {
|
||||
const rect = el.getBoundingClientRect();
|
||||
if (rect.width > 0 && rect.height > 0) {
|
||||
const shadow = parseBoxShadow(computed.boxShadow);
|
||||
|
||||
// Only add shape if there's background or uniform border
|
||||
if (hasBg || hasUniformBorder) {
|
||||
elements.push({
|
||||
type: 'shape',
|
||||
text: '', // Shape only - child text elements render on top
|
||||
position: {
|
||||
x: pxToInch(rect.left),
|
||||
y: pxToInch(rect.top),
|
||||
w: pxToInch(rect.width),
|
||||
h: pxToInch(rect.height)
|
||||
},
|
||||
shape: {
|
||||
fill: hasBg ? rgbToHex(computed.backgroundColor) : null,
|
||||
transparency: hasBg ? extractAlpha(computed.backgroundColor) : null,
|
||||
line: hasUniformBorder ? {
|
||||
color: rgbToHex(computed.borderColor),
|
||||
width: pxToPoints(computed.borderWidth)
|
||||
} : null,
|
||||
// Convert border-radius to rectRadius (in inches)
|
||||
// % values: 50%+ = circle (1), <50% = percentage of min dimension
|
||||
// pt values: divide by 72 (72pt = 1 inch)
|
||||
// px values: divide by 96 (96px = 1 inch)
|
||||
rectRadius: (() => {
|
||||
const radius = computed.borderRadius;
|
||||
const radiusValue = parseFloat(radius);
|
||||
if (radiusValue === 0) return 0;
|
||||
|
||||
if (radius.includes('%')) {
|
||||
if (radiusValue >= 50) return 1;
|
||||
// Calculate percentage of smaller dimension
|
||||
const minDim = Math.min(rect.width, rect.height);
|
||||
return (radiusValue / 100) * pxToInch(minDim);
|
||||
}
|
||||
|
||||
if (radius.includes('pt')) return radiusValue / 72;
|
||||
return radiusValue / PX_PER_IN;
|
||||
})(),
|
||||
shadow: shadow
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Add partial border lines
|
||||
elements.push(...borderLines);
|
||||
|
||||
processed.add(el);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract bullet lists as single text block
|
||||
if (el.tagName === 'UL' || el.tagName === 'OL') {
|
||||
const rect = el.getBoundingClientRect();
|
||||
if (rect.width === 0 || rect.height === 0) return;
|
||||
|
||||
const liElements = Array.from(el.querySelectorAll('li'));
|
||||
const items = [];
|
||||
const ulComputed = window.getComputedStyle(el);
|
||||
const ulPaddingLeftPt = pxToPoints(ulComputed.paddingLeft);
|
||||
|
||||
// Split: margin-left for bullet position, indent for text position
|
||||
// margin-left + indent = ul padding-left
|
||||
const marginLeft = ulPaddingLeftPt * 0.5;
|
||||
const textIndent = ulPaddingLeftPt * 0.5;
|
||||
|
||||
liElements.forEach((li, idx) => {
|
||||
const isLast = idx === liElements.length - 1;
|
||||
const runs = parseInlineFormatting(li, { breakLine: false });
|
||||
// Clean manual bullets from first run
|
||||
if (runs.length > 0) {
|
||||
runs[0].text = runs[0].text.replace(/^[•\-\*▪▸]\s*/, '');
|
||||
runs[0].options.bullet = { indent: textIndent };
|
||||
}
|
||||
// Set breakLine on last run
|
||||
if (runs.length > 0 && !isLast) {
|
||||
runs[runs.length - 1].options.breakLine = true;
|
||||
}
|
||||
items.push(...runs);
|
||||
});
|
||||
|
||||
const computed = window.getComputedStyle(liElements[0] || el);
|
||||
|
||||
elements.push({
|
||||
type: 'list',
|
||||
items: items,
|
||||
position: {
|
||||
x: pxToInch(rect.left),
|
||||
y: pxToInch(rect.top),
|
||||
w: pxToInch(rect.width),
|
||||
h: pxToInch(rect.height)
|
||||
},
|
||||
style: {
|
||||
fontSize: pxToPoints(computed.fontSize),
|
||||
fontFace: computed.fontFamily.split(',')[0].replace(/['"]/g, '').trim(),
|
||||
color: rgbToHex(computed.color),
|
||||
transparency: extractAlpha(computed.color),
|
||||
align: computed.textAlign === 'start' ? 'left' : computed.textAlign,
|
||||
lineSpacing: computed.lineHeight && computed.lineHeight !== 'normal' ? pxToPoints(computed.lineHeight) : null,
|
||||
paraSpaceBefore: 0,
|
||||
paraSpaceAfter: pxToPoints(computed.marginBottom),
|
||||
// PptxGenJS margin array is [left, right, bottom, top]
|
||||
margin: [marginLeft, 0, 0, 0]
|
||||
}
|
||||
});
|
||||
|
||||
liElements.forEach(li => processed.add(li));
|
||||
processed.add(el);
|
||||
return;
|
||||
}
|
||||
|
||||
// Extract text elements (P, H1, H2, etc.)
|
||||
if (!textTags.includes(el.tagName)) return;
|
||||
|
||||
const rect = el.getBoundingClientRect();
|
||||
const text = el.textContent.trim();
|
||||
if (rect.width === 0 || rect.height === 0 || !text) return;
|
||||
|
||||
// Validate: Check for manual bullet symbols in text elements (not in lists)
|
||||
if (el.tagName !== 'LI' && /^[•\-\*▪▸○●◆◇■□]\s/.test(text.trimStart())) {
|
||||
errors.push(
|
||||
`Text element <${el.tagName.toLowerCase()}> starts with bullet symbol "${text.substring(0, 20)}...". ` +
|
||||
'Use <ul> or <ol> lists instead of manual bullet symbols.'
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const computed = window.getComputedStyle(el);
|
||||
const rotation = getRotation(computed.transform, computed.writingMode);
|
||||
const { x, y, w, h } = getPositionAndSize(el, rect, rotation);
|
||||
|
||||
const baseStyle = {
|
||||
fontSize: pxToPoints(computed.fontSize),
|
||||
fontFace: computed.fontFamily.split(',')[0].replace(/['"]/g, '').trim(),
|
||||
color: rgbToHex(computed.color),
|
||||
align: computed.textAlign === 'start' ? 'left' : computed.textAlign,
|
||||
lineSpacing: pxToPoints(computed.lineHeight),
|
||||
paraSpaceBefore: pxToPoints(computed.marginTop),
|
||||
paraSpaceAfter: pxToPoints(computed.marginBottom),
|
||||
// PptxGenJS margin array is [left, right, bottom, top] (not [top, right, bottom, left] as documented)
|
||||
margin: [
|
||||
pxToPoints(computed.paddingLeft),
|
||||
pxToPoints(computed.paddingRight),
|
||||
pxToPoints(computed.paddingBottom),
|
||||
pxToPoints(computed.paddingTop)
|
||||
]
|
||||
};
|
||||
|
||||
const transparency = extractAlpha(computed.color);
|
||||
if (transparency !== null) baseStyle.transparency = transparency;
|
||||
|
||||
if (rotation !== null) baseStyle.rotate = rotation;
|
||||
|
||||
const hasFormatting = el.querySelector('b, i, u, strong, em, span, br');
|
||||
|
||||
if (hasFormatting) {
|
||||
// Text with inline formatting
|
||||
const transformStr = computed.textTransform;
|
||||
const runs = parseInlineFormatting(el, {}, [], (str) => applyTextTransform(str, transformStr));
|
||||
|
||||
// Adjust lineSpacing based on largest fontSize in runs
|
||||
const adjustedStyle = { ...baseStyle };
|
||||
if (adjustedStyle.lineSpacing) {
|
||||
const maxFontSize = Math.max(
|
||||
adjustedStyle.fontSize,
|
||||
...runs.map(r => r.options?.fontSize || 0)
|
||||
);
|
||||
if (maxFontSize > adjustedStyle.fontSize) {
|
||||
const lineHeightMultiplier = adjustedStyle.lineSpacing / adjustedStyle.fontSize;
|
||||
adjustedStyle.lineSpacing = maxFontSize * lineHeightMultiplier;
|
||||
}
|
||||
}
|
||||
|
||||
elements.push({
|
||||
type: el.tagName.toLowerCase(),
|
||||
text: runs,
|
||||
position: { x: pxToInch(x), y: pxToInch(y), w: pxToInch(w), h: pxToInch(h) },
|
||||
style: adjustedStyle
|
||||
});
|
||||
} else {
|
||||
// Plain text - inherit CSS formatting
|
||||
const textTransform = computed.textTransform;
|
||||
const transformedText = applyTextTransform(text, textTransform);
|
||||
|
||||
const isBold = computed.fontWeight === 'bold' || parseInt(computed.fontWeight) >= 600;
|
||||
|
||||
elements.push({
|
||||
type: el.tagName.toLowerCase(),
|
||||
text: transformedText,
|
||||
position: { x: pxToInch(x), y: pxToInch(y), w: pxToInch(w), h: pxToInch(h) },
|
||||
style: {
|
||||
...baseStyle,
|
||||
bold: isBold && !shouldSkipBold(computed.fontFamily),
|
||||
italic: computed.fontStyle === 'italic',
|
||||
underline: computed.textDecoration.includes('underline')
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
processed.add(el);
|
||||
});
|
||||
|
||||
return { background, elements, placeholders, errors };
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Render an HTML slide in a headless browser, validate it, and rebuild it on a
 * PptxGenJS slide.
 *
 * @param {string} htmlFile - Path to the HTML slide. Relative paths are resolved
 *   against process.cwd().
 * @param {object} pres - Presentation the slide is validated against; a new slide
 *   is added to it unless `options.slide` is supplied.
 * @param {object} [options]
 * @param {string} [options.tmpDir] - Temp directory handed to the browser (as
 *   TMPDIR) and to addBackground. Defaults to $TMPDIR, then '/tmp'.
 * @param {object|null} [options.slide] - Existing slide to draw on instead of
 *   calling pres.addSlide().
 * @returns {Promise<{slide: object, placeholders: Array}>} The populated slide and
 *   the placeholders extracted from the HTML.
 * @throws {Error} Prefixed with `htmlFile`. All validation problems are reported
 *   together in one message; the underlying error is attached as `cause`.
 */
async function html2pptx(htmlFile, pres, options = {}) {
  // Required locally so this function does not depend on edits to the file header.
  const { pathToFileURL } = require('url');

  const {
    tmpDir = process.env.TMPDIR || '/tmp',
    slide = null
  } = options;

  try {
    // Use Chrome on macOS, default Chromium on Unix
    const launchOptions = { env: { TMPDIR: tmpDir } };
    if (process.platform === 'darwin') {
      launchOptions.channel = 'chrome';
    }

    const browser = await chromium.launch(launchOptions);

    let bodyDimensions;
    let slideData;

    const filePath = path.isAbsolute(htmlFile) ? htmlFile : path.join(process.cwd(), htmlFile);
    const validationErrors = [];

    try {
      const page = await browser.newPage();
      page.on('console', (msg) => {
        // Surface in-page console output in the Node process
        console.log(`Browser console: ${msg.text()}`);
      });

      // Build a real file URL: string concatenation breaks on '#', '?', '%'
      // and on Windows drive paths.
      await page.goto(pathToFileURL(filePath).href);

      bodyDimensions = await getBodyDimensions(page);

      // Size the viewport to the body so layout matches the slide dimensions
      await page.setViewportSize({
        width: Math.round(bodyDimensions.width),
        height: Math.round(bodyDimensions.height)
      });

      slideData = await extractSlideData(page);
    } finally {
      // Always release the browser, even when navigation or extraction fails
      await browser.close();
    }

    // Collect all validation errors
    if (bodyDimensions.errors && bodyDimensions.errors.length > 0) {
      validationErrors.push(...bodyDimensions.errors);
    }

    const dimensionErrors = validateDimensions(bodyDimensions, pres);
    if (dimensionErrors.length > 0) {
      validationErrors.push(...dimensionErrors);
    }

    const textBoxPositionErrors = validateTextBoxPosition(slideData, bodyDimensions);
    if (textBoxPositionErrors.length > 0) {
      validationErrors.push(...textBoxPositionErrors);
    }

    if (slideData.errors && slideData.errors.length > 0) {
      validationErrors.push(...slideData.errors);
    }

    // Throw all errors at once if any exist
    if (validationErrors.length > 0) {
      const errorMessage = validationErrors.length === 1
        ? validationErrors[0]
        : `Multiple validation errors found:\n${validationErrors.map((e, i) => ` ${i + 1}. ${e}`).join('\n')}`;
      throw new Error(errorMessage);
    }

    const targetSlide = slide || pres.addSlide();

    await addBackground(slideData, targetSlide, tmpDir);
    addElements(slideData, targetSlide, pres);

    return { slide: targetSlide, placeholders: slideData.placeholders };
  } catch (error) {
    // Tolerate non-Error throwables, which have no usable `.message`
    const message = error instanceof Error ? error.message : String(error);
    if (!message.startsWith(htmlFile)) {
      // Prefix with the file name, keeping the original error (and stack) as the cause
      throw new Error(`${htmlFile}: ${message}`, { cause: error });
    }
    throw error;
  }
}
|
||||
|
||||
module.exports = html2pptx;
|
||||
1020
skills/document-skills/pptx/scripts/inventory.py
Executable file
1020
skills/document-skills/pptx/scripts/inventory.py
Executable file
File diff suppressed because it is too large
Load Diff
231
skills/document-skills/pptx/scripts/rearrange.py
Executable file
231
skills/document-skills/pptx/scripts/rearrange.py
Executable file
@@ -0,0 +1,231 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Rearrange PowerPoint slides based on a sequence of indices.
|
||||
|
||||
Usage:
|
||||
python rearrange.py template.pptx output.pptx 0,34,34,50,52
|
||||
|
||||
This will create output.pptx using slides from template.pptx in the specified order.
|
||||
Slides can be repeated (e.g., 34 appears twice).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
|
||||
import six
|
||||
from pptx import Presentation
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for rearranging slides.

    Parses the template path, output path and comma-separated slide sequence,
    then delegates to rearrange_presentation. Prints an error and exits with
    status 1 when the sequence is not a list of integers, when the template
    file does not exist, or when the rearrangement raises.
    """
    cli = argparse.ArgumentParser(
        description="Rearrange PowerPoint slides based on a sequence of indices.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python rearrange.py template.pptx output.pptx 0,34,34,50,52
Creates output.pptx using slides 0, 34 (twice), 50, and 52 from template.pptx

python rearrange.py template.pptx output.pptx 5,3,1,2,4
Creates output.pptx with slides reordered as specified

Note: Slide indices are 0-based (first slide is 0, second is 1, etc.)
""",
    )
    cli.add_argument("template", help="Path to template PPTX file")
    cli.add_argument("output", help="Path for output PPTX file")
    cli.add_argument(
        "sequence", help="Comma-separated sequence of slide indices (0-based)"
    )
    opts = cli.parse_args()

    # Convert "0, 3,3" into [0, 3, 3]; any non-integer token aborts the run
    slide_sequence = []
    for token in opts.sequence.split(","):
        try:
            slide_sequence.append(int(token.strip()))
        except ValueError:
            print(
                "Error: Invalid sequence format. Use comma-separated integers (e.g., 0,34,34,50,52)"
            )
            sys.exit(1)

    # The template must already exist on disk
    source = Path(opts.template)
    if not source.exists():
        print(f"Error: Template file not found: {opts.template}")
        sys.exit(1)

    # Make sure the destination folder is there before writing
    destination = Path(opts.output)
    destination.parent.mkdir(parents=True, exist_ok=True)

    try:
        rearrange_presentation(source, destination, slide_sequence)
    except Exception as exc:
        # ValueError carries a user-facing validation message; anything else
        # is reported as a processing failure
        label = "Error" if isinstance(exc, ValueError) else "Error processing presentation"
        print(f"{label}: {exc}")
        sys.exit(1)
|
||||
|
||||
|
||||
def duplicate_slide(pres, index):
    """Append a copy of slide `index` to the presentation and return the new slide.

    The new slide is created from the source slide's layout, the shapes that
    add_slide() placed on it are removed, and deep copies of the source shapes
    are inserted. Picture references (`a:blip` `r:embed` attributes) in the
    copies are re-pointed at relationships created on the new slide.

    Args:
        pres: Presentation object that owns the slides.
        index: 0-based index of the slide to duplicate.

    Returns:
        The newly added slide (appended at the end of the presentation).
    """
    embed_attr = (
        "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
    )

    source = pres.slides[index]

    # Use source's layout to preserve formatting
    new_slide = pres.slides.add_slide(source.slide_layout)

    # Collect all image and media relationships from the source slide
    image_rels = {
        rel_id: rel
        for rel_id, rel in source.part.rels.items()
        if "image" in rel.reltype or "media" in rel.reltype
    }

    # CRITICAL: Clear placeholder shapes to avoid duplicates.
    # Iterate over a snapshot so removal never depends on how the live shape
    # collection behaves when it is mutated mid-iteration.
    for shape in list(new_slide.shapes):
        sp = shape.element
        sp.getparent().remove(sp)

    # Copy all shapes from source
    for shape in source.shapes:
        new_el = deepcopy(shape.element)
        new_slide.shapes._spTree.insert_element_before(new_el, "p:extLst")

        # Picture data is referenced by relationship id, and ids are local to a
        # slide part, so each copied blip must point at a relationship that
        # exists on the new slide.
        for blip in new_el.xpath(".//a:blip[@r:embed]"):
            old_rId = blip.get(embed_attr)
            if old_rId in image_rels:
                old_rel = image_rels[old_rId]
                # get_or_add returns the rId directly, or adds and returns new rId
                new_rId = new_slide.part.rels.get_or_add(
                    old_rel.reltype, old_rel._target
                )
                blip.set(embed_attr, new_rId)

    # Copy any additional image/media relationships that might be referenced elsewhere
    for rel in image_rels.values():
        try:
            new_slide.part.rels.get_or_add(rel.reltype, rel._target)
        except Exception:
            # Deliberately best-effort: a relationship that cannot be re-added
            # here must not abort the duplication.
            pass

    return new_slide
|
||||
|
||||
|
||||
def delete_slide(pres, index):
    """Remove the slide at position `index` from the presentation.

    Drops the presentation-level relationship for the slide, then removes the
    slide's entry from the slide id list.
    """
    id_list = pres.slides._sldIdLst
    entry = id_list[index]
    # Drop the relationship first, then the id-list entry that referenced it
    pres.part.drop_rel(entry.rId)
    id_list.remove(entry)
|
||||
|
||||
|
||||
def reorder_slides(pres, slide_index, target_index):
    """Move the slide at `slide_index` so that it ends up at `target_index`.

    The entry is taken out of the slide id list first, so `target_index` is
    interpreted against the list with that entry already removed.
    """
    id_list = pres.slides._sldIdLst
    moving = id_list[slide_index]

    # Detach, then re-attach at the requested position
    id_list.remove(moving)
    id_list.insert(target_index, moving)
|
||||
|
||||
|
||||
def rearrange_presentation(template_path, output_path, slide_sequence):
    """
    Build a presentation containing the template's slides in the given order.

    Works in three passes: duplicate every slide that appears more than once
    in the sequence, delete every slide that is not used, then move the
    survivors into their final positions.

    Args:
        template_path: Path to template PPTX file
        output_path: Path for output PPTX file
        slide_sequence: List of slide indices (0-based) to include

    Raises:
        ValueError: if any index in slide_sequence is outside the template.
    """
    # Work on a copy of the template so dimensions and theme are preserved
    in_place = template_path == output_path
    if not in_place:
        shutil.copy2(template_path, output_path)
    prs = Presentation(template_path if in_place else output_path)

    total_slides = len(prs.slides)

    # Reject the first index that falls outside the template
    bad_index = next(
        (idx for idx in slide_sequence if not 0 <= idx < total_slides), None
    )
    if bad_index is not None:
        raise ValueError(
            f"Slide index {bad_index} out of range (0-{total_slides - 1})"
        )

    # slide_map[k] is the current position of the slide that must end up at k
    slide_map = []
    # original index -> positions of its not-yet-assigned duplicates
    pending_copies = {}

    # Pass 1: duplicate repeated slides
    print(f"Processing {len(slide_sequence)} slides from template...")
    for position, template_idx in enumerate(slide_sequence):
        spare = pending_copies.get(template_idx)
        if spare:
            # A duplicate was prepared earlier; consume it
            slide_map.append(spare.pop(0))
            print(f" [{position}] Using duplicate of slide {template_idx}")
            continue

        occurrences = slide_sequence.count(template_idx)
        if occurrences > 1 and template_idx not in pending_copies:
            # First appearance of a repeated slide: keep the original here and
            # create one duplicate for each later appearance
            slide_map.append(template_idx)
            extra = occurrences - 1
            print(
                f" [{position}] Using original slide {template_idx}, creating {extra} duplicate(s)"
            )
            copies = []
            for _ in range(extra):
                duplicate_slide(prs, template_idx)
                # duplicates are appended, so the newest one is the last slide
                copies.append(len(prs.slides) - 1)
            pending_copies[template_idx] = copies
            continue

        # Slide used exactly once
        slide_map.append(template_idx)
        print(f" [{position}] Using original slide {template_idx}")

    # Pass 2: delete unused slides, highest position first so that positions
    # still to be visited are not shifted by a deletion
    keep = set(slide_map)
    print(f"\nDeleting {len(prs.slides) - len(keep)} unused slides...")
    for pos in reversed(range(len(prs.slides))):
        if pos in keep:
            continue
        delete_slide(prs, pos)
        # Everything after the deleted slide moved down by one
        slide_map = [m - 1 if m > pos else m for m in slide_map]

    # Pass 3: move each slide to its final position, left to right
    print(f"Reordering {len(slide_map)} slides to final sequence...")
    for target_pos in range(len(slide_map)):
        current_pos = slide_map[target_pos]
        if current_pos == target_pos:
            continue

        reorder_slides(prs, current_pos, target_pos)

        # The move shifts every slide lying between the two positions
        for k, mapped in enumerate(slide_map):
            if current_pos < mapped <= target_pos:
                slide_map[k] = mapped - 1
            elif target_pos <= mapped < current_pos:
                slide_map[k] = mapped + 1
        slide_map[target_pos] = target_pos

    # Save the presentation
    prs.save(output_path)
    print(f"\nSaved rearranged presentation to: {output_path}")
    print(f"Final presentation has {len(prs.slides)} slides")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
385
skills/document-skills/pptx/scripts/replace.py
Executable file
385
skills/document-skills/pptx/scripts/replace.py
Executable file
@@ -0,0 +1,385 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Apply text replacements to PowerPoint presentation.
|
||||
|
||||
Usage:
|
||||
python replace.py <input.pptx> <replacements.json> <output.pptx>
|
||||
|
||||
The replacements JSON should have the structure output by inventory.py.
|
||||
ALL text shapes identified by inventory.py will have their text cleared
|
||||
unless "paragraphs" is specified in the replacements for that shape.
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from inventory import InventoryData, extract_text_inventory
|
||||
from pptx import Presentation
|
||||
from pptx.dml.color import RGBColor
|
||||
from pptx.enum.dml import MSO_THEME_COLOR
|
||||
from pptx.enum.text import PP_ALIGN
|
||||
from pptx.oxml.xmlchemy import OxmlElement
|
||||
from pptx.util import Pt
|
||||
|
||||
|
||||
def clear_paragraph_bullets(paragraph):
    """Strip bullet-related children from a paragraph's properties element.

    Gets (or creates) the paragraph's pPr element, removes every child whose
    tag ends with buChar, buNone, buAutoNum or buFont, and returns the pPr
    element so the caller can keep configuring it.
    """
    bullet_suffixes = ("buChar", "buNone", "buAutoNum", "buFont")
    props = paragraph._element.get_or_add_pPr()

    # Decide what to drop before mutating the element
    stale = [node for node in props if node.tag.endswith(bullet_suffixes)]
    for node in stale:
        props.remove(node)

    return props
|
||||
|
||||
|
||||
def apply_paragraph_properties(paragraph, para_data: Dict[str, Any]):
    """Configure one paragraph from a replacement-JSON paragraph entry.

    Resets bullet formatting, applies bullet/indent or plain-text settings,
    then alignment, spacing, text and font properties.

    Keys read from para_data: "text", "bullet", "level", "font_size",
    "alignment" (LEFT/CENTER/RIGHT/JUSTIFY), "space_before", "space_after",
    "line_spacing", plus whatever apply_font_properties consumes.
    """
    # Start from a bullet-free properties element
    pPr = clear_paragraph_bullets(paragraph)

    if not para_data.get("bullet", False):
        # Plain text: no indentation and an explicit "no bullet" marker
        pPr.attrib["marL"] = "0"
        pPr.attrib["indent"] = "0"
        pPr.insert(0, OxmlElement("a:buNone"))
    else:
        level = para_data.get("level", 0)
        paragraph.level = level

        # Indentation scales with the font size; 12700 converts points to EMU
        font_size = para_data.get("font_size", 18.0)
        pPr.attrib["marL"] = str(int((font_size * (1.6 + level * 1.6)) * 12700))
        pPr.attrib["indent"] = str(int(-font_size * 0.8 * 12700))

        # Explicit bullet character
        bullet = OxmlElement("a:buChar")
        bullet.set("char", "•")
        pPr.append(bullet)

        # Bullets default to left alignment unless the entry says otherwise
        if "alignment" not in para_data:
            paragraph.alignment = PP_ALIGN.LEFT

    # Alignment: unknown names are ignored
    if "alignment" in para_data:
        chosen = {
            "LEFT": PP_ALIGN.LEFT,
            "CENTER": PP_ALIGN.CENTER,
            "RIGHT": PP_ALIGN.RIGHT,
            "JUSTIFY": PP_ALIGN.JUSTIFY,
        }.get(para_data["alignment"])
        if chosen is not None:
            paragraph.alignment = chosen

    # Spacing values are given in points
    for key in ("space_before", "space_after", "line_spacing"):
        if key in para_data:
            setattr(paragraph, key, Pt(para_data[key]))

    # Put the text into the first run, creating it when the paragraph is empty
    run = paragraph.runs[0] if paragraph.runs else paragraph.add_run()
    run.text = para_data.get("text", "")

    # Font settings come from the same entry
    apply_font_properties(run, para_data)
|
||||
|
||||
|
||||
def apply_font_properties(run, para_data: Dict[str, Any]):
    """Copy font settings from a paragraph entry onto a text run.

    Only keys present in para_data are applied: "bold", "italic",
    "underline", "font_size" (points), "font_name", and either "color"
    (6-digit hex, optional leading '#') or "theme_color" (a MSO_THEME_COLOR
    member name). "color" wins when both are given; an unknown theme colour
    name only prints a warning.
    """
    font = run.font

    # Flags that map one-to-one onto font attributes
    for key in ("bold", "italic", "underline"):
        if key in para_data:
            setattr(font, key, para_data[key])

    if "font_size" in para_data:
        font.size = Pt(para_data["font_size"])
    if "font_name" in para_data:
        font.name = para_data["font_name"]

    # Explicit RGB takes precedence over a theme colour
    if "color" in para_data:
        color_hex = para_data["color"].lstrip("#")
        if len(color_hex) == 6:
            channels = [int(color_hex[i : i + 2], 16) for i in (0, 2, 4)]
            run.font.color.rgb = RGBColor(*channels)
    elif "theme_color" in para_data:
        # Theme colours are looked up by member name (e.g., "DARK_1", "ACCENT_1")
        theme_name = para_data["theme_color"]
        try:
            run.font.color.theme_color = getattr(MSO_THEME_COLOR, theme_name)
        except AttributeError:
            print(f" WARNING: Unknown theme color name '{theme_name}'")
|
||||
|
||||
|
||||
def detect_frame_overflow(inventory: InventoryData) -> Dict[str, Dict[str, float]]:
    """Collect the shapes whose text overflows the bottom of its frame.

    Returns a mapping slide_key -> shape_key -> frame_overflow_bottom,
    limited to shapes whose frame_overflow_bottom is not None. Slides
    without any such shape are left out.
    """
    per_slide = (
        (
            slide_key,
            {
                shape_key: shape.frame_overflow_bottom
                for shape_key, shape in shapes.items()
                if shape.frame_overflow_bottom is not None
            },
        )
        for slide_key, shapes in inventory.items()
    )
    # Drop slides that ended up with nothing to report
    return {slide_key: hits for slide_key, hits in per_slide if hits}
|
||||
|
||||
|
||||
def validate_replacements(inventory: InventoryData, replacements: Dict) -> List[str]:
    """Check that every slide and shape named in `replacements` is in `inventory`.

    Top-level keys that do not start with "slide-" are skipped. For each
    unknown shape the message lists the inventory shapes on that slide that
    have no replacement entry, each with a preview (first 50 characters) of
    its first paragraph when it has text.

    Returns a list of error messages; empty when everything matches.
    """

    def describe(key, shape):
        # "key ('preview')" when the first paragraph has text, else just the key
        paras = shape.paragraphs
        if not (paras and paras[0].text):
            return key
        full = paras[0].text
        preview = full[:50] + ("..." if len(full) > 50 else "")
        return f"{key} ('{preview}')"

    problems = []

    for slide_key, shapes_data in replacements.items():
        if not slide_key.startswith("slide-"):
            continue

        if slide_key not in inventory:
            problems.append(f"Slide '{slide_key}' not found in inventory")
            continue

        known_shapes = inventory[slide_key]
        for shape_key in shapes_data.keys():
            if shape_key in known_shapes:
                continue

            # Suggest candidates: inventory shapes nobody supplied text for
            candidates = sorted(
                describe(k, known_shapes[k])
                for k in known_shapes.keys()
                if k not in shapes_data
            )
            listing = ", ".join(candidates) if candidates else "none"
            problems.append(
                f"Shape '{shape_key}' not found on '{slide_key}'. "
                f"Shapes without replacements: {listing}"
            )

    return problems
|
||||
|
||||
|
||||
def check_duplicate_keys(pairs):
    """Build a dict from (key, value) pairs, refusing repeated keys.

    Intended as a json `object_pairs_hook`.

    Raises:
        ValueError: on the first key that appears more than once.
    """
    unique = {}
    for name, payload in pairs:
        if name not in unique:
            unique[name] = payload
            continue
        raise ValueError(f"Duplicate key found in JSON: '{name}'")
    return unique
|
||||
|
||||
|
||||
def apply_replacements(pptx_file: str, json_file: str, output_file: str):
    """Apply text replacements from JSON to PowerPoint presentation.

    Every text shape found by the inventory is cleared; shapes that have a
    "paragraphs" entry in the replacement JSON are then refilled from it.
    The result is written to a temporary file and re-inventoried, and is
    only saved to `output_file` when no shape's text overflow got worse and
    the re-inventory reports no warnings.

    Args:
        pptx_file: Path of the presentation to read.
        json_file: Path of the replacement JSON (read as UTF-8).
        output_file: Path the updated presentation is saved to.

    Raises:
        ValueError: if the JSON contains duplicate keys, names slides or
            shapes missing from the inventory, or the result has worsened
            overflow or formatting warnings.
    """
    import tempfile

    # Load presentation
    prs = Presentation(pptx_file)

    # Get inventory of all text shapes (returns ShapeData objects)
    # Pass prs to use same Presentation instance
    inventory = extract_text_inventory(Path(pptx_file), prs)

    # Detect text overflow in original presentation
    original_overflow = detect_frame_overflow(inventory)

    # Load replacement data with duplicate key detection.
    # JSON is UTF-8; do not let the platform's locale encoding decide.
    with open(json_file, "r", encoding="utf-8") as f:
        replacements = json.load(f, object_pairs_hook=check_duplicate_keys)

    # Validate replacements
    errors = validate_replacements(inventory, replacements)
    if errors:
        print("ERROR: Invalid shapes in replacement JSON:")
        for error in errors:
            print(f" - {error}")
        print("\nPlease check the inventory and update your replacement JSON.")
        print(
            "You can regenerate the inventory with: python inventory.py <input.pptx> <output.json>"
        )
        raise ValueError(f"Found {len(errors)} validation error(s)")

    # Track statistics
    shapes_processed = 0
    shapes_cleared = 0
    shapes_replaced = 0

    # Process each slide from inventory
    for slide_key, shapes_dict in inventory.items():
        if not slide_key.startswith("slide-"):
            continue

        slide_index = int(slide_key.split("-")[1])

        if slide_index >= len(prs.slides):
            print(f"Warning: Slide {slide_index} not found")
            continue

        # Process each shape from inventory
        for shape_key, shape_data in shapes_dict.items():
            shapes_processed += 1

            # Get the shape directly from ShapeData
            shape = shape_data.shape
            if not shape:
                print(f"Warning: {shape_key} has no shape reference")
                continue

            # ShapeData already validates text_frame in __init__
            text_frame = shape.text_frame  # type: ignore

            # Every inventoried shape is cleared, replacement or not
            text_frame.clear()  # type: ignore
            shapes_cleared += 1

            # Check for replacement paragraphs
            replacement_shape_data = replacements.get(slide_key, {}).get(shape_key, {})
            if "paragraphs" not in replacement_shape_data:
                continue

            shapes_replaced += 1

            # Add replacement paragraphs; the first one reuses the paragraph
            # left behind by clear()
            for i, para_data in enumerate(replacement_shape_data["paragraphs"]):
                if i == 0:
                    p = text_frame.paragraphs[0]  # type: ignore
                else:
                    p = text_frame.add_paragraph()  # type: ignore

                apply_paragraph_properties(p, para_data)

    # Check for issues after replacements
    # Save to a temporary file and reload to avoid modifying the presentation during inventory
    # (extract_text_inventory accesses font.color which adds empty <a:solidFill/> elements)
    # Only reserve the name here: the handle is closed before saving, because
    # writing to a still-open temp file fails on some platforms.
    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
        tmp_path = Path(tmp.name)

    try:
        # Saving sits inside the try so a failed save still removes the file
        prs.save(str(tmp_path))
        updated_inventory = extract_text_inventory(tmp_path)
        updated_overflow = detect_frame_overflow(updated_inventory)
    finally:
        tmp_path.unlink()  # Clean up temp file

    # Check if any text overflow got worse
    overflow_errors = []
    for slide_key, shape_overflows in updated_overflow.items():
        for shape_key, new_overflow in shape_overflows.items():
            # Get original overflow (0 if there was no overflow before)
            original = original_overflow.get(slide_key, {}).get(shape_key, 0.0)

            # Error if overflow increased
            if new_overflow > original + 0.01:  # Small tolerance for rounding
                increase = new_overflow - original
                overflow_errors.append(
                    f'{slide_key}/{shape_key}: overflow worsened by {increase:.2f}" '
                    f'(was {original:.2f}", now {new_overflow:.2f}")'
                )

    # Collect warnings from updated shapes
    warnings = []
    for slide_key, shapes_dict in updated_inventory.items():
        for shape_key, shape_data in shapes_dict.items():
            if shape_data.warnings:
                for warning in shape_data.warnings:
                    warnings.append(f"{slide_key}/{shape_key}: {warning}")

    # Fail if there are any issues
    if overflow_errors or warnings:
        print("\nERROR: Issues detected in replacement output:")
        if overflow_errors:
            print("\nText overflow worsened:")
            for error in overflow_errors:
                print(f" - {error}")
        if warnings:
            print("\nFormatting warnings:")
            for warning in warnings:
                print(f" - {warning}")
        print("\nPlease fix these issues before saving.")
        raise ValueError(
            f"Found {len(overflow_errors)} overflow error(s) and {len(warnings)} warning(s)"
        )

    # Save the presentation
    prs.save(output_file)

    # Report results
    print(f"Saved updated presentation to: {output_file}")
    print(f"Processed {len(prs.slides)} slides")
    print(f" - Shapes processed: {shapes_processed}")
    print(f" - Shapes cleared: {shapes_cleared}")
    print(f" - Shapes replaced: {shapes_replaced}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: replace.py <input.pptx> <replacements.json> <output.pptx>.

    Prints the module docstring and exits with status 1 on a wrong argument
    count; exits with status 1 when either input file is missing or when
    applying the replacements raises (the traceback is printed).
    """
    if len(sys.argv) != 4:
        print(__doc__)
        sys.exit(1)

    input_pptx, replacements_json, output_pptx = (Path(arg) for arg in sys.argv[1:4])

    # Both inputs must exist before any work starts
    required = (
        (input_pptx, "Input file"),
        (replacements_json, "Replacements JSON file"),
    )
    for candidate, label in required:
        if not candidate.exists():
            print(f"Error: {label} '{candidate}' not found")
            sys.exit(1)

    try:
        apply_replacements(str(input_pptx), str(replacements_json), str(output_pptx))
    except Exception as e:
        print(f"Error applying replacements: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
450
skills/document-skills/pptx/scripts/thumbnail.py
Executable file
450
skills/document-skills/pptx/scripts/thumbnail.py
Executable file
@@ -0,0 +1,450 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Create thumbnail grids from PowerPoint presentation slides.
|
||||
|
||||
Creates a grid layout of slide thumbnails with configurable columns (max 6).
|
||||
Each grid contains up to cols×(cols+1) images. For presentations with more
|
||||
slides, multiple numbered grid files are created automatically.
|
||||
|
||||
The program outputs the names of all files created.
|
||||
|
||||
Output:
|
||||
- Single grid: {prefix}.jpg (if slides fit in one grid)
|
||||
- Multiple grids: {prefix}-1.jpg, {prefix}-2.jpg, etc.
|
||||
|
||||
Grid limits by column count:
|
||||
- 3 cols: max 12 slides per grid (3×4)
|
||||
- 4 cols: max 20 slides per grid (4×5)
|
||||
- 5 cols: max 30 slides per grid (5×6) [default]
|
||||
- 6 cols: max 42 slides per grid (6×7)
|
||||
|
||||
Usage:
|
||||
python thumbnail.py input.pptx [output_prefix] [--cols N] [--outline-placeholders]
|
||||
|
||||
Examples:
|
||||
python thumbnail.py presentation.pptx
|
||||
# Creates: thumbnails.jpg (using default prefix)
|
||||
# Outputs:
|
||||
# Created 1 grid(s):
|
||||
# - thumbnails.jpg
|
||||
|
||||
python thumbnail.py large-deck.pptx grid --cols 4
|
||||
# Creates: grid-1.jpg, grid-2.jpg, grid-3.jpg
|
||||
# Outputs:
|
||||
# Created 3 grid(s):
|
||||
# - grid-1.jpg
|
||||
# - grid-2.jpg
|
||||
# - grid-3.jpg
|
||||
|
||||
python thumbnail.py template.pptx analysis --outline-placeholders
|
||||
# Creates thumbnail grids with red outlines around text placeholders
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from inventory import extract_text_inventory
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from pptx import Presentation
|
||||
|
||||
# Constants
|
||||
THUMBNAIL_WIDTH = 300 # Fixed thumbnail width in pixels
|
||||
CONVERSION_DPI = 100 # DPI for PDF to image conversion
|
||||
MAX_COLS = 6 # Maximum number of columns
|
||||
DEFAULT_COLS = 5 # Default number of columns
|
||||
JPEG_QUALITY = 95 # JPEG compression quality
|
||||
|
||||
# Grid layout constants
|
||||
GRID_PADDING = 20 # Padding between thumbnails
|
||||
BORDER_WIDTH = 2 # Border width around thumbnails
|
||||
FONT_SIZE_RATIO = 0.12 # Font size as fraction of thumbnail width
|
||||
LABEL_PADDING_RATIO = 0.4 # Label padding as fraction of font size
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: render a .pptx file into thumbnail grid JPEG(s).

    Parses the command line, validates the column count and the input path,
    optionally collects text-region outlines, converts the slides to images
    and writes the grid file(s), printing the name of every file created.

    Exits with status 1 (after printing an ``Error: ...`` line) when the
    requested column count is below 1, the input is not an existing ``.pptx``
    file, no slide images are produced, or any processing step raises.
    """
    parser = argparse.ArgumentParser(
        description="Create thumbnail grids from PowerPoint slides."
    )
    parser.add_argument("input", help="Input PowerPoint file (.pptx)")
    parser.add_argument(
        "output_prefix",
        nargs="?",
        default="thumbnails",
        help="Output prefix for image files (default: thumbnails, will create prefix.jpg or prefix-N.jpg)",
    )
    parser.add_argument(
        "--cols",
        type=int,
        default=DEFAULT_COLS,
        help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})",
    )
    parser.add_argument(
        "--outline-placeholders",
        action="store_true",
        help="Outline text placeholders with a colored border",
    )

    args = parser.parse_args()

    # Validate columns. A value below 1 would otherwise surface much later as
    # an opaque error from the grid arithmetic (e.g. a zero range step), so
    # reject it here with a clear message.
    if args.cols < 1:
        print(f"Error: Columns must be at least 1 (requested {args.cols})")
        sys.exit(1)
    cols = min(args.cols, MAX_COLS)
    if args.cols > MAX_COLS:
        print(f"Warning: Columns limited to {MAX_COLS} (requested {args.cols})")

    # Validate input
    input_path = Path(args.input)
    if not input_path.exists() or input_path.suffix.lower() != ".pptx":
        print(f"Error: Invalid PowerPoint file: {args.input}")
        sys.exit(1)

    # Construct output path (always JPG)
    output_path = Path(f"{args.output_prefix}.jpg")

    print(f"Processing: {args.input}")

    try:
        # Intermediate PDF/JPEG files live in a temporary directory that is
        # removed when the block exits; only the grids are kept.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Get placeholder regions if outlining is enabled
            placeholder_regions = None
            slide_dimensions = None
            if args.outline_placeholders:
                print("Extracting placeholder regions...")
                placeholder_regions, slide_dimensions = get_placeholder_regions(
                    input_path
                )
                if placeholder_regions:
                    print(f"Found placeholders on {len(placeholder_regions)} slides")

            # Convert slides to images
            slide_images = convert_to_images(input_path, Path(temp_dir), CONVERSION_DPI)
            if not slide_images:
                print("Error: No slides found")
                sys.exit(1)

            print(f"Found {len(slide_images)} slides")

            # Create grids (max cols×(cols+1) images per grid)
            grid_files = create_grids(
                slide_images,
                cols,
                THUMBNAIL_WIDTH,
                output_path,
                placeholder_regions,
                slide_dimensions,
            )

            # Print saved files
            print(f"Created {len(grid_files)} grid(s):")
            for grid_file in grid_files:
                print(f"  - {grid_file}")

    except Exception as e:
        # SystemExit raised by sys.exit() above is not an Exception subclass,
        # so it passes through this handler untouched.
        print(f"Error: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
def create_hidden_slide_placeholder(size):
    """Build the stand-in image shown for a hidden slide.

    The image is a light-gray canvas crossed by two darker gray diagonals.

    Args:
        size: (width, height) of the image in pixels.

    Returns:
        A new RGB PIL image of the requested size.
    """
    canvas = Image.new("RGB", size, color="#F0F0F0")
    pen = ImageDraw.Draw(canvas)

    # Stroke scales with the shorter side but never drops below 5 px.
    stroke = max(5, min(size) // 100)

    diagonals = (
        [(0, 0), size],  # top-left -> bottom-right
        [(size[0], 0), (0, size[1])],  # top-right -> bottom-left
    )
    for diagonal in diagonals:
        pen.line(diagonal, fill="#CCCCCC", width=stroke)

    return canvas
|
||||
|
||||
|
||||
def get_placeholder_regions(pptx_path):
    """Collect the bounding box of every text shape listed in the inventory.

    Args:
        pptx_path: Path of the presentation to inspect.

    Returns:
        A tuple ``(placeholder_regions, slide_dimensions)``.

        placeholder_regions maps a slide index (the integer N parsed from the
        inventory's "slide-N" key) to a list of region dicts, each holding
        'left', 'top', 'width' and 'height' in inches. Slides without any
        region are omitted.

        slide_dimensions is ``(width_inches, height_inches)``.
    """
    emu_per_inch = 914400.0

    prs = Presentation(str(pptx_path))
    inventory = extract_text_inventory(pptx_path, prs)

    # Slide size is stored in EMU; fall back to 9144000 x 5143500 EMU when
    # the presentation reports no size.
    slide_dimensions = (
        (prs.slide_width or 9144000) / emu_per_inch,
        (prs.slide_height or 5143500) / emu_per_inch,
    )

    placeholder_regions = {}
    for slide_key, shapes in inventory.items():
        # Keys look like "slide-N"; keep only the numeric part.
        slide_idx = int(slide_key.split("-")[1])

        # The inventory only lists shapes that carry text, so every entry
        # becomes a region to outline.
        boxes = [
            {
                "left": shape.left,
                "top": shape.top,
                "width": shape.width,
                "height": shape.height,
            }
            for shape in shapes.values()
        ]

        if boxes:
            placeholder_regions[slide_idx] = boxes

    return placeholder_regions, slide_dimensions
|
||||
|
||||
|
||||
def convert_to_images(pptx_path, temp_dir, dpi):
    """Convert PowerPoint to images via PDF, handling hidden slides.

    The presentation is converted to PDF with ``soffice`` and the PDF is
    rasterised to JPEG files with ``pdftoppm``. Slides flagged as hidden
    (``show="0"``) get a generated placeholder image instead, so the returned
    list is meant to line up with the slide order of the deck. The mapping
    assumes the rendered PDF contains exactly the visible slides, in order;
    a warning is printed when the image count does not match that assumption.

    Args:
        pptx_path: Path object of the input presentation.
        temp_dir: Path object of a writable directory for intermediate files.
        dpi: Resolution passed to ``pdftoppm -r``.

    Returns:
        List of image paths (rendered slides and hidden-slide placeholders).

    Raises:
        RuntimeError: If the PDF or the image conversion fails. The captured
            stderr of the failing tool is appended to the message when it is
            not empty.
    """
    # Detect hidden slides
    print("Analyzing presentation...")
    prs = Presentation(str(pptx_path))
    total_slides = len(prs.slides)

    # Find hidden slides (1-based indexing for display)
    hidden_slides = {
        idx + 1
        for idx, slide in enumerate(prs.slides)
        if slide.element.get("show") == "0"
    }

    print(f"Total slides: {total_slides}")
    if hidden_slides:
        print(f"Hidden slides: {sorted(hidden_slides)}")

    pdf_path = temp_dir / f"{pptx_path.stem}.pdf"

    # Convert to PDF
    print("Converting to PDF...")
    result = subprocess.run(
        [
            "soffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(temp_dir),
            str(pptx_path),
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0 or not pdf_path.exists():
        # Surface the tool's own diagnostics instead of discarding them.
        detail = (result.stderr or "").strip()
        raise RuntimeError(
            f"PDF conversion failed: {detail}" if detail else "PDF conversion failed"
        )

    # Convert PDF to images
    print(f"Converting to images at {dpi} DPI...")
    result = subprocess.run(
        ["pdftoppm", "-jpeg", "-r", str(dpi), str(pdf_path), str(temp_dir / "slide")],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        detail = (result.stderr or "").strip()
        raise RuntimeError(
            f"Image conversion failed: {detail}" if detail else "Image conversion failed"
        )

    visible_images = sorted(temp_dir.glob("slide-*.jpg"))

    # A count mismatch means images can no longer be matched to slides
    # one-to-one; warn rather than silently mislabel thumbnails.
    expected_visible = total_slides - len(hidden_slides)
    if len(visible_images) != expected_visible:
        print(
            f"Warning: expected {expected_visible} visible slide image(s) "
            f"but found {len(visible_images)}; slide numbering may be off"
        )

    # Create full list with placeholders for hidden slides
    all_images = []
    visible_idx = 0

    # Get placeholder dimensions from first visible slide
    if visible_images:
        with Image.open(visible_images[0]) as img:
            placeholder_size = img.size
    else:
        placeholder_size = (1920, 1080)

    for slide_num in range(1, total_slides + 1):
        if slide_num in hidden_slides:
            # Create placeholder image for hidden slide
            placeholder_path = temp_dir / f"hidden-{slide_num:03d}.jpg"
            placeholder_img = create_hidden_slide_placeholder(placeholder_size)
            placeholder_img.save(placeholder_path, "JPEG")
            all_images.append(placeholder_path)
        else:
            # Use the actual visible slide image
            if visible_idx < len(visible_images):
                all_images.append(visible_images[visible_idx])
                visible_idx += 1

    return all_images
|
||||
|
||||
|
||||
def create_grids(
    image_paths,
    cols,
    width,
    output_path,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Write the slide images out as one or more thumbnail grid files.

    Images are split into batches of at most ``cols * (cols + 1)`` and each
    batch is rendered with ``create_grid`` and saved. When everything fits in
    one batch the grid is saved as ``output_path``; otherwise the files are
    named ``<stem>-1<suffix>``, ``<stem>-2<suffix>``, ... next to it.

    Args:
        image_paths: Ordered list of slide image paths.
        cols: Number of thumbnail columns per grid.
        width: Thumbnail width passed through to ``create_grid``.
        output_path: Path object naming the (single-grid) output file.
        placeholder_regions: Optional regions passed through to ``create_grid``.
        slide_dimensions: Optional slide size passed through to ``create_grid``.

    Returns:
        List of the saved file names as strings, in creation order.
    """
    # cols × (cols + 1) keeps each grid slightly taller than wide.
    per_grid = cols * (cols + 1)
    total = len(image_paths)

    print(
        f"Creating grids with {cols} columns (max {per_grid} images per grid)"
    )

    single_grid = total <= per_grid
    written = []

    for number, first in enumerate(range(0, total, per_grid), start=1):
        # Slicing clamps at the end of the list, so the last batch may be short.
        batch = image_paths[first : first + per_grid]

        # `first` doubles as the slide offset used for labels and outlines.
        sheet = create_grid(
            batch, cols, width, first, placeholder_regions, slide_dimensions
        )

        if single_grid:
            target = output_path
        else:
            target = (
                output_path.parent
                / f"{output_path.stem}-{number}{output_path.suffix}"
            )

        target.parent.mkdir(parents=True, exist_ok=True)
        sheet.save(str(target), quality=JPEG_QUALITY)
        written.append(str(target))

    return written
|
||||
|
||||
|
||||
def create_grid(
    image_paths,
    cols,
    width,
    start_slide_num=0,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Render one thumbnail grid image from a batch of slide images.

    Every cell holds a centered numeric label (``start_slide_num`` plus the
    position within the batch) above a bordered thumbnail. The thumbnail
    height is derived from the aspect ratio of the first image.

    Args:
        image_paths: Non-empty list of slide image paths for this grid.
        cols: Number of columns.
        width: Thumbnail width in pixels.
        start_slide_num: Index of the first image, used for labels and for
            looking up ``placeholder_regions``.
        placeholder_regions: Optional mapping of slide index to a list of
            region dicts ('left', 'top', 'width', 'height'); matching slides
            get red outlines drawn before being thumbnailed.
        slide_dimensions: Optional (width_inches, height_inches) used to scale
            regions; when missing, the size is estimated from the image size
            and CONVERSION_DPI.

    Returns:
        The grid as an RGB PIL image.
    """
    font_size = int(width * FONT_SIZE_RATIO)
    label_padding = int(font_size * LABEL_PADDING_RATIO)

    # Thumbnail height follows the aspect ratio of the first image.
    with Image.open(image_paths[0]) as first:
        aspect = first.height / first.width
        height = int(width * aspect)

    # One cell = label strip (padding + text + padding) on top of a thumbnail.
    cell_height = height + font_size + label_padding * 2
    rows = (len(image_paths) + cols - 1) // cols
    grid_w = cols * width + (cols + 1) * GRID_PADDING
    grid_h = rows * cell_height + (rows + 1) * GRID_PADDING

    grid = Image.new("RGB", (grid_w, grid_h), "white")
    draw = ImageDraw.Draw(grid)

    try:
        # Sized variant of Pillow's default font
        font = ImageFont.load_default(size=font_size)
    except Exception:
        # Size parameter not supported: use the basic default font
        font = ImageFont.load_default()

    for offset, img_path in enumerate(image_paths):
        slide_index = start_slide_num + offset
        row, col = divmod(offset, cols)
        x = col * width + (col + 1) * GRID_PADDING
        y_base = row * cell_height + (row + 1) * GRID_PADDING

        # Label, horizontally centered over the thumbnail
        label = f"{slide_index}"
        bbox = draw.textbbox((0, 0), label, font=font)
        text_w = bbox[2] - bbox[0]
        label_pos = (x + (width - text_w) // 2, y_base + label_padding)
        draw.text(label_pos, label, fill="black", font=font)

        # Thumbnail sits directly below the label strip
        y_thumbnail = y_base + label_padding + font_size + label_padding

        with Image.open(img_path) as source:
            thumb = source
            if placeholder_regions and slide_index in placeholder_regions:
                thumb = _outline_text_regions(
                    source, placeholder_regions[slide_index], slide_dimensions
                )

            # Shrink in place (aspect ratio preserved) and center in the cell
            thumb.thumbnail((width, height), Image.Resampling.LANCZOS)
            w, h = thumb.size
            tx = x + (width - w) // 2
            ty = y_thumbnail + (height - h) // 2
            grid.paste(thumb, (tx, ty))

            if BORDER_WIDTH > 0:
                top_left = (tx - BORDER_WIDTH, ty - BORDER_WIDTH)
                bottom_right = (tx + w + BORDER_WIDTH - 1, ty + h + BORDER_WIDTH - 1)
                draw.rectangle(
                    [top_left, bottom_right],
                    outline="gray",
                    width=BORDER_WIDTH,
                )

    return grid


def _outline_text_regions(img, regions, slide_dimensions):
    """Return an RGB copy of ``img`` with a red outline around each region."""
    orig_w, orig_h = img.size

    # Alpha compositing below needs an RGBA base image.
    if img.mode != "RGBA":
        img = img.convert("RGBA")

    if slide_dimensions:
        slide_width_inches, slide_height_inches = slide_dimensions
    else:
        # Fallback: estimate from image size at CONVERSION_DPI
        slide_width_inches = orig_w / CONVERSION_DPI
        slide_height_inches = orig_h / CONVERSION_DPI

    # Pixels per inch along each axis of the full-size slide image
    x_scale = orig_w / slide_width_inches
    y_scale = orig_h / slide_height_inches

    overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
    pen = ImageDraw.Draw(overlay)

    # Thick stroke, proportional to the image, so it survives downscaling
    stroke_width = max(5, min(orig_w, orig_h) // 150)

    for region in regions:
        left = int(region["left"] * x_scale)
        top = int(region["top"] * y_scale)
        right = left + int(region["width"] * x_scale)
        bottom = top + int(region["height"] * y_scale)
        pen.rectangle(
            [(left, top), (right, bottom)],
            outline=(255, 0, 0, 255),  # Bright red, fully opaque
            width=stroke_width,
        )

    # Blend the outlines in, then drop alpha again for JPEG output.
    return Image.alpha_composite(img, overlay).convert("RGB")
|
||||
|
||||
|
||||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user