#!/usr/bin/env python3 """ Presentation Validation Script Validates scientific presentations for common issues: - Slide count vs. duration - LaTeX compilation - File size checks - Basic format validation """ import sys import os import argparse import subprocess from pathlib import Path from typing import Dict, List, Tuple, Optional # Try to import PyPDF2 for PDF analysis try: import PyPDF2 HAS_PYPDF2 = True except ImportError: HAS_PYPDF2 = False # Try to import python-pptx for PowerPoint analysis try: from pptx import Presentation HAS_PPTX = True except ImportError: HAS_PPTX = False class PresentationValidator: """Validates presentations for common issues.""" # Recommended slide counts by duration (min, recommended, max) SLIDE_GUIDELINES = { 5: (5, 6, 8), 10: (8, 11, 14), 15: (13, 16, 20), 20: (18, 22, 26), 30: (22, 27, 33), 45: (32, 40, 50), 60: (40, 52, 65), } def __init__(self, filepath: str, duration: Optional[int] = None): self.filepath = Path(filepath) self.duration = duration self.file_type = self.filepath.suffix.lower() self.issues = [] self.warnings = [] self.info = [] def validate(self) -> Dict: """Run all validations and return results.""" print(f"Validating: {self.filepath.name}") print(f"File type: {self.file_type}") print("=" * 60) # Check file exists if not self.filepath.exists(): self.issues.append(f"File not found: {self.filepath}") return self._format_results() # File size check self._check_file_size() # Type-specific validation if self.file_type == '.pdf': self._validate_pdf() elif self.file_type in ['.pptx', '.ppt']: self._validate_pptx() elif self.file_type in ['.tex']: self._validate_latex() else: self.warnings.append(f"Unknown file type: {self.file_type}") return self._format_results() def _check_file_size(self): """Check if file size is reasonable.""" size_mb = self.filepath.stat().st_size / (1024 * 1024) self.info.append(f"File size: {size_mb:.2f} MB") if size_mb > 100: self.issues.append( f"File is very large ({size_mb:.1f} MB). " "Consider compressing images." ) elif size_mb > 50: self.warnings.append( f"File is large ({size_mb:.1f} MB). " "May be slow to email or upload." ) def _validate_pdf(self): """Validate PDF presentation.""" if not HAS_PYPDF2: self.warnings.append( "PyPDF2 not installed. Install with: pip install PyPDF2" ) return try: with open(self.filepath, 'rb') as f: reader = PyPDF2.PdfReader(f) num_pages = len(reader.pages) self.info.append(f"Number of slides: {num_pages}") # Check slide count against duration if self.duration: self._check_slide_count(num_pages) # Get page size first_page = reader.pages[0] media_box = first_page.mediabox width = float(media_box.width) height = float(media_box.height) # Convert points to inches (72 points = 1 inch) width_in = width / 72 height_in = height / 72 aspect = width / height self.info.append( f"Slide dimensions: {width_in:.1f}\" Ɨ {height_in:.1f}\" " f"(aspect ratio: {aspect:.2f})" ) # Check common aspect ratios if abs(aspect - 16/9) < 0.01: self.info.append("Aspect ratio: 16:9 (widescreen)") elif abs(aspect - 4/3) < 0.01: self.info.append("Aspect ratio: 4:3 (standard)") else: self.warnings.append( f"Unusual aspect ratio: {aspect:.2f}. " "Confirm this matches venue requirements." ) except Exception as e: self.issues.append(f"Error reading PDF: {str(e)}") def _validate_pptx(self): """Validate PowerPoint presentation.""" if not HAS_PPTX: self.warnings.append( "python-pptx not installed. Install with: pip install python-pptx" ) return try: prs = Presentation(self.filepath) num_slides = len(prs.slides) self.info.append(f"Number of slides: {num_slides}") # Check slide count against duration if self.duration: self._check_slide_count(num_slides) # Get slide dimensions width_inches = prs.slide_width / 914400 # EMU to inches height_inches = prs.slide_height / 914400 aspect = prs.slide_width / prs.slide_height self.info.append( f"Slide dimensions: {width_inches:.1f}\" Ɨ {height_inches:.1f}\" " f"(aspect ratio: {aspect:.2f})" ) # Check fonts and text self._check_pptx_content(prs) except Exception as e: self.issues.append(f"Error reading PowerPoint: {str(e)}") def _check_pptx_content(self, prs): """Check PowerPoint content for common issues.""" small_text_slides = [] many_bullets_slides = [] for idx, slide in enumerate(prs.slides, start=1): for shape in slide.shapes: if not shape.has_text_frame: continue text_frame = shape.text_frame # Check for small fonts for paragraph in text_frame.paragraphs: for run in paragraph.runs: if run.font.size and run.font.size.pt < 18: small_text_slides.append(idx) break # Check for too many bullets bullet_count = sum(1 for p in text_frame.paragraphs if p.level == 0) if bullet_count > 6: many_bullets_slides.append(idx) # Report issues if small_text_slides: unique_slides = sorted(set(small_text_slides)) self.warnings.append( f"Small text (<18pt) found on slides: {unique_slides[:5]}" + (" ..." if len(unique_slides) > 5 else "") ) if many_bullets_slides: unique_slides = sorted(set(many_bullets_slides)) self.warnings.append( f"Many bullets (>6) on slides: {unique_slides[:5]}" + (" ..." if len(unique_slides) > 5 else "") ) def _validate_latex(self): """Validate LaTeX Beamer presentation.""" self.info.append("LaTeX source file detected") # Try to compile if self._try_compile_latex(): self.info.append("LaTeX compilation: SUCCESS") # If PDF was generated, validate it pdf_path = self.filepath.with_suffix('.pdf') if pdf_path.exists(): pdf_validator = PresentationValidator(str(pdf_path), self.duration) pdf_results = pdf_validator.validate() # Merge results self.info.extend(pdf_results['info']) self.warnings.extend(pdf_results['warnings']) self.issues.extend(pdf_results['issues']) else: self.issues.append( "LaTeX compilation failed. Check .log file for errors." ) def _try_compile_latex(self) -> bool: """Try to compile LaTeX file.""" try: # Try pdflatex result = subprocess.run( ['pdflatex', '-interaction=nonstopmode', self.filepath.name], cwd=self.filepath.parent, capture_output=True, timeout=60 ) return result.returncode == 0 except (subprocess.TimeoutExpired, FileNotFoundError): return False def _check_slide_count(self, num_slides: int): """Check if slide count is appropriate for duration.""" if self.duration not in self.SLIDE_GUIDELINES: # Find nearest duration durations = sorted(self.SLIDE_GUIDELINES.keys()) nearest = min(durations, key=lambda x: abs(x - self.duration)) min_slides, rec_slides, max_slides = self.SLIDE_GUIDELINES[nearest] self.info.append( f"Using guidelines for {nearest}-minute talk " f"(closest to {self.duration} minutes)" ) else: min_slides, rec_slides, max_slides = self.SLIDE_GUIDELINES[self.duration] self.info.append( f"Recommended slides for {self.duration}-minute talk: " f"{min_slides}-{max_slides} (optimal: ~{rec_slides})" ) if num_slides < min_slides: self.warnings.append( f"Fewer slides ({num_slides}) than recommended ({min_slides}-{max_slides}). " "May have too much time or too little content." ) elif num_slides > max_slides: self.warnings.append( f"More slides ({num_slides}) than recommended ({min_slides}-{max_slides}). " "Likely to run over time." ) else: self.info.append( f"Slide count ({num_slides}) is within recommended range." ) def _format_results(self) -> Dict: """Format validation results.""" return { 'filepath': str(self.filepath), 'file_type': self.file_type, 'info': self.info, 'warnings': self.warnings, 'issues': self.issues, 'valid': len(self.issues) == 0 } def print_results(results: Dict): """Print validation results in a readable format.""" print() print("=" * 60) print("VALIDATION RESULTS") print("=" * 60) # Print info if results['info']: print("\nšŸ“‹ Information:") for item in results['info']: print(f" • {item}") # Print warnings if results['warnings']: print("\nāš ļø Warnings:") for item in results['warnings']: print(f" • {item}") # Print issues if results['issues']: print("\nāŒ Issues:") for item in results['issues']: print(f" • {item}") # Overall status print("\n" + "=" * 60) if results['valid']: print("āœ… Validation PASSED") if results['warnings']: print(f" ({len(results['warnings'])} warning(s) found)") else: print("āŒ Validation FAILED") print(f" ({len(results['issues'])} issue(s) found)") print("=" * 60) def main(): parser = argparse.ArgumentParser( description='Validate scientific presentations', formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: %(prog)s presentation.pdf --duration 15 %(prog)s slides.pptx --duration 45 %(prog)s beamer_talk.tex --duration 20 Supported file types: - PDF (.pdf) - PowerPoint (.pptx, .ppt) - LaTeX Beamer (.tex) Validation checks: - Slide count vs. duration - File size - Slide dimensions - Font sizes (PowerPoint) - LaTeX compilation (Beamer) """ ) parser.add_argument( 'filepath', help='Path to presentation file (PDF, PPTX, or TEX)' ) parser.add_argument( '--duration', '-d', type=int, help='Presentation duration in minutes' ) parser.add_argument( '--quiet', '-q', action='store_true', help='Only show issues and warnings' ) args = parser.parse_args() # Validate validator = PresentationValidator(args.filepath, args.duration) results = validator.validate() # Print results if args.quiet: # Only show warnings and issues if results['warnings'] or results['issues']: print_results(results) else: print("āœ… No issues found") else: print_results(results) # Exit with appropriate code sys.exit(0 if results['valid'] else 1) if __name__ == '__main__': main()