177 lines
5.3 KiB
Python
177 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
PDF Generation Script for Literature Reviews

Converts markdown files to professionally formatted PDFs with proper styling.
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
from pathlib import Path
|
|
|
|
def generate_pdf(
    markdown_file: str,
    output_pdf: str = None,
    citation_style: str = "apa",
    template: str = None,
    toc: bool = True,
    number_sections: bool = True
) -> bool:
    """
    Generate a PDF from a markdown file using pandoc.

    Problems are reported by printing to stdout and returning False; the
    function does not raise for a missing input, a missing pandoc binary,
    or a failed pandoc run.

    Args:
        markdown_file: Path to the markdown file (must be an existing file)
        output_pdf: Path for output PDF (defaults to the markdown path with
            its suffix replaced by ``.pdf``)
        citation_style: Citation style name or ``.csl`` file name; only used
            when a ``.bib`` file with the markdown file's base name exists
        template: Path to custom LaTeX template; if the path does not exist
            a warning is printed and pandoc's default template is used
        toc: Include table of contents
        number_sections: Number the sections

    Returns:
        True if successful, False otherwise
    """
    # Accept os.PathLike inputs too: the command list must contain only
    # strings because it is joined for display below.
    markdown_file = str(markdown_file)

    # Verify the markdown input exists and is a regular file. A directory
    # passes os.path.exists(), so it is rejected explicitly.
    if not os.path.exists(markdown_file):
        print(f"Error: Markdown file not found: {markdown_file}")
        return False
    if not os.path.isfile(markdown_file):
        print(f"Error: Markdown path is not a file: {markdown_file}")
        return False

    # Set default output path
    if output_pdf is None:
        output_pdf = Path(markdown_file).with_suffix('.pdf')

    # Check if pandoc is installed
    try:
        subprocess.run(['pandoc', '--version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: pandoc is not installed.")
        print("Install with: brew install pandoc (macOS) or apt-get install pandoc (Linux)")
        return False

    # A template that was requested but cannot be found is still skipped,
    # but the caller is told instead of silently getting the default layout.
    if template and not os.path.exists(template):
        print(f"Warning: Template not found, using pandoc default: {template}")
        template = None

    cmd = _build_pandoc_command(
        markdown_file,
        output_pdf,
        citation_style,
        template,
        toc,
        number_sections,
    )

    # Execute pandoc
    print(f"Generating PDF: {output_pdf}")
    print(f"Command: {' '.join(cmd)}")
    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("Error generating PDF:")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return False

    print(f"✓ PDF generated successfully: {output_pdf}")
    return True


def _build_pandoc_command(markdown_file, output_pdf, citation_style,
                          template, toc, number_sections):
    """Return the pandoc argument list for converting markdown_file to a PDF."""
    cmd = [
        'pandoc',
        markdown_file,
        '-o', str(output_pdf),
        '--pdf-engine=xelatex',  # Better Unicode support
        '-V', 'geometry:margin=1in',
        '-V', 'fontsize=11pt',
        '-V', 'colorlinks=true',
        '-V', 'linkcolor=blue',
        '-V', 'urlcolor=blue',
        '-V', 'citecolor=blue',
    ]

    # Add table of contents
    if toc:
        cmd.extend(['--toc', '--toc-depth=3'])

    # Add section numbering
    if number_sections:
        cmd.append('--number-sections')

    # Add citation processing only if a sibling bibliography exists
    # (same path as the markdown file, with a .bib suffix).
    bib_file = Path(markdown_file).with_suffix('.bib')
    if bib_file.exists():
        if citation_style.endswith('.csl'):
            csl_name = citation_style
        else:
            csl_name = f'{citation_style}.csl'
        cmd.extend([
            '--citeproc',
            '--bibliography', str(bib_file),
            '--csl', csl_name,
        ])

    # Add custom template if provided
    if template:
        cmd.extend(['--template', str(template)])

    return cmd
|
|
|
|
def check_dependencies():
    """Report whether pandoc and xelatex can be run.

    Each tool is probed by running it with ``--version``. A status line is
    printed per tool, followed by install hints for any that failed.

    Returns:
        True if every probe succeeded, False if at least one tool could not
        be run.
    """
    probes = (
        ('pandoc', ['pandoc', '--version']),
        ('xelatex', ['xelatex', '--version']),
    )
    install_hints = {
        'pandoc': " - pandoc: brew install pandoc (macOS) or apt-get install pandoc (Linux)",
        'xelatex': " - xelatex: brew install --cask mactex (macOS) or apt-get install texlive-xetex (Linux)",
    }

    not_found = []
    for tool, probe_argv in probes:
        try:
            subprocess.run(probe_argv, capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            # Either the binary is absent or it exited non-zero.
            print(f"✗ {tool} is NOT installed")
            not_found.append(tool)
        else:
            print(f"✓ {tool} is installed")

    if not not_found:
        return True

    print("\n" + "="*60)
    print("Missing dependencies:")
    for tool in not_found:
        print(install_hints[tool])
    return False
|
|
|
|
def main():
    """Command-line interface.

    Reads ``sys.argv`` and always terminates via ``sys.exit``:

    * no arguments: print usage, exit 1
    * ``--check-deps`` anywhere: run the dependency check, exit 0 if all
      dependencies are present and 1 otherwise
    * ``--citation-style`` without a following value: print an error, exit 1
    * otherwise: convert the markdown file, exit 0 on success and 1 on
      failure
    """
    argv = sys.argv

    if len(argv) < 2:
        print("Usage: python generate_pdf.py <markdown_file> [output_pdf] [--citation-style STYLE]")
        print("\nOptions:")
        print(" --citation-style STYLE Citation style (default: apa)")
        print(" --no-toc Disable table of contents")
        print(" --no-numbers Disable section numbering")
        print(" --check-deps Check if dependencies are installed")
        sys.exit(1)

    # Check dependencies mode: propagate the result as the exit status so
    # scripts can detect a missing dependency.
    if '--check-deps' in argv:
        sys.exit(0 if check_dependencies() else 1)

    # Positional arguments: the first is the input; the second is the output
    # unless it is a flag.
    markdown_file = argv[1]
    output_pdf = None
    if len(argv) > 2 and not argv[2].startswith('--'):
        output_pdf = argv[2]

    # Parse optional flags
    citation_style = 'apa'
    if '--citation-style' in argv:
        value_index = argv.index('--citation-style') + 1
        # A missing value, or another flag in its place, is a usage error
        # rather than something to ignore or pass on as a style name.
        if value_index >= len(argv) or argv[value_index].startswith('--'):
            print("Error: --citation-style requires a STYLE argument")
            sys.exit(1)
        citation_style = argv[value_index]

    toc = '--no-toc' not in argv
    number_sections = '--no-numbers' not in argv

    # Generate PDF
    success = generate_pdf(
        markdown_file,
        output_pdf,
        citation_style=citation_style,
        toc=toc,
        number_sections=number_sections
    )

    sys.exit(0 if success else 1)
|
|
|
|
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|