Files
2025-11-30 08:30:14 +08:00

177 lines
5.3 KiB
Python

#!/usr/bin/env python3
"""
PDF Generation Script for Literature Reviews
Converts markdown files to professionally formatted PDFs with proper styling.
"""
import subprocess
import sys
import os
from pathlib import Path
def generate_pdf(
    markdown_file: str,
    output_pdf: str = None,
    citation_style: str = "apa",
    template: str = None,
    toc: bool = True,
    number_sections: bool = True
) -> bool:
    """
    Generate a PDF from a markdown file using pandoc.

    Progress and error messages are printed to stdout; failures are reported
    through the return value rather than by raising.

    Args:
        markdown_file: Path to the markdown file (str or os.PathLike). Must
            refer to an existing regular file.
        output_pdf: Path for output PDF (defaults to the markdown path with
            its suffix replaced by ``.pdf``)
        citation_style: Citation style (apa, nature, chicago, etc.). ``.csl``
            is appended unless the value already ends with it. Only used when
            a ``.bib`` file with the same stem as the markdown file exists.
        template: Path to custom LaTeX template. If the path does not exist a
            warning is printed and no ``--template`` option is passed.
        toc: Include table of contents
        number_sections: Number the sections

    Returns:
        True if successful, False otherwise
    """
    # Local import: only needed to render the command line for display.
    import shlex

    # Normalise path-like input once so the command list holds plain strings.
    markdown_file = os.fspath(markdown_file)
    source = Path(markdown_file)

    # Verify the markdown input is an actual file (a directory is rejected
    # here instead of being handed to pandoc).
    if not source.is_file():
        print(f"Error: Markdown file not found: {markdown_file}")
        return False

    # Set default output path
    if output_pdf is None:
        output_pdf = source.with_suffix('.pdf')

    # Check if pandoc is installed
    try:
        subprocess.run(['pandoc', '--version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: pandoc is not installed.")
        print("Install with: brew install pandoc (macOS) or apt-get install pandoc (Linux)")
        return False

    # Build pandoc command
    cmd = [
        'pandoc',
        markdown_file,
        '-o', str(output_pdf),
        '--pdf-engine=xelatex',  # Better Unicode support
        '-V', 'geometry:margin=1in',
        '-V', 'fontsize=11pt',
        '-V', 'colorlinks=true',
        '-V', 'linkcolor=blue',
        '-V', 'urlcolor=blue',
        '-V', 'citecolor=blue',
    ]

    # Add table of contents
    if toc:
        cmd.extend(['--toc', '--toc-depth=3'])

    # Add section numbering
    if number_sections:
        cmd.append('--number-sections')

    # Add citation processing if bibliography exists
    bib_file = source.with_suffix('.bib')
    if bib_file.exists():
        csl = os.fspath(citation_style)
        if not csl.endswith('.csl'):
            csl = f'{csl}.csl'
        cmd.extend([
            '--citeproc',
            '--bibliography', str(bib_file),
            '--csl', csl,
        ])

    # Add custom template if provided; tell the user when it is skipped so a
    # mistyped path does not silently fall back to the default layout.
    if template:
        template = os.fspath(template)
        if os.path.exists(template):
            cmd.extend(['--template', template])
        else:
            print(f"Warning: template not found, ignoring: {template}")

    # Execute pandoc
    print(f"Generating PDF: {output_pdf}")
    # Quote each argument so the printed command is unambiguous even when
    # paths contain spaces.
    print(f"Command: {' '.join(shlex.quote(arg) for arg in cmd)}")
    try:
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("Error generating PDF:")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return False

    print(f"✓ PDF generated successfully: {output_pdf}")
    return True
def check_dependencies():
    """
    Probe for the external tools this script shells out to.

    Each tool is invoked with ``--version``; a tool counts as missing when the
    executable cannot be found or exits with a non-zero status. One status
    line per tool is printed, followed by install hints for anything missing.

    Returns:
        True if every tool responded successfully, False otherwise
    """
    probes = (
        ('pandoc', ['pandoc', '--version']),
        ('xelatex', ['xelatex', '--version']),
    )
    install_hints = {
        'pandoc': " - pandoc: brew install pandoc (macOS) or apt-get install pandoc (Linux)",
        'xelatex': " - xelatex: brew install --cask mactex (macOS) or apt-get install texlive-xetex (Linux)",
    }

    absent = []
    for tool, argv in probes:
        try:
            subprocess.run(argv, capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            print(f"{tool} is NOT installed")
            absent.append(tool)
        else:
            print(f"{tool} is installed")

    # Nothing missing: no summary section is printed.
    if not absent:
        return True

    print("\n" + "=" * 60)
    print("Missing dependencies:")
    for tool in absent:
        print(install_hints[tool])
    return False
def _print_usage():
    """Print the command-line usage summary to stdout."""
    print("Usage: python generate_pdf.py <markdown_file> [output_pdf] [--citation-style STYLE]")
    print("\nOptions:")
    print(" --citation-style STYLE Citation style (default: apa)")
    print(" --no-toc Disable table of contents")
    print(" --no-numbers Disable section numbering")
    print(" --check-deps Check if dependencies are installed")


def main():
    """
    Command-line interface.

    Reads ``sys.argv``. The first positional argument is the markdown file and
    the optional second one is the output PDF; options may appear anywhere on
    the command line.

    Exit status:
        0 on success (or, with ``--check-deps``, when all dependencies are
        present); 1 on a usage error, a failed PDF generation, or missing
        dependencies.
    """
    args = sys.argv[1:]
    if not args:
        _print_usage()
        sys.exit(1)

    # Check dependencies mode: propagate the result so callers and scripts can
    # tell from the exit status whether anything is missing.
    if '--check-deps' in args:
        sys.exit(0 if check_dependencies() else 1)

    # Parse arguments in a single pass so that options are never mistaken for
    # the markdown/output paths, regardless of where they appear.
    positionals = []
    citation_style = 'apa'
    toc = True
    number_sections = True

    remaining = iter(args)
    for arg in remaining:
        if arg == '--citation-style':
            value = next(remaining, None)
            # A missing value (end of argv, or another option) is a usage
            # error rather than a silent fallback to the default style.
            if value is None or value.startswith('--'):
                print("Error: --citation-style requires a STYLE argument")
                sys.exit(1)
            citation_style = value
        elif arg == '--no-toc':
            toc = False
        elif arg == '--no-numbers':
            number_sections = False
        elif arg.startswith('--'):
            # Reject typos such as --no-tok instead of ignoring them.
            print(f"Error: unknown option: {arg}")
            sys.exit(1)
        else:
            positionals.append(arg)

    if not positionals:
        print("Error: no markdown file specified")
        _print_usage()
        sys.exit(1)

    markdown_file = positionals[0]
    output_pdf = positionals[1] if len(positionals) > 1 else None

    # Generate PDF
    success = generate_pdf(
        markdown_file,
        output_pdf,
        citation_style=citation_style,
        toc=toc,
        number_sections=number_sections
    )
    sys.exit(0 if success else 1)
# Run the CLI only when this file is executed as a script, so importing the
# module (e.g. to call generate_pdf directly) has no side effects.
if __name__ == "__main__":
    main()