Initial commit
This commit is contained in:
168
skills/pandoc/scripts/validate.py
Normal file
168
skills/pandoc/scripts/validate.py
Normal file
@@ -0,0 +1,168 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Validate Pandoc markdown file with YAML frontmatter"""
|
||||
|
||||
import yaml
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def validate_file(filepath):
|
||||
"""Validate markdown file with YAML frontmatter
|
||||
|
||||
Returns:
|
||||
tuple: (is_valid, errors, warnings)
|
||||
"""
|
||||
issues = []
|
||||
warnings = []
|
||||
|
||||
# Check file exists
|
||||
if not os.path.exists(filepath):
|
||||
return False, [f"File not found: {filepath}"], []
|
||||
|
||||
# Read file content
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
except Exception as e:
|
||||
return False, [f"Failed to read file: {e}"], []
|
||||
|
||||
# Check for YAML frontmatter
|
||||
if not content.startswith('---'):
|
||||
issues.append("No YAML frontmatter found (should start with '---')")
|
||||
return False, issues, warnings
|
||||
|
||||
# Find end of YAML block
|
||||
try:
|
||||
yaml_end = content.index('---', 3)
|
||||
yaml_content = content[3:yaml_end]
|
||||
except ValueError:
|
||||
issues.append("YAML frontmatter not properly closed (missing second '---')")
|
||||
return False, issues, warnings
|
||||
|
||||
# Check for tabs in YAML
|
||||
if '\t' in yaml_content:
|
||||
issues.append("YAML contains tabs - use spaces for indentation")
|
||||
|
||||
# Parse YAML
|
||||
try:
|
||||
metadata = yaml.safe_load(yaml_content)
|
||||
except yaml.YAMLError as e:
|
||||
issues.append(f"YAML syntax error: {e}")
|
||||
return False, issues, warnings
|
||||
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
|
||||
# Get file directory for relative path resolution
|
||||
file_dir = os.path.dirname(os.path.abspath(filepath))
|
||||
|
||||
# Validate bibliography file
|
||||
if 'bibliography' in metadata:
|
||||
bib_path = metadata['bibliography']
|
||||
# Handle relative paths
|
||||
if not os.path.isabs(bib_path):
|
||||
bib_path = os.path.join(file_dir, bib_path)
|
||||
|
||||
if not os.path.exists(bib_path):
|
||||
issues.append(f"Bibliography file not found: {metadata['bibliography']}")
|
||||
|
||||
# Validate CSL file
|
||||
if 'csl' in metadata:
|
||||
csl_path = metadata['csl']
|
||||
# Handle relative paths
|
||||
if not os.path.isabs(csl_path):
|
||||
csl_path = os.path.join(file_dir, csl_path)
|
||||
|
||||
if not os.path.exists(csl_path):
|
||||
issues.append(f"CSL file not found: {metadata['csl']}")
|
||||
|
||||
# Check for images in markdown content
|
||||
markdown_content = content[yaml_end + 3:]
|
||||
image_pattern = r'!\[.*?\]\((.*?)\)'
|
||||
images = re.findall(image_pattern, markdown_content)
|
||||
|
||||
missing_images = []
|
||||
for img_path in images:
|
||||
# Skip URLs
|
||||
if img_path.startswith(('http://', 'https://')):
|
||||
continue
|
||||
|
||||
# Handle relative paths
|
||||
if not os.path.isabs(img_path):
|
||||
img_path = os.path.join(file_dir, img_path)
|
||||
|
||||
if not os.path.exists(img_path):
|
||||
missing_images.append(img_path)
|
||||
|
||||
if missing_images:
|
||||
issues.append(f"Missing images: {', '.join(missing_images)}")
|
||||
|
||||
# Check recommended fields
|
||||
recommended_fields = ['title', 'author']
|
||||
for field in recommended_fields:
|
||||
if field not in metadata or not metadata[field]:
|
||||
warnings.append(f"Missing recommended field: {field}")
|
||||
|
||||
# Check date format
|
||||
if 'date' in metadata:
|
||||
date_val = str(metadata['date'])
|
||||
# Warn if date looks like it needs formatting
|
||||
if date_val.lower() in ['date', 'todo', 'tbd']:
|
||||
warnings.append(f"Date field needs to be filled in: '{date_val}'")
|
||||
|
||||
# Check for common mistakes
|
||||
if 'documentclass' in metadata:
|
||||
valid_classes = ['article', 'report', 'book', 'beamer']
|
||||
if metadata['documentclass'] not in valid_classes:
|
||||
warnings.append(f"Unusual documentclass: '{metadata['documentclass']}' (common: {', '.join(valid_classes)})")
|
||||
|
||||
# Bibliography + CSL warning
|
||||
if 'bibliography' in metadata and 'csl' not in metadata:
|
||||
warnings.append("Bibliography specified but no CSL file (will use default citation style)")
|
||||
|
||||
return len(issues) == 0, issues, warnings
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: validate.py <file>")
|
||||
sys.exit(1)
|
||||
|
||||
filepath = sys.argv[1]
|
||||
is_valid, errors, warnings = validate_file(filepath)
|
||||
|
||||
print("Pandoc Markdown Validation")
|
||||
print("=" * 40)
|
||||
print(f"File: {filepath}")
|
||||
print()
|
||||
|
||||
# Print errors
|
||||
if errors:
|
||||
print("❌ Errors:")
|
||||
for error in errors:
|
||||
print(f" • {error}")
|
||||
print()
|
||||
|
||||
# Print warnings
|
||||
if warnings:
|
||||
print("⚠️ Warnings:")
|
||||
for warning in warnings:
|
||||
print(f" • {warning}")
|
||||
print()
|
||||
|
||||
# Print result
|
||||
if is_valid:
|
||||
if warnings:
|
||||
print(f"✅ Validation passed with {len(warnings)} warning(s)")
|
||||
else:
|
||||
print("✅ Validation passed - ready to convert!")
|
||||
sys.exit(0)
|
||||
else:
|
||||
print(f"❌ Validation failed with {len(errors)} error(s)")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user