Initial commit
176
skills/literature-review/scripts/generate_pdf.py
Normal file
@@ -0,0 +1,176 @@
#!/usr/bin/env python3
"""
PDF Generation Script for Literature Reviews
Converts markdown files to professionally formatted PDFs with proper styling.
"""

import subprocess
import sys
import os
from pathlib import Path


def generate_pdf(
    markdown_file: str,
    output_pdf: str = None,
    citation_style: str = "apa",
    template: str = None,
    toc: bool = True,
    number_sections: bool = True
) -> bool:
    """
    Generate a PDF from a markdown file using pandoc.

    Args:
        markdown_file: Path to the markdown file
        output_pdf: Path for output PDF (defaults to same name as markdown)
        citation_style: Citation style (apa, nature, chicago, etc.)
        template: Path to custom LaTeX template
        toc: Include table of contents
        number_sections: Number the sections

    Returns:
        True if successful, False otherwise
    """
    # Verify markdown file exists
    if not os.path.exists(markdown_file):
        print(f"Error: Markdown file not found: {markdown_file}")
        return False

    # Set default output path
    if output_pdf is None:
        output_pdf = Path(markdown_file).with_suffix('.pdf')

    # Check if pandoc is installed
    try:
        subprocess.run(['pandoc', '--version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: pandoc is not installed.")
        print("Install with: brew install pandoc (macOS) or apt-get install pandoc (Linux)")
        return False

    # Build pandoc command
    cmd = [
        'pandoc',
        markdown_file,
        '-o', str(output_pdf),
        '--pdf-engine=xelatex',  # Better Unicode support
        '-V', 'geometry:margin=1in',
        '-V', 'fontsize=11pt',
        '-V', 'colorlinks=true',
        '-V', 'linkcolor=blue',
        '-V', 'urlcolor=blue',
        '-V', 'citecolor=blue',
    ]

    # Add table of contents
    if toc:
        cmd.extend(['--toc', '--toc-depth=3'])

    # Add section numbering
    if number_sections:
        cmd.append('--number-sections')

    # Add citation processing if bibliography exists
    bib_file = Path(markdown_file).with_suffix('.bib')
    if bib_file.exists():
        cmd.extend([
            '--citeproc',
            '--bibliography', str(bib_file),
            '--csl', f'{citation_style}.csl' if not citation_style.endswith('.csl') else citation_style
        ])

    # Add custom template if provided
    if template and os.path.exists(template):
        cmd.extend(['--template', template])

    # Execute pandoc
    try:
        print(f"Generating PDF: {output_pdf}")
        print(f"Command: {' '.join(cmd)}")
        subprocess.run(cmd, capture_output=True, text=True, check=True)
        print(f"✓ PDF generated successfully: {output_pdf}")
        return True
    except subprocess.CalledProcessError as e:
        print("Error generating PDF:")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return False


def check_dependencies():
    """Check if required dependencies are installed."""
    dependencies = {
        'pandoc': 'pandoc --version',
        'xelatex': 'xelatex --version'
    }

    missing = []
    for name, cmd in dependencies.items():
        try:
            subprocess.run(cmd.split(), capture_output=True, check=True)
            print(f"✓ {name} is installed")
        except (subprocess.CalledProcessError, FileNotFoundError):
            print(f"✗ {name} is NOT installed")
            missing.append(name)

    if missing:
        print("\n" + "="*60)
        print("Missing dependencies:")
        for dep in missing:
            if dep == 'pandoc':
                print("  - pandoc: brew install pandoc (macOS) or apt-get install pandoc (Linux)")
            elif dep == 'xelatex':
                print("  - xelatex: brew install --cask mactex (macOS) or apt-get install texlive-xetex (Linux)")
        return False

    return True


def main():
    """Command-line interface."""
    if len(sys.argv) < 2:
        print("Usage: python generate_pdf.py <markdown_file> [output_pdf] [--citation-style STYLE]")
        print("\nOptions:")
        print("  --citation-style STYLE   Citation style (default: apa)")
        print("  --no-toc                 Disable table of contents")
        print("  --no-numbers             Disable section numbering")
        print("  --check-deps             Check if dependencies are installed")
        sys.exit(1)

    # Check dependencies mode
    if '--check-deps' in sys.argv:
        check_dependencies()
        sys.exit(0)

    # Parse arguments
    markdown_file = sys.argv[1]
    output_pdf = sys.argv[2] if len(sys.argv) > 2 and not sys.argv[2].startswith('--') else None

    citation_style = 'apa'
    toc = True
    number_sections = True

    # Parse optional flags
    if '--citation-style' in sys.argv:
        idx = sys.argv.index('--citation-style')
        if idx + 1 < len(sys.argv):
            citation_style = sys.argv[idx + 1]

    if '--no-toc' in sys.argv:
        toc = False

    if '--no-numbers' in sys.argv:
        number_sections = False

    # Generate PDF
    success = generate_pdf(
        markdown_file,
        output_pdf,
        citation_style=citation_style,
        toc=toc,
        number_sections=number_sections
    )

    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
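Note: generate_pdf can also be imported and called directly instead of going through the CLI. A minimal sketch, assuming the script is importable from the working directory; review.md and review.pdf are placeholder names, and a review.bib next to the markdown file would be picked up automatically:

from generate_pdf import generate_pdf

# Placeholder paths, for illustration only
ok = generate_pdf("review.md", "review.pdf", citation_style="nature", toc=True)
if not ok:
    raise SystemExit("PDF generation failed; see the pandoc output above")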
303
skills/literature-review/scripts/search_databases.py
Normal file
@@ -0,0 +1,303 @@
#!/usr/bin/env python3
"""
Literature Database Search Script
Searches multiple literature databases and aggregates results.
"""

import json
import sys
from typing import Dict, List
from datetime import datetime


def format_search_results(results: List[Dict], output_format: str = 'json') -> str:
    """
    Format search results for output.

    Args:
        results: List of search results
        output_format: Format (json, markdown, or bibtex)

    Returns:
        Formatted string
    """
    if output_format == 'json':
        return json.dumps(results, indent=2)

    elif output_format == 'markdown':
        md = "# Literature Search Results\n\n"
        md += f"**Search Date**: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n"
        md += f"**Total Results**: {len(results)}\n\n"

        for i, result in enumerate(results, 1):
            md += f"## {i}. {result.get('title', 'Untitled')}\n\n"
            md += f"**Authors**: {result.get('authors', 'Unknown')}\n\n"
            md += f"**Year**: {result.get('year', 'N/A')}\n\n"
            md += f"**Source**: {result.get('source', 'Unknown')}\n\n"

            if result.get('abstract'):
                md += f"**Abstract**: {result['abstract']}\n\n"

            if result.get('doi'):
                md += f"**DOI**: [{result['doi']}](https://doi.org/{result['doi']})\n\n"

            if result.get('url'):
                md += f"**URL**: {result['url']}\n\n"

            if result.get('citations'):
                md += f"**Citations**: {result['citations']}\n\n"

            md += "---\n\n"

        return md

    elif output_format == 'bibtex':
        bibtex = ""
        for result in results:
            entry_type = result.get('type', 'article')
            cite_key = f"{result.get('first_author', 'unknown')}{result.get('year', '0000')}"

            bibtex += f"@{entry_type}{{{cite_key},\n"
            bibtex += f"  title = {{{result.get('title', '')}}},\n"
            bibtex += f"  author = {{{result.get('authors', '')}}},\n"
            bibtex += f"  year = {{{result.get('year', '')}}},\n"

            if result.get('journal'):
                bibtex += f"  journal = {{{result['journal']}}},\n"

            if result.get('volume'):
                bibtex += f"  volume = {{{result['volume']}}},\n"

            if result.get('pages'):
                bibtex += f"  pages = {{{result['pages']}}},\n"

            if result.get('doi'):
                bibtex += f"  doi = {{{result['doi']}}},\n"

            bibtex += "}\n\n"

        return bibtex

    else:
        raise ValueError(f"Unknown format: {output_format}")


def deduplicate_results(results: List[Dict]) -> List[Dict]:
    """
    Remove duplicate results based on DOI or title.

    Args:
        results: List of search results

    Returns:
        Deduplicated list
    """
    seen_dois = set()
    seen_titles = set()
    unique_results = []

    for result in results:
        doi = result.get('doi', '').lower().strip()
        title = result.get('title', '').lower().strip()

        # Check DOI first (more reliable)
        if doi and doi in seen_dois:
            continue

        # Check title as fallback
        if not doi and title in seen_titles:
            continue

        # Add to results
        if doi:
            seen_dois.add(doi)
        if title:
            seen_titles.add(title)

        unique_results.append(result)

    return unique_results


def rank_results(results: List[Dict], criteria: str = 'citations') -> List[Dict]:
    """
    Rank results by specified criteria.

    Args:
        results: List of search results
        criteria: Ranking criteria (citations, year, relevance)

    Returns:
        Ranked list
    """
    if criteria == 'citations':
        return sorted(results, key=lambda x: x.get('citations', 0), reverse=True)
    elif criteria == 'year':
        return sorted(results, key=lambda x: x.get('year', '0'), reverse=True)
    elif criteria == 'relevance':
        return sorted(results, key=lambda x: x.get('relevance_score', 0), reverse=True)
    else:
        return results


def filter_by_year(results: List[Dict], start_year: int = None, end_year: int = None) -> List[Dict]:
    """
    Filter results by publication year range.

    Args:
        results: List of search results
        start_year: Minimum year (inclusive)
        end_year: Maximum year (inclusive)

    Returns:
        Filtered list
    """
    filtered = []

    for result in results:
        try:
            year = int(result.get('year', 0))
            if start_year and year < start_year:
                continue
            if end_year and year > end_year:
                continue
            filtered.append(result)
        except (ValueError, TypeError):
            # Include if year parsing fails
            filtered.append(result)

    return filtered


def generate_search_summary(results: List[Dict]) -> Dict:
    """
    Generate summary statistics for search results.

    Args:
        results: List of search results

    Returns:
        Summary dictionary
    """
    summary = {
        'total_results': len(results),
        'sources': {},
        'year_distribution': {},
        'avg_citations': 0,
        'total_citations': 0
    }

    citations = []

    for result in results:
        # Count by source
        source = result.get('source', 'Unknown')
        summary['sources'][source] = summary['sources'].get(source, 0) + 1

        # Count by year
        year = result.get('year', 'Unknown')
        summary['year_distribution'][year] = summary['year_distribution'].get(year, 0) + 1

        # Collect citations
        if result.get('citations'):
            try:
                citations.append(int(result['citations']))
            except (ValueError, TypeError):
                pass

    if citations:
        summary['avg_citations'] = sum(citations) / len(citations)
        summary['total_citations'] = sum(citations)

    return summary


def main():
    """Command-line interface for search result processing."""
    if len(sys.argv) < 2:
        print("Usage: python search_databases.py <results.json> [options]")
        print("\nOptions:")
        print("  --format FORMAT      Output format (json, markdown, bibtex)")
        print("  --output FILE        Output file (default: stdout)")
        print("  --rank CRITERIA      Rank by (citations, year, relevance)")
        print("  --year-start YEAR    Filter by start year")
        print("  --year-end YEAR      Filter by end year")
        print("  --deduplicate        Remove duplicates")
        print("  --summary            Show summary statistics")
        sys.exit(1)

    # Load results
    results_file = sys.argv[1]
    try:
        with open(results_file, 'r', encoding='utf-8') as f:
            results = json.load(f)
    except Exception as e:
        print(f"Error loading results: {e}")
        sys.exit(1)

    # Parse options
    output_format = 'markdown'
    output_file = None
    rank_criteria = None
    year_start = None
    year_end = None
    do_dedup = False
    show_summary = False

    i = 2
    while i < len(sys.argv):
        arg = sys.argv[i]

        if arg == '--format' and i + 1 < len(sys.argv):
            output_format = sys.argv[i + 1]
            i += 2
        elif arg == '--output' and i + 1 < len(sys.argv):
            output_file = sys.argv[i + 1]
            i += 2
        elif arg == '--rank' and i + 1 < len(sys.argv):
            rank_criteria = sys.argv[i + 1]
            i += 2
        elif arg == '--year-start' and i + 1 < len(sys.argv):
            year_start = int(sys.argv[i + 1])
            i += 2
        elif arg == '--year-end' and i + 1 < len(sys.argv):
            year_end = int(sys.argv[i + 1])
            i += 2
        elif arg == '--deduplicate':
            do_dedup = True
            i += 1
        elif arg == '--summary':
            show_summary = True
            i += 1
        else:
            i += 1

    # Process results
    if do_dedup:
        results = deduplicate_results(results)
        print(f"After deduplication: {len(results)} results")

    if year_start or year_end:
        results = filter_by_year(results, year_start, year_end)
        print(f"After year filter: {len(results)} results")

    if rank_criteria:
        results = rank_results(results, rank_criteria)
        print(f"Ranked by: {rank_criteria}")

    # Show summary
    if show_summary:
        summary = generate_search_summary(results)
        print("\n" + "="*60)
        print("SEARCH SUMMARY")
        print("="*60)
        print(json.dumps(summary, indent=2))
        print()

    # Format output
    output = format_search_results(results, output_format)

    # Write output
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f"✓ Results saved to: {output_file}")
    else:
        print(output)


if __name__ == "__main__":
    main()
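Note: the CLI above expects a JSON array of result objects; the keys it reads (title, authors, year, source, doi, url, abstract, citations) follow from format_search_results. A minimal sketch of the same pipeline used programmatically, with made-up placeholder records:

from search_databases import deduplicate_results, rank_results, format_search_results

# Hypothetical records carrying the fields the formatters look for
results = [
    {"title": "An Example Survey", "authors": "Doe, J., Roe, R.", "year": "2021",
     "source": "arXiv", "doi": "10.1000/example.1", "citations": 12},
    {"title": "An Example Survey", "authors": "Doe, J., Roe, R.", "year": "2021",
     "source": "Semantic Scholar", "doi": "10.1000/example.1", "citations": 12},
]

unique = deduplicate_results(results)           # second record dropped via the shared DOI
ranked = rank_results(unique, "citations")      # highest-cited first
print(format_search_results(ranked, "bibtex"))  # or "json" / "markdown"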
222
skills/literature-review/scripts/verify_citations.py
Normal file
@@ -0,0 +1,222 @@
#!/usr/bin/env python3
"""
Citation Verification Script
Verifies DOIs, URLs, and citation metadata for accuracy.
"""

import re
import requests
import json
from typing import Dict, List, Tuple
import time


class CitationVerifier:
    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'CitationVerifier/1.0 (Literature Review Tool)'
        })

    def extract_dois(self, text: str) -> List[str]:
        """Extract all DOIs from text."""
        doi_pattern = r'10\.\d{4,}/[^\s\]\)"]+'
        # Strip trailing punctuation the pattern can pick up at sentence ends
        return [doi.rstrip('.,;') for doi in re.findall(doi_pattern, text)]

    def verify_doi(self, doi: str) -> Tuple[bool, Dict]:
        """
        Verify a DOI and retrieve metadata.
        Returns (is_valid, metadata)
        """
        try:
            url = f"https://doi.org/api/handles/{doi}"
            response = self.session.get(url, timeout=10)

            if response.status_code == 200:
                # DOI exists, now get metadata from CrossRef
                metadata = self._get_crossref_metadata(doi)
                return True, metadata
            else:
                return False, {}
        except Exception as e:
            return False, {"error": str(e)}

    def _get_crossref_metadata(self, doi: str) -> Dict:
        """Get metadata from CrossRef API."""
        try:
            url = f"https://api.crossref.org/works/{doi}"
            response = self.session.get(url, timeout=10)

            if response.status_code == 200:
                data = response.json()
                message = data.get('message', {})

                # Extract key metadata; CrossRef returns title and container-title
                # as lists that may be missing or empty
                metadata = {
                    'title': (message.get('title') or [''])[0],
                    'authors': self._format_authors(message.get('author', [])),
                    'year': self._extract_year(message),
                    'journal': (message.get('container-title') or [''])[0],
                    'volume': message.get('volume', ''),
                    'pages': message.get('page', ''),
                    'doi': doi
                }
                return metadata
            return {}
        except Exception as e:
            return {"error": str(e)}

    def _format_authors(self, authors: List[Dict]) -> str:
        """Format author list."""
        if not authors:
            return ""

        formatted = []
        for author in authors[:3]:  # First 3 authors
            given = author.get('given', '')
            family = author.get('family', '')
            if family:
                formatted.append(f"{family}, {given[0]}." if given else family)

        if len(authors) > 3:
            formatted.append("et al.")

        return ", ".join(formatted)

    def _extract_year(self, message: Dict) -> str:
        """Extract publication year."""
        date_parts = message.get('published-print', {}).get('date-parts', [[]])
        if not date_parts or not date_parts[0]:
            date_parts = message.get('published-online', {}).get('date-parts', [[]])

        if date_parts and date_parts[0]:
            return str(date_parts[0][0])
        return ""

    def verify_url(self, url: str) -> Tuple[bool, int]:
        """
        Verify a URL is accessible.
        Returns (is_accessible, status_code)
        """
        try:
            response = self.session.head(url, timeout=10, allow_redirects=True)
            is_accessible = response.status_code < 400
            return is_accessible, response.status_code
        except Exception:
            return False, 0

    def verify_citations_in_file(self, filepath: str) -> Dict:
        """
        Verify all citations in a markdown file.
        Returns a report of verification results.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        dois = self.extract_dois(content)

        report = {
            'total_dois': len(dois),
            'verified': [],
            'failed': [],
            'metadata': {}
        }

        for doi in dois:
            print(f"Verifying DOI: {doi}")
            is_valid, metadata = self.verify_doi(doi)

            if is_valid:
                report['verified'].append(doi)
                report['metadata'][doi] = metadata
            else:
                report['failed'].append(doi)

            time.sleep(0.5)  # Rate limiting

        return report

    def format_citation_apa(self, metadata: Dict) -> str:
        """Format citation in APA style."""
        authors = metadata.get('authors', '')
        year = metadata.get('year', 'n.d.')
        title = metadata.get('title', '')
        journal = metadata.get('journal', '')
        volume = metadata.get('volume', '')
        pages = metadata.get('pages', '')
        doi = metadata.get('doi', '')

        citation = f"{authors} ({year}). {title}. "
        if journal:
            citation += f"*{journal}*"
        if volume:
            citation += f", *{volume}*"
        if pages:
            citation += f", {pages}"
        if doi:
            citation += f". https://doi.org/{doi}"

        return citation

    def format_citation_nature(self, metadata: Dict) -> str:
        """Format citation in Nature style."""
        authors = metadata.get('authors', '')
        title = metadata.get('title', '')
        journal = metadata.get('journal', '')
        volume = metadata.get('volume', '')
        pages = metadata.get('pages', '')
        year = metadata.get('year', '')

        citation = f"{authors} {title}. "
        if journal:
            citation += f"*{journal}* "
        if volume:
            citation += f"**{volume}**, "
        if pages:
            citation += f"{pages} "
        if year:
            citation += f"({year})"

        return citation


def main():
    """Example usage."""
    import sys

    if len(sys.argv) < 2:
        print("Usage: python verify_citations.py <markdown_file>")
        sys.exit(1)

    filepath = sys.argv[1]
    verifier = CitationVerifier()

    print(f"Verifying citations in: {filepath}")
    report = verifier.verify_citations_in_file(filepath)

    print("\n" + "="*60)
    print("CITATION VERIFICATION REPORT")
    print("="*60)
    print(f"\nTotal DOIs found: {report['total_dois']}")
    print(f"Verified: {len(report['verified'])}")
    print(f"Failed: {len(report['failed'])}")

    if report['failed']:
        print("\nFailed DOIs:")
        for doi in report['failed']:
            print(f"  - {doi}")

    if report['metadata']:
        print("\n\nVerified Citations (APA format):")
        for doi, metadata in report['metadata'].items():
            citation = verifier.format_citation_apa(metadata)
            print(f"\n{citation}")

    # Save detailed report
    output_file = filepath.replace('.md', '_citation_report.json')
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)

    print(f"\n\nDetailed report saved to: {output_file}")


if __name__ == "__main__":
    main()
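Note: CitationVerifier can also be used on a single DOI rather than a whole file. A minimal sketch, assuming network access to doi.org and api.crossref.org; the DOI shown is a placeholder, not a real reference:

from verify_citations import CitationVerifier

verifier = CitationVerifier()
is_valid, metadata = verifier.verify_doi("10.1000/example.1")  # placeholder DOI
if is_valid:
    print(verifier.format_citation_apa(metadata))
else:
    print("DOI could not be resolved:", metadata.get("error", "unknown reason"))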