Files
2025-11-30 08:30:18 +08:00

205 lines
6.2 KiB
Python

#!/usr/bin/env python3
"""
DOI to BibTeX Converter
Quick utility to convert DOIs to BibTeX format using CrossRef API.
"""
import sys
import requests
import argparse
import time
import json
from typing import Optional, List
class DOIConverter:
    """Convert DOIs to BibTeX entries via content negotiation on doi.org.

    Every lookup is a GET on ``https://doi.org/<doi>`` with an
    ``Accept: application/x-bibtex`` header. All lookups share a single
    ``requests.Session`` whose ``User-Agent`` includes a contact address.
    Failures are reported on stderr and signalled by returning ``None``;
    no exception from ``requests`` is propagated to the caller.
    """

    # Prefixes that may precede a bare DOI. Matched case-insensitively and
    # only at the very start of the input.
    _DOI_PREFIXES = (
        'https://doi.org/',
        'http://doi.org/',
        'https://dx.doi.org/',
        'http://dx.doi.org/',
        'doi:',
    )

    def __init__(self):
        """Create the shared HTTP session and set its identifying User-Agent."""
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'DOIConverter/1.0 (Citation Management Tool; mailto:support@example.com)'
        })

    @classmethod
    def _normalize_doi(cls, doi: str) -> str:
        """Return *doi* without surrounding whitespace or a leading resolver prefix.

        Only a prefix at the start of the string is removed; identical text
        occurring later inside the DOI is left untouched.
        """
        doi = doi.strip()
        for prefix in cls._DOI_PREFIXES:
            # Compare a slice rather than lowercasing the whole string so the
            # DOI suffix keeps its original case and length.
            if doi[:len(prefix)].lower() == prefix:
                return doi[len(prefix):].strip()
        return doi

    def doi_to_bibtex(self, doi: str) -> Optional[str]:
        """
        Convert a single DOI to BibTeX format.

        Args:
            doi: Digital Object Identifier, either bare or prefixed with
                ``doi:`` or a ``doi.org`` / ``dx.doi.org`` URL.

        Returns:
            The BibTeX text with surrounding whitespace stripped, or None if
            the DOI is empty, the request fails, or the server responds with
            a status other than 200. The reason is printed to stderr.
        """
        doi = self._normalize_doi(doi)
        if not doi:
            # Without this guard the request would target the resolver's
            # root URL and any 200 response would be returned as "BibTeX".
            print('Error: Empty DOI', file=sys.stderr)
            return None

        url = f'https://doi.org/{doi}'
        # Only the Accept header is request-specific; the session already
        # carries the User-Agent (with contact address) set in __init__.
        headers = {'Accept': 'application/x-bibtex'}

        try:
            response = self.session.get(url, headers=headers, timeout=15)
        except requests.exceptions.Timeout:
            print(f'Error: Request timeout for DOI: {doi}', file=sys.stderr)
            return None
        except requests.exceptions.RequestException as e:
            print(f'Error: Request failed for {doi}: {e}', file=sys.stderr)
            return None

        if response.status_code == 404:
            print(f'Error: DOI not found: {doi}', file=sys.stderr)
            return None
        if response.status_code != 200:
            print(f'Error: Failed to retrieve BibTeX for {doi} (status {response.status_code})', file=sys.stderr)
            return None

        # When the server declares no charset, requests falls back to
        # guessing, which can garble non-ASCII names. Default to UTF-8 but
        # respect any charset the server did declare.
        if response.encoding is None:
            response.encoding = 'utf-8'

        bibtex = response.text.strip()
        # Some responses use the entry type @data; rewrite it to @misc.
        if bibtex.startswith('@data{'):
            bibtex = '@misc{' + bibtex[len('@data{'):]
        return bibtex

    def convert_multiple(self, dois: List[str], delay: float = 0.5) -> List[str]:
        """
        Convert multiple DOIs to BibTeX, pausing between requests.

        Args:
            dois: List of DOIs
            delay: Pause between consecutive requests, in seconds, for rate
                limiting. Zero or negative values disable the pause.

        Returns:
            List of BibTeX entries in input order (failed conversions are
            omitted). Progress is printed to stderr.
        """
        bibtex_entries = []
        total = len(dois)
        for position, doi in enumerate(dois, start=1):
            print(f'Converting DOI {position}/{total}: {doi}', file=sys.stderr)
            bibtex = self.doi_to_bibtex(doi)
            if bibtex:
                bibtex_entries.append(bibtex)
            # No pause after the last request; time.sleep() rejects negative
            # values, so those are treated as "no delay".
            if position < total and delay > 0:
                time.sleep(delay)
        return bibtex_entries
def main(argv: Optional[List[str]] = None) -> None:
    """Command-line interface.

    Collects DOIs from positional arguments and/or an input file, converts
    them with ``DOIConverter``, and writes the result as BibTeX or JSON to a
    file or stdout. The output always ends with exactly one newline.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. The default
            (None) reads the process arguments.

    Exits with status 1 when no DOI is given, the input file cannot be read,
    no conversion succeeds, or the output file cannot be written. Invalid
    arguments exit with argparse's usage error (status 2).
    """
    parser = argparse.ArgumentParser(
        description='Convert DOIs to BibTeX format using CrossRef API',
        epilog='Example: python doi_to_bibtex.py 10.1038/s41586-021-03819-2'
    )
    parser.add_argument(
        'dois',
        nargs='*',
        help='DOI(s) to convert (can provide multiple)'
    )
    parser.add_argument(
        '-i', '--input',
        help='Input file with DOIs (one per line)'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output file for BibTeX (default: stdout)'
    )
    parser.add_argument(
        '--delay',
        type=float,
        default=0.5,
        help='Delay between requests in seconds (default: 0.5)'
    )
    parser.add_argument(
        '--format',
        choices=['bibtex', 'json'],
        default='bibtex',
        help='Output format (default: bibtex)'
    )
    args = parser.parse_args(argv)

    # Reject negative delays up front (time.sleep() raises on them); written
    # as "not >=" so that NaN is rejected as well.
    if not (args.delay >= 0):
        parser.error('--delay must be a non-negative number')

    # Collect DOIs from command line and/or file, dropping blank entries.
    dois = [doi for doi in args.dois if doi.strip()]
    if args.input:
        try:
            # utf-8-sig reads plain UTF-8 too, but strips a leading BOM that
            # would otherwise become part of the first DOI.
            with open(args.input, 'r', encoding='utf-8-sig') as f:
                dois.extend(line for line in map(str.strip, f) if line)
        except FileNotFoundError:
            print(f'Error: Input file not found: {args.input}', file=sys.stderr)
            sys.exit(1)
        except (OSError, UnicodeDecodeError) as e:
            print(f'Error reading input file: {e}', file=sys.stderr)
            sys.exit(1)

    if not dois:
        parser.print_help()
        sys.exit(1)

    # Convert DOIs. A single DOI skips the batch progress output; the
    # converter has already reported the reason if it fails.
    converter = DOIConverter()
    if len(dois) == 1:
        bibtex = converter.doi_to_bibtex(dois[0])
        if not bibtex:
            sys.exit(1)
        bibtex_entries = [bibtex]
    else:
        bibtex_entries = converter.convert_multiple(dois, delay=args.delay)
        if not bibtex_entries:
            print('Error: No successful conversions', file=sys.stderr)
            sys.exit(1)

    # Format output; both formats end with a single trailing newline.
    if args.format == 'bibtex':
        output = '\n\n'.join(bibtex_entries) + '\n'
    else:  # json
        output = json.dumps({
            'count': len(bibtex_entries),
            'entries': bibtex_entries
        }, indent=2) + '\n'

    # Write output
    if args.output:
        try:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(output)
        except OSError as e:
            print(f'Error writing output file: {e}', file=sys.stderr)
            sys.exit(1)
        print(f'Successfully wrote {len(bibtex_entries)} entries to {args.output}', file=sys.stderr)
    else:
        # write() rather than print(): output already carries its newline.
        sys.stdout.write(output)

    # Summary (batch runs only)
    if len(dois) > 1:
        success_rate = len(bibtex_entries) / len(dois) * 100
        print(f'\nConverted {len(bibtex_entries)}/{len(dois)} DOIs ({success_rate:.1f}%)', file=sys.stderr)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()