Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:30:18 +08:00
commit 74bee324ab
335 changed files with 147377 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,264 @@
% BibTeX Template File
% Examples of properly formatted entries for all common types
% =============================================================================
% JOURNAL ARTICLES
% =============================================================================
@article{Jumper2021,
  author  = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'\i}dek, Augustin and Potapenko, Anna and others},
  title   = {Highly Accurate Protein Structure Prediction with {AlphaFold}},
  journal = {Nature},
  year    = {2021},
  volume  = {596},
  number  = {7873},
  pages   = {583--589},
  doi     = {10.1038/s41586-021-03819-2}
}
@article{Watson1953,
  author  = {Watson, James D. and Crick, Francis H. C.},
  title   = {Molecular Structure of Nucleic Acids: A Structure for Deoxyribose Nucleic Acid},
  journal = {Nature},
  year    = {1953},
  volume  = {171},
  number  = {4356},
  pages   = {737--738},
  doi     = {10.1038/171737a0}
}
@article{Doudna2014,
  author  = {Doudna, Jennifer A. and Charpentier, Emmanuelle},
  title   = {The New Frontier of Genome Engineering with {CRISPR-Cas9}},
  journal = {Science},
  year    = {2014},
  volume  = {346},
  number  = {6213},
  pages   = {1258096},
  doi     = {10.1126/science.1258096}
}
% =============================================================================
% BOOKS
% =============================================================================
@book{Kumar2021,
  author    = {Kumar, Vinay and Abbas, Abul K. and Aster, Jon C.},
  title     = {Robbins and Cotran Pathologic Basis of Disease},
  publisher = {Elsevier},
  year      = {2021},
  edition   = {10},
  address   = {Philadelphia, PA},
  isbn      = {978-0-323-53113-9}
}
@book{Alberts2014,
  author    = {Alberts, Bruce and Johnson, Alexander and Lewis, Julian and Morgan, David and Raff, Martin and Roberts, Keith and Walter, Peter},
  title     = {Molecular Biology of the Cell},
  publisher = {Garland Science},
  year      = {2014},
  edition   = {6},
  address   = {New York, NY},
  isbn      = {978-0-815-34432-2}
}
% Book with editor instead of author
@book{Sambrook2001,
  editor    = {Sambrook, Joseph and Russell, David W.},
  title     = {Molecular Cloning: A Laboratory Manual},
  publisher = {Cold Spring Harbor Laboratory Press},
  year      = {2001},
  edition   = {3},
  address   = {Cold Spring Harbor, NY},
  isbn      = {978-0-879-69576-7}
}
% =============================================================================
% CONFERENCE PAPERS (PROCEEDINGS)
% =============================================================================
@inproceedings{Vaswani2017,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is All You Need},
  booktitle = {Advances in Neural Information Processing Systems 30 (NeurIPS 2017)},
  year      = {2017},
  pages     = {5998--6008},
  address   = {Long Beach, CA},
  url       = {https://proceedings.neurips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html}
}
@inproceedings{He2016,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep Residual Learning for Image Recognition},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2016},
  pages     = {770--778},
  address   = {Las Vegas, NV},
  doi       = {10.1109/CVPR.2016.90}
}
% =============================================================================
% BOOK CHAPTERS
% =============================================================================
% Chapter author corrected from "Peter O." to "Patrick O." to match the
% editor listing in this same entry (the microarray author is Patrick O. Brown).
@incollection{Brown2020,
  author    = {Brown, Patrick O. and Botstein, David},
  title     = {Exploring the New World of the Genome with {DNA} Microarrays},
  booktitle = {DNA Microarrays: A Molecular Cloning Manual},
  editor    = {Eisen, Michael B. and Brown, Patrick O.},
  publisher = {Cold Spring Harbor Laboratory Press},
  year      = {2020},
  pages     = {1--45},
  address   = {Cold Spring Harbor, NY}
}
% =============================================================================
% PHD THESES / DISSERTATIONS
% =============================================================================
@phdthesis{Johnson2023,
  author  = {Johnson, Mary L.},
  title   = {Novel Approaches to Cancer Immunotherapy Using {CRISPR} Technology},
  school  = {Stanford University},
  year    = {2023},
  type    = {{PhD} dissertation},
  address = {Stanford, CA}
}
% =============================================================================
% MASTER'S THESES
% =============================================================================
@mastersthesis{Smith2022,
  author  = {Smith, Robert J.},
  title   = {Machine Learning Methods for Protein Structure Prediction},
  school  = {Massachusetts Institute of Technology},
  year    = {2022},
  type    = {{Master's} thesis},
  address = {Cambridge, MA}
}
% =============================================================================
% TECHNICAL REPORTS
% =============================================================================
% Corporate author is double-braced so BibTeX treats it as one indivisible name.
@techreport{WHO2020,
  author      = {{World Health Organization}},
  title       = {Clinical Management of {COVID-19}: Interim Guidance},
  institution = {World Health Organization},
  year        = {2020},
  type        = {Technical Report},
  number      = {WHO/2019-nCoV/clinical/2020.5},
  address     = {Geneva, Switzerland}
}
% =============================================================================
% PREPRINTS
% =============================================================================
% bioRxiv preprint
@misc{Zhang2024preprint,
  author       = {Zhang, Yi and Chen, Li and Wang, Hui and Liu, Xin},
  title        = {Novel Therapeutic Targets in {Alzheimer}'s Disease},
  year         = {2024},
  howpublished = {bioRxiv},
  doi          = {10.1101/2024.01.15.575432},
  note         = {Preprint}
}
% arXiv preprint
@misc{Brown2024arxiv,
  author       = {Brown, Alice and Green, Bob},
  title        = {Advances in Quantum Computing},
  year         = {2024},
  howpublished = {arXiv},
  note         = {arXiv:2401.12345}
}
% =============================================================================
% DATASETS
% =============================================================================
% Dataset entry; each corporate author is individually double-braced.
@misc{AlphaFoldDB2021,
  author       = {{DeepMind} and {EMBL-EBI}},
  title        = {{AlphaFold} Protein Structure Database},
  year         = {2021},
  howpublished = {Database},
  url          = {https://alphafold.ebi.ac.uk/},
  doi          = {10.1093/nar/gkab1061},
  note         = {Version 4}
}
% =============================================================================
% SOFTWARE / CODE
% =============================================================================
% "pandas" is braced so sentence-casing bibliography styles do not
% capitalize the deliberately lowercase project name at the title start.
@misc{McKinney2010pandas,
  author       = {McKinney, Wes},
  title        = {{pandas}: A Foundational {Python} Library for Data Analysis and Statistics},
  year         = {2010},
  howpublished = {Software},
  url          = {https://pandas.pydata.org/},
  note         = {Python Data Analysis Library}
}
% =============================================================================
% WEBSITES / ONLINE RESOURCES
% =============================================================================
% Website entry: access date recorded in the note field.
@misc{NCBI2024,
  author       = {{National Center for Biotechnology Information}},
  title        = {{PubMed}: Database of Biomedical Literature},
  year         = {2024},
  howpublished = {Website},
  url          = {https://pubmed.ncbi.nlm.nih.gov/},
  note         = {Accessed: 2024-01-15}
}
% =============================================================================
% SPECIAL CASES
% =============================================================================
% Article with organization as author
@article{NatureEditorial2023,
  author  = {{Nature Editorial Board}},
  title   = {The Future of {AI} in Scientific Research},
  journal = {Nature},
  year    = {2023},
  volume  = {615},
  pages   = {1--2},
  doi     = {10.1038/d41586-023-00001-1}
}
% Article with no volume number (some journals)
@article{OpenAccess2024,
  author  = {Williams, Sarah and Thomas, Michael},
  title   = {Open Access Publishing in the 21st Century},
  journal = {Journal of Scholarly Communication},
  year    = {2024},
  pages   = {e123456},
  doi     = {10.1234/jsc.2024.123456}
}
% Conference paper with DOI
@inproceedings{Garcia2023,
  author    = {Garc{\'i}a-Mart{\'i}nez, Jos{\'e} and M{\"u}ller, Hans},
  title     = {International Collaboration in Science},
  booktitle = {Proceedings of the International Conference on Academic Publishing},
  year      = {2023},
  pages     = {45--52},
  doi       = {10.1109/ICAP.2023.123456}
}
% Article with PMID but no DOI (older papers)
@article{OldPaper1995,
  author  = {Anderson, Philip W.},
  title   = {Through the Glass Lightly},
  journal = {Science},
  year    = {1995},
  volume  = {267},
  number  = {5204},
  pages   = {1615--1616},
  note    = {PMID: 17808148}
}

View File

@@ -0,0 +1,386 @@
# Citation Quality Checklist
Use this checklist to ensure your citations are accurate, complete, and properly formatted before final submission.
## Pre-Submission Checklist
### ✓ Metadata Accuracy
- [ ] All author names are correct and properly formatted
- [ ] Article titles match the actual publication
- [ ] Journal/conference names are complete (not abbreviated unless required)
- [ ] Publication years are accurate
- [ ] Volume and issue numbers are correct
- [ ] Page ranges are accurate
### ✓ Required Fields
- [ ] All @article entries have: author, title, journal, year
- [ ] All @book entries have: author/editor, title, publisher, year
- [ ] All @inproceedings entries have: author, title, booktitle, year
- [ ] Modern papers (2000+) include DOI when available
- [ ] All entries have unique citation keys
### ✓ DOI Verification
- [ ] All DOIs are properly formatted (10.XXXX/...)
- [ ] DOIs resolve correctly to the article
- [ ] No DOI prefix in the BibTeX field (no "doi:" or "https://doi.org/")
- [ ] Metadata from CrossRef matches your BibTeX entry
- [ ] Run: `python scripts/validate_citations.py references.bib --check-dois`
### ✓ Formatting Consistency
- [ ] Page ranges use double hyphen (--) not single (-)
- [ ] No "pp." prefix in pages field
- [ ] Author names use "and" separator (not semicolon or ampersand)
- [ ] Capitalization protected in titles ({AlphaFold}, {CRISPR}, etc.)
- [ ] Month names use standard abbreviations if included
- [ ] Citation keys follow consistent format
### ✓ Duplicate Detection
- [ ] No duplicate DOIs in bibliography
- [ ] No duplicate citation keys
- [ ] No near-duplicate titles
- [ ] Preprints updated to published versions when available
- [ ] Run: `python scripts/validate_citations.py references.bib`
### ✓ Special Characters
- [ ] Accented characters properly formatted (e.g., {\"u} for ü)
- [ ] Mathematical symbols use LaTeX commands
- [ ] Chemical formulas properly formatted
- [ ] No unescaped special characters (%, &, $, #, etc.)
### ✓ BibTeX Syntax
- [ ] All entries have balanced braces {}
- [ ] Fields separated by commas
- [ ] No comma after last field in each entry
- [ ] Valid entry types (@article, @book, etc.)
- [ ] Run: `python scripts/validate_citations.py references.bib`
### ✓ File Organization
- [ ] Bibliography sorted in logical order (by year, author, or key)
- [ ] Consistent formatting throughout
- [ ] No formatting inconsistencies between entries
- [ ] Run: `python scripts/format_bibtex.py references.bib --sort year`
## Automated Validation
### Step 1: Format and Clean
```bash
python scripts/format_bibtex.py references.bib \
--deduplicate \
--sort year \
--descending \
--output clean_references.bib
```
**What this does**:
- Removes duplicates
- Standardizes formatting
- Fixes common issues (page ranges, DOI format, etc.)
- Sorts by year (newest first)
### Step 2: Validate
```bash
python scripts/validate_citations.py clean_references.bib \
--check-dois \
--report validation_report.json \
--verbose
```
**What this does**:
- Checks required fields
- Verifies DOIs resolve
- Detects duplicates
- Validates syntax
- Generates detailed report
### Step 3: Review Report
```bash
cat validation_report.json
```
**Address any**:
- **Errors**: Must fix (missing fields, broken DOIs, syntax errors)
- **Warnings**: Should fix (missing recommended fields, formatting issues)
- **Duplicates**: Remove or consolidate
### Step 4: Final Check
```bash
python scripts/validate_citations.py clean_references.bib --verbose
```
**Goal**: Zero errors, minimal warnings
## Manual Review Checklist
### Critical Citations (Top 10-20 Most Important)
For your most important citations, manually verify:
- [ ] Visit DOI link and confirm it's the correct article
- [ ] Check author names against the actual publication
- [ ] Verify year matches publication date
- [ ] Confirm journal/conference name is correct
- [ ] Check that volume/pages match
### Common Issues to Watch For
**Missing Information**:
- [ ] No DOI for papers published after 2000
- [ ] Missing volume or page numbers for journal articles
- [ ] Missing publisher for books
- [ ] Missing conference location for proceedings
**Formatting Errors**:
- [ ] Single hyphen in page ranges (123-145 → 123--145)
- [ ] Ampersands in author lists (Smith & Jones → Smith and Jones)
- [ ] Unprotected acronyms in titles (DNA → {DNA})
- [ ] DOI includes URL prefix (https://doi.org/10.xxx → 10.xxx)
**Metadata Mismatches**:
- [ ] Author names differ from publication
- [ ] Year is online-first instead of print publication
- [ ] Journal name abbreviated when it should be full
- [ ] Volume/issue numbers swapped
**Duplicates**:
- [ ] Same paper cited with different citation keys
- [ ] Preprint and published version both cited
- [ ] Conference paper and journal version both cited
## Field-Specific Checks
### Biomedical Sciences
- [ ] PubMed Central ID (PMCID) included when available
- [ ] MeSH terms appropriate (if using)
- [ ] Clinical trial registration number included (if applicable)
- [ ] All references to treatments/drugs accurately cited
### Computer Science
- [ ] arXiv ID included for preprints
- [ ] Conference proceedings properly cited (not just "NeurIPS")
- [ ] Software/dataset citations include version numbers
- [ ] GitHub links stable and permanent
### General Sciences
- [ ] Data availability statements properly cited
- [ ] Retracted papers identified and removed
- [ ] Preprints checked for published versions
- [ ] Supplementary materials referenced if critical
## Final Pre-Submission Steps
### 1 Week Before Submission
- [ ] Run full validation with DOI checking
- [ ] Fix all errors and critical warnings
- [ ] Manually verify top 10-20 most important citations
- [ ] Check for any retracted papers
### 3 Days Before Submission
- [ ] Re-run validation after any manual edits
- [ ] Ensure all in-text citations have corresponding bibliography entries
- [ ] Ensure all bibliography entries are cited in text
- [ ] Check citation style matches journal requirements
### 1 Day Before Submission
- [ ] Final validation check
- [ ] LaTeX compilation successful with no warnings
- [ ] PDF renders all citations correctly
- [ ] Bibliography appears in correct format
- [ ] No placeholder citations (Smith et al. XXXX)
### Submission Day
- [ ] One final validation run
- [ ] No last-minute edits without re-validation
- [ ] Bibliography file included in submission package
- [ ] Figures/tables referenced in text match bibliography
## Quality Metrics
### Excellent Bibliography
- ✓ 100% of entries have DOIs (for modern papers)
- ✓ Zero validation errors
- ✓ Zero missing required fields
- ✓ Zero broken DOIs
- ✓ Zero duplicates
- ✓ Consistent formatting throughout
- ✓ All citations manually spot-checked
### Acceptable Bibliography
- ✓ 90%+ of modern entries have DOIs
- ✓ Zero high-severity errors
- ✓ Minor warnings only (e.g., missing recommended fields)
- ✓ Key citations manually verified
- ✓ Compilation succeeds without errors
### Needs Improvement
- ✗ Missing DOIs for recent papers
- ✗ High-severity validation errors
- ✗ Broken or incorrect DOIs
- ✗ Duplicate entries
- ✗ Inconsistent formatting
- ✗ Compilation warnings or errors
## Emergency Fixes
If you discover issues at the last minute:
### Broken DOI
```bash
# Find correct DOI
# Option 1: Search CrossRef
# https://www.crossref.org/
# Option 2: Search on publisher website
# Option 3: Google Scholar
# Re-extract metadata
python scripts/extract_metadata.py --doi CORRECT_DOI
```
### Missing Information
```bash
# Extract from DOI
python scripts/extract_metadata.py --doi 10.xxxx/yyyy
# Or from PMID (biomedical)
python scripts/extract_metadata.py --pmid 12345678
# Or from arXiv
python scripts/extract_metadata.py --arxiv 2103.12345
```
### Duplicate Entries
```bash
# Auto-remove duplicates
python scripts/format_bibtex.py references.bib \
--deduplicate \
--output fixed_references.bib
```
### Formatting Errors
```bash
# Auto-fix common issues
python scripts/format_bibtex.py references.bib \
--output fixed_references.bib
# Then validate
python scripts/validate_citations.py fixed_references.bib
```
## Long-Term Best Practices
### During Research
- [ ] Add citations to bibliography file as you find them
- [ ] Extract metadata immediately using DOI
- [ ] Validate after every 10-20 additions
- [ ] Keep bibliography file under version control
### During Writing
- [ ] Cite as you write
- [ ] Use consistent citation keys
- [ ] Don't delay adding references
- [ ] Validate weekly
### Before Submission
- [ ] Allow 2-3 days for citation cleanup
- [ ] Don't wait until the last day
- [ ] Automate what you can
- [ ] Manually verify critical citations
## Tool Quick Reference
### Extract Metadata
```bash
# From DOI
python scripts/doi_to_bibtex.py 10.1038/nature12345
# From multiple sources
python scripts/extract_metadata.py \
--doi 10.1038/nature12345 \
--pmid 12345678 \
--arxiv 2103.12345 \
--output references.bib
```
### Validate
```bash
# Basic validation
python scripts/validate_citations.py references.bib
# With DOI checking (slow but thorough)
python scripts/validate_citations.py references.bib --check-dois
# Generate report
python scripts/validate_citations.py references.bib \
--report validation.json \
--verbose
```
### Format and Clean
```bash
# Format and fix issues
python scripts/format_bibtex.py references.bib
# Remove duplicates and sort
python scripts/format_bibtex.py references.bib \
--deduplicate \
--sort year \
--descending \
--output clean_refs.bib
```
## Summary
**Minimum Requirements**:
1. Run `format_bibtex.py --deduplicate`
2. Run `validate_citations.py`
3. Fix all errors
4. Compile successfully
**Recommended**:
1. Format, deduplicate, and sort
2. Validate with `--check-dois`
3. Fix all errors and warnings
4. Manually verify top citations
5. Re-validate after fixes
**Best Practice**:
1. Validate throughout research process
2. Use automated tools consistently
3. Keep bibliography clean and organized
4. Document any special cases
5. Final validation 1-3 days before submission
**Remember**: Citation errors reflect poorly on your scholarship. Taking time to ensure accuracy is worthwhile!

View File

@@ -0,0 +1,908 @@
# BibTeX Formatting Guide
Comprehensive guide to BibTeX entry types, required fields, formatting conventions, and best practices.
## Overview
BibTeX is the standard bibliography format for LaTeX documents. Proper formatting ensures:
- Correct citation rendering
- Consistent formatting
- Compatibility with citation styles
- No compilation errors
This guide covers all common entry types and formatting rules.
## Entry Types
### @article - Journal Articles
**Most common entry type** for peer-reviewed journal articles.
**Required fields**:
- `author`: Author names
- `title`: Article title
- `journal`: Journal name
- `year`: Publication year
**Optional fields**:
- `volume`: Volume number
- `number`: Issue number
- `pages`: Page range
- `month`: Publication month
- `doi`: Digital Object Identifier
- `url`: URL
- `note`: Additional notes
**Template**:
```bibtex
@article{CitationKey2024,
author = {Last1, First1 and Last2, First2},
title = {Article Title Here},
journal = {Journal Name},
year = {2024},
volume = {10},
number = {3},
pages = {123--145},
doi = {10.1234/journal.2024.123456},
month = jan
}
```
**Example**:
```bibtex
@article{Jumper2021,
author = {Jumper, John and Evans, Richard and Pritzel, Alexander and others},
title = {Highly Accurate Protein Structure Prediction with {AlphaFold}},
journal = {Nature},
year = {2021},
volume = {596},
number = {7873},
pages = {583--589},
doi = {10.1038/s41586-021-03819-2}
}
```
### @book - Books
**For entire books**.
**Required fields**:
- `author` OR `editor`: Author(s) or editor(s)
- `title`: Book title
- `publisher`: Publisher name
- `year`: Publication year
**Optional fields**:
- `volume`: Volume number (if multi-volume)
- `series`: Series name
- `address`: Publisher location
- `edition`: Edition number
- `isbn`: ISBN
- `url`: URL
**Template**:
```bibtex
@book{CitationKey2024,
author = {Last, First},
title = {Book Title},
publisher = {Publisher Name},
year = {2024},
edition = {3},
address = {City, Country},
isbn = {978-0-123-45678-9}
}
```
**Example**:
```bibtex
@book{Kumar2021,
author = {Kumar, Vinay and Abbas, Abul K. and Aster, Jon C.},
title = {Robbins and Cotran Pathologic Basis of Disease},
publisher = {Elsevier},
year = {2021},
edition = {10},
address = {Philadelphia, PA},
isbn = {978-0-323-53113-9}
}
```
### @inproceedings - Conference Papers
**For papers in conference proceedings**.
**Required fields**:
- `author`: Author names
- `title`: Paper title
- `booktitle`: Conference/proceedings name
- `year`: Year
**Optional fields**:
- `editor`: Proceedings editor(s)
- `volume`: Volume number
- `series`: Series name
- `pages`: Page range
- `address`: Conference location
- `month`: Conference month
- `organization`: Organizing body
- `publisher`: Publisher
- `doi`: DOI
**Template**:
```bibtex
@inproceedings{CitationKey2024,
author = {Last, First},
title = {Paper Title},
booktitle = {Proceedings of Conference Name},
year = {2024},
pages = {123--145},
address = {City, Country},
month = jun
}
```
**Example**:
```bibtex
@inproceedings{Vaswani2017,
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and others},
title = {Attention is All You Need},
booktitle = {Advances in Neural Information Processing Systems 30 (NeurIPS 2017)},
year = {2017},
pages = {5998--6008},
address = {Long Beach, CA}
}
```
**Note**: `@conference` is an alias for `@inproceedings`.
### @incollection - Book Chapters
**For chapters in edited books**.
**Required fields**:
- `author`: Chapter author(s)
- `title`: Chapter title
- `booktitle`: Book title
- `publisher`: Publisher name
- `year`: Publication year
**Optional fields**:
- `editor`: Book editor(s)
- `volume`: Volume number
- `series`: Series name
- `type`: Type of section (e.g., "chapter")
- `chapter`: Chapter number
- `pages`: Page range
- `address`: Publisher location
- `edition`: Edition
- `month`: Month
**Template**:
```bibtex
@incollection{CitationKey2024,
author = {Last, First},
title = {Chapter Title},
booktitle = {Book Title},
editor = {Editor, Last and Editor2, Last},
publisher = {Publisher Name},
year = {2024},
pages = {123--145},
chapter = {5}
}
```
**Example**:
```bibtex
@incollection{Brown2020,
author = {Brown, Peter O. and Botstein, David},
title = {Exploring the New World of the Genome with {DNA} Microarrays},
booktitle = {DNA Microarrays: A Molecular Cloning Manual},
editor = {Eisen, Michael B. and Brown, Patrick O.},
publisher = {Cold Spring Harbor Laboratory Press},
year = {2020},
pages = {1--45},
address = {Cold Spring Harbor, NY}
}
```
### @phdthesis - Doctoral Dissertations
**For PhD dissertations and theses**.
**Required fields**:
- `author`: Author name
- `title`: Thesis title
- `school`: Institution
- `year`: Year
**Optional fields**:
- `type`: Type (e.g., "PhD dissertation", "PhD thesis")
- `address`: Institution location
- `month`: Month
- `url`: URL
- `note`: Additional notes
**Template**:
```bibtex
@phdthesis{CitationKey2024,
author = {Last, First},
title = {Dissertation Title},
school = {University Name},
year = {2024},
type = {{PhD} dissertation},
address = {City, State}
}
```
**Example**:
```bibtex
@phdthesis{Johnson2023,
author = {Johnson, Mary L.},
title = {Novel Approaches to Cancer Immunotherapy Using {CRISPR} Technology},
school = {Stanford University},
year = {2023},
type = {{PhD} dissertation},
address = {Stanford, CA}
}
```
**Note**: `@mastersthesis` is similar but for Master's theses.
### @mastersthesis - Master's Theses
**For Master's theses**.
**Required fields**:
- `author`: Author name
- `title`: Thesis title
- `school`: Institution
- `year`: Year
**Template**:
```bibtex
@mastersthesis{CitationKey2024,
author = {Last, First},
title = {Thesis Title},
school = {University Name},
year = {2024}
}
```
### @misc - Miscellaneous
**For items that don't fit other categories** (preprints, datasets, software, websites, etc.).
**Required fields**:
- `author` (if known)
- `title`
- `year`
**Optional fields**:
- `howpublished`: Repository, website, format
- `url`: URL
- `doi`: DOI
- `note`: Additional information
- `month`: Month
**Template for preprints**:
```bibtex
@misc{CitationKey2024,
author = {Last, First},
title = {Preprint Title},
year = {2024},
howpublished = {bioRxiv},
doi = {10.1101/2024.01.01.123456},
note = {Preprint}
}
```
**Template for datasets**:
```bibtex
@misc{DatasetName2024,
author = {Last, First},
title = {Dataset Title},
year = {2024},
howpublished = {Zenodo},
doi = {10.5281/zenodo.123456},
note = {Version 1.2}
}
```
**Template for software**:
```bibtex
@misc{SoftwareName2024,
author = {Last, First},
title = {Software Name},
year = {2024},
howpublished = {GitHub},
url = {https://github.com/user/repo},
note = {Version 2.0}
}
```
### @techreport - Technical Reports
**For technical reports**.
**Required fields**:
- `author`: Author name(s)
- `title`: Report title
- `institution`: Institution
- `year`: Year
**Optional fields**:
- `type`: Type of report
- `number`: Report number
- `address`: Institution location
- `month`: Month
**Template**:
```bibtex
@techreport{CitationKey2024,
author = {Last, First},
title = {Report Title},
institution = {Institution Name},
year = {2024},
type = {Technical Report},
number = {TR-2024-01}
}
```
### @unpublished - Unpublished Work
**For unpublished works** (not preprints - use @misc for those).
**Required fields**:
- `author`: Author name(s)
- `title`: Work title
- `note`: Description
**Optional fields**:
- `month`: Month
- `year`: Year
**Template**:
```bibtex
@unpublished{CitationKey2024,
author = {Last, First},
title = {Work Title},
note = {Unpublished manuscript},
year = {2024}
}
```
### @online/@electronic - Online Resources
**For web pages and online-only content**.
**Note**: Not standard BibTeX, but supported by many bibliography packages (biblatex).
**Required fields**:
- `author` OR `organization`
- `title`
- `url`
- `year`
**Template**:
```bibtex
@online{CitationKey2024,
author = {{Organization Name}},
title = {Page Title},
url = {https://example.com/page},
year = {2024},
note = {Accessed: 2024-01-15}
}
```
## Formatting Rules
### Citation Keys
**Convention**: `FirstAuthorYEARkeyword`
**Examples**:
```bibtex
Smith2024protein
Doe2023machine
JohnsonWilliams2024cancer % Multiple authors, no space
NatureEditorial2024 % No author, use publication
WHO2024guidelines % Organization author
```
**Rules**:
- Alphanumeric plus: `-`, `_`, `.`, `:`
- No spaces
- Case-sensitive
- Unique within file
- Descriptive
**Avoid**:
- Special characters: `@`, `#`, `&`, `%`, `$`
- Spaces: use CamelCase or underscores
- Starting with numbers: `2024Smith` (some systems disallow)
### Author Names
**Recommended format**: `Last, First Middle`
**Single author**:
```bibtex
author = {Smith, John}
author = {Smith, John A.}
author = {Smith, John Andrew}
```
**Multiple authors** - separate with `and`:
```bibtex
author = {Smith, John and Doe, Jane}
author = {Smith, John A. and Doe, Jane M. and Johnson, Mary L.}
```
**Many authors** (10+):
```bibtex
author = {Smith, John and Doe, Jane and Johnson, Mary and others}
```
**Special cases**:
```bibtex
% Suffix (Jr., III, etc.)
author = {King, Jr., Martin Luther}
% Organization as author
author = {{World Health Organization}}
% Note: Double braces keep as single entity
% Multiple surnames
author = {Garc{\'i}a-Mart{\'i}nez, Jos{\'e}}
% Particles (van, von, de, etc.)
author = {van der Waals, Johannes}
author = {de Broglie, Louis}
```
**Wrong formats** (don't use):
```bibtex
author = {Smith, J.; Doe, J.} % Semicolons (wrong)
author = {Smith, J., Doe, J.} % Commas (wrong)
author = {Smith, J. & Doe, J.} % Ampersand (wrong)
author = {Smith J} % No comma
```
### Title Capitalization
**Protect capitalization** with braces:
```bibtex
% Proper nouns, acronyms, formulas
title = {{AlphaFold}: Protein Structure Prediction}
title = {Machine Learning for {DNA} Sequencing}
title = {The {Ising} Model in Statistical Physics}
title = {{CRISPR-Cas9} Gene Editing Technology}
```
**Reason**: Citation styles may change capitalization. Braces protect.
**Examples**:
```bibtex
% Good
title = {Advances in {COVID-19} Treatment}
title = {Using {Python} for Data Analysis}
title = {The {AlphaFold} Protein Structure Database}
% Will be lowercased by styles that apply sentence case
title = {Advances in COVID-19 Treatment} % covid-19
title = {Using Python for Data Analysis} % python
```
**Whole title protection** (rarely needed):
```bibtex
title = {{This Entire Title Keeps Its Capitalization}}
```
### Page Ranges
**Use en-dash** (double hyphen `--`):
```bibtex
pages = {123--145} % Correct
pages = {1234--1256} % Correct
pages = {e0123456} % Article ID (PLOS, etc.)
pages = {123} % Single page
```
**Wrong**:
```bibtex
pages = {123-145} % Single hyphen (don't use)
pages = {pp. 123-145} % "pp." not needed
pages = {123–145} % Unicode en-dash (may cause issues)
```
### Month Names
**Use three-letter abbreviations** (unquoted):
```bibtex
month = jan
month = feb
month = mar
month = apr
month = may
month = jun
month = jul
month = aug
month = sep
month = oct
month = nov
month = dec
```
**Or numeric**:
```bibtex
month = {1} % January
month = {12} % December
```
**Or full name in braces**:
```bibtex
month = {January}
```
**Standard abbreviations work without quotes** because they're defined in BibTeX.
### Journal Names
**Full name** (not abbreviated):
```bibtex
journal = {Nature}
journal = {Science}
journal = {Cell}
journal = {Proceedings of the National Academy of Sciences}
journal = {Journal of the American Chemical Society}
```
**Bibliography style** will handle abbreviation if needed.
**Avoid manual abbreviation**:
```bibtex
% Don't do this in BibTeX file
journal = {Proc. Natl. Acad. Sci. U.S.A.}
% Do this instead
journal = {Proceedings of the National Academy of Sciences}
```
**Exception**: If style requires abbreviations, use full abbreviated form:
```bibtex
journal = {Proc. Natl. Acad. Sci. U.S.A.} % If required by style
```
### DOI Formatting
**Bare DOI** (preferred — no resolver URL or prefix):
```bibtex
doi = {10.1038/s41586-021-03819-2}
```
**Not**:
```bibtex
doi = {https://doi.org/10.1038/s41586-021-03819-2} % Don't include URL
doi = {doi:10.1038/s41586-021-03819-2} % Don't include prefix
```
**LaTeX** will format as URL automatically.
**Note**: No period after DOI field!
### URL Formatting
```bibtex
url = {https://www.example.com/article}
```
**Use**:
- When DOI not available
- For web pages
- For supplementary materials
**Don't duplicate**:
```bibtex
% Don't include both if DOI URL is same as url
doi = {10.1038/nature12345}
url = {https://doi.org/10.1038/nature12345} % Redundant!
```
### Special Characters
**Accents and diacritics**:
```bibtex
author = {M{\"u}ller, Hans} % ü
author = {Garc{\'i}a, Jos{\'e}} % í, é
author = {Erd{\H{o}}s, Paul} % ő
author = {Schr{\"o}dinger, Erwin} % ö
```
**Or use UTF-8** (with proper LaTeX setup):
```bibtex
author = {Müller, Hans}
author = {García, José}
```
**Mathematical symbols**:
```bibtex
title = {The $\alpha$-helix Structure}
title = {$\beta$-sheet Prediction}
```
**Chemical formulas**:
```bibtex
title = {H$_2$O Molecular Dynamics}
% Or with chemformula package:
title = {\ce{H2O} Molecular Dynamics}
```
### Field Order
**Recommended order** (for readability):
```bibtex
@article{Key,
author = {},
title = {},
journal = {},
year = {},
volume = {},
number = {},
pages = {},
doi = {},
url = {},
note = {}
}
```
**Rules**:
- Most important fields first
- Consistent across entries
- Use formatter to standardize
## Best Practices
### 1. Consistent Formatting
Use same format throughout:
- Author name format
- Title capitalization
- Journal names
- Citation key style
### 2. Required Fields
Always include:
- All required fields for entry type
- DOI for modern papers (2000+)
- Volume and pages for articles
- Publisher for books
### 3. Protect Capitalization
Use braces for:
- Proper nouns: `{AlphaFold}`
- Acronyms: `{DNA}`, `{CRISPR}`
- Formulas: `{H2O}`
- Names: `{Python}`, `{R}`
### 4. Complete Author Lists
Include all authors when possible:
- All authors if <10
- Use "and others" for 10+
- Don't abbreviate to "et al." manually
### 5. Use Standard Entry Types
Choose correct entry type:
- Journal article → `@article`
- Book → `@book`
- Conference paper → `@inproceedings`
- Preprint → `@misc`
### 6. Validate Syntax
Check for:
- Balanced braces
- Commas after fields
- Unique citation keys
- Valid entry types
### 7. Use Formatters
Use automated tools:
```bash
python scripts/format_bibtex.py references.bib
```
Benefits:
- Consistent formatting
- Catch syntax errors
- Standardize field order
- Fix common issues
## Common Mistakes
### 1. Wrong Author Separator
**Wrong**:
```bibtex
author = {Smith, J.; Doe, J.} % Semicolon
author = {Smith, J., Doe, J.} % Comma
author = {Smith, J. & Doe, J.} % Ampersand
```
**Correct**:
```bibtex
author = {Smith, John and Doe, Jane}
```
### 2. Missing Commas
**Wrong**:
```bibtex
@article{Smith2024,
author = {Smith, John} % Missing comma!
title = {Title}
}
```
**Correct**:
```bibtex
@article{Smith2024,
author = {Smith, John}, % Comma after each field
title = {Title}
}
```
### 3. Unprotected Capitalization
**Wrong**:
```bibtex
title = {Machine Learning with Python}
% "Python" will become "python" in title case
```
**Correct**:
```bibtex
title = {Machine Learning with {Python}}
```
### 4. Single Hyphen in Pages
**Wrong**:
```bibtex
pages = {123-145} % Single hyphen
```
**Correct**:
```bibtex
pages = {123--145} % Double hyphen (en-dash)
```
### 5. Redundant "pp." in Pages
**Wrong**:
```bibtex
pages = {pp. 123--145}
```
**Correct**:
```bibtex
pages = {123--145}
```
### 6. DOI with URL Prefix
**Wrong**:
```bibtex
doi = {https://doi.org/10.1038/nature12345}
doi = {doi:10.1038/nature12345}
```
**Correct**:
```bibtex
doi = {10.1038/nature12345}
```
## Example Complete Bibliography
```bibtex
% Journal article
@article{Jumper2021,
author = {Jumper, John and Evans, Richard and Pritzel, Alexander and others},
title = {Highly Accurate Protein Structure Prediction with {AlphaFold}},
journal = {Nature},
year = {2021},
volume = {596},
number = {7873},
pages = {583--589},
doi = {10.1038/s41586-021-03819-2}
}
% Book
@book{Kumar2021,
author = {Kumar, Vinay and Abbas, Abul K. and Aster, Jon C.},
title = {Robbins and Cotran Pathologic Basis of Disease},
publisher = {Elsevier},
year = {2021},
edition = {10},
address = {Philadelphia, PA},
isbn = {978-0-323-53113-9}
}
% Conference paper
@inproceedings{Vaswani2017,
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and others},
title = {Attention is All You Need},
booktitle = {Advances in Neural Information Processing Systems 30 (NeurIPS 2017)},
year = {2017},
pages = {5998--6008}
}
% Book chapter
@incollection{Brown2020,
  author = {Brown, Patrick O. and Botstein, David},
title = {Exploring the New World of the Genome with {DNA} Microarrays},
booktitle = {DNA Microarrays: A Molecular Cloning Manual},
editor = {Eisen, Michael B. and Brown, Patrick O.},
publisher = {Cold Spring Harbor Laboratory Press},
year = {2020},
pages = {1--45}
}
% PhD thesis
@phdthesis{Johnson2023,
author = {Johnson, Mary L.},
title = {Novel Approaches to Cancer Immunotherapy},
school = {Stanford University},
year = {2023},
type = {{PhD} dissertation}
}
% Preprint
@misc{Zhang2024,
author = {Zhang, Yi and Chen, Li and Wang, Hui},
title = {Novel Therapeutic Targets in {Alzheimer}'s Disease},
year = {2024},
howpublished = {bioRxiv},
doi = {10.1101/2024.01.001},
note = {Preprint}
}
% Dataset
@misc{AlphaFoldDB2021,
author = {{DeepMind} and {EMBL-EBI}},
title = {{AlphaFold} Protein Structure Database},
year = {2021},
howpublished = {Database},
url = {https://alphafold.ebi.ac.uk/},
doi = {10.1093/nar/gkab1061}
}
```
## Summary
BibTeX formatting essentials:
- **Choose correct entry type** (@article, @book, etc.)
- **Include all required fields**
- **Use `and` for multiple authors**
- **Protect capitalization** with braces
- **Use `--` for page ranges**
- **Include DOI** for modern papers
- **Validate syntax** before compilation
Use formatting tools to ensure consistency:
```bash
python scripts/format_bibtex.py references.bib
```
Properly formatted BibTeX ensures correct, consistent citations across all bibliography styles!

---
# Citation Validation Guide
Comprehensive guide to validating citation accuracy, completeness, and formatting in BibTeX files.
## Overview
Citation validation ensures:
- All citations are accurate and complete
- DOIs resolve correctly
- Required fields are present
- No duplicate entries
- Proper formatting and syntax
- Links are accessible
Validation should be performed:
- After extracting metadata
- Before manuscript submission
- After manual edits to BibTeX files
- Periodically for maintained bibliographies
## Validation Categories
### 1. DOI Verification
**Purpose**: Ensure DOIs are valid and resolve correctly.
#### What to Check
**DOI format**:
```
Valid: 10.1038/s41586-021-03819-2
Valid: 10.1126/science.aam9317
Invalid: 10.1038/invalid
Invalid: doi:10.1038/... (should omit "doi:" prefix in BibTeX)
```
**DOI resolution**:
- DOI should resolve via https://doi.org/
- Should redirect to actual article
- Should not return 404 or error
**Metadata consistency**:
- CrossRef metadata should match BibTeX
- Author names should align
- Title should match
- Year should match
#### How to Validate
**Manual check**:
1. Copy DOI from BibTeX
2. Visit https://doi.org/10.1038/nature12345
3. Verify it redirects to correct article
4. Check metadata matches
**Automated check** (recommended):
```bash
python scripts/validate_citations.py references.bib --check-dois
```
**Process**:
1. Extract all DOIs from BibTeX file
2. Query doi.org resolver for each
3. Query CrossRef API for metadata
4. Compare metadata with BibTeX entry
5. Report discrepancies
#### Common Issues
**Broken DOIs**:
- Typos in DOI
- Publisher changed DOI (rare)
- Article retracted
- Solution: Find correct DOI from publisher site
**Mismatched metadata**:
- BibTeX has old/incorrect information
- Solution: Re-extract metadata from CrossRef
**Missing DOIs**:
- Older articles may not have DOIs
- Acceptable for pre-2000 publications
- Add URL or PMID instead
### 2. Required Fields
**Purpose**: Ensure all necessary information is present.
#### Required by Entry Type
**@article**:
```bibtex
author % REQUIRED
title % REQUIRED
journal % REQUIRED
year % REQUIRED
volume % Highly recommended
pages % Highly recommended
doi % Highly recommended for modern papers
```
**@book**:
```bibtex
author OR editor % REQUIRED (at least one)
title % REQUIRED
publisher % REQUIRED
year % REQUIRED
isbn % Recommended
```
**@inproceedings**:
```bibtex
author % REQUIRED
title % REQUIRED
booktitle % REQUIRED (conference/proceedings name)
year % REQUIRED
pages % Recommended
```
**@incollection** (book chapter):
```bibtex
author % REQUIRED
title % REQUIRED (chapter title)
booktitle % REQUIRED (book title)
publisher % REQUIRED
year % REQUIRED
editor % Recommended
pages % Recommended
```
**@phdthesis**:
```bibtex
author % REQUIRED
title % REQUIRED
school % REQUIRED
year % REQUIRED
```
**@misc** (preprints, datasets, etc.):
```bibtex
author % REQUIRED
title % REQUIRED
year % REQUIRED
howpublished % Recommended (bioRxiv, Zenodo, etc.)
doi OR url % At least one required
```
#### Validation Script
```bash
python scripts/validate_citations.py references.bib --check-required-fields
```
**Output**:
```
Error: Entry 'Smith2024' missing required field 'journal'
Error: Entry 'Doe2023' missing required field 'year'
Warning: Entry 'Jones2022' missing recommended field 'volume'
```
### 3. Author Name Formatting
**Purpose**: Ensure consistent, correct author name formatting.
#### Proper Format
**Recommended BibTeX format**:
```bibtex
author = {Last1, First1 and Last2, First2 and Last3, First3}
```
**Examples**:
```bibtex
% Correct
author = {Smith, John}
author = {Smith, John A.}
author = {Smith, John Andrew}
author = {Smith, John and Doe, Jane}
author = {Smith, John and Doe, Jane and Johnson, Mary}
% For many authors
author = {Smith, John and Doe, Jane and others}
% Incorrect
author = {John Smith} % First Last format (not recommended)
author = {Smith, J.; Doe, J.} % Semicolon separator (wrong)
author = {Smith J, Doe J} % Missing commas
```
#### Special Cases
**Suffixes (Jr., III, etc.)**:
```bibtex
author = {King, Jr., Martin Luther}
```
**Multiple surnames (hyphenated)**:
```bibtex
author = {Smith-Jones, Mary}
```
**Van, von, de, etc.**:
```bibtex
author = {van der Waals, Johannes}
author = {de Broglie, Louis}
```
**Organizations as authors**:
```bibtex
author = {{World Health Organization}}
% Double braces treat as single author
```
#### Validation Checks
**Automated validation**:
```bash
python scripts/validate_citations.py references.bib --check-authors
```
**Checks for**:
- Proper separator (and, not &, ; , etc.)
- Comma placement
- Empty author fields
- Malformed names
### 4. Data Consistency
**Purpose**: Ensure all fields contain valid, reasonable values.
#### Year Validation
**Valid years**:
```bibtex
year = {2024} % Current/recent
year = {1953} % Watson & Crick DNA structure (historical)
year = {1665} % Hooke's Micrographia (very old)
```
**Invalid years**:
```bibtex
year = {24} % Two digits (ambiguous)
year = {202} % Typo
year = {2025} % Future (unless accepted/in press)
year = {0} % Obviously wrong
```
**Check**:
- Four digits
- Reasonable range (1600-current+1)
- Not all zeros
#### Volume/Number Validation
```bibtex
volume = {123} % Numeric
volume = {12} % Valid
number = {3} % Valid
number = {S1} % Supplement issue (valid)
```
**Invalid**:
```bibtex
volume = {Vol. 123} % Should be just number
number = {Issue 3} % Should be just number
```
#### Page Range Validation
**Correct format**:
```bibtex
pages = {123--145} % En-dash (two hyphens)
pages = {e0123456} % PLOS-style article ID
pages = {123} % Single page
```
**Incorrect format**:
```bibtex
pages = {123-145} % Single hyphen (use --)
pages = {pp. 123-145} % Remove "pp."
pages = {123–145}     % Unicode en-dash character (may cause issues)
```
#### URL Validation
**Check**:
- URLs are accessible (return 200 status)
- HTTPS when available
- No obvious typos
- Permanent links (not temporary)
**Valid**:
```bibtex
url = {https://www.nature.com/articles/nature12345}
url = {https://arxiv.org/abs/2103.14030}
```
**Questionable**:
```bibtex
url = {http://...} % HTTP instead of HTTPS
url = {file:///...} % Local file path
url = {bit.ly/...} % URL shortener (not permanent)
```
### 5. Duplicate Detection
**Purpose**: Find and remove duplicate entries.
#### Types of Duplicates
**Exact duplicates** (same DOI):
```bibtex
@article{Smith2024a,
doi = {10.1038/nature12345},
...
}
@article{Smith2024b,
doi = {10.1038/nature12345}, % Same DOI!
...
}
```
**Near duplicates** (similar title/authors):
```bibtex
@article{Smith2024,
title = {Machine Learning for Drug Discovery},
...
}
@article{Smith2024method,
title = {Machine learning for drug discovery}, % Same, different case
...
}
```
**Preprint + Published**:
```bibtex
@misc{Smith2023arxiv,
title = {AlphaFold Results},
howpublished = {arXiv},
...
}
@article{Smith2024,
title = {AlphaFold Results}, % Same paper, now published
journal = {Nature},
...
}
% Keep published version only
```
#### Detection Methods
**By DOI** (most reliable):
- Same DOI = exact duplicate
- Keep one, remove other
**By title similarity**:
- Normalize: lowercase, remove punctuation
- Calculate similarity (e.g., Levenshtein distance)
- Flag if >90% similar
**By author-year-title**:
- Same first author + year + similar title
- Likely duplicate
**Automated detection**:
```bash
python scripts/validate_citations.py references.bib --check-duplicates
```
**Output**:
```
Warning: Possible duplicate entries:
- Smith2024a (DOI: 10.1038/nature12345)
- Smith2024b (DOI: 10.1038/nature12345)
Recommendation: Keep one entry, remove the other.
```
### 6. Format and Syntax
**Purpose**: Ensure valid BibTeX syntax.
#### Common Syntax Errors
**Missing commas**:
```bibtex
@article{Smith2024,
author = {Smith, John} % Missing comma!
title = {Title}
}
% Should be:
author = {Smith, John}, % Comma after each field
```
**Unbalanced braces**:
```bibtex
title = {Title with {Protected} Text % Missing closing brace
% Should be:
title = {Title with {Protected} Text}
```
**Missing closing brace for entry**:
```bibtex
@article{Smith2024,
author = {Smith, John},
title = {Title}
% Missing closing brace!
% Should end with:
}
```
**Invalid characters in keys**:
```bibtex
@article{Smith&Doe2024, % & not allowed in key
...
}
% Use:
@article{SmithDoe2024,
...
}
```
#### BibTeX Syntax Rules
**Entry structure**:
```bibtex
@TYPE{citationkey,
field1 = {value1},
field2 = {value2},
...
fieldN = {valueN}
}
```
**Citation keys**:
- Alphanumeric and some punctuation (-, _, ., :)
- No spaces
- Case-sensitive
- Unique within file
**Field values**:
- Enclosed in {braces} or "quotes"
- Braces preferred for complex text
- Numbers can be unquoted: `year = 2024`
**Special characters**:
- `{` and `}` for grouping
- `\` for LaTeX commands
- Protect capitalization: `{AlphaFold}`
- Accents: `{\"u}`, `{\'e}`, `{\aa}`
#### Validation
```bash
python scripts/validate_citations.py references.bib --check-syntax
```
**Checks**:
- Valid BibTeX structure
- Balanced braces
- Proper commas
- Valid entry types
- Unique citation keys
## Validation Workflow
### Step 1: Basic Validation
Run comprehensive validation:
```bash
python scripts/validate_citations.py references.bib
```
**Checks all**:
- DOI resolution
- Required fields
- Author formatting
- Data consistency
- Duplicates
- Syntax
### Step 2: Review Report
Examine validation report:
```json
{
"total_entries": 150,
"valid_entries": 140,
"errors": [
{
"entry": "Smith2024",
"error": "missing_required_field",
"field": "journal",
"severity": "high"
},
{
"entry": "Doe2023",
"error": "invalid_doi",
"doi": "10.1038/broken",
"severity": "high"
}
],
"warnings": [
{
"entry": "Jones2022",
"warning": "missing_recommended_field",
"field": "volume",
"severity": "medium"
}
],
"duplicates": [
{
"entries": ["Smith2024a", "Smith2024b"],
"reason": "same_doi",
"doi": "10.1038/nature12345"
}
]
}
```
### Step 3: Fix Issues
**High-priority** (errors):
1. Add missing required fields
2. Fix broken DOIs
3. Remove duplicates
4. Correct syntax errors
**Medium-priority** (warnings):
1. Add recommended fields
2. Improve author formatting
3. Fix page ranges
**Low-priority**:
1. Standardize formatting
2. Add URLs for accessibility
### Step 4: Auto-Fix
Use auto-fix for safe corrections:
```bash
python scripts/validate_citations.py references.bib \
--auto-fix \
--output fixed_references.bib
```
**Auto-fix can**:
- Fix page range format (- to --)
- Remove "pp." from pages
- Standardize author separators
- Fix common syntax errors
- Normalize field order
**Auto-fix cannot**:
- Add missing information
- Find correct DOIs
- Determine which duplicate to keep
- Fix semantic errors
### Step 5: Manual Review
Review auto-fixed file:
```bash
# Check what changed
diff references.bib fixed_references.bib
# Review specific entries that had errors
grep -A 10 "Smith2024" fixed_references.bib
```
### Step 6: Re-Validate
Validate after fixes:
```bash
python scripts/validate_citations.py fixed_references.bib --verbose
```
Should show:
```
✓ All DOIs valid
✓ All required fields present
✓ No duplicates found
✓ Syntax valid
✓ 150/150 entries valid
```
## Validation Checklist
Use this checklist before final submission:
### DOI Validation
- [ ] All DOIs resolve correctly
- [ ] Metadata matches between BibTeX and CrossRef
- [ ] No broken or invalid DOIs
### Completeness
- [ ] All entries have required fields
- [ ] Modern papers (2000+) have DOIs
- [ ] Authors properly formatted
- [ ] Journals/conferences properly named
### Consistency
- [ ] Years are 4-digit numbers
- [ ] Page ranges use -- not -
- [ ] Volume/number are numeric
- [ ] URLs are accessible
### Duplicates
- [ ] No entries with same DOI
- [ ] No near-duplicate titles
- [ ] Preprints updated to published versions
### Formatting
- [ ] Valid BibTeX syntax
- [ ] Balanced braces
- [ ] Proper commas
- [ ] Unique citation keys
### Final Checks
- [ ] Bibliography compiles without errors
- [ ] All citations in text appear in bibliography
- [ ] All bibliography entries cited in text
- [ ] Citation style matches journal requirements
## Best Practices
### 1. Validate Early and Often
```bash
# After extraction
python scripts/extract_metadata.py --doi ... --output refs.bib
python scripts/validate_citations.py refs.bib
# After manual edits
python scripts/validate_citations.py refs.bib
# Before submission
python scripts/validate_citations.py refs.bib --strict
```
### 2. Use Automated Tools
Don't validate manually - use scripts:
- Faster
- More comprehensive
- Catches errors humans miss
- Generates reports
### 3. Keep Backup
```bash
# Before auto-fix
cp references.bib references_backup.bib
# Run auto-fix
python scripts/validate_citations.py references.bib \
--auto-fix \
--output references_fixed.bib
# Review changes
diff references.bib references_fixed.bib
# If satisfied, replace
mv references_fixed.bib references.bib
```
### 4. Fix High-Priority First
**Priority order**:
1. Syntax errors (prevent compilation)
2. Missing required fields (incomplete citations)
3. Broken DOIs (broken links)
4. Duplicates (confusion, wasted space)
5. Missing recommended fields
6. Formatting inconsistencies
### 5. Document Exceptions
For entries that can't be fixed:
```bibtex
@article{Old1950,
author = {Smith, John},
title = {Title},
journal = {Obscure Journal},
year = {1950},
volume = {12},
pages = {34--56},
note = {DOI not available for publications before 2000}
}
```
### 6. Validate Against Journal Requirements
Different journals have different requirements:
- Citation style (numbered, author-year)
- Abbreviations (journal names)
- Maximum reference count
- Format (BibTeX, EndNote, manual)
Check journal author guidelines!
## Common Validation Issues
### Issue 1: Metadata Mismatch
**Problem**: BibTeX says 2023, CrossRef says 2024.
**Cause**:
- Online-first vs print publication
- Correction/update
- Extraction error
**Solution**:
1. Check actual article
2. Use more recent/accurate date
3. Update BibTeX entry
4. Re-validate
### Issue 2: Special Characters
**Problem**: LaTeX compilation fails on special characters.
**Cause**:
- Accented characters (é, ü, ñ)
- Chemical formulas (H₂O)
- Math symbols (α, β, ±)
**Solution**:
```bibtex
% Use LaTeX commands
author = {M{\"u}ller, Hans} % Müller
title = {Study of H\textsubscript{2}O} % H₂O
% Or use UTF-8 with proper LaTeX packages
```
### Issue 3: Incomplete Extraction
**Problem**: Extracted metadata missing fields.
**Cause**:
- Source doesn't provide all metadata
- Extraction error
- Incomplete record
**Solution**:
1. Check original article
2. Manually add missing fields
3. Use alternative source (PubMed vs CrossRef)
### Issue 4: Cannot Find Duplicate
**Problem**: Same paper appears twice, not detected.
**Cause**:
- Different DOIs (should be rare)
- Different titles (abbreviated, typo)
- Different citation keys
**Solution**:
- Manual search for author + year
- Check for similar titles
- Remove manually
## Summary
Validation ensures citation quality:
- **Accuracy**: DOIs resolve, metadata correct
- **Completeness**: All required fields present
- **Consistency**: Proper formatting throughout
- **No duplicates**: Each paper cited once
- **Valid syntax**: BibTeX compiles without errors
**Always validate** before final submission!
Use automated tools:
```bash
python scripts/validate_citations.py references.bib
```
Follow workflow:
1. Extract metadata
2. Validate
3. Fix errors
4. Re-validate
5. Submit

---
# Google Scholar Search Guide
Comprehensive guide to searching Google Scholar for academic papers, including advanced search operators, filtering strategies, and metadata extraction.
## Overview
Google Scholar provides the most comprehensive coverage of academic literature across all disciplines:
- **Coverage**: 100+ million scholarly documents
- **Scope**: All academic disciplines
- **Content types**: Journal articles, books, theses, conference papers, preprints, patents, court opinions
- **Citation tracking**: "Cited by" links for forward citation tracking
- **Accessibility**: Free to use, no account required
## Basic Search
### Simple Keyword Search
Search for papers containing specific terms anywhere in the document (title, abstract, full text):
```
CRISPR gene editing
machine learning protein folding
climate change impact agriculture
quantum computing algorithms
```
**Tips**:
- Use specific technical terms
- Include key acronyms and abbreviations
- Start broad, then refine
- Check spelling of technical terms
### Exact Phrase Search
Use quotation marks to search for exact phrases:
```
"deep learning"
"CRISPR-Cas9"
"systematic review"
"randomized controlled trial"
```
**When to use**:
- Technical terms that must appear together
- Proper names
- Specific methodologies
- Exact titles
## Advanced Search Operators
### Author Search
Find papers by specific authors:
```
author:LeCun
author:"Geoffrey Hinton"
author:Church synthetic biology
```
**Variations**:
- Single last name: `author:Smith`
- Full name in quotes: `author:"Jane Smith"`
- Author + topic: `author:Doudna CRISPR`
**Tips**:
- Authors may publish under different name variations
- Try with and without middle initials
- Consider name changes (marriage, etc.)
- Use quotation marks for full names
### Title Search
Search only in article titles:
```
intitle:transformer
intitle:"attention mechanism"
intitle:review climate change
```
**Use cases**:
- Finding papers specifically about a topic
- More precise than full-text search
- Reduces irrelevant results
- Good for finding reviews or methods
### Source (Journal) Search
Search within specific journals or conferences:
```
source:Nature
source:"Nature Communications"
source:NeurIPS
source:"Journal of Machine Learning Research"
```
**Applications**:
- Track publications in top-tier venues
- Find papers in specialized journals
- Identify conference-specific work
- Verify publication venue
### Exclusion Operator
Exclude terms from results:
```
machine learning -survey
CRISPR -patent
climate change -news
deep learning -tutorial -review
```
**Common exclusions**:
- `-survey`: Exclude survey papers
- `-review`: Exclude review articles
- `-patent`: Exclude patents
- `-book`: Exclude books
- `-news`: Exclude news articles
- `-tutorial`: Exclude tutorials
### OR Operator
Search for papers containing any of multiple terms:
```
"machine learning" OR "deep learning"
CRISPR OR "gene editing"
"climate change" OR "global warming"
```
**Best practices**:
- OR must be uppercase
- Combine synonyms
- Include acronyms and spelled-out versions
- Use with exact phrases
### Wildcard Search
Use asterisk (*) as wildcard for unknown words:
```
"machine * learning"
"CRISPR * editing"
"* neural network"
```
**Note**: Limited wildcard support in Google Scholar compared to other databases.
## Advanced Filtering
### Year Range
Filter by publication year:
**Using interface**:
- Click "Since [year]" on left sidebar
- Select custom range
**Using search operators**:
```
# Not directly in search query
# Use interface or URL parameters
```
**In script**:
```bash
python scripts/search_google_scholar.py "quantum computing" \
--year-start 2020 \
--year-end 2024
```
### Sorting Options
**By relevance** (default):
- Google's algorithm determines relevance
- Considers citations, author reputation, publication venue
- Generally good for most searches
**By date**:
- Most recent papers first
- Good for fast-moving fields
- May miss highly cited older papers
- Click "Sort by date" in interface
**By citation count** (via script):
```bash
python scripts/search_google_scholar.py "transformers" \
--sort-by citations \
--limit 50
```
### Language Filtering
**In interface**:
- Settings → Languages
- Select preferred languages
**Default**: English and papers with English abstracts
## Search Strategies
### Finding Seminal Papers
Identify highly influential papers in a field:
1. **Search by topic** with broad terms
2. **Sort by citations** (most cited first)
3. **Look for review articles** for comprehensive overviews
4. **Check publication dates** for foundational vs recent work
**Example**:
```
"generative adversarial networks"
# Sort by citations
# Top results: original GAN paper (Goodfellow et al., 2014), key variants
```
### Finding Recent Work
Stay current with latest research:
1. **Search by topic**
2. **Filter to recent years** (last 1-2 years)
3. **Sort by date** for newest first
4. **Set up alerts** for ongoing tracking
**Example**:
```bash
python scripts/search_google_scholar.py "AlphaFold protein structure" \
--year-start 2023 \
--year-end 2024 \
--limit 50
```
### Finding Review Articles
Get comprehensive overviews of a field:
```
intitle:review "machine learning"
"systematic review" CRISPR
intitle:survey "natural language processing"
```
**Indicators**:
- "review", "survey", "perspective" in title
- Often highly cited
- Published in review journals (Nature Reviews, Trends, etc.)
- Comprehensive reference lists
### Citation Chain Search
**Forward citations** (papers citing a key paper):
1. Find seminal paper
2. Click "Cited by X"
3. See all papers that cite it
4. Identify how field has developed
**Backward citations** (references in a key paper):
1. Find recent review or important paper
2. Check its reference list
3. Identify foundational work
4. Trace development of ideas
**Example workflow**:
```
# Find original transformer paper
"Attention is all you need" author:Vaswani
# Check "Cited by 120,000+"
# See evolution: BERT, GPT, T5, etc.
# Check references in original paper
# Find RNN, LSTM, attention mechanism origins
```
### Comprehensive Literature Search
For thorough coverage (e.g., systematic reviews):
1. **Generate synonym list**:
- Main terms + alternatives
- Acronyms + spelled out
- US vs UK spelling
2. **Use OR operators**:
```
("machine learning" OR "deep learning" OR "neural networks")
```
3. **Combine multiple concepts**:
```
("machine learning" OR "deep learning") ("drug discovery" OR "drug development")
```
4. **Search without date filters** initially:
- Get total landscape
- Filter later if too many results
5. **Export results** for systematic analysis:
```bash
python scripts/search_google_scholar.py \
'"machine learning" OR "deep learning" drug discovery' \
--limit 500 \
--output comprehensive_search.json
```
## Extracting Citation Information
### From Google Scholar Results Page
Each result shows:
- **Title**: Paper title (linked to full text if available)
- **Authors**: Author list (often truncated)
- **Source**: Journal/conference, year, publisher
- **Cited by**: Number of citations + link to citing papers
- **Related articles**: Link to similar papers
- **All versions**: Different versions of the same paper
### Export Options
**Manual export**:
1. Click "Cite" under paper
2. Select BibTeX format
3. Copy citation
**Limitations**:
- One paper at a time
- Manual process
- Time-consuming for many papers
**Automated export** (using script):
```bash
# Search and export to BibTeX
python scripts/search_google_scholar.py "quantum computing" \
--limit 50 \
--format bibtex \
--output quantum_papers.bib
```
### Metadata Available
From Google Scholar you can typically extract:
- Title
- Authors (may be incomplete)
- Year
- Source (journal/conference)
- Citation count
- Link to full text (when available)
- Link to PDF (when available)
**Note**: Metadata quality varies:
- Some fields may be missing
- Author names may be incomplete
- Need to verify with DOI lookup for accuracy
## Rate Limiting and Access
### Rate Limits
Google Scholar has rate limiting to prevent automated scraping:
**Symptoms of rate limiting**:
- CAPTCHA challenges
- Temporary IP blocks
- 429 "Too Many Requests" errors
**Best practices**:
1. **Add delays between requests**: 2-5 seconds minimum
2. **Limit query volume**: Don't search hundreds of queries rapidly
3. **Use scholarly library**: Handles rate limiting automatically
4. **Rotate User-Agents**: Appear as different browsers
5. **Consider proxies**: For large-scale searches (use ethically)
**In our scripts**:
```python
# Automatic rate limiting built in
time.sleep(random.uniform(3, 7)) # Random delay 3-7 seconds
```
### Ethical Considerations
**DO**:
- Respect rate limits
- Use reasonable delays
- Cache results (don't re-query)
- Use official APIs when available
- Attribute data properly
**DON'T**:
- Scrape aggressively
- Use multiple IPs to bypass limits
- Violate terms of service
- Burden servers unnecessarily
- Use data commercially without permission
### Institutional Access
**Benefits of institutional access**:
- Access to full-text PDFs through library subscriptions
- Better download capabilities
- Integration with library systems
- Link resolver to full text
**Setup**:
- Google Scholar → Settings → Library links
- Add your institution
- Links appear in search results
## Tips and Best Practices
### Search Optimization
1. **Start simple, then refine**:
```
# Too specific initially
intitle:"deep learning" intitle:review source:Nature 2023..2024
# Better approach
deep learning review
# Review results
# Add intitle:, source:, year filters as needed
```
2. **Use multiple search strategies**:
- Keyword search
- Author search for known experts
- Citation chaining from key papers
- Source search in top journals
3. **Check spelling and variations**:
- Color vs colour
- Optimization vs optimisation
- Tumor vs tumour
- Try common misspellings if few results
4. **Combine operators strategically**:
```
# Good combination
author:Church intitle:"synthetic biology" 2015..2024
# Find reviews by specific author on topic in recent years
```
### Result Evaluation
1. **Check citation counts**:
- High citations indicate influence
- Recent papers may have low citations but be important
- Citation counts vary by field
2. **Verify publication venue**:
- Peer-reviewed journals vs preprints
- Conference proceedings
- Book chapters
- Technical reports
3. **Check for full text access**:
- [PDF] link on right side
- "All X versions" may have open access version
- Check institutional access
- Try author's website or ResearchGate
4. **Look for review articles**:
- Comprehensive overviews
- Good starting point for new topics
- Extensive reference lists
### Managing Results
1. **Use citation manager integration**:
- Export to BibTeX
- Import to Zotero, Mendeley, EndNote
- Maintain organized library
2. **Set up alerts** for ongoing research:
- Google Scholar → Alerts
- Get emails for new papers matching query
- Track specific authors or topics
3. **Create collections**:
- Save papers to Google Scholar Library
- Organize by project or topic
- Add labels and notes
4. **Export systematically**:
```bash
# Save search results for later analysis
python scripts/search_google_scholar.py "your topic" \
--output topic_papers.json
# Can re-process later without re-searching
python scripts/extract_metadata.py \
--input topic_papers.json \
--output topic_refs.bib
```
## Advanced Techniques
### Boolean Logic Combinations
Combine multiple operators for precise searches:
```
# Highly cited reviews on specific topic by known authors
intitle:review "machine learning" ("drug discovery" OR "drug development")
author:Horvath OR author:Bengio 2020..2024
# Method papers excluding reviews
intitle:method "protein folding" -review -survey
# Papers in top journals only
("Nature" OR "Science" OR "Cell") CRISPR 2022..2024
```
### Finding Open Access Papers
```
# Search with generic terms
machine learning
# Filter by "All versions" which often includes preprints
# Look for green [PDF] links (often open access)
# Check arXiv, bioRxiv versions
```
**In script**:
```bash
python scripts/search_google_scholar.py "topic" \
--open-access-only \
--output open_access_papers.json
```
### Tracking Research Impact
**For a specific paper**:
1. Find the paper
2. Click "Cited by X"
3. Analyze citing papers:
- How is it being used?
- What fields cite it?
- Recent vs older citations?
**For an author**:
1. Search `author:LastName`
2. Check h-index and i10-index
3. View citation history graph
4. Identify most influential papers
**For a topic**:
1. Search topic
2. Sort by citations
3. Identify seminal papers (highly cited, older)
4. Check recent highly-cited papers (emerging important work)
### Finding Preprints and Early Work
```
# arXiv papers
source:arxiv "deep learning"
# bioRxiv papers
source:biorxiv CRISPR
# All preprint servers
("arxiv" OR "biorxiv" OR "medrxiv") your topic
```
**Note**: Preprints are not peer-reviewed. Always check if published version exists.
## Common Issues and Solutions
### Too Many Results
**Problem**: Search returns 100,000+ results, overwhelming.
**Solutions**:
1. Add more specific terms
2. Use `intitle:` to search only titles
3. Filter by recent years
4. Add exclusions (e.g., `-review`)
5. Search within specific journals
### Too Few Results
**Problem**: Search returns 0-10 results, suspiciously few.
**Solutions**:
1. Remove restrictive operators
2. Try synonyms and related terms
3. Check spelling
4. Broaden year range
5. Use OR for alternative terms
### Irrelevant Results
**Problem**: Results don't match intent.
**Solutions**:
1. Use exact phrases with quotes
2. Add more specific context terms
3. Use `intitle:` for title-only search
4. Exclude common irrelevant terms
5. Combine multiple specific terms
### CAPTCHA or Rate Limiting
**Problem**: Google Scholar shows CAPTCHA or blocks access.
**Solutions**:
1. Wait several minutes before continuing
2. Reduce query frequency
3. Use longer delays in scripts (5-10 seconds)
4. Switch to different IP/network
5. Consider using institutional access
### Missing Metadata
**Problem**: Author names, year, or venue missing from results.
**Solutions**:
1. Click through to see full details
2. Check "All versions" for better metadata
3. Look up by DOI if available
4. Extract metadata from CrossRef/PubMed instead
5. Manually verify from paper PDF
### Duplicate Results
**Problem**: Same paper appears multiple times.
**Solutions**:
1. Click "All X versions" to see consolidated view
2. Choose version with best metadata
3. Use deduplication in post-processing:
```bash
python scripts/format_bibtex.py results.bib \
--deduplicate \
--output clean_results.bib
```
## Integration with Scripts
### search_google_scholar.py Usage
**Basic search**:
```bash
python scripts/search_google_scholar.py "machine learning drug discovery"
```
**With year filter**:
```bash
python scripts/search_google_scholar.py "CRISPR" \
--year-start 2020 \
--year-end 2024 \
--limit 100
```
**Sort by citations**:
```bash
python scripts/search_google_scholar.py "transformers" \
--sort-by citations \
--limit 50
```
**Export to BibTeX**:
```bash
python scripts/search_google_scholar.py "quantum computing" \
--format bibtex \
--output quantum.bib
```
**Export to JSON for later processing**:
```bash
python scripts/search_google_scholar.py "topic" \
--format json \
--output results.json
# Later: extract full metadata
python scripts/extract_metadata.py \
--input results.json \
--output references.bib
```
### Batch Searching
For multiple topics:
```bash
# Create file with search queries (queries.txt)
# One query per line
# Search each query
while read query; do
python scripts/search_google_scholar.py "$query" \
--limit 50 \
--output "${query// /_}.json"
sleep 10 # Delay between queries
done < queries.txt
```
## Summary
Google Scholar is the most comprehensive academic search engine, providing:
- **Broad coverage**: All disciplines, 100M+ documents
- **Free access**: No account or subscription required
- **Citation tracking**: "Cited by" for impact analysis
- **Multiple formats**: Articles, books, theses, patents
- **Full-text search**: Not just abstracts
Key strategies:
- Use advanced operators for precision
- Combine author, title, source searches
- Track citations for impact
- Export systematically to citation manager
- Respect rate limits and access policies
- Verify metadata with CrossRef/PubMed
For biomedical research, complement with PubMed for MeSH terms and curated metadata.

View File

@@ -0,0 +1,870 @@
# Metadata Extraction Guide
Comprehensive guide to extracting accurate citation metadata from DOIs, PMIDs, arXiv IDs, and URLs using various APIs and services.
## Overview
Accurate metadata is essential for proper citations. This guide covers:
- Identifying paper identifiers (DOI, PMID, arXiv ID)
- Querying metadata APIs (CrossRef, PubMed, arXiv, DataCite)
- Required BibTeX fields by entry type
- Handling edge cases and special situations
- Validating extracted metadata
## Paper Identifiers
### DOI (Digital Object Identifier)
**Format**: `10.XXXX/suffix`
**Examples**:
```
10.1038/s41586-021-03819-2 # Nature article
10.1126/science.aam9317 # Science article
10.1016/j.cell.2023.01.001 # Cell article
10.1371/journal.pone.0123456 # PLOS ONE article
```
**Properties**:
- Permanent identifier
- Most reliable for metadata
- Resolves to current location
- Publisher-assigned
**Where to find**:
- First page of article
- Article webpage
- CrossRef, Google Scholar, PubMed
- Usually prominent on publisher site
### PMID (PubMed ID)
**Format**: 8-digit number (typically)
**Examples**:
```
34265844
28445112
35476778
```
**Properties**:
- Specific to PubMed database
- Biomedical literature only
- Assigned by NCBI
- Permanent identifier
**Where to find**:
- PubMed search results
- Article page on PubMed
- Often in article PDF footer
- PMC (PubMed Central) pages
### PMCID (PubMed Central ID)
**Format**: PMC followed by numbers
**Examples**:
```
PMC8287551
PMC7456789
```
**Properties**:
- Free full-text articles in PMC
- Subset of PubMed articles
- Open access or author manuscripts
### arXiv ID
**Format**: YYMM.NNNNN or archive/YYMMNNN
**Examples**:
```
2103.14030 # New format (since 2007)
2401.12345 # 2024 submission
arXiv:hep-th/9901001 # Old format
```
**Properties**:
- Preprints (not peer-reviewed)
- Physics, math, CS, q-bio, etc.
- Version tracking (v1, v2, etc.)
- Free, open access
**Where to find**:
- arXiv.org
- Often cited before publication
- Paper PDF header
### Other Identifiers
**ISBN** (Books):
```
978-0-12-345678-9
0-123-45678-9
```
**arXiv category**:
```
cs.LG # Computer Science - Machine Learning
q-bio.QM # Quantitative Biology - Quantitative Methods
math.ST # Mathematics - Statistics
```
## Metadata APIs
### CrossRef API
**Primary source for DOIs** - Most comprehensive metadata for journal articles.
**Base URL**: `https://api.crossref.org/works/`
**No API key required**, but polite pool recommended:
- Add email to User-Agent
- Gets better service
- No rate limits
#### Basic DOI Lookup
**Request**:
```
GET https://api.crossref.org/works/10.1038/s41586-021-03819-2
```
**Response** (simplified):
```json
{
"message": {
"DOI": "10.1038/s41586-021-03819-2",
"title": ["Article title here"],
"author": [
{"given": "John", "family": "Smith"},
{"given": "Jane", "family": "Doe"}
],
"container-title": ["Nature"],
"volume": "595",
"issue": "7865",
"page": "123-128",
"published-print": {"date-parts": [[2021, 7, 1]]},
"publisher": "Springer Nature",
"type": "journal-article",
"ISSN": ["0028-0836"]
}
}
```
#### Fields Available
**Always present**:
- `DOI`: Digital Object Identifier
- `title`: Article title (array)
- `type`: Content type (journal-article, book-chapter, etc.)
**Usually present**:
- `author`: Array of author objects
- `container-title`: Journal/book title
- `published-print` or `published-online`: Publication date
- `volume`, `issue`, `page`: Publication details
- `publisher`: Publisher name
**Sometimes present**:
- `abstract`: Article abstract
- `subject`: Subject categories
- `ISSN`: Journal ISSN
- `ISBN`: Book ISBN
- `reference`: Reference list
- `is-referenced-by-count`: Citation count
#### Content Types
CrossRef `type` field values:
- `journal-article`: Journal articles
- `book-chapter`: Book chapters
- `book`: Books
- `proceedings-article`: Conference papers
- `posted-content`: Preprints
- `dataset`: Research datasets
- `report`: Technical reports
- `dissertation`: Theses/dissertations
### PubMed E-utilities API
**Specialized for biomedical literature** - Curated metadata with MeSH terms.
**Base URL**: `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/`
**API key recommended** (free):
- Higher rate limits
- Better performance
#### PMID to Metadata
**Step 1: EFetch for full record**
```
GET https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?
db=pubmed&
id=34265844&
retmode=xml&
api_key=YOUR_KEY
```
**Response**: XML with comprehensive metadata
**Step 2: Parse XML**
Key fields:
```xml
<PubmedArticle>
<MedlineCitation>
<PMID>34265844</PMID>
<Article>
<ArticleTitle>Title here</ArticleTitle>
<AuthorList>
<Author><LastName>Smith</LastName><ForeName>John</ForeName></Author>
</AuthorList>
<Journal>
<Title>Nature</Title>
<JournalIssue>
<Volume>595</Volume>
<Issue>7865</Issue>
<PubDate><Year>2021</Year></PubDate>
</JournalIssue>
</Journal>
<Pagination><MedlinePgn>123-128</MedlinePgn></Pagination>
<Abstract><AbstractText>Abstract text here</AbstractText></Abstract>
</Article>
</MedlineCitation>
<PubmedData>
<ArticleIdList>
<ArticleId IdType="doi">10.1038/s41586-021-03819-2</ArticleId>
<ArticleId IdType="pmc">PMC8287551</ArticleId>
</ArticleIdList>
</PubmedData>
</PubmedArticle>
```
#### Unique PubMed Fields
**MeSH Terms**: Controlled vocabulary
```xml
<MeshHeadingList>
<MeshHeading>
<DescriptorName UI="D003920">Diabetes Mellitus</DescriptorName>
</MeshHeading>
</MeshHeadingList>
```
**Publication Types**:
```xml
<PublicationTypeList>
<PublicationType UI="D016428">Journal Article</PublicationType>
<PublicationType UI="D016449">Randomized Controlled Trial</PublicationType>
</PublicationTypeList>
```
**Grant Information**:
```xml
<GrantList>
<Grant>
<GrantID>R01-123456</GrantID>
<Agency>NIAID NIH HHS</Agency>
<Country>United States</Country>
</Grant>
</GrantList>
```
### arXiv API
**Preprints in physics, math, CS, q-bio** - Free, open access.
**Base URL**: `https://export.arxiv.org/api/query`
**No API key required**
#### arXiv ID to Metadata
**Request**:
```
GET https://export.arxiv.org/api/query?id_list=2103.14030
```
**Response**: Atom XML
```xml
<entry>
<id>http://arxiv.org/abs/2103.14030v2</id>
<title>Highly accurate protein structure prediction with AlphaFold</title>
<author><name>John Jumper</name></author>
<author><name>Richard Evans</name></author>
<published>2021-03-26T17:47:17Z</published>
<updated>2021-07-01T16:51:46Z</updated>
<summary>Abstract text here...</summary>
<arxiv:doi>10.1038/s41586-021-03819-2</arxiv:doi>
<category term="q-bio.BM" scheme="http://arxiv.org/schemas/atom"/>
<category term="cs.LG" scheme="http://arxiv.org/schemas/atom"/>
</entry>
```
#### Key Fields
- `id`: arXiv URL
- `title`: Preprint title
- `author`: Author list
- `published`: First version date
- `updated`: Latest version date
- `summary`: Abstract
- `arxiv:doi`: DOI if published
- `arxiv:journal_ref`: Journal reference if published
- `category`: arXiv categories
#### Version Tracking
arXiv tracks versions:
- `v1`: Initial submission
- `v2`, `v3`, etc.: Revisions
**Always check** if preprint has been published in journal (use DOI if available).
### DataCite API
**Research datasets, software, other outputs** - Assigns DOIs to non-traditional scholarly works.
**Base URL**: `https://api.datacite.org/dois/`
**Similar to CrossRef** but for datasets, software, code, etc.
**Request**:
```
GET https://api.datacite.org/dois/10.5281/zenodo.1234567
```
**Response**: JSON with metadata for dataset/software
## Required BibTeX Fields
### @article (Journal Articles)
**Required**:
- `author`: Author names
- `title`: Article title
- `journal`: Journal name
- `year`: Publication year
**Optional but recommended**:
- `volume`: Volume number
- `number`: Issue number
- `pages`: Page range (e.g., 123--145)
- `doi`: Digital Object Identifier
- `url`: URL if no DOI
- `month`: Publication month
**Example**:
```bibtex
@article{Smith2024,
author = {Smith, John and Doe, Jane},
title = {Novel Approach to Protein Folding},
journal = {Nature},
year = {2024},
volume = {625},
number = {8001},
pages = {123--145},
doi = {10.1038/nature12345}
}
```
### @book (Books)
**Required**:
- `author` or `editor`: Author(s) or editor(s)
- `title`: Book title
- `publisher`: Publisher name
- `year`: Publication year
**Optional but recommended**:
- `edition`: Edition number (if not first)
- `address`: Publisher location
- `isbn`: ISBN
- `url`: URL
- `series`: Series name
**Example**:
```bibtex
@book{Kumar2021,
author = {Kumar, Vinay and Abbas, Abul K. and Aster, Jon C.},
title = {Robbins and Cotran Pathologic Basis of Disease},
publisher = {Elsevier},
year = {2021},
edition = {10},
isbn = {978-0-323-53113-9}
}
```
### @inproceedings (Conference Papers)
**Required**:
- `author`: Author names
- `title`: Paper title
- `booktitle`: Conference/proceedings name
- `year`: Year
**Optional but recommended**:
- `pages`: Page range
- `organization`: Organizing body
- `publisher`: Publisher
- `address`: Conference location
- `month`: Conference month
- `doi`: DOI if available
**Example**:
```bibtex
@inproceedings{Vaswani2017,
author = {Vaswani, Ashish and Shazeer, Noam and others},
title = {Attention is All You Need},
booktitle = {Advances in Neural Information Processing Systems},
year = {2017},
pages = {5998--6008},
volume = {30}
}
```
### @incollection (Book Chapters)
**Required**:
- `author`: Chapter author(s)
- `title`: Chapter title
- `booktitle`: Book title
- `publisher`: Publisher name
- `year`: Publication year
**Optional but recommended**:
- `editor`: Book editor(s)
- `pages`: Chapter page range
- `chapter`: Chapter number
- `edition`: Edition
- `address`: Publisher location
**Example**:
```bibtex
@incollection{Brown2020,
author = {Brown, Peter O. and Botstein, David},
title = {Exploring the New World of the Genome with {DNA} Microarrays},
booktitle = {DNA Microarrays: A Molecular Cloning Manual},
editor = {Eisen, Michael B. and Brown, Patrick O.},
publisher = {Cold Spring Harbor Laboratory Press},
year = {2020},
pages = {1--45}
}
```
### @phdthesis (Dissertations)
**Required**:
- `author`: Author name
- `title`: Thesis title
- `school`: Institution
- `year`: Year
**Optional**:
- `type`: Type (e.g., "PhD dissertation")
- `address`: Institution location
- `month`: Month
- `url`: URL
**Example**:
```bibtex
@phdthesis{Johnson2023,
author = {Johnson, Mary L.},
title = {Novel Approaches to Cancer Immunotherapy},
school = {Stanford University},
year = {2023},
type = {{PhD} dissertation}
}
```
### @misc (Preprints, Software, Datasets)
**Required**:
- `author`: Author(s)
- `title`: Title
- `year`: Year
**For preprints, add**:
- `howpublished`: Repository (e.g., "bioRxiv")
- `doi`: Preprint DOI
- `note`: Preprint ID
**Example (preprint)**:
```bibtex
@misc{Zhang2024,
author = {Zhang, Yi and Chen, Li and Wang, Hui},
title = {Novel Therapeutic Targets in Alzheimer's Disease},
year = {2024},
howpublished = {bioRxiv},
doi = {10.1101/2024.01.001},
note = {Preprint}
}
```
**Example (software)**:
```bibtex
@misc{AlphaFold2021,
author = {DeepMind},
title = {{AlphaFold} Protein Structure Database},
year = {2021},
howpublished = {Software},
url = {https://alphafold.ebi.ac.uk/},
doi = {10.5281/zenodo.5123456}
}
```
## Extraction Workflows
### From DOI
**Best practice** - Most reliable source:
```bash
# Single DOI
python scripts/extract_metadata.py --doi 10.1038/s41586-021-03819-2
# Multiple DOIs
python scripts/extract_metadata.py \
--doi 10.1038/nature12345 \
--doi 10.1126/science.abc1234 \
--output refs.bib
```
**Process**:
1. Query CrossRef API with DOI
2. Parse JSON response
3. Extract required fields
4. Determine entry type (@article, @book, etc.)
5. Format as BibTeX
6. Validate completeness
### From PMID
**For biomedical literature**:
```bash
# Single PMID
python scripts/extract_metadata.py --pmid 34265844
# Multiple PMIDs
python scripts/extract_metadata.py \
--pmid 34265844 \
--pmid 28445112 \
--output refs.bib
```
**Process**:
1. Query PubMed EFetch with PMID
2. Parse XML response
3. Extract metadata including MeSH terms
4. Check for DOI in response
5. If DOI exists, optionally query CrossRef for additional metadata
6. Format as BibTeX
### From arXiv ID
**For preprints**:
```bash
python scripts/extract_metadata.py --arxiv 2103.14030
```
**Process**:
1. Query arXiv API with ID
2. Parse Atom XML response
3. Check for published version (DOI in response)
4. If published: Use DOI and CrossRef
5. If not published: Use preprint metadata
6. Format as @misc with preprint note
**Important**: Always check if preprint has been published!
### From URL
**When you only have URL**:
```bash
python scripts/extract_metadata.py \
--url "https://www.nature.com/articles/s41586-021-03819-2"
```
**Process**:
1. Parse the URL structure
2. Identify the identifier type (DOI, PMID, arXiv)
3. Extract the identifier from the URL
4. Query the appropriate API
5. Format as BibTeX
**URL patterns**:
```
# DOI URLs
https://doi.org/10.1038/nature12345
https://dx.doi.org/10.1126/science.abc123
https://www.nature.com/articles/s41586-021-03819-2
# PubMed URLs
https://pubmed.ncbi.nlm.nih.gov/34265844/
https://www.ncbi.nlm.nih.gov/pubmed/34265844
# arXiv URLs
https://arxiv.org/abs/2103.14030
https://arxiv.org/pdf/2103.14030.pdf
```
### Batch Processing
**From file with mixed identifiers**:
```bash
# Create file with one identifier per line
# identifiers.txt:
# 10.1038/nature12345
# 34265844
# 2103.14030
# https://doi.org/10.1126/science.abc123
python scripts/extract_metadata.py \
--input identifiers.txt \
--output references.bib
```
**Process**:
- Script auto-detects identifier type
- Queries appropriate API
- Combines all into single BibTeX file
- Handles errors gracefully
## Special Cases and Edge Cases
### Preprints Later Published
**Issue**: Preprint cited, but journal version now available.
**Solution**:
1. Check arXiv metadata for DOI field
2. If DOI present, use published version
3. Update citation to journal article
4. Note preprint version in comments if needed
**Example**:
```bibtex
% Originally: arXiv:2103.14030
% Published as:
@article{Jumper2021,
author = {Jumper, John and Evans, Richard and others},
title = {Highly Accurate Protein Structure Prediction with {AlphaFold}},
journal = {Nature},
year = {2021},
volume = {596},
pages = {583--589},
doi = {10.1038/s41586-021-03819-2}
}
```
### Multiple Authors (et al.)
**Issue**: Many authors (10+).
**BibTeX practice**:
- Include all authors if <10
- Use "and others" for 10+
- Or list all (journals vary)
**Example**:
```bibtex
@article{LargeCollaboration2024,
author = {First, Author and Second, Author and Third, Author and others},
...
}
```
### Author Name Variations
**Issue**: Authors publish under different name formats.
**Standardization**:
```
# Common variations
John Smith
John A. Smith
John Andrew Smith
J. A. Smith
Smith, J.
Smith, J. A.
# BibTeX format (recommended)
author = {Smith, John A.}
```
**Extraction preference**:
1. Use full name if available
2. Include middle initial if available
3. Format: Last, First Middle
### No DOI Available
**Issue**: Older papers or books without DOIs.
**Solutions**:
1. Use PMID if available (biomedical)
2. Use ISBN for books
3. Use URL to stable source
4. Include full publication details
**Example**:
```bibtex
@article{OldPaper1995,
author = {Author, Name},
title = {Title Here},
journal = {Journal Name},
year = {1995},
volume = {123},
pages = {45--67},
url = {https://stable-url-here},
note = {PMID: 12345678}
}
```
### Conference Papers vs Journal Articles
**Issue**: Same work published in both.
**Best practice**:
- Cite journal version if both available
- Journal version is archival
- Conference version for timeliness
**If citing conference**:
```bibtex
@inproceedings{Smith2024conf,
author = {Smith, John},
title = {Title},
booktitle = {Proceedings of NeurIPS 2024},
year = {2024}
}
```
**If citing journal**:
```bibtex
@article{Smith2024journal,
author = {Smith, John},
title = {Title},
journal = {Journal of Machine Learning Research},
year = {2024}
}
```
### Book Chapters vs Edited Collections
**Extract correctly**:
- Chapter: Use `@incollection`
- Whole book: Use `@book`
- Book editor: List in `editor` field
- Chapter author: List in `author` field
### Datasets and Software
**Use @misc** with appropriate fields:
```bibtex
@misc{DatasetName2024,
author = {Author, Name},
title = {Dataset Title},
year = {2024},
howpublished = {Zenodo},
doi = {10.5281/zenodo.123456},
note = {Version 1.2}
}
```
## Validation After Extraction
Always validate extracted metadata:
```bash
python scripts/validate_citations.py extracted_refs.bib
```
**Check**:
- All required fields present
- DOI resolves correctly
- Author names formatted consistently
- Year is reasonable (4 digits)
- Journal/publisher names correct
- Page ranges use -- not -
- Special characters handled properly
## Best Practices
### 1. Prefer DOI When Available
DOIs provide:
- Permanent identifier
- Best metadata source
- Publisher-verified information
- Resolvable link
### 2. Verify Automatically Extracted Metadata
Spot-check:
- Author names match publication
- Title matches (including capitalization)
- Year is correct
- Journal name is complete
### 3. Handle Special Characters
**LaTeX special characters**:
- Protect capitalization: `{AlphaFold}`
- Handle accents: `M{\"u}ller` or use Unicode
- Chemical formulas: `H$_2$O` or `\ce{H2O}`
### 4. Use Consistent Citation Keys
**Convention**: `FirstAuthorYEARkeyword`
```
Smith2024protein
Doe2023machine
Johnson2024cancer
```
### 5. Include DOI for Modern Papers
All papers published after ~2000 should have DOI:
```bibtex
doi = {10.1038/nature12345}
```
### 6. Document Source
For non-standard sources, add note:
```bibtex
note = {Preprint, not peer-reviewed}
note = {Technical report}
note = {Dataset accompanying [citation]}
```
## Summary
Metadata extraction workflow:
1. **Identify**: Determine identifier type (DOI, PMID, arXiv, URL)
2. **Query**: Use appropriate API (CrossRef, PubMed, arXiv)
3. **Extract**: Parse response for required fields
4. **Format**: Create properly formatted BibTeX entry
5. **Validate**: Check completeness and accuracy
6. **Verify**: Spot-check critical citations
**Use scripts** to automate:
- `extract_metadata.py`: Universal extractor
- `doi_to_bibtex.py`: Quick DOI conversion
- `validate_citations.py`: Verify accuracy
**Always validate** extracted metadata before final submission!

View File

@@ -0,0 +1,839 @@
# PubMed Search Guide
Comprehensive guide to searching PubMed for biomedical and life sciences literature, including MeSH terms, field tags, advanced search strategies, and E-utilities API usage.
## Overview
PubMed is the premier database for biomedical literature:
- **Coverage**: 35+ million citations
- **Scope**: Biomedical and life sciences
- **Sources**: MEDLINE, life science journals, online books
- **Authority**: Maintained by National Library of Medicine (NLM) / NCBI
- **Access**: Free, no account required
- **Updates**: Daily with new citations
- **Curation**: High-quality metadata, MeSH indexing
## Basic Search
### Simple Keyword Search
PubMed automatically maps terms to MeSH and searches multiple fields:
```
diabetes
CRISPR gene editing
Alzheimer's disease treatment
cancer immunotherapy
```
**Automatic Features**:
- Automatic MeSH mapping
- Plural/singular variants
- Abbreviation expansion
- Spell checking
### Exact Phrase Search
Use quotation marks for exact phrases:
```
"CRISPR-Cas9"
"systematic review"
"randomized controlled trial"
"machine learning"
```
## MeSH (Medical Subject Headings)
### What is MeSH?
MeSH is a controlled vocabulary thesaurus for indexing biomedical literature:
- **Hierarchical structure**: Organized in tree structures
- **Consistent indexing**: Same concept always tagged the same way
- **Comprehensive**: Covers diseases, drugs, anatomy, techniques, etc.
- **Professional curation**: NLM indexers assign MeSH terms
### Finding MeSH Terms
**MeSH Browser**: https://meshb.nlm.nih.gov/search
**Example**:
```
Search: "heart attack"
MeSH term: "Myocardial Infarction"
```
**In PubMed**:
1. Search with keyword
2. Check "MeSH Terms" in left sidebar
3. Select relevant MeSH terms
4. Add to search
### Using MeSH in Searches
**Basic MeSH search**:
```
"Diabetes Mellitus"[MeSH]
"CRISPR-Cas Systems"[MeSH]
"Alzheimer Disease"[MeSH]
"Neoplasms"[MeSH]
```
**MeSH with subheadings**:
```
"Diabetes Mellitus/drug therapy"[MeSH]
"Neoplasms/genetics"[MeSH]
"Heart Failure/prevention and control"[MeSH]
```
**Common subheadings**:
- `/drug therapy`: Drug treatment
- `/diagnosis`: Diagnostic aspects
- `/genetics`: Genetic aspects
- `/epidemiology`: Occurrence and distribution
- `/prevention and control`: Prevention methods
- `/etiology`: Causes
- `/surgery`: Surgical treatment
- `/metabolism`: Metabolic aspects
### MeSH Explosion
By default, MeSH searches include narrower terms (explosion):
```
"Neoplasms"[MeSH]
# Includes: Breast Neoplasms, Lung Neoplasms, etc.
```
**Disable explosion** (exact term only):
```
"Neoplasms"[MeSH:NoExp]
```
### MeSH Major Topic
Search only where MeSH term is a major focus:
```
"Diabetes Mellitus"[MeSH Major Topic]
# Only papers where diabetes is main topic
```
## Field Tags
Field tags specify which part of the record to search.
### Common Field Tags
**Title and Abstract**:
```
cancer[Title] # In title only
treatment[Title/Abstract] # In title or abstract
"machine learning"[Title/Abstract]
```
**Author**:
```
"Smith J"[Author]
"Doudna JA"[Author]
"Collins FS"[Author]
```
**Author - Full Name**:
```
"Smith, John"[Full Author Name]
```
**Journal**:
```
"Nature"[Journal]
"Science"[Journal]
"New England Journal of Medicine"[Journal]
"Nat Commun"[Journal] # Abbreviated form
```
**Publication Date**:
```
2023[Publication Date]
2020:2024[Publication Date] # Date range
2023/01/01:2023/12/31[Publication Date]
```
**Date Created**:
```
2023[Date - Create] # When added to PubMed
```
**Publication Type**:
```
"Review"[Publication Type]
"Clinical Trial"[Publication Type]
"Meta-Analysis"[Publication Type]
"Randomized Controlled Trial"[Publication Type]
```
**Language**:
```
English[Language]
French[Language]
```
**DOI**:
```
10.1038/nature12345[DOI]
```
**PMID (PubMed ID)**:
```
12345678[PMID]
```
**Article ID**:
```
PMC1234567[PMC] # PubMed Central ID
```
### Less Common But Useful Tags
```
humans[MeSH Terms] # Only human studies
animals[MeSH Terms] # Only animal studies
"United States"[Place of Publication]
nih[Grant Number] # NIH-funded research
"Female"[Sex] # Female subjects
"Aged, 80 and over"[Age] # Elderly subjects
```
## Boolean Operators
Combine search terms with Boolean logic.
### AND
Both terms must be present (default behavior):
```
diabetes AND treatment
"CRISPR-Cas9" AND "gene editing"
cancer AND immunotherapy AND "clinical trial"[Publication Type]
```
### OR
Either term must be present:
```
"heart attack" OR "myocardial infarction"
diabetes OR "diabetes mellitus"
CRISPR OR Cas9 OR "gene editing"
```
**Use case**: Synonyms and related terms
### NOT
Exclude terms:
```
cancer NOT review
diabetes NOT animal
"machine learning" NOT "deep learning"
```
**Caution**: May exclude relevant papers that mention both terms.
### Combining Operators
Use parentheses for complex logic:
```
(diabetes OR "diabetes mellitus") AND (treatment OR therapy)
("CRISPR" OR "gene editing") AND ("therapeutic" OR "therapy")
AND 2020:2024[Publication Date]
(cancer OR neoplasm) AND (immunotherapy OR "immune checkpoint inhibitor")
AND ("clinical trial"[Publication Type] OR "randomized controlled trial"[Publication Type])
```
## Advanced Search Builder
**Access**: https://pubmed.ncbi.nlm.nih.gov/advanced/
**Features**:
- Visual query builder
- Add multiple query boxes
- Select field tags from dropdowns
- Combine with AND/OR/NOT
- Preview results
- Shows final query string
- Save queries
**Workflow**:
1. Add search terms in separate boxes
2. Select field tags
3. Choose Boolean operators
4. Preview results
5. Refine as needed
6. Copy final query string
7. Use in scripts or save
**Example built query**:
```
#1: "Diabetes Mellitus, Type 2"[MeSH]
#2: "Metformin"[MeSH]
#3: "Clinical Trial"[Publication Type]
#4: 2020:2024[Publication Date]
#5: #1 AND #2 AND #3 AND #4
```
## Filters and Limits
### Article Types
```
"Review"[Publication Type]
"Systematic Review"[Publication Type]
"Meta-Analysis"[Publication Type]
"Clinical Trial"[Publication Type]
"Randomized Controlled Trial"[Publication Type]
"Case Reports"[Publication Type]
"Comparative Study"[Publication Type]
```
### Species
```
humans[MeSH Terms]
mice[MeSH Terms]
rats[MeSH Terms]
```
### Sex
```
"Female"[MeSH Terms]
"Male"[MeSH Terms]
```
### Age Groups
```
"Infant"[MeSH Terms]
"Child"[MeSH Terms]
"Adolescent"[MeSH Terms]
"Adult"[MeSH Terms]
"Aged"[MeSH Terms]
"Aged, 80 and over"[MeSH Terms]
```
### Text Availability
```
free full text[Filter] # Free full-text available
```
### Journal Categories
```
"Journal Article"[Publication Type]
```
## E-utilities API
NCBI provides programmatic access via E-utilities (Entrez Programming Utilities).
### Overview
**Base URL**: `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/`
**Main Tools**:
- **ESearch**: Search and retrieve PMIDs
- **EFetch**: Retrieve full records
- **ESummary**: Retrieve document summaries
- **ELink**: Find related articles
- **EInfo**: Database statistics
**No API key required**, but recommended for:
- Higher rate limits (10/sec vs 3/sec)
- Better performance
- Identify your project
**Get API key**: https://www.ncbi.nlm.nih.gov/account/
### ESearch - Search PubMed
Retrieve PMIDs for a query.
**Endpoint**: `/esearch.fcgi`
**Parameters**:
- `db`: Database (pubmed)
- `term`: Search query
- `retmax`: Maximum results (default 20, max 10000)
- `retstart`: Starting position (for pagination)
- `sort`: Sort order (relevance, pub_date, author)
- `api_key`: Your API key (optional but recommended)
**Example URL**:
```
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?
db=pubmed&
term=diabetes+AND+treatment&
retmax=100&
retmode=json&
api_key=YOUR_API_KEY
```
**Response**:
```json
{
"esearchresult": {
"count": "250000",
"retmax": "100",
"idlist": ["12345678", "12345679", ...]
}
}
```
### EFetch - Retrieve Records
Get full metadata for PMIDs.
**Endpoint**: `/efetch.fcgi`
**Parameters**:
- `db`: Database (pubmed)
- `id`: Comma-separated PMIDs
- `retmode`: Format (xml, json, text)
- `rettype`: Type (abstract, medline, full)
- `api_key`: Your API key
**Example URL**:
```
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?
db=pubmed&
id=12345678,12345679&
retmode=xml&
api_key=YOUR_API_KEY
```
**Response**: XML with complete metadata including:
- Title
- Authors (with affiliations)
- Abstract
- Journal
- Publication date
- DOI
- PMID, PMCID
- MeSH terms
- Keywords
### ESummary - Get Summaries
Lighter-weight alternative to EFetch.
**Example**:
```
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?
db=pubmed&
id=12345678&
retmode=json&
api_key=YOUR_API_KEY
```
**Returns**: Key metadata without full abstract and details.
### ELink - Find Related Articles
Find related articles or links to other databases.
**Example**:
```
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?
dbfrom=pubmed&
db=pubmed&
id=12345678&
linkname=pubmed_pubmed_citedin
```
**Link types**:
- `pubmed_pubmed`: Related articles
- `pubmed_pubmed_citedin`: Papers citing this article
- `pubmed_pmc`: PMC full-text versions
- `pubmed_protein`: Related protein records
### Rate Limiting
**Without API key**:
- 3 requests per second
- Block if exceeded
**With API key**:
- 10 requests per second
- Better for programmatic access
**Best practice**:
```python
import time
time.sleep(0.34) # ~3 requests/second
# or
time.sleep(0.11) # ~10 requests/second with API key
```
### API Key Usage
**Get API key**:
1. Create NCBI account: https://www.ncbi.nlm.nih.gov/account/
2. Settings → API Key Management
3. Create new API key
4. Copy key
**Use in requests**:
```
&api_key=YOUR_API_KEY_HERE
```
**Store securely**:
```bash
# In environment variable
export NCBI_API_KEY="your_key_here"
# In script
import os
api_key = os.getenv('NCBI_API_KEY')
```
## Search Strategies
### Comprehensive Systematic Search
For systematic reviews and meta-analyses:
```
# 1. Identify key concepts
Concept 1: Diabetes
Concept 2: Treatment
Concept 3: Outcomes
# 2. Find MeSH terms and synonyms
Concept 1: "Diabetes Mellitus"[MeSH] OR diabetes OR diabetic
Concept 2: "Drug Therapy"[MeSH] OR treatment OR therapy OR medication
Concept 3: "Treatment Outcome"[MeSH] OR outcome OR efficacy OR effectiveness
# 3. Combine with AND
("Diabetes Mellitus"[MeSH] OR diabetes OR diabetic)
AND ("Drug Therapy"[MeSH] OR treatment OR therapy OR medication)
AND ("Treatment Outcome"[MeSH] OR outcome OR efficacy OR effectiveness)
# 4. Add filters
AND 2015:2024[Publication Date]
AND ("Clinical Trial"[Publication Type] OR "Randomized Controlled Trial"[Publication Type])
AND English[Language]
AND humans[MeSH Terms]
```
### Finding Clinical Trials
```
# Specific disease + clinical trials
"Alzheimer Disease"[MeSH]
AND ("Clinical Trial"[Publication Type]
OR "Randomized Controlled Trial"[Publication Type])
AND 2020:2024[Publication Date]
# Specific drug trials
"Metformin"[MeSH]
AND "Diabetes Mellitus, Type 2"[MeSH]
AND "Randomized Controlled Trial"[Publication Type]
```
### Finding Reviews
```
# Systematic reviews on topic
"CRISPR-Cas Systems"[MeSH]
AND ("Systematic Review"[Publication Type] OR "Meta-Analysis"[Publication Type])
# Reviews in high-impact journals
cancer immunotherapy
AND "Review"[Publication Type]
AND ("Nature"[Journal] OR "Science"[Journal] OR "Cell"[Journal])
```
### Finding Recent Papers
```
# Papers from last year
"machine learning"[Title/Abstract]
AND "drug discovery"[Title/Abstract]
AND 2024[Publication Date]
# Recent papers in specific journal
"CRISPR"[Title/Abstract]
AND "Nature"[Journal]
AND 2023:2024[Publication Date]
```
### Author Tracking
```
# Specific author's recent work
"Doudna JA"[Author] AND 2020:2024[Publication Date]
# Author + topic
"Church GM"[Author] AND "synthetic biology"[Title/Abstract]
```
### High-Quality Evidence
```
# Meta-analyses and systematic reviews
(diabetes OR "diabetes mellitus")
AND (treatment OR therapy)
AND ("Meta-Analysis"[Publication Type] OR "Systematic Review"[Publication Type])
# RCTs only
cancer immunotherapy
AND "Randomized Controlled Trial"[Publication Type]
AND 2020:2024[Publication Date]
```
## Script Integration
### search_pubmed.py Usage
**Basic search**:
```bash
python scripts/search_pubmed.py "diabetes treatment"
```
**With MeSH terms**:
```bash
python scripts/search_pubmed.py \
--query '"Diabetes Mellitus"[MeSH] AND "Drug Therapy"[MeSH]'
```
**Date range filter**:
```bash
python scripts/search_pubmed.py "CRISPR" \
--date-start 2020-01-01 \
--date-end 2024-12-31 \
--limit 200
```
**Publication type filter**:
```bash
python scripts/search_pubmed.py "cancer immunotherapy" \
--publication-types "Clinical Trial,Randomized Controlled Trial" \
--limit 100
```
**Export to BibTeX**:
```bash
python scripts/search_pubmed.py "Alzheimer's disease" \
--limit 100 \
--format bibtex \
--output alzheimers.bib
```
**Complex query from file**:
```bash
# Save complex query in query.txt
cat > query.txt << 'EOF'
("Diabetes Mellitus, Type 2"[MeSH] OR "diabetes"[Title/Abstract])
AND ("Metformin"[MeSH] OR "metformin"[Title/Abstract])
AND "Randomized Controlled Trial"[Publication Type]
AND 2015:2024[Publication Date]
AND English[Language]
EOF
# Run search
python scripts/search_pubmed.py --query-file query.txt --limit 500
```
### Batch Searches
```bash
# Search multiple topics
TOPICS=("diabetes treatment" "cancer immunotherapy" "CRISPR gene editing")
for topic in "${TOPICS[@]}"; do
python scripts/search_pubmed.py "$topic" \
--limit 100 \
--output "${topic// /_}.json"
sleep 1
done
```
### Extract Metadata
```bash
# Search returns PMIDs
python scripts/search_pubmed.py "topic" --output results.json
# Extract full metadata
python scripts/extract_metadata.py \
--input results.json \
--output references.bib
```
## Tips and Best Practices
### Search Construction
1. **Start with MeSH terms**:
- Use MeSH Browser to find correct terms
- More precise than keyword search
- Captures all papers on topic regardless of terminology
2. **Include text word variants**:
```
# Better coverage
("Diabetes Mellitus"[MeSH] OR diabetes OR diabetic)
```
3. **Use field tags appropriately**:
- `[MeSH]` for standardized concepts
- `[Title/Abstract]` for specific terms
- `[Author]` for known authors
- `[Journal]` for specific venues
4. **Build incrementally**:
```
# Step 1: Basic search
diabetes
# Step 2: Add specificity
"Diabetes Mellitus, Type 2"[MeSH]
# Step 3: Add treatment
"Diabetes Mellitus, Type 2"[MeSH] AND "Metformin"[MeSH]
# Step 4: Add study type
"Diabetes Mellitus, Type 2"[MeSH] AND "Metformin"[MeSH]
AND "Clinical Trial"[Publication Type]
# Step 5: Add date range
... AND 2020:2024[Publication Date]
```
### Optimizing Results
1. **Too many results**: Add filters
- Restrict publication type
- Narrow date range
- Add more specific MeSH terms
- Use Major Topic: `[MeSH Major Topic]`
2. **Too few results**: Broaden search
- Remove restrictive filters
- Use OR for synonyms
- Expand date range
- Use MeSH explosion (default)
3. **Irrelevant results**: Refine terms
- Use more specific MeSH terms
- Add exclusions with NOT
- Use Title field instead of all fields
- Add MeSH subheadings
### Quality Control
1. **Document search strategy**:
- Save exact query string
- Record search date
- Note number of results
- Save filters used
2. **Export systematically**:
- Use consistent file naming
- Export to JSON for flexibility
- Convert to BibTeX as needed
- Keep original search results
3. **Validate retrieved citations**:
```bash
python scripts/validate_citations.py pubmed_results.bib
```
### Staying Current
1. **Set up search alerts**:
- PubMed → Save search
- Receive email updates
- Daily, weekly, or monthly
2. **Track specific journals**:
```
"Nature"[Journal] AND CRISPR[Title]
```
3. **Follow key authors**:
```
"Church GM"[Author]
```
## Common Issues and Solutions
### Issue: MeSH Term Not Found
**Solution**:
- Check spelling
- Use MeSH Browser
- Try related terms
- Use text word search as fallback
### Issue: Zero Results
**Solution**:
- Remove filters
- Check query syntax
- Use OR for broader search
- Try synonyms
### Issue: Poor Quality Results
**Solution**:
- Add publication type filters
- Restrict to recent years
- Use MeSH Major Topic
- Filter by journal quality
### Issue: Duplicates from Different Sources
**Solution**:
```bash
python scripts/format_bibtex.py results.bib \
--deduplicate \
--output clean.bib
```
### Issue: API Rate Limiting
**Solution**:
- Get API key (increases limit to 10/sec)
- Add delays in scripts
- Process in batches
- Use off-peak hours
## Summary
PubMed provides authoritative biomedical literature search:
- **Curated content**: MeSH indexing, quality control
- **Precise search**: Field tags, MeSH terms, filters
- **Programmatic access**: E-utilities API
- **Free access**: No subscription required
- **Comprehensive**: 35M+ citations, daily updates
Key strategies:
- Use MeSH terms for precise searching
- Combine with text words for comprehensive coverage
- Apply appropriate field tags
- Filter by publication type and date
- Use E-utilities API for automation
- Document search strategy for reproducibility
For broader coverage across disciplines, complement with Google Scholar.

View File

@@ -0,0 +1,204 @@
#!/usr/bin/env python3
"""
DOI to BibTeX Converter
Quick utility to convert DOIs to BibTeX format using CrossRef API.
"""
import sys
import requests
import argparse
import time
import json
from typing import Optional, List
class DOIConverter:
    """Convert DOIs to BibTeX entries via DOI.org content negotiation.

    Each DOI is resolved through https://doi.org with an
    ``Accept: application/x-bibtex`` header, which returns a ready-made
    BibTeX entry for the referenced work (served by CrossRef/DataCite).
    """

    # Resolver/scheme prefixes commonly pasted in front of a bare DOI.
    _DOI_PREFIXES = (
        'https://doi.org/',
        'http://doi.org/',
        'https://dx.doi.org/',
        'http://dx.doi.org/',
        'doi:',
    )

    def __init__(self):
        # A shared session reuses HTTP connections across multiple requests.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'DOIConverter/1.0 (Citation Management Tool; mailto:support@example.com)'
        })

    @classmethod
    def _clean_doi(cls, doi: str) -> str:
        """Return the bare DOI: strip whitespace and any resolver prefix.

        Handles doi.org and dx.doi.org URLs (http or https) as well as a
        leading 'doi:' label, case-insensitively. The DOI itself is
        returned with its original casing.
        """
        doi = doi.strip()
        lowered = doi.lower()
        for prefix in cls._DOI_PREFIXES:
            if lowered.startswith(prefix):
                doi = doi[len(prefix):]
                break
        return doi

    def doi_to_bibtex(self, doi: str) -> Optional[str]:
        """
        Convert a single DOI to BibTeX format.

        Args:
            doi: Digital Object Identifier (bare, 'doi:'-prefixed, or as a
                doi.org / dx.doi.org URL)

        Returns:
            BibTeX string, or None if the DOI cannot be resolved (errors
            are reported on stderr)
        """
        doi = self._clean_doi(doi)
        # Request BibTeX from DOI.org content negotiation
        url = f'https://doi.org/{doi}'
        headers = {
            'Accept': 'application/x-bibtex',
            'User-Agent': 'DOIConverter/1.0 (Citation Management Tool)'
        }
        try:
            response = self.session.get(url, headers=headers, timeout=15)
            if response.status_code == 200:
                bibtex = response.text.strip()
                # CrossRef sometimes returns entries with @data type, convert to @misc
                if bibtex.startswith('@data{'):
                    bibtex = bibtex.replace('@data{', '@misc{', 1)
                return bibtex
            elif response.status_code == 404:
                print(f'Error: DOI not found: {doi}', file=sys.stderr)
                return None
            else:
                print(f'Error: Failed to retrieve BibTeX for {doi} (status {response.status_code})', file=sys.stderr)
                return None
        except requests.exceptions.Timeout:
            print(f'Error: Request timeout for DOI: {doi}', file=sys.stderr)
            return None
        except requests.exceptions.RequestException as e:
            print(f'Error: Request failed for {doi}: {e}', file=sys.stderr)
            return None

    def convert_multiple(self, dois: List[str], delay: float = 0.5) -> List[str]:
        """
        Convert multiple DOIs to BibTeX.

        Args:
            dois: List of DOIs
            delay: Delay between requests (seconds) for rate limiting

        Returns:
            List of BibTeX entries (failed conversions are skipped)
        """
        bibtex_entries = []
        for i, doi in enumerate(dois):
            print(f'Converting DOI {i+1}/{len(dois)}: {doi}', file=sys.stderr)
            bibtex = self.doi_to_bibtex(doi)
            if bibtex:
                bibtex_entries.append(bibtex)
            # Rate limiting
            if i < len(dois) - 1:  # Don't delay after last request
                time.sleep(delay)
        return bibtex_entries
def main():
    """Command-line interface.

    Collects DOIs from positional arguments and/or an input file (one DOI
    per line), converts each to BibTeX via DOI.org content negotiation,
    and writes the result to stdout or a file. Exits with status 1 when
    no identifiers are supplied, the input file cannot be read, or no
    conversion succeeds.
    """
    parser = argparse.ArgumentParser(
        description='Convert DOIs to BibTeX format using CrossRef API',
        epilog='Example: python doi_to_bibtex.py 10.1038/s41586-021-03819-2'
    )
    parser.add_argument(
        'dois',
        nargs='*',
        help='DOI(s) to convert (can provide multiple)'
    )
    parser.add_argument(
        '-i', '--input',
        help='Input file with DOIs (one per line)'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output file for BibTeX (default: stdout)'
    )
    parser.add_argument(
        '--delay',
        type=float,
        default=0.5,
        help='Delay between requests in seconds (default: 0.5)'
    )
    parser.add_argument(
        '--format',
        choices=['bibtex', 'json'],
        default='bibtex',
        help='Output format (default: bibtex)'
    )
    args = parser.parse_args()
    # Collect DOIs from command line and/or file (both sources may be combined)
    dois = []
    if args.dois:
        dois.extend(args.dois)
    if args.input:
        try:
            with open(args.input, 'r', encoding='utf-8') as f:
                # Skip blank lines; each non-blank line is one DOI
                file_dois = [line.strip() for line in f if line.strip()]
            dois.extend(file_dois)
        except FileNotFoundError:
            print(f'Error: Input file not found: {args.input}', file=sys.stderr)
            sys.exit(1)
        except Exception as e:
            print(f'Error reading input file: {e}', file=sys.stderr)
            sys.exit(1)
    if not dois:
        parser.print_help()
        sys.exit(1)
    # Convert DOIs: single DOI bypasses the rate-limit delay entirely
    converter = DOIConverter()
    if len(dois) == 1:
        bibtex = converter.doi_to_bibtex(dois[0])
        if bibtex:
            bibtex_entries = [bibtex]
        else:
            sys.exit(1)
    else:
        bibtex_entries = converter.convert_multiple(dois, delay=args.delay)
        if not bibtex_entries:
            print('Error: No successful conversions', file=sys.stderr)
            sys.exit(1)
    # Format output: plain concatenated BibTeX, or a JSON wrapper with a count
    if args.format == 'bibtex':
        output = '\n\n'.join(bibtex_entries) + '\n'
    else:  # json
        output = json.dumps({
            'count': len(bibtex_entries),
            'entries': bibtex_entries
        }, indent=2)
    # Write output (file when -o given, stdout otherwise)
    if args.output:
        try:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(output)
            print(f'Successfully wrote {len(bibtex_entries)} entries to {args.output}', file=sys.stderr)
        except Exception as e:
            print(f'Error writing output file: {e}', file=sys.stderr)
            sys.exit(1)
    else:
        print(output)
    # Summary on stderr (only meaningful for batch conversions)
    if len(dois) > 1:
        success_rate = len(bibtex_entries) / len(dois) * 100
        print(f'\nConverted {len(bibtex_entries)}/{len(dois)} DOIs ({success_rate:.1f}%)', file=sys.stderr)
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,569 @@
#!/usr/bin/env python3
"""
Metadata Extraction Tool
Extract citation metadata from DOI, PMID, arXiv ID, or URL using various APIs.
"""
import sys
import os
import requests
import argparse
import time
import re
import json
import xml.etree.ElementTree as ET
from typing import Optional, Dict, List, Tuple
from urllib.parse import urlparse
class MetadataExtractor:
    """Extract metadata from various sources and generate BibTeX.

    Supported identifiers: DOI (via CrossRef), PMID (via PubMed
    E-utilities), and new-style arXiv IDs (via the arXiv Atom API).
    ``extract()`` is the main entry point: it classifies the identifier,
    fetches metadata, and renders a BibTeX entry.
    """
    def __init__(self, email: Optional[str] = None):
        """
        Initialize extractor.
        Args:
            email: Email for Entrez API (recommended for PubMed)
        """
        # One shared HTTP session so connections are reused across calls.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'MetadataExtractor/1.0 (Citation Management Tool)'
        })
        # Fall back to the NCBI_EMAIL environment variable when none given.
        self.email = email or os.getenv('NCBI_EMAIL', '')
    def identify_type(self, identifier: str) -> Tuple[str, str]:
        """
        Identify the type of identifier.
        Args:
            identifier: DOI, PMID, arXiv ID, or URL
        Returns:
            Tuple of (type, cleaned_identifier) where type is one of
            'doi', 'pmid', 'arxiv', 'pmcid', 'url', or 'unknown'
        """
        identifier = identifier.strip()
        # Check if URL
        if identifier.startswith('http://') or identifier.startswith('https://'):
            return self._parse_url(identifier)
        # Check for DOI
        if identifier.startswith('10.'):
            return ('doi', identifier)
        # Check for arXiv ID
        # Matches new-style IDs like 2301.01234 or 2301.01234v2; old-style
        # IDs such as hep-th/9901001 are NOT matched and end up 'unknown'.
        if re.match(r'^\d{4}\.\d{4,5}(v\d+)?$', identifier):
            return ('arxiv', identifier)
        if identifier.startswith('arXiv:'):
            return ('arxiv', identifier.replace('arXiv:', ''))
        # Check for PMID (8-digit number typically)
        if identifier.isdigit() and len(identifier) >= 7:
            return ('pmid', identifier)
        # Check for PMCID
        if identifier.upper().startswith('PMC') and identifier[3:].isdigit():
            return ('pmcid', identifier.upper())
        return ('unknown', identifier)
    def _parse_url(self, url: str) -> Tuple[str, str]:
        """Parse URL to extract identifier type and value.

        Recognizes doi.org, PubMed, and arXiv URLs; for any other URL it
        falls back to scraping a DOI-shaped substring from the URL text,
        and finally returns ('url', url) unchanged.
        """
        parsed = urlparse(url)
        # DOI URLs
        if 'doi.org' in parsed.netloc:
            doi = parsed.path.lstrip('/')
            return ('doi', doi)
        # PubMed URLs
        if 'pubmed.ncbi.nlm.nih.gov' in parsed.netloc or 'ncbi.nlm.nih.gov/pubmed' in url:
            pmid = re.search(r'/(\d+)', parsed.path)
            if pmid:
                return ('pmid', pmid.group(1))
        # arXiv URLs
        if 'arxiv.org' in parsed.netloc:
            arxiv_id = re.search(r'/abs/(\d{4}\.\d{4,5})', parsed.path)
            if arxiv_id:
                return ('arxiv', arxiv_id.group(1))
        # Nature, Science, Cell, etc. - try to extract DOI from URL
        doi_match = re.search(r'10\.\d{4,}/[^\s/]+', url)
        if doi_match:
            return ('doi', doi_match.group())
        return ('url', url)
    def extract_from_doi(self, doi: str) -> Optional[Dict]:
        """
        Extract metadata from DOI using CrossRef API.
        Args:
            doi: Digital Object Identifier
        Returns:
            Metadata dictionary or None (errors reported on stderr)
        """
        url = f'https://api.crossref.org/works/{doi}'
        try:
            response = self.session.get(url, timeout=15)
            if response.status_code == 200:
                data = response.json()
                message = data.get('message', {})
                # CrossRef wraps title/container-title in single-element lists.
                # NOTE(review): a present-but-empty 'title' list would raise
                # IndexError here and be swallowed by the broad except below.
                metadata = {
                    'type': 'doi',
                    'entry_type': self._crossref_type_to_bibtex(message.get('type')),
                    'doi': doi,
                    'title': message.get('title', [''])[0],
                    'authors': self._format_authors_crossref(message.get('author', [])),
                    'year': self._extract_year_crossref(message),
                    'journal': message.get('container-title', [''])[0] if message.get('container-title') else '',
                    'volume': str(message.get('volume', '')) if message.get('volume') else '',
                    'issue': str(message.get('issue', '')) if message.get('issue') else '',
                    'pages': message.get('page', ''),
                    'publisher': message.get('publisher', ''),
                    'url': f'https://doi.org/{doi}'
                }
                return metadata
            else:
                print(f'Error: CrossRef API returned status {response.status_code} for DOI: {doi}', file=sys.stderr)
                return None
        except Exception as e:
            print(f'Error extracting metadata from DOI {doi}: {e}', file=sys.stderr)
            return None
    def extract_from_pmid(self, pmid: str) -> Optional[Dict]:
        """
        Extract metadata from PMID using PubMed E-utilities.
        Args:
            pmid: PubMed ID
        Returns:
            Metadata dictionary or None (errors reported on stderr)
        """
        url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
        params = {
            'db': 'pubmed',
            'id': pmid,
            'retmode': 'xml',
            'rettype': 'abstract'
        }
        # Email and API key are optional but raise NCBI's rate limits.
        if self.email:
            params['email'] = self.email
        api_key = os.getenv('NCBI_API_KEY')
        if api_key:
            params['api_key'] = api_key
        try:
            response = self.session.get(url, params=params, timeout=15)
            if response.status_code == 200:
                root = ET.fromstring(response.content)
                article = root.find('.//PubmedArticle')
                if article is None:
                    print(f'Error: No article found for PMID: {pmid}', file=sys.stderr)
                    return None
                # Extract metadata from XML
                medline_citation = article.find('.//MedlineCitation')
                article_elem = medline_citation.find('.//Article')
                journal = article_elem.find('.//Journal')
                # Get DOI if available (listed among the ArticleId elements)
                doi = None
                article_ids = article.findall('.//ArticleId')
                for article_id in article_ids:
                    if article_id.get('IdType') == 'doi':
                        doi = article_id.text
                        break
                metadata = {
                    'type': 'pmid',
                    'entry_type': 'article',
                    'pmid': pmid,
                    'title': article_elem.findtext('.//ArticleTitle', ''),
                    'authors': self._format_authors_pubmed(article_elem.findall('.//Author')),
                    'year': self._extract_year_pubmed(article_elem),
                    'journal': journal.findtext('.//Title', ''),
                    'volume': journal.findtext('.//JournalIssue/Volume', ''),
                    'issue': journal.findtext('.//JournalIssue/Issue', ''),
                    'pages': article_elem.findtext('.//Pagination/MedlinePgn', ''),
                    'doi': doi
                }
                return metadata
            else:
                print(f'Error: PubMed API returned status {response.status_code} for PMID: {pmid}', file=sys.stderr)
                return None
        except Exception as e:
            print(f'Error extracting metadata from PMID {pmid}: {e}', file=sys.stderr)
            return None
    def extract_from_arxiv(self, arxiv_id: str) -> Optional[Dict]:
        """
        Extract metadata from arXiv ID using arXiv API.
        Args:
            arxiv_id: arXiv identifier
        Returns:
            Metadata dictionary or None (errors reported on stderr)
        """
        url = 'http://export.arxiv.org/api/query'
        params = {
            'id_list': arxiv_id,
            'max_results': 1
        }
        try:
            response = self.session.get(url, params=params, timeout=15)
            if response.status_code == 200:
                # Parse Atom XML
                root = ET.fromstring(response.content)
                ns = {'atom': 'http://www.w3.org/2005/Atom', 'arxiv': 'http://arxiv.org/schemas/atom'}
                entry = root.find('atom:entry', ns)
                if entry is None:
                    print(f'Error: No entry found for arXiv ID: {arxiv_id}', file=sys.stderr)
                    return None
                # Extract DOI if published
                doi_elem = entry.find('arxiv:doi', ns)
                doi = doi_elem.text if doi_elem is not None else None
                # Extract journal reference if published
                journal_ref_elem = entry.find('arxiv:journal_ref', ns)
                journal_ref = journal_ref_elem.text if journal_ref_elem is not None else None
                # Get publication date (Atom timestamp; first 4 chars = year)
                published = entry.findtext('atom:published', '', ns)
                year = published[:4] if published else ''
                # Get authors
                authors = []
                for author in entry.findall('atom:author', ns):
                    name = author.findtext('atom:name', '', ns)
                    if name:
                        authors.append(name)
                # A DOI implies the preprint was published -> @article;
                # otherwise it is rendered as @misc.
                metadata = {
                    'type': 'arxiv',
                    'entry_type': 'misc' if not doi else 'article',
                    'arxiv_id': arxiv_id,
                    'title': entry.findtext('atom:title', '', ns).strip().replace('\n', ' '),
                    'authors': ' and '.join(authors),
                    'year': year,
                    'doi': doi,
                    'journal_ref': journal_ref,
                    'abstract': entry.findtext('atom:summary', '', ns).strip().replace('\n', ' '),
                    'url': f'https://arxiv.org/abs/{arxiv_id}'
                }
                return metadata
            else:
                print(f'Error: arXiv API returned status {response.status_code} for ID: {arxiv_id}', file=sys.stderr)
                return None
        except Exception as e:
            print(f'Error extracting metadata from arXiv {arxiv_id}: {e}', file=sys.stderr)
            return None
    def metadata_to_bibtex(self, metadata: Dict, citation_key: Optional[str] = None) -> str:
        """
        Convert metadata dictionary to BibTeX format.
        Args:
            metadata: Metadata dictionary
            citation_key: Optional custom citation key (auto-generated
                from author/year/title when omitted)
        Returns:
            BibTeX string
        """
        if not citation_key:
            citation_key = self._generate_citation_key(metadata)
        entry_type = metadata.get('entry_type', 'misc')
        # Build BibTeX entry
        lines = [f'@{entry_type}{{{citation_key},']
        # Add fields
        if metadata.get('authors'):
            lines.append(f' author = {{{metadata["authors"]}}},')
        if metadata.get('title'):
            # Protect capitalization
            title = self._protect_title(metadata['title'])
            lines.append(f' title = {{{title}}},')
        if entry_type == 'article' and metadata.get('journal'):
            lines.append(f' journal = {{{metadata["journal"]}}},')
        elif entry_type == 'misc' and metadata.get('type') == 'arxiv':
            lines.append(f' howpublished = {{arXiv}},')
        if metadata.get('year'):
            lines.append(f' year = {{{metadata["year"]}}},')
        if metadata.get('volume'):
            lines.append(f' volume = {{{metadata["volume"]}}},')
        if metadata.get('issue'):
            lines.append(f' number = {{{metadata["issue"]}}},')
        if metadata.get('pages'):
            # NOTE(review): replaces every '-', so pages already containing
            # '--' would become '----' — confirm inputs are single-hyphen.
            pages = metadata['pages'].replace('-', '--')
            lines.append(f' pages = {{{pages}}},')
        if metadata.get('doi'):
            lines.append(f' doi = {{{metadata["doi"]}}},')
        elif metadata.get('url'):
            lines.append(f' url = {{{metadata["url"]}}},')
        if metadata.get('pmid'):
            lines.append(f' note = {{PMID: {metadata["pmid"]}}},')
        if metadata.get('type') == 'arxiv' and not metadata.get('doi'):
            lines.append(f' note = {{Preprint}},')
        # Remove trailing comma from last field
        if lines[-1].endswith(','):
            lines[-1] = lines[-1][:-1]
        lines.append('}')
        return '\n'.join(lines)
    def _crossref_type_to_bibtex(self, crossref_type: str) -> str:
        """Map CrossRef type to BibTeX entry type (unknown types -> misc)."""
        type_map = {
            'journal-article': 'article',
            'book': 'book',
            'book-chapter': 'incollection',
            'proceedings-article': 'inproceedings',
            'posted-content': 'misc',
            'dataset': 'misc',
            'report': 'techreport'
        }
        return type_map.get(crossref_type, 'misc')
    def _format_authors_crossref(self, authors: List[Dict]) -> str:
        """Format author list from CrossRef data as 'Family, Given and ...'."""
        if not authors:
            return ''
        formatted = []
        for author in authors:
            given = author.get('given', '')
            family = author.get('family', '')
            # Authors without a family name are dropped entirely.
            if family:
                if given:
                    formatted.append(f'{family}, {given}')
                else:
                    formatted.append(family)
        return ' and '.join(formatted)
    def _format_authors_pubmed(self, authors: List) -> str:
        """Format author list from PubMed XML as 'Last, Fore and ...'."""
        formatted = []
        for author in authors:
            last_name = author.findtext('.//LastName', '')
            fore_name = author.findtext('.//ForeName', '')
            # Collective/group authors without LastName are dropped.
            if last_name:
                if fore_name:
                    formatted.append(f'{last_name}, {fore_name}')
                else:
                    formatted.append(last_name)
        return ' and '.join(formatted)
    def _extract_year_crossref(self, message: Dict) -> str:
        """Extract year from CrossRef message ('' when no date present)."""
        # Try published-print first, then published-online
        date_parts = message.get('published-print', {}).get('date-parts', [[]])
        if not date_parts or not date_parts[0]:
            date_parts = message.get('published-online', {}).get('date-parts', [[]])
        if date_parts and date_parts[0]:
            return str(date_parts[0][0])
        return ''
    def _extract_year_pubmed(self, article: ET.Element) -> str:
        """Extract year from PubMed XML, falling back to MedlineDate text."""
        year = article.findtext('.//Journal/JournalIssue/PubDate/Year', '')
        if not year:
            # MedlineDate is free text like '2020 Jan-Feb'; take first 4 digits.
            medline_date = article.findtext('.//Journal/JournalIssue/PubDate/MedlineDate', '')
            if medline_date:
                year_match = re.search(r'\d{4}', medline_date)
                if year_match:
                    year = year_match.group()
        return year
    def _generate_citation_key(self, metadata: Dict) -> str:
        """Generate a citation key: <FirstAuthorLastName><year><titleword>."""
        # Get first author last name
        authors = metadata.get('authors', '')
        if authors:
            first_author = authors.split(' and ')[0]
            if ',' in first_author:
                last_name = first_author.split(',')[0].strip()
            else:
                last_name = first_author.split()[-1] if first_author else 'Unknown'
        else:
            last_name = 'Unknown'
        # Get year
        year = metadata.get('year', '').strip()
        if not year:
            year = 'XXXX'
        # Clean last name (remove special characters)
        last_name = re.sub(r'[^a-zA-Z]', '', last_name)
        # Get keyword from title (first word with >= 4 letters)
        title = metadata.get('title', '')
        words = re.findall(r'\b[a-zA-Z]{4,}\b', title)
        keyword = words[0].lower() if words else 'paper'
        return f'{last_name}{year}{keyword}'
    def _protect_title(self, title: str) -> str:
        """Protect capitalization in title for BibTeX by bracing known words."""
        # Protect common acronyms and proper nouns
        protected_words = [
            'DNA', 'RNA', 'CRISPR', 'COVID', 'HIV', 'AIDS', 'AlphaFold',
            'Python', 'AI', 'ML', 'GPU', 'CPU', 'USA', 'UK', 'EU'
        ]
        # NOTE(review): IGNORECASE means a lowercase match (e.g. 'usa') is
        # replaced by the canonical uppercase form '{USA}', altering the
        # original title text — confirm this is intended.
        for word in protected_words:
            title = re.sub(rf'\b{word}\b', f'{{{word}}}', title, flags=re.IGNORECASE)
        return title
    def extract(self, identifier: str) -> Optional[str]:
        """
        Extract metadata and return BibTeX.
        Args:
            identifier: DOI, PMID, arXiv ID, or URL
        Returns:
            BibTeX string or None
        """
        id_type, clean_id = self.identify_type(identifier)
        print(f'Identified as {id_type}: {clean_id}', file=sys.stderr)
        metadata = None
        if id_type == 'doi':
            metadata = self.extract_from_doi(clean_id)
        elif id_type == 'pmid':
            metadata = self.extract_from_pmid(clean_id)
        elif id_type == 'arxiv':
            metadata = self.extract_from_arxiv(clean_id)
        else:
            # NOTE(review): identify_type may also return 'pmcid' or 'url';
            # both fall through to this error branch and are never fetched.
            print(f'Error: Unknown identifier type: {identifier}', file=sys.stderr)
            return None
        if metadata:
            return self.metadata_to_bibtex(metadata)
        else:
            return None
def main():
    """Command-line interface.

    Collects identifiers from the --doi/--pmid/--arxiv/--url options
    and/or an input file (one identifier per line), extracts metadata for
    each, and writes BibTeX (or a JSON wrapper) to stdout or a file.
    Exits with status 1 when no identifiers are supplied, the input file
    cannot be read, or no extraction succeeds.
    """
    parser = argparse.ArgumentParser(
        description='Extract citation metadata from DOI, PMID, arXiv ID, or URL',
        epilog='Example: python extract_metadata.py --doi 10.1038/s41586-021-03819-2'
    )
    parser.add_argument('--doi', help='Digital Object Identifier')
    parser.add_argument('--pmid', help='PubMed ID')
    parser.add_argument('--arxiv', help='arXiv ID')
    parser.add_argument('--url', help='URL to article')
    parser.add_argument('-i', '--input', help='Input file with identifiers (one per line)')
    parser.add_argument('-o', '--output', help='Output file for BibTeX (default: stdout)')
    parser.add_argument('--format', choices=['bibtex', 'json'], default='bibtex', help='Output format')
    parser.add_argument('--email', help='Email for NCBI E-utilities (recommended)')
    args = parser.parse_args()
    # Collect identifiers (options and file contents may be combined)
    identifiers = []
    if args.doi:
        identifiers.append(args.doi)
    if args.pmid:
        identifiers.append(args.pmid)
    if args.arxiv:
        identifiers.append(args.arxiv)
    if args.url:
        identifiers.append(args.url)
    if args.input:
        try:
            with open(args.input, 'r', encoding='utf-8') as f:
                # Skip blank lines; each non-blank line is one identifier
                file_ids = [line.strip() for line in f if line.strip()]
            identifiers.extend(file_ids)
        except Exception as e:
            print(f'Error reading input file: {e}', file=sys.stderr)
            sys.exit(1)
    if not identifiers:
        parser.print_help()
        sys.exit(1)
    # Extract metadata for each identifier; failures are skipped
    extractor = MetadataExtractor(email=args.email)
    bibtex_entries = []
    for i, identifier in enumerate(identifiers):
        print(f'\nProcessing {i+1}/{len(identifiers)}...', file=sys.stderr)
        bibtex = extractor.extract(identifier)
        if bibtex:
            bibtex_entries.append(bibtex)
        # Rate limiting
        if i < len(identifiers) - 1:
            time.sleep(0.5)
    if not bibtex_entries:
        print('Error: No successful extractions', file=sys.stderr)
        sys.exit(1)
    # Format output: concatenated BibTeX, or a JSON wrapper with a count
    if args.format == 'bibtex':
        output = '\n\n'.join(bibtex_entries) + '\n'
    else:  # json
        output = json.dumps({
            'count': len(bibtex_entries),
            'entries': bibtex_entries
        }, indent=2)
    # Write output (file when -o given, stdout otherwise)
    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f'\nSuccessfully wrote {len(bibtex_entries)} entries to {args.output}', file=sys.stderr)
    else:
        print(output)
    print(f'\nExtracted {len(bibtex_entries)}/{len(identifiers)} entries', file=sys.stderr)
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,349 @@
#!/usr/bin/env python3
"""
BibTeX Formatter and Cleaner
Format, clean, sort, and deduplicate BibTeX files.
"""
import sys
import re
import argparse
from typing import List, Dict, Tuple
from collections import OrderedDict
class BibTeXFormatter:
"""Format and clean BibTeX entries."""
def __init__(self):
# Standard field order for readability
self.field_order = [
'author', 'editor', 'title', 'booktitle', 'journal',
'year', 'month', 'volume', 'number', 'pages',
'publisher', 'address', 'edition', 'series',
'school', 'institution', 'organization',
'howpublished', 'doi', 'url', 'isbn', 'issn',
'note', 'abstract', 'keywords'
]
def parse_bibtex_file(self, filepath: str) -> List[Dict]:
"""
Parse BibTeX file and extract entries.
Args:
filepath: Path to BibTeX file
Returns:
List of entry dictionaries
"""
try:
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read()
except Exception as e:
print(f'Error reading file: {e}', file=sys.stderr)
return []
entries = []
# Match BibTeX entries
pattern = r'@(\w+)\s*\{\s*([^,\s]+)\s*,(.*?)\n\}'
matches = re.finditer(pattern, content, re.DOTALL | re.IGNORECASE)
for match in matches:
entry_type = match.group(1).lower()
citation_key = match.group(2).strip()
fields_text = match.group(3)
# Parse fields
fields = OrderedDict()
field_pattern = r'(\w+)\s*=\s*\{([^}]*)\}|(\w+)\s*=\s*"([^"]*)"'
field_matches = re.finditer(field_pattern, fields_text)
for field_match in field_matches:
if field_match.group(1):
field_name = field_match.group(1).lower()
field_value = field_match.group(2)
else:
field_name = field_match.group(3).lower()
field_value = field_match.group(4)
fields[field_name] = field_value.strip()
entries.append({
'type': entry_type,
'key': citation_key,
'fields': fields
})
return entries
def format_entry(self, entry: Dict) -> str:
"""
Format a single BibTeX entry.
Args:
entry: Entry dictionary
Returns:
Formatted BibTeX string
"""
lines = [f'@{entry["type"]}{{{entry["key"]},']
# Order fields according to standard order
ordered_fields = OrderedDict()
# Add fields in standard order
for field_name in self.field_order:
if field_name in entry['fields']:
ordered_fields[field_name] = entry['fields'][field_name]
# Add any remaining fields
for field_name, field_value in entry['fields'].items():
if field_name not in ordered_fields:
ordered_fields[field_name] = field_value
# Format each field
max_field_len = max(len(f) for f in ordered_fields.keys()) if ordered_fields else 0
for field_name, field_value in ordered_fields.items():
# Pad field name for alignment
padded_field = field_name.ljust(max_field_len)
lines.append(f' {padded_field} = {{{field_value}}},')
# Remove trailing comma from last field
if lines[-1].endswith(','):
lines[-1] = lines[-1][:-1]
lines.append('}')
return '\n'.join(lines)
def fix_common_issues(self, entry: Dict) -> Dict:
"""
Fix common formatting issues in entry.
Args:
entry: Entry dictionary
Returns:
Fixed entry dictionary
"""
fixed = entry.copy()
fields = fixed['fields'].copy()
# Fix page ranges (single hyphen to double hyphen)
if 'pages' in fields:
pages = fields['pages']
# Replace single hyphen with double hyphen if it's a range
if re.search(r'\d-\d', pages) and '--' not in pages:
pages = re.sub(r'(\d)-(\d)', r'\1--\2', pages)
fields['pages'] = pages
# Remove "pp." from pages
if 'pages' in fields:
pages = fields['pages']
pages = re.sub(r'^pp\.\s*', '', pages, flags=re.IGNORECASE)
fields['pages'] = pages
# Fix DOI (remove URL prefix if present)
if 'doi' in fields:
doi = fields['doi']
doi = doi.replace('https://doi.org/', '')
doi = doi.replace('http://doi.org/', '')
doi = doi.replace('doi:', '')
fields['doi'] = doi
# Fix author separators (semicolon or ampersand to 'and')
if 'author' in fields:
author = fields['author']
author = author.replace(';', ' and')
author = author.replace(' & ', ' and ')
# Clean up multiple 'and's
author = re.sub(r'\s+and\s+and\s+', ' and ', author)
fields['author'] = author
fixed['fields'] = fields
return fixed
def deduplicate_entries(self, entries: List[Dict]) -> List[Dict]:
"""
Remove duplicate entries based on DOI or citation key.
Args:
entries: List of entry dictionaries
Returns:
List of unique entries
"""
seen_dois = set()
seen_keys = set()
unique_entries = []
for entry in entries:
doi = entry['fields'].get('doi', '').strip()
key = entry['key']
# Check DOI first (more reliable)
if doi:
if doi in seen_dois:
print(f'Duplicate DOI found: {doi} (skipping {key})', file=sys.stderr)
continue
seen_dois.add(doi)
# Check citation key
if key in seen_keys:
print(f'Duplicate citation key found: {key} (skipping)', file=sys.stderr)
continue
seen_keys.add(key)
unique_entries.append(entry)
return unique_entries
def sort_entries(self, entries: List[Dict], sort_by: str = 'key', descending: bool = False) -> List[Dict]:
    """
    Sort entries by the specified field (stable sort).

    Args:
        entries: List of entry dictionaries.
        sort_by: Field to sort by ('key', 'year', 'author', 'title');
            any other value falls back to sorting by citation key.
        descending: Sort in descending order.

    Returns:
        New sorted list of entries.
    """
    def get_sort_key(entry: Dict) -> str:
        if sort_by == 'year':
            # Entries without a year sort last.
            return entry['fields'].get('year', '9999')
        if sort_by == 'author':
            author = entry['fields'].get('author', '')
            if not author:
                return 'zzz'
            # Restrict to the first author before extracting a surname
            # (previously 'A and B' leaked the later names into the key).
            first_author = author.split(' and ')[0].strip()
            if ',' in first_author:
                # 'Last, First' form: surname precedes the comma.
                return first_author.split(',')[0].strip().lower()
            # 'First Last' form: surname is the final token (the old code
            # sorted on the FIRST token, i.e. the given name).
            parts = first_author.split()
            return parts[-1].lower() if parts else 'zzz'
        if sort_by == 'title':
            return entry['fields'].get('title', '').lower()
        # 'key' and any unrecognized value: sort by citation key.
        return entry['key'].lower()

    return sorted(entries, key=get_sort_key, reverse=descending)
def format_file(self, filepath: str, output: str = None,
                deduplicate: bool = False, sort_by: str = None,
                descending: bool = False, fix_issues: bool = True) -> None:
    """
    Format an entire BibTeX file: parse, optionally fix/deduplicate/sort,
    then write the re-formatted entries back out.

    All progress messages go to stderr so stdout stays clean.

    Args:
        filepath: Input BibTeX file.
        output: Output file (None overwrites the input file in place).
        deduplicate: Remove duplicate entries.
        sort_by: Field to sort by ('key', 'year', 'author', 'title').
        descending: Sort in descending order.
        fix_issues: Fix common formatting issues.

    Raises:
        SystemExit: With code 1 if the output file cannot be written.
    """
    print(f'Parsing {filepath}...', file=sys.stderr)
    entries = self.parse_bibtex_file(filepath)
    # Nothing to do for an empty/unparseable file; not treated as an error.
    if not entries:
        print('No entries found', file=sys.stderr)
        return
    print(f'Found {len(entries)} entries', file=sys.stderr)
    # Fix common issues (page dashes, DOI prefixes, author separators).
    if fix_issues:
        print('Fixing common issues...', file=sys.stderr)
        entries = [self.fix_common_issues(e) for e in entries]
    # Deduplicate (by DOI, then citation key; first occurrence wins).
    if deduplicate:
        print('Removing duplicates...', file=sys.stderr)
        original_count = len(entries)
        entries = self.deduplicate_entries(entries)
        removed = original_count - len(entries)
        if removed > 0:
            print(f'Removed {removed} duplicate(s)', file=sys.stderr)
    # Sort (stable) by the requested field.
    if sort_by:
        print(f'Sorting by {sort_by}...', file=sys.stderr)
        entries = self.sort_entries(entries, sort_by, descending)
    # Format entries back into BibTeX text.
    print('Formatting entries...', file=sys.stderr)
    formatted_entries = [self.format_entry(e) for e in entries]
    # Write output: entries separated by a blank line, trailing newline.
    output_content = '\n\n'.join(formatted_entries) + '\n'
    output_file = output or filepath
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(output_content)
        print(f'Successfully wrote {len(entries)} entries to {output_file}', file=sys.stderr)
    except Exception as e:
        print(f'Error writing file: {e}', file=sys.stderr)
        sys.exit(1)
def main():
    """Command-line interface for the BibTeX formatter.

    Parses arguments and delegates all work to BibTeXFormatter.format_file.
    By default the input file is overwritten in place and common issues
    are fixed; use --no-fix / -o to change that.
    """
    parser = argparse.ArgumentParser(
        description='Format, clean, sort, and deduplicate BibTeX files',
        epilog='Example: python format_bibtex.py references.bib --deduplicate --sort year'
    )
    parser.add_argument(
        'file',
        help='BibTeX file to format'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output file (default: overwrite input file)'
    )
    parser.add_argument(
        '--deduplicate',
        action='store_true',
        help='Remove duplicate entries'
    )
    parser.add_argument(
        '--sort',
        choices=['key', 'year', 'author', 'title'],
        help='Sort entries by field'
    )
    parser.add_argument(
        '--descending',
        action='store_true',
        help='Sort in descending order'
    )
    parser.add_argument(
        '--no-fix',
        action='store_true',
        help='Do not fix common issues'
    )
    args = parser.parse_args()
    # Format file (fixing is opt-out, hence the negation of --no-fix).
    formatter = BibTeXFormatter()
    formatter.format_file(
        args.file,
        output=args.output,
        deduplicate=args.deduplicate,
        sort_by=args.sort,
        descending=args.descending,
        fix_issues=not args.no_fix
    )


# Run the CLI only when executed as a script.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,282 @@
#!/usr/bin/env python3
"""
Google Scholar Search Tool
Search Google Scholar and export results.
Note: This script requires the 'scholarly' library.
Install with: pip install scholarly
"""
import sys
import argparse
import json
import time
import random
from typing import List, Dict, Optional
try:
from scholarly import scholarly, ProxyGenerator
SCHOLARLY_AVAILABLE = True
except ImportError:
SCHOLARLY_AVAILABLE = False
print('Warning: scholarly library not installed. Install with: pip install scholarly', file=sys.stderr)
class GoogleScholarSearcher:
    """Search Google Scholar using the third-party 'scholarly' library.

    All methods print progress/diagnostics to stderr. Network access is
    required; heavy use may be rate-limited by Google Scholar, hence the
    optional free-proxy support and per-result sleep in search().
    """

    def __init__(self, use_proxy: bool = False):
        """
        Initialize searcher.

        Args:
            use_proxy: Use free proxy (helps avoid rate limiting)

        Raises:
            ImportError: If the 'scholarly' library is not installed.
        """
        if not SCHOLARLY_AVAILABLE:
            raise ImportError('scholarly library required. Install with: pip install scholarly')
        # Setup proxy if requested. Proxy failure is non-fatal: searching
        # still works, just with a higher risk of being rate-limited.
        if use_proxy:
            try:
                pg = ProxyGenerator()
                pg.FreeProxies()
                scholarly.use_proxy(pg)
                print('Using free proxy', file=sys.stderr)
            except Exception as e:
                print(f'Warning: Could not setup proxy: {e}', file=sys.stderr)

    def search(self, query: str, max_results: int = 50,
               year_start: Optional[int] = None, year_end: Optional[int] = None,
               sort_by: str = 'relevance') -> List[Dict]:
        """
        Search Google Scholar.

        Note: year filtering is applied AFTER retrieval, so fewer than
        max_results entries may be returned even when more matches exist.

        Args:
            query: Search query
            max_results: Maximum number of results to retrieve
            year_start: Start year filter (inclusive)
            year_end: End year filter (inclusive)
            sort_by: Sort order ('relevance' or 'citations')

        Returns:
            List of result dictionaries (title, authors, year, venue,
            abstract, citations, url, eprint_url)
        """
        if not SCHOLARLY_AVAILABLE:
            print('Error: scholarly library not installed', file=sys.stderr)
            return []
        print(f'Searching Google Scholar: {query}', file=sys.stderr)
        print(f'Max results: {max_results}', file=sys.stderr)
        results = []
        try:
            # Perform search; scholarly returns a lazy iterator of pubs.
            search_query = scholarly.search_pubs(query)
            for i, result in enumerate(search_query):
                if i >= max_results:
                    break
                print(f'Retrieved {i+1}/{max_results}', file=sys.stderr)
                # Extract metadata; missing keys default to empty values.
                metadata = {
                    'title': result.get('bib', {}).get('title', ''),
                    'authors': ', '.join(result.get('bib', {}).get('author', [])),
                    'year': result.get('bib', {}).get('pub_year', ''),
                    'venue': result.get('bib', {}).get('venue', ''),
                    'abstract': result.get('bib', {}).get('abstract', ''),
                    'citations': result.get('num_citations', 0),
                    'url': result.get('pub_url', ''),
                    'eprint_url': result.get('eprint_url', ''),
                }
                # Filter by year; a non-numeric year is kept as-is
                # (ValueError deliberately skips the filter, not the entry).
                if year_start or year_end:
                    try:
                        pub_year = int(metadata['year']) if metadata['year'] else 0
                        if year_start and pub_year < year_start:
                            continue
                        if year_end and pub_year > year_end:
                            continue
                    except ValueError:
                        pass
                results.append(metadata)
                # Rate limiting to avoid blocking by Google Scholar.
                time.sleep(random.uniform(2, 5))
        except Exception as e:
            # Partial results gathered before the failure are still returned.
            print(f'Error during search: {e}', file=sys.stderr)
        # Sort if requested (most-cited first).
        if sort_by == 'citations' and results:
            results.sort(key=lambda x: x.get('citations', 0), reverse=True)
        return results

    def metadata_to_bibtex(self, metadata: Dict) -> str:
        """Convert a result dictionary from search() to a BibTeX entry string.

        Citation key format: <LastName><year><first long title word>.
        Entry type is guessed from the venue name ('proceedings'/'conference'
        -> @inproceedings, otherwise @article).
        """
        # Generate citation key from the first author's last name.
        # NOTE(review): assumes 'authors' is comma-separated "First Last"
        # names (as produced by search()); the last token is used as surname.
        if metadata.get('authors'):
            first_author = metadata['authors'].split(',')[0].strip()
            last_name = first_author.split()[-1] if first_author else 'Unknown'
        else:
            last_name = 'Unknown'
        year = metadata.get('year', 'XXXX')
        # Get keyword from title: first word of 4+ letters, lowercased.
        import re
        title = metadata.get('title', '')
        words = re.findall(r'\b[a-zA-Z]{4,}\b', title)
        keyword = words[0].lower() if words else 'paper'
        citation_key = f'{last_name}{year}{keyword}'
        # Determine entry type (guess based on venue name).
        venue = metadata.get('venue', '').lower()
        if 'proceedings' in venue or 'conference' in venue:
            entry_type = 'inproceedings'
            venue_field = 'booktitle'
        else:
            entry_type = 'article'
            venue_field = 'journal'
        # Build BibTeX lines; fields are emitted only when present.
        lines = [f'@{entry_type}{{{citation_key},']
        # Convert authors from comma-separated to BibTeX ' and ' separators.
        if metadata.get('authors'):
            authors = metadata['authors'].replace(',', ' and')
            lines.append(f' author = {{{authors}}},')
        if metadata.get('title'):
            lines.append(f' title = {{{metadata["title"]}}},')
        if metadata.get('venue'):
            lines.append(f' {venue_field} = {{{metadata["venue"]}}},')
        if metadata.get('year'):
            lines.append(f' year = {{{metadata["year"]}}},')
        if metadata.get('url'):
            lines.append(f' url = {{{metadata["url"]}}},')
        if metadata.get('citations'):
            lines.append(f' note = {{Cited by: {metadata["citations"]}}},')
        # Remove trailing comma from the last field before closing brace.
        if lines[-1].endswith(','):
            lines[-1] = lines[-1][:-1]
        lines.append('}')
        return '\n'.join(lines)
def main():
    """Command-line interface for the Google Scholar search tool.

    Searches Scholar via GoogleScholarSearcher and writes results as JSON
    or BibTeX to stdout or a file. Exits with code 1 when the scholarly
    library is missing or the search yields no results.
    """
    parser = argparse.ArgumentParser(
        description='Search Google Scholar (requires scholarly library)',
        epilog='Example: python search_google_scholar.py "machine learning" --limit 50'
    )
    parser.add_argument(
        'query',
        help='Search query'
    )
    parser.add_argument(
        '--limit',
        type=int,
        default=50,
        help='Maximum number of results (default: 50)'
    )
    parser.add_argument(
        '--year-start',
        type=int,
        help='Start year for filtering'
    )
    parser.add_argument(
        '--year-end',
        type=int,
        help='End year for filtering'
    )
    parser.add_argument(
        '--sort-by',
        choices=['relevance', 'citations'],
        default='relevance',
        help='Sort order (default: relevance)'
    )
    parser.add_argument(
        '--use-proxy',
        action='store_true',
        help='Use free proxy to avoid rate limiting'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output file (default: stdout)'
    )
    parser.add_argument(
        '--format',
        choices=['json', 'bibtex'],
        default='json',
        help='Output format (default: json)'
    )
    args = parser.parse_args()
    # Bail out early with install instructions if scholarly is missing.
    if not SCHOLARLY_AVAILABLE:
        print('\nError: scholarly library not installed', file=sys.stderr)
        print('Install with: pip install scholarly', file=sys.stderr)
        print('\nAlternatively, use PubMed search for biomedical literature:', file=sys.stderr)
        print(' python search_pubmed.py "your query"', file=sys.stderr)
        sys.exit(1)
    # Search
    searcher = GoogleScholarSearcher(use_proxy=args.use_proxy)
    results = searcher.search(
        args.query,
        max_results=args.limit,
        year_start=args.year_start,
        year_end=args.year_end,
        sort_by=args.sort_by
    )
    if not results:
        print('No results found', file=sys.stderr)
        sys.exit(1)
    # Format output as a JSON document or blank-line-separated BibTeX.
    if args.format == 'json':
        output = json.dumps({
            'query': args.query,
            'count': len(results),
            'results': results
        }, indent=2)
    else:  # bibtex
        bibtex_entries = [searcher.metadata_to_bibtex(r) for r in results]
        output = '\n\n'.join(bibtex_entries) + '\n'
    # Write output to the requested file, or stdout by default.
    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f'Wrote {len(results)} results to {args.output}', file=sys.stderr)
    else:
        print(output)
        print(f'\nRetrieved {len(results)} results', file=sys.stderr)


# Run the CLI only when executed as a script.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,398 @@
#!/usr/bin/env python3
"""
PubMed Search Tool
Search PubMed using E-utilities API and export results.
"""
import sys
import os
import requests
import argparse
import json
import time
import xml.etree.ElementTree as ET
from typing import List, Dict, Optional
from datetime import datetime
class PubMedSearcher:
    """Search PubMed using the NCBI E-utilities API (ESearch + EFetch).

    Requests are rate-limited per NCBI policy: ~10/sec with an API key,
    ~3/sec without. Progress and errors are printed to stderr.
    """

    def __init__(self, api_key: Optional[str] = None, email: Optional[str] = None):
        """
        Initialize searcher.

        Args:
            api_key: NCBI API key (optional but recommended; falls back to
                the NCBI_API_KEY environment variable)
            email: Email for Entrez (optional but recommended; falls back
                to the NCBI_EMAIL environment variable)
        """
        self.api_key = api_key or os.getenv('NCBI_API_KEY', '')
        self.email = email or os.getenv('NCBI_EMAIL', '')
        self.base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
        self.session = requests.Session()
        # Rate limiting: NCBI allows 10 req/sec with a key, 3 without.
        self.delay = 0.11 if self.api_key else 0.34  # 10/sec with key, 3/sec without

    def search(self, query: str, max_results: int = 100,
               date_start: Optional[str] = None, date_end: Optional[str] = None,
               publication_types: Optional[List[str]] = None) -> List[str]:
        """
        Search PubMed via ESearch and return matching PMIDs.

        Args:
            query: Search query (PubMed syntax)
            max_results: Maximum number of results
            date_start: Start date (YYYY/MM/DD or YYYY)
            date_end: End date (YYYY/MM/DD or YYYY)
            publication_types: List of publication types to filter

        Returns:
            List of PMIDs (empty on any request/parse failure).
        """
        # Build query with filters appended in PubMed field-tag syntax.
        full_query = query
        # Add date range; open ends default to 1900 / current year.
        if date_start or date_end:
            start = date_start or '1900'
            end = date_end or datetime.now().strftime('%Y')
            full_query += f' AND {start}:{end}[Publication Date]'
        # Add publication types, OR-ed together within one AND group.
        if publication_types:
            pub_type_query = ' OR '.join([f'"{pt}"[Publication Type]' for pt in publication_types])
            full_query += f' AND ({pub_type_query})'
        print(f'Searching PubMed: {full_query}', file=sys.stderr)
        # ESearch to get PMIDs (JSON response mode).
        esearch_url = self.base_url + 'esearch.fcgi'
        params = {
            'db': 'pubmed',
            'term': full_query,
            'retmax': max_results,
            'retmode': 'json'
        }
        if self.email:
            params['email'] = self.email
        if self.api_key:
            params['api_key'] = self.api_key
        try:
            response = self.session.get(esearch_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            pmids = data['esearchresult']['idlist']
            count = int(data['esearchresult']['count'])
            print(f'Found {count} results, retrieving {len(pmids)}', file=sys.stderr)
            return pmids
        except Exception as e:
            print(f'Error searching PubMed: {e}', file=sys.stderr)
            return []

    def fetch_metadata(self, pmids: List[str]) -> List[Dict]:
        """
        Fetch article metadata for PMIDs via EFetch (XML mode).

        Failed batches are skipped, so the result may contain fewer
        records than PMIDs requested.

        Args:
            pmids: List of PubMed IDs

        Returns:
            List of metadata dictionaries (see _extract_metadata_from_xml).
        """
        if not pmids:
            return []
        metadata_list = []
        # Fetch in batches of 200 to keep request URLs/responses manageable.
        batch_size = 200
        for i in range(0, len(pmids), batch_size):
            batch = pmids[i:i+batch_size]
            print(f'Fetching metadata for PMIDs {i+1}-{min(i+batch_size, len(pmids))}...', file=sys.stderr)
            efetch_url = self.base_url + 'efetch.fcgi'
            params = {
                'db': 'pubmed',
                'id': ','.join(batch),
                'retmode': 'xml',
                'rettype': 'abstract'
            }
            if self.email:
                params['email'] = self.email
            if self.api_key:
                params['api_key'] = self.api_key
            try:
                response = self.session.get(efetch_url, params=params, timeout=60)
                response.raise_for_status()
                # Parse XML and extract one record per PubmedArticle.
                root = ET.fromstring(response.content)
                articles = root.findall('.//PubmedArticle')
                for article in articles:
                    metadata = self._extract_metadata_from_xml(article)
                    if metadata:
                        metadata_list.append(metadata)
                # Rate limiting between batches (NCBI usage policy).
                time.sleep(self.delay)
            except Exception as e:
                print(f'Error fetching metadata for batch: {e}', file=sys.stderr)
                continue
        return metadata_list

    def _extract_metadata_from_xml(self, article: ET.Element) -> Optional[Dict]:
        """Extract metadata from a PubmedArticle XML element.

        Returns None (with a stderr message) if any required sub-element
        is missing and causes an exception during extraction.
        """
        try:
            medline_citation = article.find('.//MedlineCitation')
            article_elem = medline_citation.find('.//Article')
            journal = article_elem.find('.//Journal')
            # Get PMID
            pmid = medline_citation.findtext('.//PMID', '')
            # Get DOI from the ArticleId list (IdType="doi").
            doi = None
            article_ids = article.findall('.//ArticleId')
            for article_id in article_ids:
                if article_id.get('IdType') == 'doi':
                    doi = article_id.text
                    break
            # Get authors as 'LastName, ForeName' strings (ForeName optional).
            authors = []
            author_list = article_elem.find('.//AuthorList')
            if author_list is not None:
                for author in author_list.findall('.//Author'):
                    last_name = author.findtext('.//LastName', '')
                    fore_name = author.findtext('.//ForeName', '')
                    if last_name:
                        if fore_name:
                            authors.append(f'{last_name}, {fore_name}')
                        else:
                            authors.append(last_name)
            # Get year; fall back to the first 4-digit run in MedlineDate
            # (used by PubMed for irregular publication dates).
            year = article_elem.findtext('.//Journal/JournalIssue/PubDate/Year', '')
            if not year:
                medline_date = article_elem.findtext('.//Journal/JournalIssue/PubDate/MedlineDate', '')
                if medline_date:
                    import re
                    year_match = re.search(r'\d{4}', medline_date)
                    if year_match:
                        year = year_match.group()
            metadata = {
                'pmid': pmid,
                'doi': doi,
                'title': article_elem.findtext('.//ArticleTitle', ''),
                'authors': ' and '.join(authors),
                'journal': journal.findtext('.//Title', ''),
                'year': year,
                'volume': journal.findtext('.//JournalIssue/Volume', ''),
                'issue': journal.findtext('.//JournalIssue/Issue', ''),
                'pages': article_elem.findtext('.//Pagination/MedlinePgn', ''),
                'abstract': article_elem.findtext('.//Abstract/AbstractText', '')
            }
            return metadata
        except Exception as e:
            print(f'Error extracting metadata: {e}', file=sys.stderr)
            return None

    def metadata_to_bibtex(self, metadata: Dict) -> str:
        """Convert a metadata dictionary to a BibTeX @article entry string.

        Citation key format: <LastName><year>pmid<PMID>. Only non-empty
        fields are emitted; the PMID is recorded in a 'note' field.
        """
        # Generate citation key from the first author's last name; the
        # 'authors' field uses ' and ' separators (see _extract_metadata_from_xml).
        if metadata.get('authors'):
            first_author = metadata['authors'].split(' and ')[0]
            if ',' in first_author:
                last_name = first_author.split(',')[0].strip()
            else:
                last_name = first_author.split()[0]
        else:
            last_name = 'Unknown'
        year = metadata.get('year', 'XXXX')
        citation_key = f'{last_name}{year}pmid{metadata.get("pmid", "")}'
        # Build BibTeX entry line by line.
        lines = [f'@article{{{citation_key},']
        if metadata.get('authors'):
            lines.append(f' author = {{{metadata["authors"]}}},')
        if metadata.get('title'):
            lines.append(f' title = {{{metadata["title"]}}},')
        if metadata.get('journal'):
            lines.append(f' journal = {{{metadata["journal"]}}},')
        if metadata.get('year'):
            lines.append(f' year = {{{metadata["year"]}}},')
        if metadata.get('volume'):
            lines.append(f' volume = {{{metadata["volume"]}}},')
        if metadata.get('issue'):
            lines.append(f' number = {{{metadata["issue"]}}},')
        if metadata.get('pages'):
            # PubMed page ranges use a single hyphen; BibTeX wants '--'.
            pages = metadata['pages'].replace('-', '--')
            lines.append(f' pages = {{{pages}}},')
        if metadata.get('doi'):
            lines.append(f' doi = {{{metadata["doi"]}}},')
        if metadata.get('pmid'):
            lines.append(f' note = {{PMID: {metadata["pmid"]}}},')
        # Remove trailing comma from the last field before closing brace.
        if lines[-1].endswith(','):
            lines[-1] = lines[-1][:-1]
        lines.append('}')
        return '\n'.join(lines)
def main():
    """Command-line interface for the PubMed search tool.

    The query may come from the positional argument, --query, or
    --query-file (the file's content takes precedence when given).
    Results are written as JSON or BibTeX to stdout or a file; exits
    with code 1 on missing query, read failure, or empty result set.
    """
    parser = argparse.ArgumentParser(
        description='Search PubMed using E-utilities API',
        epilog='Example: python search_pubmed.py "CRISPR gene editing" --limit 100'
    )
    parser.add_argument(
        'query',
        nargs='?',
        help='Search query (PubMed syntax)'
    )
    parser.add_argument(
        '--query',
        dest='query_arg',
        help='Search query (alternative to positional argument)'
    )
    parser.add_argument(
        '--query-file',
        help='File containing search query'
    )
    parser.add_argument(
        '--limit',
        type=int,
        default=100,
        help='Maximum number of results (default: 100)'
    )
    parser.add_argument(
        '--date-start',
        help='Start date (YYYY/MM/DD or YYYY)'
    )
    parser.add_argument(
        '--date-end',
        help='End date (YYYY/MM/DD or YYYY)'
    )
    parser.add_argument(
        '--publication-types',
        help='Comma-separated publication types (e.g., "Review,Clinical Trial")'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output file (default: stdout)'
    )
    parser.add_argument(
        '--format',
        choices=['json', 'bibtex'],
        default='json',
        help='Output format (default: json)'
    )
    parser.add_argument(
        '--api-key',
        help='NCBI API key (or set NCBI_API_KEY env var)'
    )
    parser.add_argument(
        '--email',
        help='Email for Entrez (or set NCBI_EMAIL env var)'
    )
    args = parser.parse_args()
    # Get query: positional wins over --query; --query-file overrides both.
    query = args.query or args.query_arg
    if args.query_file:
        try:
            with open(args.query_file, 'r', encoding='utf-8') as f:
                query = f.read().strip()
        except Exception as e:
            print(f'Error reading query file: {e}', file=sys.stderr)
            sys.exit(1)
    if not query:
        parser.print_help()
        sys.exit(1)
    # Parse publication types from the comma-separated option value.
    pub_types = None
    if args.publication_types:
        pub_types = [pt.strip() for pt in args.publication_types.split(',')]
    # Search PubMed for matching PMIDs.
    searcher = PubMedSearcher(api_key=args.api_key, email=args.email)
    pmids = searcher.search(
        query,
        max_results=args.limit,
        date_start=args.date_start,
        date_end=args.date_end,
        publication_types=pub_types
    )
    if not pmids:
        print('No results found', file=sys.stderr)
        sys.exit(1)
    # Fetch full metadata records for the PMIDs.
    metadata_list = searcher.fetch_metadata(pmids)
    # Format output as a JSON document or blank-line-separated BibTeX.
    if args.format == 'json':
        output = json.dumps({
            'query': query,
            'count': len(metadata_list),
            'results': metadata_list
        }, indent=2)
    else:  # bibtex
        bibtex_entries = [searcher.metadata_to_bibtex(m) for m in metadata_list]
        output = '\n\n'.join(bibtex_entries) + '\n'
    # Write output to the requested file, or stdout by default.
    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(output)
        print(f'Wrote {len(metadata_list)} results to {args.output}', file=sys.stderr)
    else:
        print(output)


# Run the CLI only when executed as a script.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,497 @@
#!/usr/bin/env python3
"""
Citation Validation Tool
Validate BibTeX files for accuracy, completeness, and format compliance.
"""
import sys
import re
import requests
import argparse
import json
from typing import Dict, List, Tuple, Optional
from collections import defaultdict
class CitationValidator:
    """Validate BibTeX entries for errors and inconsistencies.

    Checks per-entry required/recommended fields and field formats,
    detects duplicates across entries, and can optionally verify that
    DOIs resolve (network access via doi.org and the CrossRef API).
    """

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'CitationValidator/1.0 (Citation Management Tool)'
        })
        # Required fields by entry type (missing one is a 'high' error).
        self.required_fields = {
            'article': ['author', 'title', 'journal', 'year'],
            'book': ['title', 'publisher', 'year'],  # author OR editor
            'inproceedings': ['author', 'title', 'booktitle', 'year'],
            'incollection': ['author', 'title', 'booktitle', 'publisher', 'year'],
            'phdthesis': ['author', 'title', 'school', 'year'],
            'mastersthesis': ['author', 'title', 'school', 'year'],
            'techreport': ['author', 'title', 'institution', 'year'],
            'misc': ['title', 'year']
        }
        # Recommended fields (missing one is only a 'medium' warning).
        self.recommended_fields = {
            'article': ['volume', 'pages', 'doi'],
            'book': ['isbn'],
            'inproceedings': ['pages'],
        }

    def parse_bibtex_file(self, filepath: str) -> List[Dict]:
        """
        Parse BibTeX file and extract entries.

        NOTE(review): regex-based parsing — it does not handle nested
        braces inside field values or entries whose closing brace is not
        at the start of a line; such entries may be missed or truncated.

        Args:
            filepath: Path to BibTeX file

        Returns:
            List of entry dictionaries with 'type', 'key', 'fields', 'raw'
            (empty list if the file cannot be read).
        """
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except Exception as e:
            print(f'Error reading file: {e}', file=sys.stderr)
            return []
        entries = []
        # Match BibTeX entries: @type{key, ...fields...} with the closing
        # brace on its own line.
        pattern = r'@(\w+)\s*\{\s*([^,\s]+)\s*,(.*?)\n\}'
        matches = re.finditer(pattern, content, re.DOTALL | re.IGNORECASE)
        for match in matches:
            entry_type = match.group(1).lower()
            citation_key = match.group(2).strip()
            fields_text = match.group(3)
            # Parse fields: either name = {value} or name = "value".
            fields = {}
            field_pattern = r'(\w+)\s*=\s*\{([^}]*)\}|(\w+)\s*=\s*"([^"]*)"'
            field_matches = re.finditer(field_pattern, fields_text)
            for field_match in field_matches:
                # Groups 1/2 hold brace-delimited fields, 3/4 quoted ones.
                if field_match.group(1):
                    field_name = field_match.group(1).lower()
                    field_value = field_match.group(2)
                else:
                    field_name = field_match.group(3).lower()
                    field_value = field_match.group(4)
                fields[field_name] = field_value.strip()
            entries.append({
                'type': entry_type,
                'key': citation_key,
                'fields': fields,
                'raw': match.group(0)
            })
        return entries

    def validate_entry(self, entry: Dict) -> Tuple[List[Dict], List[Dict]]:
        """
        Validate a single BibTeX entry.

        Checks: required fields per type (with the book author-or-editor
        special case), recommended fields, year format/range, DOI format,
        page-range dashes, and author separator style.

        Args:
            entry: Entry dictionary

        Returns:
            Tuple of (errors, warnings); each item is a dict with 'type',
            'severity', 'message' and check-specific keys.
        """
        errors = []
        warnings = []
        entry_type = entry['type']
        key = entry['key']
        fields = entry['fields']
        # Check required fields for the entry type.
        if entry_type in self.required_fields:
            for req_field in self.required_fields[entry_type]:
                if req_field not in fields or not fields[req_field]:
                    # Special case: book can have author OR editor.
                    if entry_type == 'book' and req_field == 'author':
                        if 'editor' not in fields or not fields['editor']:
                            errors.append({
                                'type': 'missing_required_field',
                                'field': 'author or editor',
                                'severity': 'high',
                                'message': f'Entry {key}: Missing required field "author" or "editor"'
                            })
                    else:
                        errors.append({
                            'type': 'missing_required_field',
                            'field': req_field,
                            'severity': 'high',
                            'message': f'Entry {key}: Missing required field "{req_field}"'
                        })
        # Check recommended fields (warnings only).
        if entry_type in self.recommended_fields:
            for rec_field in self.recommended_fields[entry_type]:
                if rec_field not in fields or not fields[rec_field]:
                    warnings.append({
                        'type': 'missing_recommended_field',
                        'field': rec_field,
                        'severity': 'medium',
                        'message': f'Entry {key}: Missing recommended field "{rec_field}"'
                    })
        # Validate year: must be exactly 4 digits; 1600-2030 is 'reasonable'.
        if 'year' in fields:
            year = fields['year']
            if not re.match(r'^\d{4}$', year):
                errors.append({
                    'type': 'invalid_year',
                    'field': 'year',
                    'value': year,
                    'severity': 'high',
                    'message': f'Entry {key}: Invalid year format "{year}" (should be 4 digits)'
                })
            elif int(year) < 1600 or int(year) > 2030:
                warnings.append({
                    'type': 'suspicious_year',
                    'field': 'year',
                    'value': year,
                    'severity': 'medium',
                    'message': f'Entry {key}: Suspicious year "{year}" (outside reasonable range)'
                })
        # Validate DOI format: bare DOI starting with '10.<digits>/'.
        if 'doi' in fields:
            doi = fields['doi']
            if not re.match(r'^10\.\d{4,}/[^\s]+$', doi):
                warnings.append({
                    'type': 'invalid_doi_format',
                    'field': 'doi',
                    'value': doi,
                    'severity': 'medium',
                    'message': f'Entry {key}: Invalid DOI format "{doi}"'
                })
        # Check for single hyphen in pages (BibTeX ranges should use --).
        if 'pages' in fields:
            pages = fields['pages']
            if re.search(r'\d-\d', pages) and '--' not in pages:
                warnings.append({
                    'type': 'page_range_format',
                    'field': 'pages',
                    'value': pages,
                    'severity': 'low',
                    'message': f'Entry {key}: Page range uses single hyphen, should use -- (en-dash)'
                })
        # Check author format: BibTeX separates names with ' and '.
        if 'author' in fields:
            author = fields['author']
            if ';' in author or '&' in author:
                errors.append({
                    'type': 'invalid_author_format',
                    'field': 'author',
                    'severity': 'high',
                    'message': f'Entry {key}: Authors should be separated by " and ", not ";" or "&"'
                })
        return errors, warnings

    def verify_doi(self, doi: str) -> Tuple[bool, Optional[Dict]]:
        """
        Verify DOI resolves correctly and get metadata from CrossRef.

        Any network failure is treated as a non-resolving DOI.

        Args:
            doi: Digital Object Identifier (bare form, no resolver prefix)

        Returns:
            Tuple of (is_valid, metadata); metadata is None when the DOI
            resolves but CrossRef has no record, or on failure.
        """
        try:
            url = f'https://doi.org/{doi}'
            # HEAD request with redirects: a <400 status means it resolves.
            response = self.session.head(url, timeout=10, allow_redirects=True)
            if response.status_code < 400:
                # DOI resolves, now get metadata from CrossRef.
                crossref_url = f'https://api.crossref.org/works/{doi}'
                metadata_response = self.session.get(crossref_url, timeout=10)
                if metadata_response.status_code == 200:
                    data = metadata_response.json()
                    message = data.get('message', {})
                    # Extract key metadata for comparison purposes.
                    metadata = {
                        'title': message.get('title', [''])[0],
                        'year': self._extract_year_crossref(message),
                        'authors': self._format_authors_crossref(message.get('author', [])),
                    }
                    return True, metadata
                else:
                    return True, None  # DOI resolves but no CrossRef metadata
            else:
                return False, None
        except Exception:
            return False, None

    def detect_duplicates(self, entries: List[Dict]) -> List[Dict]:
        """
        Detect duplicate entries by DOI, citation key, and normalized title.

        Args:
            entries: List of entry dictionaries

        Returns:
            List of duplicate-report dictionaries ('type', 'severity',
            'message' plus identifying keys).
        """
        duplicates = []
        # Check for duplicate DOIs (same DOI under different keys).
        doi_map = defaultdict(list)
        for entry in entries:
            doi = entry['fields'].get('doi', '').strip()
            if doi:
                doi_map[doi].append(entry['key'])
        for doi, keys in doi_map.items():
            if len(keys) > 1:
                duplicates.append({
                    'type': 'duplicate_doi',
                    'doi': doi,
                    'entries': keys,
                    'severity': 'high',
                    'message': f'Duplicate DOI {doi} found in entries: {", ".join(keys)}'
                })
        # Check for duplicate citation keys (same key used multiple times).
        key_counts = defaultdict(int)
        for entry in entries:
            key_counts[entry['key']] += 1
        for key, count in key_counts.items():
            if count > 1:
                duplicates.append({
                    'type': 'duplicate_key',
                    'key': key,
                    'count': count,
                    'severity': 'high',
                    'message': f'Citation key "{key}" appears {count} times'
                })
        # Check for similar titles (possible duplicates): titles are
        # lowercased with punctuation removed and whitespace normalized
        # before an exact comparison.
        titles = {}
        for entry in entries:
            title = entry['fields'].get('title', '').lower()
            title = re.sub(r'[^\w\s]', '', title)  # Remove punctuation
            title = ' '.join(title.split())  # Normalize whitespace
            if title:
                if title in titles:
                    duplicates.append({
                        'type': 'similar_title',
                        'entries': [titles[title], entry['key']],
                        'severity': 'medium',
                        'message': f'Possible duplicate: "{titles[title]}" and "{entry["key"]}" have identical titles'
                    })
                else:
                    titles[title] = entry['key']
        return duplicates

    def validate_file(self, filepath: str, check_dois: bool = False) -> Dict:
        """
        Validate entire BibTeX file.

        Args:
            filepath: Path to BibTeX file
            check_dois: Whether to verify DOIs over the network (slow)

        Returns:
            Validation report dictionary with totals plus 'errors',
            'warnings', and 'duplicates' lists.
        """
        print(f'Parsing {filepath}...', file=sys.stderr)
        entries = self.parse_bibtex_file(filepath)
        if not entries:
            # Empty report shape for unreadable or entry-less files.
            return {
                'total_entries': 0,
                'errors': [],
                'warnings': [],
                'duplicates': []
            }
        print(f'Found {len(entries)} entries', file=sys.stderr)
        all_errors = []
        all_warnings = []
        # Validate each entry; tag each finding with its entry key.
        for i, entry in enumerate(entries):
            print(f'Validating entry {i+1}/{len(entries)}: {entry["key"]}', file=sys.stderr)
            errors, warnings = self.validate_entry(entry)
            for error in errors:
                error['entry'] = entry['key']
                all_errors.append(error)
            for warning in warnings:
                warning['entry'] = entry['key']
                all_warnings.append(warning)
        # Check for duplicates across the whole file.
        print('Checking for duplicates...', file=sys.stderr)
        duplicates = self.detect_duplicates(entries)
        # Verify DOIs if requested (one network round-trip per DOI).
        doi_errors = []
        if check_dois:
            print('Verifying DOIs...', file=sys.stderr)
            for i, entry in enumerate(entries):
                doi = entry['fields'].get('doi', '')
                if doi:
                    print(f'Verifying DOI {i+1}: {doi}', file=sys.stderr)
                    is_valid, metadata = self.verify_doi(doi)
                    if not is_valid:
                        doi_errors.append({
                            'type': 'invalid_doi',
                            'entry': entry['key'],
                            'doi': doi,
                            'severity': 'high',
                            'message': f'Entry {entry["key"]}: DOI does not resolve: {doi}'
                        })
        all_errors.extend(doi_errors)
        # NOTE(review): 'valid_entries' counts high-severity ERRORS, not
        # distinct entries, so one entry with several errors is counted
        # multiple times — confirm whether that is intended.
        return {
            'filepath': filepath,
            'total_entries': len(entries),
            'valid_entries': len(entries) - len([e for e in all_errors if e['severity'] == 'high']),
            'errors': all_errors,
            'warnings': all_warnings,
            'duplicates': duplicates
        }

    def _extract_year_crossref(self, message: Dict) -> str:
        """Extract the publication year from a CrossRef 'message' dict.

        Prefers 'published-print', falling back to 'published-online';
        returns '' when neither has date-parts.
        """
        date_parts = message.get('published-print', {}).get('date-parts', [[]])
        if not date_parts or not date_parts[0]:
            date_parts = message.get('published-online', {}).get('date-parts', [[]])
        if date_parts and date_parts[0]:
            return str(date_parts[0][0])
        return ''

    def _format_authors_crossref(self, authors: List[Dict]) -> str:
        """Format a CrossRef author list as 'Family, Given' strings.

        Only the first 3 authors are listed; longer lists end in 'et al.'.
        """
        if not authors:
            return ''
        formatted = []
        for author in authors[:3]:  # First 3 authors
            given = author.get('given', '')
            family = author.get('family', '')
            if family:
                formatted.append(f'{family}, {given}' if given else family)
        if len(authors) > 3:
            formatted.append('et al.')
        return ', '.join(formatted)
def main():
    """Command-line interface for the citation validator.

    Validates a BibTeX file, prints a human-readable report to stdout,
    optionally saves the full report as JSON, and exits with code 1 when
    any errors were found (so it can gate CI pipelines).
    """
    parser = argparse.ArgumentParser(
        description='Validate BibTeX files for errors and inconsistencies',
        epilog='Example: python validate_citations.py references.bib'
    )
    parser.add_argument(
        'file',
        help='BibTeX file to validate'
    )
    parser.add_argument(
        '--check-dois',
        action='store_true',
        help='Verify DOIs resolve correctly (slow)'
    )
    parser.add_argument(
        '--auto-fix',
        action='store_true',
        help='Attempt to auto-fix common issues (not implemented yet)'
    )
    parser.add_argument(
        '--report',
        help='Output file for JSON validation report'
    )
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Show detailed output'
    )
    args = parser.parse_args()
    # Validate file
    validator = CitationValidator()
    report = validator.validate_file(args.file, check_dois=args.check_dois)
    # Print summary header and counts.
    print('\n' + '='*60)
    print('CITATION VALIDATION REPORT')
    print('='*60)
    print(f'\nFile: {args.file}')
    print(f'Total entries: {report["total_entries"]}')
    print(f'Valid entries: {report["valid_entries"]}')
    print(f'Errors: {len(report["errors"])}')
    print(f'Warnings: {len(report["warnings"])}')
    print(f'Duplicates: {len(report["duplicates"])}')
    # Print errors (always shown; extra detail with --verbose).
    if report['errors']:
        print('\n' + '-'*60)
        print('ERRORS (must fix):')
        print('-'*60)
        for error in report['errors']:
            print(f'\n{error["message"]}')
            if args.verbose:
                print(f' Type: {error["type"]}')
                print(f' Severity: {error["severity"]}')
    # Print warnings (only with --verbose).
    if report['warnings'] and args.verbose:
        print('\n' + '-'*60)
        print('WARNINGS (should fix):')
        print('-'*60)
        for warning in report['warnings']:
            print(f'\n{warning["message"]}')
    # Print duplicates (always shown when present).
    if report['duplicates']:
        print('\n' + '-'*60)
        print('DUPLICATES:')
        print('-'*60)
        for dup in report['duplicates']:
            print(f'\n{dup["message"]}')
    # Save full machine-readable report when requested.
    if args.report:
        with open(args.report, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=2)
        print(f'\nDetailed report saved to: {args.report}')
    # Exit with error code if there are errors (useful for CI gating).
    if report['errors']:
        sys.exit(1)


# Run the CLI only when executed as a script.
if __name__ == '__main__':
    main()