Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:30:14 +08:00
commit 1dd5bee3b4
335 changed files with 147360 additions and 0 deletions

View File

@@ -0,0 +1,264 @@
% BibTeX Template File
% Examples of properly formatted entries for all common types
% =============================================================================
% JOURNAL ARTICLES
% =============================================================================
% Journal article with a long author list: the literal `and others` marks the
% truncation, accented names are written with brace-wrapped LaTeX escapes, and
% {AlphaFold} is brace-protected so styles that recase titles keep its casing.
@article{Jumper2021,
  author  = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'\i}dek, Augustin and Potapenko, Anna and others},
  title   = {Highly Accurate Protein Structure Prediction with {AlphaFold}},
  journal = {Nature},
  volume  = {596},
  number  = {7873},
  pages   = {583--589},
  year    = {2021},
  doi     = {10.1038/s41586-021-03819-2}
}
% Two-author journal article: names in `Last, First` form joined by ` and `,
% page range written with a double hyphen, bare DOI without a resolver prefix.
@article{Watson1953,
  author  = {Watson, James D. and Crick, Francis H. C.},
  title   = {Molecular Structure of Nucleic Acids: A Structure for Deoxyribose Nucleic Acid},
  journal = {Nature},
  volume  = {171},
  number  = {4356},
  pages   = {737--738},
  year    = {1953},
  doi     = {10.1038/171737a0}
}
% Journal article whose `pages` field holds a single number instead of a
% range; the hyphenated term {CRISPR-Cas9} is brace-protected as one unit.
@article{Doudna2014,
  author  = {Doudna, Jennifer A. and Charpentier, Emmanuelle},
  title   = {The New Frontier of Genome Engineering with {CRISPR-Cas9}},
  journal = {Science},
  volume  = {346},
  number  = {6213},
  pages   = {1258096},
  year    = {2014},
  doi     = {10.1126/science.1258096}
}
% =============================================================================
% BOOKS
% =============================================================================
% Authored book. Classic BibTeX styles print the `edition` value verbatim
% followed by the word "edition", so it is given as a capitalized ordinal
% ("Tenth"), not a bare number. The proper nouns in the title are
% brace-protected so that styles which sentence-case book titles keep them
% capitalized.
@book{Kumar2021,
  author    = {Kumar, Vinay and Abbas, Abul K. and Aster, Jon C.},
  title     = {{Robbins} and {Cotran} Pathologic Basis of Disease},
  edition   = {Tenth},
  publisher = {Elsevier},
  address   = {Philadelphia, PA},
  year      = {2021},
  isbn      = {978-0-323-53113-9}
}
% Authored book with seven authors. `edition` is a capitalized ordinal because
% classic BibTeX styles print the value verbatim before the word "edition".
% The ISBN digits are unchanged; only the hyphens were moved so the groups
% follow the registrant boundary (978-0-8153-...).
@book{Alberts2014,
  author    = {Alberts, Bruce and Johnson, Alexander and Lewis, Julian and Morgan, David and Raff, Martin and Roberts, Keith and Walter, Peter},
  title     = {Molecular Biology of the Cell},
  edition   = {Sixth},
  publisher = {Garland Science},
  address   = {New York, NY},
  year      = {2014},
  isbn      = {978-0-8153-4432-2}
}
% Book with editor instead of author
% @book that supplies `editor` in place of `author`. `edition` is a capitalized
% ordinal because classic BibTeX styles print the value verbatim before the
% word "edition". The ISBN digits are unchanged; only the hyphens were moved so
% the groups follow the registrant boundary (978-0-87969-...).
@book{Sambrook2001,
  editor    = {Sambrook, Joseph and Russell, David W.},
  title     = {Molecular Cloning: A Laboratory Manual},
  edition   = {Third},
  publisher = {Cold Spring Harbor Laboratory Press},
  address   = {Cold Spring Harbor, NY},
  year      = {2001},
  isbn      = {978-0-87969-576-7}
}
% =============================================================================
% CONFERENCE PAPERS (PROCEEDINGS)
% =============================================================================
% Conference paper identified by URL rather than DOI. The given name
% {\L}ukasz uses a brace-wrapped escape for the stroked L.
@inproceedings{Vaswani2017,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is All You Need},
  booktitle = {Advances in Neural Information Processing Systems 30 (NeurIPS 2017)},
  pages     = {5998--6008},
  address   = {Long Beach, CA},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html}
}
% Conference paper with the full proceedings name in `booktitle` (acronym in
% parentheses) and a bare DOI.
@inproceedings{He2016,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep Residual Learning for Image Recognition},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {770--778},
  address   = {Las Vegas, NV},
  year      = {2016},
  doi       = {10.1109/CVPR.2016.90}
}
% =============================================================================
% BOOK CHAPTERS
% =============================================================================
% Chapter in an edited book: `author`/`title` describe the chapter, while
% `booktitle`/`editor`/`publisher` describe the containing volume.
% NOTE(review): the author "Brown, Peter O." and the editor "Brown, Patrick O."
% differ only in given name -- confirm these are meant to be different people.
@incollection{Brown2020,
  author    = {Brown, Peter O. and Botstein, David},
  title     = {Exploring the New World of the Genome with {DNA} Microarrays},
  editor    = {Eisen, Michael B. and Brown, Patrick O.},
  booktitle = {DNA Microarrays: A Molecular Cloning Manual},
  pages     = {1--45},
  publisher = {Cold Spring Harbor Laboratory Press},
  address   = {Cold Spring Harbor, NY},
  year      = {2020}
}
% =============================================================================
% PHD THESES / DISSERTATIONS
% =============================================================================
% Doctoral thesis: `school` names the degree-granting institution and `type`
% supplies the label text, with {PhD} brace-protected.
@phdthesis{Johnson2023,
  author  = {Johnson, Mary L.},
  title   = {Novel Approaches to Cancer Immunotherapy Using {CRISPR} Technology},
  type    = {{PhD} dissertation},
  school  = {Stanford University},
  address = {Stanford, CA},
  year    = {2023}
}
% =============================================================================
% MASTER'S THESES
% =============================================================================
% Master's thesis: same field layout as the doctoral example, with `type`
% supplying the label text.
@mastersthesis{Smith2022,
  author  = {Smith, Robert J.},
  title   = {Machine Learning Methods for Protein Structure Prediction},
  type    = {{Master's} thesis},
  school  = {Massachusetts Institute of Technology},
  address = {Cambridge, MA},
  year    = {2022}
}
% =============================================================================
% TECHNICAL REPORTS
% =============================================================================
% Technical report with a corporate author: the extra pair of braces makes
% BibTeX treat the organization name as one indivisible name instead of
% splitting it into first/last parts. `number` carries the report identifier.
@techreport{WHO2020,
  author      = {{World Health Organization}},
  title       = {Clinical Management of {COVID-19}: Interim Guidance},
  type        = {Technical Report},
  number      = {WHO/2019-nCoV/clinical/2020.5},
  institution = {World Health Organization},
  address     = {Geneva, Switzerland},
  year        = {2020}
}
% =============================================================================
% PREPRINTS
% =============================================================================
% bioRxiv preprint
% Preprint recorded as @misc: the server name goes in `howpublished`, the
% identifier in `doi`, and `note` flags the entry as a preprint.
@misc{Zhang2024preprint,
  author       = {Zhang, Yi and Chen, Li and Wang, Hui and Liu, Xin},
  title        = {Novel Therapeutic Targets in {Alzheimer}'s Disease},
  howpublished = {bioRxiv},
  year         = {2024},
  doi          = {10.1101/2024.01.15.575432},
  note         = {Preprint}
}
% arXiv preprint
% Preprint recorded as @misc. The identifier is stored in the structured
% `eprint`/`archiveprefix` fields so that tools and eprint-aware styles can
% read it. Classic styles silently ignore fields they do not know, so `note`
% still carries the identifier for them.
% NOTE(review): if the chosen style prints `eprint` itself, drop `note` to
% avoid showing the identifier twice.
@misc{Brown2024arxiv,
  author        = {Brown, Alice and Green, Bob},
  title         = {Advances in Quantum Computing},
  howpublished  = {arXiv},
  year          = {2024},
  eprint        = {2401.12345},
  archiveprefix = {arXiv},
  note          = {arXiv:2401.12345}
}
% =============================================================================
% DATASETS
% =============================================================================
% Dataset recorded as @misc with two corporate authors, each wrapped in its
% own braces and joined by ` and `. The version is kept in `note`.
@misc{AlphaFoldDB2021,
  author       = {{DeepMind} and {EMBL-EBI}},
  title        = {{AlphaFold} Protein Structure Database},
  howpublished = {Database},
  year         = {2021},
  note         = {Version 4},
  doi          = {10.1093/nar/gkab1061},
  url          = {https://alphafold.ebi.ac.uk/}
}
% =============================================================================
% SOFTWARE / CODE
% =============================================================================
% Software recorded as @misc: `howpublished` names the medium and `url` points
% to the project site. {Python} is brace-protected in the title.
@misc{McKinney2010pandas,
  author       = {McKinney, Wes},
  title        = {pandas: A Foundational {Python} Library for Data Analysis and Statistics},
  howpublished = {Software},
  year         = {2010},
  note         = {Python Data Analysis Library},
  url          = {https://pandas.pydata.org/}
}
% =============================================================================
% WEBSITES / ONLINE RESOURCES
% =============================================================================
% Website recorded as @misc with a corporate author (double braces) and the
% access date kept in `note`.
@misc{NCBI2024,
  author       = {{National Center for Biotechnology Information}},
  title        = {{PubMed}: Database of Biomedical Literature},
  howpublished = {Website},
  year         = {2024},
  note         = {Accessed: 2024-01-15},
  url          = {https://pubmed.ncbi.nlm.nih.gov/}
}
% =============================================================================
% SPECIAL CASES
% =============================================================================
% Article with organization as author
% The double braces keep the organization name from being parsed as a
% personal name. This entry has `volume` but no `number` field.
@article{NatureEditorial2023,
  author  = {{Nature Editorial Board}},
  title   = {The Future of {AI} in Scientific Research},
  journal = {Nature},
  volume  = {615},
  pages   = {1--2},
  year    = {2023},
  doi     = {10.1038/d41586-023-00001-1}
}
% Article with no volume number (some journals)
% Neither `volume` nor `number` is given; `pages` holds an e-locator style
% identifier instead of a page range.
@article{OpenAccess2024,
  author  = {Williams, Sarah and Thomas, Michael},
  title   = {Open Access Publishing in the 21st Century},
  journal = {Journal of Scholarly Communication},
  pages   = {e123456},
  year    = {2024},
  doi     = {10.1234/jsc.2024.123456}
}
% Conference paper with DOI
% Accented author names are written as brace-wrapped escapes so BibTeX treats
% each one as a single letter. The acute-accented i uses the dotless form
% {\'\i}, matching the escape already used for the accented i in the
% Jumper2021 author list.
@inproceedings{Garcia2023,
  author    = {Garc{\'\i}a-Mart{\'\i}nez, Jos{\'e} and M{\"u}ller, Hans},
  title     = {International Collaboration in Science},
  booktitle = {Proceedings of the International Conference on Academic Publishing},
  pages     = {45--52},
  year      = {2023},
  doi       = {10.1109/ICAP.2023.123456}
}
% Article with PMID but no DOI (older papers)
% No `doi` field is present; the PubMed identifier is carried in `note`.
@article{OldPaper1995,
  author  = {Anderson, Philip W.},
  title   = {Through the Glass Lightly},
  journal = {Science},
  volume  = {267},
  number  = {5204},
  pages   = {1615--1616},
  year    = {1995},
  note    = {PMID: 17808148}
}

View File

@@ -0,0 +1,386 @@
# Citation Quality Checklist
Use this checklist to ensure your citations are accurate, complete, and properly formatted before final submission.
## Pre-Submission Checklist
### ✓ Metadata Accuracy
- [ ] All author names are correct and properly formatted
- [ ] Article titles match the actual publication
- [ ] Journal/conference names are complete (not abbreviated unless required)
- [ ] Publication years are accurate
- [ ] Volume and issue numbers are correct
- [ ] Page ranges are accurate
### ✓ Required Fields
- [ ] All @article entries have: author, title, journal, year
- [ ] All @book entries have: author/editor, title, publisher, year
- [ ] All @inproceedings entries have: author, title, booktitle, year
- [ ] Modern papers (2000+) include DOI when available
- [ ] All entries have unique citation keys
### ✓ DOI Verification
- [ ] All DOIs are properly formatted (10.XXXX/...)
- [ ] DOIs resolve correctly to the article
- [ ] No DOI prefix in the BibTeX field (no "doi:" or "https://doi.org/")
- [ ] Metadata from CrossRef matches your BibTeX entry
- [ ] Run: `python scripts/validate_citations.py references.bib --check-dois`
### ✓ Formatting Consistency
- [ ] Page ranges use double hyphen (--) not single (-)
- [ ] No "pp." prefix in pages field
- [ ] Author names use "and" separator (not semicolon or ampersand)
- [ ] Capitalization protected in titles ({AlphaFold}, {CRISPR}, etc.)
- [ ] Month values, if included, use the standard unquoted three-letter macros (e.g., `month = jan`, not `month = {January}`)
- [ ] Citation keys follow consistent format
### ✓ Duplicate Detection
- [ ] No duplicate DOIs in bibliography
- [ ] No duplicate citation keys
- [ ] No near-duplicate titles
- [ ] Preprints updated to published versions when available
- [ ] Run: `python scripts/validate_citations.py references.bib`
### ✓ Special Characters
- [ ] Accented characters properly formatted (e.g., {\"u} for ü)
- [ ] Mathematical symbols use LaTeX commands
- [ ] Chemical formulas properly formatted
- [ ] No unescaped special characters (%, &, $, #, etc.)
### ✓ BibTeX Syntax
- [ ] All entries have balanced braces {}
- [ ] Fields separated by commas
- [ ] No comma after last field in each entry
- [ ] Valid entry types (@article, @book, etc.)
- [ ] Run: `python scripts/validate_citations.py references.bib`
### ✓ File Organization
- [ ] Bibliography sorted in logical order (by year, author, or key)
- [ ] Consistent formatting throughout
- [ ] No formatting inconsistencies between entries
- [ ] Run: `python scripts/format_bibtex.py references.bib --sort year`
## Automated Validation
### Step 1: Format and Clean
```bash
python scripts/format_bibtex.py references.bib \
--deduplicate \
--sort year \
--descending \
--output clean_references.bib
```
**What this does**:
- Removes duplicates
- Standardizes formatting
- Fixes common issues (page ranges, DOI format, etc.)
- Sorts by year (newest first)
### Step 2: Validate
```bash
python scripts/validate_citations.py clean_references.bib \
--check-dois \
--report validation_report.json \
--verbose
```
**What this does**:
- Checks required fields
- Verifies DOIs resolve
- Detects duplicates
- Validates syntax
- Generates detailed report
### Step 3: Review Report
```bash
cat validation_report.json
```
**Address any**:
- **Errors**: Must fix (missing fields, broken DOIs, syntax errors)
- **Warnings**: Should fix (missing recommended fields, formatting issues)
- **Duplicates**: Remove or consolidate
### Step 4: Final Check
```bash
python scripts/validate_citations.py clean_references.bib --verbose
```
**Goal**: Zero errors, minimal warnings
## Manual Review Checklist
### Critical Citations (Top 10-20 Most Important)
For your most important citations, manually verify:
- [ ] Visit DOI link and confirm it's the correct article
- [ ] Check author names against the actual publication
- [ ] Verify year matches publication date
- [ ] Confirm journal/conference name is correct
- [ ] Check that volume/pages match
### Common Issues to Watch For
**Missing Information**:
- [ ] No DOI for papers published after 2000
- [ ] Missing volume or page numbers for journal articles
- [ ] Missing publisher for books
- [ ] Missing conference location for proceedings
**Formatting Errors**:
- [ ] Single hyphen in page ranges (123-145 → 123--145)
- [ ] Ampersands in author lists (Smith & Jones → Smith and Jones)
- [ ] Unprotected acronyms in titles (DNA → {DNA})
- [ ] DOI includes URL prefix (https://doi.org/10.xxx → 10.xxx)
**Metadata Mismatches**:
- [ ] Author names differ from publication
- [ ] Year is online-first instead of print publication
- [ ] Journal name abbreviated when it should be full
- [ ] Volume/issue numbers swapped
**Duplicates**:
- [ ] Same paper cited with different citation keys
- [ ] Preprint and published version both cited
- [ ] Conference paper and journal version both cited
## Field-Specific Checks
### Biomedical Sciences
- [ ] PubMed Central ID (PMCID) included when available
- [ ] MeSH terms appropriate (if using)
- [ ] Clinical trial registration number included (if applicable)
- [ ] All references to treatments/drugs accurately cited
### Computer Science
- [ ] arXiv ID included for preprints
- [ ] Conference proceedings properly cited (not just "NeurIPS")
- [ ] Software/dataset citations include version numbers
- [ ] GitHub links stable and permanent
### General Sciences
- [ ] Data availability statements properly cited
- [ ] Retracted papers identified and removed
- [ ] Preprints checked for published versions
- [ ] Supplementary materials referenced if critical
## Final Pre-Submission Steps
### 1 Week Before Submission
- [ ] Run full validation with DOI checking
- [ ] Fix all errors and critical warnings
- [ ] Manually verify top 10-20 most important citations
- [ ] Check for any retracted papers
### 3 Days Before Submission
- [ ] Re-run validation after any manual edits
- [ ] Ensure all in-text citations have corresponding bibliography entries
- [ ] Ensure all bibliography entries are cited in text
- [ ] Check citation style matches journal requirements
### 1 Day Before Submission
- [ ] Final validation check
- [ ] LaTeX compilation successful with no warnings
- [ ] PDF renders all citations correctly
- [ ] Bibliography appears in correct format
- [ ] No placeholder citations (Smith et al. XXXX)
### Submission Day
- [ ] One final validation run
- [ ] No last-minute edits without re-validation
- [ ] Bibliography file included in submission package
- [ ] Citations that appear in figure and table captions have matching bibliography entries
## Quality Metrics
### Excellent Bibliography
- ✓ 100% of modern entries have DOIs
- ✓ Zero validation errors
- ✓ Zero missing required fields
- ✓ Zero broken DOIs
- ✓ Zero duplicates
- ✓ Consistent formatting throughout
- ✓ All citations manually spot-checked
### Acceptable Bibliography
- ✓ 90%+ of modern entries have DOIs
- ✓ Zero high-severity errors
- ✓ Minor warnings only (e.g., missing recommended fields)
- ✓ Key citations manually verified
- ✓ Compilation succeeds without errors
### Needs Improvement
- ✗ Missing DOIs for recent papers
- ✗ High-severity validation errors
- ✗ Broken or incorrect DOIs
- ✗ Duplicate entries
- ✗ Inconsistent formatting
- ✗ Compilation warnings or errors
## Emergency Fixes
If you discover issues at the last minute:
### Broken DOI
```bash
# Find correct DOI
# Option 1: Search CrossRef
# https://www.crossref.org/
# Option 2: Search on publisher website
# Option 3: Google Scholar
# Re-extract metadata
python scripts/extract_metadata.py --doi CORRECT_DOI
```
### Missing Information
```bash
# Extract from DOI
python scripts/extract_metadata.py --doi 10.xxxx/yyyy
# Or from PMID (biomedical)
python scripts/extract_metadata.py --pmid 12345678
# Or from arXiv
python scripts/extract_metadata.py --arxiv 2103.12345
```
### Duplicate Entries
```bash
# Auto-remove duplicates
python scripts/format_bibtex.py references.bib \
--deduplicate \
--output fixed_references.bib
```
### Formatting Errors
```bash
# Auto-fix common issues
python scripts/format_bibtex.py references.bib \
--output fixed_references.bib
# Then validate
python scripts/validate_citations.py fixed_references.bib
```
## Long-Term Best Practices
### During Research
- [ ] Add citations to bibliography file as you find them
- [ ] Extract metadata immediately using DOI
- [ ] Validate after every 10-20 additions
- [ ] Keep bibliography file under version control
### During Writing
- [ ] Cite as you write
- [ ] Use consistent citation keys
- [ ] Don't delay adding references
- [ ] Validate weekly
### Before Submission
- [ ] Allow 2-3 days for citation cleanup
- [ ] Don't wait until the last day
- [ ] Automate what you can
- [ ] Manually verify critical citations
## Tool Quick Reference
### Extract Metadata
```bash
# From DOI
python scripts/doi_to_bibtex.py 10.1038/nature12345
# From multiple sources
python scripts/extract_metadata.py \
--doi 10.1038/nature12345 \
--pmid 12345678 \
--arxiv 2103.12345 \
--output references.bib
```
### Validate
```bash
# Basic validation
python scripts/validate_citations.py references.bib
# With DOI checking (slow but thorough)
python scripts/validate_citations.py references.bib --check-dois
# Generate report
python scripts/validate_citations.py references.bib \
--report validation.json \
--verbose
```
### Format and Clean
```bash
# Format and fix issues
python scripts/format_bibtex.py references.bib
# Remove duplicates and sort
python scripts/format_bibtex.py references.bib \
--deduplicate \
--sort year \
--descending \
--output clean_refs.bib
```
## Summary
**Minimum Requirements**:
1. Run `format_bibtex.py --deduplicate`
2. Run `validate_citations.py`
3. Fix all errors
4. Compile successfully
**Recommended**:
1. Format, deduplicate, and sort
2. Validate with `--check-dois`
3. Fix all errors and warnings
4. Manually verify top citations
5. Re-validate after fixes
**Best Practice**:
1. Validate throughout research process
2. Use automated tools consistently
3. Keep bibliography clean and organized
4. Document any special cases
5. Final validation 1-3 days before submission
**Remember**: Citation errors reflect poorly on your scholarship. Taking time to ensure accuracy is worthwhile!