Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:30:18 +08:00
commit 74bee324ab
335 changed files with 147377 additions and 0 deletions

View File

@@ -0,0 +1,334 @@
#!/usr/bin/env python3
"""
Validate case reports against CARE (CAse REport) guidelines.
This script checks a clinical case report for compliance with CARE guidelines
and provides a checklist of required elements.
Usage:
python validate_case_report.py <input_file.md|.txt>
python validate_case_report.py <input_file> --output report.json
"""
import argparse
import json
import re
from pathlib import Path
from typing import Dict, List, Tuple
class CareValidator:
"""Validator for CARE guideline compliance."""
# CARE checklist items with regex patterns
CARE_REQUIREMENTS = {
"title": {
"name": "Title contains 'case report'",
"pattern": r"(?i)(case\s+report|case\s+study)",
"section": "Title",
"required": True
},
"keywords": {
"name": "Keywords provided (2-5)",
"pattern": r"(?i)keywords?[:]\s*(.+)",
"section": "Keywords",
"required": True
},
"abstract": {
"name": "Abstract present",
"pattern": r"(?i)##?\s*abstract",
"section": "Abstract",
"required": True
},
"introduction": {
"name": "Introduction explaining novelty",
"pattern": r"(?i)##?\s*introduction",
"section": "Introduction",
"required": True
},
"patient_info": {
"name": "Patient demographics present",
"pattern": r"(?i)(patient\s+information|demographics?)",
"section": "Patient Information",
"required": True
},
"clinical_findings": {
"name": "Clinical findings documented",
"pattern": r"(?i)(clinical\s+findings?|physical\s+exam)",
"section": "Clinical Findings",
"required": True
},
"timeline": {
"name": "Timeline of events",
"pattern": r"(?i)(timeline|chronology)",
"section": "Timeline",
"required": True
},
"diagnostic": {
"name": "Diagnostic assessment",
"pattern": r"(?i)diagnostic\s+(assessment|evaluation|workup)",
"section": "Diagnostic Assessment",
"required": True
},
"therapeutic": {
"name": "Therapeutic interventions",
"pattern": r"(?i)(therapeutic\s+intervention|treatment)",
"section": "Therapeutic Interventions",
"required": True
},
"followup": {
"name": "Follow-up and outcomes",
"pattern": r"(?i)(follow[\-\s]?up|outcomes?)",
"section": "Follow-up and Outcomes",
"required": True
},
"discussion": {
"name": "Discussion with literature review",
"pattern": r"(?i)##?\s*discussion",
"section": "Discussion",
"required": True
},
"consent": {
"name": "Informed consent statement",
"pattern": r"(?i)(informed\s+consent|written\s+consent|consent.*obtained)",
"section": "Informed Consent",
"required": True
},
}
# HIPAA identifiers to check for
HIPAA_PATTERNS = {
"dates": r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])/\d{4}\b",
"phone": r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b",
"email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
"ssn": r"\b\d{3}-\d{2}-\d{4}\b",
"mrn": r"(?i)(mrn|medical\s+record)[:]\s*\d+",
"zip_full": r"\b\d{5}-\d{4}\b",
}
def __init__(self, filename: str):
"""Initialize validator with input file."""
self.filename = Path(filename)
self.content = self._read_file()
self.results = {}
def _read_file(self) -> str:
"""Read input file content."""
try:
with open(self.filename, 'r', encoding='utf-8') as f:
return f.read()
except FileNotFoundError:
raise FileNotFoundError(f"File not found: {self.filename}")
except Exception as e:
raise Exception(f"Error reading file: {e}")
def validate_care_compliance(self) -> Dict[str, Dict]:
"""Validate compliance with CARE guidelines."""
results = {}
for key, item in self.CARE_REQUIREMENTS.items():
pattern = item["pattern"]
found = bool(re.search(pattern, self.content))
results[key] = {
"name": item["name"],
"section": item["section"],
"required": item["required"],
"found": found,
"status": "PASS" if found else "FAIL" if item["required"] else "WARNING"
}
self.results["care_compliance"] = results
return results
def check_deidentification(self) -> Dict[str, List[str]]:
"""Check for potential HIPAA identifier violations."""
violations = {}
for identifier, pattern in self.HIPAA_PATTERNS.items():
matches = re.findall(pattern, self.content)
if matches:
violations[identifier] = matches[:5] # Limit to first 5 examples
self.results["hipaa_violations"] = violations
return violations
def check_word_count(self) -> Dict[str, int]:
"""Check word count and provide limits guidance."""
words = len(re.findall(r'\b\w+\b', self.content))
word_count = {
"total_words": words,
"typical_min": 1500,
"typical_max": 3000,
"status": "ACCEPTABLE" if 1500 <= words <= 3500 else "CHECK"
}
self.results["word_count"] = word_count
return word_count
def check_references(self) -> Dict[str, any]:
"""Check for presence of references."""
ref_patterns = [
r"##?\s*references",
r"\[\d+\]",
r"\d+\.\s+[A-Z][a-z]+.*\d{4}", # Numbered references
]
has_refs = any(re.search(p, self.content, re.IGNORECASE) for p in ref_patterns)
ref_count = len(re.findall(r"\[\d+\]", self.content))
references = {
"has_references": has_refs,
"estimated_count": ref_count,
"recommended_min": 10,
"status": "ACCEPTABLE" if ref_count >= 10 else "LOW"
}
self.results["references"] = references
return references
def generate_report(self) -> Dict:
"""Generate comprehensive validation report."""
if not self.results:
self.validate_care_compliance()
self.check_deidentification()
self.check_word_count()
self.check_references()
# Calculate overall compliance
care = self.results["care_compliance"]
total_required = sum(1 for v in care.values() if v["required"])
passed = sum(1 for v in care.values() if v["required"] and v["found"])
compliance_rate = (passed / total_required * 100) if total_required > 0 else 0
report = {
"filename": str(self.filename),
"compliance_rate": round(compliance_rate, 1),
"care_compliance": care,
"hipaa_violations": self.results["hipaa_violations"],
"word_count": self.results["word_count"],
"references": self.results["references"],
"overall_status": "PASS" if compliance_rate >= 90 and not self.results["hipaa_violations"] else "NEEDS_REVISION"
}
return report
def print_report(self):
"""Print human-readable validation report."""
report = self.generate_report()
print("=" * 70)
print(f"CARE Guideline Validation Report")
print(f"File: {report['filename']}")
print("=" * 70)
print()
print(f"Overall Compliance: {report['compliance_rate']}%")
print(f"Status: {report['overall_status']}")
print()
print("CARE Checklist:")
print("-" * 70)
for key, item in report["care_compliance"].items():
status_symbol = "" if item["found"] else ""
print(f"{status_symbol} [{item['status']:8}] {item['name']}")
print()
if report["hipaa_violations"]:
print("HIPAA DE-IDENTIFICATION WARNINGS:")
print("-" * 70)
for identifier, examples in report["hipaa_violations"].items():
print(f"{identifier.upper()}: {len(examples)} instance(s) found")
for ex in examples[:3]:
print(f" Example: {ex}")
print()
else:
print("✓ No obvious HIPAA identifiers detected")
print()
wc = report["word_count"]
print(f"Word Count: {wc['total_words']} words")
print(f" Typical range: {wc['typical_min']}-{wc['typical_max']} words")
print(f" Status: {wc['status']}")
print()
refs = report["references"]
print(f"References: {refs['estimated_count']} citation(s) detected")
print(f" Recommended minimum: {refs['recommended_min']}")
print(f" Status: {refs['status']}")
print()
print("=" * 70)
# Recommendations
issues = []
if report['compliance_rate'] < 100:
missing = [v["name"] for v in report["care_compliance"].values() if v["required"] and not v["found"]]
issues.append(f"Missing required sections: {', '.join(missing)}")
if report["hipaa_violations"]:
issues.append("HIPAA identifiers detected - review de-identification")
if refs["status"] == "LOW":
issues.append("Low reference count - consider adding more citations")
if issues:
print("RECOMMENDATIONS:")
for i, issue in enumerate(issues, 1):
print(f"{i}. {issue}")
else:
print("✓ Case report meets CARE guidelines!")
print("=" * 70)
def main():
"""Main entry point."""
parser = argparse.ArgumentParser(
description="Validate clinical case reports against CARE guidelines"
)
parser.add_argument(
"input_file",
help="Path to case report file (Markdown or text)"
)
parser.add_argument(
"--output",
"-o",
help="Output JSON report to file"
)
parser.add_argument(
"--json",
action="store_true",
help="Output JSON to stdout instead of human-readable report"
)
args = parser.parse_args()
try:
validator = CareValidator(args.input_file)
report = validator.generate_report()
if args.json:
print(json.dumps(report, indent=2))
else:
validator.print_report()
if args.output:
with open(args.output, 'w') as f:
json.dumps(report, f, indent=2)
print(f"\nJSON report saved to: {args.output}")
# Exit with non-zero if validation failed
exit_code = 0 if report["overall_status"] == "PASS" else 1
return exit_code
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
return 1
if __name__ == "__main__":
import sys
sys.exit(main())