Initial commit
This commit is contained in:
334
skills/clinical-reports/scripts/validate_case_report.py
Executable file
334
skills/clinical-reports/scripts/validate_case_report.py
Executable file
@@ -0,0 +1,334 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validate case reports against CARE (CAse REport) guidelines.
|
||||
|
||||
This script checks a clinical case report for compliance with CARE guidelines
|
||||
and provides a checklist of required elements.
|
||||
|
||||
Usage:
    python validate_case_report.py <input_file.md|.txt>
    python validate_case_report.py <input_file> --output report.json
    python validate_case_report.py <input_file> --json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
|
||||
class CareValidator:
    """Validator for CARE guideline compliance.

    The validator reads a case report file once at construction time and
    runs regex-based checks over its full text. Each ``check_*`` /
    ``validate_*`` method stores its result under a fixed key in
    ``self.results`` and also returns it; ``generate_report`` combines them.

    All checks are keyword/regex heuristics over the raw text: a pattern
    matching anywhere in the document counts as "found".
    """

    # CARE checklist items with regex patterns.
    # Each entry: display name, regex searched in the whole document,
    # the CARE section it corresponds to, and whether it is mandatory.
    CARE_REQUIREMENTS = {
        "title": {
            "name": "Title contains 'case report'",
            "pattern": r"(?i)(case\s+report|case\s+study)",
            "section": "Title",
            "required": True,
        },
        "keywords": {
            "name": "Keywords provided (2-5)",
            "pattern": r"(?i)keywords?[:]\s*(.+)",
            "section": "Keywords",
            "required": True,
        },
        "abstract": {
            "name": "Abstract present",
            "pattern": r"(?i)##?\s*abstract",
            "section": "Abstract",
            "required": True,
        },
        "introduction": {
            "name": "Introduction explaining novelty",
            "pattern": r"(?i)##?\s*introduction",
            "section": "Introduction",
            "required": True,
        },
        "patient_info": {
            "name": "Patient demographics present",
            "pattern": r"(?i)(patient\s+information|demographics?)",
            "section": "Patient Information",
            "required": True,
        },
        "clinical_findings": {
            "name": "Clinical findings documented",
            "pattern": r"(?i)(clinical\s+findings?|physical\s+exam)",
            "section": "Clinical Findings",
            "required": True,
        },
        "timeline": {
            "name": "Timeline of events",
            "pattern": r"(?i)(timeline|chronology)",
            "section": "Timeline",
            "required": True,
        },
        "diagnostic": {
            "name": "Diagnostic assessment",
            "pattern": r"(?i)diagnostic\s+(assessment|evaluation|workup)",
            "section": "Diagnostic Assessment",
            "required": True,
        },
        "therapeutic": {
            "name": "Therapeutic interventions",
            "pattern": r"(?i)(therapeutic\s+intervention|treatment)",
            "section": "Therapeutic Interventions",
            "required": True,
        },
        "followup": {
            "name": "Follow-up and outcomes",
            "pattern": r"(?i)(follow[\-\s]?up|outcomes?)",
            "section": "Follow-up and Outcomes",
            "required": True,
        },
        "discussion": {
            "name": "Discussion with literature review",
            "pattern": r"(?i)##?\s*discussion",
            "section": "Discussion",
            "required": True,
        },
        "consent": {
            "name": "Informed consent statement",
            "pattern": r"(?i)(informed\s+consent|written\s+consent|consent.*obtained)",
            "section": "Informed Consent",
            "required": True,
        },
    }

    # HIPAA identifiers to check for
    HIPAA_PATTERNS = {
        "dates": r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])/\d{4}\b",
        "phone": r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b",
        # TLD class is [A-Za-z]; a '|' inside a character class is a literal
        # pipe, not alternation, so it must not appear here.
        "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
        "ssn": r"\b\d{3}-\d{2}-\d{4}\b",
        "mrn": r"(?i)(mrn|medical\s+record)[:]\s*\d+",
        "zip_full": r"\b\d{5}-\d{4}\b",
    }

    def __init__(self, filename: str):
        """Initialize validator with input file.

        Args:
            filename: Path to the case report; it is read immediately as
                UTF-8 text.

        Raises:
            FileNotFoundError: If the file does not exist.
            Exception: If the file cannot be read for any other reason.
        """
        self.filename = Path(filename)
        self.content = self._read_file()
        # Filled lazily by the individual checks, keyed by check name.
        self.results = {}

    def _read_file(self) -> str:
        """Read input file content.

        Returns:
            The complete file content decoded as UTF-8.

        Raises:
            FileNotFoundError: If ``self.filename`` does not exist.
            Exception: For any other read failure (the original error is
                chained as the cause).
        """
        try:
            with open(self.filename, 'r', encoding='utf-8') as f:
                return f.read()
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {self.filename}") from e
        except Exception as e:
            raise Exception(f"Error reading file: {e}") from e

    def validate_care_compliance(self) -> Dict[str, Dict]:
        """Validate compliance with CARE guidelines.

        Searches the document for every pattern in ``CARE_REQUIREMENTS``.

        Returns:
            Mapping of checklist key to a dict with ``name``, ``section``,
            ``required``, ``found`` and ``status``. ``status`` is ``"PASS"``
            when the pattern matched, otherwise ``"FAIL"`` for required
            items and ``"WARNING"`` for optional ones. The same mapping is
            stored in ``self.results["care_compliance"]``.
        """
        results = {}

        for key, item in self.CARE_REQUIREMENTS.items():
            found = re.search(item["pattern"], self.content) is not None

            if found:
                status = "PASS"
            elif item["required"]:
                status = "FAIL"
            else:
                status = "WARNING"

            results[key] = {
                "name": item["name"],
                "section": item["section"],
                "required": item["required"],
                "found": found,
                "status": status,
            }

        self.results["care_compliance"] = results
        return results

    def check_deidentification(self) -> Dict[str, List[str]]:
        """Check for potential HIPAA identifier violations.

        Returns:
            Mapping of identifier name (a key of ``HIPAA_PATTERNS``) to up
            to five matched text snippets. Identifiers with no match are
            omitted. The mapping is also stored in
            ``self.results["hipaa_violations"]``.
        """
        violations = {}

        for identifier, pattern in self.HIPAA_PATTERNS.items():
            # Use finditer + group(0) to get the full matched text.
            # re.findall returns only the capture groups when a pattern has
            # them, which would report e.g. ('01', '15') instead of the date.
            matches = [m.group(0) for m in re.finditer(pattern, self.content)]
            if matches:
                violations[identifier] = matches[:5]  # Limit to first 5 examples

        self.results["hipaa_violations"] = violations
        return violations

    def check_word_count(self) -> Dict[str, int]:
        """Check word count and provide limits guidance.

        Returns:
            Dict with ``total_words``, the ``typical_min`` / ``typical_max``
            guidance values and a ``status`` of ``"ACCEPTABLE"`` or
            ``"CHECK"``. Also stored in ``self.results["word_count"]``.
        """
        words = len(re.findall(r'\b\w+\b', self.content))

        word_count = {
            "total_words": words,
            "typical_min": 1500,
            "typical_max": 3000,
            # NOTE(review): ACCEPTABLE extends to 3500 words although the
            # reported typical_max is 3000 - confirm the intended tolerance.
            "status": "ACCEPTABLE" if 1500 <= words <= 3500 else "CHECK",
        }

        self.results["word_count"] = word_count
        return word_count

    def check_references(self) -> Dict[str, object]:
        """Check for presence of references.

        Returns:
            Dict with ``has_references`` (any reference-like pattern found),
            ``estimated_count`` (number of ``[n]`` citation markers in the
            text), ``recommended_min`` and a ``status`` of ``"ACCEPTABLE"``
            or ``"LOW"``. Also stored in ``self.results["references"]``.
        """
        ref_patterns = [
            r"##?\s*references",
            r"\[\d+\]",
            r"\d+\.\s+[A-Z][a-z]+.*\d{4}",  # Numbered references
        ]

        has_refs = any(re.search(p, self.content, re.IGNORECASE) for p in ref_patterns)
        # Counts every bracketed citation marker, including repeats.
        ref_count = len(re.findall(r"\[\d+\]", self.content))

        references = {
            "has_references": has_refs,
            "estimated_count": ref_count,
            "recommended_min": 10,
            "status": "ACCEPTABLE" if ref_count >= 10 else "LOW",
        }

        self.results["references"] = references
        return references

    def generate_report(self) -> Dict:
        """Generate comprehensive validation report.

        Runs every check whose result is not yet present in
        ``self.results`` (so calling an individual check beforehand is
        safe), then derives the compliance rate and overall status.

        Returns:
            Dict with ``filename``, ``compliance_rate`` (percentage of
            required CARE items found, rounded to one decimal),
            ``care_compliance``, ``hipaa_violations``, ``word_count``,
            ``references`` and ``overall_status``. The overall status is
            ``"PASS"`` only when the compliance rate is at least 90 and no
            HIPAA identifiers were detected; otherwise ``"NEEDS_REVISION"``.
        """
        pending_checks = (
            ("care_compliance", self.validate_care_compliance),
            ("hipaa_violations", self.check_deidentification),
            ("word_count", self.check_word_count),
            ("references", self.check_references),
        )
        for result_key, run_check in pending_checks:
            if result_key not in self.results:
                run_check()

        # Calculate overall compliance
        care = self.results["care_compliance"]
        total_required = sum(1 for v in care.values() if v["required"])
        passed = sum(1 for v in care.values() if v["required"] and v["found"])
        compliance_rate = (passed / total_required * 100) if total_required > 0 else 0

        report = {
            "filename": str(self.filename),
            "compliance_rate": round(compliance_rate, 1),
            "care_compliance": care,
            "hipaa_violations": self.results["hipaa_violations"],
            "word_count": self.results["word_count"],
            "references": self.results["references"],
            "overall_status": "PASS" if compliance_rate >= 90 and not self.results["hipaa_violations"] else "NEEDS_REVISION",
        }

        return report

    def print_report(self):
        """Print human-readable validation report.

        Writes the checklist, de-identification warnings, word count,
        reference count and a list of recommendations to stdout.
        """
        report = self.generate_report()

        print("=" * 70)
        print("CARE Guideline Validation Report")
        print(f"File: {report['filename']}")
        print("=" * 70)
        print()

        print(f"Overall Compliance: {report['compliance_rate']}%")
        print(f"Status: {report['overall_status']}")
        print()

        print("CARE Checklist:")
        print("-" * 70)
        for item in report["care_compliance"].values():
            status_symbol = "✓" if item["found"] else "✗"
            print(f"{status_symbol} [{item['status']:8}] {item['name']}")
        print()

        if report["hipaa_violations"]:
            print("HIPAA DE-IDENTIFICATION WARNINGS:")
            print("-" * 70)
            for identifier, examples in report["hipaa_violations"].items():
                # The count reflects the stored examples, which are capped
                # at five per identifier by check_deidentification.
                print(f"⚠ {identifier.upper()}: {len(examples)} instance(s) found")
                for ex in examples[:3]:
                    print(f" Example: {ex}")
            print()
        else:
            print("✓ No obvious HIPAA identifiers detected")
            print()

        wc = report["word_count"]
        print(f"Word Count: {wc['total_words']} words")
        print(f" Typical range: {wc['typical_min']}-{wc['typical_max']} words")
        print(f" Status: {wc['status']}")
        print()

        refs = report["references"]
        print(f"References: {refs['estimated_count']} citation(s) detected")
        print(f" Recommended minimum: {refs['recommended_min']}")
        print(f" Status: {refs['status']}")
        print()

        print("=" * 70)

        # Recommendations
        issues = []
        if report['compliance_rate'] < 100:
            missing = [v["name"] for v in report["care_compliance"].values() if v["required"] and not v["found"]]
            issues.append(f"Missing required sections: {', '.join(missing)}")

        if report["hipaa_violations"]:
            issues.append("HIPAA identifiers detected - review de-identification")

        if refs["status"] == "LOW":
            issues.append("Low reference count - consider adding more citations")

        if issues:
            print("RECOMMENDATIONS:")
            for i, issue in enumerate(issues, 1):
                print(f"{i}. {issue}")
        else:
            print("✓ Case report meets CARE guidelines!")

        print("=" * 70)
|
||||
|
||||
|
||||
def main() -> int:
    """Main entry point.

    Parses the command line, validates the given case report and prints
    either the human-readable report or, with ``--json``, the JSON report.
    With ``--output`` the JSON report is additionally written to that file.

    Returns:
        0 when the report's overall status is ``"PASS"``; 1 when validation
        did not pass or any error occurred (the error is printed to stderr).
    """
    # Imported locally so the error path works even when main() is called
    # from another module rather than via the __main__ guard.
    import sys

    parser = argparse.ArgumentParser(
        description="Validate clinical case reports against CARE guidelines"
    )
    parser.add_argument(
        "input_file",
        help="Path to case report file (Markdown or text)"
    )
    parser.add_argument(
        "--output",
        "-o",
        help="Output JSON report to file"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output JSON to stdout instead of human-readable report"
    )

    args = parser.parse_args()

    try:
        validator = CareValidator(args.input_file)
        report = validator.generate_report()

        if args.json:
            print(json.dumps(report, indent=2))
        else:
            validator.print_report()

        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                # json.dump writes to the file object; json.dumps only
                # returns a string and does not accept a file argument.
                json.dump(report, f, indent=2)
            print(f"\nJSON report saved to: {args.output}")

        # Exit with non-zero if validation failed
        return 0 if report["overall_status"] == "PASS" else 1

    except Exception as e:
        # Top-level boundary: report any failure and signal it via exit code.
        print(f"Error: {e}", file=sys.stderr)
        return 1
|
||||
|
||||
|
||||
# Script entry point: run the validator and use main()'s return value as
# the process exit status.
if __name__ == "__main__":
    import sys

    exit_status = main()
    sys.exit(exit_status)
|
||||
|
||||
Reference in New Issue
Block a user