#!/usr/bin/env python3 """ Validate case reports against CARE (CAse REport) guidelines. This script checks a clinical case report for compliance with CARE guidelines and provides a checklist of required elements. Usage: python validate_case_report.py python validate_case_report.py --output report.json """ import argparse import json import re from pathlib import Path from typing import Dict, List, Tuple class CareValidator: """Validator for CARE guideline compliance.""" # CARE checklist items with regex patterns CARE_REQUIREMENTS = { "title": { "name": "Title contains 'case report'", "pattern": r"(?i)(case\s+report|case\s+study)", "section": "Title", "required": True }, "keywords": { "name": "Keywords provided (2-5)", "pattern": r"(?i)keywords?[:]\s*(.+)", "section": "Keywords", "required": True }, "abstract": { "name": "Abstract present", "pattern": r"(?i)##?\s*abstract", "section": "Abstract", "required": True }, "introduction": { "name": "Introduction explaining novelty", "pattern": r"(?i)##?\s*introduction", "section": "Introduction", "required": True }, "patient_info": { "name": "Patient demographics present", "pattern": r"(?i)(patient\s+information|demographics?)", "section": "Patient Information", "required": True }, "clinical_findings": { "name": "Clinical findings documented", "pattern": r"(?i)(clinical\s+findings?|physical\s+exam)", "section": "Clinical Findings", "required": True }, "timeline": { "name": "Timeline of events", "pattern": r"(?i)(timeline|chronology)", "section": "Timeline", "required": True }, "diagnostic": { "name": "Diagnostic assessment", "pattern": r"(?i)diagnostic\s+(assessment|evaluation|workup)", "section": "Diagnostic Assessment", "required": True }, "therapeutic": { "name": "Therapeutic interventions", "pattern": r"(?i)(therapeutic\s+intervention|treatment)", "section": "Therapeutic Interventions", "required": True }, "followup": { "name": "Follow-up and outcomes", "pattern": r"(?i)(follow[\-\s]?up|outcomes?)", "section": "Follow-up and Outcomes", "required": True }, "discussion": { "name": "Discussion with literature review", "pattern": r"(?i)##?\s*discussion", "section": "Discussion", "required": True }, "consent": { "name": "Informed consent statement", "pattern": r"(?i)(informed\s+consent|written\s+consent|consent.*obtained)", "section": "Informed Consent", "required": True }, } # HIPAA identifiers to check for HIPAA_PATTERNS = { "dates": r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])/\d{4}\b", "phone": r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b", "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", "ssn": r"\b\d{3}-\d{2}-\d{4}\b", "mrn": r"(?i)(mrn|medical\s+record)[:]\s*\d+", "zip_full": r"\b\d{5}-\d{4}\b", } def __init__(self, filename: str): """Initialize validator with input file.""" self.filename = Path(filename) self.content = self._read_file() self.results = {} def _read_file(self) -> str: """Read input file content.""" try: with open(self.filename, 'r', encoding='utf-8') as f: return f.read() except FileNotFoundError: raise FileNotFoundError(f"File not found: {self.filename}") except Exception as e: raise Exception(f"Error reading file: {e}") def validate_care_compliance(self) -> Dict[str, Dict]: """Validate compliance with CARE guidelines.""" results = {} for key, item in self.CARE_REQUIREMENTS.items(): pattern = item["pattern"] found = bool(re.search(pattern, self.content)) results[key] = { "name": item["name"], "section": item["section"], "required": item["required"], "found": found, "status": "PASS" if found else "FAIL" if item["required"] else "WARNING" } self.results["care_compliance"] = results return results def check_deidentification(self) -> Dict[str, List[str]]: """Check for potential HIPAA identifier violations.""" violations = {} for identifier, pattern in self.HIPAA_PATTERNS.items(): matches = re.findall(pattern, self.content) if matches: violations[identifier] = matches[:5] # Limit to first 5 examples self.results["hipaa_violations"] = violations return violations def check_word_count(self) -> Dict[str, int]: """Check word count and provide limits guidance.""" words = len(re.findall(r'\b\w+\b', self.content)) word_count = { "total_words": words, "typical_min": 1500, "typical_max": 3000, "status": "ACCEPTABLE" if 1500 <= words <= 3500 else "CHECK" } self.results["word_count"] = word_count return word_count def check_references(self) -> Dict[str, any]: """Check for presence of references.""" ref_patterns = [ r"##?\s*references", r"\[\d+\]", r"\d+\.\s+[A-Z][a-z]+.*\d{4}", # Numbered references ] has_refs = any(re.search(p, self.content, re.IGNORECASE) for p in ref_patterns) ref_count = len(re.findall(r"\[\d+\]", self.content)) references = { "has_references": has_refs, "estimated_count": ref_count, "recommended_min": 10, "status": "ACCEPTABLE" if ref_count >= 10 else "LOW" } self.results["references"] = references return references def generate_report(self) -> Dict: """Generate comprehensive validation report.""" if not self.results: self.validate_care_compliance() self.check_deidentification() self.check_word_count() self.check_references() # Calculate overall compliance care = self.results["care_compliance"] total_required = sum(1 for v in care.values() if v["required"]) passed = sum(1 for v in care.values() if v["required"] and v["found"]) compliance_rate = (passed / total_required * 100) if total_required > 0 else 0 report = { "filename": str(self.filename), "compliance_rate": round(compliance_rate, 1), "care_compliance": care, "hipaa_violations": self.results["hipaa_violations"], "word_count": self.results["word_count"], "references": self.results["references"], "overall_status": "PASS" if compliance_rate >= 90 and not self.results["hipaa_violations"] else "NEEDS_REVISION" } return report def print_report(self): """Print human-readable validation report.""" report = self.generate_report() print("=" * 70) print(f"CARE Guideline Validation Report") print(f"File: {report['filename']}") print("=" * 70) print() print(f"Overall Compliance: {report['compliance_rate']}%") print(f"Status: {report['overall_status']}") print() print("CARE Checklist:") print("-" * 70) for key, item in report["care_compliance"].items(): status_symbol = "✓" if item["found"] else "✗" print(f"{status_symbol} [{item['status']:8}] {item['name']}") print() if report["hipaa_violations"]: print("HIPAA DE-IDENTIFICATION WARNINGS:") print("-" * 70) for identifier, examples in report["hipaa_violations"].items(): print(f"⚠ {identifier.upper()}: {len(examples)} instance(s) found") for ex in examples[:3]: print(f" Example: {ex}") print() else: print("✓ No obvious HIPAA identifiers detected") print() wc = report["word_count"] print(f"Word Count: {wc['total_words']} words") print(f" Typical range: {wc['typical_min']}-{wc['typical_max']} words") print(f" Status: {wc['status']}") print() refs = report["references"] print(f"References: {refs['estimated_count']} citation(s) detected") print(f" Recommended minimum: {refs['recommended_min']}") print(f" Status: {refs['status']}") print() print("=" * 70) # Recommendations issues = [] if report['compliance_rate'] < 100: missing = [v["name"] for v in report["care_compliance"].values() if v["required"] and not v["found"]] issues.append(f"Missing required sections: {', '.join(missing)}") if report["hipaa_violations"]: issues.append("HIPAA identifiers detected - review de-identification") if refs["status"] == "LOW": issues.append("Low reference count - consider adding more citations") if issues: print("RECOMMENDATIONS:") for i, issue in enumerate(issues, 1): print(f"{i}. {issue}") else: print("✓ Case report meets CARE guidelines!") print("=" * 70) def main(): """Main entry point.""" parser = argparse.ArgumentParser( description="Validate clinical case reports against CARE guidelines" ) parser.add_argument( "input_file", help="Path to case report file (Markdown or text)" ) parser.add_argument( "--output", "-o", help="Output JSON report to file" ) parser.add_argument( "--json", action="store_true", help="Output JSON to stdout instead of human-readable report" ) args = parser.parse_args() try: validator = CareValidator(args.input_file) report = validator.generate_report() if args.json: print(json.dumps(report, indent=2)) else: validator.print_report() if args.output: with open(args.output, 'w') as f: json.dumps(report, f, indent=2) print(f"\nJSON report saved to: {args.output}") # Exit with non-zero if validation failed exit_code = 0 if report["overall_status"] == "PASS" else 1 return exit_code except Exception as e: print(f"Error: {e}", file=sys.stderr) return 1 if __name__ == "__main__": import sys sys.exit(main())