#!/usr/bin/env python3 """ Validate medical terminology and coding in clinical reports. Usage: python terminology_validator.py """ import argparse import json import re # Common medical abbreviations that should be avoided (JCAHO "Do Not Use" list) DO_NOT_USE = { "U": "Unit", "IU": "International Unit", "QD": "daily", "QOD": "every other day", "MS": "morphine sulfate or magnesium sulfate", "MSO4": "morphine sulfate", "MgSO4": "magnesium sulfate", } # Common abbreviations with potential ambiguity AMBIGUOUS = ["cc", "hs", "TIW", "SC", "SQ", "D/C", "AS", "AD", "AU", "OS", "OD", "OU"] def check_do_not_use_abbreviations(content: str) -> dict: """Check for prohibited abbreviations.""" violations = {} for abbrev, meaning in DO_NOT_USE.items(): # Word boundary pattern to avoid false positives pattern = rf"\b{re.escape(abbrev)}\b" matches = re.findall(pattern, content) if matches: violations[abbrev] = { "count": len(matches), "should_use": meaning, "severity": "HIGH" } return violations def check_ambiguous_abbreviations(content: str) -> dict: """Check for ambiguous abbreviations.""" found = {} for abbrev in AMBIGUOUS: pattern = rf"\b{re.escape(abbrev)}\b" matches = re.findall(pattern, content, re.IGNORECASE) if matches: found[abbrev] = { "count": len(matches), "severity": "MEDIUM" } return found def validate_icd10_format(content: str) -> list: """Check ICD-10 code format.""" # ICD-10 format: Letter + 2 digits + optional decimal + 0-4 more digits pattern = r"\b[A-Z]\d{2}\.?\d{0,4}\b" codes = re.findall(pattern, content) return list(set(codes)) # Unique codes def main(): """Main entry point.""" parser = argparse.ArgumentParser(description="Validate medical terminology") parser.add_argument("input_file", help="Path to clinical report") parser.add_argument("--json", action="store_true") args = parser.parse_args() try: with open(args.input_file, 'r', encoding='utf-8') as f: content = f.read() do_not_use = check_do_not_use_abbreviations(content) ambiguous = check_ambiguous_abbreviations(content) icd10_codes = validate_icd10_format(content) report = { "filename": args.input_file, "do_not_use_violations": do_not_use, "ambiguous_abbreviations": ambiguous, "icd10_codes_found": icd10_codes, "total_issues": len(do_not_use) + len(ambiguous) } if args.json: print(json.dumps(report, indent=2)) else: print("\nTerminology Validation Report:\n") if do_not_use: print("❌ DO NOT USE Abbreviations Found:") for abbrev, details in do_not_use.items(): print(f" {abbrev}: {details['count']} occurrence(s)") print(f" → Use '{details['should_use']}' instead") print() else: print("✓ No prohibited abbreviations found\n") if ambiguous: print("⚠ Ambiguous Abbreviations Found:") for abbrev, details in ambiguous.items(): print(f" {abbrev}: {details['count']} occurrence(s)") print(" Consider spelling out for clarity\n") if icd10_codes: print(f"ℹ ICD-10 codes detected: {len(icd10_codes)}") for code in icd10_codes[:5]: print(f" - {code}") if len(icd10_codes) > 5: print(f" ... and {len(icd10_codes) - 5} more") print() return 0 if not do_not_use else 1 except Exception as e: print(f"Error: {e}") return 1 if __name__ == "__main__": import sys sys.exit(main())