Initial commit

2025-11-30 08:30:18 +08:00
commit 74bee324ab
335 changed files with 147377 additions and 0 deletions
--- a/skills/clinical-reports/scripts/terminology_validator.py
+++ b/skills/clinical-reports/scripts/terminology_validator.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""
+Validate medical terminology and coding in clinical reports.
+
+Usage:
+    python terminology_validator.py <report_file> [--json]
+"""
+
+import argparse
+import json
+import re
+
+
+# Abbreviations from the JCAHO "Do Not Use" list, each paired with the
+# wording that should be written in its place.
+DO_NOT_USE = dict(
+    [
+        ("U", "Unit"),
+        ("IU", "International Unit"),
+        ("QD", "daily"),
+        ("QOD", "every other day"),
+        ("MS", "morphine sulfate or magnesium sulfate"),
+        ("MSO4", "morphine sulfate"),
+        ("MgSO4", "magnesium sulfate"),
+    ]
+)
+
+# Common abbreviations that are potentially ambiguous.
+AMBIGUOUS = [
+    "cc",
+    "hs",
+    "TIW",
+    "SC",
+    "SQ",
+    "D/C",
+    "AS",
+    "AD",
+    "AU",
+    "OS",
+    "OD",
+    "OU",
+]
+
+
+def check_do_not_use_abbreviations(content: str) -> dict:
+    """Find prohibited ("Do Not Use") abbreviations in a report.
+
+    Every key of ``DO_NOT_USE`` is searched for between word boundaries,
+    matching case exactly.
+
+    Args:
+        content: Full text of the report.
+
+    Returns:
+        A dict keyed by each abbreviation that occurs at least once. Each
+        value holds ``count`` (number of occurrences), ``should_use`` (the
+        replacement text taken from ``DO_NOT_USE``) and ``severity``
+        (always ``"HIGH"``).
+    """
+    flagged = {}
+
+    for term, replacement in DO_NOT_USE.items():
+        # The \b anchors keep e.g. "MS" from matching inside "MSO4".
+        term_re = re.compile(rf"\b{re.escape(term)}\b")
+        hits = len(term_re.findall(content))
+        if hits == 0:
+            continue
+        flagged[term] = {
+            "count": hits,
+            "should_use": replacement,
+            "severity": "HIGH",
+        }
+
+    return flagged
+
+
+# Entries of AMBIGUOUS whose lowercase spelling is an everyday English word.
+# They are matched case-sensitively so that "as" and "ad" in running text
+# are not reported as abbreviations.
+_CASE_SENSITIVE_AMBIGUOUS = frozenset({"AS", "AD"})
+
+
+def check_ambiguous_abbreviations(content: str) -> dict:
+    """Find potentially ambiguous abbreviations in a report.
+
+    Every entry of ``AMBIGUOUS`` is searched for between word boundaries.
+    Matching ignores case, except for the entries listed in
+    ``_CASE_SENSITIVE_AMBIGUOUS``, which must appear exactly as written.
+
+    Args:
+        content: Full text of the report.
+
+    Returns:
+        A dict keyed by each abbreviation (spelled as in ``AMBIGUOUS``) that
+        occurs at least once. Each value holds ``count`` (number of
+        occurrences) and ``severity`` (always ``"MEDIUM"``).
+    """
+    found = {}
+
+    for abbrev in AMBIGUOUS:
+        # Ignoring case for "AS"/"AD" would flag the plain words "as"/"ad".
+        flags = 0 if abbrev in _CASE_SENSITIVE_AMBIGUOUS else re.IGNORECASE
+        pattern = rf"\b{re.escape(abbrev)}\b"
+        matches = re.findall(pattern, content, flags)
+        if matches:
+            found[abbrev] = {
+                "count": len(matches),
+                "severity": "MEDIUM"
+            }
+
+    return found
+
+
+def validate_icd10_format(content: str) -> list:
+    """Extract the tokens in a report that are shaped like ICD-10 codes.
+
+    Despite the name, nothing is checked against a code set: any token made
+    of one uppercase letter, two digits, an optional "." and up to four
+    further digits is returned.
+
+    Args:
+        content: Full text of the report.
+
+    Returns:
+        The unique matches as a sorted list. Sorting keeps the result, and
+        therefore the report built from it, identical between runs; the
+        iteration order of a set of strings is not stable across
+        interpreter invocations.
+    """
+    # ICD-10 format: Letter + 2 digits + optional decimal + 0-4 more digits
+    pattern = r"\b[A-Z]\d{2}\.?\d{0,4}\b"
+    return sorted(set(re.findall(pattern, content)))
+
+
+def main():
+    """Run the validator from the command line.
+
+    Parses the ``input_file`` argument and the optional ``--json`` flag,
+    reads the report as UTF-8, runs the three checks and prints either a
+    JSON document or a human-readable summary to stdout.
+
+    Returns:
+        0 when no "Do Not Use" abbreviation was found; 1 when at least one
+        was found, or when an exception was raised while reading or
+        reporting (the message is then written to stderr).
+    """
+    # Local import: in the visible file, sys is only imported under the
+    # __main__ guard, so it may be missing when main() is called after import.
+    import sys
+
+    parser = argparse.ArgumentParser(description="Validate medical terminology")
+    parser.add_argument("input_file", help="Path to clinical report")
+    parser.add_argument("--json", action="store_true")
+
+    args = parser.parse_args()
+
+    try:
+        with open(args.input_file, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        do_not_use = check_do_not_use_abbreviations(content)
+        ambiguous = check_ambiguous_abbreviations(content)
+        icd10_codes = validate_icd10_format(content)
+
+        if args.json:
+            report = {
+                "filename": args.input_file,
+                "do_not_use_violations": do_not_use,
+                "ambiguous_abbreviations": ambiguous,
+                "icd10_codes_found": icd10_codes,
+                # Counts distinct abbreviations, not individual occurrences.
+                "total_issues": len(do_not_use) + len(ambiguous)
+            }
+            print(json.dumps(report, indent=2))
+        else:
+            _print_text_report(do_not_use, ambiguous, icd10_codes)
+
+        return 0 if not do_not_use else 1
+
+    except Exception as e:
+        # Diagnostics go to stderr so stdout carries only the report; this
+        # matters for consumers that parse the --json output.
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+
+
+def _print_text_report(do_not_use, ambiguous, icd10_codes):
+    """Print the human-readable summary of the three check results to stdout."""
+    print("\nTerminology Validation Report:\n")
+
+    if do_not_use:
+        print("❌ DO NOT USE Abbreviations Found:")
+        for abbrev, details in do_not_use.items():
+            print(f"  {abbrev}: {details['count']} occurrence(s)")
+            print(f"    → Use '{details['should_use']}' instead")
+        print()
+    else:
+        print("✓ No prohibited abbreviations found\n")
+
+    if ambiguous:
+        print("⚠  Ambiguous Abbreviations Found:")
+        for abbrev, details in ambiguous.items():
+            print(f"  {abbrev}: {details['count']} occurrence(s)")
+        print("  Consider spelling out for clarity\n")
+
+    if icd10_codes:
+        print(f"ℹ  ICD-10 codes detected: {len(icd10_codes)}")
+        # Only the first five codes are listed; the rest are summarised.
+        for code in icd10_codes[:5]:
+            print(f"  - {code}")
+        if len(icd10_codes) > 5:
+            print(f"  ... and {len(icd10_codes) - 5} more")
+    print()
+
+
+# Script entry point: hand main()'s return value to the shell as exit status.
+if __name__ == "__main__":
+    import sys
+
+    exit_status = main()
+    sys.exit(exit_status)
+