Files
gh-k-dense-ai-claude-scient…/skills/clinical-reports/scripts/terminology_validator.py
2025-11-30 08:30:18 +08:00

134 lines
4.0 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Validate medical terminology and coding in clinical reports.
Usage:
    python terminology_validator.py <report_file> [--json]
"""
import argparse
import json
import re
# Abbreviations taken from the JCAHO "Do Not Use" list. Each key is the
# prohibited abbreviation; the value is the wording to write instead.
DO_NOT_USE = {
    "U": "Unit",
    "IU": "International Unit",
    "QD": "daily",
    "QOD": "every other day",
    "MS": "morphine sulfate or magnesium sulfate",
    "MSO4": "morphine sulfate",
    "MgSO4": "magnesium sulfate",
}

# Abbreviations that are not prohibited outright but are easy to misread.
AMBIGUOUS = [
    "cc",
    "hs",
    "TIW",
    "SC",
    "SQ",
    "D/C",
    "AS",
    "AD",
    "AU",
    "OS",
    "OD",
    "OU",
]
def check_do_not_use_abbreviations(content: str, abbreviations=None) -> dict:
    """Find prohibited ("Do Not Use") abbreviations in *content*.

    Matching is case-sensitive. An abbreviation is reported when it stands on
    its own or directly follows a number (``10U``, ``400IU``); it is not
    reported when it is merely part of a longer word (``USA``).

    Args:
        content: Report text to scan.
        abbreviations: Optional mapping of abbreviation -> preferred wording.
            When ``None`` the module-level ``DO_NOT_USE`` table is used.

    Returns:
        A dict keyed by each abbreviation that occurs at least once. Every
        value is a dict with ``count`` (number of occurrences), ``should_use``
        (the preferred wording) and ``severity`` (always ``"HIGH"``).
    """
    table = DO_NOT_USE if abbreviations is None else abbreviations
    violations = {}
    for abbrev, meaning in table.items():
        # A plain \b on the left treats "10U" as a single word and misses it.
        # The lookbehind therefore only rejects a preceding letter or
        # underscore, so a dose digit may sit directly before the abbreviation.
        pattern = rf"(?<![^\W\d]){re.escape(abbrev)}(?!\w)"
        count = len(re.findall(pattern, content))
        if count:
            violations[abbrev] = {
                "count": count,
                "should_use": meaning,
                "severity": "HIGH",
            }
    return violations
# Abbreviations whose lowercase spelling is an ordinary English word. These
# are only reported when written exactly as listed (in capitals); otherwise
# every "as" or "ad" in running prose would be counted as a finding.
_ENGLISH_WORD_COLLISIONS = frozenset({"AS", "AD"})


def check_ambiguous_abbreviations(content: str, abbreviations=None) -> dict:
    """Find ambiguous abbreviations in *content*.

    Matching ignores case, except for the entries in
    ``_ENGLISH_WORD_COLLISIONS``, which must appear exactly as listed. An
    abbreviation is reported when it stands on its own or directly follows a
    number (``5cc``); it is not reported inside a longer word.

    Args:
        content: Report text to scan.
        abbreviations: Optional iterable of abbreviations to look for. When
            ``None`` the module-level ``AMBIGUOUS`` list is used.

    Returns:
        A dict keyed by each abbreviation that occurs at least once. Every
        value is a dict with ``count`` (number of occurrences) and
        ``severity`` (always ``"MEDIUM"``).
    """
    candidates = AMBIGUOUS if abbreviations is None else abbreviations
    found = {}
    for abbrev in candidates:
        # The left edge rejects only a preceding letter/underscore so that a
        # dose digit directly before the abbreviation ("5cc") still matches;
        # a plain \b would treat "5cc" as one word and skip it.
        pattern = rf"(?<![^\W\d]){re.escape(abbrev)}(?!\w)"
        flags = 0 if abbrev in _ENGLISH_WORD_COLLISIONS else re.IGNORECASE
        count = len(re.findall(pattern, content, flags))
        if count:
            found[abbrev] = {
                "count": count,
                "severity": "MEDIUM",
            }
    return found
def validate_icd10_format(content: str) -> list:
    """Extract strings from *content* that are shaped like ICD-10 codes.

    A candidate is an uppercase letter followed by two digits, optionally
    followed by either

    * a decimal point and 1-4 uppercase alphanumerics (``E11.9``,
      ``S72.001A``), or
    * 1-4 further digits with no decimal point (``E119``).

    Only the shape is checked; the codes are not looked up anywhere.

    Args:
        content: Report text to scan.

    Returns:
        The distinct candidates, sorted so the result is the same on every
        run.
    """
    # The decimal point is only consumed together with at least one following
    # character. Making it independently optional lets the regex backtrack to
    # a match that ends in a bare "." (e.g. "S72." out of "S72.001A").
    pattern = r"\b[A-Z]\d{2}(?:\.[0-9A-Z]{1,4}|\d{1,4})?\b"
    return sorted(set(re.findall(pattern, content)))
def _print_text_report(do_not_use: dict, ambiguous: dict, icd10_codes: list) -> None:
    """Print the human-readable validation report to stdout."""
    print("\nTerminology Validation Report:\n")

    if do_not_use:
        print("❌ DO NOT USE Abbreviations Found:")
        for abbrev, details in do_not_use.items():
            print(f" {abbrev}: {details['count']} occurrence(s)")
            print(f" → Use '{details['should_use']}' instead")
        print()
    else:
        print("✓ No prohibited abbreviations found\n")

    if ambiguous:
        print("⚠ Ambiguous Abbreviations Found:")
        for abbrev, details in ambiguous.items():
            print(f" {abbrev}: {details['count']} occurrence(s)")
        print(" Consider spelling out for clarity\n")

    if icd10_codes:
        print(f" ICD-10 codes detected: {len(icd10_codes)}")
        # Only the first five codes are listed; the rest are summarised.
        for code in icd10_codes[:5]:
            print(f" - {code}")
        if len(icd10_codes) > 5:
            print(f" ... and {len(icd10_codes) - 5} more")
        print()


def main(argv=None):
    """Command-line entry point: validate one clinical report file.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse reads ``sys.argv[1:]``.

    Returns:
        ``0`` when no "Do Not Use" abbreviation was found; ``1`` when at least
        one was found or when the input file could not be read. Ambiguous
        abbreviations alone do not change the exit status.
    """
    # Imported locally because the module header does not import sys.
    import sys

    parser = argparse.ArgumentParser(description="Validate medical terminology")
    parser.add_argument("input_file", help="Path to clinical report")
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print the report as JSON instead of text",
    )
    args = parser.parse_args(argv)

    # Only reading the file is expected to fail at runtime. Catching just
    # those errors keeps genuine programming errors visible as tracebacks.
    try:
        with open(args.input_file, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # stderr, so that stdout stays clean for --json consumers.
        print(f"Error: {e}", file=sys.stderr)
        return 1

    do_not_use = check_do_not_use_abbreviations(content)
    ambiguous = check_ambiguous_abbreviations(content)
    icd10_codes = validate_icd10_format(content)

    if args.json:
        report = {
            "filename": args.input_file,
            "do_not_use_violations": do_not_use,
            "ambiguous_abbreviations": ambiguous,
            "icd10_codes_found": icd10_codes,
            # Counts distinct abbreviations, not individual occurrences.
            "total_issues": len(do_not_use) + len(ambiguous),
        }
        print(json.dumps(report, indent=2))
    else:
        _print_text_report(do_not_use, ambiguous, icd10_codes)

    return 1 if do_not_use else 0
if __name__ == "__main__":
    import sys

    # Hand main()'s return value to the shell as the process exit status.
    exit_status = main()
    sys.exit(exit_status)