134 lines
4.0 KiB
Python
Executable File
134 lines
4.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
Validate medical terminology and coding in clinical reports.
|
||
|
||
Usage:
|
||
python terminology_validator.py <report_file>
|
||
"""
|
||
|
||
import argparse
import json
import re
import sys
|
||
|
||
|
||
# Prohibited abbreviations (JCAHO "Do Not Use" list), each mapped to the
# wording that should be written instead. Insertion order is the order in
# which violations are reported.
DO_NOT_USE = dict(
    U="Unit",
    IU="International Unit",
    QD="daily",
    QOD="every other day",
    MS="morphine sulfate or magnesium sulfate",
    MSO4="morphine sulfate",
    MgSO4="magnesium sulfate",
)
|
||
|
||
# Abbreviations that are not prohibited but can be read more than one way.
# They are searched for case-insensitively by check_ambiguous_abbreviations.
AMBIGUOUS = [
    "cc",
    "hs",
    "TIW",
    "SC",
    "SQ",
    "D/C",
    "AS",
    "AD",
    "AU",
    "OS",
    "OD",
    "OU",
]
|
||
|
||
|
||
def check_do_not_use_abbreviations(content: str, *, abbreviations=None) -> dict:
    """Find prohibited ("Do Not Use") abbreviations in a report.

    Matching is case-sensitive. An abbreviation is counted when it is not
    glued to a preceding letter/underscore and not followed by any word
    character, so both "4 U" and the dose-attached form "10U" are reported,
    while "Units" or "MUST" are not.

    Args:
        content: Full text of the clinical report.
        abbreviations: Optional mapping of abbreviation -> preferred wording.
            When omitted, the module-level ``DO_NOT_USE`` table is used.

    Returns:
        A dict keyed by each abbreviation that occurs at least once. Every
        value holds ``count`` (number of occurrences), ``should_use`` (the
        preferred wording) and ``severity`` (always ``"HIGH"``).
    """
    table = DO_NOT_USE if abbreviations is None else abbreviations
    violations = {}

    for abbrev, meaning in table.items():
        # A plain \b on the left never matches between a digit and a letter,
        # which would hide "10U" / "500IU". Only a preceding letter or
        # underscore (a word character that is not a digit) blocks a match.
        pattern = rf"(?<![^\W\d]){re.escape(abbrev)}(?!\w)"
        count = len(re.findall(pattern, content))
        if count:
            violations[abbrev] = {
                "count": count,
                "should_use": meaning,
                "severity": "HIGH",
            }

    return violations
|
||
|
||
|
||
def check_ambiguous_abbreviations(content: str) -> dict:
    """Count occurrences of each abbreviation listed in ``AMBIGUOUS``.

    Every entry is searched for as a whole word, ignoring case.

    NOTE(review): because matching ignores case, entries such as "AS" and
    "AD" also count the ordinary lowercase words "as" and "ad" - confirm
    whether that is intended.

    Args:
        content: Full text of the clinical report.

    Returns:
        A dict keyed by each abbreviation found at least once; every value
        holds ``count`` and ``severity`` (always ``"MEDIUM"``).
    """
    tallies = (
        (term, len(re.findall(rf"\b{re.escape(term)}\b", content, re.IGNORECASE)))
        for term in AMBIGUOUS
    )
    return {
        term: {"count": hits, "severity": "MEDIUM"}
        for term, hits in tallies
        if hits
    }
|
||
|
||
|
||
def validate_icd10_format(content: str) -> list:
    """Extract the distinct ICD-10-shaped codes mentioned in a report.

    A code is recognised as one uppercase letter, two digits, an optional
    decimal point and up to four further digits, bounded by word breaks.
    The codes are only matched by shape; they are not checked against any
    code set.

    Args:
        content: Full text of the clinical report.

    Returns:
        The unique codes found, sorted so that repeated runs over the same
        report produce the same order (an unordered set would not).
    """
    # ICD-10 format: letter + 2 digits + optional decimal + 0-4 more digits
    pattern = r"\b[A-Z]\d{2}\.?\d{0,4}\b"
    return sorted(set(re.findall(pattern, content)))
|
||
|
||
|
||
def _print_text_report(do_not_use: dict, ambiguous: dict, icd10_codes: list) -> None:
    """Print the human-readable validation report to stdout."""
    print("\nTerminology Validation Report:\n")

    if do_not_use:
        print("❌ DO NOT USE Abbreviations Found:")
        for abbrev, details in do_not_use.items():
            print(f" {abbrev}: {details['count']} occurrence(s)")
            print(f" → Use '{details['should_use']}' instead")
        print()
    else:
        print("✓ No prohibited abbreviations found\n")

    if ambiguous:
        print("⚠ Ambiguous Abbreviations Found:")
        for abbrev, details in ambiguous.items():
            print(f" {abbrev}: {details['count']} occurrence(s)")
        print(" Consider spelling out for clarity\n")

    if icd10_codes:
        print(f"ℹ ICD-10 codes detected: {len(icd10_codes)}")
        # Only preview the first five codes to keep the report short.
        for code in icd10_codes[:5]:
            print(f" - {code}")
        if len(icd10_codes) > 5:
            print(f" ... and {len(icd10_codes) - 5} more")
        print()


def main(argv=None) -> int:
    """Run the terminology validator from the command line.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        0 when no prohibited abbreviation was found, 1 when at least one was
        found or when the input file could not be read.
    """
    parser = argparse.ArgumentParser(description="Validate medical terminology")
    parser.add_argument("input_file", help="Path to clinical report")
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print the report as JSON instead of text",
    )
    args = parser.parse_args(argv)

    # Only reading the file is an expected failure. Anything raised later is
    # a programming error and should surface with a traceback.
    try:
        with open(args.input_file, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # stderr keeps the message out of stdout, where --json consumers read.
        print(f"Error: {e}", file=sys.stderr)
        return 1

    do_not_use = check_do_not_use_abbreviations(content)
    ambiguous = check_ambiguous_abbreviations(content)
    icd10_codes = validate_icd10_format(content)

    if args.json:
        report = {
            "filename": args.input_file,
            "do_not_use_violations": do_not_use,
            "ambiguous_abbreviations": ambiguous,
            "icd10_codes_found": icd10_codes,
            # Number of distinct flagged abbreviations, not occurrences.
            "total_issues": len(do_not_use) + len(ambiguous),
        }
        print(json.dumps(report, indent=2))
    else:
        _print_text_report(do_not_use, ambiguous, icd10_codes)

    # Only prohibited abbreviations fail the run; ambiguous ones are advisory.
    return 1 if do_not_use else 0
|
||
|
||
|
||
# Script entry point: propagate main()'s return value as the process exit
# status. `sys` is imported at the top of the file.
if __name__ == "__main__":
    sys.exit(main())
|
||
|