Initial commit
This commit is contained in:
137
skills/pydicom/scripts/anonymize_dicom.py
Executable file
137
skills/pydicom/scripts/anonymize_dicom.py
Executable file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Anonymize DICOM files by removing or replacing Protected Health Information (PHI).
|
||||
|
||||
Usage:
|
||||
python anonymize_dicom.py input.dcm output.dcm
|
||||
python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import pydicom
|
||||
except ImportError:
|
||||
print("Error: pydicom is not installed. Install it with: pip install pydicom")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# Tags commonly containing PHI (Protected Health Information)
|
||||
PHI_TAGS = [
|
||||
'PatientName', 'PatientID', 'PatientBirthDate', 'PatientBirthTime',
|
||||
'PatientSex', 'PatientAge', 'PatientSize', 'PatientWeight',
|
||||
'PatientAddress', 'PatientTelephoneNumbers', 'PatientMotherBirthName',
|
||||
'MilitaryRank', 'EthnicGroup', 'Occupation', 'PatientComments',
|
||||
'InstitutionName', 'InstitutionAddress', 'InstitutionalDepartmentName',
|
||||
'ReferringPhysicianName', 'ReferringPhysicianAddress',
|
||||
'ReferringPhysicianTelephoneNumbers', 'ReferringPhysicianIdentificationSequence',
|
||||
'PerformingPhysicianName', 'PerformingPhysicianIdentificationSequence',
|
||||
'OperatorsName', 'PhysiciansOfRecord', 'PhysiciansOfRecordIdentificationSequence',
|
||||
'NameOfPhysiciansReadingStudy', 'PhysiciansReadingStudyIdentificationSequence',
|
||||
'StudyDescription', 'SeriesDescription', 'AdmittingDiagnosesDescription',
|
||||
'DerivationDescription', 'RequestingPhysician', 'RequestingService',
|
||||
'RequestedProcedureDescription', 'ScheduledPerformingPhysicianName',
|
||||
'PerformedLocation', 'PerformedStationName',
|
||||
]
|
||||
|
||||
|
||||
def anonymize_dicom(input_path, output_path, patient_id='ANONYMOUS', patient_name='ANONYMOUS'):
|
||||
"""
|
||||
Anonymize a DICOM file by removing or replacing PHI.
|
||||
|
||||
Args:
|
||||
input_path: Path to input DICOM file
|
||||
output_path: Path to output anonymized DICOM file
|
||||
patient_id: Replacement patient ID (default: 'ANONYMOUS')
|
||||
patient_name: Replacement patient name (default: 'ANONYMOUS')
|
||||
"""
|
||||
try:
|
||||
# Read DICOM file
|
||||
ds = pydicom.dcmread(input_path)
|
||||
|
||||
# Track what was anonymized
|
||||
anonymized = []
|
||||
|
||||
# Remove or replace sensitive data
|
||||
for tag in PHI_TAGS:
|
||||
if hasattr(ds, tag):
|
||||
if tag == 'PatientName':
|
||||
ds.PatientName = patient_name
|
||||
anonymized.append(f"{tag}: replaced with '{patient_name}'")
|
||||
elif tag == 'PatientID':
|
||||
ds.PatientID = patient_id
|
||||
anonymized.append(f"{tag}: replaced with '{patient_id}'")
|
||||
elif tag == 'PatientBirthDate':
|
||||
ds.PatientBirthDate = '19000101'
|
||||
anonymized.append(f"{tag}: replaced with '19000101'")
|
||||
else:
|
||||
delattr(ds, tag)
|
||||
anonymized.append(f"{tag}: removed")
|
||||
|
||||
# Anonymize UIDs if present (optional - maintains referential integrity)
|
||||
# Uncomment if you want to anonymize UIDs as well
|
||||
# if hasattr(ds, 'StudyInstanceUID'):
|
||||
# ds.StudyInstanceUID = pydicom.uid.generate_uid()
|
||||
# if hasattr(ds, 'SeriesInstanceUID'):
|
||||
# ds.SeriesInstanceUID = pydicom.uid.generate_uid()
|
||||
# if hasattr(ds, 'SOPInstanceUID'):
|
||||
# ds.SOPInstanceUID = pydicom.uid.generate_uid()
|
||||
|
||||
# Save anonymized file
|
||||
ds.save_as(output_path)
|
||||
|
||||
return True, anonymized
|
||||
|
||||
except Exception as e:
|
||||
return False, str(e)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Anonymize DICOM files by removing or replacing PHI',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
python anonymize_dicom.py input.dcm output.dcm
|
||||
python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001
|
||||
python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001 --patient-name "Anonymous^Patient"
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument('input', type=str, help='Input DICOM file')
|
||||
parser.add_argument('output', type=str, help='Output anonymized DICOM file')
|
||||
parser.add_argument('--patient-id', type=str, default='ANONYMOUS',
|
||||
help='Replacement patient ID (default: ANONYMOUS)')
|
||||
parser.add_argument('--patient-name', type=str, default='ANONYMOUS',
|
||||
help='Replacement patient name (default: ANONYMOUS)')
|
||||
parser.add_argument('-v', '--verbose', action='store_true',
|
||||
help='Show detailed anonymization information')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate input file exists
|
||||
input_path = Path(args.input)
|
||||
if not input_path.exists():
|
||||
print(f"Error: Input file '{args.input}' not found")
|
||||
sys.exit(1)
|
||||
|
||||
# Anonymize the file
|
||||
print(f"Anonymizing: {args.input}")
|
||||
success, result = anonymize_dicom(args.input, args.output,
|
||||
args.patient_id, args.patient_name)
|
||||
|
||||
if success:
|
||||
print(f"✓ Successfully anonymized DICOM file: {args.output}")
|
||||
if args.verbose:
|
||||
print(f"\nAnonymized {len(result)} fields:")
|
||||
for item in result:
|
||||
print(f" - {item}")
|
||||
else:
|
||||
print(f"✗ Error: {result}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user