Initial commit
This commit is contained in:
137
skills/pydicom/scripts/anonymize_dicom.py
Executable file
137
skills/pydicom/scripts/anonymize_dicom.py
Executable file
@@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Anonymize DICOM files by removing or replacing Protected Health Information (PHI).
|
||||
|
||||
Usage:
|
||||
python anonymize_dicom.py input.dcm output.dcm
|
||||
python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import pydicom
|
||||
except ImportError:
|
||||
print("Error: pydicom is not installed. Install it with: pip install pydicom")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# Keywords of DICOM attributes that commonly contain PHI (Protected Health
# Information).  anonymize_dicom() replaces PatientName, PatientID and
# PatientBirthDate and deletes every other keyword listed here.
# NOTE(review): this is a pragmatic subset, not the complete DICOM PS3.15
# Annex E confidentiality profile -- confirm it satisfies your
# de-identification requirements before relying on it.
PHI_TAGS = [
    # Patient identity and demographics
    'PatientName', 'PatientID', 'PatientBirthDate', 'PatientBirthTime',
    'PatientSex', 'PatientAge', 'PatientSize', 'PatientWeight',
    'PatientAddress', 'PatientTelephoneNumbers', 'PatientMotherBirthName',
    'MilitaryRank', 'EthnicGroup', 'Occupation', 'PatientComments',
    # Institution
    'InstitutionName', 'InstitutionAddress', 'InstitutionalDepartmentName',
    # Physicians and operators
    'ReferringPhysicianName', 'ReferringPhysicianAddress',
    'ReferringPhysicianTelephoneNumbers', 'ReferringPhysicianIdentificationSequence',
    'PerformingPhysicianName', 'PerformingPhysicianIdentificationSequence',
    'OperatorsName', 'PhysiciansOfRecord', 'PhysiciansOfRecordIdentificationSequence',
    'NameOfPhysiciansReadingStudy', 'PhysiciansReadingStudyIdentificationSequence',
    # Free-text descriptions and procedure details
    'StudyDescription', 'SeriesDescription', 'AdmittingDiagnosesDescription',
    'DerivationDescription', 'RequestingPhysician', 'RequestingService',
    'RequestedProcedureDescription', 'ScheduledPerformingPhysicianName',
    'PerformedLocation', 'PerformedStationName',
    # Further direct patient identifiers that would otherwise survive
    # anonymization (alternate IDs/names, relatives, residence, history)
    'OtherPatientIDs', 'OtherPatientNames', 'OtherPatientIDsSequence',
    'PatientBirthName', 'MedicalRecordLocator', 'AdditionalPatientHistory',
    'ResponsiblePerson', 'CountryOfResidence', 'RegionOfResidence',
    'PatientReligiousPreference', 'PatientInsurancePlanCodeSequence',
]
|
||||
|
||||
|
||||
def _scrub_dataset(dataset, replacements, phi_keywords, report, path=''):
    """Replace or delete PHI elements in *dataset* and in all nested sequence items.

    Args:
        dataset: pydicom dataset (or sequence item) to modify in place
        replacements: Mapping of keyword -> replacement value
        phi_keywords: Set of keywords that must not survive unchanged
        report: List that receives one human-readable line per change
        path: Prefix describing where *dataset* sits inside its parent
    """
    # Snapshot the elements first: entries are deleted while we iterate.
    for elem in list(dataset):
        keyword = elem.keyword
        # Private/unknown elements have an empty keyword; label them by tag.
        label = f"{path}{keyword or elem.tag}"
        if keyword in phi_keywords:
            if keyword in replacements:
                elem.value = replacements[keyword]
                report.append(f"{label}: replaced with '{replacements[keyword]}'")
            else:
                del dataset[elem.tag]
                report.append(f"{label}: removed")
        elif elem.VR == 'SQ':
            # PHI is frequently repeated inside sequence items (request
            # attributes, referenced patient, ...), so descend into them.
            for index, item in enumerate(elem.value):
                _scrub_dataset(item, replacements, phi_keywords, report,
                               path=f"{label}[{index}].")


def anonymize_dicom(input_path, output_path, patient_id='ANONYMOUS', patient_name='ANONYMOUS'):
    """
    Anonymize a DICOM file by removing or replacing PHI.

    Every element whose keyword is listed in PHI_TAGS is processed, both at
    the top level of the dataset and inside nested sequence items.
    PatientName, PatientID and PatientBirthDate are replaced; every other
    listed element is deleted.

    Args:
        input_path: Path to input DICOM file
        output_path: Path to output anonymized DICOM file
        patient_id: Replacement patient ID (default: 'ANONYMOUS')
        patient_name: Replacement patient name (default: 'ANONYMOUS')

    Returns:
        (True, changes) on success, where changes is a list of strings
        describing each replaced or removed element; (False, message) when
        reading, anonymizing or saving fails.
    """
    replacements = {
        'PatientName': patient_name,
        'PatientID': patient_id,
        'PatientBirthDate': '19000101',
    }

    try:
        # Read DICOM file
        ds = pydicom.dcmread(input_path)

        # Track what was anonymized
        anonymized = []
        _scrub_dataset(ds, replacements, frozenset(PHI_TAGS), anonymized)

        # NOTE: Study/Series/SOP Instance UIDs are deliberately left untouched
        # so references between files keep working.  Assign fresh values from
        # pydicom.uid.generate_uid() here if UIDs must be anonymized as well.

        # Save anonymized file
        ds.save_as(output_path)

        return True, anonymized

    except Exception as e:
        # Callers expect a (success, detail) tuple rather than an exception.
        return False, str(e)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments, anonymize one file, report the result.

    Exits with status 1 when the input file does not exist or when
    anonymize_dicom() reports a failure.
    """
    cli = argparse.ArgumentParser(
        description='Anonymize DICOM files by removing or replacing PHI',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\nExamples:\n"
            "python anonymize_dicom.py input.dcm output.dcm\n"
            "python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001\n"
            "python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001"
            " --patient-name \"Anonymous^Patient\"\n"
        ),
    )
    cli.add_argument('input', type=str, help='Input DICOM file')
    cli.add_argument('output', type=str, help='Output anonymized DICOM file')
    cli.add_argument(
        '--patient-id', type=str, default='ANONYMOUS',
        help='Replacement patient ID (default: ANONYMOUS)',
    )
    cli.add_argument(
        '--patient-name', type=str, default='ANONYMOUS',
        help='Replacement patient name (default: ANONYMOUS)',
    )
    cli.add_argument(
        '-v', '--verbose', action='store_true',
        help='Show detailed anonymization information',
    )
    opts = cli.parse_args()

    # Bail out early when there is nothing to read.
    if not Path(opts.input).exists():
        print(f"Error: Input file '{opts.input}' not found")
        sys.exit(1)

    print(f"Anonymizing: {opts.input}")
    ok, outcome = anonymize_dicom(
        opts.input, opts.output, opts.patient_id, opts.patient_name
    )

    # On failure `outcome` is the error message; on success the change list.
    if not ok:
        print(f"✗ Error: {outcome}")
        sys.exit(1)

    print(f"✓ Successfully anonymized DICOM file: {opts.output}")
    if opts.verbose:
        print(f"\nAnonymized {len(outcome)} fields:")
        for entry in outcome:
            print(f" - {entry}")


if __name__ == '__main__':
    main()
|
||||
172
skills/pydicom/scripts/dicom_to_image.py
Executable file
172
skills/pydicom/scripts/dicom_to_image.py
Executable file
@@ -0,0 +1,172 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convert DICOM files to common image formats (PNG, JPEG, TIFF).
|
||||
|
||||
Usage:
|
||||
python dicom_to_image.py input.dcm output.png
|
||||
python dicom_to_image.py input.dcm output.jpg --format JPEG
|
||||
python dicom_to_image.py input.dcm output.tiff --apply-windowing
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import pydicom
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
except ImportError as e:
|
||||
print(f"Error: Required package not installed: {e}")
|
||||
print("Install with: pip install pydicom pillow numpy")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def apply_windowing(pixel_array, ds):
    """Apply VOI LUT windowing if available.

    Args:
        pixel_array: Pixel data to transform
        ds: Dataset supplying the VOI LUT / window attributes

    Returns:
        The result of pydicom's apply_voi_lut(), or *pixel_array* unchanged
        when the helper cannot be imported or raises AttributeError.
    """
    try:
        try:
            # pydicom 3.x exposes the pixel utilities from pydicom.pixels;
            # the legacy module below is deprecated there.
            from pydicom.pixels import apply_voi_lut
        except ImportError:
            # Older pydicom releases only provide the legacy location.
            from pydicom.pixel_data_handlers.util import apply_voi_lut
        return apply_voi_lut(pixel_array, ds)
    except (ImportError, AttributeError):
        # Best effort: windowing is optional, so fall back to the raw data.
        return pixel_array
|
||||
|
||||
|
||||
def normalize_to_uint8(pixel_array):
    """Normalize pixel array to uint8 (0-255) range.

    Arrays that are already uint8 are returned unchanged.  Otherwise the
    minimum maps to 0 and the maximum to 255; a constant array maps to all
    zeros.

    Args:
        pixel_array: NumPy array of pixel values

    Returns:
        NumPy uint8 array with the same shape as the input.
    """
    if pixel_array.dtype == np.uint8:
        return pixel_array

    # Do the arithmetic in float64: subtracting in the input's own integer
    # dtype wraps around when (max - min) exceeds the dtype's range, e.g.
    # int16 data spanning -30000..30000.
    values = pixel_array.astype(np.float64)
    low = values.min()
    high = values.max()

    if high > low:
        scaled = (values - low) / (high - low)
    else:
        # Constant image: avoid dividing by zero.
        scaled = np.zeros_like(values)

    # Scale to 0-255 (truncating, as before)
    return (scaled * 255).astype(np.uint8)
|
||||
|
||||
|
||||
def convert_dicom_to_image(input_path, output_path, image_format='PNG',
                           apply_window=False, frame=0):
    """
    Convert DICOM file to standard image format.

    Args:
        input_path: Path to input DICOM file
        output_path: Path to output image file
        image_format: Output format (PNG, JPEG, TIFF, etc.)
        apply_window: Whether to apply VOI LUT windowing
        frame: Frame number for multi-frame DICOM files

    Returns:
        (True, info) on success, where info is a dict with the keys 'shape'
        (shape of the decoded pixel array), 'modality' and 'bits_allocated';
        (False, message) when the frame is out of range or any step fails.
    """
    try:
        # Read DICOM file
        ds = pydicom.dcmread(input_path)

        # Get pixel array
        pixel_array = ds.pixel_array
        source_shape = pixel_array.shape

        # Handle multi-frame DICOM.  pydicom documents the array as
        # (rows, cols[, samples]) for one frame with a leading frame axis for
        # multi-frame data, so a 3-D array alone is ambiguous: it is either a
        # grayscale stack or a single colour frame.  SamplesPerPixel decides.
        samples_per_pixel = int(getattr(ds, 'SamplesPerPixel', 1) or 1)
        frame_ndim = 3 if samples_per_pixel > 1 else 2
        if pixel_array.ndim > frame_ndim:
            frame_count = pixel_array.shape[0]
            # Reject negative indices too; they would silently wrap around.
            if not 0 <= frame < frame_count:
                return False, f"Frame {frame} out of range (0-{frame_count - 1})"
            pixel_array = pixel_array[frame]
            print(f"Extracting frame {frame} of {frame_count}")

        # Apply windowing if requested
        if apply_window and hasattr(ds, 'WindowCenter'):
            pixel_array = apply_windowing(pixel_array, ds)

        # Handle color images
        if pixel_array.ndim == 3 and pixel_array.shape[2] in (3, 4):
            # RGB or RGBA image
            photometric = getattr(ds, 'PhotometricInterpretation', '')
            if photometric in ('YBR_FULL', 'YBR_FULL_422'):
                # Convert from YBR to RGB
                # NOTE(review): newer pydicom releases may already return RGB
                # from pixel_array for some transfer syntaxes -- confirm this
                # does not convert twice with the pydicom version in use.
                try:
                    try:
                        from pydicom.pixels import convert_color_space
                    except ImportError:
                        from pydicom.pixel_data_handlers.util import convert_color_space
                    pixel_array = convert_color_space(pixel_array, photometric, 'RGB')
                except ImportError:
                    print("Warning: Could not convert color space, using as-is")

            # Pillow only builds RGB/RGBA images from 8-bit samples.
            if pixel_array.dtype != np.uint8:
                pixel_array = normalize_to_uint8(pixel_array)
            image = Image.fromarray(pixel_array)
        else:
            # Grayscale image - normalize to uint8
            pixel_array = normalize_to_uint8(pixel_array)
            image = Image.fromarray(pixel_array, mode='L')

        # Save image
        image.save(output_path, format=image_format)

        return True, {
            'shape': source_shape,
            'modality': ds.Modality if hasattr(ds, 'Modality') else 'Unknown',
            'bits_allocated': ds.BitsAllocated if hasattr(ds, 'BitsAllocated') else 'Unknown',
        }

    except Exception as e:
        # Callers expect a (success, detail) tuple rather than an exception.
        return False, str(e)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments, convert one DICOM file, report the result.

    Exits with status 1 when the input file does not exist or when
    convert_dicom_to_image() reports a failure.
    """
    parser = argparse.ArgumentParser(
        description='Convert DICOM files to common image formats',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\nExamples:\n"
            "python dicom_to_image.py input.dcm output.png\n"
            "python dicom_to_image.py input.dcm output.jpg --format JPEG\n"
            "python dicom_to_image.py input.dcm output.tiff --apply-windowing\n"
            "python dicom_to_image.py multiframe.dcm frame5.png --frame 5\n"
        ),
    )

    parser.add_argument('input', type=str, help='Input DICOM file')
    parser.add_argument('output', type=str, help='Output image file')
    parser.add_argument('--format', type=str, choices=['PNG', 'JPEG', 'TIFF', 'BMP'],
                        help='Output image format (default: inferred from extension)')
    parser.add_argument('--apply-windowing', action='store_true',
                        help='Apply VOI LUT windowing if available')
    parser.add_argument('--frame', type=int, default=0,
                        help='Frame number for multi-frame DICOM files (default: 0)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show detailed conversion information')

    args = parser.parse_args()

    # Validate input file exists
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file '{args.input}' not found")
        sys.exit(1)

    # Determine output format
    if args.format:
        image_format = args.format
    else:
        # Infer from extension.  Map extensions to the format names the
        # --format option accepts: '.jpg' must become 'JPEG' and '.tif'
        # 'TIFF', because Pillow does not accept 'JPG'/'TIF' as writer names.
        extension_formats = {
            'PNG': 'PNG',
            'JPG': 'JPEG',
            'JPEG': 'JPEG',
            'TIF': 'TIFF',
            'TIFF': 'TIFF',
            'BMP': 'BMP',
        }
        ext = Path(args.output).suffix.upper().lstrip('.')
        image_format = extension_formats.get(ext, 'PNG')

    # Convert the file
    print(f"Converting: {args.input} -> {args.output}")
    success, result = convert_dicom_to_image(args.input, args.output,
                                             image_format, args.apply_windowing,
                                             args.frame)

    if success:
        print(f"✓ Successfully converted to {image_format}")
        if args.verbose:
            print(f"\nImage information:")
            print(f" - Shape: {result['shape']}")
            print(f" - Modality: {result['modality']}")
            print(f" - Bits Allocated: {result['bits_allocated']}")
    else:
        # On failure `result` carries the error message.
        print(f"✗ Error: {result}")
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
173
skills/pydicom/scripts/extract_metadata.py
Executable file
173
skills/pydicom/scripts/extract_metadata.py
Executable file
@@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extract and display DICOM metadata in a readable format.
|
||||
|
||||
Usage:
|
||||
python extract_metadata.py file.dcm
|
||||
python extract_metadata.py file.dcm --output metadata.txt
|
||||
python extract_metadata.py file.dcm --format json --output metadata.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import pydicom
|
||||
except ImportError:
|
||||
print("Error: pydicom is not installed. Install it with: pip install pydicom")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def format_value(value, max_bytes=64):
    """Format DICOM values for display.

    Args:
        value: Element value to render
        max_bytes: Longest bytes value that is still decoded as text; longer
            payloads (e.g. pixel data) are summarised by their size instead
            of being dumped as lossy decoded text.  Pass None to always
            decode.

    Returns:
        A string: decoded text or a size summary for bytes, a
        comma-separated list for multi-valued elements, an item count for
        sequences, and str(value) for everything else.
    """
    if isinstance(value, bytes):
        if max_bytes is not None and len(value) > max_bytes:
            return f"<binary data: {len(value)} bytes>"
        # errors='ignore' drops undecodable bytes, so this cannot raise.
        return value.decode('utf-8', errors='ignore')
    if isinstance(value, pydicom.multival.MultiValue):
        return ', '.join(str(v) for v in value)
    if isinstance(value, pydicom.sequence.Sequence):
        return f"Sequence with {len(value)} item(s)"
    return str(value)
|
||||
|
||||
|
||||
def extract_metadata_text(ds, show_sequences=False):
    """Render the dataset's metadata as a plain-text report.

    The report has a banner, the file meta information (when the dataset
    has a file_meta attribute), then fixed Patient / Study / Series / Image
    sections listing whichever of their attributes are present.  With
    show_sequences set, an "[All Elements]" section listing every element
    of the dataset is appended.

    Returns the report as one newline-joined string.
    """
    def row(label, value):
        # One report line: label padded to 40 columns, then the value.
        return f"{label:40s} {format_value(value)}"

    sections = (
        ("\n[Patient Information]",
         ('PatientName', 'PatientID', 'PatientBirthDate',
          'PatientSex', 'PatientAge', 'PatientWeight')),
        ("\n[Study Information]",
         ('StudyInstanceUID', 'StudyDate', 'StudyTime',
          'StudyDescription', 'AccessionNumber', 'StudyID')),
        ("\n[Series Information]",
         ('SeriesInstanceUID', 'SeriesNumber', 'SeriesDescription',
          'Modality', 'SeriesDate', 'SeriesTime')),
        ("\n[Image Information]",
         ('SOPInstanceUID', 'InstanceNumber', 'ImageType',
          'Rows', 'Columns', 'BitsAllocated', 'BitsStored',
          'PhotometricInterpretation', 'SamplesPerPixel',
          'PixelSpacing', 'SliceThickness', 'ImagePositionPatient',
          'ImageOrientationPatient', 'WindowCenter', 'WindowWidth')),
    )

    banner = "=" * 80
    report = [banner, "DICOM Metadata", banner]

    if hasattr(ds, 'file_meta'):
        report.append("\n[File Meta Information]")
        report.extend(row(elem.name, elem.value) for elem in ds.file_meta)

    for heading, keywords in sections:
        report.append(heading)
        for keyword in keywords:
            if hasattr(ds, keyword):
                report.append(row(keyword, getattr(ds, keyword)))

    if show_sequences:
        report.append("\n[All Elements]")
        # Sequence and non-sequence elements are rendered the same way.
        report.extend(row(elem.name, elem.value) for elem in ds)

    return '\n'.join(report)
|
||||
|
||||
|
||||
def extract_metadata_json(ds):
    """Extract metadata as JSON.

    Args:
        ds: Dataset to serialise

    Returns:
        A JSON string (2-space indent) with a 'dataset' object holding every
        non-sequence element and, when the dataset has a file_meta
        attribute, a 'file_meta' object.  Values are rendered with
        format_value().  Elements are keyed by keyword; elements without a
        keyword are keyed by their 8-digit hexadecimal tag.
    """
    def element_key(elem):
        # Private/unknown elements have an empty keyword; keying them all by
        # '' would make them overwrite each other, so fall back to the tag.
        return elem.keyword or f"{int(elem.tag):08X}"

    metadata = {}

    # File Meta Information
    if hasattr(ds, 'file_meta'):
        metadata['file_meta'] = {
            element_key(elem): format_value(elem.value) for elem in ds.file_meta
        }

    # All data elements (excluding sequences for simplicity)
    metadata['dataset'] = {
        element_key(elem): format_value(elem.value)
        for elem in ds
        if elem.VR != 'SQ'
    }

    return json.dumps(metadata, indent=2)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read one DICOM file and print or save its metadata.

    Exits with status 1 when the input file does not exist or when reading,
    extracting or writing fails.
    """
    parser = argparse.ArgumentParser(
        description='Extract and display DICOM metadata',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\nExamples:\n"
            "python extract_metadata.py file.dcm\n"
            "python extract_metadata.py file.dcm --output metadata.txt\n"
            "python extract_metadata.py file.dcm --format json --output metadata.json\n"
            "python extract_metadata.py file.dcm --show-sequences\n"
        ),
    )

    parser.add_argument('input', type=str, help='Input DICOM file')
    parser.add_argument('--output', '-o', type=str, help='Output file (default: print to console)')
    parser.add_argument('--format', type=str, choices=['text', 'json'], default='text',
                        help='Output format (default: text)')
    parser.add_argument('--show-sequences', action='store_true',
                        help='Include all data elements including sequences')

    args = parser.parse_args()

    # Validate input file exists
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file '{args.input}' not found")
        sys.exit(1)

    try:
        # Read DICOM file
        ds = pydicom.dcmread(args.input)

        # Extract metadata
        if args.format == 'json':
            output = extract_metadata_json(ds)
        else:
            output = extract_metadata_text(ds, args.show_sequences)

        # Write or print output
        if args.output:
            # Explicit UTF-8: the platform default encoding may be unable to
            # represent non-ASCII metadata such as patient names.
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(output)
            print(f"✓ Metadata extracted to: {args.output}")
        else:
            print(output)

    except Exception as e:
        # Top-level boundary: report any failure and signal it via exit code.
        print(f"✗ Error: {e}")
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user