Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:30:10 +08:00
commit f0bd18fb4e
824 changed files with 331919 additions and 0 deletions

428
skills/pydicom/SKILL.md Normal file
View File

@@ -0,0 +1,428 @@
---
name: pydicom
description: Python library for working with DICOM (Digital Imaging and Communications in Medicine) files. Use this skill when reading, writing, or modifying medical imaging data in DICOM format, extracting pixel data from medical images (CT, MRI, X-ray, ultrasound), anonymizing DICOM files, working with DICOM metadata and tags, converting DICOM images to other formats, handling compressed DICOM data, or processing medical imaging datasets. Applies to tasks involving medical image analysis, PACS systems, radiology workflows, and healthcare imaging applications.
---
# Pydicom
## Overview
Pydicom is a pure Python package for working with DICOM files, the standard format for medical imaging data. This skill provides guidance on reading, writing, and manipulating DICOM files, including working with pixel data, metadata, and various compression formats.
## When to Use This Skill
Use this skill when working with:
- Medical imaging files (CT, MRI, X-ray, ultrasound, PET, etc.)
- DICOM datasets requiring metadata extraction or modification
- Pixel data extraction and image processing from medical scans
- DICOM anonymization for research or data sharing
- Converting DICOM files to standard image formats
- Compressed DICOM data requiring decompression
- DICOM sequences and structured reports
- Multi-slice volume reconstruction
- PACS (Picture Archiving and Communication System) integration
## Installation
Install pydicom and common dependencies:
```bash
uv pip install pydicom
uv pip install pillow # For image format conversion
uv pip install numpy # For pixel array manipulation
uv pip install matplotlib # For visualization
```
For handling compressed DICOM files, additional packages may be needed:
```bash
uv pip install pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg # JPEG compression
uv pip install python-gdcm # Alternative compression handler
```
## Core Workflows
### Reading DICOM Files
Read a DICOM file using `pydicom.dcmread()`:
```python
import pydicom
# Read a DICOM file
ds = pydicom.dcmread('path/to/file.dcm')
# Access metadata
print(f"Patient Name: {ds.PatientName}")
print(f"Study Date: {ds.StudyDate}")
print(f"Modality: {ds.Modality}")
# Display all elements
print(ds)
```
**Key points:**
- `dcmread()` returns a `Dataset` object
- Access data elements using attribute notation (e.g., `ds.PatientName`) or tag notation (e.g., `ds[0x0010, 0x0010]`)
- Use `ds.file_meta` to access file metadata like Transfer Syntax UID
- Handle missing attributes with `getattr(ds, 'AttributeName', default_value)` or `hasattr(ds, 'AttributeName')`
### Working with Pixel Data
Extract and manipulate image data from DICOM files:
```python
import pydicom
import numpy as np
import matplotlib.pyplot as plt
# Read DICOM file
ds = pydicom.dcmread('image.dcm')
# Get pixel array (requires numpy)
pixel_array = ds.pixel_array
# Image information
print(f"Shape: {pixel_array.shape}")
print(f"Data type: {pixel_array.dtype}")
print(f"Rows: {ds.Rows}, Columns: {ds.Columns}")
# Apply windowing for display (CT/MRI)
if hasattr(ds, 'WindowCenter') and hasattr(ds, 'WindowWidth'):
from pydicom.pixel_data_handlers.util import apply_voi_lut
windowed_image = apply_voi_lut(pixel_array, ds)
else:
windowed_image = pixel_array
# Display image
plt.imshow(windowed_image, cmap='gray')
plt.title(f"{ds.Modality} - {ds.StudyDescription}")
plt.axis('off')
plt.show()
```
**Working with color images:**
```python
# RGB images have shape (rows, columns, 3)
if ds.PhotometricInterpretation == 'RGB':
rgb_image = ds.pixel_array
plt.imshow(rgb_image)
elif ds.PhotometricInterpretation == 'YBR_FULL':
from pydicom.pixel_data_handlers.util import convert_color_space
rgb_image = convert_color_space(ds.pixel_array, 'YBR_FULL', 'RGB')
plt.imshow(rgb_image)
```
**Multi-frame images (videos/series):**
```python
# For multi-frame DICOM files
if hasattr(ds, 'NumberOfFrames') and ds.NumberOfFrames > 1:
frames = ds.pixel_array # Shape: (num_frames, rows, columns)
print(f"Number of frames: {frames.shape[0]}")
# Display specific frame
plt.imshow(frames[0], cmap='gray')
```
### Converting DICOM to Image Formats
Use the provided `dicom_to_image.py` script or convert manually:
```python
from PIL import Image
import pydicom
import numpy as np
ds = pydicom.dcmread('input.dcm')
pixel_array = ds.pixel_array
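# Normalize to 0-255 range (convert to float first to avoid integer overflow,
# and guard against constant images to avoid dividing by zero)
if pixel_array.dtype != np.uint8:
    pixel_array = pixel_array.astype(np.float64)
    value_range = pixel_array.max() - pixel_array.min()
    if value_range > 0:
        pixel_array = (pixel_array - pixel_array.min()) / value_range * 255
    else:
        pixel_array = np.zeros_like(pixel_array)
    pixel_array = pixel_array.astype(np.uint8)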
# Save as PNG
image = Image.fromarray(pixel_array)
image.save('output.png')
```
Use the script: `python scripts/dicom_to_image.py input.dcm output.png`
### Modifying Metadata
Modify DICOM data elements:
```python
import pydicom
from datetime import datetime
ds = pydicom.dcmread('input.dcm')
# Modify existing elements
ds.PatientName = "Doe^John"
ds.StudyDate = datetime.now().strftime('%Y%m%d')
ds.StudyDescription = "Modified Study"
# Add new elements
ds.SeriesNumber = 1
ds.SeriesDescription = "New Series"
# Remove elements
if hasattr(ds, 'PatientComments'):
delattr(ds, 'PatientComments')
# Or using del
if 'PatientComments' in ds:
del ds.PatientComments
# Save modified file
ds.save_as('modified.dcm')
```
### Anonymizing DICOM Files
Remove or replace patient identifiable information:
```python
import pydicom
from datetime import datetime
ds = pydicom.dcmread('input.dcm')
# Tags commonly containing PHI (Protected Health Information)
tags_to_anonymize = [
'PatientName', 'PatientID', 'PatientBirthDate',
'PatientSex', 'PatientAge', 'PatientAddress',
'InstitutionName', 'InstitutionAddress',
'ReferringPhysicianName', 'PerformingPhysicianName',
'OperatorsName', 'StudyDescription', 'SeriesDescription',
]
# Remove or replace sensitive data
for tag in tags_to_anonymize:
if hasattr(ds, tag):
if tag in ['PatientName', 'PatientID']:
setattr(ds, tag, 'ANONYMOUS')
elif tag == 'PatientBirthDate':
setattr(ds, tag, '19000101')
else:
delattr(ds, tag)
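# Replace the study date with a fixed placeholder.
# NOTE: a fixed date discards temporal relationships between studies; to keep
# them, shift every date field by the same per-patient offset instead.
if hasattr(ds, 'StudyDate'):
    ds.StudyDate = '20000101'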
# Keep pixel data intact
ds.save_as('anonymized.dcm')
```
Use the provided script: `python scripts/anonymize_dicom.py input.dcm output.dcm`
### Writing DICOM Files
Create DICOM files from scratch:
```python
import pydicom
from pydicom.dataset import Dataset, FileDataset
from datetime import datetime
import numpy as np
# Create file meta information
file_meta = Dataset()
file_meta.MediaStorageSOPClassUID = pydicom.uid.CTImageStorage  # Must match ds.SOPClassUID
file_meta.MediaStorageSOPInstanceUID = pydicom.uid.generate_uid()
file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian
# Create the FileDataset instance
ds = FileDataset('new_dicom.dcm', {}, file_meta=file_meta, preamble=b"\0" * 128)
# Add required DICOM elements
ds.PatientName = "Test^Patient"
ds.PatientID = "123456"
ds.Modality = "CT"
ds.StudyDate = datetime.now().strftime('%Y%m%d')
ds.StudyTime = datetime.now().strftime('%H%M%S')
ds.ContentDate = ds.StudyDate
ds.ContentTime = ds.StudyTime
# Add image-specific elements
ds.SamplesPerPixel = 1
ds.PhotometricInterpretation = "MONOCHROME2"
ds.Rows = 512
ds.Columns = 512
ds.BitsAllocated = 16
ds.BitsStored = 16
ds.HighBit = 15
ds.PixelRepresentation = 0
# Create pixel data
pixel_array = np.random.randint(0, 4096, (512, 512), dtype=np.uint16)
ds.PixelData = pixel_array.tobytes()
# Add required UIDs
ds.SOPClassUID = pydicom.uid.CTImageStorage
ds.SOPInstanceUID = file_meta.MediaStorageSOPInstanceUID
ds.SeriesInstanceUID = pydicom.uid.generate_uid()
ds.StudyInstanceUID = pydicom.uid.generate_uid()
# Save the file
ds.save_as('new_dicom.dcm')
```
### Compression and Decompression
Handle compressed DICOM files:
```python
import pydicom
# Read compressed DICOM file
ds = pydicom.dcmread('compressed.dcm')
# Check transfer syntax
print(f"Transfer Syntax: {ds.file_meta.TransferSyntaxUID}")
print(f"Transfer Syntax Name: {ds.file_meta.TransferSyntaxUID.name}")
# Decompress and save as uncompressed
ds.decompress()
ds.save_as('uncompressed.dcm', write_like_original=False)
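# Or compress when saving. RLE Lossless is encoded by pydicom itself; JPEG-LS and
# JPEG 2000 need extra packages; baseline JPEG encoding is not supported by compress()
ds_uncompressed = pydicom.dcmread('uncompressed.dcm')
ds_uncompressed.compress(pydicom.uid.RLELossless)
ds_uncompressed.save_as('compressed_rle.dcm')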
```
**Common transfer syntaxes:**
- `ExplicitVRLittleEndian` - Uncompressed, most common
- `JPEGBaseline8Bit` - JPEG lossy compression
- `JPEGLossless` - JPEG lossless compression
- `JPEG2000Lossless` - JPEG 2000 lossless
- `RLELossless` - Run-Length Encoding lossless
See `references/transfer_syntaxes.md` for complete list.
### Working with DICOM Sequences
Handle nested data structures:
```python
import pydicom
ds = pydicom.dcmread('file.dcm')
# Access sequences
if 'ReferencedStudySequence' in ds:
for item in ds.ReferencedStudySequence:
print(f"Referenced SOP Instance UID: {item.ReferencedSOPInstanceUID}")
# Create a sequence
from pydicom.dataset import Dataset
from pydicom.sequence import Sequence
sequence_item = Dataset()
sequence_item.ReferencedSOPClassUID = pydicom.uid.CTImageStorage
sequence_item.ReferencedSOPInstanceUID = pydicom.uid.generate_uid()
ds.ReferencedImageSequence = Sequence([sequence_item])
```
### Processing DICOM Series
Work with multiple related DICOM files:
```python
import pydicom
import numpy as np
from pathlib import Path
# Read all DICOM files in a directory
dicom_dir = Path('dicom_series/')
slices = []
for file_path in dicom_dir.glob('*.dcm'):
ds = pydicom.dcmread(file_path)
slices.append(ds)
# Sort by slice location or instance number
slices.sort(key=lambda x: float(x.ImagePositionPatient[2]))
# Or: slices.sort(key=lambda x: int(x.InstanceNumber))
# Create 3D volume
volume = np.stack([s.pixel_array for s in slices])
print(f"Volume shape: {volume.shape}") # (num_slices, rows, columns)
# Get spacing information for proper scaling
pixel_spacing = slices[0].PixelSpacing # [row_spacing, col_spacing]
slice_thickness = slices[0].SliceThickness
print(f"Voxel size: {pixel_spacing[0]}x{pixel_spacing[1]}x{slice_thickness} mm")
```
## Helper Scripts
This skill includes utility scripts in the `scripts/` directory:
### anonymize_dicom.py
Anonymize DICOM files by removing or replacing Protected Health Information (PHI).
```bash
python scripts/anonymize_dicom.py input.dcm output.dcm
```
### dicom_to_image.py
Convert DICOM files to common image formats (PNG, JPEG, TIFF).
```bash
python scripts/dicom_to_image.py input.dcm output.png
python scripts/dicom_to_image.py input.dcm output.jpg --format JPEG
```
### extract_metadata.py
Extract and display DICOM metadata in a readable format.
```bash
python scripts/extract_metadata.py file.dcm
python scripts/extract_metadata.py file.dcm --output metadata.txt
```
## Reference Materials
Detailed reference information is available in the `references/` directory:
- **common_tags.md**: Comprehensive list of commonly used DICOM tags organized by category (Patient, Study, Series, Image, etc.)
- **transfer_syntaxes.md**: Complete reference of DICOM transfer syntaxes and compression formats
## Common Issues and Solutions
**Issue: "Unable to decode pixel data"**
- Solution: Install additional compression handlers: `uv pip install pylibjpeg pylibjpeg-libjpeg python-gdcm`
**Issue: "AttributeError" when accessing tags**
- Solution: Check if attribute exists with `hasattr(ds, 'AttributeName')` or use `ds.get('AttributeName', default)`
**Issue: Incorrect image display (too dark/bright)**
- Solution: Apply VOI LUT windowing: `apply_voi_lut(pixel_array, ds)` or manually adjust with `WindowCenter` and `WindowWidth`
**Issue: Memory issues with large series**
- Solution: Process files iteratively, use memory-mapped arrays, or downsample images
## Best Practices
1. **Always check for required attributes** before accessing them using `hasattr()` or `get()`
2. **Preserve file metadata** when modifying files by using `save_as()` with `write_like_original=True`
3. **Use Transfer Syntax UIDs** to understand compression format before processing pixel data
4. **Handle exceptions** when reading files from untrusted sources
5. **Apply proper windowing** (VOI LUT) for medical image visualization
6. **Maintain spatial information** (pixel spacing, slice thickness) when processing 3D volumes
7. **Verify anonymization** thoroughly before sharing medical data
8. **Use UIDs correctly** - generate new UIDs when creating new instances, preserve them when modifying
## Documentation
Official pydicom documentation: https://pydicom.github.io/pydicom/dev/
- User Guide: https://pydicom.github.io/pydicom/dev/guides/user/index.html
- Tutorials: https://pydicom.github.io/pydicom/dev/tutorials/index.html
- API Reference: https://pydicom.github.io/pydicom/dev/reference/index.html
- Examples: https://pydicom.github.io/pydicom/dev/auto_examples/index.html

View File

@@ -0,0 +1,228 @@
# Common DICOM Tags Reference
This document provides a comprehensive list of commonly used DICOM tags organized by category. Tags can be accessed in pydicom using attribute notation (e.g., `ds.PatientName`) or tag tuple notation (e.g., `ds[0x0010, 0x0010]`).
## Patient Information Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0010,0010) | PatientName | PN | Patient's full name |
| (0010,0020) | PatientID | LO | Primary identifier for the patient |
| (0010,0030) | PatientBirthDate | DA | Date of birth (YYYYMMDD) |
| (0010,0032) | PatientBirthTime | TM | Time of birth (HHMMSS) |
| (0010,0040) | PatientSex | CS | Patient's sex (M, F, O) |
| (0010,1010) | PatientAge | AS | Patient's age (format: nnnD/W/M/Y) |
| (0010,1020) | PatientSize | DS | Patient's height in meters |
| (0010,1030) | PatientWeight | DS | Patient's weight in kilograms |
| (0010,1040) | PatientAddress | LO | Patient's mailing address |
| (0010,2160) | EthnicGroup | SH | Ethnic group of patient |
| (0010,4000) | PatientComments | LT | Additional comments about patient |
## Study Information Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0020,000D) | StudyInstanceUID | UI | Unique identifier for the study |
| (0008,0020) | StudyDate | DA | Date study started (YYYYMMDD) |
| (0008,0030) | StudyTime | TM | Time study started (HHMMSS) |
| (0008,1030) | StudyDescription | LO | Description of the study |
| (0020,0010) | StudyID | SH | User or site-defined study identifier |
| (0008,0050) | AccessionNumber | SH | RIS-generated study identifier |
| (0008,0090) | ReferringPhysicianName | PN | Name of patient's referring physician |
| (0008,1060) | NameOfPhysiciansReadingStudy | PN | Name of physician(s) reading study |
| (0008,1080) | AdmittingDiagnosesDescription | LO | Diagnosis description at admission |
## Series Information Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0020,000E) | SeriesInstanceUID | UI | Unique identifier for the series |
| (0020,0011) | SeriesNumber | IS | Numeric identifier for this series |
| (0008,103E) | SeriesDescription | LO | Description of the series |
| (0008,0060) | Modality | CS | Type of equipment (CT, MR, US, etc.) |
| (0008,0021) | SeriesDate | DA | Date series started (YYYYMMDD) |
| (0008,0031) | SeriesTime | TM | Time series started (HHMMSS) |
| (0018,0015) | BodyPartExamined | CS | Body part examined |
| (0018,5100) | PatientPosition | CS | Patient position (HFS, FFS, etc.) |
| (0020,0060) | Laterality | CS | Laterality of paired body part (R, L) |
## Image Information Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0008,0018) | SOPInstanceUID | UI | Unique identifier for this instance |
| (0020,0013) | InstanceNumber | IS | Number that identifies this image |
| (0008,0008) | ImageType | CS | Image identification characteristics |
| (0008,0023) | ContentDate | DA | Date of content creation (YYYYMMDD) |
| (0008,0033) | ContentTime | TM | Time of content creation (HHMMSS) |
| (0020,0032) | ImagePositionPatient | DS | Position of image (x, y, z) in mm |
| (0020,0037) | ImageOrientationPatient | DS | Direction cosines of image rows/columns |
| (0020,1041) | SliceLocation | DS | Relative position of image plane |
| (0018,0050) | SliceThickness | DS | Slice thickness in mm |
| (0018,0088) | SpacingBetweenSlices | DS | Spacing between slices in mm |
## Pixel Data Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (7FE0,0010) | PixelData | OB/OW | Actual pixel data of the image |
| (0028,0010) | Rows | US | Number of rows in image |
| (0028,0011) | Columns | US | Number of columns in image |
| (0028,0100) | BitsAllocated | US | Bits allocated for each pixel sample |
| (0028,0101) | BitsStored | US | Bits stored for each pixel sample |
| (0028,0102) | HighBit | US | Most significant bit for pixel sample |
| (0028,0103) | PixelRepresentation | US | 0=unsigned, 1=signed |
| (0028,0002) | SamplesPerPixel | US | Number of samples per pixel (1 or 3) |
| (0028,0004) | PhotometricInterpretation | CS | Color space (MONOCHROME2, RGB, etc.) |
| (0028,0006) | PlanarConfiguration | US | Color pixel data arrangement |
| (0028,0030) | PixelSpacing | DS | Physical spacing [row, column] in mm |
| (0028,0008) | NumberOfFrames | IS | Number of frames in multi-frame image |
| (0028,0034) | PixelAspectRatio | IS | Ratio of vertical to horizontal pixel size |
## Windowing and Display Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0028,1050) | WindowCenter | DS | Window center for display |
| (0028,1051) | WindowWidth | DS | Window width for display |
| (0028,1052) | RescaleIntercept | DS | b in output = m*SV + b |
| (0028,1053) | RescaleSlope | DS | m in output = m*SV + b |
| (0028,1054) | RescaleType | LO | Type of rescaling (HU, etc.) |
| (0028,1055) | WindowCenterWidthExplanation | LO | Explanation of window values |
| (0028,3010) | VOILUTSequence | SQ | VOI LUT description |
## CT-Specific Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0018,0060) | KVP | DS | Peak kilovoltage |
| (0018,1030) | ProtocolName | LO | Scan protocol name |
| (0018,1100) | ReconstructionDiameter | DS | Diameter of reconstruction circle |
| (0018,1110) | DistanceSourceToDetector | DS | Distance in mm |
| (0018,1111) | DistanceSourceToPatient | DS | Distance in mm |
| (0018,1120) | GantryDetectorTilt | DS | Gantry tilt in degrees |
| (0018,1130) | TableHeight | DS | Table height in mm |
| (0018,1150) | ExposureTime | IS | Exposure time in ms |
| (0018,1151) | XRayTubeCurrent | IS | X-ray tube current in mA |
| (0018,1152) | Exposure | IS | Exposure in mAs |
| (0018,1160) | FilterType | SH | X-ray filter material |
| (0018,1210) | ConvolutionKernel | SH | Reconstruction algorithm |
## MR-Specific Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0018,0080) | RepetitionTime | DS | TR in ms |
| (0018,0081) | EchoTime | DS | TE in ms |
| (0018,0082) | InversionTime | DS | TI in ms |
| (0018,0083) | NumberOfAverages | DS | Number of times data was averaged |
| (0018,0084) | ImagingFrequency | DS | Frequency in MHz |
| (0018,0085) | ImagedNucleus | SH | Nucleus that is imaged (1H, etc.) |
| (0018,0086) | EchoNumbers | IS | Echo number(s) |
| (0018,0087) | MagneticFieldStrength | DS | Field strength in Tesla |
| (0018,0088) | SpacingBetweenSlices | DS | Spacing in mm |
| (0018,0089) | NumberOfPhaseEncodingSteps | IS | Number of encoding steps |
| (0018,0091) | EchoTrainLength | IS | Number of echoes in a train |
| (0018,0093) | PercentSampling | DS | Fraction of acquisition matrix sampled |
| (0018,0094) | PercentPhaseFieldOfView | DS | Ratio of phase to frequency FOV |
| (0018,1030) | ProtocolName | LO | Scan protocol name |
| (0018,1314) | FlipAngle | DS | Flip angle in degrees |
## File Meta Information Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0002,0000) | FileMetaInformationGroupLength | UL | Length of file meta information |
| (0002,0001) | FileMetaInformationVersion | OB | Version of file meta information |
| (0002,0002) | MediaStorageSOPClassUID | UI | SOP Class UID |
| (0002,0003) | MediaStorageSOPInstanceUID | UI | SOP Instance UID |
| (0002,0010) | TransferSyntaxUID | UI | Transfer syntax UID |
| (0002,0012) | ImplementationClassUID | UI | Implementation class UID |
| (0002,0013) | ImplementationVersionName | SH | Implementation version name |
## Equipment Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0008,0070) | Manufacturer | LO | Equipment manufacturer |
| (0008,0080) | InstitutionName | LO | Institution name |
| (0008,0081) | InstitutionAddress | ST | Institution address |
| (0008,1010) | StationName | SH | Equipment station name |
| (0008,1040) | InstitutionalDepartmentName | LO | Department name |
| (0008,1050) | PerformingPhysicianName | PN | Physician performing procedure |
| (0008,1070) | OperatorsName | PN | Operator name(s) |
| (0008,1090) | ManufacturerModelName | LO | Model name |
| (0018,1000) | DeviceSerialNumber | LO | Device serial number |
| (0018,1020) | SoftwareVersions | LO | Software version(s) |
## Timing Tags
| Tag | Name | Type | Description |
|-----|------|------|-------------|
| (0008,0012) | InstanceCreationDate | DA | Date instance was created |
| (0008,0013) | InstanceCreationTime | TM | Time instance was created |
| (0008,0022) | AcquisitionDate | DA | Date acquisition started |
| (0008,0032) | AcquisitionTime | TM | Time acquisition started |
| (0008,002A) | AcquisitionDateTime | DT | Acquisition date and time |
## DICOM Value Representations (VR)
Common value representation types used in DICOM:
- **AE**: Application Entity (max 16 chars)
- **AS**: Age String (nnnD/W/M/Y)
- **CS**: Code String (max 16 chars)
- **DA**: Date (YYYYMMDD)
- **DS**: Decimal String
- **DT**: Date Time (YYYYMMDDHHMMSS.FFFFFF&ZZXX)
- **IS**: Integer String
- **LO**: Long String (max 64 chars)
- **LT**: Long Text (max 10240 chars)
- **PN**: Person Name
- **SH**: Short String (max 16 chars)
- **SQ**: Sequence of Items
- **ST**: Short Text (max 1024 chars)
- **TM**: Time (HHMMSS.FFFFFF)
- **UI**: Unique Identifier (UID)
- **UL**: Unsigned Long (4 bytes)
- **US**: Unsigned Short (2 bytes)
- **OB**: Other Byte String
- **OW**: Other Word String
## Usage Examples
### Accessing Tags by Name
```python
patient_name = ds.PatientName
study_date = ds.StudyDate
modality = ds.Modality
```
### Accessing Tags by Number
```python
patient_name = ds[0x0010, 0x0010].value
study_date = ds[0x0008, 0x0020].value
modality = ds[0x0008, 0x0060].value
```
### Checking if Tag Exists
```python
if hasattr(ds, 'PatientName'):
print(ds.PatientName)
# Or using 'in' operator
if (0x0010, 0x0010) in ds:
print(ds[0x0010, 0x0010].value)
```
### Safe Access with Default Value
```python
patient_name = getattr(ds, 'PatientName', 'Unknown')
study_desc = ds.get('StudyDescription', 'No description')
```
## References
- DICOM Standard: https://www.dicomstandard.org/
- DICOM Tag Browser: https://dicom.innolitics.com/ciods
- Pydicom Documentation: https://pydicom.github.io/pydicom/

View File

@@ -0,0 +1,352 @@
# DICOM Transfer Syntaxes Reference
This document provides a comprehensive reference for DICOM transfer syntaxes and compression formats. Transfer syntaxes define how DICOM data is encoded, including byte ordering, compression method, and other encoding rules.
## Overview
A Transfer Syntax UID specifies:
1. **Byte ordering**: Little Endian or Big Endian
2. **Value Representation (VR)**: Implicit or Explicit
3. **Compression**: None, or specific compression algorithm
## Uncompressed Transfer Syntaxes
### Implicit VR Little Endian (1.2.840.10008.1.2)
- **Default** transfer syntax
- Value Representations are implicit (not explicitly encoded)
- Little Endian byte ordering
- **Pydicom constant**: `pydicom.uid.ImplicitVRLittleEndian`
**Usage:**
```python
import pydicom
ds.file_meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian
```
### Explicit VR Little Endian (1.2.840.10008.1.2.1)
- **Most common** transfer syntax
- Value Representations are explicit
- Little Endian byte ordering
- **Pydicom constant**: `pydicom.uid.ExplicitVRLittleEndian`
**Usage:**
```python
ds.file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian
```
### Explicit VR Big Endian (1.2.840.10008.1.2.2) - RETIRED
- Value Representations are explicit
- Big Endian byte ordering
- **Deprecated** - not recommended for new implementations
- **Pydicom constant**: `pydicom.uid.ExplicitVRBigEndian`
## JPEG Compression
### JPEG Baseline (Process 1) (1.2.840.10008.1.2.4.50)
- **Lossy** compression
- 8-bit samples only
- Most widely supported JPEG format
- **Pydicom constant**: `pydicom.uid.JPEGBaseline8Bit`
**Dependencies:** Requires `pylibjpeg` or `pillow`
**Usage:**
```python
# Note: pydicom's Dataset.compress() has no baseline JPEG encoder;
# JPEG Baseline data can only be decoded, not produced, by pydicom
# Decompress
ds.decompress()
```
### JPEG Extended (Process 2 & 4) (1.2.840.10008.1.2.4.51)
- **Lossy** compression
- 8-bit and 12-bit samples
- **Pydicom constant**: `pydicom.uid.JPEGExtended12Bit`
### JPEG Lossless, Non-Hierarchical (Process 14) (1.2.840.10008.1.2.4.57)
- **Lossless** compression
- Any Process 14 predictor (Selection Values 1-7)
- **Pydicom constant**: `pydicom.uid.JPEGLossless`
**Dependencies:** Requires `pylibjpeg-libjpeg` or `gdcm`
### JPEG Lossless, Non-Hierarchical, First-Order Prediction (1.2.840.10008.1.2.4.70)
- **Lossless** compression
- Uses Process 14 Selection Value 1
- **Pydicom constant**: `pydicom.uid.JPEGLosslessSV1`
**Usage:**
```python
# pydicom can decode JPEG Lossless (with pylibjpeg-libjpeg or gdcm),
# but Dataset.compress() has no JPEG Lossless encoder
ds.decompress()
```
### JPEG-LS Lossless (1.2.840.10008.1.2.4.80)
- **Lossless** compression
- Low complexity, good compression
- **Pydicom constant**: `pydicom.uid.JPEGLSLossless`
**Dependencies:** Requires `pylibjpeg-libjpeg` or `gdcm`
### JPEG-LS Lossy (Near-Lossless) (1.2.840.10008.1.2.4.81)
- **Near-lossless** compression
- Allows controlled loss of precision
- **Pydicom constant**: `pydicom.uid.JPEGLSNearLossless`
## JPEG 2000 Compression
### JPEG 2000 Lossless Only (1.2.840.10008.1.2.4.90)
- **Lossless** compression
- Wavelet-based compression
- Better compression than JPEG Lossless
- **Pydicom constant**: `pydicom.uid.JPEG2000Lossless`
**Dependencies:** Requires `pylibjpeg-openjpeg`, `gdcm`, or `pillow`
**Usage:**
```python
# Compress to JPEG 2000 Lossless
ds.compress(pydicom.uid.JPEG2000Lossless)
```
### JPEG 2000 (1.2.840.10008.1.2.4.91)
- **Lossy or lossless** compression
- Wavelet-based compression
- High quality at low bit rates
- **Pydicom constant**: `pydicom.uid.JPEG2000`
**Dependencies:** Requires `pylibjpeg-openjpeg`, `gdcm`, or `pillow`
### JPEG 2000 Part 2 Multi-component Lossless (1.2.840.10008.1.2.4.92)
- **Lossless** compression
- Supports multi-component images
- **Pydicom constant**: `pydicom.uid.JPEG2000MCLossless`
### JPEG 2000 Part 2 Multi-component (1.2.840.10008.1.2.4.93)
- **Lossy or lossless** compression
- Supports multi-component images
- **Pydicom constant**: `pydicom.uid.JPEG2000MC`
## RLE Compression
### RLE Lossless (1.2.840.10008.1.2.5)
- **Lossless** compression
- Run-Length Encoding
- Simple, fast algorithm
- Good for images with repeated values
- **Pydicom constant**: `pydicom.uid.RLELossless`
**Dependencies:** Built into pydicom (no additional packages needed)
**Usage:**
```python
# Compress with RLE
ds.compress(pydicom.uid.RLELossless)
# Decompress
ds.decompress()
```
## Deflated Transfer Syntaxes
### Deflated Explicit VR Little Endian (1.2.840.10008.1.2.1.99)
- Uses ZLIB compression on entire dataset
- Not commonly used
- **Pydicom constant**: `pydicom.uid.DeflatedExplicitVRLittleEndian`
## MPEG Compression
### MPEG2 Main Profile @ Main Level (1.2.840.10008.1.2.4.100)
- **Lossy** video compression
- For multi-frame images/videos
- **Pydicom constant**: `pydicom.uid.MPEG2MPML`
### MPEG2 Main Profile @ High Level (1.2.840.10008.1.2.4.101)
- **Lossy** video compression
- Higher resolution than MPML
- **Pydicom constant**: `pydicom.uid.MPEG2MPHL`
### MPEG-4 AVC/H.264 High Profile (1.2.840.10008.1.2.4.102-106)
- **Lossy** video compression
- Various levels (BD, 2D, 3D, Stereo)
- Modern video codec
## Checking Transfer Syntax
### Identify Current Transfer Syntax
```python
import pydicom
ds = pydicom.dcmread('image.dcm')
# Get transfer syntax UID
ts_uid = ds.file_meta.TransferSyntaxUID
print(f"Transfer Syntax UID: {ts_uid}")
# Get human-readable name
print(f"Transfer Syntax Name: {ts_uid.name}")
# Check if compressed
print(f"Is compressed: {ts_uid.is_compressed}")
```
### Common Checks
```python
# Check if little endian
if ts_uid.is_little_endian:
print("Little Endian")
# Check if implicit VR
if ts_uid.is_implicit_VR:
print("Implicit VR")
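# Check compression type. Test the most specific names first, because the
# "JPEG 2000 ..." and "JPEG-LS ..." names also contain "JPEG".
if 'JPEG 2000' in ts_uid.name:
    print("JPEG 2000 compressed")
elif 'JPEG-LS' in ts_uid.name:
    print("JPEG-LS compressed")
elif 'JPEG' in ts_uid.name:
    print("JPEG compressed")
elif 'RLE' in ts_uid.name:
    print("RLE compressed")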
```
## Decompression
### Automatic Decompression
Pydicom can automatically decompress pixel data when accessing `pixel_array`:
```python
import pydicom
# Read compressed DICOM
ds = pydicom.dcmread('compressed.dcm')
# Pixel data is automatically decompressed
pixel_array = ds.pixel_array # Decompresses if needed
```
### Manual Decompression
```python
import pydicom
ds = pydicom.dcmread('compressed.dcm')
# Decompress in-place
ds.decompress()
# Now save as uncompressed
ds.save_as('uncompressed.dcm', write_like_original=False)
```
## Compression
### Compressing DICOM Files
```python
import pydicom
ds = pydicom.dcmread('uncompressed.dcm')
# Compress using JPEG 2000 Lossless
ds.compress(pydicom.uid.JPEG2000Lossless)
ds.save_as('compressed_j2k.dcm')
# Compress using RLE Lossless (no additional dependencies)
ds.compress(pydicom.uid.RLELossless)
ds.save_as('compressed_rle.dcm')
# Note: JPEG Baseline (lossy) encoding is not supported by Dataset.compress();
# use an external tool if a JPEG Baseline file is required
```
### Compression with Custom Encoding Parameters
```python
import pydicom
from pydicom.encoders import JPEGLSLosslessEncoder
ds = pydicom.dcmread('uncompressed.dcm')
# Compress with custom parameters
ds.compress(pydicom.uid.JPEGLSLossless, encoding_plugin='pylibjpeg')
```
## Installing Compression Handlers
Different transfer syntaxes require different Python packages:
### JPEG Baseline/Extended
```bash
pip install pylibjpeg pylibjpeg-libjpeg
# Or
pip install pillow
```
### JPEG Lossless/JPEG-LS
```bash
pip install pylibjpeg pylibjpeg-libjpeg
# Or
pip install python-gdcm
```
### JPEG 2000
```bash
pip install pylibjpeg pylibjpeg-openjpeg
# Or
pip install python-gdcm
# Or
pip install pillow
```
### RLE
No additional packages needed - built into pydicom
### Comprehensive Installation
```bash
# Install all common handlers
pip install pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg python-gdcm
```
## Checking Available Handlers
```python
import pydicom
# List the pixel data handlers whose dependencies are installed
from pydicom import config
print("Available handlers:")
for handler in config.pixel_data_handlers:
    if handler.is_available():
        print(f" - {handler.__name__}")
```
## Best Practices
1. **Use Explicit VR Little Endian** for maximum compatibility when creating new files
2. **Use JPEG 2000 Lossless** for good compression with no quality loss
3. **Use RLE Lossless** if you can't install additional dependencies
4. **Check Transfer Syntax** before processing to ensure you have the right handlers
5. **Test decompression** before deploying to ensure all required packages are installed
6. **Preserve original** transfer syntax when possible using `write_like_original=True`
7. **Consider file size** vs. quality tradeoffs when choosing lossy compression
8. **Use lossless compression** for diagnostic images to maintain clinical quality
## Common Issues
### Issue: "Unable to decode pixel data"
**Cause:** Missing compression handler
**Solution:** Install the appropriate package (see Installing Compression Handlers above)
### Issue: "Unsupported Transfer Syntax"
**Cause:** Rare or unsupported compression format
**Solution:** Try installing `python-gdcm` which supports more formats
### Issue: "Pixel data decompressed but looks wrong"
**Cause:** May need to apply VOI LUT or rescale
**Solution:** Use `apply_voi_lut()` or apply `RescaleSlope`/`RescaleIntercept`
## References
- DICOM Standard Part 5 (Data Structures and Encoding): https://dicom.nema.org/medical/dicom/current/output/chtml/part05/PS3.5.html
- Pydicom Transfer Syntax Documentation: https://pydicom.github.io/pydicom/stable/guides/user/transfer_syntaxes.html
- Pydicom Compression Guide: https://pydicom.github.io/pydicom/stable/old/image_data_compression.html

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
"""
Anonymize DICOM files by removing or replacing Protected Health Information (PHI).
Usage:
python anonymize_dicom.py input.dcm output.dcm
python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001
"""
import argparse
import sys
from pathlib import Path
try:
import pydicom
except ImportError:
print("Error: pydicom is not installed. Install it with: pip install pydicom")
sys.exit(1)
# Keywords of DICOM data elements that commonly contain PHI (Protected Health
# Information). anonymize_dicom() walks this list: PatientName, PatientID and
# PatientBirthDate are overwritten with placeholder values, and every other
# keyword listed here is deleted from the dataset when it is present.
PHI_TAGS = [
# Patient demographics and free-text patient details
'PatientName', 'PatientID', 'PatientBirthDate', 'PatientBirthTime',
'PatientSex', 'PatientAge', 'PatientSize', 'PatientWeight',
'PatientAddress', 'PatientTelephoneNumbers', 'PatientMotherBirthName',
'MilitaryRank', 'EthnicGroup', 'Occupation', 'PatientComments',
# Institution
'InstitutionName', 'InstitutionAddress', 'InstitutionalDepartmentName',
# Physicians and operators (names, contact details, identification sequences)
'ReferringPhysicianName', 'ReferringPhysicianAddress',
'ReferringPhysicianTelephoneNumbers', 'ReferringPhysicianIdentificationSequence',
'PerformingPhysicianName', 'PerformingPhysicianIdentificationSequence',
'OperatorsName', 'PhysiciansOfRecord', 'PhysiciansOfRecordIdentificationSequence',
'NameOfPhysiciansReadingStudy', 'PhysiciansReadingStudyIdentificationSequence',
# Free-text descriptions and request/procedure details
'StudyDescription', 'SeriesDescription', 'AdmittingDiagnosesDescription',
'DerivationDescription', 'RequestingPhysician', 'RequestingService',
'RequestedProcedureDescription', 'ScheduledPerformingPhysicianName',
'PerformedLocation', 'PerformedStationName',
]
def anonymize_dicom(input_path, output_path, patient_id='ANONYMOUS', patient_name='ANONYMOUS'):
    """
    Anonymize a DICOM file by removing or replacing PHI.

    Every keyword in PHI_TAGS is processed on the top-level dataset and,
    recursively, on every item of every sequence it contains, so PHI nested
    inside sequences is scrubbed as well. PatientName, PatientID and
    PatientBirthDate are overwritten with placeholders; all other listed
    keywords are deleted when present.

    Study/Series/SOP Instance UIDs are intentionally left unchanged so that
    references between files stay valid. Replace them with
    pydicom.uid.generate_uid() separately if UID anonymization is required.

    Args:
        input_path: Path to input DICOM file
        output_path: Path to output anonymized DICOM file
        patient_id: Replacement patient ID (default: 'ANONYMOUS')
        patient_name: Replacement patient name (default: 'ANONYMOUS')

    Returns:
        (True, list of str) describing each change on success, or
        (False, error message) if reading, scrubbing or saving failed.
    """
    # Keywords that are overwritten with a placeholder instead of being deleted
    replacements = {
        'PatientName': patient_name,
        'PatientID': patient_id,
        'PatientBirthDate': '19000101',
    }
    # Track what was anonymized
    anonymized = []

    def scrub(dataset, prefix=''):
        """Scrub PHI from one dataset, then recurse into its sequence items."""
        for tag in PHI_TAGS:
            if not hasattr(dataset, tag):
                continue
            if tag in replacements:
                setattr(dataset, tag, replacements[tag])
                anonymized.append(f"{prefix}{tag}: replaced with '{replacements[tag]}'")
            else:
                delattr(dataset, tag)
                anonymized.append(f"{prefix}{tag}: removed")

        # PHI sequences listed in PHI_TAGS are already gone at this point;
        # whatever sequences remain may still carry PHI in their items.
        for elem in dataset:
            if elem.VR != 'SQ':
                continue
            label = elem.keyword or str(elem.tag)
            for index, item in enumerate(elem.value):
                scrub(item, f"{prefix}{label}[{index}].")

    try:
        # Read DICOM file
        ds = pydicom.dcmread(input_path)

        # Remove or replace sensitive data at every nesting level
        scrub(ds)

        # Save anonymized file
        ds.save_as(output_path)
        return True, anonymized
    except Exception as e:
        # Callers expect a (success, result) tuple rather than an exception
        return False, str(e)
def main():
    """Command-line entry point: parse arguments and anonymize one DICOM file.

    Exits with status 1 when the input file does not exist or when
    anonymization reports a failure.
    """
    cli = argparse.ArgumentParser(
        description='Anonymize DICOM files by removing or replacing PHI',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python anonymize_dicom.py input.dcm output.dcm
python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001
python anonymize_dicom.py input.dcm output.dcm --patient-id ANON001 --patient-name "Anonymous^Patient"
"""
    )
    cli.add_argument('input', type=str, help='Input DICOM file')
    cli.add_argument('output', type=str, help='Output anonymized DICOM file')
    cli.add_argument('--patient-id', type=str, default='ANONYMOUS',
                     help='Replacement patient ID (default: ANONYMOUS)')
    cli.add_argument('--patient-name', type=str, default='ANONYMOUS',
                     help='Replacement patient name (default: ANONYMOUS)')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='Show detailed anonymization information')
    options = cli.parse_args()

    # Bail out early when there is nothing to read
    if not Path(options.input).exists():
        print(f"Error: Input file '{options.input}' not found")
        sys.exit(1)

    print(f"Anonymizing: {options.input}")
    ok, outcome = anonymize_dicom(options.input, options.output,
                                  options.patient_id, options.patient_name)

    # On failure `outcome` is the error message
    if not ok:
        print(f"✗ Error: {outcome}")
        sys.exit(1)

    # On success `outcome` is the list of per-field change descriptions
    print(f"✓ Successfully anonymized DICOM file: {options.output}")
    if options.verbose:
        print(f"\nAnonymized {len(outcome)} fields:")
        for entry in outcome:
            print(f" - {entry}")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,172 @@
#!/usr/bin/env python3
"""
Convert DICOM files to common image formats (PNG, JPEG, TIFF).
Usage:
python dicom_to_image.py input.dcm output.png
python dicom_to_image.py input.dcm output.jpg --format JPEG
python dicom_to_image.py input.dcm output.tiff --apply-windowing
"""
import argparse
import sys
from pathlib import Path
try:
import pydicom
import numpy as np
from PIL import Image
except ImportError as e:
print(f"Error: Required package not installed: {e}")
print("Install with: pip install pydicom pillow numpy")
sys.exit(1)
def apply_windowing(pixel_array, ds):
    """Return pixel_array with the dataset's VOI LUT applied when possible.

    Falls back to returning pixel_array unchanged if the pydicom helper
    cannot be imported or raises AttributeError.
    """
    try:
        from pydicom.pixel_data_handlers.util import apply_voi_lut
        windowed = apply_voi_lut(pixel_array, ds)
    except (ImportError, AttributeError):
        windowed = pixel_array
    return windowed
def normalize_to_uint8(pixel_array):
    """Normalize pixel array to uint8 (0-255) range.

    Arrays that are already uint8 are returned unchanged. Otherwise the
    values are min-max scaled to 0-255 and truncated to uint8; an array
    whose values are all equal maps to all zeros.

    Args:
        pixel_array: numpy array of pixel values (any numeric dtype)

    Returns:
        numpy uint8 array with the same shape as pixel_array
    """
    if pixel_array.dtype == np.uint8:
        return pixel_array

    # Work in float64: subtracting the minimum in a narrow integer dtype
    # (e.g. int16 data spanning -32768..32767) overflows and wraps around.
    values = pixel_array.astype(np.float64)
    pix_min = values.min()
    pix_max = values.max()

    if pix_max > pix_min:
        normalized = (values - pix_min) / (pix_max - pix_min)
    else:
        # Constant image: no range to stretch
        normalized = np.zeros(pixel_array.shape, dtype=np.float64)

    # Scale to 0-255
    return (normalized * 255).astype(np.uint8)
def convert_dicom_to_image(input_path, output_path, image_format='PNG',
                           apply_window=False, frame=0):
    """
    Convert DICOM file to standard image format.

    Args:
        input_path: Path to input DICOM file
        output_path: Path to output image file
        image_format: Output format (PNG, JPEG, TIFF, etc.); the common
            aliases JPG and TIF are accepted
        apply_window: Whether to apply VOI LUT windowing
        frame: Frame number for multi-frame DICOM files (ignored for
            single-frame files)

    Returns:
        (True, info dict with 'shape', 'modality', 'bits_allocated') on
        success, or (False, error message) on failure.
    """
    try:
        # Read DICOM file
        ds = pydicom.dcmread(input_path)

        # Get pixel array
        pixel_array = ds.pixel_array
        full_shape = pixel_array.shape

        # Handle multi-frame DICOM. A single frame has 2 dimensions for
        # grayscale and 3 for color (rows, cols, samples); only an extra
        # leading dimension means multiple frames. Testing shape[0] > 1 alone
        # would mistake a single color image for a stack of frames.
        samples = int(getattr(ds, 'SamplesPerPixel', 1) or 1)
        single_frame_ndim = 3 if samples > 1 else 2
        if pixel_array.ndim > single_frame_ndim:
            frame_count = pixel_array.shape[0]
            # Reject negative indices too; they would silently wrap around
            if not 0 <= frame < frame_count:
                return False, f"Frame {frame} out of range (0-{frame_count-1})"
            pixel_array = pixel_array[frame]
            print(f"Extracting frame {frame} of {frame_count}")

        # Apply windowing if requested
        if apply_window and hasattr(ds, 'WindowCenter'):
            pixel_array = apply_windowing(pixel_array, ds)

        # Handle color images
        if pixel_array.ndim == 3 and pixel_array.shape[2] in (3, 4):
            # RGB or RGBA image
            photometric = getattr(ds, 'PhotometricInterpretation', '')
            if photometric in ('YBR_FULL', 'YBR_FULL_422'):
                # Convert from YBR to RGB
                try:
                    from pydicom.pixel_data_handlers.util import convert_color_space
                    pixel_array = convert_color_space(pixel_array, photometric, 'RGB')
                except ImportError:
                    print("Warning: Could not convert color space, using as-is")
            image = Image.fromarray(pixel_array)
        else:
            # Grayscale image - normalize to uint8
            pixel_array = normalize_to_uint8(pixel_array)
            image = Image.fromarray(pixel_array, mode='L')

        # Pillow registers 'JPEG' and 'TIFF' as format names; map the common
        # extension-style aliases so callers can pass either spelling.
        if isinstance(image_format, str):
            aliases = {'JPG': 'JPEG', 'TIF': 'TIFF'}
            image_format = aliases.get(image_format.upper(), image_format)

        # Save image
        image.save(output_path, format=image_format)

        return True, {
            'shape': full_shape,
            'modality': getattr(ds, 'Modality', 'Unknown'),
            'bits_allocated': getattr(ds, 'BitsAllocated', 'Unknown'),
        }
    except Exception as e:
        # Callers expect a (success, result) tuple rather than an exception
        return False, str(e)
def main():
    """Command-line entry point: convert one DICOM file to an image file.

    The output format is taken from --format when given, otherwise inferred
    from the output file extension (falling back to PNG for unknown
    extensions). Exits with status 1 when the input file does not exist or
    the conversion fails.
    """
    parser = argparse.ArgumentParser(
        description='Convert DICOM files to common image formats',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python dicom_to_image.py input.dcm output.png
python dicom_to_image.py input.dcm output.jpg --format JPEG
python dicom_to_image.py input.dcm output.tiff --apply-windowing
python dicom_to_image.py multiframe.dcm frame5.png --frame 5
"""
    )
    parser.add_argument('input', type=str, help='Input DICOM file')
    parser.add_argument('output', type=str, help='Output image file')
    parser.add_argument('--format', type=str, choices=['PNG', 'JPEG', 'TIFF', 'BMP'],
                        help='Output image format (default: inferred from extension)')
    parser.add_argument('--apply-windowing', action='store_true',
                        help='Apply VOI LUT windowing if available')
    parser.add_argument('--frame', type=int, default=0,
                        help='Frame number for multi-frame DICOM files (default: 0)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show detailed conversion information')
    args = parser.parse_args()

    # Validate input file exists
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file '{args.input}' not found")
        sys.exit(1)

    # Determine output format
    if args.format:
        image_format = args.format
    else:
        # Infer from extension. Pillow's format names are 'JPEG' and 'TIFF',
        # so translate the short extensions instead of passing 'JPG' through
        # (rejected by Pillow) or letting '.tif' fall back to PNG.
        ext = Path(args.output).suffix.upper().lstrip('.')
        ext = {'JPG': 'JPEG', 'TIF': 'TIFF'}.get(ext, ext)
        image_format = ext if ext in ('PNG', 'JPEG', 'TIFF', 'BMP') else 'PNG'

    # Convert the file
    print(f"Converting: {args.input} -> {args.output}")
    success, result = convert_dicom_to_image(args.input, args.output,
                                             image_format, args.apply_windowing,
                                             args.frame)
    if success:
        print(f"✓ Successfully converted to {image_format}")
        if args.verbose:
            print("\nImage information:")
            print(f" - Shape: {result['shape']}")
            print(f" - Modality: {result['modality']}")
            print(f" - Bits Allocated: {result['bits_allocated']}")
    else:
        print(f"✗ Error: {result}")
        sys.exit(1)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,173 @@
#!/usr/bin/env python3
"""
Extract and display DICOM metadata in a readable format.
Usage:
python extract_metadata.py file.dcm
python extract_metadata.py file.dcm --output metadata.txt
python extract_metadata.py file.dcm --format json --output metadata.json
"""
import argparse
import sys
import json
from pathlib import Path
try:
import pydicom
except ImportError:
print("Error: pydicom is not installed. Install it with: pip install pydicom")
sys.exit(1)
def format_value(value):
    """Format DICOM values for display.

    Args:
        value: A data element value (bytes, sequence, multi-value or scalar)

    Returns:
        str: bytes decoded as UTF-8 with undecodable bytes dropped, a short
        "Sequence with N item(s)" summary for sequences, a comma-separated
        list for multi-valued elements, and str(value) for anything else.
    """
    if isinstance(value, bytes):
        # errors='ignore' drops undecodable bytes, so this cannot raise
        return value.decode('utf-8', errors='ignore')
    # Check Sequence before MultiValue: in pydicom releases where Sequence
    # derives from MultiValue, the opposite order would never reach this
    # branch and would dump every nested dataset in full instead.
    if isinstance(value, pydicom.sequence.Sequence):
        return f"Sequence with {len(value)} item(s)"
    if isinstance(value, pydicom.multival.MultiValue):
        return ', '.join(str(v) for v in value)
    return str(value)
def extract_metadata_text(ds, show_sequences=False):
    """Build a human-readable text report of a dataset's metadata.

    The report contains a banner, the file meta elements (when the dataset
    has a `file_meta` attribute), then fixed Patient / Study / Series / Image
    sections listing whichever of their keywords are present. When
    show_sequences is true, a final section lists every element in ds.

    Returns:
        str: the report, lines joined with newlines.
    """
    def row(label, value):
        # Label padded to 40 columns, followed by the formatted value
        return f"{label:40s} {format_value(value)}"

    rule = "=" * 80
    report = [rule, "DICOM Metadata", rule]

    # File Meta Information
    if hasattr(ds, 'file_meta'):
        report.append("\n[File Meta Information]")
        report.extend(row(elem.name, elem.value) for elem in ds.file_meta)

    # Fixed sections: (heading, keywords to show when present)
    sections = (
        ("\n[Patient Information]",
         ('PatientName', 'PatientID', 'PatientBirthDate',
          'PatientSex', 'PatientAge', 'PatientWeight')),
        ("\n[Study Information]",
         ('StudyInstanceUID', 'StudyDate', 'StudyTime',
          'StudyDescription', 'AccessionNumber', 'StudyID')),
        ("\n[Series Information]",
         ('SeriesInstanceUID', 'SeriesNumber', 'SeriesDescription',
          'Modality', 'SeriesDate', 'SeriesTime')),
        ("\n[Image Information]",
         ('SOPInstanceUID', 'InstanceNumber', 'ImageType',
          'Rows', 'Columns', 'BitsAllocated', 'BitsStored',
          'PhotometricInterpretation', 'SamplesPerPixel',
          'PixelSpacing', 'SliceThickness', 'ImagePositionPatient',
          'ImageOrientationPatient', 'WindowCenter', 'WindowWidth')),
    )
    for heading, keywords in sections:
        report.append(heading)
        for keyword in keywords:
            if hasattr(ds, keyword):
                report.append(row(keyword, getattr(ds, keyword)))

    # All elements: sequence and non-sequence elements are rendered the same
    # way (format_value summarizes sequences), so no per-VR branching is needed
    if show_sequences:
        report.append("\n[All Elements]")
        report.extend(row(elem.name, elem.value) for elem in ds)

    return '\n'.join(report)
def extract_metadata_json(ds):
    """Extract metadata as JSON.

    Produces an object with a 'dataset' member holding every non-sequence
    element of ds and, when ds has a `file_meta` attribute, a 'file_meta'
    member holding its elements. Values are rendered with format_value().

    Elements are keyed by keyword. Elements without a keyword (e.g. private
    tags) are keyed by their tag string instead, so they no longer all
    collapse onto the single key "" and overwrite one another.

    Returns:
        str: JSON document indented by two spaces.
    """
    def element_key(elem):
        # keyword is empty for elements not in the DICOM dictionary
        return elem.keyword or str(elem.tag)

    metadata = {}

    # File Meta Information
    if hasattr(ds, 'file_meta'):
        metadata['file_meta'] = {
            element_key(elem): format_value(elem.value)
            for elem in ds.file_meta
        }

    # All data elements (excluding sequences for simplicity)
    metadata['dataset'] = {
        element_key(elem): format_value(elem.value)
        for elem in ds
        if elem.VR != 'SQ'
    }

    return json.dumps(metadata, indent=2)
def main():
    """Command-line entry point: print or save the metadata of one DICOM file.

    Exits with status 1 when the input file does not exist or when reading,
    formatting or writing fails.
    """
    parser = argparse.ArgumentParser(
        description='Extract and display DICOM metadata',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python extract_metadata.py file.dcm
python extract_metadata.py file.dcm --output metadata.txt
python extract_metadata.py file.dcm --format json --output metadata.json
python extract_metadata.py file.dcm --show-sequences
"""
    )
    parser.add_argument('input', type=str, help='Input DICOM file')
    parser.add_argument('--output', '-o', type=str, help='Output file (default: print to console)')
    parser.add_argument('--format', type=str, choices=['text', 'json'], default='text',
                        help='Output format (default: text)')
    parser.add_argument('--show-sequences', action='store_true',
                        help='Include all data elements including sequences')
    args = parser.parse_args()

    # Validate input file exists
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file '{args.input}' not found")
        sys.exit(1)

    try:
        # Read DICOM file
        ds = pydicom.dcmread(args.input)

        # Extract metadata
        if args.format == 'json':
            output = extract_metadata_json(ds)
        else:
            output = extract_metadata_text(ds, args.show_sequences)

        # Write or print output
        if args.output:
            # Explicit UTF-8: metadata such as patient names may contain
            # characters the platform's default encoding cannot represent.
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(output)
            print(f"✓ Metadata extracted to: {args.output}")
        else:
            print(output)
    except Exception as e:
        print(f"✗ Error: {e}")
        sys.exit(1)


if __name__ == '__main__':
    main()