# File handling
import os
import glob
# Data manipulation
import pandas as pd
import numpy as np
# Data visualization
import matplotlib
from matplotlib import pyplot as plt
from matplotlib import image as mpimage
import seaborn as sns
from tqdm import tqdm
# Date manipulation
import datetime
# DICOM format reader
import pydicom
DICOM Introduction
Exploring a DICOM dataset
Adapted from :
Kaggle Kernel: Getting to know DICOM and the Data by schlerp
https://www.kaggle.com/schlerp/getting-to-know-dicom-and-the-data/notebook
A Multi-platform DICOM Toolbox for Academic Radiologists by
http://uwmsk.org/jupyter/Jupyter_DICOM_toolbox.html
A Radiologist’s Exploration of the Stanford ML Group’s MRNet data by Walter Wiggins
https://towardsdatascience.com/a-radiologists-exploration-of-the-stanford-ml-group-s-mrnet-data-8e2374e11bfb
Imports
Import libraries and write settings here.
def show_dcm_info(ds, file_path=None):
    """Print a short, human-readable summary of a DICOM dataset.

    Args:
        ds: Dataset to summarise. It must expose the attributes read below
            (``SOPClassUID``, ``PatientName``, ``PatientID``, ...) and
            support ``'Keyword' in ds`` membership tests.
        file_path: Path shown on the "Filename" line. When omitted, the
            dataset's own ``filename`` attribute is used, so the function no
            longer depends on a module-level ``file_path`` variable.
    """
    if file_path is None:
        file_path = getattr(ds, "filename", None)

    print()
    print("Filename.........: ", file_path)
    print("Storage type.....: ", ds.SOPClassUID)
    print()

    pat_name = ds.PatientName
    display_name = pat_name.family_name + ", " + pat_name.given_name
    print("Patient name.....: ", display_name)
    print("Patient ID.......: ", ds.PatientID)
    print("Patient Age......: ", ds.PatientAge)
    print("Patient Sex......: ", ds.PatientSex)
    print("Modality.........: ", ds.Modality)
    print("Body Part........: ", ds.BodyPartExamined)
    print("View Position....: ", ds.ViewPosition)

    # Image geometry is only reported when the dataset carries pixel data.
    if 'PixelData' in ds:
        rows = int(ds.Rows)
        cols = int(ds.Columns)
        print("Image size.......: {rows:d} x {cols:d}, {size:d} bytes".format(
            rows=rows, cols=cols, size=len(ds.PixelData)))
        # NOTE(review): nesting restored from the upstream pydicom example;
        # the flattened source does not show the original indentation.
        if 'PixelSpacing' in ds:
            print("Pixel spacing....: ", ds.PixelSpacing)
def plot_pixel_array(ds, figsize=(10, 10)):
    """Display the dataset's pixel array with matplotlib's ``bone`` colormap.

    Args:
        ds: Object exposing a ``pixel_array`` attribute that ``plt.imshow``
            can render.
        figsize: Figure size passed straight to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    plt.imshow(ds.pixel_array, cmap=plt.cm.bone)
    plt.show()
# Preview the header summary and image of the first few training files.
num_to_plot = 5
path = '../dicom/train_images/'

# Slicing the listing replaces the manual counter/break pair and also
# behaves correctly when num_to_plot is 0 (nothing is shown).
for file_name in os.listdir(path)[:num_to_plot]:
    # `file_path` and `ds` stay module-level names: later cells inspect the
    # last dataset that was read through `ds`.
    file_path = os.path.join(path, file_name)
    ds = pydicom.dcmread(file_path)

    show_dcm_info(ds)
    plot_pixel_array(ds)
Filename.........: ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.300.1517875162.258081.dcm
Storage type.....: 1.2.840.10008.5.1.4.1.1.7
Patient name.....: 88c14312-3265-4a3f-b7bb-41818107d607,
Patient ID.......: 88c14312-3265-4a3f-b7bb-41818107d607
Patient Age......: 58
Patient Sex......: F
Modality.........: CR
Body Part........: CHEST
View Position....: AP
Image size.......: 1024 x 1024, 154050 bytes
Pixel spacing....: [0.139, 0.139]
Filename.........: ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.304.1517875162.301989.dcm
Storage type.....: 1.2.840.10008.5.1.4.1.1.7
Patient name.....: fa43083b-0d94-4849-a5c4-40120c380164,
Patient ID.......: fa43083b-0d94-4849-a5c4-40120c380164
Patient Age......: 60
Patient Sex......: M
Modality.........: CR
Body Part........: CHEST
View Position....: PA
Image size.......: 1024 x 1024, 150238 bytes
Pixel spacing....: [0.14300000000000002, 0.14300000000000002]
Filename.........: ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.301.1517875162.280319.dcm
Storage type.....: 1.2.840.10008.5.1.4.1.1.7
Patient name.....: 97d32841-8836-4630-873c-be0b4d2e5478,
Patient ID.......: 97d32841-8836-4630-873c-be0b4d2e5478
Patient Age......: 50
Patient Sex......: F
Modality.........: CR
Body Part........: CHEST
View Position....: PA
Image size.......: 1024 x 1024, 159756 bytes
Pixel spacing....: [0.14300000000000002, 0.14300000000000002]
Filename.........: ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.303.1517875162.295039.dcm
Storage type.....: 1.2.840.10008.5.1.4.1.1.7
Patient name.....: 019a6d6a-4eac-4372-a1d4-fe9193826333,
Patient ID.......: 019a6d6a-4eac-4372-a1d4-fe9193826333
Patient Age......: 44
Patient Sex......: M
Modality.........: CR
Body Part........: CHEST
View Position....: PA
Image size.......: 1024 x 1024, 130602 bytes
Pixel spacing....: [0.171, 0.171]
Filename.........: ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.302.1517875162.286330.dcm
Storage type.....: 1.2.840.10008.5.1.4.1.1.7
Patient name.....: e6f57005-8262-46ac-92ab-7c858e4ae126,
Patient ID.......: e6f57005-8262-46ac-92ab-7c858e4ae126
Patient Age......: 74
Patient Sex......: M
Modality.........: CR
Body Part........: CHEST
View Position....: PA
Image size.......: 1024 x 1024, 136118 bytes
Pixel spacing....: [0.171, 0.171]
Data in DICOM header
ds
Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length UL: 200
(0002, 0001) File Meta Information Version OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID UI: Secondary Capture Image Storage
(0002, 0003) Media Storage SOP Instance UID UI: 1.2.276.0.7230010.3.1.4.8323329.302.1517875162.286330
(0002, 0010) Transfer Syntax UID UI: JPEG Baseline (Process 1)
(0002, 0012) Implementation Class UID UI: 1.2.276.0.7230010.3.0.3.6.0
(0002, 0013) Implementation Version Name SH: 'OFFIS_DCMTK_360'
-------------------------------------------------
(0008, 0005) Specific Character Set CS: 'ISO_IR 100'
(0008, 0016) SOP Class UID UI: Secondary Capture Image Storage
(0008, 0018) SOP Instance UID UI: 1.2.276.0.7230010.3.1.4.8323329.302.1517875162.286330
(0008, 0020) Study Date DA: '19010101'
(0008, 0030) Study Time TM: '000000.00'
(0008, 0050) Accession Number SH: ''
(0008, 0060) Modality CS: 'CR'
(0008, 0064) Conversion Type CS: 'WSD'
(0008, 0090) Referring Physician's Name PN: ''
(0008, 103e) Series Description LO: 'view: PA'
(0010, 0010) Patient's Name PN: 'e6f57005-8262-46ac-92ab-7c858e4ae126'
(0010, 0020) Patient ID LO: 'e6f57005-8262-46ac-92ab-7c858e4ae126'
(0010, 0030) Patient's Birth Date DA: ''
(0010, 0040) Patient's Sex CS: 'M'
(0010, 1010) Patient's Age AS: '74'
(0018, 0015) Body Part Examined CS: 'CHEST'
(0018, 5101) View Position CS: 'PA'
(0020, 000d) Study Instance UID UI: 1.2.276.0.7230010.3.1.2.8323329.302.1517875162.286329
(0020, 000e) Series Instance UID UI: 1.2.276.0.7230010.3.1.3.8323329.302.1517875162.286328
(0020, 0010) Study ID SH: ''
(0020, 0011) Series Number IS: '1'
(0020, 0013) Instance Number IS: '1'
(0020, 0020) Patient Orientation CS: ''
(0028, 0002) Samples per Pixel US: 1
(0028, 0004) Photometric Interpretation CS: 'MONOCHROME2'
(0028, 0010) Rows US: 1024
(0028, 0011) Columns US: 1024
(0028, 0030) Pixel Spacing DS: [0.171, 0.171]
(0028, 0100) Bits Allocated US: 8
(0028, 0101) Bits Stored US: 8
(0028, 0102) High Bit US: 7
(0028, 0103) Pixel Representation US: 0
(0028, 2110) Lossy Image Compression CS: '01'
(0028, 2114) Lossy Image Compression Method CS: 'ISO_10918_1'
(7fe0, 0010) Pixel Data OB: Array of 136118 elements
Info about underlying pixel data
# Inspect the decoded pixel data: container type, element dtype and shape.
im = ds.pixel_array
print(type(im))
print(im.dtype)
print(im.shape)
<class 'numpy.ndarray'>
uint8
(1024, 1024)
Convert DICOM to JPG/PNG
import cv2
def dicom_to_jpg(folder_path='../dicom/train_images/',
                 output_path='../dicom/converted/',
                 png=False):
    """Convert every DICOM file in a folder to JPG (or PNG) images.

    Args:
        folder_path: Directory whose entries are read with
            ``pydicom.dcmread``.
        output_path: Directory the converted images are written to. It is
            created when it does not exist yet.
        png: Set to True to write ``.png`` files instead of ``.jpg``.
    """
    extension = '.png' if png else '.jpg'

    # Make sure the destination exists before any image is written to it.
    os.makedirs(output_path, exist_ok=True)

    for n, image in enumerate(os.listdir(folder_path)):
        ds = pydicom.dcmread(os.path.join(folder_path, image))
        pixel_array_numpy = ds.pixel_array

        # Swap only the trailing extension; str.replace would also rewrite a
        # '.dcm' that happens to occur in the middle of the file name.
        stem, _ = os.path.splitext(image)
        cv2.imwrite(os.path.join(output_path, stem + extension), pixel_array_numpy)

        # Progress message every 50 files.
        if n % 50 == 0:
            print('{} image converted'.format(n))
dicom_to_jpg()
0 image converted
Patient Demographics
# Load functions from scikit-image library
from skimage import exposure
import skimage.morphology as morp
from skimage.filters import rank
# For date functions
from datetime import datetime
# List the dataset's element keywords that match the filter 'patient'.
ds.dir('patient')
['PatientAge',
'PatientBirthDate',
'PatientID',
'PatientName',
'PatientOrientation',
'PatientSex']
ds.PatientName
'e6f57005-8262-46ac-92ab-7c858e4ae126'
# Change Patient Name
# NOTE(review): DICOM person names normally separate components with '^'
# (e.g. "Blaine^Richard"); confirm whether the comma form is intended.
AKA = "Blaine, Richard"
ds.PatientName = AKA
ds.PatientName
'Blaine, Richard'
# Change DOB (stored as a 'YYYYMMDD' string)
epoch = '19700101'
ds.PatientBirthDate = epoch
ds.PatientBirthDate
'19700101'
! python --version
Python 3.9.7
# Calculate patient age at time of scan
dob = datetime.strptime(ds.PatientBirthDate, '%Y%m%d')
study_date = datetime.strptime(ds.StudyDate, '%Y%m%d')
# Approximate age in years (365-day years); the result is negative when the
# study date precedes the birth date.
age = (study_date - dob).days/365

print ("Age = ", age)
Age = -69.04657534246576
# Calculate current age
dob = datetime.strptime(ds.PatientBirthDate, '%Y%m%d')
now = datetime.now()
# Approximate age in years, using 365-day years.
current_age = (now - dob).days/365

print("Current age = ", current_age)
Current age = 53.25205479452055
# Change study date (stored as a 'YYYYMMDD' string)
fictional = '20210504'
ds.StudyDate = fictional
ds.StudyDate
'20210504'
Anonymizing a folder of DICOM images
# Elements removed from every file: patient name, patient ID, accession
# number and patient birth date.
tags_to_remove = ('PatientName', 'PatientID', 'AccessionNumber', 'PatientBirthDate')

# NOTE(review): recursive=True only matters when the pattern contains '**';
# confirm whether sub-folders of 'cleft' are meant to be included.
for filename in glob.iglob('cleft/*.DCM', recursive=True):
    # dcmread/dcmwrite replace the deprecated read_file/write_file aliases.
    ds = pydicom.dcmread(filename, force=True)

    # Skip elements that are absent instead of failing on `del`.
    for tag in tags_to_remove:
        if tag in ds:
            delattr(ds, tag)

    # The file is overwritten in place with the stripped dataset.
    pydicom.dcmwrite(filename, ds)
    print(filename)
iPython Widgets
from ipywidgets import interactive, interact, widgets, Layout, Button, Box, Dropdown, IntSlider
from IPython.display import display
# Turn off warnings
import warnings
warnings.filterwarnings('ignore')
def contrast_adjust(image_name, percentile_lo, percentile_hi):
    """Rescale an image's intensities between two percentiles and show it.

    Args:
        image_name: Python expression, given as a string, that evaluates to
            the image array (e.g. ``'ds.pixel_array'``).
        percentile_lo: Lower percentile used as the start of the input range.
        percentile_hi: Upper percentile used as the end of the input range.

    Side effects:
        Stores the rescaled array in the global ``img_rescale_interactive``
        and the expression string in the global ``image_name_global`` so
        that other functions can use them.
    """
    # save optimized image array to global variable so that other functions can use
    global img_rescale_interactive, image_name_global
    image_name_global = image_name

    # NOTE(review): eval() executes arbitrary code. That is tolerable only
    # while the expression comes from the notebook author; never pass
    # untrusted text here. The expression is evaluated once and reused.
    image = eval(image_name)

    p_lo, p_hi = np.percentile(image, (percentile_lo, percentile_hi))
    img_rescale = exposure.rescale_intensity(image, in_range=(p_lo, p_hi))
    img_rescale_interactive = img_rescale

    plt.figure(figsize=(6, 6), dpi=100)
    plt.imshow(img_rescale, cmap=plt.cm.gray)
    plt.show()
# Widgets to save the image in different formats
button_jpg = widgets.Button(description="Save .jpg version")
button_png = widgets.Button(description="Save .png version")
button_tiff = widgets.Button(description="Save .tiff version")

items = [
    button_jpg,
    button_png,
    button_tiff,
]

# Lay the three buttons out in a single flex row.
box_layout = Layout(
    display='flex',
    flex_flow='row',
    align_items='stretch',
)

box = Box(children=items, layout=box_layout)
def _save_rescaled_image(extension):
    """Save the current rescaled image as ``image_name_global + extension``."""
    plt.imsave(image_name_global + extension, img_rescale_interactive, cmap=plt.cm.gray)


def jpg_button_clicked(b):
    """Click handler: save the rescaled image as a .jpg file."""
    _save_rescaled_image('.jpg')


def png_button_clicked(b):
    """Click handler: save the rescaled image as a .png file."""
    _save_rescaled_image('.png')


def tiff_button_clicked(b):
    """Click handler: save the rescaled image as a .tiff file."""
    _save_rescaled_image('.tiff')


# Wire each button to its handler.
button_jpg.on_click(jpg_button_clicked)
button_png.on_click(png_button_clicked)
button_tiff.on_click(tiff_button_clicked)
# Build sliders for the two percentiles (1 to 100 in steps of 0.5) that
# drive contrast_adjust on the expression 'ds.pixel_array'.
w = interactive(contrast_adjust, image_name='ds.pixel_array', percentile_lo=(1,100,.5), percentile_hi=(1,100,.5))

display(w)
box
image_name_global
'ds.pixel_array'
Invert Image
# Show the image with the 'gist_yarg' colormap.
plt.figure(figsize=(6, 6), dpi=100)
figure = plt.imshow(ds.pixel_array, cmap=plt.cm.gist_yarg)
Mirror Image R > L
# np.fliplr reverses the column order (left/right mirror).
plt.imshow(np.fliplr(ds.pixel_array), cmap=plt.cm.gray)
<matplotlib.image.AxesImage at 0x7fa920252670>
Flip Image
# np.flipud reverses the row order (top/bottom flip).
plt.imshow(np.flipud(ds.pixel_array), cmap=plt.cm.gray)
<matplotlib.image.AxesImage at 0x7fa911a87e80>
Histogram
# Histogram of all pixel values in 64 bins, followed by basic statistics.
plt.hist(ds.pixel_array.flatten(), bins=64)
plt.show()
print("pixel array = ", ds.pixel_array.shape)
print("minimum value = ", np.amin(ds.pixel_array)) # Find minimum value in pixel array
print("maximum value = ", np.amax(ds.pixel_array)) # Find maximum value in pixel array
pixel array = (1024, 1024)
minimum value = 0
maximum value = 248