-
Notifications
You must be signed in to change notification settings - Fork 2
/
data_conversion.py
146 lines (127 loc) · 5.2 KB
/
data_conversion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import shutil
from pathlib import Path
import numpy as np
import SimpleITK
from batchgenerators.utilities.file_and_folder_operations import *
from nnunet.dataset_conversion.utils import generate_dataset_json
from nnunet.paths import nnUNet_raw_data
from tifffile import TiffFile
from tqdm import tqdm
def get_spacing_from_tif_airwaySeg(filename: str, default_spacing=(10, 5.159, 5.159)):
    """
    Extracts the spacing information from a TIFF file using its metadata.

    The x and y spacings are computed from the ``XResolution`` and
    ``YResolution`` tags of the first page as ``value[1] / value[0]``; the z
    spacing is read from the ``"spacing"`` entry of the ImageJ metadata.
    If any of these lookups fails, ``default_spacing`` is returned instead.
    The file name and the spacing that is returned are printed either way.

    Args:
        filename (str): Path to the TIFF file (a ``pathlib.Path`` also works).
        default_spacing (tuple): Fallback ``(z, y, x)`` spacing used when the
            metadata cannot be read. Defaults to ``(10, 5.159, 5.159)``.

    Returns:
        tuple: A tuple containing the z, y, and x spacings. If spacing
        information is not available, returns the default values.
    """
    # Path(...).name is separator-agnostic, unlike splitting on "/".
    display_name = Path(filename).name
    with TiffFile(filename) as tif:
        try:
            page_tags = tif.pages[0].tags
            x_resolution = page_tags["XResolution"].value
            y_resolution = page_tags["YResolution"].value
            x_spacing = x_resolution[1] / x_resolution[0]
            y_spacing = y_resolution[1] / y_resolution[0]
            # imagej_metadata may be missing; subscripting it then raises and
            # we fall through to the default spacing below.
            z_spacing = tif.imagej_metadata["spacing"]
        except Exception:
            # Deliberate best effort: any metadata problem (missing tag,
            # missing ImageJ metadata, zero resolution, ...) selects the
            # default. Unlike a bare ``except``, this lets KeyboardInterrupt
            # and SystemExit propagate.
            z_spacing, y_spacing, x_spacing = default_spacing
    print(display_name, z_spacing, y_spacing, x_spacing)
    return (z_spacing, y_spacing, x_spacing)
def convert_to_nifti(input_filename, output_filename, spacing, is_seg=False):
    """
    Converts a TIFF image to a NIfTI file format, with specified spacing.

    Args:
        input_filename (str): Path to the input TIFF file.
        output_filename (str): Path where the output NIfTI file will be saved.
        spacing (tuple): Spacing to set for the NIfTI image. It is reversed
            before being handed to SimpleITK (callers in this file pass
            ``(z, y, x)``).
        is_seg (bool): If True, the image is treated as a segmentation map, and
            all values greater than 0 are set to 1. The unique values found in
            the input are printed before binarization.
    """
    # Only the read needs the file handle; release it as soon as possible.
    with TiffFile(input_filename) as tif:
        npy_image = tif.asarray()

    if is_seg:
        # Path(...).name is separator-agnostic, unlike splitting on "/".
        print(Path(input_filename).name, np.unique(npy_image))
        npy_image[npy_image > 0] = 1

    sitk_image = SimpleITK.GetImageFromArray(npy_image)
    # Reverse the axis order and hand SimpleITK plain floats, so integer or
    # numpy scalar spacings are accepted as well.
    sitk_image.SetSpacing([float(value) for value in tuple(spacing)[::-1]])
    # str() lets callers pass pathlib.Path objects too.
    SimpleITK.WriteImage(sitk_image, str(output_filename))
def _collect_image_files(mcai):
    """Return the raw image TIFFs that belong to the ground-truth file ``mcai``."""
    if mcai.parent.name.startswith("02"):
        # there are subdirectories, filter NP files
        sample_dir = mcai.parent.parent
        # Match "NP" only against the part of the path below the sample
        # directory, so the user's own download location cannot accidentally
        # filter out every image.
        return [
            tif
            for tif in sample_dir.glob("01*/*.tif")
            if "NP" not in str(tif.relative_to(sample_dir))
        ]
    # there are no subdirectories and no NP files
    return [tif for tif in mcai.parent.glob("*.tif") if "MCAI" not in tif.name]


def main():
    """Unpack the training archives and convert them into an nnU-Net raw dataset."""
    # Download the data from https://zenodo.org/records/7413818
    download_path = Path("<path/to/your/downloaded/zenodo/data>")
    if not download_path.is_dir():
        # Without this check an unedited placeholder path silently produces an
        # empty dataset.
        raise FileNotFoundError(
            f"Download directory not found: {download_path}. "
            "Set download_path to the folder containing the zenodo zip files."
        )
    train_set = {
        "ITLI_002",
        "ITLI_003",
        "ITLI_011",
        "NOAI_001",
        "VAAD_002",
        "VAAD_004",
        "VAAD_010",
        "VAAD_015",
        "VAAD_018",
        "Lung_003",
        "Lung_004",
        "Lung_005",
        "Lung_006",
        "Lung_007",
        "Lung_008",
        "Lung_009",
        "Lung_010",
    }
    extracted_dir = download_path / "extracted"

    # only unzip the train files for reproducing the training
    for zf in tqdm(sorted(download_path.glob("*.zip"))):
        if zf.stem in train_set:
            shutil.unpack_archive(zf, extracted_dir, "zip")

    # specify the task ID that will be used in the nnUnet run functions. In our case its 145.
    task_name = "Task145_LungAirway"
    target_base = join(nnUNet_raw_data, task_name)
    target_imagesTr = join(target_base, "imagesTr")
    target_labelsTr = join(target_base, "labelsTr")
    maybe_mkdir_p(target_imagesTr)
    maybe_mkdir_p(target_labelsTr)

    # glob in the extracted dir and only consider files that have GT (not "AI result")
    mcai_files = sorted(
        [
            *extracted_dir.glob("*/*/*MCAI*.tif"),
            *extracted_dir.glob("*/*MCAI*.tif"),
        ]
    )

    # iter over the MCAIs (manually corrected AI results -> last Active Learning Iteration)
    for mcai in mcai_files:
        img_files = _collect_image_files(mcai)
        if not img_files:
            print(f"WARNING: no raw image found for {mcai}, skipping")
            continue

        # convert raw and GT to nifti and save with _0000 in nnunet dataset
        # first GT, infer spacing from this file
        spacing = get_spacing_from_tif_airwaySeg(str(mcai))

        # iter over img_files, use same spacing as GT
        for img_file in img_files:
            # .stem strips only the final suffix, whatever its case.
            case_id = img_file.stem
            output_file = join(target_imagesTr, case_id + "_0000.nii.gz")
            convert_to_nifti(str(img_file), output_file, spacing, is_seg=False)
            # the label is stored under the image's case id so nnU-Net can pair them
            output_file_label = join(target_labelsTr, case_id + ".nii.gz")
            convert_to_nifti(str(mcai), output_file_label, spacing, is_seg=True)

    # finally we can call the utility for generating a dataset.json
    generate_dataset_json(
        join(target_base, "dataset.json"),
        target_imagesTr,
        None,
        ["LSmicroscopy"],
        labels={0: "background", 1: "airway"},
        dataset_name=task_name,
    )


if __name__ == "__main__":
    main()