-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_data.py
178 lines (136 loc) · 5.12 KB
/
extract_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
''' Helper functions for extracting image data and metadata from LPD HDF5 data files
'''
import h5py
import numpy as np
import xml.etree.ElementTree as ET
import os
from datetime import datetime
def get_lpd_filename(file_path, filename):
    ''' Returns the path of a data file located in a directory

    file_path - directory holding the data file, with or without a trailing separator
    filename - name of the data file

    os.path.join only inserts a separator when file_path does not already end with one, so
    callers that pass a trailing slash get exactly the same string as plain concatenation.
    Note that an absolute filename makes os.path.join discard file_path.
    '''
    return os.path.join(file_path, filename)
def get_lpd_file(filename):
    ''' Opens the hdf file at filename in read-only mode and returns the h5py file object
    The file is not closed here; that is left to the caller
    '''
    return h5py.File(filename, 'r')
def get_lpd_data(lpd_file):
    ''' Reads the complete image dataset out of an already opened hdf file
    Opening the file (get_lpd_file()) is kept separate from reading the data so the same file
    object can also be used to reach the metadata

    The dataset is looked up under 'data' first; if that key is missing, 'lpd/data/image' is
    used instead
    '''
    try:
        return lpd_file['data'][()]
    except KeyError:
        # Fall back to the alternative file layout
        return lpd_file['lpd/data/image'][()]
def get_first_image(lpd_data):
    ''' Returns the first image held in lpd_data as a 256 x 256 array
    '''
    image_shape = (256, 256)
    # Slice rather than index so the reshape below is what drops the leading axis
    leading_frame = lpd_data[:1, :, :]
    return np.reshape(leading_frame, image_shape)
def get_single_tile(lpd_data, tile_position, image_num):
    ''' Cuts one 32 x 128 tile out of the image with index image_num

    tile_position - [row, column] of the tile's top-left pixel within the image
    image_num - index of the image within lpd_data
    '''
    row_start = tile_position[0]
    col_start = tile_position[1]
    tile_block = lpd_data[image_num:image_num + 1,
                          row_start:row_start + 32,
                          col_start:col_start + 128]
    # Drop the length-one image axis
    return np.reshape(tile_block, (32, 128))
def get_mean_tile(lpd_data, tile_position):
    ''' Returns the per-pixel mean of one tile, taken over every image in lpd_data
    '''
    return np.mean(get_total_tile(lpd_data, tile_position), axis=0)
def get_total_tile(lpd_data, tile_position):
    ''' Returns the same 32 x 128 tile region from every image in lpd_data, stacked along the
    first axis
    Used by get_mean_tile() and get_stdev_tile()

    tile_position - [row, column] of the tile's top-left pixel within the image
    '''
    row_start = tile_position[0]
    col_start = tile_position[1]
    return lpd_data[:, row_start:row_start + 32, col_start:col_start + 128]
def get_stdev_image(lpd_data):
    ''' Returns an image holding the per-pixel standard deviation across lpd_data's first axis
    '''
    return np.std(lpd_data, axis=0)
def get_stdev_tile(lpd_data, tile_position):
    ''' Returns the per-pixel standard deviation of one tile, taken over every image in lpd_data
    '''
    return np.std(get_total_tile(lpd_data, tile_position), axis=0)
def get_single_chip(tile, chip_position):
    ''' Cuts a single chip out of a tile
    chip_position - int from 0 to 7; each chip is 16 columns wide
    '''
    first_col = chip_position * 16
    return tile[:, first_col:first_col + 16]
def get_single_column(tile, col_position):
    ''' Returns one column of a tile
    The column is sliced rather than indexed, so the result keeps both axes
    '''
    column_span = slice(col_position, col_position + 1)
    return tile[:, column_span]
def get_single_row(tile, row_position):
    ''' Get a single row within the tile

    row_position - index of the row to extract

    The row is sliced rather than indexed so the result keeps both axes, matching
    get_single_column(). The previous slice had no upper bound and so returned every row from
    row_position to the end of the tile instead of just one.
    '''
    single_row = tile[row_position:row_position + 1, :]
    return single_row
def set_tile_position(tile_orientation, mini_connector):
    ''' Works out where a tile starts within an image
    Returns a two-element list: [row offset, column offset]

    tile_orientation - "Left Tile" selects column offset 128, anything else selects 0
    mini_connector - 1-based connector number; each connector maps to a 32 pixel high band
    '''
    # Tiles have a 32 pixel height
    row_offset = 32 * (mini_connector - 1)
    # Left tiles are on the RHS of the image, and vice versa
    col_offset = 128 if tile_orientation == "Left Tile" else 0
    return [row_offset, col_offset]
def get_file_metadata(file):
    ''' Returns the metadata group of an open h5 file
    'metadata' is looked up first; if that key is missing, 'lpd/metadata' is used instead
    '''
    try:
        return file['metadata']
    except KeyError:
        # Fall back to the alternative file layout
        return file['lpd/metadata']
def get_num_images_per_train(metadata):
    ''' Reads the number of images per train from the readout parameter XML held in the metadata
    The value is used in the analysis details and when plotting the trigger images
    '''
    # First entry of readoutParamFile holds the XML document itself
    readout_xml = metadata['readoutParamFile'][0]
    # Parse it and pull out the 'val' attribute of the numberImages element
    number_images = ET.fromstring(readout_xml).find('numberImages')
    return int(number_images.get('val'))
def get_num_trains(metadata):
    ''' Reads the 'numTrains' attribute of the metadata and returns it as an int
    '''
    train_count = metadata.attrs['numTrains']
    return int(train_count)
def get_file_date_created(file_path, metadata):
    ''' Returns a date for the data file
    The 'runDate' metadata attribute is returned as-is when present; otherwise the file's
    last-modified time on disk is formatted as 'dd-mm-YYYY HH:MM:SS'
    '''
    metadata_key = 'runDate'
    if metadata_key not in metadata.attrs.keys():
        # No run date recorded, so fall back to the file's modification time
        modified = datetime.fromtimestamp(os.path.getmtime(file_path))
        return modified.strftime('%d-%m-%Y %H:%M:%S')
    return metadata.attrs[metadata_key]
def get_total_num_images(metadata):
    ''' Returns the total number of images in the data file: images per train times number
    of trains
    '''
    images_per_train = get_num_images_per_train(metadata)
    train_count = get_num_trains(metadata)
    return images_per_train * train_count
def get_cmd_seq_filename(metadata):
    ''' Gets the filename of the command sequence file

    Reads the 'cmdSequenceFile' metadata attribute and returns the part after the last '/'

    A bytes value is decoded rather than passed through str(). str() on bytes produces the
    "b'...'" repr, which left a stray "b'" prefix on the result whenever the stored value
    contained no '/'.
    '''
    raw_value = metadata.attrs['cmdSequenceFile']
    if isinstance(raw_value, bytes):
        # Covers bytes subclasses too; undecodable bytes are replaced instead of raising
        return raw_value.decode('utf-8', errors='replace').split('/')[-1]

    filename = str(raw_value).split('/')[-1]
    # Kept from the original behaviour: drop a single trailing quote left by a repr-style string
    if filename.endswith('\''):
        filename = filename[:-1]
    return filename