#!/opt/anaconda/envs/seisenv/bin/python
# -*- coding: utf-8 -*-
"""
med_spec_loop.py
This wrapper script handles the data loading and manipulation, and passes a coarse_duration length of seismic data to the script get_med_spectra, to
calculate the median spectra of many small samples of seismic data with length fine_duration. get_med_spectra.py returns the median spectra, which is stored
in an array, and finally plotted.
Created on Fri Feb 19 12:06:45 2016
_v1 Mar 27, 2017: TCB
_v2 Nov 29, 2017: Modified to deconvolve the instrument response from the
waveforms.
_v3 Nov 15, 2018: Modified to use a parameter file for all parameters.
_v5 on October 28, 2022 (yoram terleth): Modified to accomodate long data gaps, bug fixes, clean up.
Can run script from commandline nd overide par file for network and station using:
>> nohup python -u ./med_spec_loop.py med_spec.par XF BBWL > BBWL.log &
"""
#%% load packages
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import dates as mdates
import obspy
from obspy.core import read
from obspy import UTCDateTime
from obspy.clients.fdsn import Client
import datetime as dt
import pickle
import glob
import os, time
import sys
import configparser
from clone_inv import clone_inv
# custom function for calculating median spectra
import get_med_spectra_v1
# custom functions for working with obspy UTCDateTimes
from UTCDateTime_funcs import UTCfloor, UTCceil, UTC2dn, UTC2dt64
# Set the process's time zone to UTC
os.environ['TZ'] = 'UTC'
time.tzset()
#%% READ PARAMETERS FROM THE PAR FILE
config = configparser.ConfigParser()
config.read(sys.argv[1])
# general info
data_source = config['DEFAULT']['data_source']
network = config['DEFAULT']['network']
station = config['DEFAULT']['station']
channel = config['DEFAULT']['channel']
# directories
data_dir = config['DEFAULT']['data_dir']
resp_file = config['DEFAULT']['resp_file']
out_dir = config['DEFAULT']['out_dir']
# window lengths (see example par file for details)
pp = {'coarse_duration': float(config['DEFAULT']['pp_coarse_duration']),
      'coarse_overlap' : float(config['DEFAULT']['pp_coarse_overlap']),
      'fine_duration'  : float(config['DEFAULT']['pp_fine_duration']),
      'fine_overlap'   : float(config['DEFAULT']['pp_fine_overlap']) }
# frequency filters
pre_filt = ( float(config['DEFAULT']['pre_filt0']),
             float(config['DEFAULT']['pre_filt1']),
             float(config['DEFAULT']['pre_filt2']),
             float(config['DEFAULT']['pre_filt3']) )
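# Note: obspy's remove_response applies pre_filt as a frequency-domain cosine
# taper with these four corner frequencies (flat between the two middle
# corners, tapering to zero outside the outer corners) before deconvolving
# the instrument response, which stabilizes the deconvolution at the edges of
# the usable band.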
# temporal range to analyse
t_start = UTCDateTime(config['DEFAULT']['t_start'])
t_end = UTCDateTime(config['DEFAULT']['t_end'])
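# For reference, a minimal par file containing the keys read above would look
# like the sketch below (all values are illustrative, not project defaults):
#
#   [DEFAULT]
#   data_source = local
#   network = XF
#   station = BBWL
#   channel = HHZ
#   data_dir = /data/day_vols/
#   resp_file = /data/resp/stations.xml
#   out_dir = ./output/
#   pp_coarse_duration = 3600
#   pp_coarse_overlap = 0.5
#   pp_fine_duration = 20
#   pp_fine_overlap = 0.5
#   pre_filt0 = 0.01
#   pre_filt1 = 0.02
#   pre_filt2 = 90
#   pre_filt3 = 100
#   t_start = 2022-06-01T00:00:00
#   t_end = 2022-09-01T00:00:00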
#%% ALLOW FOR OPTIONAL COMMAND LINE OVERRIDES OF THE PAR FILE
# sys.argv: [0] is the script path, [1] is the par file path, and the
# optional [2], [3], and [4] override the network, station, and channel
# one override of the par file
if len(sys.argv) == 3:
    network = sys.argv[2]
# two overrides of the par file
elif len(sys.argv) == 4:
    network = sys.argv[2]
    station = sys.argv[3]
# three overrides of the par file
elif len(sys.argv) == 5:
    network = sys.argv[2]
    station = sys.argv[3]
    channel = sys.argv[4]
#%% PRINT OUTPUTS ABOUT THE RUN, FOR THE PURPOSE OF RECORDING IN THE LOG FILE
run_start_time = dt.datetime.now()
print('\n\n' + '===========================================')
print(station + ' run started: ' + '{:%b %d, %Y, %H:%M}'.format(run_start_time))
print('===========================================' + '\n')
print('Run executed from "' + os.getcwd() + '/"')
print('Run arguments consist of: ' + str(sys.argv))
print('\n\n' + '-------------------------------------------')
print('Start display of parameter file: ' + sys.argv[1])
par_time_sec = os.path.getmtime(sys.argv[1])
print('Parameter file last saved: ' + time.ctime( par_time_sec ) )
print('-------------------------------------------')
with open(sys.argv[1], 'r') as fin:
    print(fin.read())
print('-------------------------------------------')
print('End display of parameter file: ' + sys.argv[1])
print('-------------------------------------------' + '\n\n')
if len(sys.argv) > 2:
    print('-------------------------------------------')
    print('Parameter file values overridden with: ' + str(sys.argv[2:]) )
    print('-------------------------------------------' + '\n\n')
#%% INITIALIZATION
# this section loads one stream to initialize the output arrays
print('Initialisation: loading first stream...\n\n')
# read from local day_vol files
if data_source == 'local':
    # get the response file
    inv_file = resp_file
    inv = obspy.read_inventory(inv_file)
    inv = clone_inv(inv, network, station)
    # subset the inventory to just what this script needs
    inv = inv.select(channel=channel, station=station)
    # build an array of all the file names of interest
    file_names = glob.glob(data_dir + station + '/*' + channel + '*')
    file_names.sort()
    file_counter = 0
    # read the first stream from the files
    st = read(file_names[0])
    # if there are files that precede our t_start, skip ahead through them
    while st[0].stats.starttime < t_start:
        if file_counter == 0:
            print('File(s) found that pre-date t_start from the par file.\n')
        file_counter += 1
        st = read(file_names[file_counter])
    st.merge(method=0)
    print('\n')
    print('Skipped ' + str(file_counter) + ' files.\n')
# read from a client server
elif data_source:
    # define the FDSN client
    fdsn_client = Client(data_source)
    # read in the client response file
    inv = fdsn_client.get_stations(network=network, station=station, channel=channel,
                                   location='', level="response")
    # read in client data from the first day
    st = fdsn_client.get_waveforms(network=network, station=station, location='',
                                   channel=channel, starttime=t_start, endtime=t_start+86400)
    # initiate token file counter
    file_counter = 0
else:
    sys.exit('No valid data source given!')
# remove the instrument response from the initialization data
st_IC = st.copy().remove_response(inventory=inv, output="VEL", pre_filt=pre_filt)
print('First considered stream is ' + str(st_IC))
# build arrays
print('\n\n Initialisation: building arrays...')
# create an np.array of times at which to calculate the median power.
t = np.arange( t_start, t_end, pp['coarse_duration'] * pp['coarse_overlap'])
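# For example (illustrative numbers only): with coarse_duration = 3600 s and
# coarse_overlap = 0.5, a new coarse window starts every 1800 s, so a 30-day
# t_start-to-t_end range yields 30 * 48 = 1440 entries in t.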
# Convert the np.array of obspy UTCDateTimes into numpy datetime64 values, for the purpose of plotting
t_dt64 = UTC2dt64(t)
# initialize the sampling rate with zero, to ensure proper running in the first iteration
Fs_old = 0
# Find the number of frequencies that the FFT will produce
L = int(st[0].stats.sampling_rate) * pp['fine_duration']
freq_nums = int(2**np.ceil(np.log2( L )) / 2) + 1
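# freq_nums is the number of bins in a one-sided (real-input) FFT whose length
# is L rounded up to the next power of two. Worked example (illustrative
# numbers only): at 200 Hz with fine_duration = 20 s, L = 4000, the FFT length
# rounds up to 4096, and freq_nums = 4096/2 + 1 = 2049.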
# initialize the final array of median powers
Pdb_array = np.ones( (freq_nums, len(t)) ) * np.nan
# initialize printing flag
print_out = True
#%% Start the big for loop that steps through each time and each miniseed file and calculates the median powers; each iteration fills one time step (one x-axis column) of the spectrogram.
print('\n\nStarting for loop...\n\n')
for i in range(len(t)):
    ## CURRENT LONG WINDOW: ##
    # copy the instrument-corrected trace
    tr_trim = st_IC[0].copy()
    # subtracting one sample interval (delta) keeps the first data point but
    # stops just shy of the first data point of the next window
    tr_trim.trim(t[i], t[i] + pp['coarse_duration'] - tr_trim.stats.delta)
    ## IF NEEDED, DEAL WITH FILE CHANGE: ##
    # if the trimmed window ends within one coarse_duration of the end of the
    # stream in memory, preload the next file
    while tr_trim.stats.endtime > st_IC[-1].stats.endtime - pp['coarse_duration']:
        file_counter += 1
        # print where we are to output
        print("{:>4.0%}".format(float(i)/len(t)) + ' complete. Loading time: ' + t[i].strftime('%d %b %Y, %H:%M'))
        try:
            # reading in from local files
            if data_source == 'local':
                # read in the new dayvol file
                print('About to read in new datafile: ' + file_names[file_counter])
                st += read(file_names[file_counter])
                # merge the new and old day volumes
                st.merge(fill_value='interpolate')
                # trim off the part of the merged stream that has already been processed
                st.trim(starttime=t[i] - pp['coarse_duration'], endtime=st[-1].stats.endtime)
            # reading in from a client server
            if data_source != 'local':
                # read in the next 24 hours of data
                st = fdsn_client.get_waveforms(network=network, station=station, location='',
                                               channel=channel, starttime=t[i]-pp['coarse_duration'], endtime=t[i]+86400)
        # if there is no file of the queried name, break out of the while loop
        except Exception as e:
            print(e)
            print('Requested file does not exist for ' + str(t[i]))
            break
        # remove the instrument response from the newly loaded stream
        st_IC = st.copy().remove_response(inventory=inv, output="VEL", pre_filt=pre_filt)
        # copy the instrument-corrected trace and trim it as above
        tr_trim = st_IC[0].copy()
        tr_trim.trim(t[i], t[i] + pp['coarse_duration'] - tr_trim.stats.delta)
    ## DEAL WITH DATA GAPS:
    # if the current stream is longer than 5 days, we have reached a data gap:
    # this block skips ahead to where the data resume
    if st[-1].stats.endtime - st[0].stats.starttime > 5 * 24 * 3600:
        if print_out:
            print('Stream in memory is longer than 5 days at ' + str(t[i]))
            print('Stream: ' + str(st))
            print('skipping ahead...')
            print_out = False
        ## locate the index in t of the end of the stream (which is where there is new data):
        # as long as we have not reached the end of the stream in memory, skip ahead
        if t[i] < st[-1].stats.endtime:
            continue
        # stop skipping ahead once we are at the end of the loaded stream
        else:
            print('Done skipping at ' + str(t[i]))
            print('Stream: ' + str(st))
            print('Back to normal processing.')
            print_out = True
    ## DEAL WITH EMPTY OR SHORT STREAMS:
    # if tr_trim does not contain a full coarse_duration of samples, skip the
    # present coarse_duration and go on to the next iteration of the for loop
    if tr_trim.stats.npts < pp['coarse_duration'] * int(tr_trim.stats.sampling_rate):
        print('Incomplete coarse_duration at ' + UTCDateTime.strftime(t[i], "%d %b %y %H:%M") + ': Skipping')
        continue
    ## COMPUTE MEDIAN SPECTRA
    # run the function
    freqs, Pdb, Fs_old = get_med_spectra_v1.med_spec(tr_trim, pp, Fs_old)
    # save the median spectrum into the array
    Pdb_array[:len(freqs), i] = Pdb[:len(freqs)]
    print(' Serial iteration done: ' + '{:%b %d, %Y, %H:%M}'.format(dt.datetime.now()))
# end of the for loop
# end of the for loop
#%% SAVE THE OUTPUT TO PICKLE FILE
with open(out_dir + 'mp' + network + '_' + station + '.pickle', 'wb') as f:
    pickle.dump([t, t_dt64, freqs, Pdb_array, pp, network, station, run_start_time], f)
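# A minimal sketch of reloading and plotting the saved output, using the
# matplotlib imports at the top of this script (the plot styling below is
# illustrative, not part of this script):
#
#   with open(out_dir + 'mp' + network + '_' + station + '.pickle', 'rb') as f:
#       t, t_dt64, freqs, Pdb_array, pp, network, station, run_start_time = pickle.load(f)
#   fig, ax = plt.subplots()
#   qm = ax.pcolormesh(t_dt64, freqs, Pdb_array)  # median power spectrogram
#   ax.set_ylabel('Frequency (Hz)')
#   ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b'))
#   fig.colorbar(qm, label='Median power (dB)')
#   plt.show()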
#%% PRINT END MESSAGE
print('\n\n' + '===========================================')
print(station + ' run complete: ' + '{:%b %d, %Y, %H:%M}'.format(dt.datetime.now()))
print('Elapsed time: ' + str(dt.datetime.now() - run_start_time))
print('===========================================' + '\n\n')