diff --git a/openquake/hmtk/parsers/catalogue/csv_catalogue_parser.py b/openquake/hmtk/parsers/catalogue/csv_catalogue_parser.py
index e3fb1e777add..e4775e007223 100644
--- a/openquake/hmtk/parsers/catalogue/csv_catalogue_parser.py
+++ b/openquake/hmtk/parsers/catalogue/csv_catalogue_parser.py
@@ -162,6 +162,7 @@ class CsvCatalogueWriter(BaseCatalogueWriter):
     # Because the catalogues TOTAL_ATTRIBUTE_LIST is randomly ordered,
     # the preferred output order is given as a list here
+
     OUTPUT_LIST = [
         "eventID",
         "Agency",
@@ -182,6 +183,12 @@ class CsvCatalogueWriter(BaseCatalogueWriter):
         "magnitude",
         "sigmaMagnitude",
         "magnitudeType",
+        "str1",
+        "dip1",
+        "rake1",
+        "str2",
+        "dip2",
+        "rake2",
     ]

     def write_file(self, catalogue, flag_vector=None, magnitude_table=None):
@@ -256,6 +263,7 @@ def apply_purging(self, catalogue, flag_vector, magnitude_table):
 class CsvGCMTCatalogueWriter(CsvCatalogueWriter):
     """
     Writes GCMT catalogue to csv file
+
     """

     OUTPUT_LIST = [
@@ -299,6 +307,7 @@ class CsvGCMTCatalogueWriter(CsvCatalogueWriter):
         "plunge_t",
     ]

+
     def write_file(self, catalogue, flag_vector=None, magnitude_table=None):
         """
         Writes the catalogue to file, purging events if necessary.
diff --git a/openquake/hmtk/parsers/catalogue/gcmt_ndk_parser.py b/openquake/hmtk/parsers/catalogue/gcmt_ndk_parser.py
index 55696fe91ab8..ea8a899858ee 100644
--- a/openquake/hmtk/parsers/catalogue/gcmt_ndk_parser.py
+++ b/openquake/hmtk/parsers/catalogue/gcmt_ndk_parser.py
@@ -275,6 +275,7 @@ def to_hmtk(self, use_centroid=True):
                 iloc
             ] = gcmt.hypocentre.latitude
             self.catalogue.data["depth"][iloc] = gcmt.hypocentre.depth
+        # Moment, magnitude and relative errors
         self.catalogue.data["moment"][iloc] = gcmt.moment
         self.catalogue.data["magnitude"][iloc] = gcmt.magnitude
diff --git a/openquake/hmtk/seismicity/catalogue.py b/openquake/hmtk/seismicity/catalogue.py
index c3825cde79ff..f7fde88fde9f 100644
--- a/openquake/hmtk/seismicity/catalogue.py
+++ b/openquake/hmtk/seismicity/catalogue.py
@@ -78,6 +78,12 @@ class Catalogue(object):
         "depthError",
         "magnitude",
         "sigmaMagnitude",
+        "str1",
+        "dip1",
+        "rake1",
+        "str2",
+        "dip2",
+        "rake2",
     ]

     INT_ATTRIBUTE_LIST = ["year", "month", "day", "hour", "minute", "flag"]
@@ -110,6 +116,13 @@ class Catalogue(object):
         "magnitude",
         "sigmaMagnitude",
         "magnitudeType",
+        "str1",
+        "dip1",
+        "rake1",
+        "str2",
+        "dip2",
+        "rake2",
+
     ]

     def __init__(self):
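The three hunks above teach the `Catalogue` container and the CSV reader/writer about the six nodal-plane attributes (`str1`, `dip1`, `rake1`, `str2`, `dip2`, `rake2`). A minimal round-trip sketch of what this enables (file names are hypothetical, and the one-argument writer constructor is assumed from the existing API):

```python
# Hypothetical round trip: read a catalogue that carries the new
# nodal-plane columns, then write it back in the preferred column order.
from openquake.hmtk.parsers.catalogue.csv_catalogue_parser import (
    CsvCatalogueParser, CsvCatalogueWriter)

catalogue = CsvCatalogueParser("catalogue_with_planes.csv").read_file()
print(catalogue.data["str1"][:5], catalogue.data["rake1"][:5])

writer = CsvCatalogueWriter("catalogue_out.csv")  # output path, assumed signature
writer.write_file(catalogue)
```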
diff --git a/openquake/hmtk/seismicity/declusterer/dec_reasenberg.py b/openquake/hmtk/seismicity/declusterer/dec_reasenberg.py
new file mode 100644
index 000000000000..e646ba60ad72
--- /dev/null
+++ b/openquake/hmtk/seismicity/declusterer/dec_reasenberg.py
@@ -0,0 +1,454 @@
+# -*- coding: utf-8 -*-
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+#
+# LICENSE
+#
+# Copyright (C) 2010-2024 GEM Foundation, G. Weatherill, M. Pagani,
+# D. Monelli.
+#
+# The Hazard Modeller's Toolkit is free software: you can redistribute
+# it and/or modify it under the terms of the GNU Affero General Public
+# License as published by the Free Software Foundation, either version
+# 3 of the License, or (at your option) any later version.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with OpenQuake. If not, see <http://www.gnu.org/licenses/>.
+#
+# DISCLAIMER
+#
+# The software Hazard Modeller's Toolkit (openquake.hmtk) provided herein
+# is released as a prototype implementation on behalf of
+# scientists and engineers working within the GEM Foundation (Global
+# Earthquake Model).
+#
+# It is distributed for the purpose of open collaboration and in the
+# hope that it will be useful to the scientific, engineering, disaster
+# risk and software design communities.
+#
+# The software is NOT distributed as part of GEM’s OpenQuake suite
+# (https://www.globalquakemodel.org/tools-products) and must be considered as a
+# separate entity. The software provided herein is designed and implemented
+# by scientific staff. It is not developed to the design standards, nor
+# subject to same level of critical review by professional software
+# developers, as GEM’s OpenQuake software suite.
+#
+# Feedback and contribution to the software is welcome, and can be
+# directed to the hazard scientific staff of the GEM Model Facility
+# (hazard@globalquakemodel.org).
+#
+# The Hazard Modeller's Toolkit (openquake.hmtk) is therefore distributed
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# The GEM Foundation, and the authors of the software, assume no
+# liability for use of the software.
+
+"""
+Module :mod:`openquake.hmtk.seismicity.declusterer.dec_reasenberg`
+defines the Reasenberg declustering algorithm.
+"""
+
+from bisect import bisect_left
+from typing import Callable, Dict, Tuple
+
+import datetime
+
+import numpy as np
+
+from openquake.hazardlib.geo.geodetic import distance
+from openquake.hmtk.seismicity.declusterer.base import (
+    BaseCatalogueDecluster, DECLUSTERER_METHODS)
+
+
+@DECLUSTERER_METHODS.add(
+    "decluster",
+    taumin=1.0,  # look-ahead time for not-clustered events, days
+    taumax=10.0,  # maximum look-ahead time for clustered events, days
+    P=0.95,  # confidence level that this is the next event in the sequence
+    xk=0.5,  # factor used with xmeff to modify the magnitude cutoff during clusters
+    xmeff=1.5,  # effective magnitude, used with xk to define the magnitude cutoff
+    rfact=10,  # factor for the interaction radius of dependent events
+    horiz_error=1.5,  # epicentre error, km; if unspecified or None, pulled from the catalogue
+    depth_error=2.0,  # depth error, km; if unspecified or None, pulled from the catalogue
+    interaction_formula='Reasenberg1985',  # either `Reasenberg1985` or `WellsCoppersmith1994`
+    max_interaction_dist=np.inf  # km; some studies limit this to the crustal thickness (e.g. 30)
+)
+class Reasenberg(BaseCatalogueDecluster):
+    """
+    This class implements the Reasenberg algorithm as described in:
+
+    Reasenberg, P., 1985. Second-order moment of central California
+    seismicity, 1969-1982. Journal of Geophysical Research: Solid Earth,
+    90(B7), pp. 5479-5495.
+
+    The declustering code was originally converted to MATLAB by A. Allman,
+    then substantially modified and converted to Python by CG Reyes.
+    This implementation is similar to the one in ZMAP
+    (https://github.com/CelsoReyes/zmap7).
+    """
+
+    def __init__(self):
+        self.verbose = False
+        self.interaction_formulas = {
+            'Reasenberg1985': lambda m: 0.011 * 10 ** (0.4 * m),
+            # Helmstetter (SRL) 2007:
+            'WellsCoppersmith1994': lambda m: 0.01 * 10 ** (0.5 * m)}
+
+    @staticmethod
+    def clust_look_ahead_time(
+            mag_big: float,
+            dt_big: np.ndarray,
+            xk: float,
+            xmeff: float,
+            p: float) -> np.ndarray:
+        """
+        Calculate the look-ahead time for clustered events.
+
+        :param mag_big:
+            biggest magnitude in the cluster
+        :param dt_big:
+            difference, in days, between the biggest event and this one
+        :param xk:
+            factor used with xmeff to define the magnitude cutoff
+            - increases the effective magnitude during clusters
+        :param xmeff:
+            effective magnitude, used with xk to define the magnitude cutoff
+        :param p:
+            confidence level that this is the next event in the sequence
+        :returns:
+            tau: look-ahead time for clustered events (days)
+        """
+        deltam = (1 - xk) * mag_big - xmeff  # magnitude above the cutoff
+        if deltam < 0:
+            deltam = 0
+
+        denom = 10.0 ** ((deltam - 1) * 2 / 3)  # expected rate of aftershocks
+        top = -np.log(1 - p) * dt_big  # natural log, verified in Reasenberg
+        tau = top / denom  # equation from the Reasenberg paper
+        return tau
+    def decluster(self, catalogue, config) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Decluster the catalogue using the provided configuration.
+
+        :param catalogue: Catalogue of earthquakes
+        :param config: Configuration parameters
+        :returns:
+            **vcl vector** indicating the cluster number of each event, and
+            **msi vector** indicating which events should be considered
+            mainshocks
+
+        If the catalogue contains the additional data keys
+        data['depthError'] and/or data['horizError'], holding the location
+        error of each event, these are used to bring the declustering
+        distances closer together (additively). To override the catalogue's
+        built-in depth and horizontal errors, set config['depth_error'] and
+        config['horiz_error'] respectively.
+        """
+        self.verbose = config.get('verbose', self.verbose)
+        # Get the relevant parameters
+        neq = len(catalogue.data['latitude'])
+        min_lookahead_days = config['taumin']
+        max_lookahead_days = config['taumax']
+
+        # Get the elapsed days
+        elapsed = days_from_first_event(catalogue)
+        assert np.all(elapsed[1:] >= elapsed[:-1]), \
+            "catalogue needs to be in ascending date order"
+
+        mags = catalogue.data['magnitude']
+        if catalogue.data['depth'].size > 0:
+            depth = catalogue.data['depth']
+        else:
+            depth = np.zeros(neq)
+
+        # The errors are determined first by the config. If a value is
+        # missing or None, the error values are taken from the catalogue;
+        # if they do not exist there either, the errors are set to 0.
+        if config.get('horiz_error', None) is None:
+            horiz_error = catalogue.data.get('horizError', np.zeros(1))
+        else:
+            horiz_error = config['horiz_error']
+
+        if config.get('depth_error', None) is None:
+            depth_error = catalogue.data.get('depthError', np.zeros(1))
+        else:
+            depth_error = config['depth_error']
+
+        # Pre-allocate the cluster index vectors
+        vcl = np.zeros(neq, dtype=int)
+        msi = np.zeros(neq, dtype=int)
+        ev_id = np.arange(0, neq)
+
+        # set the interaction zones, in km: Reasenberg (1985), or the
+        # alternative Wells & Coppersmith (1994) / Helmstetter (SRL, 2007)
+        # version
+        zone_noclust, zone_clust = self.get_zone_distances_per_mag(
+            mags=mags,
+            rfact=config['rfact'],
+            formula=self.interaction_formulas[config['interaction_formula']],
+            max_interact_km=config.get('max_interaction_dist', np.inf))
+
+        k = 0  # cluster index
+
+        # largest magnitude and largest event of each cluster so far
+        clusmaxmag = np.array([-np.inf] * neq)
+        clus_biggest_idx = np.zeros(neq, dtype=int)
+        # main loop: for every earthquake in the catalogue
+        for i in range(neq - 1):
+            my_mag = mags[i]
+
+            # variables needed for the distance and time differences
+            my_cluster = vcl[i]
+            not_classified = my_cluster == 0
+
+            # attach the interaction time
+            if not_classified:
+                # this event is not yet associated with a cluster
+                look_ahead_days = min_lookahead_days
+            elif my_mag >= clusmaxmag[my_cluster]:
+                # this is the biggest event in this cluster so far (or equal
+                # to it). Note: if this is now the biggest event, the cluster
+                # range collapses to its radius
+                clusmaxmag[my_cluster] = my_mag
+                clus_biggest_idx[my_cluster] = i
+                look_ahead_days = min_lookahead_days
+                # the time between the largest event in the cluster and this
+                # event is 0, so use min_lookahead_days (rather than 0)
+            else:
+                # this event is already tied to a cluster, but is not the
+                # largest event
+                idx_biggest = clus_biggest_idx[my_cluster]
+                days_since_biggest = elapsed[i] - elapsed[idx_biggest]
+                # set the new look_ahead_days as a function of the time from
+                # the largest event in the cluster
+                look_ahead_days = self.clust_look_ahead_time(
+                    clusmaxmag[my_cluster],
+                    days_since_biggest,
+                    config['xk'],
+                    config['xmeff'],
+                    config['P'])
+
+            # look_ahead_days must stay between the minimum and maximum to
+            # prevent the search window from growing without bound
+            look_ahead_days = np.clip(
+                look_ahead_days, min_lookahead_days, max_lookahead_days)
+
+            # extract the events that fit the interaction time window ------
+            max_elapsed = elapsed[i] + look_ahead_days
+            next_event = i + 1
+            # find the position of the last event between elapsed and
+            # max_elapsed
+            last_event = bisect_left(elapsed, max_elapsed, lo=next_event)
+
+            # range from the next event (i + 1) to the last event (end of
+            # the look-ahead time)
+            temporal_evs = np.arange(next_event, last_event)
+            if my_cluster != 0:
+                # if we are in a cluster, consider only events that are not
+                # already in the cluster
+                temporal_evs = temporal_evs[vcl[temporal_evs] != my_cluster]
+
+            if len(temporal_evs) == 0:
+                continue
+
+            # --------------------------------------------------------------
+            # one or more events have now passed the time window test. Now
+            # compare this subcatalogue in space to A) the most recent and
+            # B) the largest event in the cluster
+            # --------------------------------------------------------------
+            my_biggest_idx = clus_biggest_idx[my_cluster]
+            bg_ev_for_dist = i if not_classified else my_biggest_idx
+
+            dist_to_recent = distance(
+                catalogue.data['latitude'][i],
+                catalogue.data['longitude'][i],
+                depth[i],
+                catalogue.data['latitude'][temporal_evs],
+                catalogue.data['longitude'][temporal_evs],
+                depth[temporal_evs])
+            dist_to_biggest = distance(
+                catalogue.data['latitude'][bg_ev_for_dist],
+                catalogue.data['longitude'][bg_ev_for_dist],
+                depth[bg_ev_for_dist],
+                catalogue.data['latitude'][temporal_evs],
+                catalogue.data['longitude'][temporal_evs],
+                depth[temporal_evs])
+
+            if look_ahead_days == min_lookahead_days:
+                l_big = dist_to_biggest == 0  # all false
+                l_recent = dist_to_recent <= zone_noclust[my_mag]
+            else:
+                l_big = dist_to_biggest <= zone_noclust[clusmaxmag[my_cluster]]
+                l_recent = dist_to_recent <= zone_clust[my_mag]
+
+            spatial_evs = np.logical_or(l_recent, l_big)
+            if not any(spatial_evs):
+                continue
+
+            # --------------------------------------------------------------
+            # one or more events have now passed both the spatial and the
+            # temporal window tests.
+            #
+            # If any of these events are already related to another cluster,
+            # find the smallest cluster number among them, then assign all
+            # events (both previously clustered and unclustered) to it.
+            # --------------------------------------------------------------
+
+            # the spatial events only include events AFTER i, not i itself,
+            # so vcl[events_in_any_cluster] is independent of vcl[i]
+
+            # events that fit both the spatial and the temporal criterion
+            candidates = temporal_evs[spatial_evs]
+            # events that are already related to a cluster
+            events_in_any_cluster = candidates[vcl[candidates] != 0]
+            # events that are not yet in a cluster
+            events_in_no_cluster = candidates[vcl[candidates] == 0]
+
+            # if this cluster overlaps with any other cluster, merge them:
+            # assign every event in all related clusters the same (lowest)
+            # cluster number, set that cluster's clusmaxmag to the largest
+            # magnitude of all combined events, and set its clus_biggest_idx
+            # to the largest event of all combined events. The largest event
+            # of each cluster is flagged later.
+            if len(events_in_any_cluster) > 0:
+                if not_classified:
+                    related_clust_nums = np.unique(vcl[events_in_any_cluster])
+                else:
+                    # include this cluster number in the reckoning
+                    related_clust_nums = np.unique(
+                        np.hstack((vcl[events_in_any_cluster], my_cluster)))
+
+                # associate all related events with my cluster
+                my_cluster = related_clust_nums[0]
+                vcl[i] = my_cluster
+                vcl[candidates] = my_cluster
+
+                for clustnum in related_clust_nums:
+                    vcl[vcl == clustnum] = my_cluster
+
+                events_in_my_cluster = vcl == my_cluster
+                biggest_mag = np.max(mags[events_in_my_cluster])
+                biggest_mag_idx = np.flatnonzero(
+                    np.logical_and(mags == biggest_mag,
+                                   events_in_my_cluster))[-1]
+
+                # reset the values of the other clusters
+                clusmaxmag[related_clust_nums] = -np.inf
+                clus_biggest_idx[related_clust_nums] = 0
+
+                # assign the values of this cluster
+                clusmaxmag[my_cluster] = biggest_mag
+                clus_biggest_idx[my_cluster] = biggest_mag_idx
+            elif my_cluster == 0:
+                # start a new cluster containing this event
+                k += 1
+                vcl[i] = my_cluster = k
+                clusmaxmag[my_cluster] = my_mag
+                clus_biggest_idx[my_cluster] = i
+            else:
+                # no clustered events found; stay attached to the existing
+                # cluster
+                pass
+
+            # attach the cluster number to the candidate events not yet
+            # related to a cluster
+            vcl[events_in_no_cluster] = my_cluster
+        # also set the mainshock index of all events not in a cluster to 1,
+        # i.e. an independent event counts as a mainshock
+        msi[vcl == 0] = 1
+
+        # for each cluster, identify the mainshock (largest event)
+        clusters = np.unique(vcl[vcl != 0])
+        for cluster_no in clusters:
+            cluster_ids = ev_id[vcl == cluster_no]
+            biggest_mag_idx = np.argmax(mags[vcl == cluster_no])
+            ms_id = cluster_ids[biggest_mag_idx]
+            msi[ms_id] = 1
+
+        return vcl, msi
+
+    @staticmethod
+    def get_zone_distances_per_mag(mags: np.ndarray, rfact: float,
+                                   formula: Callable,
+                                   max_interact_km: float = np.inf
+                                   ) -> Tuple[Dict, Dict]:
+        """
+        :param mags:
+            list of magnitudes
+        :param rfact:
+            interaction-distance scaling factor for clusters
+        :param formula:
+            formula that takes magnitudes and returns interaction distances
+        :param max_interact_km:
+            if used, may refer to the crustal thickness
+        :returns:
+            two dictionaries keyed on the sorted unique magnitudes:
+            zone_noclust:
+                zone_noclust[mag] -> out-of-cluster interaction distance
+                for mag
+            zone_clust:
+                zone_clust[mag] -> in-cluster interaction distance for mag
+        """
+        unique_mags = np.unique(mags)
+        noclust_km = formula(unique_mags)
+        clust_km = noclust_km * rfact
+
+        # limit the interaction distance (generally to one crustal
+        # thickness), in km
+        noclust_km[noclust_km > max_interact_km] = max_interact_km
+        clust_km[clust_km > max_interact_km] = max_interact_km
+
+        zone_noclust = dict(zip(unique_mags, noclust_km))
+        zone_clust = dict(zip(unique_mags, clust_km))
+        return zone_noclust, zone_clust
+
+
+def relative_time(
+        years,
+        months,
+        days,
+        hours=None,
+        minutes=None,
+        seconds=None,
+        datetime_unit='D',
+        reference_date=None):
+    """
+    Get the time elapsed since a reference date, which defaults to the
+    first event in the catalogue.
+
+    :param reference_date:
+        use this [Y, M, D, h, m, s] array instead of the first event in
+        the catalogue
+    :returns:
+        **elapsed** time since the reference date, as a float array in
+        units of ``datetime_unit`` (days by default)
+    """
+    if hours is None and minutes is None and seconds is None:
+        dates = [datetime.datetime(years[n], months[n], days[n])
+                 for n in range(len(years))]
+    else:
+        hours = hours if hours is not None else np.zeros_like(years)
+        minutes = minutes if minutes is not None else np.zeros_like(years)
+        seconds = seconds if seconds is not None else np.zeros_like(years)
+        dates = [datetime.datetime(years[n], months[n], days[n],
+                                   hours[n], minutes[n], int(seconds[n]))
+                 for n in range(len(years))]
+
+    dt64 = np.array([np.datetime64(v) for v in dates])
+    if reference_date:
+        from_day = np.datetime64(datetime.datetime(*reference_date))
+    else:
+        from_day = min(dt64)
+
+    return (dt64 - from_day) / np.timedelta64(1, datetime_unit)
+
+
+def days_from_first_event(catalog) -> np.ndarray:
+    """Elapsed days since the first event in the catalogue."""
+    return relative_time(catalog.data['year'], catalog.data['month'],
+                         catalog.data['day'], catalog.data['hour'],
+                         catalog.data['minute'],
+                         catalog.data['second'].astype(int),
+                         datetime_unit='D')
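Before moving on to the next module, here is a short worked sketch (illustrative numbers, not values from the PR) of the two quantities that drive the clustering decisions above, using the two static methods defined in this file:

```python
# Worked example of the look-ahead time and the interaction distances.
import numpy as np

from openquake.hmtk.seismicity.declusterer.dec_reasenberg import Reasenberg

# Look-ahead time: tau = -ln(1 - P) * dt / 10 ** ((dm - 1) * 2 / 3),
# with dm = max((1 - xk) * mag_big - xmeff, 0)
tau = Reasenberg.clust_look_ahead_time(
    mag_big=6.0, dt_big=np.array([2.0]), xk=0.5, xmeff=1.5, p=0.95)
print(tau)  # ~2.78 days, later clipped to [taumin, taumax]

# Interaction distances per magnitude, Reasenberg (1985) formula,
# rfact = 10 and a 30 km cap
zone_noclust, zone_clust = Reasenberg.get_zone_distances_per_mag(
    mags=np.array([5.0, 6.0]), rfact=10,
    formula=lambda m: 0.011 * 10 ** (0.4 * m),
    max_interact_km=30.0)
print(zone_noclust[6.0], zone_clust[6.0])  # ~2.76 km and ~27.6 km
```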
diff --git a/openquake/hmtk/seismicity/declusterer/dec_zaliapin.py b/openquake/hmtk/seismicity/declusterer/dec_zaliapin.py
new file mode 100644
index 000000000000..dacefd85b1db
--- /dev/null
+++ b/openquake/hmtk/seismicity/declusterer/dec_zaliapin.py
@@ -0,0 +1,330 @@
+# -*- coding: utf-8 -*-
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+#
+# LICENSE
+#
+# Copyright (C) 2010-2024 GEM Foundation, G. Weatherill, M. Pagani,
+# D. Monelli.
+#
+# The Hazard Modeller's Toolkit is free software: you can redistribute
+# it and/or modify it under the terms of the GNU Affero General Public
+# License as published by the Free Software Foundation, either version
+# 3 of the License, or (at your option) any later version.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with OpenQuake. If not, see <http://www.gnu.org/licenses/>.
+#
+# DISCLAIMER
+#
+# The software Hazard Modeller's Toolkit (openquake.hmtk) provided herein
+# is released as a prototype implementation on behalf of
+# scientists and engineers working within the GEM Foundation (Global
+# Earthquake Model).
+#
+# It is distributed for the purpose of open collaboration and in the
+# hope that it will be useful to the scientific, engineering, disaster
+# risk and software design communities.
+#
+# The software is NOT distributed as part of GEM’s OpenQuake suite
+# (https://www.globalquakemodel.org/tools-products) and must be considered as a
+# separate entity. The software provided herein is designed and implemented
+# by scientific staff. It is not developed to the design standards, nor
+# subject to same level of critical review by professional software
+# developers, as GEM’s OpenQuake software suite.
+#
+# Feedback and contribution to the software is welcome, and can be
+# directed to the hazard scientific staff of the GEM Model Facility
+# (hazard@globalquakemodel.org).
+#
+# The Hazard Modeller's Toolkit (openquake.hmtk) is therefore distributed
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# The GEM Foundation, and the authors of the software, assume no
+# liability for use of the software.
+
+"""
+Module :mod:`openquake.hmtk.seismicity.declusterer.dec_zaliapin`
+defines the Zaliapin declustering algorithm.
+"""
+
+import datetime
+
+import numpy as np
+import matplotlib.dates as mdates
+from sklearn import mixture
+
+from openquake.hazardlib.geo.geodetic import distance
+from openquake.hmtk.seismicity.declusterer.base import (
+    BaseCatalogueDecluster, DECLUSTERER_METHODS)
+
+
+@DECLUSTERER_METHODS.add(
+    "decluster",
+    fractal_dim=float,  # spatial weighting factor
+    b_value=float,  # magnitude weighting factor
+)
+class Zaliapin(BaseCatalogueDecluster):
+    """
+    Implements the declustering method of Zaliapin and Ben-Zion (2013),
+    based on a nearest-neighbour (space-time-magnitude) distance metric.
+
+    Implemented in Python by CG Reyes, and modified to:
+
+    - return mainshocks (the largest event per cluster) rather than the
+      first event (NB: this increases the computational time, because the
+      clusters must be reconstructed, but is more useful for PSHA and more
+      comparable to alternative methods);
+    - give the option of stochastic declustering.
+    """
+
+    def __init__(self):
+        pass
+
+    def decluster(self, catalogue, config):
+        """
+        Zaliapin's declustering code.
+
+        :param catalogue:
+            Earthquake catalogue
+        :param config:
+            Dictionary containing:
+
+            1. values for the fractal dimension `fractal_dim` and the
+               Gutenberg-Richter `b_value`;
+            2. the method used to choose which events to keep: use
+               `threshold` to set the independence probability above which
+               an event is retained as a mainshock (0.5 if not provided),
+               or set `threshold` to 'stochastic' for stochastic
+               declustering; a seed for the stochastic method can be set
+               with `stoch_seed` (defaults to 5 when not supplied);
+            3. an optional `depth` flag to use depths when calculating
+               event distances. Note that the fractal dimension should
+               correspond to the number of dimensions in the model (the
+               Zaliapin and Ben-Zion defaults are 1.6 for 2D and 2.4 for
+               3D distances, and these values are generally used in the
+               literature).
+
+        :returns:
+            the root (first) event of each event's cluster and a cluster
+            flag (0 for mainshocks, 1 for clustered events); if
+            config['output_nearest_neighbor_distances'] is True, returns
+            instead the independence probabilities, nearest-neighbour
+            distances and indices, mainshock flag, cluster root and leaf
+            depth of each event.
+        """
+        nnd, nni = self.getnnd(catalogue,
+                               config['fractal_dim'],
+                               config['b_value'],
+                               depth=bool(config.get('depth', False)))
+
+        probability_of_independence = self.gaussianmixturefit(nnd)
+
+        if config.get('threshold') == 'stochastic':
+            root, ld, ms_flag = cluster_number(
+                catalogue, nni, probability_of_independence,
+                stochastic=True,
+                stoch_seed=config.get('stoch_seed', 5))
+        else:
+            threshold = config.get('threshold', 0.5)
+            root, ld, ms_flag = cluster_number(
+                catalogue, nni, probability_of_independence,
+                threshold=threshold, stochastic=False)
+
+        if config.get('output_nearest_neighbor_distances'):
+            return probability_of_independence, nnd, nni, ms_flag, root, ld
+        cluster_index = 1 - ms_flag
+        return root, cluster_index
+    @staticmethod
+    def getnnd(catalogue, d_f=1.6, b=1.0, depth=False):
+        """
+        Identifies the nearest-neighbour parent and distance for each event.
+
+        :param catalogue:
+            an earthquake catalogue
+        :param d_f:
+            fractal dimension (default 1.6 for 2D; ~2.4 is recommended
+            for 3D)
+        :param b:
+            b-value for the catalogue
+        :param depth:
+            flag to include depths in the event-to-event distances
+        :returns:
+            nearest-neighbour distances and the index of each event's
+            nearest neighbour, as numpy arrays
+        """
+        neq = len(catalogue.data['latitude'])
+        nearest_distance = np.zeros(neq)
+        nearest_index = np.zeros(neq, dtype=int)
+
+        if depth:
+            depths = catalogue.data['depth'].copy()
+            # Set nan depths to 0
+            depths[np.isnan(depths)] = 0
+        else:
+            depths = np.zeros(neq)
+
+        # Change the date and time data into decimal years (e.g. 1980.25)
+        time = np.zeros(neq)
+        for i in range(neq):
+            t = datetime.datetime(catalogue.data['year'][i],
+                                  catalogue.data['month'][i],
+                                  catalogue.data['day'][i],
+                                  catalogue.data['hour'][i],
+                                  catalogue.data['minute'][i],
+                                  int(catalogue.data['second'][i]))
+            # date2num gives days; change to years
+            time[i] = (mdates.date2num(t) - 1) / 365.25
+
+        for j in range(1, neq):
+            # Calculate the spatial distance between event j and all
+            # earlier events
+            dist = distance(catalogue.data['latitude'][j],
+                            catalogue.data['longitude'][j],
+                            depths[j],
+                            catalogue.data['latitude'][:j],
+                            catalogue.data['longitude'][:j],
+                            depths[:j])
+            time_diff = time[j] - time[:j]
+            # If the time difference is zero, set it to a very small value
+            time_diff[time_diff == 0] = 0.0000001
+            # Similarly for spatial distances close to 0
+            dist[dist < 0.1] = 0.1
+            # ZBZ inter-event distance:
+            # eta = time_diff * 10 ** (-b * M_parent) * spat_dist ** d_f
+            interevent_distance = time_diff * (
+                10 ** (-b * catalogue.data['magnitude'][:j])) * (dist ** d_f)
+            # Record the index of the nearest neighbour (needed to
+            # reconstruct the clusters) and the distance to it (smallest nnd)
+            nearest_index[j] = np.argmin(interevent_distance)
+            nearest_distance[j] = np.min(interevent_distance)
+
+        # Set the zeroth event to the mean NND (it has no earlier
+        # neighbour, so the exact value does not really matter)
+        nearest_distance[0] = np.mean(nearest_distance)
+
+        return nearest_distance, nearest_index
+    @staticmethod
+    def gaussianmixturefit(nnd):
+        """
+        Fits a Gaussian mixture distribution to the nearest-neighbour
+        distances (Zaliapin and Ben-Zion, 2013) and calculates each event's
+        probability of independence from the mixture fit.
+
+        :param nnd:
+            nearest-neighbour distances for each event
+        :returns:
+            probability of independence of each event, based on the
+            mixture fit
+        """
+        log_nearest_neighbor_dist = np.log10(nnd)
+        samples = log_nearest_neighbor_dist[
+            np.isfinite(log_nearest_neighbor_dist)]
+        samples = samples.reshape(-1, 1)
+
+        # find the two Gaussians that fit the nearest-neighbour distances
+        clf = mixture.GaussianMixture(n_components=2, covariance_type='full')
+        clf.fit(samples)
+
+        probability_of_independence = np.zeros_like(
+            log_nearest_neighbor_dist)
+
+        # calculate the probabilities that each sample belongs to each of
+        # the Gaussians (2 columns of probabilities)
+        prediction = clf.predict_proba(samples)
+
+        # keep only the chance that this is a background event
+        # (the column matching the mixture component with the largest mean)
+        probability_of_independence[
+            np.isfinite(log_nearest_neighbor_dist)] = prediction[
+                :, np.argmax(clf.means_)]
+
+        # the first event, by definition, cannot depend on prior events
+        probability_of_independence[0] = 1
+        probability_of_independence[
+            np.isnan(probability_of_independence)] = 0
+
+        return probability_of_independence
+
+
+def cluster_number(catalogue, nearest_index, prob_ind, threshold=0.5,
+                   stochastic=True, stoch_seed=5):
+    """
+    Reconstructs the clusters; this is needed to identify mainshocks
+    rather than the earliest independent events. Identifies the head node
+    (first event in the cluster) and the number of parent links that must
+    be followed to reach it (i.e. how deep in the cluster an event sits:
+    1st, 2nd, 3rd generation, etc.).
+
+    :param catalogue:
+        an earthquake catalogue
+    :param nearest_index:
+        index of each event's nearest neighbour (output of getnnd)
+    :param prob_ind:
+        probability that each event is independent
+        (output of gaussianmixturefit)
+    :param threshold:
+        independence probability above which an event is considered
+        independent; specified in the config file, or 0.5 by default
+    :param stochastic:
+        instead of a fixed threshold, use a stochastic method to thin
+        clustered events
+    :param stoch_seed:
+        seed for the stochastic thinning, to ensure reproducibility;
+        set by default if not specified
+    :returns:
+        arrays of cluster root, event depth and mainshock flag
+    """
+    root = np.zeros(len(catalogue.data['latitude']), dtype=int)
+    ld = np.zeros(len(catalogue.data['latitude']))
+
+    if stochastic:
+        # set the seed for reproducible results when using the stochastic
+        # method, then keep each event with probability equal to its
+        # independence probability (one uniform draw per event)
+        np.random.seed(stoch_seed)
+        indep_flag = (prob_ind > np.random.uniform(
+            0, 1, len(prob_ind))).astype(int)
+    else:
+        indep_flag = (prob_ind > threshold).astype(int)
+
+    # a dependent event points at its nearest neighbour; an independent
+    # event points at 0, which terminates the chain
+    parent = nearest_index * (1 - indep_flag)
+
+    for i in range(len(catalogue.data['latitude'])):
+        p = parent[i]
+        if p == 0:
+            root[i] = i
+            ld[i] = 0
+        else:
+            while p != 0:
+                root[i] = p
+                ld[i] = ld[i] + 1
+                p = parent[p]
+
+    # Collect all unique cluster roots
+    clust_heads = np.unique(root)
+    MS = clust_heads.astype(int)
+
+    for j in range(len(clust_heads)):
+        # locate all events that are part of this cluster
+        idx = np.argwhere(np.isin(root, clust_heads[j])).ravel()
+        # collect all magnitudes
+        mags = catalogue.data['magnitude'][idx]
+        if len(mags) > 0:
+            # find the mainshock and add it to MS
+            MS_rel = np.argmax(mags)
+            MS[j] = idx[MS_rel]
+
+    ms_flag = np.zeros(len(catalogue.data['latitude']))
+    ms_flag[MS] = 1
+
+    return root, ld, ms_flag
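As a quick sanity check on the metric implemented in `getnnd`, here is the nearest-neighbour distance for a single, made-up event pair:

```python
# Illustrative computation of the Zaliapin & Ben-Zion nearest-neighbour
# distance for one event pair (values are made up):
#   eta = t_ij * 10 ** (-b * m_i) * r_ij ** d_f
# where t_ij is the inter-event time in years, r_ij the distance in km
# and m_i the magnitude of the candidate parent (the earlier event).
b, d_f = 1.0, 1.6
t_ij = 30.0 / 365.25   # 30 days, expressed in years
r_ij = 15.0            # km
m_i = 6.0              # parent magnitude
eta = t_ij * 10 ** (-b * m_i) * r_ij ** d_f
print(eta)  # ~6.3e-06 -- a small eta suggests a clustered pair
```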
diff --git a/openquake/hmtk/seismicity/gcmt_catalogue.py b/openquake/hmtk/seismicity/gcmt_catalogue.py
index af28cf9e77c8..f64abd0438ed 100644
--- a/openquake/hmtk/seismicity/gcmt_catalogue.py
+++ b/openquake/hmtk/seismicity/gcmt_catalogue.py
@@ -505,6 +505,7 @@ def serialise_to_hmtk_csv(self, filename, centroid_location=True):
         """
         Serialise the catalogue to a simple csv format, designed for
         comptibility with the GEM Hazard Modeller's Toolkit
+
         """
         header_list = [
             "eventID",
@@ -525,14 +526,22 @@ def serialise_to_hmtk_csv(self, filename, centroid_location=True):
             "depthError",
             "magnitude",
             "sigmaMagnitude",
+            "str1",
+            "dip1",
+            "rake1",
+            "str2",
+            "dip2",
+            "rake2",
         ]
         with open(filename, "wt", newline="") as fid:
+
             writer = csv.DictWriter(fid, fieldnames=header_list)
             headers = dict((header, header) for header in header_list)
             writer.writerow(headers)
             print("Writing to simple csv format ...")
             for iloc, tensor in enumerate(self.gcmts):
                 # Generic Data
+
                 cmt_dict = {
                     "eventID": iloc + 100000,
                     "Agency": "GCMT",
@@ -543,6 +552,13 @@ def serialise_to_hmtk_csv(self, filename, centroid_location=True):
                     "sigmaMagnitude": None,
                     "depth": None,
                     "depthError": None,
+                    "magnitudeType": "Mw",
+                    "str1": tensor.nodal_planes.nodal_plane_1["strike"],
+                    "dip1": tensor.nodal_planes.nodal_plane_1["dip"],
+                    "rake1": tensor.nodal_planes.nodal_plane_1["rake"],
+                    "str2": tensor.nodal_planes.nodal_plane_2["strike"],
+                    "dip2": tensor.nodal_planes.nodal_plane_2["dip"],
+                    "rake2": tensor.nodal_planes.nodal_plane_2["rake"],
                 }

                 if centroid_location:
@@ -580,6 +596,7 @@ def serialise_to_hmtk_csv(self, filename, centroid_location=True):
                     cmt_dict["latitude"] = tensor.hypocentre.latitude
                     cmt_dict["depth"] = tensor.hypocentre.depth
                     cmt_dict["depthError"] = None
+
                     writer.writerow(cmt_dict)

     print("done!")
diff --git a/openquake/hmtk/seismicity/occurrence/utils.py b/openquake/hmtk/seismicity/occurrence/utils.py
index 710218895758..c9bbb17ae61f 100644
--- a/openquake/hmtk/seismicity/occurrence/utils.py
+++ b/openquake/hmtk/seismicity/occurrence/utils.py
@@ -214,14 +214,15 @@ def get_completeness_counts(catalogue, completeness, d_m):
         * n_obs - number of events in completeness period
     """
     mmax_obs = np.max(catalogue.data["magnitude"])
-    # thw line below was added by Nick Ackerley but it breaks the tests
+    # the line below was added by Nick Ackerley, but it breaks the tests
     # catalogue.data["dtime"] = catalogue.get_decimal_time()
     if mmax_obs > np.max(completeness[:, 1]):
         cmag = np.hstack([completeness[:, 1], mmax_obs])
     else:
         cmag = completeness[:, 1]
+    # print(cmag)
     cyear = np.hstack([catalogue.end_year + 1, completeness[:, 0]])
-
+    # print(cyear)
     # When the magnitude value is on the bin edge numpy's histogram function
     # may assign randomly to one side or the other based on the floating
     # point value. As catalogues are rounded to the nearest 0.1 this occurs
@@ -244,10 +245,12 @@ def get_completeness_counts(catalogue, completeness, d_m):
         0
     ].astype(float)
     count_years[m_idx[:-1]] += float(nyrs)
+    # Trim trailing bins whose observed rates are zero
    last_loc = np.where(count_rates > 0)[0][-1]
     n_obs = count_rates[: (last_loc + 1)]
     t_per = count_years[: (last_loc + 1)]
     cent_mag = (master_bins[:-1] + master_bins[1:]) / 2.0
     cent_mag = np.around(cent_mag[: (last_loc + 1)], 3)
+
     return cent_mag, t_per, n_obs
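For context on the hunk above, a small usage sketch of `get_completeness_counts`; the catalogue file and the completeness table are hypothetical:

```python
# Hypothetical usage: count events and completeness years per
# 0.1-magnitude bin, with trailing zero-rate bins trimmed as above.
import numpy as np

from openquake.hmtk.parsers.catalogue import CsvCatalogueParser
from openquake.hmtk.seismicity.occurrence.utils import get_completeness_counts

catalogue = CsvCatalogueParser("my_catalogue.csv").read_file()  # path assumed
completeness = np.array([[1990.0, 4.0],   # complete for M >= 4 since 1990
                         [1960.0, 5.0]])  # complete for M >= 5 since 1960
cent_mag, t_per, n_obs = get_completeness_counts(catalogue, completeness,
                                                 d_m=0.1)
print(cent_mag, t_per, n_obs)
```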
diff --git a/openquake/hmtk/tests/seismicity/declusterer/data/zaliapin_test_catalogue.csv b/openquake/hmtk/tests/seismicity/declusterer/data/zaliapin_test_catalogue.csv
new file mode 100644
index 000000000000..3ddd1c342b0e
--- /dev/null
+++ b/openquake/hmtk/tests/seismicity/declusterer/data/zaliapin_test_catalogue.csv
@@ -0,0 +1,21 @@
+"year","month","day","hour","minute","second","longitude","latitude","magnitude","flag","comment"
+1977,2,5,0,0,0,1,0.5,5,0,
+1977,4,1,0,0,0,0,1,5,0,
+1977,5,6,0,0,0,0,0.1,5,0,
+1977,8,19,0,0,0,0.6,1,5,0,
+1977,12,13,0,0,0,0,0.5,6,0,
+1978,1,1,0,0,0,0.8,0,5,0,
+1978,9,27,0,0,0,0.2,0.8,5,0,
+1979,12,29,0,0,0,0.8,0.1,5,0," Smaller mainshock >> d"
+1979,12,31,0,0,0,0.6,0.1,5,0," Foreshock of M1"
+1980,1,1,0,0,0,0,0,8,0," Large Mainshock1 (M1)"
+1980,1,31,0,0,0,0.1,0,7,1," Aftershock of M1"
+1980,2,2,0,0,0,0.5,0.1,6,1,
+1980,2,3,0,0,0,0.1,0.1,6,1,
+1980,2,5,0,0,0,0.5,0.1,6,1,
+1980,2,8,0,0,0,0.5,0.1,6,1,
+1980,2,12,0,0,0,0,0,7,1," Aftershock of M1 "
+1980,2,14,0,0,0,0,0,6,1,
+1980,2,16,0,0,0,0,0,6,1,
+1982,7,30,0,0,0,0.3,0.1,5,0,
+1982,12,21,0,0,0,0.6,0.8,5,0,
diff --git a/openquake/hmtk/tests/seismicity/declusterer/dec_reasenberg_test.py b/openquake/hmtk/tests/seismicity/declusterer/dec_reasenberg_test.py
new file mode 100644
index 000000000000..ff97e6a6b33f
--- /dev/null
+++ b/openquake/hmtk/tests/seismicity/declusterer/dec_reasenberg_test.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+#
+# LICENSE
+#
+# Copyright (C) 2010-2023 GEM Foundation, G. Weatherill, M. Pagani,
+# D. Monelli.
+#
+# The Hazard Modeller's Toolkit is free software: you can redistribute
+# it and/or modify it under the terms of the GNU Affero General Public
+# License as published by the Free Software Foundation, either version
+# 3 of the License, or (at your option) any later version.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with OpenQuake. If not, see <http://www.gnu.org/licenses/>.
+#
+# DISCLAIMER
+#
+# The software Hazard Modeller's Toolkit (openquake.hmtk) provided herein
+# is released as a prototype implementation on behalf of
+# scientists and engineers working within the GEM Foundation (Global
+# Earthquake Model).
+#
+# It is distributed for the purpose of open collaboration and in the
+# hope that it will be useful to the scientific, engineering, disaster
+# risk and software design communities.
+#
+# The software is NOT distributed as part of GEM’s OpenQuake suite
+# (https://www.globalquakemodel.org/tools-products) and must be considered as a
+# separate entity. The software provided herein is designed and implemented
+# by scientific staff. It is not developed to the design standards, nor
+# subject to same level of critical review by professional software
+# developers, as GEM’s OpenQuake software suite.
+#
+# Feedback and contribution to the software is welcome, and can be
+# directed to the hazard scientific staff of the GEM Model Facility
+# (hazard@globalquakemodel.org).
+#
+# The Hazard Modeller's Toolkit (openquake.hmtk) is therefore distributed
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# The GEM Foundation, and the authors of the software, assume no
+# liability for use of the software.
+
+"""
+Tests for the Reasenberg declustering algorithm.
+"""
+
+import os
+import unittest
+
+import numpy as np
+
+from openquake.hmtk.parsers.catalogue import CsvCatalogueParser
+from openquake.hmtk.seismicity.declusterer.dec_reasenberg import Reasenberg
+
+
+class ReasenbergTestCase(unittest.TestCase):
+    """
+    Unit tests for the Reasenberg declustering algorithm class.
+    """
+
+    BASE_DATA_PATH = os.path.join(os.path.dirname(__file__), 'data')
+
+    def setUp(self):
+        """
+        Read the sample catalogue.
+        """
+        flnme = 'zaliapin_test_catalogue.csv'
+        filename = os.path.join(self.BASE_DATA_PATH, flnme)
+        parser = CsvCatalogueParser(filename)
+        self.cat = parser.read_file()
+
+    def test_dec_reasenberg(self):
+        # Test the Reasenberg algorithm
+        config = {
+            'taumin': 40.0,  # look-ahead time for not-clustered events, days
+            'taumax': 50.0,  # maximum look-ahead time for clustered events, days
+            'P': 0.95,  # confidence level that this is the next event in the sequence
+            'xk': 0.5,  # factor used with xmeff to define the magnitude cutoff
+            'xmeff': 1.5,  # effective magnitude, used with xk to define the magnitude cutoff
+            'rfact': 10,  # factor for the interaction radius of dependent events
+            'horiz_error': 0.5,  # epicentre error, km; if None, pulled from the catalogue
+            'depth_error': 2.0,  # depth error, km; if None, pulled from the catalogue
+            'interaction_formula': 'WellsCoppersmith1994',  # or 'Reasenberg1985'
+            'max_interaction_dist': 100}  # km
+
+        # Instantiate the declusterer and process the sample catalogue
+        dec = Reasenberg()
+        vcl, flagvector = dec.decluster(self.cat, config)
+        np.testing.assert_allclose(flagvector, self.cat.data['flag'])
diff --git a/openquake/hmtk/tests/seismicity/declusterer/dec_zaliapin_test.py b/openquake/hmtk/tests/seismicity/declusterer/dec_zaliapin_test.py
new file mode 100644
index 000000000000..ddacf8043231
--- /dev/null
+++ b/openquake/hmtk/tests/seismicity/declusterer/dec_zaliapin_test.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+#
+# LICENSE
+#
+# Copyright (C) 2010-2023 GEM Foundation, G. Weatherill, M. Pagani,
+# D. Monelli.
+#
+# The Hazard Modeller's Toolkit is free software: you can redistribute
+# it and/or modify it under the terms of the GNU Affero General Public
+# License as published by the Free Software Foundation, either version
+# 3 of the License, or (at your option) any later version.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with OpenQuake. If not, see <http://www.gnu.org/licenses/>.
+#
+# DISCLAIMER
+#
+# The software Hazard Modeller's Toolkit (openquake.hmtk) provided herein
+# is released as a prototype implementation on behalf of
+# scientists and engineers working within the GEM Foundation (Global
+# Earthquake Model).
+#
+# It is distributed for the purpose of open collaboration and in the
+# hope that it will be useful to the scientific, engineering, disaster
+# risk and software design communities.
+#
+# The software is NOT distributed as part of GEM’s OpenQuake suite
+# (https://www.globalquakemodel.org/tools-products) and must be considered as a
+# separate entity. The software provided herein is designed and implemented
+# by scientific staff. It is not developed to the design standards, nor
+# subject to same level of critical review by professional software
+# developers, as GEM’s OpenQuake software suite.
+#
+# Feedback and contribution to the software is welcome, and can be
+# directed to the hazard scientific staff of the GEM Model Facility
+# (hazard@globalquakemodel.org).
+#
+# The Hazard Modeller's Toolkit (openquake.hmtk) is therefore distributed
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# The GEM Foundation, and the authors of the software, assume no
+# liability for use of the software.
+
+"""
+Tests for the Zaliapin declustering algorithm.
+"""
+
+import os
+import unittest
+
+import numpy as np
+
+from openquake.hmtk.parsers.catalogue import CsvCatalogueParser
+from openquake.hmtk.seismicity.declusterer.dec_zaliapin import Zaliapin
+
+
+class ZaliapinTestCase(unittest.TestCase):
+    """
+    Unit tests for the Zaliapin declustering algorithm class.
+    """
+
+    BASE_DATA_PATH = os.path.join(os.path.dirname(__file__), 'data')
+
+    def setUp(self):
+        """
+        Read the sample catalogue.
+        """
+        flnme = 'zaliapin_test_catalogue.csv'
+        filename = os.path.join(self.BASE_DATA_PATH, flnme)
+        parser = CsvCatalogueParser(filename)
+        self.cat = parser.read_file()
+
+    def test_dec_zaliapin(self):
+        # Test the Zaliapin algorithm
+        config = {'fractal_dim': 1.6,
+                  'b_value': 1.0,
+                  'threshold': 0.5}
+        # Instantiate the declusterer and process the sample catalogue
+        dec = Zaliapin()
+        vcl, flagvector = dec.decluster(self.cat, config)
+        print('vcl:', vcl)
+        print('flagvector:', flagvector, self.cat.data['flag'])
+        np.testing.assert_allclose(flagvector, self.cat.data['flag'])
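Finally, a minimal sketch of the stochastic option wired up in `Zaliapin.decluster` above, reusing the test catalogue added by this PR (the local path is assumed):

```python
# Stochastic Zaliapin declustering: events are kept as mainshocks at
# random, with probability equal to their independence probability.
import numpy as np

from openquake.hmtk.parsers.catalogue import CsvCatalogueParser
from openquake.hmtk.seismicity.declusterer.dec_zaliapin import Zaliapin

cat = CsvCatalogueParser("zaliapin_test_catalogue.csv").read_file()
config = {'fractal_dim': 1.6,
          'b_value': 1.0,
          'threshold': 'stochastic',  # per-event random thinning
          'stoch_seed': 5}            # reproducible draws
root, cluster_index = Zaliapin().decluster(cat, config)
print(int(np.sum(cluster_index == 0)), "events kept as mainshocks")
```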