From 5ba088cd712ada5f439c01b9950de150bf59be57 Mon Sep 17 00:00:00 2001 From: zhuwq Date: Sun, 25 Aug 2024 15:13:40 -0700 Subject: [PATCH] clean up code --- scripts/args.py | 12 ++ scripts/convert_growclust.py | 85 ------------ scripts/cut_templates_cc.py | 95 ++++++-------- scripts/download_catalog.py | 90 +++++++------ scripts/download_station.py | 121 +++++++++--------- scripts/download_waveform_v2.py | 20 +-- scripts/run_adloc.py | 40 +++--- scripts/run_cctorch.py | 41 +++--- scripts/run_gamma.py | 85 +++--------- scripts/run_growclust_cc.py | 65 ++++++++++ scripts/run_hypodd_cc.py | 72 +++++++++++ scripts/run_hypodd_cc.sh | 2 +- .../{convert_hypodd.py => run_hypodd_ct.py} | 47 +++---- scripts/run_phasenet.py | 25 ++-- scripts/set_config.py | 26 ++-- 15 files changed, 392 insertions(+), 434 deletions(-) create mode 100644 scripts/args.py delete mode 100644 scripts/convert_growclust.py create mode 100644 scripts/run_growclust_cc.py create mode 100644 scripts/run_hypodd_cc.py rename scripts/{convert_hypodd.py => run_hypodd_ct.py} (73%) diff --git a/scripts/args.py b/scripts/args.py new file mode 100644 index 0000000..a3e836f --- /dev/null +++ b/scripts/args.py @@ -0,0 +1,12 @@ +import argparse + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("root_path", nargs="?", type=str, default="local", help="root path") + parser.add_argument("region", nargs="?", type=str, default="demo", help="region") + + ## CCTorch + parser.add_argument("--dtct_pair", action="store_true", help="run convert_dtcc.py") + + return parser.parse_args() diff --git a/scripts/convert_growclust.py b/scripts/convert_growclust.py deleted file mode 100644 index 8f3f0ba..0000000 --- a/scripts/convert_growclust.py +++ /dev/null @@ -1,85 +0,0 @@ -# %% -import argparse -import json -import os -from datetime import datetime -from pathlib import Path - -import h5py -import numpy as np -import pandas as pd -import scipy -from tqdm import tqdm - - -# %% -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("root_path", nargs="?", type=str, default="local", help="root path") - parser.add_argument("region", nargs="?", type=str, default="demo", help="region") - parser.add_argument("--dtct", action="store_true") - args, unknown = parser.parse_known_args() - return args - - -args = parse_args() - -# %% -root_path = args.root_path -region = args.region -result_path = f"{region}/growclust" -if not os.path.exists(f"{root_path}/{result_path}"): - os.makedirs(f"{root_path}/{result_path}") - -# %% -station_json = f"{region}/results/data/stations.json" -station_df = pd.read_json(f"{root_path}/{station_json}", orient="index") - -lines = [] -for i, row in station_df.iterrows(): - # line = f"{row['network']}{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" - line = f"{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" - lines.append(line) - -with open(f"{root_path}/{result_path}/stlist.txt", "w") as fp: - fp.writelines(lines) - - -# %% -catalog_file = f"{region}/results/phase_association/events.csv" -# catalog_file = f"{region}/cctorch/events.csv" -catalog_df = pd.read_csv(f"{root_path}/{catalog_file}") -# catalog_df = catalog_df[catalog_df["gamma_score"] > 10] -# event_index = [f"{x:06d}" for x in catalog_df["event_index"]] - -catalog_df[["year", "month", "day", "hour", "minute", "second"]] = ( - catalog_df["time"] - .apply(lambda x: datetime.fromisoformat(x).strftime("%Y %m %d %H %M %S.%f").split(" ")) - .apply(pd.Series) - .apply(pd.to_numeric) -) - -lines = [] -for i, row in catalog_df.iterrows(): - # yr mon day hr min sec lat lon dep mag eh ez rms evid - line = f"{row['year']:4d} {row['month']:2d} {row['day']:2d} {row['hour']:2d} {row['minute']:2d} {row['second']:7.3f} {row['latitude']:.4f} {row['longitude']:.4f} {row['depth_km']:7.3f} {row['magnitude']:.2f} 0.000 0.000 0.000 {row['event_index']:6d}\n" - lines.append(line) - -with open(f"{root_path}/{result_path}/evlist.txt", "w") as fp: - fp.writelines(lines) - -# %% -if args.dtct: - dt_ct = f"{root_path}/{region}/hypodd/dt.ct" - lines = [] - with open(dt_ct, "r") as fp: - for line in tqdm(fp, desc="convert dt.ct"): - if line.startswith("#"): - ev1, ev2 = line.split()[1:3] - lines.append(f"# {ev1} {ev2} 0.000\n") - else: - station, t1, t2, score, phase = line.split() - lines.append(f"{station} {float(t1)-float(t2):.5f} {score} {phase}\n") - - with open(f"{root_path}/{result_path}/dt.ct", "w") as fp: - fp.writelines(lines) diff --git a/scripts/cut_templates_cc.py b/scripts/cut_templates_cc.py index c20f440..7c81259 100644 --- a/scripts/cut_templates_cc.py +++ b/scripts/cut_templates_cc.py @@ -4,17 +4,16 @@ import os import sys from glob import glob + +import matplotlib.pyplot as plt import numpy as np import obspy import pandas as pd +from adloc.eikonal2d import calc_traveltime, init_eikonal2d +from args import parse_args +from pyproj import Proj from sklearn.neighbors import NearestNeighbors from tqdm import tqdm -from adloc.eikonal2d import init_eikonal2d, calc_traveltime -from pyproj import Proj -import matplotlib.pyplot as plt -from scipy.signal import find_peaks -from obspy.signal.cross_correlation import correlate -from scipy.interpolate import interp1d np.random.seed(42) @@ -242,7 +241,7 @@ def extract_template_numpy( # %% -def generate_pairs(picks, events, stations, max_pair_dist=10, max_neighbors=50, fname="event_pairs.txt"): +def generate_pairs(picks, events, stations, max_pair_dist=10, max_neighbors=50, fname="pairs.txt"): ncpu = min(32, mp.cpu_count()) neigh = NearestNeighbors(radius=max_pair_dist, n_neighbors=max_neighbors, n_jobs=ncpu) neigh.fit(events[["x_km", "y_km", "z_km"]].values) @@ -288,30 +287,27 @@ def generate_pairs(picks, events, stations, max_pair_dist=10, max_neighbors=50, def cut_templates(root_path, region, config): # %% - root_path = "local" - region = "Ridgecrest" - result_path = f"cctorch" + data_path = f"{region}/adloc" + result_path = f"{region}/cctorch" if not os.path.exists(f"{root_path}/{result_path}"): os.makedirs(f"{root_path}/{result_path}") - with open(f"{root_path}/{region}/config.json", "r") as fp: - config = json.load(fp) - + ## TODO: move to config + sampling_rate = 100.0 time_before_p = 0.3 time_after_p = 2.5 - time_before_p time_before_s = 0.3 time_after_s = 4.0 - time_before_s time_window = max((time_before_p + time_after_p), (time_before_s + time_after_s)) - nt = int(round(time_window * config["sampling_rate"])) + nt = int(round(time_window * sampling_rate)) max_epicenter_dist = 200.0 max_pair_dist = 10 max_neighbors = 50 min_cc_score = 0.5 min_obs = 8 max_obs = 100 - sampling_rate = 100.0 - config["cctorch"].update( + config.update( { "time_before_p": time_before_p, "time_after_p": time_after_p, @@ -319,8 +315,8 @@ def cut_templates(root_path, region, config): "time_after_s": time_after_s, "time_window": time_window, "nt": nt, - "max_epicenter_dist": max_epicenter_dist, - "max_pair_dist": max_pair_dist, + "max_epicenter_dist_km": max_epicenter_dist, + "max_pair_dist_km": max_pair_dist, "max_neighbors": max_neighbors, "min_cc_score": min_cc_score, "min_obs": min_obs, @@ -330,7 +326,7 @@ def cut_templates(root_path, region, config): ) # %% - stations = pd.read_csv(f"{root_path}/{region}/adloc/ransac_stations_sst.csv") + stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations_sst.csv") stations = stations[stations["network"] == "CI"] stations.sort_values(by=["latitude", "longitude"], inplace=True) # stations = stations[stations["station_id"].isin(picks["station_id"].unique())] @@ -338,8 +334,7 @@ def cut_templates(root_path, region, config): print(stations.iloc[:5]) # %% - # events = pd.read_csv(f"{root_path}/{region}/adloc/ransac_events_sst.csv", parse_dates=["time"]) - events = pd.read_csv(f"tests/events.csv") + events = pd.read_csv(f"{root_path}/{data_path}/ransac_events_sst.csv", parse_dates=["time"]) events.rename(columns={"time": "event_time"}, inplace=True) events["event_time"] = pd.to_datetime(events["event_time"], utc=True) reference_t0 = events["event_time"].min() @@ -347,14 +342,6 @@ def cut_templates(root_path, region, config): print(f"{len(events) = }") print(events.iloc[:5]) - plt.figure() - plt.scatter(events["longitude"], events["latitude"], s=2) - # plt.plot(stations["longitude"], stations["latitude"], "v") - # plt.plot(picks["longitude"], picks["latitude"], "x") - plt.title(f"{len(events)} events") - plt.savefig("debug_events.png") - # raise - # %% lon0 = (config["minlongitude"] + config["maxlongitude"]) / 2 lat0 = (config["minlatitude"] + config["maxlatitude"]) / 2 @@ -429,8 +416,8 @@ def cut_templates(root_path, region, config): ) print(f"{len(picks) = }") - picks = picks[picks["dist_km"] < config["cctorch"]["max_epicenter_dist"]] - print(f"{len(picks) = } with dist_km < {config['cctorch']['max_epicenter_dist']} km") + picks = picks[picks["dist_km"] < config["max_epicenter_dist_km"]] + print(f"{len(picks) = } with dist_km < {config['max_epicenter_dist_km']} km") # %% Reindex event and station to make it continuous stations["idx_sta"] = np.arange(len(stations)) @@ -445,28 +432,28 @@ def cut_templates(root_path, region, config): # %% pair_fname = f"{root_path}/{result_path}/pairs.txt" - config["cctorch"]["pair_file"] = pair_fname + config["pair_file"] = pair_fname # %% nch = 3 ## [E,N,Z] components - nt = config["cctorch"]["nt"] + nt = config["nt"] npk = len(picks) nev = len(events) nst = len(stations) print(f"npk: {npk}, nev: {nev}, nch: {nch}, nst: {nst}, nt: {nt}") template_shape = (npk, 3, 1, nt) traveltime_shape = (npk, 3, 1) - config["cctorch"]["template_shape"] = template_shape - config["cctorch"]["traveltime_shape"] = traveltime_shape + config["template_shape"] = template_shape + config["traveltime_shape"] = traveltime_shape template_fname = f"{root_path}/{result_path}/template.dat" traveltime_fname = f"{root_path}/{result_path}/traveltime.dat" traveltime_index_fname = f"{root_path}/{result_path}/traveltime_index.dat" traveltime_mask_fname = f"{root_path}/{result_path}/traveltime_mask.dat" - config["cctorch"]["template_file"] = template_fname - config["cctorch"]["traveltime_file"] = traveltime_fname - config["cctorch"]["traveltime_index_file"] = traveltime_index_fname - config["cctorch"]["traveltime_mask_file"] = traveltime_mask_fname + config["template_file"] = template_fname + config["traveltime_file"] = traveltime_fname + config["traveltime_index_file"] = traveltime_index_fname + config["traveltime_mask_file"] = traveltime_mask_fname template_array = np.memmap(template_fname, dtype=np.float32, mode="w+", shape=template_shape) traveltime_array = np.memmap(traveltime_fname, dtype=np.float32, mode="w+", shape=traveltime_shape) @@ -474,10 +461,10 @@ def cut_templates(root_path, region, config): traveltime_mask = np.memmap(traveltime_mask_fname, dtype=bool, mode="w+", shape=traveltime_shape) with open(f"{root_path}/{result_path}/config.json", "w") as f: - json.dump(config["cctorch"], f, indent=4, sort_keys=True) + json.dump(config, f, indent=4, sort_keys=True) # %% - config["cctorch"]["reference_t0"] = reference_t0 + config["reference_t0"] = reference_t0 events = events[["idx_eve", "x_km", "y_km", "z_km", "event_index", "event_time", "event_timestamp"]] stations = stations[["idx_sta", "x_km", "y_km", "z_km", "station_id", "component", "network", "station"]] picks = picks[["idx_eve", "idx_sta", "phase_type", "phase_score", "phase_time", "phase_timestamp", "phase_source"]] @@ -518,7 +505,7 @@ def pbar_update(x): events, picks, stations, - config["cctorch"], + config, lock, ), callback=pbar_update, @@ -537,8 +524,8 @@ def pbar_update(x): picks, events, stations, - max_pair_dist=config["cctorch"]["min_pair_dist_km"], - fname=config["cctorch"]["pair_file"], + max_pair_dist=config["min_pair_dist_km"], + fname=config["pair_file"], ) @@ -546,20 +533,14 @@ def pbar_update(x): if __name__ == "__main__": # %% - root_path = "local" - # region = "demo" - region = "debug" - if len(sys.argv) > 1: - root_path = sys.argv[1] - region = sys.argv[2] - # with open(f"{root_path}/{region}/config.json", "r") as fp: - # config = json.load(fp) - config = None + args = parse_args() + root_path = args.root_path + region = args.region - # %% - result_path = f"{region}/cctorch" - if not os.path.exists(f"{root_path}/{result_path}"): - os.makedirs(f"{root_path}/{result_path}") + with open(f"{root_path}/{region}/config.json", "r") as fp: + config = json.load(fp) + + config.update(config["cctorch"]) # %% print(json.dumps(config, indent=4, sort_keys=True)) diff --git a/scripts/download_catalog.py b/scripts/download_catalog.py index e39ef2c..8b49477 100644 --- a/scripts/download_catalog.py +++ b/scripts/download_catalog.py @@ -1,36 +1,35 @@ # %% +import json +import os +import re +from datetime import timezone from typing import Dict +import cartopy.crs as ccrs +import cartopy.feature as cfeature +import fsspec +import matplotlib +import matplotlib.pyplot as plt +import obspy +import obspy.clients.fdsn +import pandas as pd +import pyproj +from args import parse_args + +matplotlib.use("Agg") + def download_catalog( root_path: str, region: str, config: Dict, protocol: str = "file", bucket: str = "", token: Dict = None ): - import json - import os - import re - from datetime import timezone - from pathlib import Path - - import cartopy.crs as ccrs - import cartopy.feature as cfeature - import fsspec - import matplotlib - import matplotlib.pyplot as plt - import obspy - import obspy.clients.fdsn - import pandas as pd - import pyproj - from obspy.clients.fdsn import header - - matplotlib.use("Agg") # %% fs = fsspec.filesystem(protocol, token=token) if not os.path.exists(root_path): os.makedirs(root_path) - data_dir = f"{region}/obspy" - if not os.path.exists(f"{root_path}/{data_dir}"): - os.makedirs(f"{root_path}/{data_dir}") + result_path = f"{region}/obspy" + if not os.path.exists(f"{root_path}/{result_path}"): + os.makedirs(f"{root_path}/{result_path}") print(json.dumps(config, indent=4)) @@ -44,9 +43,9 @@ def download_catalog( catalog = obspy.Catalog() for provider in config["provider"]: - if os.path.exists(f"{root_path}/{data_dir}/catalog_{provider.lower()}.xml"): - print(f"Loading existing {root_path}/{data_dir}/catalog_{provider.lower()}.xml") - catalog += obspy.read_events(f"{root_path}/{data_dir}/catalog_{provider.lower()}.xml") + if os.path.exists(f"{root_path}/{result_path}/catalog_{provider.lower()}.xml"): + print(f"Loading existing {root_path}/{result_path}/catalog_{provider.lower()}.xml") + catalog += obspy.read_events(f"{root_path}/{result_path}/catalog_{provider.lower()}.xml") continue client = obspy.clients.fdsn.Client(provider, timeout=1200) @@ -65,24 +64,24 @@ def download_catalog( print(f"{provider} failed: {e}") if events is not None: print(f"Dowloaded {len(events)} events from {provider.lower()}") - events.write(f"{root_path}/{data_dir}/catalog_{provider.lower()}.xml", format="QUAKEML") + events.write(f"{root_path}/{result_path}/catalog_{provider.lower()}.xml", format="QUAKEML") if protocol != "file": fs.put( - f"{root_path}/{data_dir}/catalog_{provider.lower()}.xml", - f"{bucket}/{data_dir}/catalog_{provider.lower()}.xml", + f"{root_path}/{result_path}/catalog_{provider.lower()}.xml", + f"{bucket}/{result_path}/catalog_{provider.lower()}.xml", ) catalog += events if len(catalog) > 0: - catalog.write(f"{root_path}/{data_dir}/catalog.xml", format="QUAKEML") + catalog.write(f"{root_path}/{result_path}/catalog.xml", format="QUAKEML") if protocol != "file": - fs.put(f"{root_path}/{data_dir}/catalog.xml", f"{bucket}/{data_dir}/catalog.xml") + fs.put(f"{root_path}/{result_path}/catalog.xml", f"{bucket}/{result_path}/catalog.xml") # %% - if not os.path.exists(f"{root_path}/{data_dir}/catalog.xml"): # no events + if not os.path.exists(f"{root_path}/{result_path}/catalog.xml"): # no events return 0 - catalog = obspy.read_events(f"{root_path}/{data_dir}/catalog.xml") + catalog = obspy.read_events(f"{root_path}/{result_path}/catalog.xml") def parase_catalog(catalog): events = {} @@ -119,9 +118,9 @@ def parase_catalog(catalog): events["depth_km"] = events["depth_km"].round(3) events[["x_km", "y_km", "z_km"]] = events[["x_km", "y_km", "z_km"]].round(3) events.sort_values("time", inplace=True) - events.to_csv(f"{root_path}/{data_dir}/catalog.csv", index=False) + events.to_csv(f"{root_path}/{result_path}/catalog.csv", index=False) if protocol != "file": - fs.put(f"{root_path}/{data_dir}/catalog.csv", f"{bucket}/{data_dir}/catalog.csv") + fs.put(f"{root_path}/{result_path}/catalog.csv", f"{bucket}/{result_path}/catalog.csv") # %% def visulization(config, events=None, stations=None, fig_name="catalog.png"): @@ -147,6 +146,7 @@ def visulization(config, events=None, stations=None, fig_name="catalog.png"): marker=".", label="events", ) + ax.set_title(f"{config['provider']}: {len(events)} events") if stations is not None: ax.scatter( stations.longitude, @@ -161,30 +161,26 @@ def visulization(config, events=None, stations=None, fig_name="catalog.png"): plt.savefig(fig_name, dpi=300, bbox_inches="tight") stations = None - if os.path.exists(f"{root_path}/{data_dir}/stations.csv"): - stations = pd.read_csv(f"{root_path}/{data_dir}/stations.csv") - visulization(config, events, stations, f"{root_path}/{data_dir}/catalog.png") + if os.path.exists(f"{root_path}/{result_path}/stations.csv"): + stations = pd.read_csv(f"{root_path}/{result_path}/stations.csv") + visulization(config, events, stations, f"{root_path}/{result_path}/catalog.png") if protocol != "file": - fs.put(f"{root_path}/{data_dir}/catalog.png", f"{bucket}/{data_dir}/catalog.png") + fs.put(f"{root_path}/{result_path}/catalog.png", f"{bucket}/{result_path}/catalog.png") # %% copy to results/network if not os.path.exists(f"{root_path}/{region}/results/network"): os.makedirs(f"{root_path}/{region}/results/network") for file in ["catalog.csv", "catalog.png"]: - os.system(f"cp {root_path}/{data_dir}/{file} {root_path}/{region}/results/network/{file}") + os.system(f"cp {root_path}/{result_path}/{file} {root_path}/{region}/results/network/{file}") if protocol != "file": - fs.put(f"{root_path}/{data_dir}/{file}", f"{bucket}/{region}/results/network/{file}") + fs.put(f"{root_path}/{result_path}/{file}", f"{bucket}/{region}/results/network/{file}") if __name__ == "__main__": - import json - import sys - - root_path = "local" - region = "demo" - if len(sys.argv) > 1: - root_path = sys.argv[1] - region = sys.argv[2] + + args = parse_args() + root_path = args.root_path + region = args.region with open(f"{root_path}/{region}/config.json", "r") as fp: config = json.load(fp) diff --git a/scripts/download_station.py b/scripts/download_station.py index 7047d8d..706900b 100644 --- a/scripts/download_station.py +++ b/scripts/download_station.py @@ -1,35 +1,37 @@ # %% +import json +import os +import time +from collections import defaultdict +from datetime import timezone from typing import Dict +import cartopy.crs as ccrs +import cartopy.feature as cfeature +import fsspec +import matplotlib +import matplotlib.pyplot as plt +import obspy +import obspy.clients.fdsn +import pandas as pd +import pyproj +from args import parse_args + +matplotlib.use("Agg") + def download_station( root_path: str, region: str, config: Dict, protocol: str = "file", bucket: str = "", token: Dict = None ): - import json - import os - import time - from collections import defaultdict - from datetime import timezone - - import cartopy.crs as ccrs - import cartopy.feature as cfeature - import fsspec - import matplotlib - import matplotlib.pyplot as plt - import obspy - import obspy.clients.fdsn - import pandas as pd - import pyproj - - matplotlib.use("Agg") # %% fs = fsspec.filesystem(protocol, token=token) if not os.path.exists(root_path): os.makedirs(root_path) - data_dir = f"{region}/obspy" - if not os.path.exists(f"{root_path}/{data_dir}"): - os.makedirs(f"{root_path}/{data_dir}") + + result_dir = f"{region}/obspy" + if not os.path.exists(f"{root_path}/{result_dir}"): + os.makedirs(f"{root_path}/{result_dir}") print(json.dumps(config, indent=4)) @@ -42,9 +44,9 @@ def download_station( print("Downloading station response...") inventory = obspy.Inventory() for provider in config["provider"]: - if os.path.exists(f"{root_path}/{data_dir}/inventory_{provider.lower()}.xml"): - print(f"Loading existing {root_path}/{data_dir}/inventory_{provider.lower()}.xml") - inventory += obspy.read_inventory(f"{root_path}/{data_dir}/inventory_{provider.lower()}.xml") + if os.path.exists(f"{root_path}/{result_dir}/inventory_{provider.lower()}.xml"): + print(f"Loading existing {root_path}/{result_dir}/inventory_{provider.lower()}.xml") + inventory += obspy.read_inventory(f"{root_path}/{result_dir}/inventory_{provider.lower()}.xml") continue client = obspy.clients.fdsn.Client(provider, timeout=1200) max_retry = 10 @@ -82,19 +84,19 @@ def download_station( print( f"Dowloaded {len([chn for net in stations for sta in net for chn in sta])} stations from {provider.lower()}" ) - stations.write(f"{root_path}/{data_dir}/inventory_{provider.lower()}.xml", format="STATIONXML") + stations.write(f"{root_path}/{result_dir}/inventory_{provider.lower()}.xml", format="STATIONXML") if protocol != "file": fs.put( - f"{root_path}/{data_dir}/inventory_{provider.lower()}.xml", - f"{bucket}/{data_dir}/inventory_{provider.lower()}.xml", + f"{root_path}/{result_dir}/inventory_{provider.lower()}.xml", + f"{bucket}/{result_dir}/inventory_{provider.lower()}.xml", ) inventory += stations - inventory.write(f"{root_path}/{data_dir}/inventory.xml", format="STATIONXML") + inventory.write(f"{root_path}/{result_dir}/inventory.xml", format="STATIONXML") # save inventory by station - if not os.path.exists(f"{root_path}/{data_dir}/inventory/"): - os.makedirs(f"{root_path}/{data_dir}/inventory/") + if not os.path.exists(f"{root_path}/{result_dir}/inventory/"): + os.makedirs(f"{root_path}/{result_dir}/inventory/") for net in inventory: for sta in net: @@ -103,16 +105,15 @@ def download_station( network=net.code, station=sta.code, location=chn.location_code, channel=chn.code[:-1] + "*" ) inv.write( - f"{root_path}/{data_dir}/inventory/{net.code}.{sta.code}.{chn.location_code}.{chn.code[:-1]}.xml", + f"{root_path}/{result_dir}/inventory/{net.code}.{sta.code}.{chn.location_code}.{chn.code[:-1]}.xml", format="STATIONXML", ) if protocol != "file": - fs.put(f"{root_path}/{data_dir}/inventory/", f"{bucket}/{data_dir}/inventory/") - fs.put(f"{root_path}/{data_dir}/inventory.xml", f"{bucket}/{data_dir}/inventory.xml") + fs.put(f"{root_path}/{result_dir}/inventory/", f"{bucket}/{result_dir}/inventory/") + fs.put(f"{root_path}/{result_dir}/inventory.xml", f"{bucket}/{result_dir}/inventory.xml") # %% - def parse_inventory_json(inventory, mseed_ids=[]): comp = ["3", "2", "1", "U", "V", "E", "N", "Z"] order = {key: i for i, key in enumerate(comp)} @@ -210,8 +211,8 @@ def parse_inventory_csv(inventory, mseed_ids=[]): return channel_list for provider in config["provider"]: - if os.path.exists(f"{root_path}/{data_dir}/inventory_{provider.lower()}.xml"): - inventory = obspy.read_inventory(f"{root_path}/{data_dir}/inventory_{provider.lower()}.xml") + if os.path.exists(f"{root_path}/{result_dir}/inventory_{provider.lower()}.xml"): + inventory = obspy.read_inventory(f"{root_path}/{result_dir}/inventory_{provider.lower()}.xml") stations = parse_inventory_csv(inventory) stations[["x_km", "y_km"]] = stations.apply( lambda row: pd.Series(proj(row["longitude"], row["latitude"])), axis=1 @@ -222,39 +223,39 @@ def parse_inventory_csv(inventory, mseed_ids=[]): stations[["x_km", "y_km", "z_km"]] = stations[["x_km", "y_km", "z_km"]].round(2) stations = stations.sort_values(by=["network", "station", "location", "channel"]) stations = stations.groupby(["network", "station", "location", "channel"]).first().reset_index() - stations.to_csv(f"{root_path}/{data_dir}/stations_{provider.lower()}.csv", index=False) + stations.to_csv(f"{root_path}/{result_dir}/stations_{provider.lower()}.csv", index=False) if protocol != "file": fs.put( - f"{root_path}/{data_dir}/stations_{provider.lower()}.csv", - f"{bucket}/{data_dir}/stations_{provider.lower()}.csv", + f"{root_path}/{result_dir}/stations_{provider.lower()}.csv", + f"{bucket}/{result_dir}/stations_{provider.lower()}.csv", ) stations = parse_inventory_json(inventory) if len(stations) > 0: - with open(f"{root_path}/{data_dir}/stations_{provider.lower()}.json", "w") as f: + with open(f"{root_path}/{result_dir}/stations_{provider.lower()}.json", "w") as f: json.dump(stations, f, indent=4) if protocol != "file": fs.put( - f"{root_path}/{data_dir}/stations_{provider.lower()}.json", - f"{bucket}/{data_dir}/stations_{provider.lower()}.json", + f"{root_path}/{result_dir}/stations_{provider.lower()}.json", + f"{bucket}/{result_dir}/stations_{provider.lower()}.json", ) # %% merge stations stations = [] for provider in config["provider"]: - tmp = pd.read_csv(f"{root_path}/{data_dir}/stations_{provider.lower()}.csv", dtype={"location": str}) + tmp = pd.read_csv(f"{root_path}/{result_dir}/stations_{provider.lower()}.csv", dtype={"location": str}) tmp["provider"] = provider stations.append(tmp) stations = pd.concat(stations) stations = stations.groupby(["network", "station", "location", "channel"], dropna=False).first().reset_index() print(f"Merged {len(stations)} channels") - stations.to_csv(f"{root_path}/{data_dir}/stations.csv", index=False) + stations.to_csv(f"{root_path}/{result_dir}/stations.csv", index=False) if protocol != "file": - fs.put(f"{root_path}/{data_dir}/stations.csv", f"{bucket}/{data_dir}/stations.csv") + fs.put(f"{root_path}/{result_dir}/stations.csv", f"{bucket}/{result_dir}/stations.csv") stations = {} for provider in config["provider"]: - with open(f"{root_path}/{data_dir}/stations_{provider.lower()}.json") as f: + with open(f"{root_path}/{result_dir}/stations_{provider.lower()}.json") as f: tmp = json.load(f) for key, value in tmp.items(): if key not in stations: @@ -262,10 +263,10 @@ def parse_inventory_csv(inventory, mseed_ids=[]): stations[key]["provider"] = provider if len(stations) > 0: print(f"Merged {len(stations)} stations") - with open(f"{root_path}/{data_dir}/stations.json", "w") as f: + with open(f"{root_path}/{result_dir}/stations.json", "w") as f: json.dump(stations, f, indent=4) if protocol != "file": - fs.put(f"{root_path}/{data_dir}/stations.json", f"{bucket}/{data_dir}/stations.json") + fs.put(f"{root_path}/{result_dir}/stations.json", f"{bucket}/{result_dir}/stations.json") # %% def visulization(config, events=None, stations=None, fig_name="catalog.png"): @@ -306,34 +307,32 @@ def visulization(config, events=None, stations=None, fig_name="catalog.png"): stations = pd.DataFrame.from_dict(stations, orient="index") events = None - if os.path.exists(f"{root_path}/{data_dir}/events.csv"): - events = pd.read_csv(f"{root_path}/{data_dir}/events.csv") - visulization(config, events, stations, f"{root_path}/{data_dir}/stations.png") + if os.path.exists(f"{root_path}/{result_dir}/events.csv"): + events = pd.read_csv(f"{root_path}/{result_dir}/events.csv") + visulization(config, events, stations, f"{root_path}/{result_dir}/stations.png") if protocol != "file": - fs.put(f"{root_path}/{data_dir}/stations.png", f"{bucket}/{data_dir}/stations.png") + fs.put(f"{root_path}/{result_dir}/stations.png", f"{bucket}/{result_dir}/stations.png") # %% copy to results/network if not os.path.exists(f"{root_path}/{region}/results/network"): os.makedirs(f"{root_path}/{region}/results/network", exist_ok=True) for file in ["inventory.xml", "stations.csv", "stations.json", "stations.png"]: - os.system(f"cp {root_path}/{data_dir}/{file} {root_path}/{region}/results/network/{file}") + os.system(f"cp {root_path}/{result_dir}/{file} {root_path}/{region}/results/network/{file}") if protocol != "file": - fs.put(f"{root_path}/{data_dir}/{file}", f"{bucket}/{region}/results/network/{file}") - os.system(f"cp -r {root_path}/{data_dir}/inventory/ {root_path}/{region}/results/network/inventory/") + fs.put(f"{root_path}/{result_dir}/{file}", f"{bucket}/{region}/results/network/{file}") + os.system(f"cp -r {root_path}/{result_dir}/inventory/ {root_path}/{region}/results/network/inventory/") if __name__ == "__main__": - import json - import sys - root_path = "local" - region = "demo" - if len(sys.argv) > 1: - root_path = sys.argv[1] - region = sys.argv[2] + args = parse_args() + root_path = args.root_path + region = args.region with open(f"{root_path}/{region}/config.json", "r") as fp: config = json.load(fp) + config.update(config["obspy"]) + download_station(root_path=root_path, region=region, config=config, protocol="file", bucket="", token=None) diff --git a/scripts/download_waveform_v2.py b/scripts/download_waveform_v2.py index 3b464b0..c1411ca 100644 --- a/scripts/download_waveform_v2.py +++ b/scripts/download_waveform_v2.py @@ -1,19 +1,18 @@ # %% import json -import os -import threading import multiprocessing as mp +import os import time -from concurrent.futures import ThreadPoolExecutor from datetime import datetime from glob import glob +from typing import Dict, List import fsspec import numpy as np import obspy import obspy.clients.fdsn import pandas as pd -from typing import Dict, List +from args import parse_args def map_remote_path(provider, bucket, starttime, network, station, location, instrument, component): @@ -315,15 +314,10 @@ def download_waveform( if __name__ == "__main__": - import json - import os - import sys - - root_path = "local" - region = "demo" - if len(sys.argv) > 1: - root_path = sys.argv[1] - region = sys.argv[2] + + args = parse_args() + root_path = args.root_path + region = args.region with open(f"{root_path}/{region}/config.json", "r") as fp: config = json.load(fp) diff --git a/scripts/run_adloc.py b/scripts/run_adloc.py index f7797c7..f9a580a 100644 --- a/scripts/run_adloc.py +++ b/scripts/run_adloc.py @@ -2,20 +2,21 @@ import json import multiprocessing as mp import os +import sys +from typing import Dict, List, NamedTuple +import fsspec import matplotlib.pyplot as plt import numpy as np import pandas as pd -from pyproj import Proj - from adloc.eikonal2d import init_eikonal2d from adloc.sacloc2d import ADLoc from adloc.utils import invert_location, invert_location_iter +from args import parse_args +from pyproj import Proj # from utils import plotting_ransac from utils.plotting import plotting_ransac -from typing import Dict, List, NamedTuple -import fsspec # %% @@ -43,11 +44,11 @@ def run_adloc( os.makedirs(figure_path) # %% - data_path = f"{root_path}/{region}/" - picks_file = os.path.join(data_path, f"gamma/gamma_picks_{node_rank:03d}_{num_nodes:03d}.csv") - events_file = os.path.join(data_path, f"gamma/gamma_events_{node_rank:03d}_{num_nodes:03d}.csv") + data_path = f"{root_path}/{region}/gamma" + picks_file = os.path.join(data_path, f"gamma_picks_{node_rank:03d}_{num_nodes:03d}.csv") + events_file = os.path.join(data_path, f"gamma_events_{node_rank:03d}_{num_nodes:03d}.csv") # stations_file = os.path.join(data_path, "stations.csv") - stations_file = f"{data_path}/results/network/stations.json" + stations_file = f"{root_path}/{region}/obspy/stations.json" picks = pd.read_csv(picks_file) picks["phase_time"] = pd.to_datetime(picks["phase_time"]) @@ -127,13 +128,13 @@ def run_adloc( config["eikonal"] = init_eikonal2d(config["eikonal"]) # %% config for location - config["min_picks"] = 8 - config["min_picks_ratio"] = 0.2 + config["min_picks"] = 6 + config["min_picks_ratio"] = 0.5 config["max_residual_time"] = 1.0 config["max_residual_amplitude"] = 1.0 - config["min_score"] = 0.6 - config["min_s_picks"] = 2 - config["min_p_picks"] = 2 + config["min_score"] = 0.5 + config["min_s_picks"] = 1.5 + config["min_p_picks"] = 1.5 config["bfgs_bounds"] = ( (config["xlim_km"][0] - 1, config["xlim_km"][1] + 1), # x @@ -258,17 +259,10 @@ def run_adloc( # %% if __name__ == "__main__": - import json - import os - import sys - - os.environ["OMP_NUM_THREADS"] = "8" + args = parse_args() + root_path = args.root_path + region = args.region - root_path = "local" - region = "demo" - if len(sys.argv) > 1: - root_path = sys.argv[1] - region = sys.argv[2] with open(f"{root_path}/{region}/config.json", "r") as fp: config = json.load(fp) diff --git a/scripts/run_cctorch.py b/scripts/run_cctorch.py index 650bc16..ec37c30 100644 --- a/scripts/run_cctorch.py +++ b/scripts/run_cctorch.py @@ -1,26 +1,15 @@ # %% -import argparse import os -import sys -from glob import glob -from pathlib import Path import torch - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("root_path", nargs="?", type=str, default="local", help="root path") - parser.add_argument("region", nargs="?", type=str, default="demo", help="region") - parser.add_argument("--dtct_pair", action="store_true", help="run convert_dtcc.py") - return parser.parse_args() - +from args import parse_args args = parse_args() # %% root_path = args.root_path region = args.region +data_path = f"{region}/cctorch" result_path = f"{region}/cctorch/ccpairs" if not os.path.exists(f"{root_path}/{result_path}"): os.makedirs(f"{root_path}/{result_path}") @@ -29,12 +18,12 @@ def parse_args(): ## based on GPU memory batch = 1_024 -block_size1 = 10_000 -block_size2 = 10_000 # ~7GB +block_size1 = 1000_000 +block_size2 = 1000_000 if args.dtct_pair: dt_ct = f"{root_path}/{region}/hypodd/dt.ct" - pair_list = f"{root_path}/{region}/hypodd/event_pairs.txt" + pair_list = f"{root_path}/{region}/hypodd/pairs.txt" lines = [] with open(dt_ct, "r") as fp: for line in fp: @@ -45,12 +34,17 @@ def parse_args(): lines.append(f"{ev1},{ev2}\n") print(f"Number of pairs from hypodd dt.ct: {len(lines)}") - with open(f"{root_path}/{region}/hypodd/event_pairs.txt", "w") as fp: + with open(f"{root_path}/{region}/hypodd/pairs.txt", "w") as fp: fp.writelines(lines) - base_cmd = f"../CCTorch/run.py --pair_list={root_path}/{region}/hypodd/event_pairs.txt --data_path1={root_path}/{region}/cctorch/template.dat --data_format1=memmap --config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --result_path={root_path}/{result_path}" + base_cmd = f"../CCTorch/run.py --pair_list={root_path}/{region}/hypodd/pairs.txt --data_path1={root_path}/{region}/cctorch/template.dat --data_format1=memmap --config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --result_path={root_path}/{result_path}" else: - base_cmd = f"../CCTorch/run.py --pair_list={root_path}/{region}/cctorch/event_pairs.txt --data_path1={root_path}/{region}/cctorch/template.dat --data_format1=memmap --config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --result_path={root_path}/{result_path}" + base_cmd = ( + f"../CCTorch/run.py --pair_list={root_path}/{region}/cctorch/pairs.txt --data_path1={root_path}/{region}/cctorch/template.dat --data_format1=memmap " + f"--data_list1={root_path}/{region}/cctorch/cctorch_picks.csv " + f"--events_csv={root_path}/{region}/cctorch/cctorch_events.csv --picks_csv={root_path}/{region}/cctorch/cctorch_picks.csv --stations_csv={root_path}/{region}/cctorch/cctorch_stations.csv " + f"--config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --result_path={root_path}/{result_path}" + ) num_gpu = torch.cuda.device_count() if num_gpu == 0: if os.uname().sysname == "Darwin": @@ -61,3 +55,12 @@ def parse_args(): cmd = f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}" print(cmd) os.system(cmd) + +# %% +os.chdir(f"{root_path}/{region}/cctorch") +source_file = f"ccpairs/CC_{num_gpu:03d}_dt.cc" +target_file = f"dt.cc" +print(f"{source_file} -> {target_file}") +if os.path.lexists(target_file): + os.remove(target_file) +os.symlink(source_file, target_file) diff --git a/scripts/run_gamma.py b/scripts/run_gamma.py index dcb71d8..a4ba0b8 100644 --- a/scripts/run_gamma.py +++ b/scripts/run_gamma.py @@ -1,10 +1,10 @@ -from typing import Dict, List, NamedTuple import json import os +from typing import Dict, NamedTuple import fsspec -import numpy as np import pandas as pd +from args import parse_args from gamma.utils import association, estimate_eps from pyproj import Proj @@ -25,16 +25,15 @@ def run_gamma( fs = fsspec.filesystem(protocol=protocol, token=token) # %% + data_path = f"{region}/phasenet" result_path = f"{region}/gamma" if not os.path.exists(f"{root_path}/{result_path}"): os.makedirs(f"{root_path}/{result_path}") # %% - # station_csv = data_path / "stations.csv" - station_json = f"{region}/results/network/stations.json" + station_json = f"{region}/obspy/stations.json" if picks_csv is None: - # picks_csv = f"{region}/results/picking/phase_picks_{node_rank:03d}_{num_nodes:03d}.csv" - picks_csv = f"{region}/phasenet/phasenet_picks_{node_rank:03d}_{num_nodes:03d}.csv" + picks_csv = f"{data_path}/phasenet_picks_{node_rank:03d}_{num_nodes:03d}.csv" gamma_events_csv = f"{result_path}/gamma_events_{node_rank:03d}_{num_nodes:03d}.csv" gamma_picks_csv = f"{result_path}/gamma_picks_{node_rank:03d}_{num_nodes:03d}.csv" @@ -83,19 +82,12 @@ def run_gamma( # earthquake location config["vel"] = {"p": 6.0, "s": 6.0 / 1.75} config["dims"] = ["x(km)", "y(km)", "z(km)"] - config["x(km)"] = ( - np.array([config["minlongitude"] - config["longitude0"], config["maxlongitude"] - config["longitude0"]]) - * config["degree2km"] - * np.cos(np.deg2rad(config["latitude0"])) - ) - config["y(km)"] = ( - np.array([config["minlatitude"] - config["latitude0"], config["maxlatitude"] - config["latitude0"]]) - * config["degree2km"] - ) - if "gamma" not in config: - config["z(km)"] = (0, 60) - else: - config["z(km)"] = [config["gamma"]["zmin_km"], config["gamma"]["zmax_km"]] + xmin, ymin = proj(config["minlongitude"], config["minlatitude"]) + xmax, ymax = proj(config["maxlongitude"], config["maxlatitude"]) + zmin, zmax = config["mindepth"], config["maxdepth"] + config["x(km)"] = (xmin, xmax) + config["y(km)"] = (ymin, ymax) + config["z(km)"] = (zmin, zmax) config["bfgs_bounds"] = ( (config["x(km)"][0] - 1, config["x(km)"][1] + 1), # x (config["y(km)"][0] - 1, config["y(km)"][1] + 1), # y @@ -140,12 +132,6 @@ def run_gamma( if len(events) > 0: ## create catalog - # events = pd.DataFrame( - # events, - # columns=["time"] - # + config["dims"] - # + ["magnitude", "sigma_time", "sigma_amp", "cov_time_amp", "event_index", "gamma_score"], - # ) events = pd.DataFrame(events) events[["longitude", "latitude"]] = events.apply( lambda x: pd.Series(proj(longitude=x["x(km)"], latitude=x["y(km)"], inverse=True)), axis=1 @@ -153,46 +139,13 @@ def run_gamma( events["depth_km"] = events["z(km)"] events.sort_values("time", inplace=True) with open(f"{root_path}/{gamma_events_csv}", "w") as fp: - events.to_csv( - fp, - index=False, - float_format="%.3f", - date_format="%Y-%m-%dT%H:%M:%S.%f", - # columns=[ - # "time", - # "magnitude", - # "longitude", - # "latitude", - # # "depth(m)", - # "depth_km", - # "sigma_time", - # "sigma_amp", - # "cov_time_amp", - # "event_index", - # "gamma_score", - # ], - ) - # events = events[['time', 'magnitude', 'longitude', 'latitude', 'depth(m)', 'sigma_time', 'sigma_amp', 'gamma_score']] - + events.to_csv(fp, index=False, float_format="%.3f", date_format="%Y-%m-%dT%H:%M:%S.%f") ## add assignment to picks assignments = pd.DataFrame(assignments, columns=["pick_index", "event_index", "gamma_score"]) picks = picks.join(assignments.set_index("pick_index")).fillna(-1).astype({"event_index": int}) picks.sort_values(["phase_time"], inplace=True) with open(f"{root_path}/{gamma_picks_csv}", "w") as fp: - picks.to_csv( - fp, - index=False, - date_format="%Y-%m-%dT%H:%M:%S.%f", - # columns=[ - # "station_id", - # "phase_time", - # "phase_type", - # "phase_score", - # "phase_amplitude", - # "event_index", - # "gamma_score", - # ], - ) + picks.to_csv(fp, index=False, date_format="%Y-%m-%dT%H:%M:%S.%f") if protocol != "file": fs.put(f"{root_path}/{gamma_events_csv}", f"{bucket}/{gamma_events_csv}") @@ -229,17 +182,13 @@ def run_gamma( if __name__ == "__main__": - import json - import os - import sys os.environ["OMP_NUM_THREADS"] = "8" - root_path = "local" - region = "demo" - if len(sys.argv) > 1: - root_path = sys.argv[1] - region = sys.argv[2] + args = parse_args() + root_path = args.root_path + region = args.region + with open(f"{root_path}/{region}/config.json", "r") as fp: config = json.load(fp) diff --git a/scripts/run_growclust_cc.py b/scripts/run_growclust_cc.py new file mode 100644 index 0000000..48473ef --- /dev/null +++ b/scripts/run_growclust_cc.py @@ -0,0 +1,65 @@ +# %% +import os +from datetime import datetime + +import pandas as pd +from args import parse_args +from tqdm import tqdm + +args = parse_args() + +# %% +root_path = args.root_path +region = args.region +result_path = f"{region}/growclust" +if not os.path.exists(f"{root_path}/{result_path}"): + os.makedirs(f"{root_path}/{result_path}") + +# %% +# stations_json = f"{region}/results/data/stations.json" +# stations = pd.read_json(f"{root_path}/{stations_json}", orient="index") +station_csv = f"{region}/adloc/ransac_stations_sst.csv" +stations = pd.read_csv(f"{root_path}/{station_csv}") +stations.set_index("station_id", inplace=True) + + +lines = [] +for i, row in stations.iterrows(): + # line = f"{row['network']}{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" + line = f"{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" + lines.append(line) + +with open(f"{root_path}/{result_path}/stlist.txt", "w") as fp: + fp.writelines(lines) + + +# %% +# events_csv = f"{region}/results/phase_association/events.csv" +events_csv = f"{region}/adloc/ransac_events_sst.csv" +# event_file = f"{region}/cctorch/events.csv" +events = pd.read_csv(f"{root_path}/{events_csv}") +# event_df = event_df[event_df["gamma_score"] > 10] +# event_index = [f"{x:06d}" for x in event_df["event_index"]] +events["time"] = pd.to_datetime(events["time"]) +if "magnitude" not in events.columns: + events["magnitude"] = 0.0 + +events[["year", "month", "day", "hour", "minute", "second"]] = ( + events["time"] + # .apply(lambda x: datetime.fromisoformat(x).strftime("%Y %m %d %H %M %S.%f").split(" ")) + .apply(lambda x: x.strftime("%Y %m %d %H %M %S.%f").split(" ")) + .apply(pd.Series) + .apply(pd.to_numeric) +) + +lines = [] +for i, row in events.iterrows(): + # yr mon day hr min sec lat lon dep mag eh ez rms evid + line = f"{row['year']:4d} {row['month']:2d} {row['day']:2d} {row['hour']:2d} {row['minute']:2d} {row['second']:7.3f} {row['latitude']:.4f} {row['longitude']:.4f} {row['depth_km']:7.3f} {row['magnitude']:.2f} 0.000 0.000 0.000 {row['event_index']:6d}\n" + lines.append(line) + +with open(f"{root_path}/{result_path}/evlist.txt", "w") as fp: + fp.writelines(lines) + +# %% +os.system(f"bash run_growclust_cc.sh {root_path} {region}") diff --git a/scripts/run_hypodd_cc.py b/scripts/run_hypodd_cc.py new file mode 100644 index 0000000..e18cdd3 --- /dev/null +++ b/scripts/run_hypodd_cc.py @@ -0,0 +1,72 @@ +# %% +import json +import os + +import numpy as np +import pandas as pd +from args import parse_args + +# %% +args = parse_args() +root_path = args.root_path +region = args.region + +with open(f"{root_path}/{region}/config.json", "r") as fp: + config = json.load(fp) + +# %% +data_path = f"{region}/cctorch" +result_path = f"{region}/hypodd" +if not os.path.exists(f"{root_path}/{result_path}"): + os.makedirs(f"{root_path}/{result_path}") + +# %% +stations = pd.read_csv(f"{root_path}/{data_path}/cctorch_stations.csv") + +station_lines = {} +for i, row in stations.iterrows(): + station_id = row["station_id"] + network_code, station_code, comp_code, channel_code = station_id.split(".") + # tmp_code = f"{station_code}{channel_code}" + tmp_code = f"{station_code}" + station_lines[tmp_code] = f"{tmp_code:<8s} {row['latitude']:.3f} {row['longitude']:.3f}\n" + + +with open(f"{root_path}/{result_path}/stations.dat", "w") as f: + for line in sorted(station_lines.values()): + f.write(line) + +# %% +events = pd.read_csv(f"{root_path}/{data_path}/cctorch_events.csv") +events["time"] = pd.to_datetime(events["event_time"], format="mixed") + +event_lines = [] + +mean_latitude = events["latitude"].mean() +mean_longitude = events["longitude"].mean() +for i, row in events.iterrows(): + event_index = row["event_index"] + origin = row["time"] + # magnitude = row["magnitude"] + magnitude = 1.0 + x_err = 0.0 + z_err = 0.0 + time_err = 0.0 + dx, dy, dz = 0.0, 0.0, 0.0 + dx = np.random.uniform(-0.01, 0.01) + dy = np.random.uniform(-0.01, 0.01) + # dz = np.random.uniform(0, 10) + dz = 0 + event_lines.append( + f"{origin.year:4d}{origin.month:02d}{origin.day:02d} " + f"{origin.hour:2d}{origin.minute:02d}{origin.second:02d}{round(origin.microsecond / 1e4):02d} " + # f"{row['latitude']:8.4f} {row['longitude']:9.4f} {row['depth_km']:8.4f} " + f"{row['latitude'] + dy:8.4f} {row['longitude']+ dx:9.4f} {row['depth_km']+dz:8.4f} " + f"{magnitude:5.2f} {x_err:5.2f} {z_err:5.2f} {time_err:5.2f} {event_index:9d}\n" + ) + +with open(f"{root_path}/{result_path}/events.dat", "w") as f: + f.writelines(event_lines) + +# %% +os.system(f"bash run_hypodd_cc.sh {root_path} {region}") diff --git a/scripts/run_hypodd_cc.sh b/scripts/run_hypodd_cc.sh index 0f62db5..9b33a01 100644 --- a/scripts/run_hypodd_cc.sh +++ b/scripts/run_hypodd_cc.sh @@ -32,7 +32,7 @@ dt.cc * * event file: -event.sel +events.dat * * station file: stations.dat diff --git a/scripts/convert_hypodd.py b/scripts/run_hypodd_ct.py similarity index 73% rename from scripts/convert_hypodd.py rename to scripts/run_hypodd_ct.py index 20d621c..fe12947 100644 --- a/scripts/convert_hypodd.py +++ b/scripts/run_hypodd_ct.py @@ -1,25 +1,10 @@ # %% -import argparse -import json import os -from datetime import datetime -from pathlib import Path -import h5py -import numpy as np import pandas as pd -import scipy +from args import parse_args from tqdm import tqdm - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("root_path", nargs="?", type=str, default="local", help="root path") - parser.add_argument("region", nargs="?", type=str, default="demo", help="region") - parser.add_argument("--dt_cc", action="store_true", help="run convert_dtcc.py") - return parser.parse_args() - - args = parse_args() # %% @@ -32,14 +17,14 @@ def parse_args(): # %% ############################################# Station Format ###################################################### -station_json = f"{region}/results/data/stations.json" -stations = pd.read_json(f"{root_path}/{station_json}", orient="index") +# station_json = f"{region}/results/data/stations.json" +# stations = pd.read_json(f"{root_path}/{station_json}", orient="index") +# station_csv = f"{region}/cctorch/cctorch_stations.csv" +station_csv = f"{region}/adloc/ransac_stations_sst.csv" +stations = pd.read_csv(f"{root_path}/{station_csv}") +stations.set_index("station_id", inplace=True) shift_topo = stations["elevation_m"].max() / 1e3 -# shift_topo = stations["elevation_m"].max()/1e3 + 3.0 -# shift_topo = 0.0 ## prevent air quakes -# shift_topo = 3.0 - converted_hypoinverse = [] converted_hypodd = {} @@ -68,18 +53,22 @@ def parse_args(): # %% ############################################# Picks Format ###################################################### -picks_csv = f"{region}/results/phase_association/picks.csv" -events_csv = f"{region}/results/phase_association/events.csv" +picks_csv = f"{region}/adloc/ransac_picks_sst.csv" +events_csv = f"{region}/adloc/ransac_events_sst.csv" -picks = pd.read_csv(f"{root_path}/{picks_csv}", parse_dates=["phase_time"]) -events = pd.read_csv(f"{root_path}/{events_csv}", parse_dates=["time"]) +picks = pd.read_csv(f"{root_path}/{picks_csv}") +events = pd.read_csv(f"{root_path}/{events_csv}") +picks["phase_time"] = pd.to_datetime(picks["phase_time"], format="mixed") +events["time"] = pd.to_datetime(events["time"]) +events["magnitude"] = 1.0 +events["sigma_time"] = 1.0 # events.sort_values("time", inplace=True) picks = picks.loc[picks["event_index"].isin(events["event_index"])] lines = [] picks_by_event = picks.groupby("event_index").groups -for i, event in tqdm(events.iterrows(), desc="Convert gamma catalog", total=len(events)): +for i, event in tqdm(events.iterrows(), desc="Convert catalog", total=len(events)): # event_time = datetime.strptime(event["time"], "%Y-%m-%dT%H:%M:%S.%f") event_time = event["time"] lat = event["latitude"] @@ -120,6 +109,4 @@ def parse_args(): fp.writelines(lines) # %% -if args.dt_cc: - os.system("python convert_dtcc.py") - os.system("cp templates/dt.cc relocation/hypodd/") +os.system(f"bash run_hypodd_ct.sh {root_path} {region}") diff --git a/scripts/run_phasenet.py b/scripts/run_phasenet.py index 6ce6033..fa09e19 100644 --- a/scripts/run_phasenet.py +++ b/scripts/run_phasenet.py @@ -1,6 +1,12 @@ # %% +import json +import os +from glob import glob from typing import Dict, List +import fsspec +from args import parse_args + def run_phasenet( root_path: str, @@ -14,10 +20,6 @@ def run_phasenet( bucket: str = "", token: Dict = None, ) -> str: - import os - from glob import glob - - import fsspec # %% fs = fsspec.filesystem(protocol=protocol, token=token) @@ -62,7 +64,7 @@ def run_phasenet( # %% os.system( - f"python {model_path}/phasenet/predict.py --model={model_path}/model/190703-214543 --data_dir=./ --data_list={root_path}/{result_path}/mseed_list_{node_rank:03d}_{num_nodes:03d}.csv --response_xml={root_path}/{region}/results/network/inventory.xml --format=mseed --amplitude --highpass_filter=1.0 --result_dir={root_path}/{result_path} --result_fname=phasenet_picks_{node_rank:03d} --batch_size=1" + f"python {model_path}/phasenet/predict.py --model={model_path}/model/190703-214543 --data_dir=./ --data_list={root_path}/{result_path}/mseed_list_{node_rank:03d}_{num_nodes:03d}.csv --response_xml={root_path}/{region}/results/network/inventory.xml --format=mseed --amplitude --highpass_filter=1.0 --result_dir={root_path}/{result_path} --result_fname=phasenet_picks_{node_rank:03d}_{num_nodes:03d} --batch_size=1" ) if protocol != "file": @@ -87,15 +89,10 @@ def run_phasenet( if __name__ == "__main__": - import json - import os - import sys - - root_path = "local" - region = "demo" - if len(sys.argv) > 1: - root_path = sys.argv[1] - region = sys.argv[2] + + args = parse_args() + root_path = args.root_path + region = args.region with open(f"{root_path}/{region}/config.json", "r") as fp: config = json.load(fp) diff --git a/scripts/set_config.py b/scripts/set_config.py index ec46716..036028d 100644 --- a/scripts/set_config.py +++ b/scripts/set_config.py @@ -1,12 +1,13 @@ # %% +import json +import os from typing import Dict +import fsspec +from args import parse_args -def set_config(root_path: str, region: str, config: Dict, protocol: str, bucket: str, token: Dict) -> Dict: - import json - import os - import fsspec +def set_config(root_path: str, region: str, config: Dict, protocol: str, bucket: str, token: Dict) -> Dict: fs = fsspec.filesystem(protocol, token=token) if not os.path.exists(root_path): @@ -22,7 +23,6 @@ def set_config(root_path: str, region: str, config: Dict, protocol: str, bucket: "location", "relocation", "mechanism", - "results", ]: if not os.path.exists(f"{root_path}/{data_dir}/results/{subfolder}"): os.makedirs(f"{root_path}/{data_dir}/results/{subfolder}", exist_ok=True) @@ -31,10 +31,8 @@ def set_config(root_path: str, region: str, config: Dict, protocol: str, bucket: ## default values config_region["num_nodes"] = 1 ## submodules config - if "default" in config: - config_region.update(config["default"]) if "obspy" in config: - config_region.update(config["obspy"]) + config_region["obspy"] = config["obspy"] if "phasenet" in config: config_region["phasenet"] = config["phasenet"] if "gamma" in config: @@ -51,6 +49,7 @@ def set_config(root_path: str, region: str, config: Dict, protocol: str, bucket: with open(f"{root_path}/{data_dir}/config.json", "w") as fp: json.dump(config_region, fp, indent=4) + if protocol != "file": fs.put(f"{root_path}/{data_dir}/config.json", f"{bucket}/{data_dir}/config.json") print(json.dumps(config_region, indent=4)) @@ -59,15 +58,10 @@ def set_config(root_path: str, region: str, config: Dict, protocol: str, bucket: if __name__ == "__main__": - import json - import os - import sys - root_path = "local" - region = "demo" - if len(sys.argv) > 1: - root_path = sys.argv[1] - region = sys.argv[2] + args = parse_args() + root_path = args.root_path + region = args.region with open("config.json", "r") as fp: config = json.load(fp)