From acf2fd41ffc2196b88036ac364b283e6ae93eff3 Mon Sep 17 00:00:00 2001
From: zhuwq
Date: Mon, 16 Sep 2024 09:20:09 -0700
Subject: [PATCH] add hypodd to japan

---
 examples/japan/.gitignore          |   1 +
 examples/japan/plot_catalog.py     |  23 ++
 examples/japan/run_adloc_cc.py     | 330 +++++++++++++++++++++++++++++
 examples/japan/run_growclust_cc.py |  65 ++++++
 examples/japan/run_growclust_cc.sh | 119 +++++++++++
 examples/japan/run_hypodd_cc.sh    |   8 +-
 6 files changed, 542 insertions(+), 4 deletions(-)
 create mode 100644 examples/japan/.gitignore
 create mode 100644 examples/japan/run_adloc_cc.py
 create mode 100644 examples/japan/run_growclust_cc.py
 create mode 100644 examples/japan/run_growclust_cc.sh

diff --git a/examples/japan/.gitignore b/examples/japan/.gitignore
new file mode 100644
index 0000000..51b4cfd
--- /dev/null
+++ b/examples/japan/.gitignore
@@ -0,0 +1 @@
+local/
diff --git a/examples/japan/plot_catalog.py b/examples/japan/plot_catalog.py
index 4d974d6..ed442ce 100644
--- a/examples/japan/plot_catalog.py
+++ b/examples/japan/plot_catalog.py
@@ -28,6 +28,10 @@ with open(f"{root_path}/{region}/config.json", "r") as f:
     config = json.load(f)
 print(json.dumps(config, indent=4, sort_keys=True))
 
+if "mindepth" not in config:
+    config["mindepth"] = 0
+if "maxdepth" not in config:
+    config["maxdepth"] = 30
 xlim = [config["minlongitude"], config["maxlongitude"]]
 ylim = [config["minlatitude"], config["maxlatitude"]]
 
@@ -935,6 +939,25 @@ def plot3d(x, y, z, config, fig_name):
             config_plot3d,
             f"{root_path}/{figure_path}/earthquake_location_adloc.html",
         )
+
+    if adloc_dt_exist and len(adloc_dt_catalog) > 0:
+        plot3d(
+            adloc_dt_catalog["longitude"],
+            adloc_dt_catalog["latitude"],
+            adloc_dt_catalog["depth_km"],
+            config_plot3d,
+            f"{root_path}/{figure_path}/earthquake_location_adloc_dt.html",
+        )
+
+    if adloc_dtcc_exist and len(adloc_dtcc_catalog) > 0:
+        plot3d(
+            adloc_dtcc_catalog["longitude"],
+            adloc_dtcc_catalog["latitude"],
+            adloc_dtcc_catalog["depth_km"],
+            config_plot3d,
+            f"{root_path}/{figure_path}/earthquake_location_adloc_dtcc.html",
+        )
+
     if hypodd_ct_exist and len(catalog_ct_hypodd) > 0:
         plot3d(
             catalog_ct_hypodd["LON"],
diff --git a/examples/japan/run_adloc_cc.py b/examples/japan/run_adloc_cc.py
new file mode 100644
index 0000000..113540d
--- /dev/null
+++ b/examples/japan/run_adloc_cc.py
@@ -0,0 +1,330 @@
+# %%
+import json
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import torch
+import torch.distributed as dist
+import torch.optim as optim
+from adloc.adloc import TravelTimeDD
+from adloc.data import PhaseDatasetDTCC
+from adloc.eikonal2d import init_eikonal2d
+from adloc.inversion import optimize_dd
+
+# from args import parse_args
+from plotting import plotting_dd
+from pyproj import Proj
+from torch.distributed import init_process_group
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.data import DataLoader
+from tqdm.auto import tqdm
+
+torch.manual_seed(0)
+np.random.seed(0)
+
+
+# %%
+if __name__ == "__main__":
+
+    # %%
+    # args = parse_args()
+    # root_path = args.root_path
+    # region = args.region
+    root_path = "local"
+    region = "hinet"
+    data_path = f"{root_path}/{region}/cctorch"
+    result_path = f"{root_path}/{region}/adloc_dd"
+    figure_path = f"{root_path}/{region}/adloc_dd/figures"
+    if not os.path.exists(data_path):
+        os.makedirs(data_path)
+    if not os.path.exists(result_path):
+        os.makedirs(result_path)
+
+    # %%
+    ddp = int(os.environ.get("RANK", -1)) != -1  # is this a ddp run?
+    if ddp:
+        init_process_group(backend="gloo")
+        ddp_rank = int(os.environ["RANK"])
+        ddp_local_rank = int(os.environ["LOCAL_RANK"])
+        ddp_world_size = int(os.environ["WORLD_SIZE"])
+        master_process = ddp_rank == 0  # this process will do logging, checkpointing etc.
+        print(f"DDP rank {ddp_rank}, local rank {ddp_local_rank}, world size {ddp_world_size}")
+    else:
+        # vanilla, non-DDP run
+        ddp_rank = 0
+        ddp_local_rank = 0
+        ddp_world_size = 1
+        master_process = True
+        print("Non-DDP run")
+
+    # %% reading from the generated files
+    events = pd.read_csv(os.path.join(data_path, "cctorch_events.csv"))
+    events["event_time"] = pd.to_datetime(events["event_time"])
+    events.rename(columns={"event_time": "time"}, inplace=True)
+    stations = pd.read_csv(os.path.join(data_path, "cctorch_stations.csv"))
+    picks = pd.read_csv(os.path.join(data_path, "cctorch_picks.csv"), parse_dates=["phase_time"])
+    pairs = pd.read_csv(os.path.join(data_path, "dtcc.csv"))
+
+    with open(f"{root_path}/{region}/config.json", "r") as fp:
+        config = json.load(fp)
+    print(json.dumps(config, indent=4))
+    config["use_amplitude"] = True
+
+    # ## Eikonal for 1D velocity model
+    zz = [0.0, 5.5, 16.0, 32.0]
+    vp = [5.5, 5.5, 6.7, 7.8]
+    vp_vs_ratio = 1.73
+    vs = [v / vp_vs_ratio for v in vp]
+    h = 0.3
+
+    # %%
+    if not os.path.exists(result_path):
+        os.makedirs(result_path)
+    if not os.path.exists(figure_path):
+        os.makedirs(figure_path)
+
+    # %%
+    ## Automatic region; you can also specify a region
+    # lon0 = stations["longitude"].median()
+    # lat0 = stations["latitude"].median()
+    lat0 = (config["minlatitude"] + config["maxlatitude"]) / 2
+    lon0 = (config["minlongitude"] + config["maxlongitude"]) / 2
+    proj = Proj(f"+proj=sterea +lon_0={lon0} +lat_0={lat0} +units=km")
+
+    stations["x_km"], stations["y_km"] = proj(stations["longitude"], stations["latitude"])
+    stations["z_km"] = stations["depth_km"]
+    events["x_km"], events["y_km"] = proj(events["longitude"], events["latitude"])
+    events["z_km"] = events["depth_km"]
+
+    events_init = events.copy()
+
+    ## set up the config; you can also specify the region manually
+    if ("xlim_km" not in config) or ("ylim_km" not in config) or ("zlim_km" not in config):
+        xmin, ymin = proj(config["minlongitude"], config["minlatitude"])
+        xmax, ymax = proj(config["maxlongitude"], config["maxlatitude"])
+        # zmin, zmax = config["mindepth"], config["maxdepth"]
+        zmin = config["mindepth"] if "mindepth" in config else 0.0
+        zmax = config["maxdepth"] if "maxdepth" in config else 30.0
+        config["xlim_km"] = (xmin, xmax)
+        config["ylim_km"] = (ymin, ymax)
+        config["zlim_km"] = (zmin, zmax)
+
+    mapping_phase_type_int = {"P": 0, "S": 1}
+    config["vel"] = {"P": 6.0, "S": 6.0 / 1.73}
+    config["vel"] = {mapping_phase_type_int[k]: v for k, v in config["vel"].items()}
+
+    # %%
+    ## Eikonal for 1D velocity model
+    # zz = [0.0, 5.5, 16.0, 32.0]
+    # vp = [5.5, 5.5, 6.7, 7.8]
+    # vp_vs_ratio = 1.73
+    # vs = [v / vp_vs_ratio for v in vp]
+    # zz = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 30.0]
+    # vp = [4.746, 4.793, 4.799, 5.045, 5.721, 5.879, 6.504, 6.708, 6.725, 7.800]
+    # vs = [2.469, 2.470, 2.929, 2.930, 3.402, 3.403, 3.848, 3.907, 3.963, 4.500]
+    # h = 0.3
+    vel = {"Z": zz, "P": vp, "S": vs}
+    config["eikonal"] = {
+        "vel": vel,
+        "h": h,
+        "xlim_km": config["xlim_km"],
+        "ylim_km": config["ylim_km"],
+        "zlim_km": config["zlim_km"],
+    }
+    config["eikonal"] = init_eikonal2d(config["eikonal"])
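+
+    # For intuition, a minimal sketch of the rasterization an eikonal solver needs as
+    # input: the layered 1D model above resampled onto a regular depth grid with spacing
+    # h. This mirrors what init_eikonal2d is assumed to do internally; the np.interp
+    # calls below are illustrative, not the library's actual implementation.
+    _z_fine = np.arange(config["zlim_km"][0], config["zlim_km"][1] + h, h)
+    _vp_fine = np.interp(_z_fine, zz, vp)  # piecewise-linear P-velocity profile
+    _vs_fine = np.interp(_z_fine, zz, vs)  # S profile implied by the constant vp/vs ratio
+    print(f"1D model resampled to {len(_z_fine)} depth nodes at h = {h} km")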
+
+    # %%
+    config["bfgs_bounds"] = (
+        (config["xlim_km"][0] - 1, config["xlim_km"][1] + 1),  # x
+        (config["ylim_km"][0] - 1, config["ylim_km"][1] + 1),  # y
+        (0, config["zlim_km"][1] + 1),  # z
+        (None, None),  # t
+    )
+
+    # %%
+    phase_dataset = PhaseDatasetDTCC(pairs, picks, events, stations, rank=ddp_local_rank, world_size=ddp_world_size)
+    data_loader = DataLoader(phase_dataset, batch_size=None, shuffle=False, num_workers=0, drop_last=False)
+
+    # %%
+    num_event = len(events)
+    num_station = len(stations)
+    station_loc = stations[["x_km", "y_km", "z_km"]].values
+    event_loc = events[["x_km", "y_km", "z_km"]].values  # + np.random.randn(num_event, 3) * 2.0
+    travel_time = TravelTimeDD(
+        num_event,
+        num_station,
+        station_loc=station_loc,
+        event_loc=event_loc,
+        # event_time=event_time,
+        eikonal=config["eikonal"],
+    )
+    if ddp:
+        travel_time = DDP(travel_time)
+    raw_travel_time = travel_time.module if ddp else travel_time
+
+    if ddp_local_rank == 0:
+        print(f"Dataset: {len(events)} events, {len(stations)} stations, {len(data_loader)} batches")
+
+    ## invert loss
+    ######################################################################################################
+    optimizer = optim.Adam(params=travel_time.parameters(), lr=0.1)
+    valid_index = np.ones(len(pairs), dtype=bool)
+    EPOCHS = 100
+    for epoch in range(EPOCHS):
+        loss = 0
+        optimizer.zero_grad()
+        # for meta in tqdm(phase_dataset, desc=f"Epoch {epoch}"):
+        for meta in data_loader:
+            out = travel_time(
+                meta["idx_sta"],
+                meta["idx_eve"],
+                meta["phase_type"],
+                meta["phase_time"],
+                meta["phase_weight"],
+            )
+            pred_, loss_ = out["phase_time"], out["loss"]
+
+            loss_.backward()
+
+            if ddp:
+                dist.all_reduce(loss_, op=dist.ReduceOp.SUM)
+                # loss_ /= ddp_world_size
+
+            loss += loss_
+
+        # torch.nn.utils.clip_grad_norm_(travel_time.parameters(), 1.0)
+        optimizer.step()
+        with torch.no_grad():
+            raw_travel_time.event_loc.weight.data[:, 2].clamp_(
+                min=config["zlim_km"][0] + 0.1, max=config["zlim_km"][1] - 0.1
+            )
+            raw_travel_time.event_loc.weight.data[torch.isnan(raw_travel_time.event_loc.weight)] = 0.0
+        if ddp_local_rank == 0:
+            print(f"Epoch {epoch}: loss {loss:.6e} of {np.sum(valid_index)} picks, {loss / np.sum(valid_index):.6e}")
+
+        ### filtering
+        pred_time = []
+        phase_dataset.valid_index = np.ones(len(pairs), dtype=bool)
+        for meta in phase_dataset:
+            meta = travel_time(
+                meta["idx_sta"],
+                meta["idx_eve"],
+                meta["phase_type"],
+                meta["phase_time"],
+                meta["phase_weight"],
+            )
+            pred_time.append(meta["phase_time"].detach().numpy())
+
+        pred_time = np.concatenate(pred_time)
+        valid_index = (
+            np.abs(pred_time - pairs["dt"]) < np.std((pred_time - pairs["dt"])[valid_index]) * 3.0
+        )  # * (np.cos(epoch * np.pi / EPOCHS) + 2.0)  # 3std -> 1std
+
+        pairs_df = pd.DataFrame(
+            {
+                "event_index1": pairs["idx_eve1"],
+                "event_index2": pairs["idx_eve2"],
+                "station_index": pairs["idx_sta"],
+            }
+        )
+        pairs_df = pairs_df[valid_index]
+        config["MIN_OBS"] = 8
+        pairs_df = pairs_df.groupby(["event_index1", "event_index2"], as_index=False, group_keys=False).filter(
+            lambda x: len(x) >= config["MIN_OBS"]
+        )
+        valid_index = np.zeros(len(pairs), dtype=bool)
+        valid_index[pairs_df.index] = True
+
+        phase_dataset.valid_index = valid_index
+
+        invert_event_loc = raw_travel_time.event_loc.weight.clone().detach().numpy()
+        invert_event_time = raw_travel_time.event_time.weight.clone().detach().numpy()
+        valid_event_index = np.concatenate(
+            [np.unique(pairs["idx_eve1"][valid_index]), np.unique(pairs["idx_eve2"][valid_index])]
+        )
+        valid_event_index = np.sort(np.unique(valid_event_index))
+
+        if ddp_local_rank == 0 and (epoch % 10 == 0):
+            events = events_init.copy()
+            events["time"] = events["time"] + pd.to_timedelta(np.squeeze(invert_event_time), unit="s")
+            events["x_km"] = invert_event_loc[:, 0]
+            events["y_km"] = invert_event_loc[:, 1]
+            events["z_km"] = invert_event_loc[:, 2]
+            events[["longitude", "latitude"]] = events.apply(
+                lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1
+            )
+            events["depth_km"] = events["z_km"]
+            events = events.iloc[valid_event_index]
+            events.to_csv(
+                f"{result_path}/adloc_dtcc_events_{epoch//10}.csv",
+                index=False,
+                float_format="%.5f",
+                date_format="%Y-%m-%dT%H:%M:%S.%f",
+            )
+            plotting_dd(events, stations, config, figure_path, events_init, suffix=f"_ddcc_{epoch//10}")
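+
+    # A tiny self-contained illustration (toy numbers, not project data) of the two-stage
+    # filter applied inside the loop above: differential times whose residual exceeds
+    # 3 * std are dropped first, then event pairs left with fewer than MIN_OBS
+    # observations are removed entirely. A smaller threshold is used here so both stages
+    # are visible on 12 samples.
+    _res = np.array([0.02, -0.01, 0.00, 0.01, -0.02, 0.03, -0.03, 0.01, 0.00, -0.01, 0.02, 10.0])
+    _keep = np.abs(_res) < np.std(_res) * 3.0  # the 10.0 s outlier fails this cut
+    _toy = pd.DataFrame({"eve1": [0] * 6 + [1] * 6, "eve2": [1] * 6 + [2] * 6})[_keep]
+    _min_obs = 6  # demo value; the inversion above uses config["MIN_OBS"] = 8
+    _toy = _toy.groupby(["eve1", "eve2"]).filter(lambda x: len(x) >= _min_obs)
+    print(f"toy filter demo: {len(_toy)} of {len(_res)} differential times survive")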
+
+    # ######################################################################################################
+    # Optional L-BFGS refinement after the Adam loop, kept commented out:
+    # optimizer = optim.LBFGS(params=raw_travel_time.parameters(), max_iter=10, line_search_fn="strong_wolfe")
+
+    # def closure():
+    #     optimizer.zero_grad()
+    #     loss = 0
+    #     # for meta in tqdm(phase_dataset, desc="BFGS"):
+    #     if ddp_local_rank == 0:
+    #         print("BFGS: ", end="")
+    #     for meta in phase_dataset:
+    #         if ddp_local_rank == 0:
+    #             print(".", end="")
+
+    #         loss_ = travel_time(
+    #             meta["idx_sta"],
+    #             meta["idx_eve"],
+    #             meta["phase_type"],
+    #             meta["phase_time"],
+    #             meta["phase_weight"],
+    #         )["loss"]
+    #         loss_.backward()
+
+    #         if ddp:
+    #             dist.all_reduce(loss_, op=dist.ReduceOp.SUM)
+    #             # loss_ /= ddp_world_size
+
+    #         loss += loss_
+
+    #     if ddp_local_rank == 0:
+    #         print(f"Loss: {loss}")
+    #     raw_travel_time.event_loc.weight.data[:, 2].clamp_(min=config["zlim_km"][0], max=config["zlim_km"][1])
+    #     return loss
+
+    # optimizer.step(closure)
+    # ######################################################################################################
+
+    # %%
+    if ddp_local_rank == 0:
+        invert_event_loc = raw_travel_time.event_loc.weight.clone().detach().numpy()
+        invert_event_time = raw_travel_time.event_time.weight.clone().detach().numpy()
+
+        events = events_init.copy()
+        events["time"] = events["time"] + pd.to_timedelta(np.squeeze(invert_event_time), unit="s")
+        events["x_km"] = invert_event_loc[:, 0]
+        events["y_km"] = invert_event_loc[:, 1]
+        events["z_km"] = invert_event_loc[:, 2]
+        events[["longitude", "latitude"]] = events.apply(
+            lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1
+        )
+        events["depth_km"] = events["z_km"]
+        events = events.iloc[valid_event_index]
+        events.to_csv(
+            f"{result_path}/adloc_dtcc_events.csv", index=False, float_format="%.5f", date_format="%Y-%m-%dT%H:%M:%S.%f"
+        )
+        plotting_dd(events, stations, config, figure_path, events_init, suffix="_dtcc")
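+
+        # Hedged sanity check (added for illustration, not part of the original workflow):
+        # summarize how far events moved from their initial locations; an unusually large
+        # median shift usually points to a projection or velocity-model problem rather
+        # than a real relocation signal.
+        _init = events_init.loc[events.index]
+        _dh = np.sqrt((events["x_km"] - _init["x_km"]) ** 2 + (events["y_km"] - _init["y_km"]) ** 2)
+        _dz = (events["z_km"] - _init["z_km"]).abs()
+        print(f"median horizontal shift: {_dh.median():.3f} km, median depth shift: {_dz.median():.3f} km")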
f"{region}/cctorch/cctorch_stations.csv" +stations = pd.read_csv(f"{root_path}/{station_csv}") +stations["station"] = stations["station_id"].apply(lambda x: x.split(".")[2]) +stations.set_index("station_id", inplace=True) + + +lines = [] +for i, row in stations.iterrows(): + # line = f"{row['network']}{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" + line = f"{row['station']:<4} {row['latitude']:.4f} {row['longitude']:.4f}\n" + lines.append(line) + +with open(f"{root_path}/{result_path}/stlist.txt", "w") as fp: + fp.writelines(lines) + + +# %% +# events_csv = f"{region}/results/phase_association/events.csv" +# events_csv = f"{region}/adloc/ransac_events.csv" +events_csv = f"{region}/cctorch/cctorch_events.csv" +# event_file = f"{region}/cctorch/events.csv" +events = pd.read_csv(f"{root_path}/{events_csv}") +# event_df = event_df[event_df["gamma_score"] > 10] +# event_index = [f"{x:06d}" for x in event_df["event_index"]] +events["time"] = pd.to_datetime(events["event_time"]) +if "magnitude" not in events.columns: + events["magnitude"] = 0.0 + +events[["year", "month", "day", "hour", "minute", "second"]] = ( + events["time"] + # .apply(lambda x: datetime.fromisoformat(x).strftime("%Y %m %d %H %M %S.%f").split(" ")) + .apply(lambda x: x.strftime("%Y %m %d %H %M %S.%f").split(" ")) + .apply(pd.Series) + .apply(pd.to_numeric) +) + +lines = [] +for i, row in events.iterrows(): + # yr mon day hr min sec lat lon dep mag eh ez rms evid + line = f"{row['year']:4d} {row['month']:2d} {row['day']:2d} {row['hour']:2d} {row['minute']:2d} {row['second']:7.3f} {row['latitude']:.4f} {row['longitude']:.4f} {row['depth_km']:7.3f} {row['magnitude']:.2f} 0.000 0.000 0.000 {row['event_index']:6d}\n" + lines.append(line) + +with open(f"{root_path}/{result_path}/evlist.txt", "w") as fp: + fp.writelines(lines) + +# %% +os.system(f"bash run_growclust_cc.sh {root_path} {region}") diff --git a/examples/japan/run_growclust_cc.sh b/examples/japan/run_growclust_cc.sh new file mode 100644 index 0000000..8154841 --- /dev/null +++ b/examples/japan/run_growclust_cc.sh @@ -0,0 +1,119 @@ +#!/bin/bash +set -x +WORKING_DIR=$PWD +if [ $# -eq 2 ]; then + root_path=$1 + region=$2 +else + root_path="local" + region="demo" +fi + +if [ ! -d "$root_path/$region/growclust" ]; then + mkdir -p $root_path/$region/growclust +fi + +cp $root_path/$region/cctorch/dt.cc $root_path/$region/growclust/dt.cc +cd $root_path/$region/growclust +mkdir -p TT OUT + +if [ ! 
-d "GrowClust" ]; then + git clone git@github.com:zhuwq0/GrowClust.git + make -C GrowClust/SRC/ +fi + +cat < growclust.inp +**** Example GrowClust Control File ***** +******** Daniel Trugman, 2016 ********** +******************************************* +* +******************************************* +************* Event list **************** +******************************************* +* evlist_fmt (0 = evlist, 1 = phase, 2 = GrowClust, 3 = HypoInverse) +1 +* fin_evlist (event list file name) +evlist.txt +* +******************************************* +************ Station list ************* +******************************************* +* stlist_fmt (0 = SEED channel, 1 = station name) +1 +* fin_stlist (station list file name) +stlist.txt +* +******************************************* +************* XCOR data *************** +******************************************* +* xcordat_fmt (0 = binary, 1 = text), tdif_fmt (21 = tt2-tt1, 12 = tt1-tt2) +1 12 +* fin_xcordat +dt.cc +* +******************************************* +*** Velocity Model / Travel Time Tables *** +******************************************* +* fin_vzmdl (input vz model file) +vzmodel.txt +* fout_vzfine (output, interpolated vz model file) +TT/vzfine.txt +* fout_pTT (output travel time table, P phase) +TT/tt.pg +* fout_sTT (output travel time table, S phase) +TT/tt.sg +* +****************************************** +***** Travel Time Table Parameters ****** +****************************************** +* vpvs_factor rayparam_min (-1 = default) + 1.732 0.0 +* tt_dep0 tt_dep1 tt_ddep + 0. 71. 1. +* tt_del0 tt_del1 tt_ddel + 0. 500. 2. +* +****************************************** +***** GrowClust Algorithm Parameters ***** +****************************************** +* rmin delmax rmsmax + 0.1 120 1.0 +* rpsavgmin, rmincut ngoodmin iponly + 0 0 8 0 +* +****************************************** +************ Output files **************** +****************************************** +* nboot nbranch_min + 0 1 +* fout_cat (relocated catalog) +OUT/out.growclust_cc_cat +* fout_clust (relocated cluster file) +OUT/out.growclust_cc_clust +* fout_log (program log) +OUT/out.growclust_cc_log +* fout_boot (bootstrap distribution) +OUT/out.growclust_cc_boot +****************************************** +****************************************** +EOF + +cat < vzmodel.txt +0.0 5.30 0.00 +1.0 5.65 0.00 +3.0 5.93 0.00 +5.0 6.20 0.00 +7.0 6.20 0.00 +9.0 6.20 0.00 +11.0 6.20 0.00 +13.0 6.20 0.00 +17.0 6.20 0.00 +21.0 6.20 0.00 +31.00 7.50 0.00 +31.10 8.11 0.00 +100.0 8.11 0.00 +EOF + +./GrowClust/SRC/growclust growclust.inp +cp OUT/out.growclust_cc_cat growclust_cc_catalog.txt +cd $WORKING_DIR diff --git a/examples/japan/run_hypodd_cc.sh b/examples/japan/run_hypodd_cc.sh index 9b33a01..79b0686 100644 --- a/examples/japan/run_hypodd_cc.sh +++ b/examples/japan/run_hypodd_cc.sh @@ -78,10 +78,10 @@ hypodd.src * DAMP: damping (for lsqr only) * --- CROSS DATA ----- ----CATALOG DATA ---- * NITER WTCCP WTCCS WRCC WDCC WTCTP WTCTS WRCT WDCT DAMP - 4 1 1 -9 -9 -9 -9 -9 -9 70 - 4 1 1 6 -9 -9 -9 -9 -9 70 - 4 1 0.8 3 4 -9 -9 -9 -9 70 - 4 1 0.8 2 2 -9 -9 -9 -9 70 + 4 1 1 -9 -9 -9 -9 -9 -9 170 + 4 1 1 6 -9 -9 -9 -9 -9 170 + 4 1 0.8 3 4 -9 -9 -9 -9 170 + 4 1 0.8 2 2 -9 -9 -9 -9 170 * *--- 1D model: * NLAY: number of model layers