diff --git a/scripts/cut_templates_cc.py b/scripts/cut_templates_cc.py index 7c81259..6951108 100644 --- a/scripts/cut_templates_cc.py +++ b/scripts/cut_templates_cc.py @@ -306,6 +306,7 @@ def cut_templates(root_path, region, config): min_cc_score = 0.5 min_obs = 8 max_obs = 100 + component_mapping = {"3": 0, "2": 1, "1": 2, "E": 0, "N": 1, "Z": 2} config.update( { @@ -322,14 +323,13 @@ def cut_templates(root_path, region, config): "min_obs": min_obs, "max_obs": max_obs, "sampling_rate": sampling_rate, + "component_mapping": component_mapping, } ) # %% stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations_sst.csv") - stations = stations[stations["network"] == "CI"] stations.sort_values(by=["latitude", "longitude"], inplace=True) - # stations = stations[stations["station_id"].isin(picks["station_id"].unique())] print(f"{len(stations) = }") print(stations.iloc[:5]) diff --git a/scripts/run_qtm.py b/scripts/run_qtm.py index a1f61b6..447f29d 100644 --- a/scripts/run_qtm.py +++ b/scripts/run_qtm.py @@ -24,29 +24,27 @@ def parse_args(): root_path = args.root_path region = args.region -output_path = f"{region}/qtm" -if not os.path.exists(f"{root_path}/{output_path}"): - os.makedirs(f"{root_path}/{output_path}") +result_path = f"{region}/qtm" +if not os.path.exists(f"{root_path}/{result_path}"): + os.makedirs(f"{root_path}/{result_path}") # %% with open(f"{root_path}/{region}/config.json", "r") as fp: config = json.load(fp) # %% Get mseed list -mseed_list = sorted(glob(f"{root_path}/{region}/waveforms/????-???/??/*.mseed")) -folder_level = 3 # year-jday/hour/station_id.mseed +mseed_list = sorted(glob(f"{root_path}/{region}/waveforms/????/???/??/*.mseed")) +subdir = 4 # year/jday/hour/station_id.mseed mseeds = pd.DataFrame(mseed_list, columns=["fname"]) -mseeds["mseed_station_id"] = mseeds["fname"].apply( - lambda x: "/".join(x.replace(".mseed", "").split("/")[-folder_level:])[:-1] -) +mseeds["mseed_id"] = mseeds["fname"].apply(lambda x: "/".join(x.replace(".mseed", "").split("/")[-subdir:])[:-1]) mseeds["station_id"] = mseeds["fname"].apply(lambda x: x.replace(".mseed", "").split("/")[-1][:-1]) mseeds["begin_time"] = mseeds["fname"].apply( - lambda x: datetime.strptime("T".join(x.split("/")[-folder_level : -folder_level + 2]), "%Y-%jT%H").strftime( - "%Y-%m-%dT%H:%M:%S.%f" - ) + lambda x: datetime.strptime( + f"{x.split('/')[-subdir]}-{x.split('/')[-subdir+1]}T{x.split('/')[-subdir+2]}", "%Y-%jT%H" + ).strftime("%Y-%m-%dT%H:%M:%S.%f") ) mseeds = ( - mseeds.groupby("mseed_station_id") + mseeds.groupby("mseed_id") .agg( { "station_id": lambda x: ",".join(x.unique()), @@ -56,27 +54,27 @@ def parse_args(): ) .reset_index() ) +mseeds["idx_mseed"] = np.arange(len(mseeds)) mseeds.to_csv(f"{root_path}/{region}/qtm/mseed_list.csv", index=False) with open(f"{root_path}/{region}/qtm/mseed_list.txt", "w") as fp: fp.write("\n".join(mseeds["fname"])) # %% -with open(f"{root_path}/{region}/qtm/event_phase_station_id.txt", "r") as fp: - event_phase_station_id = fp.read().splitlines() - -event_phase_station_id = pd.DataFrame(event_phase_station_id, columns=["event_phase_station_id"]) -event_phase_station_id[["event_index", "phase_type", "station_id"]] = event_phase_station_id[ - "event_phase_station_id" -].str.split("/", expand=True) +# with open(f"{root_path}/{region}/qtm/event_phase_station_id.txt", "r") as fp: +# event_phase_station_id = fp.read().splitlines() +picks = pd.read_csv(f"{root_path}/{region}/cctorch/cctorch_picks.csv") +stations = pd.read_csv(f"{root_path}/{region}/cctorch/cctorch_stations.csv") +picks = picks.merge(stations[["idx_sta", "station_id"]], on="idx_sta") +print(picks.iloc[:10]) # %% Generate event mseed pairs pairs = [] -unique_station_ids = np.intersect1d(mseeds["station_id"].unique(), event_phase_station_id["station_id"].unique()) +unique_station_ids = np.intersect1d(mseeds["station_id"].unique(), picks["station_id"].unique()) for station_id in unique_station_ids: - mseed_index = mseeds.loc[mseeds["station_id"] == station_id].index - event_phase_station_index = event_phase_station_id.loc[event_phase_station_id["station_id"] == station_id].index - pairs.extend(product(mseed_index, event_phase_station_index)) + mseed_index = mseeds.loc[mseeds["station_id"] == station_id, "idx_mseed"] + pick_index = picks.loc[picks["station_id"] == station_id, "idx_pick"] + pairs.extend(product(mseed_index, pick_index)) # %% with open(f"{root_path}/{region}/qtm/pairs.txt", "w") as fp: @@ -85,21 +83,28 @@ def parse_args(): ## based on GPU memory batch = 256 block_size1 = 1 -block_size2 = 10_000 # ~7GB +block_size2 = 100_000 # ~7GB # %% -base_cmd = f"../CCTorch/run.py --mode=TM --pair_list={root_path}/{region}/qtm/pairs.txt --data_list1={root_path}/{region}/qtm/mseed_list.txt --data_format1=mseed --data_path2={root_path}/{region}/qtm/template.dat --data_format2=memmap --config={root_path}/{region}/qtm/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --normalize --reduce_c --result_path={root_path}/{region}/qtm/ccpairs" +base_cmd = ( + f"../CCTorch/run.py --mode=TM --pair_list={root_path}/{region}/qtm/pairs.txt " + f"--data_list1={root_path}/{region}/qtm/mseed_list.txt --data_format1=mseed " + f"--data_list2={root_path}/{region}/cctorch/cctorch_picks.csv --data_path2={root_path}/{region}/cctorch/template.dat --data_format2=memmap " + f"--config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --normalize --reduce_c --result_path={root_path}/{region}/qtm/ccpairs" +) # %% num_gpu = torch.cuda.device_count() if num_gpu == 0: if os.uname().sysname == "Darwin": - os.system(f"python {base_cmd} --device=mps") + cmd = f"python {base_cmd} --device=mps" else: - os.system(f"python {base_cmd} --device=cpu") + cmd = f"python {base_cmd} --device=cpu" elif num_gpu == 1: - os.system(f"python {base_cmd}") + cmd = f"python {base_cmd}" else: - os.system(f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}") + cmd = f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}" # %% +print(cmd) +os.system(cmd)