update qtm

AI4EPS · Aug 26, 2024 · d16f2a1 · d16f2a1
1 parent edfeb61
commit d16f2a1
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 31 deletions.
diff --git a/scripts/cut_templates_cc.py b/scripts/cut_templates_cc.py
@@ -306,6 +306,7 @@ def cut_templates(root_path, region, config):
     min_cc_score = 0.5
     min_obs = 8
     max_obs = 100
+    component_mapping = {"3": 0, "2": 1, "1": 2, "E": 0, "N": 1, "Z": 2}
 
     config.update(
         {
@@ -322,14 +323,13 @@ def cut_templates(root_path, region, config):
             "min_obs": min_obs,
             "max_obs": max_obs,
             "sampling_rate": sampling_rate,
+            "component_mapping": component_mapping,
         }
     )
 
     # %%
     stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations_sst.csv")
-    stations = stations[stations["network"] == "CI"]
     stations.sort_values(by=["latitude", "longitude"], inplace=True)
-    # stations = stations[stations["station_id"].isin(picks["station_id"].unique())]
     print(f"{len(stations) = }")
     print(stations.iloc[:5])
 

diff --git a/scripts/run_qtm.py b/scripts/run_qtm.py
@@ -24,29 +24,27 @@ def parse_args():
 root_path = args.root_path
 region = args.region
 
-output_path = f"{region}/qtm"
-if not os.path.exists(f"{root_path}/{output_path}"):
-    os.makedirs(f"{root_path}/{output_path}")
+result_path = f"{region}/qtm"
+if not os.path.exists(f"{root_path}/{result_path}"):
+    os.makedirs(f"{root_path}/{result_path}")
 
 # %%
 with open(f"{root_path}/{region}/config.json", "r") as fp:
     config = json.load(fp)
 
 # %% Get mseed list
-mseed_list = sorted(glob(f"{root_path}/{region}/waveforms/????-???/??/*.mseed"))
-folder_level = 3  # year-jday/hour/station_id.mseed
+mseed_list = sorted(glob(f"{root_path}/{region}/waveforms/????/???/??/*.mseed"))
+subdir = 4  # year/jday/hour/station_id.mseed
 mseeds = pd.DataFrame(mseed_list, columns=["fname"])
-mseeds["mseed_station_id"] = mseeds["fname"].apply(
-    lambda x: "/".join(x.replace(".mseed", "").split("/")[-folder_level:])[:-1]
-)
+mseeds["mseed_id"] = mseeds["fname"].apply(lambda x: "/".join(x.replace(".mseed", "").split("/")[-subdir:])[:-1])
 mseeds["station_id"] = mseeds["fname"].apply(lambda x: x.replace(".mseed", "").split("/")[-1][:-1])
 mseeds["begin_time"] = mseeds["fname"].apply(
-    lambda x: datetime.strptime("T".join(x.split("/")[-folder_level : -folder_level + 2]), "%Y-%jT%H").strftime(
-        "%Y-%m-%dT%H:%M:%S.%f"
-    )
+    lambda x: datetime.strptime(
+        f"{x.split('/')[-subdir]}-{x.split('/')[-subdir+1]}T{x.split('/')[-subdir+2]}", "%Y-%jT%H"
+    ).strftime("%Y-%m-%dT%H:%M:%S.%f")
 )
 mseeds = (
-    mseeds.groupby("mseed_station_id")
+    mseeds.groupby("mseed_id")
     .agg(
         {
             "station_id": lambda x: ",".join(x.unique()),
@@ -56,27 +54,27 @@ def parse_args():
     )
     .reset_index()
 )
+mseeds["idx_mseed"] = np.arange(len(mseeds))
 mseeds.to_csv(f"{root_path}/{region}/qtm/mseed_list.csv", index=False)
 with open(f"{root_path}/{region}/qtm/mseed_list.txt", "w") as fp:
     fp.write("\n".join(mseeds["fname"]))
 
 # %%
-with open(f"{root_path}/{region}/qtm/event_phase_station_id.txt", "r") as fp:
-    event_phase_station_id = fp.read().splitlines()
-
-event_phase_station_id = pd.DataFrame(event_phase_station_id, columns=["event_phase_station_id"])
-event_phase_station_id[["event_index", "phase_type", "station_id"]] = event_phase_station_id[
-    "event_phase_station_id"
-].str.split("/", expand=True)
+# with open(f"{root_path}/{region}/qtm/event_phase_station_id.txt", "r") as fp:
+#     event_phase_station_id = fp.read().splitlines()
+picks = pd.read_csv(f"{root_path}/{region}/cctorch/cctorch_picks.csv")
+stations = pd.read_csv(f"{root_path}/{region}/cctorch/cctorch_stations.csv")
+picks = picks.merge(stations[["idx_sta", "station_id"]], on="idx_sta")
+print(picks.iloc[:10])
 
 # %% Generate event mseed pairs
 pairs = []
-unique_station_ids = np.intersect1d(mseeds["station_id"].unique(), event_phase_station_id["station_id"].unique())
+unique_station_ids = np.intersect1d(mseeds["station_id"].unique(), picks["station_id"].unique())
 
 for station_id in unique_station_ids:
-    mseed_index = mseeds.loc[mseeds["station_id"] == station_id].index
-    event_phase_station_index = event_phase_station_id.loc[event_phase_station_id["station_id"] == station_id].index
-    pairs.extend(product(mseed_index, event_phase_station_index))
+    mseed_index = mseeds.loc[mseeds["station_id"] == station_id, "idx_mseed"]
+    pick_index = picks.loc[picks["station_id"] == station_id, "idx_pick"]
+    pairs.extend(product(mseed_index, pick_index))
 
 # %%
 with open(f"{root_path}/{region}/qtm/pairs.txt", "w") as fp:
@@ -85,21 +83,28 @@ def parse_args():
 ## based on GPU memory
 batch = 256
 block_size1 = 1
-block_size2 = 10_000  # ~7GB
+block_size2 = 100_000  # ~7GB
 
 # %%
-base_cmd = f"../CCTorch/run.py --mode=TM --pair_list={root_path}/{region}/qtm/pairs.txt --data_list1={root_path}/{region}/qtm/mseed_list.txt --data_format1=mseed --data_path2={root_path}/{region}/qtm/template.dat --data_format2=memmap --config={root_path}/{region}/qtm/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --normalize --reduce_c  --result_path={root_path}/{region}/qtm/ccpairs"
+base_cmd = (
+    f"../CCTorch/run.py --mode=TM --pair_list={root_path}/{region}/qtm/pairs.txt "
+    f"--data_list1={root_path}/{region}/qtm/mseed_list.txt --data_format1=mseed "
+    f"--data_list2={root_path}/{region}/cctorch/cctorch_picks.csv --data_path2={root_path}/{region}/cctorch/template.dat --data_format2=memmap "
+    f"--config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --normalize --reduce_c --result_path={root_path}/{region}/qtm/ccpairs"
+)
 
 # %%
 num_gpu = torch.cuda.device_count()
 if num_gpu == 0:
     if os.uname().sysname == "Darwin":
-        os.system(f"python {base_cmd} --device=mps")
+        cmd = f"python {base_cmd} --device=mps"
     else:
-        os.system(f"python {base_cmd} --device=cpu")
+        cmd = f"python {base_cmd} --device=cpu"
 elif num_gpu == 1:
-    os.system(f"python {base_cmd}")
+    cmd = f"python {base_cmd}"
 else:
-    os.system(f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}")
+    cmd = f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}"
 
 # %%
+print(cmd)
+os.system(cmd)