Skip to content

Commit

Permalink
update qtm
Browse files Browse the repository at this point in the history
  • Loading branch information
zhuwq0 committed Aug 26, 2024
1 parent edfeb61 commit d16f2a1
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 31 deletions.
4 changes: 2 additions & 2 deletions scripts/cut_templates_cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ def cut_templates(root_path, region, config):
min_cc_score = 0.5
min_obs = 8
max_obs = 100
component_mapping = {"3": 0, "2": 1, "1": 2, "E": 0, "N": 1, "Z": 2}

config.update(
{
Expand All @@ -322,14 +323,13 @@ def cut_templates(root_path, region, config):
"min_obs": min_obs,
"max_obs": max_obs,
"sampling_rate": sampling_rate,
"component_mapping": component_mapping,
}
)

# %%
stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations_sst.csv")
stations = stations[stations["network"] == "CI"]
stations.sort_values(by=["latitude", "longitude"], inplace=True)
# stations = stations[stations["station_id"].isin(picks["station_id"].unique())]
print(f"{len(stations) = }")
print(stations.iloc[:5])

Expand Down
63 changes: 34 additions & 29 deletions scripts/run_qtm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,27 @@ def parse_args():
root_path = args.root_path
region = args.region

output_path = f"{region}/qtm"
if not os.path.exists(f"{root_path}/{output_path}"):
os.makedirs(f"{root_path}/{output_path}")
result_path = f"{region}/qtm"
if not os.path.exists(f"{root_path}/{result_path}"):
os.makedirs(f"{root_path}/{result_path}")

# %%
with open(f"{root_path}/{region}/config.json", "r") as fp:
config = json.load(fp)

# %% Get mseed list
mseed_list = sorted(glob(f"{root_path}/{region}/waveforms/????-???/??/*.mseed"))
folder_level = 3 # year-jday/hour/station_id.mseed
mseed_list = sorted(glob(f"{root_path}/{region}/waveforms/????/???/??/*.mseed"))
subdir = 4 # year/jday/hour/station_id.mseed
mseeds = pd.DataFrame(mseed_list, columns=["fname"])
mseeds["mseed_station_id"] = mseeds["fname"].apply(
lambda x: "/".join(x.replace(".mseed", "").split("/")[-folder_level:])[:-1]
)
mseeds["mseed_id"] = mseeds["fname"].apply(lambda x: "/".join(x.replace(".mseed", "").split("/")[-subdir:])[:-1])
mseeds["station_id"] = mseeds["fname"].apply(lambda x: x.replace(".mseed", "").split("/")[-1][:-1])
mseeds["begin_time"] = mseeds["fname"].apply(
lambda x: datetime.strptime("T".join(x.split("/")[-folder_level : -folder_level + 2]), "%Y-%jT%H").strftime(
"%Y-%m-%dT%H:%M:%S.%f"
)
lambda x: datetime.strptime(
f"{x.split('/')[-subdir]}-{x.split('/')[-subdir+1]}T{x.split('/')[-subdir+2]}", "%Y-%jT%H"
).strftime("%Y-%m-%dT%H:%M:%S.%f")
)
mseeds = (
mseeds.groupby("mseed_station_id")
mseeds.groupby("mseed_id")
.agg(
{
"station_id": lambda x: ",".join(x.unique()),
Expand All @@ -56,27 +54,27 @@ def parse_args():
)
.reset_index()
)
mseeds["idx_mseed"] = np.arange(len(mseeds))
mseeds.to_csv(f"{root_path}/{region}/qtm/mseed_list.csv", index=False)
with open(f"{root_path}/{region}/qtm/mseed_list.txt", "w") as fp:
fp.write("\n".join(mseeds["fname"]))

# %%
with open(f"{root_path}/{region}/qtm/event_phase_station_id.txt", "r") as fp:
event_phase_station_id = fp.read().splitlines()

event_phase_station_id = pd.DataFrame(event_phase_station_id, columns=["event_phase_station_id"])
event_phase_station_id[["event_index", "phase_type", "station_id"]] = event_phase_station_id[
"event_phase_station_id"
].str.split("/", expand=True)
# with open(f"{root_path}/{region}/qtm/event_phase_station_id.txt", "r") as fp:
# event_phase_station_id = fp.read().splitlines()
picks = pd.read_csv(f"{root_path}/{region}/cctorch/cctorch_picks.csv")
stations = pd.read_csv(f"{root_path}/{region}/cctorch/cctorch_stations.csv")
picks = picks.merge(stations[["idx_sta", "station_id"]], on="idx_sta")
print(picks.iloc[:10])

# %% Generate event mseed pairs
pairs = []
unique_station_ids = np.intersect1d(mseeds["station_id"].unique(), event_phase_station_id["station_id"].unique())
unique_station_ids = np.intersect1d(mseeds["station_id"].unique(), picks["station_id"].unique())

for station_id in unique_station_ids:
mseed_index = mseeds.loc[mseeds["station_id"] == station_id].index
event_phase_station_index = event_phase_station_id.loc[event_phase_station_id["station_id"] == station_id].index
pairs.extend(product(mseed_index, event_phase_station_index))
mseed_index = mseeds.loc[mseeds["station_id"] == station_id, "idx_mseed"]
pick_index = picks.loc[picks["station_id"] == station_id, "idx_pick"]
pairs.extend(product(mseed_index, pick_index))

# %%
with open(f"{root_path}/{region}/qtm/pairs.txt", "w") as fp:
Expand All @@ -85,21 +83,28 @@ def parse_args():
## based on GPU memory
batch = 256
block_size1 = 1
block_size2 = 10_000 # ~7GB
block_size2 = 100_000 # ~7GB

# %%
base_cmd = f"../CCTorch/run.py --mode=TM --pair_list={root_path}/{region}/qtm/pairs.txt --data_list1={root_path}/{region}/qtm/mseed_list.txt --data_format1=mseed --data_path2={root_path}/{region}/qtm/template.dat --data_format2=memmap --config={root_path}/{region}/qtm/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --normalize --reduce_c --result_path={root_path}/{region}/qtm/ccpairs"
base_cmd = (
f"../CCTorch/run.py --mode=TM --pair_list={root_path}/{region}/qtm/pairs.txt "
f"--data_list1={root_path}/{region}/qtm/mseed_list.txt --data_format1=mseed "
f"--data_list2={root_path}/{region}/cctorch/cctorch_picks.csv --data_path2={root_path}/{region}/cctorch/template.dat --data_format2=memmap "
f"--config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --normalize --reduce_c --result_path={root_path}/{region}/qtm/ccpairs"
)

# %%
num_gpu = torch.cuda.device_count()
if num_gpu == 0:
if os.uname().sysname == "Darwin":
os.system(f"python {base_cmd} --device=mps")
cmd = f"python {base_cmd} --device=mps"
else:
os.system(f"python {base_cmd} --device=cpu")
cmd = f"python {base_cmd} --device=cpu"
elif num_gpu == 1:
os.system(f"python {base_cmd}")
cmd = f"python {base_cmd}"
else:
os.system(f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}")
cmd = f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}"

# %%
print(cmd)
os.system(cmd)

0 comments on commit d16f2a1

Please sign in to comment.