From c689c2feb8d8f2b6b04aa717244a30f146fedf49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CDafydd?= <“dafydd_stephenson@hotmail.com”> Date: Wed, 11 Sep 2024 17:33:21 -0600 Subject: [PATCH] - Define a get() method on ROMSInputDataset that: - calls super.get() - detects if source is yaml (returns if not) - makes a local copy of the yaml to modify - modifies the yaml with a dictionary of entries to replace - imports roms-tools and creates the object from modified yaml - saves to netCDF - Remove roms/utils.py as logic of _modify_roms_tools_yaml is now in ROMSInputDataset.get() - Change default path of InputDataset.get() to be exactly the local_dir argument, not a subdir & - Change the call from Case.setup() to provide the correct subdir as local_dir argument to get() --- cstar/base/input_dataset.py | 15 +++---- cstar/case.py | 6 ++- cstar/roms/input_dataset.py | 87 ++++++++++++++++++++++++++----------- cstar/roms/utils.py | 26 ----------- 4 files changed, 73 insertions(+), 61 deletions(-) delete mode 100644 cstar/roms/utils.py diff --git a/cstar/base/input_dataset.py b/cstar/base/input_dataset.py index f963287..3d2fd36 100644 --- a/cstar/base/input_dataset.py +++ b/cstar/base/input_dataset.py @@ -143,9 +143,8 @@ def get(self, local_dir: str | Path) -> None: """ local_dir = Path(local_dir).resolve() - tgt_dir = local_dir / f"input_datasets/{self.base_model.name}/" - tgt_dir.mkdir(parents=True, exist_ok=True) - tgt_path = tgt_dir / str(self.source.basename) + local_dir.mkdir(parents=True, exist_ok=True) + tgt_path = local_dir / str(self.source.basename) # If the file is somewhere else on the system, make a symbolic link where we want it if self.exists_locally: @@ -156,11 +155,11 @@ def get(self, local_dir: str | Path) -> None: if tgt_path.exists(): raise FileExistsError( f"A file by the name of {self.source.basename}" - + f"already exists at {tgt_dir}." + + f"already exists at {local_dir}." ) # TODO maybe this should check the hash and just `return` if it matches? else: - # QUESTION: Should this now update self.local_path to point to the symlink? 20240827 - YES + # Create a symlink and update the local path attribute tgt_path.symlink_to(self.local_path) self.local_path = tgt_path return @@ -172,15 +171,15 @@ def get(self, local_dir: str | Path) -> None: # NOTE: default timeout was leading to a lot of timeouterrors downloader = pooch.HTTPDownloader(timeout=120) to_fetch = pooch.create( - path=tgt_dir, - # FIXME Cannot find a urllib equivalent to this: + path=local_dir, + # urllib equivalent to Path.parent base_url=urljoin(self.source.location, "."), registry={self.source.basename: self.file_hash}, ) to_fetch.fetch(self.source.basename, downloader=downloader) self.exists_locally = True - self.local_path = tgt_dir / self.source.basename + self.local_path = local_dir / self.source.basename def check_exists_locally(self, local_dir: str | Path) -> bool: """ diff --git a/cstar/case.py b/cstar/case.py index 9fb0755..84039aa 100644 --- a/cstar/case.py +++ b/cstar/case.py @@ -619,7 +619,9 @@ def check_is_setup(self) -> bool: if component.input_datasets is None: continue for inp in component.input_datasets: - if not inp.check_exists_locally(self.caseroot): + if not inp.check_exists_locally( + self.caseroot / f"input_datasets/{inp.base_model.name}" + ): # If it can't be found locally, check whether it should by matching dataset dates with simulation dates: if (not isinstance(inp.start_date, dt.datetime)) or ( not isinstance(inp.end_date, dt.datetime) @@ -676,7 +678,7 @@ def setup(self) -> None: or (inp.start_date <= self.end_date) and (self.end_date >= self.start_date) ): - inp.get(self.caseroot) + inp.get(self.caseroot / f"input_datasets/{inp.base_model.name}") self.is_setup = True diff --git a/cstar/roms/input_dataset.py b/cstar/roms/input_dataset.py index f99a653..2c647b8 100644 --- a/cstar/roms/input_dataset.py +++ b/cstar/roms/input_dataset.py @@ -1,10 +1,10 @@ -import pooch +import yaml +import shutil from abc import ABC from pathlib import Path from typing import Optional from cstar.base.input_dataset import InputDataset -from cstar.roms.utils import _modify_roms_tools_yaml class ROMSInputDataset(InputDataset, ABC): @@ -20,43 +20,80 @@ class ROMSInputDataset(InputDataset, ABC): """ ) + (InputDataset.__doc__ or "") - -class ROMSModelGrid(ROMSInputDataset): - """ - An implementation of the ROMSInputDataset class for model grid files. - """ - def get( self, local_dir: str | Path, + yaml_entries_to_modify: dict = {}, np_xi: Optional[int] = None, np_eta: Optional[int] = None, ) -> None: + """ + docstring + """ + # Ensure we're working with a Path object local_dir = Path(local_dir) - if self.source.source_type == "yaml": - local_file = local_dir / Path(self.source.location).stem - if self.source.location_type == "url": - pooch.retrieve( - self.source.location, known_hash=self.file_hash, path=local_file + # First, get the file as usual + super().get(local_dir) + + # If it's not a yaml, we're done + if self.source.source_type != "yaml": + return + + # If it is a yaml, first make sure that the local copy is not a symlink + # (as InputDataset.get() symlinks files that didn't need to be downloaded) + yaml_file = local_dir / Path(self.source.location).name + if yaml_file.is_symlink(): + linkpath = yaml_file.resolve() + yaml_file.unlink() + shutil.copy2(linkpath, yaml_file) + yaml_file = linkpath + + # Now modify the local copy of the yaml file as needed: + with open(yaml_file, "r") as F: + _, header, yaml_data = F.read().split("---", 2) + yaml_dict = yaml.safe_load(yaml_data) + + roms_tools_class_name = list(yaml_dict.keys())[-1] + + for key, value in yaml_entries_to_modify.items(): + if key in yaml_dict[roms_tools_class_name].keys(): + yaml_dict[roms_tools_class_name][key] = value + else: + raise ValueError( + f"Cannot replace entry {key} in " + + f"roms_tools yaml file {yaml_file} under {roms_tools_class_name}. " + + "No such entry." ) - yaml_location = local_file - elif self.source.location_type == "path": - yaml_location = Path(self.source.location) - import roms_tools as rt + with open(yaml_file, "w") as F: + F.write(f"---{header}---\n" + yaml.dump(yaml_dict)) - # Copy the yaml - _modify_roms_tools_yaml( - input_file=yaml_location, output_file=local_file, new_entries={} - ) + # Finally, make a roms-tools object from the modified yaml + import roms_tools - roms_grd = rt.Grid.from_yaml(self.source.location) - roms_grd.save(local_file.stem, np_xi=np_xi, np_eta=np_eta) - self.local_path = local_file + roms_tools_class = getattr(roms_tools, roms_tools_class_name) + roms_tools_class_instance = roms_tools_class.from_yaml(self.source.location) + # ... and save: + if (np_eta is not None) and (np_xi is not None): + roms_tools_class_instance.save( + local_dir / "PARTITIONED" / yaml_file.stem, np_xi=np_xi, np_eta=np_eta + ) + parted_dir = yaml_file.parent / "PARTITIONED" + self.local_partitioned_files = list( + parted_dir.glob(f"{yaml_file.stem}.*.nc") + ) else: - super().get(local_dir) + savepath = Path(f"{local_dir/yaml_file.stem}.nc") + roms_tools_class_instance.save(savepath) + self.local_path = savepath + + +class ROMSModelGrid(ROMSInputDataset): + """ + An implementation of the ROMSInputDataset class for model grid files. + """ pass diff --git a/cstar/roms/utils.py b/cstar/roms/utils.py deleted file mode 100644 index 29de2c6..0000000 --- a/cstar/roms/utils.py +++ /dev/null @@ -1,26 +0,0 @@ -import yaml -from pathlib import Path - - -def _modify_roms_tools_yaml( - input_file: str | Path, output_file: str | Path, new_entries: dict = {} -): - with open(input_file, "r") as F: - _, header, yaml_data = F.read().split("---", 2) - yaml_dict = yaml.safe_load(yaml_data) - - for key, value in new_entries.items(): - # First level describes roms tools class - rtclasses = yaml_dict.keys() - for rtclass in rtclasses: - if key not in yaml_dict[rtclass].keys(): - raise ValueError( - f"Cannot replace entry {key} in " - + f"roms_tools yaml file {input_file}. " - + "No such entry." - ) - - yaml_dict[rtclass][key] = value - - with open(output_file, "w") as F: - F.write(f"---{header}---\n" + yaml.dump(yaml_dict))