Skip to content

Commit

Permalink
- Define a get() method on ROMSInputDataset that:
Browse files Browse the repository at this point in the history
   - calls super().get()
   - detects if source is yaml (returns if not)
   - makes a local copy of the yaml to modify
   - modifies the yaml with a dictionary of entries to replace
   - imports roms-tools and creates the object from modified yaml
   - saves to netCDF
- Remove roms/utils.py, as the logic of _modify_roms_tools_yaml is now in ROMSInputDataset.get()
- Change default path of InputDataset.get() to be exactly the local_dir argument, not a subdir of it
- Change the call from Case.setup() to provide the correct subdir as local_dir argument to get()
  • Loading branch information
Dafydd committed Sep 11, 2024
1 parent 9dfc68c commit c689c2f
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 61 deletions.
15 changes: 7 additions & 8 deletions cstar/base/input_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,8 @@ def get(self, local_dir: str | Path) -> None:
"""
local_dir = Path(local_dir).resolve()

tgt_dir = local_dir / f"input_datasets/{self.base_model.name}/"
tgt_dir.mkdir(parents=True, exist_ok=True)
tgt_path = tgt_dir / str(self.source.basename)
local_dir.mkdir(parents=True, exist_ok=True)
tgt_path = local_dir / str(self.source.basename)

# If the file is somewhere else on the system, make a symbolic link where we want it
if self.exists_locally:
Expand All @@ -156,11 +155,11 @@ def get(self, local_dir: str | Path) -> None:
if tgt_path.exists():
raise FileExistsError(
f"A file by the name of {self.source.basename}"
+ f"already exists at {tgt_dir}."
+ f"already exists at {local_dir}."
)
# TODO maybe this should check the hash and just `return` if it matches?
else:
# QUESTION: Should this now update self.local_path to point to the symlink? 20240827 - YES
# Create a symlink and update the local path attribute
tgt_path.symlink_to(self.local_path)
self.local_path = tgt_path
return
Expand All @@ -172,15 +171,15 @@ def get(self, local_dir: str | Path) -> None:
# NOTE: default timeout was leading to a lot of timeouterrors
downloader = pooch.HTTPDownloader(timeout=120)
to_fetch = pooch.create(
path=tgt_dir,
# FIXME Cannot find a urllib equivalent to this:
path=local_dir,
# urllib equivalent to Path.parent
base_url=urljoin(self.source.location, "."),
registry={self.source.basename: self.file_hash},
)

to_fetch.fetch(self.source.basename, downloader=downloader)
self.exists_locally = True
self.local_path = tgt_dir / self.source.basename
self.local_path = local_dir / self.source.basename

def check_exists_locally(self, local_dir: str | Path) -> bool:
"""
Expand Down
6 changes: 4 additions & 2 deletions cstar/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,9 @@ def check_is_setup(self) -> bool:
if component.input_datasets is None:
continue
for inp in component.input_datasets:
if not inp.check_exists_locally(self.caseroot):
if not inp.check_exists_locally(
self.caseroot / f"input_datasets/{inp.base_model.name}"
):
# If it can't be found locally, check whether it should by matching dataset dates with simulation dates:
if (not isinstance(inp.start_date, dt.datetime)) or (
not isinstance(inp.end_date, dt.datetime)
Expand Down Expand Up @@ -676,7 +678,7 @@ def setup(self) -> None:
or (inp.start_date <= self.end_date)
and (self.end_date >= self.start_date)
):
inp.get(self.caseroot)
inp.get(self.caseroot / f"input_datasets/{inp.base_model.name}")

self.is_setup = True

Expand Down
87 changes: 62 additions & 25 deletions cstar/roms/input_dataset.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import pooch
import yaml
import shutil

from abc import ABC
from pathlib import Path
from typing import Optional
from cstar.base.input_dataset import InputDataset
from cstar.roms.utils import _modify_roms_tools_yaml


class ROMSInputDataset(InputDataset, ABC):
Expand All @@ -20,43 +20,80 @@ class ROMSInputDataset(InputDataset, ABC):
"""
) + (InputDataset.__doc__ or "")


class ROMSModelGrid(ROMSInputDataset):
"""
An implementation of the ROMSInputDataset class for model grid files.
"""

def get(
self,
local_dir: str | Path,
yaml_entries_to_modify: dict = {},
np_xi: Optional[int] = None,
np_eta: Optional[int] = None,
) -> None:
"""
docstring
"""
# Ensure we're working with a Path object
local_dir = Path(local_dir)
if self.source.source_type == "yaml":
local_file = local_dir / Path(self.source.location).stem

if self.source.location_type == "url":
pooch.retrieve(
self.source.location, known_hash=self.file_hash, path=local_file
# First, get the file as usual
super().get(local_dir)

# If it's not a yaml, we're done
if self.source.source_type != "yaml":
return

# If it is a yaml, first make sure that the local copy is not a symlink
# (as InputDataset.get() symlinks files that didn't need to be downloaded)
yaml_file = local_dir / Path(self.source.location).name
if yaml_file.is_symlink():
linkpath = yaml_file.resolve()
yaml_file.unlink()
shutil.copy2(linkpath, yaml_file)
yaml_file = linkpath

# Now modify the local copy of the yaml file as needed:
with open(yaml_file, "r") as F:
_, header, yaml_data = F.read().split("---", 2)
yaml_dict = yaml.safe_load(yaml_data)

roms_tools_class_name = list(yaml_dict.keys())[-1]

for key, value in yaml_entries_to_modify.items():
if key in yaml_dict[roms_tools_class_name].keys():
yaml_dict[roms_tools_class_name][key] = value
else:
raise ValueError(
f"Cannot replace entry {key} in "
+ f"roms_tools yaml file {yaml_file} under {roms_tools_class_name}. "
+ "No such entry."
)
yaml_location = local_file
elif self.source.location_type == "path":
yaml_location = Path(self.source.location)

import roms_tools as rt
with open(yaml_file, "w") as F:
F.write(f"---{header}---\n" + yaml.dump(yaml_dict))

# Copy the yaml
_modify_roms_tools_yaml(
input_file=yaml_location, output_file=local_file, new_entries={}
)
# Finally, make a roms-tools object from the modified yaml
import roms_tools

roms_grd = rt.Grid.from_yaml(self.source.location)
roms_grd.save(local_file.stem, np_xi=np_xi, np_eta=np_eta)
self.local_path = local_file
roms_tools_class = getattr(roms_tools, roms_tools_class_name)
roms_tools_class_instance = roms_tools_class.from_yaml(self.source.location)

# ... and save:
if (np_eta is not None) and (np_xi is not None):
roms_tools_class_instance.save(
local_dir / "PARTITIONED" / yaml_file.stem, np_xi=np_xi, np_eta=np_eta
)
parted_dir = yaml_file.parent / "PARTITIONED"
self.local_partitioned_files = list(
parted_dir.glob(f"{yaml_file.stem}.*.nc")
)
else:
super().get(local_dir)
savepath = Path(f"{local_dir/yaml_file.stem}.nc")
roms_tools_class_instance.save(savepath)
self.local_path = savepath


class ROMSModelGrid(ROMSInputDataset):
"""
An implementation of the ROMSInputDataset class for model grid files.
"""

pass

Expand Down
26 changes: 0 additions & 26 deletions cstar/roms/utils.py

This file was deleted.

0 comments on commit c689c2f

Please sign in to comment.