Skip to content

Commit

Permalink
improve output variable names from generate_output_path
Browse files: browse the repository at this point in the history
  • Loading branch information
JessyBarrette committed Aug 4, 2023
1 parent b61fae0 commit 164f555
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 18 deletions.
6 changes: 3 additions & 3 deletions ocean_data_parser/batch/default-batch-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ ioos_qc: {}
manual_qc: {}

# Outputs
file_output:
output:
path: null
source: null
file_name: null
file_preffix: ""
file_suffix: _test
file_suffix: ""
output_format: .nc

upload_to_database:
Expand Down
19 changes: 9 additions & 10 deletions ocean_data_parser/batch/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@

def generate_output_path(
ds: xarray.Dataset,
source: str = None,
path: Union[str, Path] = None,
defaults: dict = None,
file_name: str = None,
file_preffix: str = "",
file_suffix: str = "",
output_format: str = ".nc",
defaults: dict = None,
) -> Path:
"""Generate output path where to save Dataset.
Expand All @@ -42,10 +42,9 @@ def generate_output_path(

# handle defaults
original_source = Path(ds.attrs.get("source")) if ds.attrs.get("source") else None
if source is None and original_source:
source = str(original_source.stem)

if source is None:
if file_name is None and original_source:
file_name = str(original_source.stem)
elif file_name is None:
raise RuntimeError("No output source available. Please define source output.")

if path is None and ds.attrs.get("source"):
Expand Down Expand Up @@ -83,19 +82,19 @@ def generate_output_path(

# Generate path and file name
output_path = Path(path.format(**path_generation_inputs))
source = source.format(**path_generation_inputs)
file_name = file_name.format(**path_generation_inputs)

# Retrieve output_format from the file_name extension when output_format is not set

if "." in source and not output_format:
source, output_format = source.rsplit(".", 1)
if "." in file_name and not output_format:
file_name, output_format = file_name.rsplit(".", 1)
assert (
output_format
), "Unknown output file format extension: define the format through the path or output_format inputs"

# Generate path
return Path(output_path) / (
f"{file_preffix or ''}{source}{file_suffix or ''}{output_format}"
f"{file_preffix or ''}{file_name}{file_suffix or ''}{output_format}"
)


Expand Down
28 changes: 23 additions & 5 deletions tests/test_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,28 @@ def test_generate_output_from_source_attribute(self):
assert isinstance(name, Path)
assert str(name) == "source_file.nc"

def test_generate_filename_with_path(self):
@pytest.mark.parametrize(
"input,expected_path",
(
({"path": "output"}, "output/source_file.nc"),
({"file_name": "test"}, "test.nc"),
({"file_preffix": "test_"}, "test_source_file.nc"),
({"file_suffix": "_test"}, "source_file_test.nc"),
),
)
def test_generate_filename_with_unique_input(self, input, expected_path):
name = generate_output_path(
self._get_test_dataset(),
source="{organization}_{instrument}_test",
**input,
output_format=".nc",
)
assert isinstance(name, Path)
assert name == Path(expected_path)

def test_generate_filename_with_file_name(self):
name = generate_output_path(
self._get_test_dataset(),
file_name="{organization}_{instrument}_test",
output_format=".nc",
)
assert isinstance(name, Path)
Expand All @@ -226,7 +244,7 @@ def test_generate_filename_with_path(self):
def test_generate_filename_with_time(self):
name = generate_output_path(
self._get_test_dataset(),
source="{organization}_{instrument}_{time_min:%Y%m%d}-{time_max:%Y%m%d}",
file_name="{organization}_{instrument}_{time_min:%Y%m%d}-{time_max:%Y%m%d}",
output_format=".nc",
)
assert isinstance(name, Path)
Expand All @@ -235,7 +253,7 @@ def test_generate_filename_with_time(self):
def test_generate_filename_with_variable_attribute(self):
name = generate_output_path(
self._get_test_dataset(),
source="{organization}_{instrument}_{variable_time_timezone}",
file_name="{organization}_{instrument}_{variable_time_timezone}",
output_format=".nc",
)
assert isinstance(name, Path)
Expand Down Expand Up @@ -264,7 +282,7 @@ def test_generate_filename_with_prefix_and_suffix(self):
def test_generate_filename_with_defaults(self):
name = generate_output_path(
self._get_test_dataset(),
source="test_{missing_global}",
file_name="test_{missing_global}",
defaults={"missing_global": "this-is-the-default"},
)
assert str(name) == "test_this-is-the-default.nc"

0 comments on commit 164f555

Please sign in to comment.