Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changed behaviour of source._open_dataset to: #681

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions intake_esm/source.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import typing
import warnings

import dask
import fsspec
Expand All @@ -11,6 +12,10 @@
from .utils import OPTIONS


class ConcatenationWarning(UserWarning):
    """Warning category for dataset-concatenation problems.

    Used as the ``category`` of the warning emitted when datasets cannot be
    ordered for concatenation and only the first dataset is retained.
    """


class ESMDataSourceError(Exception):
    """Exception type defined for ESM data-source failures.

    NOTE(review): the raise sites are outside this excerpt -- confirm the
    exact conditions against the rest of the module.
    """

Expand Down Expand Up @@ -84,9 +89,16 @@ def _open_dataset(
if requested_variables:
if isinstance(requested_variables, str):
requested_variables = [requested_variables]

variable_intersection = set(requested_variables).intersection(set(varname))
variables = [variable for variable in variable_intersection if variable in ds.data_vars]

data_vars = variable_intersection & set(ds.data_vars)
coord_vars = variable_intersection & set(ds.coords)

variables = list(data_vars | coord_vars)

scalar_variables = [v for v in ds.data_vars if len(ds[v].dims) == 0]

ds = ds.set_coords(scalar_variables)
ds = ds[variables]
ds.attrs[OPTIONS['vars_key']] = variables
Expand Down Expand Up @@ -242,7 +254,7 @@ def _open_dataset(self):
]

datasets = dask.compute(*datasets)
if len(datasets) == 1:
if len(datasets) == 1 or not datasets[0].data_vars:
self._ds = datasets[0]
else:
datasets = sorted(
Expand All @@ -256,7 +268,23 @@ def _open_dataset(self):
ds.set_coords(set(ds.variables) - set(ds.attrs[OPTIONS['vars_key']]))
for ds in datasets
]
self._ds = xr.combine_by_coords(datasets, **self.xarray_combine_by_coords_kwargs)
try:
self._ds = xr.combine_by_coords(
datasets, **self.xarray_combine_by_coords_kwargs
)
except ValueError as exc:
if (
str(exc)
== 'Could not find any dimension coordinates to use to order the datasets for concatenation'
):
warnings.warn(
'Attempting to concatenate datasets without valid dimension coordinates: retaining only first dataset.'
' Request valid dimension coordinate to silence this warning.',
category=ConcatenationWarning,
)
self._ds = datasets[0]
else:
raise exc

self._ds.attrs[OPTIONS['dataset_key']] = self.key

Expand Down
59 changes: 59 additions & 0 deletions tests/test_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,62 @@ def test_update_attrs(tmp_path, data_format, attrs):
_xarray_open_kwargs = _get_xarray_open_kwargs(data_format=data_format)
ds_new = _open_dataset(fpath, 'tasmax', xarray_open_kwargs=_xarray_open_kwargs).compute()
assert ds_new.attrs == ds.attrs


@pytest.mark.parametrize(
    'fpath,dvars,cvars,expected',
    [
        # Each case: (path, requested data variables, requested coordinate
        # variables, names expected in the opened dataset).  A [''] entry
        # adds an empty-string name, which is not one of the names passed
        # as ``varname`` below.
        (
            f1,
            ['time_bnds'],
            [''],
            ['time_bnds', 'height', 'time'],
        ),
        (f1, ['tasmax'], [''], ['tasmax', 'height', 'time', 'lat', 'lon']),
        (
            f1,
            [],
            ['height'],
            ['height'],
        ),
        # Nothing requested at all: every variable is expected back.
        (
            f1,
            [],
            [],
            ['height', 'time_bnds', 'lon_bnds', 'lat_bnds', 'tasmax', 'time', 'lat', 'lon'],
        ),
        (multi_path, ['time_bnds'], [''], ['time_bnds', 'height', 'time']),
        (
            multi_path,
            ['tasmax'],
            [''],
            ['tasmax', 'time', 'height', 'lat', 'lon'],
        ),
        (multi_path, [], ['height'], ['height']),
        (
            multi_path,
            [],
            [],
            ['time_bnds', 'lon_bnds', 'lat_bnds', 'tasmax', 'time', 'height', 'lat', 'lon'],
        ),
    ],
)
def test_request_coord_vars(fpath, dvars, cvars, expected):
    """
    Test requesting a combination of data & coordinate variables.

    The dataset at ``fpath`` is opened with ``dvars + cvars`` as the
    requested variables, and the union of its data-variable and coordinate
    names must equal ``expected`` (compared as sets, so order is ignored).
    """
    requested_vars = [*dvars, *cvars]
    xarray_open_kwargs = _get_xarray_open_kwargs('netcdf')
    ds = _open_dataset(
        urlpath=fpath,
        varname=['height', 'lat', 'lat_bnds', 'lon', 'lon_bnds', 'tasmax', 'time', 'time_bnds'],
        xarray_open_kwargs=xarray_open_kwargs,
        requested_variables=requested_vars,
    ).compute()

    # An empty mapping already yields an empty set, so no fallback is needed.
    found_vars = set(ds.data_vars) | set(ds.coords)

    assert found_vars == set(expected)