Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Pass on on-disk chunk sizes as preferred chunk sizes to the xarray backend #678

Merged
merged 10 commits on Aug 14, 2023
1 change: 1 addition & 0 deletions docs/history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ History
Latest
------
- BUG: Fix setting spatial dims internally during propagation (pull #682)
- ENH: Pass on on-disk chunk sizes as preferred chunk sizes to the xarray backend (pull #678)

0.14.1
------
Expand Down
8 changes: 7 additions & 1 deletion rioxarray/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1236,7 +1236,7 @@ def open_rasterio(
)
result.encoding = encoding

# update attributes from NetCDF attributess
# update attributes from NetCDF attributes
_load_netcdf_attrs(riods.tags(), result)
result = _decode_datetime_cf(
result, decode_times=decode_times, decode_timedelta=decode_timedelta
Expand All @@ -1262,6 +1262,12 @@ def open_rasterio(

if chunks is not None:
result = _prepare_dask(result, riods, filename, chunks)
snowman2 marked this conversation as resolved.
Show resolved Hide resolved
else:
result.encoding["preferred_chunks"] = {
result.rio.y_dim: riods.block_shapes[0][0],
snowman2 marked this conversation as resolved.
Show resolved Hide resolved
result.rio.x_dim: riods.block_shapes[0][1],
coord_name: 1,
}

# add file path to encoding
result.encoding["source"] = riods.name
Expand Down
5 changes: 1 addition & 4 deletions rioxarray/xarray_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ def open_dataset(
filename_or_obj,
drop_variables=None,
parse_coordinates=None,
chunks=None,
cache=None,
lock=None,
masked=False,
mask_and_scale=True,
Expand All @@ -56,8 +54,7 @@ def open_dataset(
rds = _io.open_rasterio(
filename_or_obj,
parse_coordinates=parse_coordinates,
chunks=chunks,
cache=cache,
snowman2 marked this conversation as resolved.
Show resolved Hide resolved
cache=False,
lock=lock,
masked=masked,
mask_and_scale=mask_and_scale,
Expand Down
2 changes: 2 additions & 0 deletions test/integration/test_integration__io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1121,6 +1121,7 @@ def test_mask_and_scale(open_rasterio):
"grid_mapping": "crs",
"dtype": "uint16",
"rasterio_dtype": "uint16",
"preferred_chunks": dict(band=1, x=1386, y=585),
}
attrs = rds.air_temperature.attrs
assert "_Unsigned" not in attrs
Expand All @@ -1146,6 +1147,7 @@ def test_no_mask_and_scale(open_rasterio):
"grid_mapping": "crs",
"dtype": "uint16",
"rasterio_dtype": "uint16",
"preferred_chunks": {"band": 1, "x": 1386, "y": 585},
}
attrs = rds.air_temperature.attrs
assert attrs["_Unsigned"] == "true"
Expand Down
1 change: 1 addition & 0 deletions test/integration/test_integration_xarray_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def test_xarray_open_dataset():
assert "spatial_ref" in ds.coords
assert "grid_mapping" not in ds.data_vars["band_data"].attrs
assert "grid_mapping" in ds.data_vars["band_data"].encoding
assert "preferred_chunks" in ds.data_vars["band_data"].encoding

ds = xarray.open_dataset(cog_file)

Expand Down
Loading