Skip to content

Commit

Permalink
Merge branch 'dynamic_chunks_2' of github.com:jbusecke/pangeo-forge-recipes into dynamic_chunks_2
Browse files Browse the repository at this point in the history
  • Loading branch information
jbusecke committed Jul 25, 2023
2 parents bc980f7 + b99c4ef commit 29097e8
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 12 deletions.
16 changes: 8 additions & 8 deletions pangeo_forge_recipes/dynamic_target_chunks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import numpy as np
import xarray as xr

from dask.utils import parse_bytes

from pangeo_forge_recipes.aggregation import XarraySchema, schema_to_template_ds


Expand Down Expand Up @@ -42,22 +42,22 @@ def dynamic_target_chunks_from_schema(
target_chunks_aspect_ratio: Dict[str, int],
nbytes_tolerance: float = 0.2,
) -> dict[str, int]:
"""Determine chunksizes based on desired chunksize (max size of any variable in the
dataset) and the ratio of total chunks along each dimension of the dataset. The
algorithm finds even divisors, and chooses possible combination that produce chunk
"""Determine chunksizes based on desired chunksize (max size of any variable in the
dataset) and the ratio of total chunks along each dimension of the dataset. The
algorithm finds even divisors, and chooses possible combination that produce chunk
sizes close to the target. From this set of combinations, the algorithm selects the chunks that most closely produce the ratio of total
chunks along the given dimensions.
chunks along the given dimensions.
Parameters
----------
schema : XarraySchema
Schema of the input dataset
target_chunk_nbytes : Union[int, str]
Desired chunk size (defined as the max size of any variable in the dataset with
Desired chunk size (defined as the max size of any variable in the dataset with
chosen chunks). Can be provided as integer (bytes) or a string like '100MB'.
nbytes_tolerance : float, optional
Chunksize tolerance. Resulting chunk size will be within
[target_chunk_nbytes*(1-nbytes_tolerance),
Chunksize tolerance. Resulting chunk size will be within
[target_chunk_nbytes*(1-nbytes_tolerance),
target_chunk_nbytes*(1+nbytes_tolerance)], by default 0.2
Returns
Expand Down
11 changes: 7 additions & 4 deletions tests/test_dynamic_target_chunks.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,18 @@ def test_dynamic_rechunking(self, dims_shape, target_chunks_aspect_ratio, expect
assert target_chunks[dim] == chunks

def test_nbytes_str_input(self):
ds = _create_ds({'x':100, 'y':100, 'z':100})
ds = _create_ds({"x": 100, "y": 100, "z": 100})
schema = dataset_to_schema(ds)
target_chunks_aspect_ratio = {'x':1, 'y':1, 'z':1}
target_chunks_int = dynamic_target_chunks_from_schema(schema, 1e6, target_chunks_aspect_ratio=target_chunks_aspect_ratio)
target_chunks_str = dynamic_target_chunks_from_schema(schema, '1MB', target_chunks_aspect_ratio=target_chunks_aspect_ratio)
target_chunks_int = dynamic_target_chunks_from_schema(
schema, 1e6, target_chunks_aspect_ratio=target_chunks_aspect_ratio
)
target_chunks_str = dynamic_target_chunks_from_schema(
schema, '1MB', target_chunks_aspect_ratio=target_chunks_aspect_ratio
)
for dim in target_chunks_aspect_ratio.keys():
assert target_chunks_int[dim] == target_chunks_str[dim]


def test_dynamic_rechunking_maintain_ratio(self):
"""Confirm that for a given ratio with two differently sized datasets we maintain a constant ratio
between total number of chunks"""
Expand Down

0 comments on commit 29097e8

Please sign in to comment.