From bc52fc96e713ad06d4f59838cc2e1a60ad52d487 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Thu, 5 Oct 2023 15:00:52 +0200 Subject: [PATCH 1/7] fit_curve update time (#167) fit_curve with dimension convertion --- .../ml/curve_fitting.py | 29 +++++++++++++++++-- tests/test_ml.py | 13 +++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 81736ea7..8f5f400d 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -26,6 +26,23 @@ def fit_curve( f"Provided dimension ({dimension}) not found in data.dims: {data.dims}" ) + try: + # Try parsing as datetime first + dates = data[dimension].values + dates = np.asarray(dates, dtype=np.datetime64) + except ValueError: + dates = np.asarray(data[dimension].values) + + if np.issubdtype(dates.dtype, np.datetime64): + timestep = [ + ( + (np.datetime64(x) - np.datetime64("1970-01-01", "s")) + / np.timedelta64(1, "s") + ) + for x in dates + ] + data[dimension] = np.array(timestep) + dims_before = list(data.dims) # In the spec, parameters is a list, but xr.curvefit requires names for them, @@ -87,8 +104,16 @@ def predict_curve( labels = np.asarray(labels) if np.issubdtype(labels.dtype, np.datetime64): - labels = labels.astype(int) labels_were_datetime = True + initial_labels = labels + timestep = [ + ( + (np.datetime64(x) - np.datetime64("1970-01-01", "s")) + / np.timedelta64(1, "s") + ) + for x in labels + ] + labels = np.array(timestep) # This is necessary to pipe the arguments correctly through @process def wrapper(f): @@ -122,6 +147,6 @@ def _wrap(*args, **kwargs): predictions = predictions.assign_coords({dimension: labels.data}) if labels_were_datetime: - predictions[dimension] = pd.DatetimeIndex(predictions[dimension].values) + predictions[dimension] = initial_labels return predictions diff --git a/tests/test_ml.py b/tests/test_ml.py index c9157010..4248b212 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -84,6 +84,19 @@ def fitFunction(x, parameters): assert len(result.coords["param"]) == len(parameters) labels = dimension_labels(origin_cube, origin_cube.openeo.temporal_dims[0]) + labels = [float(l) for l in labels] + predictions = predict_curve( + result, + _process, + origin_cube.openeo.temporal_dims[0], + labels=labels, + ).compute() + + assert len(predictions.coords[origin_cube.openeo.temporal_dims[0]]) == len(labels) + assert "param" not in predictions.dims + assert result.rio.crs == predictions.rio.crs + + labels = ["2020-02-02", "2020-03-02", "2020-04-02", "2020-05-02"] predictions = predict_curve( result, _process, From 19af1504e58e1457239e5441387cb59f3120e0f2 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Thu, 5 Oct 2023 15:13:25 +0200 Subject: [PATCH 2/7] Uc6 update (#168) * fit_curve with dimension convertion * bump 2023.9.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 44ff0e4a..1d01fd51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.9.1" +version = "2023.9.2" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From 6032ac914b28a312732bdb580c76d27a31ebde3d Mon Sep 17 00:00:00 2001 From: clausmichele <31700619+clausmichele@users.noreply.github.com> Date: Fri, 6 Oct 2023 10:53:17 +0200 Subject: [PATCH 3/7] fix: import experimental processes (#163) Fix import experimental processes --- openeo_processes_dask/process_implementations/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/openeo_processes_dask/process_implementations/__init__.py b/openeo_processes_dask/process_implementations/__init__.py index 1129749d..875ee6d5 100644 --- a/openeo_processes_dask/process_implementations/__init__.py +++ b/openeo_processes_dask/process_implementations/__init__.py @@ -15,6 +15,13 @@ "Did not load machine learning processes due to missing dependencies: Install them like this: `pip install openeo-processes-dask[implementations, ml]`" ) +try: + from .experimental import * +except ImportError as e: + logger.warning( + "Did not experimental processes due to missing dependencies: Install them like this: `pip install openeo-processes-dask[implementations, experimental]`" + ) + import rioxarray as rio # Required for the .rio accessor on xarrays. import openeo_processes_dask.process_implementations.cubes._xr_interop From 8588f3d29953b2fca8ace72e77f81233737189f9 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Fri, 6 Oct 2023 11:38:54 +0200 Subject: [PATCH 4/7] Release 2023.10.1 (#169) release 2023.10.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1d01fd51..d63bbb5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.9.2" +version = "2023.10.1" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From c254f6ec484d27b2e0ee4465d3c4633bc48791ac Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Wed, 11 Oct 2023 09:30:00 +0200 Subject: [PATCH 5/7] Fix: update chunking of all dimensions to auto (except dimension) (#171) * update chunking of all dimensions to auto (except dimension) * bump ver --------- Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 5 ++++- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 8f5f400d..aad967be 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -49,8 +49,11 @@ def fit_curve( # so we do this to generate names locally parameters = {f"param_{i}": v for i, v in enumerate(parameters)} + chunking = {key: "auto" for key in data.dims if key != dimension} + chunking[dimension] = -1 + # The dimension along which to fit the curves cannot be chunked! - rechunked_data = data.chunk({dimension: -1}) + rechunked_data = data.chunk(chunking) def wrapper(f): def _wrap(*args, **kwargs): diff --git a/pyproject.toml b/pyproject.toml index d63bbb5b..a2ccff80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.1" +version = "2023.10.2" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From 6e77c76d8dc52d12302bf0638c3fe2144cdb29c6 Mon Sep 17 00:00:00 2001 From: clausmichele <31700619+clausmichele@users.noreply.github.com> Date: Wed, 11 Oct 2023 14:29:05 +0200 Subject: [PATCH 6/7] fix: reduce_spatial (#164) * fix reduce_spatial * Fix context * Revert "fix reduce_spatial" This reverts commit f1fdac3d2b51777ec1dc9dbd2b60b7613364a3c6. * Revert "Revert "fix reduce_spatial"" This reverts commit b3d45dd64ecb443fd8b45aba3d68ec66e9a585f9. * Revert "Fix context" This reverts commit c8fbcfc5b0b7dbcd29291dde5137d84159fb651a. --- .../process_implementations/cubes/reduce.py | 3 +- tests/test_reduce.py | 36 ++++++++++++++++++- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/openeo_processes_dask/process_implementations/cubes/reduce.py b/openeo_processes_dask/process_implementations/cubes/reduce.py index 0998fc81..5641edc4 100644 --- a/openeo_processes_dask/process_implementations/cubes/reduce.py +++ b/openeo_processes_dask/process_implementations/cubes/reduce.py @@ -54,9 +54,8 @@ def reduce_spatial( spatial_dims = data.openeo.spatial_dims if data.openeo.spatial_dims else None return data.reduce( reducer, - dimension=spatial_dims, + dim=spatial_dims, keep_attrs=True, - context=context, positional_parameters=positional_parameters, named_parameters=named_parameters, ) diff --git a/tests/test_reduce.py b/tests/test_reduce.py index 3c3804da..5bc4ed8c 100644 --- a/tests/test_reduce.py +++ b/tests/test_reduce.py @@ -5,7 +5,10 @@ import xarray as xr from openeo_pg_parser_networkx.pg_schema import ParameterReference -from openeo_processes_dask.process_implementations.cubes.reduce import reduce_dimension +from openeo_processes_dask.process_implementations.cubes.reduce import ( + reduce_dimension, + reduce_spatial, +) from tests.general_checks import general_output_checks from tests.mockdata import create_fake_rastercube @@ -39,3 +42,34 @@ def test_reduce_dimension( ) xr.testing.assert_equal(output_cube, input_cube.mean(dim="t")) + + +@pytest.mark.parametrize("size", [(30, 30, 20, 4)]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_reduce_spatial( + temporal_interval, bounding_box, random_raster_data, process_registry +): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["B02", "B03", "B04", "B08"], + backend="dask", + ) + + _process = partial( + process_registry["sum"].implementation, + ignore_nodata=True, + data=ParameterReference(from_parameter="data"), + ) + + output_cube = reduce_spatial(data=input_cube, reducer=_process) + + general_output_checks( + input_cube=input_cube, + output_cube=output_cube, + verify_attrs=False, + verify_crs=True, + ) + + xr.testing.assert_equal(output_cube, input_cube.sum(dim=["x", "y"])) From 2c8f1ccf9c6d036897eba23f5310483a427331a2 Mon Sep 17 00:00:00 2001 From: ValentinaHutter <85164505+ValentinaHutter@users.noreply.github.com> Date: Wed, 11 Oct 2023 16:10:40 +0200 Subject: [PATCH 7/7] 2023.10.3 (#173) * update prediction * update tests for code cov * handle one band --- .../process_implementations/ml/curve_fitting.py | 12 ++++++++++-- pyproject.toml | 2 +- tests/test_ml.py | 10 ++++++++-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index aad967be..98ef6db4 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -25,6 +25,10 @@ def fit_curve( raise DimensionNotAvailable( f"Provided dimension ({dimension}) not found in data.dims: {data.dims}" ) + bands_required = False + if "bands" in data.dims: + if len(data["bands"].values) == 1: + bands_required = data["bands"].values[0] try: # Try parsing as datetime first @@ -81,11 +85,15 @@ def _wrap(*args, **kwargs): .drop_dims(["cov_i", "cov_j"]) .to_array() .squeeze() - .transpose(*expected_dims_after) ) fit_result.attrs = data.attrs fit_result = fit_result.rio.write_crs(rechunked_data.rio.crs) + if bands_required and not "bands" in fit_result.dims: + fit_result = fit_result.assign_coords(**{"bands": bands_required}) + fit_result = fit_result.expand_dims(dim="bands") + + fit_result = fit_result.transpose(*expected_dims_after) return fit_result @@ -99,6 +107,7 @@ def predict_curve( ): labels_were_datetime = False dims_before = list(parameters.dims) + initial_labels = labels try: # Try parsing as datetime first @@ -108,7 +117,6 @@ def predict_curve( if np.issubdtype(labels.dtype, np.datetime64): labels_were_datetime = True - initial_labels = labels timestep = [ ( (np.datetime64(x) - np.datetime64("1970-01-01", "s")) diff --git a/pyproject.toml b/pyproject.toml index a2ccff80..426ffefa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.2" +version = "2023.10.3" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] diff --git a/tests/test_ml.py b/tests/test_ml.py index 4248b212..3f185fd1 100644 --- a/tests/test_ml.py +++ b/tests/test_ml.py @@ -83,8 +83,14 @@ def fitFunction(x, parameters): assert len(result.coords["y"]) == len(origin_cube.coords["y"]) assert len(result.coords["param"]) == len(parameters) + origin_cube_B02 = origin_cube.sel(bands=["B02"]) + result_B02 = fit_curve( + origin_cube_B02, parameters=parameters, function=_process, dimension="t" + ) + assert "bands" in result_B02.dims + assert result_B02["bands"].values == "B02" + labels = dimension_labels(origin_cube, origin_cube.openeo.temporal_dims[0]) - labels = [float(l) for l in labels] predictions = predict_curve( result, _process, @@ -96,7 +102,7 @@ def fitFunction(x, parameters): assert "param" not in predictions.dims assert result.rio.crs == predictions.rio.crs - labels = ["2020-02-02", "2020-03-02", "2020-04-02", "2020-05-02"] + labels = [0, 1, 2, 3] predictions = predict_curve( result, _process,