From e55d6f781ee6bcc4424c1b0b0910c8e5eb57720c Mon Sep 17 00:00:00 2001
From: thodson
Date: Thu, 9 Nov 2023 23:29:52 -0600
Subject: [PATCH] Setup recipe

---
 recipes/us-ssebop/meta.yaml        |  2 +-
 recipes/us-ssebop/recipe.py        | 76 ++++++++++++++++++++++++++++++
 recipes/us-ssebop/requirements.txt |  1 +
 3 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 recipes/us-ssebop/recipe.py
 create mode 100644 recipes/us-ssebop/requirements.txt

diff --git a/recipes/us-ssebop/meta.yaml b/recipes/us-ssebop/meta.yaml
index 4cdb909b90..e5d62ce211 100644
--- a/recipes/us-ssebop/meta.yaml
+++ b/recipes/us-ssebop/meta.yaml
@@ -2,7 +2,7 @@ title: 'US SSEBop Evapotranspiration'
 description: >
   Actual Evapotranspiration (ETa) produced using MODIS and the operational Simplified Surface Energy Balance (SSEBop) model
   for the period 2000 to 2022 at 1-kilometer (km) spatial resolution for the contiguous United States.
-pangeo_forge_version: '0.9.4'
+pangeo_forge_version: '0.10.3'
 pangeo_notebook_version: '2022.07.13'
 recipes:
   - id: us-ssebop
diff --git a/recipes/us-ssebop/recipe.py b/recipes/us-ssebop/recipe.py
new file mode 100644
index 0000000000..1d51212f45
--- /dev/null
+++ b/recipes/us-ssebop/recipe.py
@@ -0,0 +1,76 @@
+import apache_beam as beam
+import pandas as pd
+
+from datetime import date
+
+from pangeo_forge_recipes.patterns import ConcatDim, FilePattern
+
+from pangeo_forge_recipes.transforms import (
+    Indexed,
+    OpenURLWithFSSpec,
+    StoreToZarr,
+    T,
+)
+
+
+# One zip archive per day; `yyyyjjj` is the four-digit year followed by the
+# three-digit day of year.
+input_url_pattern = (
+    'https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/uswem/web/'
+    'conus/eta/modis_eta/daily/downloads/'
+    'det{yyyyjjj}.modisSSEBopETactual.zip'
+)
+
+start = date(2001, 1, 1)
+end = date(2022, 10, 7)
+dates = pd.date_range(start, end, freq='1D')
+
+
+def make_url(time: pd.Timestamp) -> str:
+    """Return the download URL of the daily archive for ``time``."""
+    return input_url_pattern.format(yyyyjjj=time.strftime("%Y%j"))
+
+
+pattern = FilePattern(make_url, ConcatDim(name='time', keys=dates, nitems_per_file=1))
+
+
+class Preprocess(beam.PTransform):
+    """Open each input raster as a dataset with a length-one time axis."""
+
+    @staticmethod
+    def _preproc(item: Indexed[T]) -> Indexed[T]:
+        """Convert one ``(index, file)`` pair into ``(index, dataset)``."""
+        import numpy as np
+        import rioxarray
+
+        index, f = item
+
+        # ``index`` is a mapping of pattern dimensions to positions, so it
+        # cannot subscript ``dates`` directly; look up the integer offset
+        # along the ``time`` concat dimension first.
+        time_dim = index.find_concat_dim('time')
+        time = dates[index[time_dim].value]
+
+        # ``drop_vars`` replaces the deprecated ``DataArray.drop``.
+        da = rioxarray.open_rasterio(f.open()).drop_vars('band')
+        da = da.rename({'x': 'lon', 'y': 'lat'})
+        ds = da.to_dataset(name='aet')
+        ds = ds.expand_dims(time=np.array([time]))
+
+        return index, ds
+
+    def expand(self, pcoll: beam.PCollection) -> beam.PCollection:
+        """Apply ``_preproc`` to every element of ``pcoll``."""
+        return pcoll | beam.Map(self._preproc)
+
+
+recipe = (
+    beam.Create(pattern.items())
+    | OpenURLWithFSSpec(max_concurrency=10, open_kwargs={'compression': 'zip'})
+    | Preprocess()
+    | StoreToZarr(
+        store_name="us-ssebop.zarr",
+        combine_dims=pattern.combine_dim_keys,
+        target_chunks={"time": 1, "lat": 2834 // 2, "lon": 6612 // 6},
+    )
+)
diff --git a/recipes/us-ssebop/requirements.txt b/recipes/us-ssebop/requirements.txt
new file mode 100644
index 0000000000..5ee1e9f6a4
--- /dev/null
+++ b/recipes/us-ssebop/requirements.txt
@@ -0,0 +1 @@
+rioxarray