From c253c69d8f9b79d1073be919c06ae7cccace8528 Mon Sep 17 00:00:00 2001 From: Timothy Hodson <34148978+thodson-usgs@users.noreply.github.com> Date: Tue, 21 Nov 2023 16:43:42 -0600 Subject: [PATCH] Open without fsspec --- recipes/us-ssebop/recipe.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/recipes/us-ssebop/recipe.py b/recipes/us-ssebop/recipe.py index 67fb5feb71..4598df3274 100644 --- a/recipes/us-ssebop/recipe.py +++ b/recipes/us-ssebop/recipe.py @@ -4,12 +4,14 @@ import pandas as pd from pangeo_forge_recipes.patterns import ConcatDim, FilePattern -from pangeo_forge_recipes.transforms import Indexed, OpenURLWithFSSpec, StoreToZarr, T +from pangeo_forge_recipes.transforms import Indexed, StoreToZarr, T input_url_pattern = ( + 'zip+' 'https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/uswem/web/' 'conus/eta/modis_eta/daily/downloads/' 'det{yyyyjjj}.modisSSEBopETactual.zip' + '!/det{yyyyjjj}.modisSSEBopETactual.tif' ) start = date(2001, 1, 1) @@ -32,14 +34,15 @@ def _preproc(item: Indexed[T]) -> Indexed[T]: import numpy as np import rioxarray - index, f = item + index, url = item time_dim = index.find_concat_dim('time') time_index = index[time_dim].value time = dates[time_index] - da = rioxarray.open_rasterio(f.open()).drop('band') + da = rioxarray.open_rasterio(url).drop('band') da = da.rename({'x': 'lon', 'y': 'lat'}) ds = da.to_dataset(name='aet') + ds = ds['aet'].where(ds['aet'] != 9999) ds = ds.expand_dims(time=np.array([time])) return index, ds @@ -50,7 +53,6 @@ def expand(self, pcoll: beam.PCollection) -> beam.PCollection: recipe = ( beam.Create(pattern.items()) - | OpenURLWithFSSpec(max_concurrency=10, open_kwargs={'compression': 'zip'}) | Preprocess() | StoreToZarr( store_name='us-ssebop.zarr',