National Water Model HDFReference Recipe #215

Open · wants to merge 4 commits into base: master · Changes from 2 commits
23 changes: 23 additions & 0 deletions recipes/NWM/meta.yaml
@@ -0,0 +1,23 @@
title: 'National Water Model - Short Range Forecast'
description: The National Water Model dataset is produced by the National Oceanic and Atmospheric Administration's (NOAA) Office of Water Prediction. It is a forecast model of water resources that provides multiple variables across the Continental United States (CONUS). The dataset is available through the Registry of Open Data on AWS as a collection of netCDF files that do not require any login authentication. This Pangeo-Forge recipe uses the HDFReferenceRecipe, which leverages kerchunk and fsspec_reference_maker to build an index so that the dataset can be read as if it were an ARCO (Analysis-Ready, Cloud-Optimized) dataset.
pangeo_forge_version: '0.9.0'
pangeo_notebook_version: '2022.06.02'
recipes:
- id: NWM
object: 'recipe:recipe'
provenance:
providers:
- name: 'National Oceanic and Atmospheric Administration (NOAA) Office of Water Prediction'
description: 'NOAA National Water Model CONUS Retrospective Dataset was accessed on 11-01-2022 from https://registry.opendata.aws/nwm-archive.'
roles:
- producer
- licensor
url: https://water.noaa.gov/about/nwm
license: Open
andersy005 marked this conversation as resolved.

maintainers:
- name: 'Raphael Hagen'
orcid: '0000-0003-1994-1153'
github: norlandrhagen
bakery:
id: 'pangeo-ldeo-nsf-earthcube'
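The `object: 'recipe:recipe'` entry above points Pangeo-Forge at the `recipe` attribute of the `recipe` module (i.e. `recipes/NWM/recipe.py` below). As a rough sketch of how such a `module:attribute` spec could be resolved — the `resolve_recipe` helper and the stand-in module are hypothetical, not part of pangeo-forge-recipes:

```python
import importlib
import types


def resolve_recipe(spec, module=None):
    """Resolve a 'module:attribute' spec (sketch; assumes exactly one colon)."""
    mod_name, attr = spec.split(':')
    # Allow injecting a module for testing; otherwise import by name.
    mod = module if module is not None else importlib.import_module(mod_name)
    return getattr(mod, attr)


# Hypothetical stand-in for recipes/NWM/recipe.py, for illustration only.
fake = types.ModuleType('recipe')
fake.recipe = 'an-HDFReferenceRecipe-instance'

assert resolve_recipe('recipe:recipe', module=fake) == 'an-HDFReferenceRecipe-instance'
```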
37 changes: 37 additions & 0 deletions recipes/NWM/recipe.py
@@ -0,0 +1,37 @@
# Author: Norland Raphael Hagen @norlandrhagen 11-01-2022
# Pangeo-Forge recipe for National Water Model - Short Range Forecast # noqa: E501
# Heavily adapted from Kerchunk example written by Rich Signell (USGS) @rsignell-usgs. https://gist.github.com/rsignell-usgs/ef435a53ac530a2843ce7e1d59f96e22 # noqa: E501

import os

import fsspec

from pangeo_forge_recipes.patterns import pattern_from_file_sequence
from pangeo_forge_recipes.recipes.reference_hdf_zarr import HDFReferenceRecipe

# Create fsspec aws filesystem
fs = fsspec.filesystem('s3', anon=True, skip_instance_cache=True)
flist = fs.glob('noaa-nwm-pds/nwm.*/short_range/nwm.*.short_range.channel_rt.f001.conus.nc')

# Join the "best time series" from past forecasts with the latest forecast.
# Remove the first day of data, since this is a rolling collection and
# we don't want to access files that will soon be removed.
# Also use all the files from the last forecast cycle.

last_dir = os.path.dirname(flist[-1])
last_file = os.path.basename(flist[-1]).split('.')
last_files = fs.glob(
    f'{last_dir}/{last_file[0]}.{last_file[1]}.{last_file[2]}.channel_rt.*.conus.nc'
)

# Skip the first of the last_files since it's a duplicate
flist.extend(last_files[1:])

# Append s3 prefix
urls = ['s3://' + f for f in flist]

# Create filepattern from urls
pattern = pattern_from_file_sequence(urls, 'time')

# Create HDFReference recipe from pattern
recipe = HDFReferenceRecipe(pattern, netcdf_storage_options={'anon': True})
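The last-cycle glob construction in `recipe.py` can be illustrated with plain strings and no S3 access. The sample paths below are hypothetical stand-ins for what `fs.glob` would return, and the assumption that a short-range cycle spans forecast hours f001–f018 is illustrative:

```python
import fnmatch
import os

# Hypothetical listing of one forecast cycle (real names come from fs.glob).
cycle = [
    f'noaa-nwm-pds/nwm.20221101/short_range/nwm.t00z.short_range.channel_rt.f{h:03d}.conus.nc'
    for h in range(1, 19)  # assumed forecast hours f001-f018
]

last = cycle[0]
last_dir = os.path.dirname(last)
parts = os.path.basename(last).split('.')  # ['nwm', 't00z', 'short_range', ...]

# Rebuild the wildcard pattern exactly as the recipe does.
pattern = f'{last_dir}/{parts[0]}.{parts[1]}.{parts[2]}.channel_rt.*.conus.nc'
last_files = [p for p in cycle if fnmatch.fnmatch(p, pattern)]

assert last_files == cycle  # every file of the cycle matches the pattern
```

Only `last_files[1:]` is appended in the recipe because the cycle's `f001` file is already the final element of the "best time series" list built from the first glob.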