make dimension.py xarray compatible (#397)
* make dimension.py xarray compatible

* convert final method in the dimension module

* nanmin instead of zerovalue in square domain method

* make test steps skill run

* undo accidental change

* remove commented out code

* The dataset can contain more than one dataarray

* Address pull request comments

* Add links to dataset documentation everywhere
mats-knmi committed Sep 23, 2024
1 parent b3aa009 commit 016d28f
Showing 8 changed files with 522 additions and 540 deletions.
18 changes: 13 additions & 5 deletions pysteps/converters.py
@@ -12,6 +12,7 @@
"""

import numpy as np
import numpy.typing as npt
import pyproj
import xarray as xr

@@ -67,6 +68,15 @@ def _convert_proj4_to_grid_mapping(proj4str):
return grid_mapping_var_name, grid_mapping_name, params


def compute_lat_lon(
    x_r: npt.ArrayLike, y_r: npt.ArrayLike, projection: str
) -> tuple[npt.ArrayLike, npt.ArrayLike]:
    x_2d, y_2d = np.meshgrid(x_r, y_r)
    pr = pyproj.Proj(projection)
    lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True)
    return lat.reshape(x_2d.shape), lon.reshape(x_2d.shape)


def convert_to_xarray_dataset(
precip: np.ndarray,
quality: np.ndarray | None,
@@ -105,9 +115,7 @@ def convert_to_xarray_dataset(
if metadata["yorigin"] == "upper":
y_r = np.flip(y_r)

x_2d, y_2d = np.meshgrid(x_r, y_r)
pr = pyproj.Proj(metadata["projection"])
lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True)
lat, lon = compute_lat_lon(x_r, y_r, metadata["projection"])

(
grid_mapping_var_name,
@@ -166,7 +174,7 @@ def convert_to_xarray_dataset(
),
"lon": (
["y", "x"],
lon.reshape(precip.shape),
lon,
{
"long_name": "longitude coordinate",
"standard_name": "longitude",
@@ -176,7 +184,7 @@ def convert_to_xarray_dataset(
),
"lat": (
["y", "x"],
lat.reshape(precip.shape),
lat,
{
"long_name": "latitude coordinate",
"standard_name": "latitude",
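
For context, here is a minimal usage sketch of the new compute_lat_lon helper on its own. The grid spacing and the PROJ.4 string below are hypothetical example values, not taken from this commit.

# Illustrative use of pysteps.converters.compute_lat_lon; the coordinate arrays
# and the Swiss-style projection string are made-up example values.
import numpy as np

from pysteps.converters import compute_lat_lon

# 1 km grid in projected (Cartesian) coordinates, in metres
x_r = np.arange(255500.0, 965500.0, 1000.0)
y_r = np.arange(-160000.0, 480000.0, 1000.0)
projection = "+proj=somerc +lon_0=7.439583 +lat_0=46.952406 +x_0=600000 +y_0=200000 +ellps=bessel +units=m +no_defs"

# Returns 2-D latitude and longitude arrays with shape (len(y_r), len(x_r))
lat, lon = compute_lat_lon(x_r, y_r, projection)
print(lat.shape, lon.shape)
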
87 changes: 87 additions & 0 deletions pysteps/io/importers.py
@@ -65,6 +65,93 @@
| zr_b | the Z-R exponent b in Z = a*R**b |
+------------------+----------------------------------------------------------+
The data and metadata are then postprocessed into an xarray dataset. This dataset will
always contain an x and y dimension, but can be extended with a time dimension and/or
an ensemble member dimension over the course of the process.

The dataset can contain the following coordinate variables:

.. tabularcolumns:: |p{2cm}|L|

+---------------+-------------------------------------------------------------------------------------------+
| Coordinate | Description |
+===============+===========================================================================================+
| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
+---------------+-------------------------------------------------------------------------------------------+
| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
+---------------+-------------------------------------------------------------------------------------------+
| lat | latitude coordinate in degrees |
+---------------+-------------------------------------------------------------------------------------------+
| lon | longitude coordinate in degrees |
+---------------+-------------------------------------------------------------------------------------------+
| time | forecast time in seconds since forecast start time |
+---------------+-------------------------------------------------------------------------------------------+
| member | ensemble member number (integer) |
+---------------+-------------------------------------------------------------------------------------------+

The dataset can contain the following data variables:

.. tabularcolumns:: |p{2cm}|L|

+-------------------+-----------------------------------------------------------------------------------------------------------+
| Variable | Description |
+===================+===========================================================================================================+
| precip_intensity, | precipitation data; depending on the unit, it is stored in one of these three variables: |
| precip_accum | precip_intensity if the unit is ``mm/h``, precip_accum if the unit is ``mm``, or reflectivity if the unit |
| or reflectivity | is ``dBZ``; the attributes of this variable contain metadata relevant to this variable (see below) |
+-------------------+-----------------------------------------------------------------------------------------------------------+
| quality | value between 0 and 1 denoting the quality of the precipitation data, currently not used for anything |
+-------------------+-----------------------------------------------------------------------------------------------------------+

Some of the metadata in the metadata dictionary is not explicitly stored in the dataset,
but is still implicitly present. For example, ``x1`` can easily be recovered by taking the first
value of the x coordinate variable. Metadata that is not implicitly present is explicitly
stored either in the dataset's global attributes or as attributes of the precipitation variable.
Data that relates to the entire dataset is stored in the global attributes. The following data
is stored in the global attributes:

.. tabularcolumns:: |p{2cm}|L|

+------------------+----------------------------------------------------------+
| Key | Value |
+==================+==========================================================+
| projection | PROJ.4-compatible projection definition |
+------------------+----------------------------------------------------------+
| institution | name of the institution that provides the data |
+------------------+----------------------------------------------------------+
| precip_var | the name of the precipitation variable in this dataset |
+------------------+----------------------------------------------------------+

The following data is stored as attributes of the precipitation variable:

.. tabularcolumns:: |p{2cm}|L|

+------------------+----------------------------------------------------------+
| Key | Value |
+==================+==========================================================+
| units | the physical unit of the data: 'mm/h', 'mm' or 'dBZ' |
+------------------+----------------------------------------------------------+
| transform | the transformation of the data: None, 'dB', 'Box-Cox' or |
| | others |
+------------------+----------------------------------------------------------+
| accutime | the accumulation time in minutes of the data, float |
+------------------+----------------------------------------------------------+
| threshold | the rain/no rain threshold with the same unit, |
| | transformation and accutime as the data |
+------------------+----------------------------------------------------------+
| zerovalue | the value assigned to the no-rain pixels with the same |
| | unit, transformation and accutime as the data |
+------------------+----------------------------------------------------------+
| zr_a | the Z-R constant a in Z = a*R**b |
+------------------+----------------------------------------------------------+
| zr_b | the Z-R exponent b in Z = a*R**b |
+------------------+----------------------------------------------------------+

Furthermore, the dataset can contain some additional metadata to make the dataset
CF-compliant.

Available Importers
-------------------
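
To make the documented layout concrete, here is a minimal sketch of inspecting such a dataset. It uses the test helper updated below and assumes the pysteps example data for the default "mch" source is installed and configured in the pystepsrc file.

# Sketch: inspecting the xarray dataset produced by the importers, assuming the
# "mch" example data from pysteps-data is available on this machine.
from pysteps.tests.helpers import get_precipitation_fields

dataset = get_precipitation_fields(num_prev_files=1, log_transform=False, upscale=2000)

precip_var = dataset.attrs["precip_var"]  # e.g. "precip_intensity" for mm/h data
precip = dataset[precip_var]              # DataArray with dims ("time", "y", "x")

print(dataset.attrs["projection"])        # PROJ.4-compatible projection definition
print(precip.attrs["units"])              # 'mm/h', 'mm' or 'dBZ'
print(precip.attrs["threshold"])          # rain/no rain threshold in the same unit
print(float(dataset["x"].values[0]))      # implicit metadata such as x1
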
40 changes: 15 additions & 25 deletions pysteps/tests/helpers.py
@@ -14,6 +14,7 @@
import pysteps as stp
from pysteps import io, rcparams
from pysteps.utils import aggregate_fields_space
from pysteps.utils.dimension import clip_domain

_reference_dates = dict()
_reference_dates["bom"] = datetime(2018, 6, 16, 10, 0)
@@ -53,7 +54,6 @@ def get_precipitation_fields(
num_prev_files=0,
num_next_files=0,
return_raw=False,
metadata=False,
upscale=None,
source="mch",
log_transform=True,
@@ -100,9 +100,6 @@
The pre-processing steps are: 1) Convert to mm/h,
2) Mask invalid values, 3) Log-transform the data [dBR].
metadata: bool, optional
If True, also return file metadata.
upscale: float or None, optional
Upscale fields in space during the pre-processing steps.
If it is None, the precipitation field is not modified.
@@ -127,8 +124,8 @@
Returns
-------
reference_field : array
metadata : dict
dataset: xarray.Dataset
As described in the documentation of :py:mod:`pysteps.io.importers`.
"""

if source == "bom":
@@ -186,41 +183,34 @@
# Read the radar composites
importer = io.get_method(importer_name, "importer")

ref_dataset = io.read_timeseries(fns, importer, **_importer_kwargs)
dataset = io.read_timeseries(fns, importer, **_importer_kwargs)

if not return_raw:
if (num_prev_files == 0) and (num_next_files == 0):
# Remove time dimension
reference_field = np.squeeze(reference_field)
precip_var = dataset.attrs["precip_var"]

# Convert to mm/h
ref_dataset = stp.utils.to_rainrate(ref_dataset)
dataset = stp.utils.to_rainrate(dataset)
precip_var = dataset.attrs["precip_var"]

# Clip domain
ref_dataset = stp.utils.clip_domain(ref_dataset, clip)
dataset = clip_domain(dataset, clip)

# Upscale data
reference_field, ref_metadata = aggregate_fields_space(
reference_field, ref_metadata, upscale
)
dataset = aggregate_fields_space(dataset, upscale)

# Mask invalid values
reference_field = np.ma.masked_invalid(reference_field)
valid_mask = np.isfinite(dataset[precip_var].values)

if log_transform:
# Log-transform the data [dBR]
reference_field, ref_metadata = stp.utils.dB_transform(
reference_field, ref_metadata, threshold=0.1, zerovalue=-15.0
)
dataset = stp.utils.dB_transform(dataset, threshold=0.1, zerovalue=-15.0)

# Set missing values with the fill value
np.ma.set_fill_value(reference_field, ref_metadata["zerovalue"])
reference_field.data[reference_field.mask] = ref_metadata["zerovalue"]

if metadata:
return reference_field, ref_metadata
metadata = dataset[precip_var].attrs
zerovalue = metadata["zerovalue"]
dataset[precip_var].data[~valid_mask] = zerovalue

return reference_field
return dataset


def smart_assert(actual_value, expected, tolerance=None):
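
For downstream callers of this helper, the migration from the old (field, metadata) tuple to the new dataset return value looks roughly as follows; the old call in the comment uses the metadata keyword removed above, and the variable names are illustrative.

# Migration sketch for code that used the tuple-returning helper; attribute
# names follow the dataset conventions documented in pysteps.io.importers.
from pysteps.tests.helpers import get_precipitation_fields

# Old style (before this commit):
#     precip, metadata = get_precipitation_fields(num_prev_files=2, metadata=True, upscale=2000)

# New style (after this commit):
dataset = get_precipitation_fields(num_prev_files=2, upscale=2000)
precip_var = dataset.attrs["precip_var"]
precip = dataset[precip_var].values    # plain numpy array, shape (time, y, x)
metadata = dataset[precip_var].attrs   # the former metadata dict now lives in the attrs
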
20 changes: 11 additions & 9 deletions pysteps/tests/test_nowcasts_steps.py
@@ -7,7 +7,6 @@
from pysteps import io, motion, nowcasts, verification
from pysteps.tests.helpers import get_precipitation_fields


steps_arg_names = (
"n_ens_members",
"n_cascade_levels",
@@ -44,28 +43,29 @@ def test_steps_skill(
):
"""Tests STEPS nowcast skill."""
# inputs
precip_input, metadata = get_precipitation_fields(
dataset_input = get_precipitation_fields(
num_prev_files=2,
num_next_files=0,
return_raw=False,
metadata=True,
upscale=2000,
)
precip_input = precip_input.filled()

precip_obs = get_precipitation_fields(
dataset_obs = get_precipitation_fields(
num_prev_files=0, num_next_files=3, return_raw=False, upscale=2000
)[1:, :, :]
precip_obs = precip_obs.filled()
).isel(time=slice(1, None, None))
precip_var = dataset_input.attrs["precip_var"]
metadata = dataset_input[precip_var].attrs
precip_data = dataset_input[precip_var].values

pytest.importorskip("cv2")
oflow_method = motion.get_method("LK")
retrieved_motion = oflow_method(precip_input)
retrieved_motion = oflow_method(precip_data)

nowcast_method = nowcasts.get_method("steps")

precip_forecast = nowcast_method(
precip_input,
precip_data,
retrieved_motion,
timesteps=timesteps,
precip_thr=metadata["threshold"],
@@ -86,7 +86,9 @@
timesteps if isinstance(timesteps, int) else len(timesteps)
)

crps = verification.probscores.CRPS(precip_forecast[:, -1], precip_obs[-1])
crps = verification.probscores.CRPS(
precip_forecast[:, -1], dataset_obs[precip_var].values[-1]
)
assert crps < max_crps, f"CRPS={crps:.2f}, required < {max_crps:.2f}"


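
As a standalone illustration of the skill metric used in this test, here is a small synthetic CRPS computation; the random fields below are purely illustrative and carry no meteorological meaning.

# Synthetic sketch of pysteps.verification.probscores.CRPS, which compares an
# ensemble forecast (first axis = ensemble members) against one observation.
import numpy as np

from pysteps import verification

rng = np.random.default_rng(42)
forecast_ensemble = rng.gamma(shape=2.0, scale=1.0, size=(6, 200, 200))  # (members, y, x)
observation = rng.gamma(shape=2.0, scale=1.0, size=(200, 200))           # (y, x)

crps = verification.probscores.CRPS(forecast_ensemble, observation)
print(f"CRPS={crps:.3f}")
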