Skip to content

Commit

Permalink
Merge pull request #17 from lucduron/limit-data-pr14
Browse files Browse the repository at this point in the history
Limit compulsory objects in DataSet dims/data_vars/attrs/coords
  • Loading branch information
tomsail authored Mar 2, 2024
2 parents 0e68228 + 742560b commit d8b2690
Show file tree
Hide file tree
Showing 4 changed files with 366 additions and 137 deletions.
86 changes: 81 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,21 +1,97 @@
# xarray backend for Selafin formats

Supports lazy loading by default.

## Dev guide

to have the backend working in xarray, follow these steps
To have the backend working in xarray, follow these steps:

```
pip install xarray-selafin
```

## Read selafin
## Read Selafin

```python
import xarray as xr
ds = xr.open_dataset("input_file.slf", engine='selafin')
ds = xr.open_dataset("tests/data/r3d_tidal_flats.slf", engine="selafin")
ds = xr.open_dataset("tests/data/r3d_tidal_flats.slf", lang="fr", engine="selafin") # if variables are in French
```

```
<xarray.Dataset>
Dimensions: (time: 17, node: 648, plan: 21)
Coordinates:
x (node) float32 ...
y (node) float32 ...
* time (time) datetime64[ns] 1900-01-01 ... 1900-01-02T20:26:40
Dimensions without coordinates: node, plan
Data variables:
Z (time, node, plan) <class 'numpy.float64'> ...
U (time, node, plan) <class 'numpy.float64'> ...
V (time, node, plan) <class 'numpy.float64'> ...
W (time, node, plan) <class 'numpy.float64'> ...
MUD (time, node, plan) <class 'numpy.float64'> ...
Attributes:
title: Sloped flume Rouse profile test
language: en
float_size: 4
endian: >
params: (1, 0, 0, 0, 0, 0, 21, 5544, 0, 1)
ipobo: [ 1 264 263 ... 5411 5412 5413]
ikle2: [[155 153 156]\n [310 307 305]\n [308 310 305]\n ...\n [537 ...
ikle3: [[ 155 153 156 803 801 804]\n [ 310 307 305 ...
variables: {'Z': ('ELEVATION Z', 'M'), 'U': ('VELOCITY U', 'M/S'), 'V':...
date_start: (1900, 1, 1, 0, 0, 0)
```

## Indexing

```python
ds_last = ds.isel(time=-1) # last frame
```
## Write selafin

## Manipulate variables

```python
ds.selafin.write('output_file.slf')
ds = ds.assign(UTIMES100=lambda x: x.U * 100) # Add a new variable
# ds.attrs["variables"]["UTIMES100"] = ("UTIMES100", "My/Unit") # To provide variable name and unit (optional)
ds.drop_vars(["W"]) # Remove variable `VELOCITY W`
```

## Write Selafin

```python
ds.selafin.write("output_file.slf")
```

## DataSet content

### Dimensions
* time
* node
* plan (only in 3D)

### Coordinates

| Coordinate | Description |
|------------|------------------------|
| x | East mesh coordinates |
| y | North mesh coordinates |
| time | Datetime serie |

### Attributes

All attributes are optional except `ikle2`:

| Attribute | Description | Default value |
|------------|-------------------------------------------------------------------------|--------------------------|
| title | Serafin title | "" (empty string) |
| language | Language for variable detection | "en" |
| float_size | Float size | 4 (single precision) |
| endian | File endianness | ">" |
| params | Table of integer parameters | (can be rebuilt) |
| ikle2 | Connectivity table in 2D (1-indexed) | - |
| ikle3 | Connectivity table in 3D (1-indexed, only in 3D, optional) | (can be rebuilt from 2D) |
| variables | Dictionary with variable names and units (key is variable abbreviation) | - |
| date_start | Starting date with integers (year to seconds) | (from first time serie) |
97 changes: 73 additions & 24 deletions tests/io_test.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
import pytest
import xarray as xr
import numpy as np

try:
import dask.array as da

DASK_AVAILABLE = True
except ImportError:
DASK_AVAILABLE = False

TIDAL_FLATS = pytest.mark.parametrize(
"slf_in",
Expand All @@ -18,60 +11,116 @@
)


def write_netcdf(ds, nc_out):
# Remove dict and multi-dimensional arrays not supported in netCDF
del ds.attrs["variables"]
del ds.attrs["ikle2"]
try:
del ds.attrs["ikle3"]
except KeyError:
pass
# Write netCDF file
ds.to_netcdf(nc_out)


@TIDAL_FLATS
def test_open_dataset(slf_in):
ds = xr.open_dataset(slf_in, engine="selafin")
assert isinstance(ds, xr.Dataset)

# Dimensions
assert ds.sizes['time'] == 17
assert ds.sizes['node'] == 648
if "r3d" in slf_in:
assert ds.sizes['plan'] == 21

# Coordinates
assert "x" in ds.coords
assert "y" in ds.coords
assert "time" in ds.coords

# Attributes
assert ds.attrs["endian"] == ">"
assert ds.attrs["date_start"] == (1900, 1, 1, 0, 0, 0)
assert "ipobo" in ds.attrs
assert "ikle2" in ds.attrs
if "r3d_" in slf_in:
assert "ikle3" in ds.attrs
else:
assert "ikle3" not in ds.attrs


@TIDAL_FLATS
def test_to_netcdf(tmp_path, slf_in):
ds_slf = xr.open_dataset(slf_in, engine="selafin")
nc_out = tmp_path / "test.nc"
ds_slf.to_netcdf(nc_out)
write_netcdf(ds_slf, nc_out)
ds_nc = xr.open_dataset(nc_out)
assert ds_nc.equals(ds_slf)


@TIDAL_FLATS
def test_to_selafin(tmp_path, slf_in):
ds_slf = xr.open_dataset(slf_in, engine="selafin")

# Remove some data which is rebuilt
del ds_slf.attrs["date_start"]

slf_out = tmp_path / "test.slf"
ds_slf.selafin.write(slf_out)
ds_slf2 = xr.open_dataset(slf_out, engine="selafin")
assert ds_slf2.equals(ds_slf)

# Compare binary files
with open(slf_in, 'rb') as in_slf1, open(slf_out, 'rb') as in_slf2:
assert in_slf1.read() == in_slf2.read()


@TIDAL_FLATS
def test_to_selafin_eager_mode(tmp_path, slf_in):
ds_slf = xr.open_dataset(slf_in, lazy_loading=False, engine="selafin")

# Remove some data which is rebuilt
del ds_slf.attrs["date_start"]

slf_out = tmp_path / "test.slf"
ds_slf.selafin.write(slf_out)
ds_slf2 = xr.open_dataset(slf_out, engine="selafin")
assert ds_slf2.equals(ds_slf)

# Compare binary files
with open(slf_in, 'rb') as in_slf1, open(slf_out, 'rb') as in_slf2:
assert in_slf1.read() == in_slf2.read()


@TIDAL_FLATS
def test_slice(tmp_path, slf_in):
# simple selection
ds_slf = xr.open_dataset(slf_in, engine="selafin")
nc_out = tmp_path / "test1.nc"
ds_slice = ds_slf.isel(time=0)
ds_slice.to_netcdf(nc_out)
write_netcdf(ds_slice, nc_out)
ds_nc = xr.open_dataset(nc_out)
assert ds_nc.equals(ds_slice)
# simple range
ds_slf = xr.open_dataset(slf_in, engine="selafin")
nc_out = tmp_path / "test2.nc"
ds_slice = ds_slf.isel(time=slice(0, 10))
ds_slice.to_netcdf(nc_out)
ds_nc = xr.open_dataset(nc_out)
assert ds_nc.equals(ds_slice)
# multiple slices
ds_slf = xr.open_dataset(slf_in, engine="selafin")
nc_out = tmp_path / "test3.nc"
ds_slice = ds_slf.isel(time=slice(0, 10), plan=0)
ds_slice.to_netcdf(nc_out)
write_netcdf(ds_slice, nc_out)
ds_nc = xr.open_dataset(nc_out)
assert ds_nc.equals(ds_slice)
# # multiple range slices # FIXME: not working yet
# ds_slf = xr.open_dataset(slf_in, engine="selafin")
# nc_out = tmp_path / "test3.nc"
# ds_slice = ds_slf.isel(time=slice(0, 10), plan=slice(5, 10))
# ds_slice.to_netcdf(nc_out)
# ds_nc = xr.open_dataset(nc_out)
# assert ds_nc.equals(ds_slice)
if "r3d" in slf_in:
# multiple slices
ds_slf = xr.open_dataset(slf_in, engine="selafin")
nc_out = tmp_path / "test3.nc"
ds_slice = ds_slf.isel(time=slice(0, 10), plan=0)
write_netcdf(ds_slice, nc_out)
ds_nc = xr.open_dataset(nc_out)
assert ds_nc.equals(ds_slice)
# multiple range slices
ds_slf = xr.open_dataset(slf_in, engine="selafin")
nc_out = tmp_path / "test4.nc"
ds_slice = ds_slf.isel(time=slice(0, 10), plan=slice(5, 10))
write_netcdf(ds_slice, nc_out)
ds_nc = xr.open_dataset(nc_out)
assert ds_nc.equals(ds_slice)
36 changes: 34 additions & 2 deletions xarray_selafin/Serafin.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""!
Adapted from https://github.com/CNR-Engineering/PyTelTools
Read/Write Serafin files and manipulate associated data.
Handles Serafin file with:
Expand All @@ -22,6 +24,10 @@

logger = logging.getLogger(__name__)


# Default language for variables
LANG = "en"

# Encoding Information Type (EIT) for Serafin title, variable names and units
SLF_EIT = "iso-8859-1"

Expand Down Expand Up @@ -127,7 +133,7 @@ class SerafinHeader:
[shape = nb_nodes]
"""

def __init__(self, title="", format_type="SERAFIN ", lang="en", endian=">"):
def __init__(self, title="", format_type="SERAFIN ", lang=LANG, endian=">"):
"""
@param title <str>: title of the simulation
@param format_type <str>:
Expand Down Expand Up @@ -218,6 +224,32 @@ def _set_file_format_and_precision(self, file_format):
def _set_has_knolg(self):
self.has_knolg = self.params[7] != 0 or self.params[8] != 0

def compute_ikle(self, nb_planes, nb_nodes_per_elem):
ikle_bottom_pattern = np.concatenate(
(self.ikle_2d, self.ikle_2d + self.nb_nodes_2d), axis=1
)
ikle = np.empty(
(nb_planes - 1, self.nb_elements // (nb_planes - 1), nb_nodes_per_elem),
dtype=np.int64,
)
for i in range(nb_planes - 1):
ikle[i] = ikle_bottom_pattern + i * self.nb_nodes_2d
return ikle.flatten()

def build_params(self):
self.params = (
1,
0,
self.mesh_origin[0],
self.mesh_origin[0],
0,
0,
self.nb_planes,
self.nb_elements,
0,
1
)

def _build_ikle_2d(self):
if self.is_2d:
self.ikle_2d = self.ikle.reshape(self.nb_elements, self.nb_nodes_per_elem)
Expand Down Expand Up @@ -549,7 +581,7 @@ def add_variable_from_ID(self, var_ID):

def set_mesh_origin(self, x, y):
"""!
@brief: Set mesh origin coordinates (/!\ coordinates should be integers!)
@brief: Set mesh origin coordinates (Beware: coordinates should be integers!)
@param x <int>: X-coordinate of the origin
@param y <int>: Y-coordinate of the origin
"""
Expand Down
Loading

0 comments on commit d8b2690

Please sign in to comment.