fix: streamline 1D results/parquet exports
tomsail committed Sep 14, 2024
1 parent 62c2d63 commit 1c4afc6
Showing 4 changed files with 70 additions and 59 deletions.
72 changes: 35 additions & 37 deletions pyposeidon/telemac.py
@@ -34,7 +34,7 @@
from pyposeidon.paths import DATA_PATH
from pyposeidon.utils.get_value import get_value
from pyposeidon.utils.converter import myconverter
from pyposeidon.utils.cpoint import closest_n_points
from pyposeidon.utils.cpoint import find_nearest_nodes
from pyposeidon.utils import data
from pyposeidon.utils.norm import normalize_column_names
from pyposeidon.utils.norm import normalize_varnames
@@ -194,14 +194,18 @@ def write_netcdf(ds, outpath):


def extract_t_elev_2D(
ds: xr.Dataset, x: float, y: float, var: str = "elev", xstr: str = "longitude", ystr: str = "latitude"
ds: xr.Dataset, x: float, y: float, var: str = "elev", xstr: str = "longitude", ystr: str = "latitude", max_dist: float = 1000,
):
lons, lats = ds[xstr].values, ds[ystr].values
indx, _ = closest_n_points(np.array([x, y]).T, 1, np.array([lons, lats]).T)
ds_ = ds.isel(node=indx[0])
elev_ = ds_[var].values
mesh = pd.DataFrame(np.vstack([x, y]).T, columns = ["lon", "lat"])
points = pd.DataFrame(np.vstack([lons, lats]).T, columns = ["lon", "lat"])
df = find_nearest_nodes(mesh, points, 1)
df = df[df.distance < max_dist]
indx = df["mesh_index"]
ds_ = ds.isel(node=indx.values[0])
out_ = ds_[var].values
t_ = [pd.Timestamp(ti) for ti in ds_.time.values]
return pd.Series(elev_, index=t_), float(ds_[xstr]), float(ds_[ystr])
return pd.Series(out_, index=t_), float(ds_[xstr]), float(ds_[ystr])


# Function to subset ERA data based on the mesh extent
@@ -1374,7 +1378,6 @@ def results(self, **kwargs):

if self.monitor:
logger.info("export observations file\n")
# idx = stations["gindex"] # not need for now

res_1D = os.path.join(path, "results_1D.slf")
xc = self.slf_to_xarray(res_1D, res_type="1D")
@@ -1426,50 +1429,45 @@ def set_obs(self, **kwargs):
logger.warning("no mesh available skipping \n")
return

gpoints = np.array(
list(
zip(
mesh = pd.DataFrame(
np.array(
[
self.mesh.Dataset.SCHISM_hgrid_node_x.values,
self.mesh.Dataset.SCHISM_hgrid_node_y.values,
)
)
)

coords = np.array([tgn.longitude.values, tgn.latitude.values]).T
cp, mask = closest_n_points(coords, 1, gpoints, max_dist)
mesh_index = cp
stations = gpoints[cp]

if len(stations) == 0:
self.mesh.Dataset.SCHISM_hgrid_node_y.values
]
).T,
columns = ["lon", "lat"])
points = pd.DataFrame(
np.array([tgn.longitude.values, tgn.latitude.values]).T,
columns = ["lon", "lat"])
df = find_nearest_nodes(mesh, points, 1)
df = df[df.distance < max_dist]

if len(df) == 0:
logger.warning("no observations available\n")

stations = pd.DataFrame(stations, columns=["lon", "lat"])
stations["z"] = 0
stations.index += 1
stations["gindex"] = mesh_index
stations["unique_id"] = stations.index
stations[id_str] = tgn[id_str].values[mask]
stations["longitude"] = stations.lon.values[mask]
stations["latitude"] = stations.lat.values[mask]
df["z"] = 0
df.index += 1
df["unique_id"] = df.index
df[id_str] = tgn[id_str].values

# convert to MERCATOR coordinates
# dirty fix (this needs to be fixed in TELEMAC directly)
x, y = longlat2spherical(stations["longitude"], stations["latitude"],0,0)
stations["x"] = x
stations["y"] = y

self.stations_mesh_id = stations
x, y = longlat2spherical(df["lon"], df["lat"],0,0)
df["x"] = x
df["y"] = y

logger.info("write out stations.csv file \n")
tgn[mask].to_csv(os.path.join(path, "stations.csv"), index=False)
tgn.to_csv(os.path.join(path, "stations.csv"), index=False)

logger.info("write out stations.in file \n")
# output to file
with open(os.path.join(path, "station.in"), "w") as f:
f.write(f"1 {stations.shape[0]}\n") # 1st line: number of periods and number of points
f.write(f"1 {df.shape[0]}\n") # 1st line: number of periods and number of points
f.write(
f"{0 + offset} {int(self.params['duration']) + offset} {self.params['tstep']}\n"
) # 2nd line: period 1: start time, end time and interval (in seconds)
stations.loc[:, ["x", "y", "unique_id", id_str]].to_csv(
df.loc[:, ["x", "y", "unique_id", id_str]].to_csv(
f, header=None, sep=" ", index=False
) # 3rd-10th line: output points; x coordinate, y coordinate, station number, and station name

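For reference, a minimal sketch of the nearest-node extraction pattern that extract_t_elev_2D now follows, assuming (as the diff suggests) that find_nearest_nodes returns a DataFrame with at least "mesh_index" and "distance" columns and that distances are in metres; the helper below and its defaults are illustrative, not part of pyposeidon.

import numpy as np
import pandas as pd
import xarray as xr

from pyposeidon.utils.cpoint import find_nearest_nodes

def nearest_node_series(ds: xr.Dataset, lon: float, lat: float, var: str = "elev", max_dist: float = 1000):
    # query point (e.g. a tide gauge) and candidate model nodes,
    # mirroring the argument order used in extract_t_elev_2D above
    mesh = pd.DataFrame([[lon, lat]], columns=["lon", "lat"])
    points = pd.DataFrame(
        np.vstack([ds["longitude"].values, ds["latitude"].values]).T,
        columns=["lon", "lat"],
    )
    df = find_nearest_nodes(mesh, points, 1)
    df = df[df.distance < max_dist]  # drop matches beyond max_dist (metres assumed)
    if df.empty:
        return None
    ds_ = ds.isel(node=df["mesh_index"].values[0])
    index = [pd.Timestamp(t) for t in ds_.time.values]
    return pd.Series(ds_[var].values, index=index)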
40 changes: 23 additions & 17 deletions pyposeidon/utils/data.py
@@ -15,7 +15,7 @@
from pyposeidon.mesh import r2d
import pyposeidon.model as pm
from pyposeidon.tools import flat_list
from pyposeidon.utils.cpoint import closest_n_points
from pyposeidon.telemac import extract_t_elev_2D
import xarray as xr
import glob
import logging
@@ -39,17 +39,6 @@ def get_output(solver_name: str, **kwargs):
return instance


def extract_t_elev_2D(
ds: xr.Dataset, x: float, y: float, var: str = "elev", xstr: str = "longitude", ystr: str = "latitude"
):
lons, lats = ds[xstr].values, ds[ystr].values
indx, _ = closest_n_points(np.array([x, y]).T, 1, np.array([lons, lats]).T)
ds_ = ds.isel(node=indx[0])
elev_ = ds_[var].values
t_ = [pd.Timestamp(ti) for ti in ds_.time.values]
return pd.Series(elev_, index=t_), float(ds_[xstr]), float(ds_[ystr])


class D3DResults:
def __init__(self, **kwargs):
rpath = kwargs.get("rpath", "./d3d/")
@@ -231,6 +220,7 @@ def __init__(self, **kwargs):
convert = kwargs.get("convert_results", True)
extract_TS = kwargs.get("extract_TS", False)
station_id_str = kwargs.get("id_str", "ioc_code")
max_dist = kwargs.get("max_dist", 1000)

if res_type not in ["1D", "2D"]:
raise ValueError("results_type needs to be '1D' or '2D'!")
@@ -248,14 +238,14 @@

datai = []

tag = kwargs.get("tag", "telemac2d")
module = kwargs.get("module", "telemac2d")

misc = kwargs.get("misc", {})

for folder in self.folders:
logger.info(" Combining output for folder {}\n".format(folder))

with open(folder + "/" + tag + "_model.json", "r") as f:
with open(folder + "/" + module + "_model.json", "r") as f:
data = json.load(f)
data = pd.json_normalize(data, max_level=0)
info = data.to_dict(orient="records")[0]
@@ -266,7 +256,15 @@
# read from output file
if convert:
xdat = glob.glob(folder + "/outputs/" + out_default)
var, model_xstr, model_ystr = "elev", "longitude", "latitude"
model_xstr, model_ystr = "longitude", "latitude"
if module == "telemac2d":
var = "elev"
elif module == "telemac3d":
var = "elev"
elif module == "tomawac":
var = "hm0"
else:
raise ValueError(f"module {module} not supported!")

if len(xdat) > 0:
datai.append(xdat) # append to list
@@ -284,7 +282,15 @@
else: # read from selafin file
xdat = glob.glob(folder + f"/results_{res_type}.slf")
datai.append(xdat) # append to list
var, model_xstr, model_ystr = "S", "x", "y"
model_xstr, model_ystr = "x", "y"
if module == "telemac2d":
var = "S"
elif module == "telemac3d":
var = "Z"
elif module == "tomawac":
var = "WH"
else:
raise ValueError(f"module {module} not supported!")

if not any(datai):
logger.warning("no output files found")
@@ -319,6 +325,6 @@ def __init__(self, **kwargs):
for i_s, id_ in enumerate(stations[station_id_str]):
s = stations[stations[station_id_str] == id_]
mod, mlon, mlat = extract_t_elev_2D(
self.Dataset, s.longitude.values[0], s.latitude.values[0], var, model_xstr, model_ystr
self.Dataset, s.longitude.values[0], s.latitude.values[0], var, model_xstr, model_ystr, max_dist=max_dist
)
mod.to_frame().to_parquet(f"{rpath}{id_}.parquet")
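The variable to extract differs between the converted zarr output and the raw Selafin file, and between TELEMAC modules. A small standalone sketch of the selection logic added above; the variable names come from the diff, while the helper itself is only illustrative and not part of pyposeidon.

def output_variable(module: str, converted: bool) -> str:
    # (name in the converted zarr output, name in the raw Selafin file)
    table = {
        "telemac2d": ("elev", "S"),
        "telemac3d": ("elev", "Z"),
        "tomawac": ("hm0", "WH"),
    }
    if module not in table:
        raise ValueError(f"module {module} not supported!")
    zarr_name, slf_name = table[module]
    return zarr_name if converted else slf_name

# e.g. output_variable("tomawac", converted=False) -> "WH"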
3 changes: 2 additions & 1 deletion pyposeidon/utils/norm.py
@@ -45,7 +45,8 @@
"U": "u",
"V": "v",
"S": "elev",
"HM0": "hm0",
"Z": "elev",
"WH": "hm0",
"DIRM": "dir",
"TM01": "tm01",
"TM02": "tm02",
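With the two new entries, the Selafin free-surface variable Z and the wave-height variable WH normalize to the same standard names used by the converted outputs. A hedged illustration of the effect, assuming normalize_varnames simply applies this lookup table; the fallback behaviour below is an assumption, not pyposeidon's actual implementation.

VAR_MAP = {"S": "elev", "Z": "elev", "WH": "hm0"}

def normalize(names):
    # assumed behaviour: known names are mapped, unknown names are lowercased
    return [VAR_MAP.get(n, n.lower()) for n in names]

# normalize(["Z", "WH"]) -> ["elev", "hm0"]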
14 changes: 10 additions & 4 deletions tests/test_telemac.py
@@ -7,6 +7,7 @@
import pathlib

import pyposeidon
from pyposeidon.utils import data
import pytest

from . import DATA_DIR
@@ -105,21 +106,26 @@ def test_telemac(tmpdir, case):
b.create()
b.mesh.Dataset.type[:] = "closed"
b.output()
b.set_obs()
b.save()
b.run()
b.results()
case["result_type"] = "2D"
case["convert_results"] = True
case["max_dist"] = 5000
res = data.get_output(**case)
zarr_tar = b.rpath + "/outputs/out_2D.zarr.tar"
assert os.path.exists(zarr_tar)
a = pyposeidon.model.read(rpath + b.module + "_model.json") # read model
a.create()
a.mesh.Dataset.type[:] = "closed"
a.output()
a.set_obs()
a.save()
a.run()
a.results()
zarr_tar = a.rpath + "/outputs/out_2D.zarr.tar"
case["result_type"] = "2D"
case["extract_TS"] = True
res = data.get_output(**case)
res = a.rpath + "/results_2D.slf"
assert os.path.exists(zarr_tar)
ds = xr.open_dataset(res, engine="selafin")
if "WH" in ds.variables:
max_WH = ds.WH.max().values
