Skip to content

Commit

Permalink
Removed special handling for rh_cor values in value_clipping.py
Browse files Browse the repository at this point in the history
The logic for handling `rh` values has been simplified to align with the general clipping rules. This change removes the exception cases previously applied to `rh`, ensuring consistent value clipping across all variables.
  • Loading branch information
ladsmund committed Oct 14, 2024
1 parent dbc495a commit a698e68
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 35 deletions.
18 changes: 4 additions & 14 deletions src/pypromice/process/value_clipping.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,10 @@ def clip_values(
if var not in list(ds.variables):
continue

# This is a special case for rh_u_cor and rh_l_cor where values are clipped to 0 and 100.
if var in ["rh_u_cor", "rh_l_cor"]:
# Nan inputs should stay nan
was_nan = ds[var].isnull()
if ~np.isnan(row.lo):
ds[var] = ds[var].where(ds[var] >= row.lo, other=0)
if ~np.isnan(row.hi):
ds[var] = ds[var].where( ds[var] <= row.hi, other=100)
ds[var] = ds[var].where(~was_nan)
else:
if ~np.isnan(row.lo):
ds[var] = ds[var].where(ds[var] >= row.lo)
if ~np.isnan(row.hi):
ds[var] = ds[var].where(ds[var] <= row.hi)
if ~np.isnan(row.lo):
ds[var] = ds[var].where(ds[var] >= row.lo)
if ~np.isnan(row.hi):
ds[var] = ds[var].where(ds[var] <= row.hi)

# Flag dependents as NaN if parent is NaN
for o in row.dependents_closure:
Expand Down
53 changes: 32 additions & 21 deletions tests/unit/test_value_clippping.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,37 +198,48 @@ def test_circular_dependencies(self):
check_dtype=True,
)

def test_rh_corrected_case(self):
"""
The rh corrected variables are treated differently in the clipping function.
"""
data_index = pd.RangeIndex(2)
rh_u = pd.Series(index=data_index, data=[0, 54], name="rh_u")
rh_u_cor = pd.Series(index=data_index, data=[0, np.nan], name="rh_u_cor")
rh_l = pd.Series(index=data_index, data=[-20, 54], name="rh_l")
rh_l_cor = pd.Series(index=data_index, data=[0, 254], name="rh_l_cor")
data = pd.concat([rh_u, rh_u_cor, rh_l, rh_l_cor], axis=1)
def test_rh_corrected(self):
variable_config = pd.DataFrame(
columns=["field", "lo", "hi", "OOL"],
data=[
["rh_u", 0, 100, "rh_u_cor"],
["rh_u", 0, 150, "rh_u_cor"],
["rh_u_cor", 0, 150, ""],
["rh_l_cor", np.nan, np.nan, ""],
["rh_l", 0, 100, "rh_l_cor"],
],
).set_index("field")
data_set = xr.Dataset(data)

rows_input = []
rows_expected = []
# All values are within the expected range
rows_input.append(dict(rh_u=42, rh_u_cor=43))
rows_expected.append(dict(rh_u=42, rh_u_cor=43))
# rh_u is below range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship
rows_input.append(dict(rh_u=-10, rh_u_cor=3))
rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan))
# rh_u is within range, but rh_u_cor is below range; rh_u_cor should be flagged
rows_input.append(dict(rh_u=54, rh_u_cor=-4))
rows_expected.append(dict(rh_u=54, rh_u_cor=np.nan))
# rh_u is above range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship
rows_input.append(dict(rh_u=160, rh_u_cor=120))
rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan))
# rh_u is within range, but rh_u_cor is above range; rh_u_cor should be flagged
rows_input.append(dict(rh_u=100, rh_u_cor=255))
rows_expected.append(dict(rh_u=100, rh_u_cor=np.nan))

# Prepare the data
df_input = pd.DataFrame(rows_input, dtype=float)
df_expected = pd.DataFrame(rows_expected, dtype=float)
data_set = xr.Dataset(df_input)

# Run the function
data_set_out = clip_values(data_set, variable_config)

# Convert to dataframe for easier comparison
data_frame_out = data_set_out.to_dataframe()
# The value of rh_u_cor should be nan since the input was nan
self.assertTrue(np.isnan(data_frame_out.iloc[1]["rh_u_cor"]))
# The value of rh_l_cor should not be changed since the hi threshold is not nan
self.assertEqual(data_frame_out.iloc[1]["rh_l_cor"], 254)
# The value of rh_l_cor should be nan since rh_l is below its threshold
self.assertTrue(np.isnan(data_frame_out.iloc[0]["rh_l_cor"]))
pd.testing.assert_frame_equal(
data_frame_out,
df_expected,
check_names=False,
check_dtype=True,
)

def test_nan_input(self):
"""
Expand Down

0 comments on commit a698e68

Please sign in to comment.