Skip to content

Commit

Permalink
Merge pull request #149 from NOAA-OWP/public_dataformat
Browse files Browse the repository at this point in the history
Public rasterize and vectorize method
  • Loading branch information
fernando-aristizabal authored Aug 3, 2023
2 parents c0ed2a1 + 63154c6 commit 5a5b469
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ authors = [
requires-python = ">=3.8"
keywords = ["geospatial", "evaluations"]
license = {text = "MIT"}
version = "0.1.1"
version = "0.1.2"
dynamic = ["readme", "dependencies"]


Expand Down
37 changes: 37 additions & 0 deletions src/gval/accessors/gval_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@

import pandas as pd
from pandera.typing import DataFrame
import xarray as xr

from gval.comparison.compute_categorical_metrics import _compute_categorical_metrics
from gval.utils.schemas import Metrics_df
from gval.homogenize.rasterize import _rasterize_data


@pd.api.extensions.register_dataframe_accessor("gval")
Expand Down Expand Up @@ -88,3 +90,38 @@ def compute_categorical_metrics(
average=average,
weights=weights,
)

def rasterize_data(
    self, reference_map: Union[xr.Dataset, xr.DataArray], rasterize_attributes: list
) -> Union[xr.Dataset, xr.DataArray]:
    """
    Rasterize the vector data held by this accessor against a reference raster.

    This is a convenience wrapper; for finer control use `make_geocube`
    from the geocube package directly.

    Parameters
    ----------
    reference_map: Union[xr.Dataset, xr.DataArray]
        Map to reference in creation of rasterized vector map
    rasterize_attributes: list
        Attributes to rasterize

    Returns
    -------
    Union[xr.Dataset, xr.DataArray]
        Rasterized data

    Raises
    ------
    KeyError
        Condition is not visible from this wrapper; see `_rasterize_data`.

    References
    ----------
    .. [1] [geocube `make_geocube`](https://corteva.github.io/geocube/html/geocube.html)
    """

    # The shared helper uses comparison-style keyword names: the reference
    # raster is passed as the candidate map and the vector data as the
    # benchmark map.
    vector_data = self._obj
    rasterized = _rasterize_data(
        candidate_map=reference_map,
        benchmark_map=vector_data,
        rasterize_attributes=rasterize_attributes,
    )
    return rasterized
14 changes: 13 additions & 1 deletion src/gval/accessors/gval_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def compute_agreement_map(
)

if self.agreement_map_format == "vector":
agreement_map = _vectorize_data(agreement_map)
agreement_map = agreement_map.gval.vectorize_data()

return agreement_map

Expand Down Expand Up @@ -677,3 +677,15 @@ def cont_plot(
basemap=basemap,
colorbar_label=colorbar_label,
)

def vectorize_data(self) -> gpd.GeoDataFrame:
    """
    Convert the xarray DataArray or Dataset held by this accessor to vector form.

    Returns
    -------
    gpd.GeoDataFrame
        Vectorized data
    """

    # Delegate to the shared helper so the accessor and internal callers
    # go through the same code path.
    raster_data = self._obj
    vectorized = _vectorize_data(raster_data)
    return vectorized
2 changes: 1 addition & 1 deletion src/gval/comparison/compute_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def wrapper(*args, **kwargs):
unique_benchmark_values=kwargs["allow_benchmark_values"],
)

else:
if "comparison_function" not in kwargs:
kwargs["comparison_function"] = getattr(self, "szudzik")

# Call the decorated function
Expand Down
2 changes: 1 addition & 1 deletion src/gval/statistics/categorical_stat_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def prevalence(tp: Number, tn: Number, fp: Number, fn: Number) -> float:
----------
.. [1] [Prevalence](https://en.wikipedia.org/wiki/Prevalence)
"""
return (tp + fp) / (tp + fp + tn + fn)
return (tp + fn) / (tp + fp + tn + fn)


def accuracy(tp: Number, tn: Number, fp: Number, fn: Number) -> float:
Expand Down
10 changes: 9 additions & 1 deletion tests/cases_accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,14 @@ def case_continuous_plot_fail(candidate_map):
return candidate_map


@parametrize(
    "vector_map, reference_map, attributes",
    list(zip(benchmark_maps[2:3], candidate_maps[0:1], [["category"]])),
)
def case_dataframe_accessor_rasterize(vector_map, reference_map, attributes):
    """Case for the dataframe accessor's rasterize method; passes the parametrized values through unchanged."""
    case_values = (vector_map, reference_map, attributes)
    return case_values


candidate_maps = ["candidate_continuous_0.tif", "candidate_continuous_1.tif"]
benchmark_maps = ["benchmark_continuous_0.tif", "benchmark_continuous_1.tif"]

Expand Down Expand Up @@ -424,4 +432,4 @@ def case_data_set_accessor_continuous(candidate_map, benchmark_map):
list(zip(candidate_maps, benchmark_maps, agreement_maps)),
)
def case_accessor_attributes(candidate_map, benchmark_map, agreement_map):
return (_load_xarray(candidate_map), _load_xarray(benchmark_map), agreement_map)
return _load_xarray(candidate_map), _load_xarray(benchmark_map), agreement_map
16 changes: 8 additions & 8 deletions tests/cases_compute_categorical_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
"overall_bias": {0: 0.9535906369448297},
"positive_likelihood_ratio": {0: 1.3660848657244327},
"positive_predictive_value": {0: 0.7891880387357667},
"prevalence": {0: 0.6986443954301008},
"prevalence": {0: 0.7326460310772968},
"prevalence_threshold": {0: 0.46108525048654536},
"true_negative_rate": {0: 0.44911012235817577},
"true_positive_rate": {0: 0.7525623245272807},
Expand Down Expand Up @@ -85,7 +85,7 @@
0: 0.31210513014910285,
1: 0.6623376623376623,
},
"prevalence": {0: 0.42776066158586024, 1: 0.4514317452480869},
"prevalence": {0: 0.1928544403005243, 1: 0.5266600839298938},
"prevalence_threshold": {0: 0.4205207112769604, 1: 0.42959744253992793},
"true_negative_rate": {0: 0.635438291033282, 1: 0.6779661016949152},
"true_positive_rate": {0: 0.6922645739910314, 1: 0.5677290836653387},
Expand Down Expand Up @@ -476,12 +476,12 @@ def case_compute_categorical_metrics_fail(
"tn": [28, 28, 20, 20, 12, 12],
"tp": [1, 1, 5, 5, 9, 9],
"prevalence": [
0.133333,
0.133333,
0.333333,
0.333333,
0.533333,
0.533333,
0.26666666666666666,
0.26666666666666666,
0.3333333333333333,
0.3333333333333333,
0.4,
0.4,
],
"true_negative_rate": [
0.848485,
Expand Down
13 changes: 13 additions & 0 deletions tests/test_accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,3 +317,16 @@ def test_accessor_attributes(candidate_map, benchmark_map, agreement_map):

assert isinstance(agreement_map, xr.DataArray)
assert isinstance(attrs_df, DataFrame)


@parametrize_with_cases(
    "vector_map, reference_map, attributes",
    glob="dataframe_accessor_rasterize",
)
def test_dataframe_accessor_rasterize(vector_map, reference_map, attributes):
    """Check that `rasterize_data` on the dataframe accessor yields the same type and shape as the reference map."""
    rasterized = vector_map.gval.rasterize_data(
        reference_map=reference_map,
        rasterize_attributes=attributes,
    )

    # The result should mirror the reference raster's container type ...
    expected_type = type(reference_map)
    assert isinstance(rasterized, expected_type)
    # ... and its grid dimensions.
    assert rasterized.shape == reference_map.shape

0 comments on commit 5a5b469

Please sign in to comment.