diff --git a/README.MD b/README.MD index 37b84f51..f596015b 100644 --- a/README.MD +++ b/README.MD @@ -19,10 +19,10 @@ See the full documentation [here](noaa-owp.github.io/gval/). WARNING: -- Our current public API and output formats are likely to change in - the future. -- Software is provided "AS-IS" without any guarantees. Please QA/QC - your metrics carefully until this project matures. +- Our current public API and output formats are likely to change in the + future. +- Software is provided "AS-IS" without any guarantees. Please QA/QC your + metrics carefully until this project matures. # Installation diff --git a/docs/compile_readme_and_arrange_docs.py b/docs/compile_readme_and_arrange_docs.py index 332572ad..12e56da5 100755 --- a/docs/compile_readme_and_arrange_docs.py +++ b/docs/compile_readme_and_arrange_docs.py @@ -101,6 +101,11 @@ def compile_readme(): f"{abs_path}/sphinx/SphinxContinuousTutorial.ipynb", ) + shutil.copy( + f"{abs_path}/../notebooks/Multi-Class Categorical Statistics.ipynb", + f"{abs_path}/sphinx/SphinxMulticatTutorial.ipynb", + ) + shutil.copy( f"{abs_path}/../CONTRIBUTING.MD", f"{abs_path}/sphinx/SPHINX_CONTRIBUTING.MD", diff --git a/docs/sphinx/PYPI_README.MD b/docs/sphinx/PYPI_README.MD index ec458e5c..076c4a36 100644 --- a/docs/sphinx/PYPI_README.MD +++ b/docs/sphinx/PYPI_README.MD @@ -19,10 +19,10 @@ See the full documentation [here](noaa-owp.github.io/gval/). WARNING: -- Our current public API and output formats are likely to change in - the future. -- Software is provided "AS-IS" without any guarantees. Please QA/QC - your metrics carefully until this project matures. +- Our current public API and output formats are likely to change in the + future. +- Software is provided "AS-IS" without any guarantees. Please QA/QC your + metrics carefully until this project matures. 
# Installation diff --git a/docs/sphinx/SPHINX_README.MD b/docs/sphinx/SPHINX_README.MD index 999fbb8e..77d48eb8 100644 --- a/docs/sphinx/SPHINX_README.MD +++ b/docs/sphinx/SPHINX_README.MD @@ -17,10 +17,10 @@ continuous, and probabilistic. WARNING: -- Our current public API and output formats are likely to change in - the future. -- Software is provided "AS-IS" without any guarantees. Please QA/QC - your metrics carefully until this project matures. +- Our current public API and output formats are likely to change in the + future. +- Software is provided "AS-IS" without any guarantees. Please QA/QC your + metrics carefully until this project matures. # Installation diff --git a/docs/sphinx/SphinxContinuousTutorial.ipynb b/docs/sphinx/SphinxContinuousTutorial.ipynb index 6d6873a6..6a31f722 100644 --- a/docs/sphinx/SphinxContinuousTutorial.ipynb +++ b/docs/sphinx/SphinxContinuousTutorial.ipynb @@ -55,8 +55,12 @@ "metadata": {}, "outputs": [], "source": [ - "candidate = rxr.open_rasterio('./livneh_2011_precip.tif', mask_and_scale=True) # VIC\n", - "benchmark = rxr.open_rasterio('./prism_2011_precip.tif', mask_and_scale=True) # PRISM" + "candidate = rxr.open_rasterio(\n", + " './livneh_2011_precip.tif', mask_and_scale=True\n", + ") # VIC\n", + "benchmark = rxr.open_rasterio(\n", + " './prism_2011_precip.tif', mask_and_scale=True\n", + ") # PRISM" ] }, { @@ -110,7 +114,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -149,7 +153,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -168,10 +172,12 @@ } ], "source": [ - "agreement.data = xr.where((agreement < np.nanquantile(agreement.values, \n", - " 0.0001)) | \n", - " (agreement > np.nanquantile(agreement.values, 0.9999)), \n", - " np.nan, agreement)\n", + "agreement.data = xr.where(\n", + " (agreement < np.nanquantile(agreement.values, 0.0001)) | \n", + " (agreement > np.nanquantile(agreement.values, 0.9999)), \n", + " np.nan, \n", + " agreement\n", + ")\n", 
"agreement.gval.cont_plot(title=\"Agreement Map\", figsize=(6, 3))" ] }, @@ -218,36 +224,60 @@ " \n", " \n", " \n", - " band\n", - " coefficient_of_determination\n", - " mean_absolute_error\n", - " mean_absolute_percentage_error\n", - " mean_normalized_mean_absolute_error\n", - " mean_normalized_root_mean_squared_error\n", - " mean_percentage_error\n", - " mean_signed_error\n", - " mean_squared_error\n", - " range_normalized_mean_absolute_error\n", - " range_normalized_root_mean_squared_error\n", - " root_mean_squared_error\n", - " symmetric_mean_absolute_percentage_error\n", + " 0\n", " \n", " \n", " \n", " \n", - " 0\n", + " band\n", " 1\n", + " \n", + " \n", + " coefficient_of_determination\n", " 0.685261\n", + " \n", + " \n", + " mean_absolute_error\n", " 216.089706\n", + " \n", + " \n", + " mean_absolute_percentage_error\n", " 0.319234\n", + " \n", + " \n", + " mean_normalized_mean_absolute_error\n", " 0.267845\n", + " \n", + " \n", + " mean_normalized_root_mean_squared_error\n", " 0.372578\n", + " \n", + " \n", + " mean_percentage_error\n", " 0.010022\n", + " \n", + " \n", + " mean_signed_error\n", " 8.085411\n", + " \n", + " \n", + " mean_squared_error\n", " 90351.664062\n", + " \n", + " \n", + " range_normalized_mean_absolute_error\n", " 0.033065\n", + " \n", + " \n", + " range_normalized_root_mean_squared_error\n", " 0.045995\n", + " \n", + " \n", + " root_mean_squared_error\n", " 300.585541\n", + " \n", + " \n", + " symmetric_mean_absolute_percentage_error\n", " 0.269394\n", " \n", " \n", @@ -255,26 +285,20 @@ "" ], "text/plain": [ - " band coefficient_of_determination mean_absolute_error \\\n", - "0 1 0.685261 216.089706 \n", - "\n", - " mean_absolute_percentage_error mean_normalized_mean_absolute_error \\\n", - "0 0.319234 0.267845 \n", - "\n", - " mean_normalized_root_mean_squared_error mean_percentage_error \\\n", - "0 0.372578 0.010022 \n", - "\n", - " mean_signed_error mean_squared_error \\\n", - "0 8.085411 90351.664062 \n", - "\n", - " 
range_normalized_mean_absolute_error \\\n", - "0 0.033065 \n", - "\n", - " range_normalized_root_mean_squared_error root_mean_squared_error \\\n", - "0 0.045995 300.585541 \n", - "\n", - " symmetric_mean_absolute_percentage_error \n", - "0 0.269394 " + " 0\n", + "band 1\n", + "coefficient_of_determination 0.685261\n", + "mean_absolute_error 216.089706\n", + "mean_absolute_percentage_error 0.319234\n", + "mean_normalized_mean_absolute_error 0.267845\n", + "mean_normalized_root_mean_squared_error 0.372578\n", + "mean_percentage_error 0.010022\n", + "mean_signed_error 8.085411\n", + "mean_squared_error 90351.664062\n", + "range_normalized_mean_absolute_error 0.033065\n", + "range_normalized_root_mean_squared_error 0.045995\n", + "root_mean_squared_error 300.585541\n", + "symmetric_mean_absolute_percentage_error 0.269394" ] }, "execution_count": 6, @@ -283,7 +307,7 @@ } ], "source": [ - "metric_table" + "metric_table.transpose()" ] }, { @@ -325,8 +349,10 @@ "metadata": {}, "outputs": [], "source": [ - "candidate, benchmark = candidate.gval.homogenize(benchmark_map=benchmark,\n", - " target_map = \"candidate\")" + "candidate, benchmark = candidate.gval.homogenize(\n", + " benchmark_map=benchmark,\n", + " target_map = \"candidate\"\n", + ")" ] }, { @@ -362,7 +388,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -381,8 +407,10 @@ } ], "source": [ - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function=\"difference\")\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function=\"difference\"\n", + ")\n", "\n", "agreement_map.gval.cont_plot(title=\"Agreement Map\", figsize=(6, 3))" ] @@ -404,7 +432,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -430,8 +458,10 @@ "def multiply(c: Number, b: Number) -> Number:\n", " return c / b\n", "\n", - "agreement_map = 
candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function=\"divide\")\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function=\"divide\"\n", + ")\n", "\n", "agreement_map.gval.cont_plot(title=\"Agreement Map\", figsize=(6, 3))" ] @@ -506,9 +536,10 @@ } ], "source": [ - "_, metric_table = candidate.gval.continuous_compare(benchmark,\n", - " metrics=['mean_absolute_error', \n", - " 'mean_squared_error'])\n", + "_, metric_table = candidate.gval.continuous_compare(\n", + " benchmark,\n", + " metrics=['mean_absolute_error', 'mean_squared_error']\n", + ")\n", "\n", "metric_table" ] @@ -621,10 +652,10 @@ } ], "source": [ - "_, metric_table = candidate.gval.continuous_compare(benchmark,\n", - " metrics=['min_error', \n", - " 'median_error', \n", - " 'max_error'])\n", + "_, metric_table = candidate.gval.continuous_compare(\n", + " benchmark,\n", + " metrics=['min_error', 'median_error', 'max_error']\n", + ")\n", "\n", "metric_table" ] diff --git a/docs/sphinx/SphinxMulticatTutorial.ipynb b/docs/sphinx/SphinxMulticatTutorial.ipynb new file mode 100644 index 00000000..1f035af2 --- /dev/null +++ b/docs/sphinx/SphinxMulticatTutorial.ipynb @@ -0,0 +1,1174 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "05d93248", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "4744f004", + "metadata": {}, + "source": [ + "# Multi-Class Categorical Comparisons" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "275a7087", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "import rioxarray as rxr\n", + "import gval\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xarray as xr\n", + "from itertools import product\n", + "\n", + "pd.set_option('display.max_columns', None)" + ] + }, + { + "cell_type": "markdown", + "id": "34069943", + "metadata": {}, + "source": [ + "## Load Datasets" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "38473c06", + "metadata": {}, + "outputs": [], + "source": [ + "candidate = rxr.open_rasterio(\n", + " \"./candidate_map_multi_categorical.tif\", mask_and_scale=True\n", + ")\n", + "benchmark = rxr.open_rasterio(\n", + " \"./benchmark_map_multi_categorical.tif\", mask_and_scale=True\n", + ")\n", + "depth_raster = rxr.open_rasterio(\n", + " \"./candidate_raw_elevation_multi_categorical.tif\", mask_and_scale=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "fa522035", + "metadata": {}, + "source": [ + "## Homogenize Datasets and Make Agreement Map" + ] + }, + { + "cell_type": "markdown", + "id": "e3e5ca15", + "metadata": {}, + "source": [ + "Although one can call `candidate.gval.categorical_compare` to run the entire workflow, in this case homogenization and creation of an agreement map will be done separately to show more options for multi-class comparisons." + ] + }, + { + "cell_type": "markdown", + "id": "2ac66a26", + "metadata": {}, + "source": [ + "#### Homogenize" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "29375e17", + "metadata": {}, + "outputs": [], + "source": [ + "candidate_r, benchmark_r = candidate.gval.homogenize(benchmark)\n", + "depth_raster_r, arb = depth_raster.gval.homogenize(benchmark_r)\n", + "del arb" + ] + }, + { + "cell_type": "markdown", + "id": "4e9e1be1", + "metadata": {}, + "source": [ + "#### Agreement Map" + ] + }, + { + "cell_type": "markdown", + "id": "e2851c9b", + "metadata": {}, + "source": [ + "The following makes a pairing dictionary which maps combinations of values in the candidate and benchmark maps to unique values in the agreement map. In this case we will encode each pair as the concatenation of its candidate and benchmark values. Instead of making a pairing dictionary one can use the `szudzik` or `cantor` pairing functions to make unique values for each combination of candidate and benchmark map values. e.g. 
12 represents a class 1 for the candidate and a class 2 for the benchmark." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "de894568", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 1): 11\n", + "(1, 2): 12\n", + "(1, 3): 13\n", + "(1, 4): 14\n", + "(1, 5): 15\n", + "(2, 1): 21\n" + ] + } + ], + "source": [ + "classes = [1, 2, 3, 4, 5]\n", + "pairing_dictionary = {(x, y): int(f'{x}{y}') for x, y in product(*([classes]*2))}\n", + "\n", + "# Showing the first 6 entries\n", + "print('\\n'.join([f'{k}: {v}' for k,v in pairing_dictionary.items()][:6]))" + ] + }, + { + "cell_type": "markdown", + "id": "44328dcf", + "metadata": {}, + "source": [ + "The benchmark map has an extra class 0 which is very similar to nodata so it will not be included in `allow_benchmark_values` in the following methods." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1dc16dd7", + "metadata": {}, + "outputs": [], + "source": [ + "agreement_map = candidate_r.gval.compute_agreement_map(\n", + " benchmark_r,\n", + " nodata=255,\n", + " encode_nodata=True,\n", + " comparison_function=\"pairing_dict\",\n", + " pairing_dict=pairing_dictionary,\n", + " allow_candidate_values=classes,\n", + " allow_benchmark_values=classes,\n", + ")\n", + "\n", + "crosstab = candidate_r.gval.compute_crosstab(\n", + " benchmark_r,\n", + " comparison_function=\"pairing_dict\",\n", + " pairing_dict=pairing_dictionary,\n", + " allow_candidate_values=classes,\n", + " allow_benchmark_values=classes,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "93fe86df", + "metadata": {}, + "source": [ + "The following only shows a small subset of the map for memory purposes:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "55606165", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": 
{ + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "agreement_map.gval.cat_plot(\n", + " title='Agreement Map', \n", + " figsize=(8, 6),\n", + " colormap='tab20b'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "bdcbfb8e", + "metadata": {}, + "source": [ + "## Comparisons" + ] + }, + { + "cell_type": "markdown", + "id": "4a1f3ecc", + "metadata": {}, + "source": [ + "For multi-categorical statistics GVAL offers 4 methods of averaging:\n", + "\n", + "1. No Averaging which provides one vs. all metrics on a class basis\n", + "2. Micro Averaging which sums up the contingencies of each class defined as either positive or negative\n", + "3. Macro Averaging which sums up the contingencies of one class vs all and then averages them\n", + "4. Weighted Averaging which does macro averaging with the inclusion of weights to be applied to each positive category." + ] + }, + { + "cell_type": "markdown", + "id": "66235a0a", + "metadata": {}, + "source": [ + "### No Averaging" + ] + }, + { + "cell_type": "markdown", + "id": "4f258087", + "metadata": {}, + "source": [ + "Using `None` for the averaging argument runs a one class vs. all methodology for each class and reports their metrics on a class basis:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "936f2dea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
01234
band11111
positive_categories12345
fn6.01043.0318274.0516572.0364147.0
fp172762.0561004.0462496.03775.05.0
tn1043592.0653360.0422623.0693617.0852206.0
tp0.0953.012967.02396.02.0
accuracy0.8579630.5379270.3581090.572210.700622
balanced_accuracy0.4289840.5077410.2583110.4996020.5
critical_success_index0.00.0016930.0163370.0045840.000005
equitable_threat_score-0.0000050.000055-0.175401-0.000455-0.0
f_score0.00.003380.0321480.0091250.000011
false_discovery_rate1.00.9983040.9727280.6117320.714286
false_negative_rate1.00.5225450.9608530.9953830.999995
false_omission_rate0.0000060.0015940.4295790.4268520.299376
false_positive_rate0.1420330.4619740.5225240.0054130.000006
fowlkes_mallows_index0.00.0284550.0326750.0423390.001253
matthews_correlation_coefficient-0.0009040.001257-0.440983-0.005543-0.000072
negative_likelihood_ratio1.1655460.9712262.012361.0008011.0
negative_predictive_value0.9999940.9984060.5704210.5731480.700624
overall_bias28793.666667281.5415831.4353990.0118910.000019
positive_likelihood_ratio0.01.0335110.0749190.8529160.936112
positive_predictive_value0.00.0016960.0272720.3882680.285714
prevalence0.0000050.0016410.2723220.4266570.299376
prevalence_threshold1.00.495880.7851070.5198760.508252
true_negative_rate0.8579670.5380260.4774760.9945870.999994
true_positive_rate0.00.4774550.0391470.0046170.000005
\n", + "
" + ], + "text/plain": [ + " 0 1 2 \\\n", + "band 1 1 1 \n", + "positive_categories 1 2 3 \n", + "fn 6.0 1043.0 318274.0 \n", + "fp 172762.0 561004.0 462496.0 \n", + "tn 1043592.0 653360.0 422623.0 \n", + "tp 0.0 953.0 12967.0 \n", + "accuracy 0.857963 0.537927 0.358109 \n", + "balanced_accuracy 0.428984 0.507741 0.258311 \n", + "critical_success_index 0.0 0.001693 0.016337 \n", + "equitable_threat_score -0.000005 0.000055 -0.175401 \n", + "f_score 0.0 0.00338 0.032148 \n", + "false_discovery_rate 1.0 0.998304 0.972728 \n", + "false_negative_rate 1.0 0.522545 0.960853 \n", + "false_omission_rate 0.000006 0.001594 0.429579 \n", + "false_positive_rate 0.142033 0.461974 0.522524 \n", + "fowlkes_mallows_index 0.0 0.028455 0.032675 \n", + "matthews_correlation_coefficient -0.000904 0.001257 -0.440983 \n", + "negative_likelihood_ratio 1.165546 0.971226 2.01236 \n", + "negative_predictive_value 0.999994 0.998406 0.570421 \n", + "overall_bias 28793.666667 281.541583 1.435399 \n", + "positive_likelihood_ratio 0.0 1.033511 0.074919 \n", + "positive_predictive_value 0.0 0.001696 0.027272 \n", + "prevalence 0.000005 0.001641 0.272322 \n", + "prevalence_threshold 1.0 0.49588 0.785107 \n", + "true_negative_rate 0.857967 0.538026 0.477476 \n", + "true_positive_rate 0.0 0.477455 0.039147 \n", + "\n", + " 3 4 \n", + "band 1 1 \n", + "positive_categories 4 5 \n", + "fn 516572.0 364147.0 \n", + "fp 3775.0 5.0 \n", + "tn 693617.0 852206.0 \n", + "tp 2396.0 2.0 \n", + "accuracy 0.57221 0.700622 \n", + "balanced_accuracy 0.499602 0.5 \n", + "critical_success_index 0.004584 0.000005 \n", + "equitable_threat_score -0.000455 -0.0 \n", + "f_score 0.009125 0.000011 \n", + "false_discovery_rate 0.611732 0.714286 \n", + "false_negative_rate 0.995383 0.999995 \n", + "false_omission_rate 0.426852 0.299376 \n", + "false_positive_rate 0.005413 0.000006 \n", + "fowlkes_mallows_index 0.042339 0.001253 \n", + "matthews_correlation_coefficient -0.005543 -0.000072 \n", + "negative_likelihood_ratio 
1.000801 1.0 \n", + "negative_predictive_value 0.573148 0.700624 \n", + "overall_bias 0.011891 0.000019 \n", + "positive_likelihood_ratio 0.852916 0.936112 \n", + "positive_predictive_value 0.388268 0.285714 \n", + "prevalence 0.426657 0.299376 \n", + "prevalence_threshold 0.519876 0.508252 \n", + "true_negative_rate 0.994587 0.999994 \n", + "true_positive_rate 0.004617 0.000005 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "no_averaged_metrics = crosstab.gval.compute_categorical_metrics(\n", + " positive_categories=[1, 2, 3, 4, 5],\n", + " negative_categories=None,\n", + " average=None\n", + ")\n", + "no_averaged_metrics.transpose()" + ] + }, + { + "cell_type": "markdown", + "id": "d722dc68", + "metadata": {}, + "source": [ + "### Micro Averaging" + ] + }, + { + "cell_type": "markdown", + "id": "3bbb83cf", + "metadata": {}, + "source": [ + "The following is an example of using micro averaging to combine classes to process two-class categorical statistics. In this example we will use classes 1 and 2 as positive classes and classes 3, 4, and 5 as negative classes:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "538dfc49", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
band1
fn382.0
fp733099.0
tn481259.0
tp1620.0
accuracy0.396987
balanced_accuracy0.602749
critical_success_index0.002204
equitable_threat_score0.00056
f_score0.004398
false_discovery_rate0.997795
false_negative_rate0.190809
false_omission_rate0.000793
false_positive_rate0.603693
fowlkes_mallows_index0.04224
matthews_correlation_coefficient0.017033
negative_likelihood_ratio0.481468
negative_predictive_value0.999207
overall_bias366.992507
positive_likelihood_ratio1.340402
positive_predictive_value0.002205
prevalence0.001646
prevalence_threshold0.463444
true_negative_rate0.396307
true_positive_rate0.809191
\n", + "
" + ], + "text/plain": [ + " 0\n", + "band 1\n", + "fn 382.0\n", + "fp 733099.0\n", + "tn 481259.0\n", + "tp 1620.0\n", + "accuracy 0.396987\n", + "balanced_accuracy 0.602749\n", + "critical_success_index 0.002204\n", + "equitable_threat_score 0.00056\n", + "f_score 0.004398\n", + "false_discovery_rate 0.997795\n", + "false_negative_rate 0.190809\n", + "false_omission_rate 0.000793\n", + "false_positive_rate 0.603693\n", + "fowlkes_mallows_index 0.04224\n", + "matthews_correlation_coefficient 0.017033\n", + "negative_likelihood_ratio 0.481468\n", + "negative_predictive_value 0.999207\n", + "overall_bias 366.992507\n", + "positive_likelihood_ratio 1.340402\n", + "positive_predictive_value 0.002205\n", + "prevalence 0.001646\n", + "prevalence_threshold 0.463444\n", + "true_negative_rate 0.396307\n", + "true_positive_rate 0.809191" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "micro_averaged_metrics = crosstab.gval.compute_categorical_metrics(\n", + " positive_categories=[1, 2],\n", + " negative_categories=[3, 4, 5],\n", + " average=\"micro\"\n", + ")\n", + "micro_averaged_metrics.transpose()" + ] + }, + { + "cell_type": "markdown", + "id": "79761a73", + "metadata": {}, + "source": [ + "### Macro Averaging" + ] + }, + { + "cell_type": "markdown", + "id": "790c56df", + "metadata": {}, + "source": [ + "The following shows macro averaging and is equivalent to the values of shared columns in `no_averaged_metrics.mean()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7e64eb9b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
band1
accuracy0.605366
balanced_accuracy0.438927
critical_success_index0.004524
equitable_threat_score-0.035161
f_score0.008933
false_discovery_rate0.85941
false_negative_rate0.895755
false_omission_rate0.231481
false_positive_rate0.22639
fowlkes_mallows_index0.020944
matthews_correlation_coefficient-0.089249
negative_likelihood_ratio1.229986
negative_predictive_value0.768519
overall_bias5815.331112
positive_likelihood_ratio0.579492
positive_predictive_value0.14059
prevalence0.2
prevalence_threshold0.661823
true_negative_rate0.77361
true_positive_rate0.104245
\n", + "
" + ], + "text/plain": [ + " 0\n", + "band 1\n", + "accuracy 0.605366\n", + "balanced_accuracy 0.438927\n", + "critical_success_index 0.004524\n", + "equitable_threat_score -0.035161\n", + "f_score 0.008933\n", + "false_discovery_rate 0.85941\n", + "false_negative_rate 0.895755\n", + "false_omission_rate 0.231481\n", + "false_positive_rate 0.22639\n", + "fowlkes_mallows_index 0.020944\n", + "matthews_correlation_coefficient -0.089249\n", + "negative_likelihood_ratio 1.229986\n", + "negative_predictive_value 0.768519\n", + "overall_bias 5815.331112\n", + "positive_likelihood_ratio 0.579492\n", + "positive_predictive_value 0.14059\n", + "prevalence 0.2\n", + "prevalence_threshold 0.661823\n", + "true_negative_rate 0.77361\n", + "true_positive_rate 0.104245" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "macro_averaged_metrics = crosstab.gval.compute_categorical_metrics(\n", + " positive_categories=classes,\n", + " negative_categories=None,\n", + " average=\"macro\"\n", + ")\n", + "macro_averaged_metrics.transpose()" + ] + }, + { + "cell_type": "markdown", + "id": "ef8f72ab", + "metadata": {}, + "source": [ + "### Weighted Averaging" + ] + }, + { + "cell_type": "markdown", + "id": "e182a6f7", + "metadata": {}, + "source": [ + "To further enhance `macro-averaging`, we can apply weights to the classes of interest in order to appropriately change the strength of evaluations for each class. For instance, if we applied the following vector to the classes used in this notebook, `[1, 4, 1, 5, 1]`, classes 2 and 4 would have greater influence on the final averaging of the scores for all classes. (All weight values are in reference to the other weight values respectively. e.g. the vector `[5, 5, 5, 5, 5]` would cause no change in the averaging because each weight value is equivalent to all other weight values.) 
Let's use the first weight vector mentioned in weighted averaging:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "0eae1cbc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
band1
accuracy0.577454
balanced_accuracy0.476356
critical_success_index0.003836
equitable_threat_score-0.014789
f_score0.007609
false_discovery_rate0.811574
false_negative_rate0.835662
false_omission_rate0.239133
false_positive_rate0.211627
fowlkes_mallows_index0.029953
matthews_correlation_coefficient-0.03872
negative_likelihood_ratio1.088901
negative_predictive_value0.760867
overall_bias2493.443989
positive_likelihood_ratio0.784138
positive_predictive_value0.188426
prevalence0.225962
prevalence_threshold0.573022
true_negative_rate0.788373
true_positive_rate0.164338
\n", + "
" + ], + "text/plain": [ + " 0\n", + "band 1\n", + "accuracy 0.577454\n", + "balanced_accuracy 0.476356\n", + "critical_success_index 0.003836\n", + "equitable_threat_score -0.014789\n", + "f_score 0.007609\n", + "false_discovery_rate 0.811574\n", + "false_negative_rate 0.835662\n", + "false_omission_rate 0.239133\n", + "false_positive_rate 0.211627\n", + "fowlkes_mallows_index 0.029953\n", + "matthews_correlation_coefficient -0.03872\n", + "negative_likelihood_ratio 1.088901\n", + "negative_predictive_value 0.760867\n", + "overall_bias 2493.443989\n", + "positive_likelihood_ratio 0.784138\n", + "positive_predictive_value 0.188426\n", + "prevalence 0.225962\n", + "prevalence_threshold 0.573022\n", + "true_negative_rate 0.788373\n", + "true_positive_rate 0.164338" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weight_averaged_metrics = crosstab.gval.compute_categorical_metrics(\n", + " positive_categories=classes,\n", + " weights=[1, 4, 1, 5, 1],\n", + " negative_categories=None,\n", + " average=\"weighted\"\n", + ")\n", + "weight_averaged_metrics.transpose()" + ] + }, + { + "cell_type": "markdown", + "id": "8c567b77", + "metadata": {}, + "source": [ + "Regardless of the averaging methodology it seems as though the candidate does not agree with the benchmark. We can now save the output." 
+ ] + }, + { + "cell_type": "markdown", + "id": "0d5f7be8", + "metadata": {}, + "source": [ + "## Save Output" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "dff8f8a0", + "metadata": {}, + "outputs": [], + "source": [ + "# output agreement map\n", + "agreement_file = 'multi_categorical_agreement_map.tif'\n", + "metric_file = 'macro_averaged_metric_file.csv'\n", + "\n", + "agreement_map.rio.to_raster(agreement_file)\n", + "macro_averaged_metrics.to_csv(metric_file)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/sphinx/SphinxTutorial.ipynb b/docs/sphinx/SphinxTutorial.ipynb index 8cb24a51..9a9d5b53 100644 --- a/docs/sphinx/SphinxTutorial.ipynb +++ b/docs/sphinx/SphinxTutorial.ipynb @@ -13,12 +13,12 @@ "id": "a403ee30", "metadata": {}, "source": [ - "# Categorical Comparisons" + "# Two-Class Categorical Comparisons" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "a9fa8470", "metadata": {}, "outputs": [], @@ -45,13 +45,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "f91c0b8c", "metadata": {}, "outputs": [], "source": [ - "candidate = rxr.open_rasterio('candidate_map_two_class_categorical.tif', mask_and_scale=True)\n", - "benchmark = rxr.open_rasterio('benchmark_map_two_class_categorical.tif', mask_and_scale=True)" + "candidate = rxr.open_rasterio(\n", + " 'candidate_map_two_class_categorical.tif', mask_and_scale=True\n", + ")\n", + "benchmark = rxr.open_rasterio(\n", + " 'benchmark_map_two_class_categorical.tif', mask_and_scale=True\n", + ")" ] }, { @@ -72,14 +76,16 @@ 
}, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "541857a7", "metadata": {}, "outputs": [], "source": [ - "agreement_map, crosstab_table, metric_table = candidate.gval.categorical_compare(benchmark,\n", - " positive_categories=[2],\n", - " negative_categories=[0, 1])" + "agreement_map, crosstab_table, metric_table = candidate.gval.categorical_compare(\n", + " benchmark,\n", + " positive_categories=[2],\n", + " negative_categories=[0, 1]\n", + ")" ] }, { @@ -156,17 +162,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "b1ef13a0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, @@ -203,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "fdc9df2b", "metadata": {}, "outputs": [ @@ -280,7 +286,7 @@ "3 1 2.0 2.0 24.0 2473405.0" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -307,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "16cb3626", "metadata": {}, "outputs": [ @@ -332,91 +338,150 @@ " \n", " \n", " \n", - " band\n", - " fn\n", - " fp\n", - " tn\n", - " tp\n", - " accuracy\n", - " critical_success_index\n", - " f_score\n", - " false_discovery_rate\n", - " false_negative_rate\n", - " ...\n", - " fowlkes_mallows_index\n", - " matthews_correlation_coefficient\n", - " negative_likelihood_ratio\n", - " negative_predictive_value\n", - " positive_likelihood_ratio\n", - " positive_predictive_value\n", - " prevalence\n", - " prevalence_threshold\n", - " true_negative_rate\n", - " true_positive_rate\n", + " 0\n", " \n", " \n", " \n", " \n", - " 0\n", + " band\n", " 1\n", + " \n", + " \n", + " fn\n", " 639227.0\n", + " \n", + " \n", + " fp\n", " 512277.0\n", + " \n", + " \n", + " tn\n", " 10345720.0\n", + " \n", + " \n", + " tp\n", " 2473405.0\n", 
+ " \n", + " \n", + " accuracy\n", " 0.917577\n", + " \n", + " \n", + " balanced_accuracy\n", + " 0.873727\n", + " \n", + " \n", + " critical_success_index\n", " 0.682336\n", + " \n", + " \n", + " equitable_threat_score\n", + " 0.610939\n", + " \n", + " \n", + " f_score\n", " 0.811177\n", + " \n", + " \n", + " false_discovery_rate\n", " 0.171578\n", + " \n", + " \n", + " false_negative_rate\n", " 0.205365\n", - " ...\n", + " \n", + " \n", + " false_omission_rate\n", + " 0.058191\n", + " \n", + " \n", + " false_positive_rate\n", + " 0.04718\n", + " \n", + " \n", + " fowlkes_mallows_index\n", " 0.811352\n", + " \n", + " \n", + " matthews_correlation_coefficient\n", " 0.758757\n", + " \n", + " \n", + " negative_likelihood_ratio\n", " 0.215534\n", + " \n", + " \n", + " negative_predictive_value\n", " 0.941809\n", + " \n", + " \n", + " overall_bias\n", + " 0.959215\n", + " \n", + " \n", + " positive_likelihood_ratio\n", " 16.842723\n", + " \n", + " \n", + " positive_predictive_value\n", " 0.828422\n", - " 0.213711\n", + " \n", + " \n", + " prevalence\n", + " 0.222798\n", + " \n", + " \n", + " prevalence_threshold\n", " 0.195925\n", + " \n", + " \n", + " true_negative_rate\n", " 0.95282\n", + " \n", + " \n", + " true_positive_rate\n", " 0.794635\n", " \n", " \n", "\n", - "

1 rows × 22 columns

\n", "" ], "text/plain": [ - " band fn fp tn tp accuracy \\\n", - "0 1 639227.0 512277.0 10345720.0 2473405.0 0.917577 \n", - "\n", - " critical_success_index f_score false_discovery_rate \\\n", - "0 0.682336 0.811177 0.171578 \n", - "\n", - " false_negative_rate ... fowlkes_mallows_index \\\n", - "0 0.205365 ... 0.811352 \n", - "\n", - " matthews_correlation_coefficient negative_likelihood_ratio \\\n", - "0 0.758757 0.215534 \n", - "\n", - " negative_predictive_value positive_likelihood_ratio \\\n", - "0 0.941809 16.842723 \n", - "\n", - " positive_predictive_value prevalence prevalence_threshold \\\n", - "0 0.828422 0.213711 0.195925 \n", - "\n", - " true_negative_rate true_positive_rate \n", - "0 0.95282 0.794635 \n", - "\n", - "[1 rows x 22 columns]" + " 0\n", + "band 1\n", + "fn 639227.0\n", + "fp 512277.0\n", + "tn 10345720.0\n", + "tp 2473405.0\n", + "accuracy 0.917577\n", + "balanced_accuracy 0.873727\n", + "critical_success_index 0.682336\n", + "equitable_threat_score 0.610939\n", + "f_score 0.811177\n", + "false_discovery_rate 0.171578\n", + "false_negative_rate 0.205365\n", + "false_omission_rate 0.058191\n", + "false_positive_rate 0.04718\n", + "fowlkes_mallows_index 0.811352\n", + "matthews_correlation_coefficient 0.758757\n", + "negative_likelihood_ratio 0.215534\n", + "negative_predictive_value 0.941809\n", + "overall_bias 0.959215\n", + "positive_likelihood_ratio 16.842723\n", + "positive_predictive_value 0.828422\n", + "prevalence 0.222798\n", + "prevalence_threshold 0.195925\n", + "true_negative_rate 0.95282\n", + "true_positive_rate 0.794635" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "metric_table" + "metric_table.transpose()" ] }, { @@ -457,13 +522,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "7264ffc9", "metadata": {}, "outputs": [], "source": [ - "candidate, benchmark = candidate.gval.homogenize(benchmark_map=benchmark,\n", - 
" target_map = \"candidate\")" + "candidate, benchmark = candidate.gval.homogenize(\n", + " benchmark_map=benchmark,\n", + " target_map = \"candidate\"\n", + ")" ] }, { @@ -476,14 +543,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "e3917e34", "metadata": {}, "outputs": [], "source": [ "target_map = rxr.open_rasterio('target_map_two_class_categorical.tif')\n", - "candidate, benchmark = candidate.gval.homogenize(benchmark_map=benchmark,\n", - " target_map = target_map)" + "candidate, benchmark = candidate.gval.homogenize(\n", + " benchmark_map=benchmark,\n", + " target_map = target_map\n", + ")" ] }, { @@ -512,17 +581,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "c6e3c35c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, @@ -538,8 +607,10 @@ } ], "source": [ - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function='cantor')\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function='cantor'\n", + ")\n", "\n", "agreement_map.gval.cat_plot(title=\"Agreement Map\")" ] @@ -558,17 +629,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "a2310a98", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, @@ -598,9 +669,11 @@ " (2, 2): 8\n", "}\n", "\n", - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark,\n", - " comparison_function='pairing_dict',\n", - " pairing_dict=pairing_dict)\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark,\n", + " comparison_function='pairing_dict',\n", + " pairing_dict=pairing_dict\n", + ")\n", "\n", 
"agreement_map.gval.cat_plot(title=\"Agreement Map\", basemap=None)" ] @@ -615,17 +688,17 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "f6567376", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" }, @@ -641,10 +714,12 @@ } ], "source": [ - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function='pairing_dict',\n", - " allow_candidate_values=[1, 2],\n", - " allow_benchmark_values=[0, 2])\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function='pairing_dict',\n", + " allow_candidate_values=[1, 2],\n", + " allow_benchmark_values=[0, 2]\n", + ")\n", "\n", "agreement_map.gval.cat_plot(title=\"Agreement Map\")" ] @@ -667,17 +742,17 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "972f07aa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, @@ -700,8 +775,10 @@ "def multiply(c: Number, b: Number) -> Number:\n", " return c * b\n", "\n", - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function=\"multi\")\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function=\"multi\"\n", + ")\n", "\n", "agreement_map.gval.cat_plot(title=\"Agreement Map\")" ] @@ -732,7 +809,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "18b9c315", "metadata": {}, "outputs": [ @@ -791,17 +868,18 @@ "1 1 2.0 2.0 4.0 2624301.0" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "crosstab_table_allow = candidate.gval.compute_crosstab(benchmark,\n", - 
" allow_benchmark_values=[0, 2],\n", - " allow_candidate_values=[2],\n", - " comparison_function=\"multi\"\n", - " )\n", + "crosstab_table_allow = candidate.gval.compute_crosstab(\n", + " benchmark,\n", + " allow_benchmark_values=[0, 2],\n", + " allow_candidate_values=[2],\n", + " comparison_function=\"multi\"\n", + ")\n", "crosstab_table_allow" ] }, @@ -823,7 +901,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "2ba3fc06", "metadata": {}, "outputs": [ @@ -866,7 +944,7 @@ " 10345720.0\n", " 2473405.0\n", " 0.794635\n", - " 0.213711\n", + " 0.222798\n", " \n", " \n", "\n", @@ -877,18 +955,20 @@ "0 1 639227.0 512277.0 10345720.0 2473405.0 0.794635 \n", "\n", " prevalence \n", - "0 0.213711 " + "0 0.222798 " ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "metric_table_select = crosstab_table.gval.compute_categorical_metrics(negative_categories= [0, 1],\n", - " positive_categories = [2],\n", - " metrics=['true_positive_rate', 'prevalence'])\n", + "metric_table_select = crosstab_table.gval.compute_categorical_metrics(\n", + " negative_categories= [0, 1],\n", + " positive_categories = [2],\n", + " metrics=['true_positive_rate', 'prevalence']\n", + ")\n", "metric_table_select" ] }, @@ -902,7 +982,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "67938408", "metadata": {}, "outputs": [], @@ -924,7 +1004,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "1e8eeb59", "metadata": {}, "outputs": [], @@ -951,21 +1031,21 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "6a41eee3", "metadata": {}, "outputs": [], "source": [ - "metric_table_register = crosstab_table.gval.compute_categorical_metrics(negative_categories= None,\n", - " positive_categories = [2],\n", - " metrics=['error_balance', \n", - " 'arbitrary1', \n", - " 'arbitrary2'])" + 
"metric_table_register = crosstab_table.gval.compute_categorical_metrics(\n", + " negative_categories= None,\n", + " positive_categories = [2],\n", + " metrics=['error_balance', 'arbitrary1', 'arbitrary2']\n", + ")" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "6ab884b7", "metadata": {}, "outputs": [ @@ -1017,7 +1097,7 @@ "0 1 639227.0 512277.0 NaN 2473405.0 0.801401" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1044,7 +1124,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "899a1da9", "metadata": {}, "outputs": [], diff --git a/docs/sphinx/index.rst b/docs/sphinx/index.rst index 940661d6..01e883ba 100644 --- a/docs/sphinx/index.rst +++ b/docs/sphinx/index.rst @@ -17,6 +17,7 @@ ___________________________________ :caption: Table of Contents SphinxTutorial + SphinxMulticatTutorial SphinxContinuousTutorial api contributing diff --git a/docs/sphinx/tutorials.rst b/docs/sphinx/tutorials.rst new file mode 100644 index 00000000..847a6575 --- /dev/null +++ b/docs/sphinx/tutorials.rst @@ -0,0 +1,10 @@ +Tutorials +######### + +.. 
toctree:: + :maxdepth: 1 + :caption: Table of Contents + + SphinxTutorial + SphinxMulticatTutorial + SphinxContinuousTutorial diff --git a/notebooks/Continuous Comparison Tutorial.ipynb b/notebooks/Continuous Comparison Tutorial.ipynb index 6d6873a6..6a31f722 100644 --- a/notebooks/Continuous Comparison Tutorial.ipynb +++ b/notebooks/Continuous Comparison Tutorial.ipynb @@ -55,8 +55,12 @@ "metadata": {}, "outputs": [], "source": [ - "candidate = rxr.open_rasterio('./livneh_2011_precip.tif', mask_and_scale=True) # VIC\n", - "benchmark = rxr.open_rasterio('./prism_2011_precip.tif', mask_and_scale=True) # PRISM" + "candidate = rxr.open_rasterio(\n", + " './livneh_2011_precip.tif', mask_and_scale=True\n", + ") # VIC\n", + "benchmark = rxr.open_rasterio(\n", + " './prism_2011_precip.tif', mask_and_scale=True\n", + ") # PRISM" ] }, { @@ -110,7 +114,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -149,7 +153,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -168,10 +172,12 @@ } ], "source": [ - "agreement.data = xr.where((agreement < np.nanquantile(agreement.values, \n", - " 0.0001)) | \n", - " (agreement > np.nanquantile(agreement.values, 0.9999)), \n", - " np.nan, agreement)\n", + "agreement.data = xr.where(\n", + " (agreement < np.nanquantile(agreement.values, 0.0001)) | \n", + " (agreement > np.nanquantile(agreement.values, 0.9999)), \n", + " np.nan, \n", + " agreement\n", + ")\n", "agreement.gval.cont_plot(title=\"Agreement Map\", figsize=(6, 3))" ] }, @@ -218,36 +224,60 @@ " \n", " \n", " \n", - " band\n", - " coefficient_of_determination\n", - " mean_absolute_error\n", - " mean_absolute_percentage_error\n", - " mean_normalized_mean_absolute_error\n", - " mean_normalized_root_mean_squared_error\n", - " mean_percentage_error\n", - " mean_signed_error\n", - " mean_squared_error\n", - " range_normalized_mean_absolute_error\n", - " range_normalized_root_mean_squared_error\n", - " root_mean_squared_error\n", - " 
symmetric_mean_absolute_percentage_error\n", + " 0\n", " \n", " \n", " \n", " \n", - " 0\n", + " band\n", " 1\n", + " \n", + " \n", + " coefficient_of_determination\n", " 0.685261\n", + " \n", + " \n", + " mean_absolute_error\n", " 216.089706\n", + " \n", + " \n", + " mean_absolute_percentage_error\n", " 0.319234\n", + " \n", + " \n", + " mean_normalized_mean_absolute_error\n", " 0.267845\n", + " \n", + " \n", + " mean_normalized_root_mean_squared_error\n", " 0.372578\n", + " \n", + " \n", + " mean_percentage_error\n", " 0.010022\n", + " \n", + " \n", + " mean_signed_error\n", " 8.085411\n", + " \n", + " \n", + " mean_squared_error\n", " 90351.664062\n", + " \n", + " \n", + " range_normalized_mean_absolute_error\n", " 0.033065\n", + " \n", + " \n", + " range_normalized_root_mean_squared_error\n", " 0.045995\n", + " \n", + " \n", + " root_mean_squared_error\n", " 300.585541\n", + " \n", + " \n", + " symmetric_mean_absolute_percentage_error\n", " 0.269394\n", " \n", " \n", @@ -255,26 +285,20 @@ "" ], "text/plain": [ - " band coefficient_of_determination mean_absolute_error \\\n", - "0 1 0.685261 216.089706 \n", - "\n", - " mean_absolute_percentage_error mean_normalized_mean_absolute_error \\\n", - "0 0.319234 0.267845 \n", - "\n", - " mean_normalized_root_mean_squared_error mean_percentage_error \\\n", - "0 0.372578 0.010022 \n", - "\n", - " mean_signed_error mean_squared_error \\\n", - "0 8.085411 90351.664062 \n", - "\n", - " range_normalized_mean_absolute_error \\\n", - "0 0.033065 \n", - "\n", - " range_normalized_root_mean_squared_error root_mean_squared_error \\\n", - "0 0.045995 300.585541 \n", - "\n", - " symmetric_mean_absolute_percentage_error \n", - "0 0.269394 " + " 0\n", + "band 1\n", + "coefficient_of_determination 0.685261\n", + "mean_absolute_error 216.089706\n", + "mean_absolute_percentage_error 0.319234\n", + "mean_normalized_mean_absolute_error 0.267845\n", + "mean_normalized_root_mean_squared_error 0.372578\n", + "mean_percentage_error 
0.010022\n", + "mean_signed_error 8.085411\n", + "mean_squared_error 90351.664062\n", + "range_normalized_mean_absolute_error 0.033065\n", + "range_normalized_root_mean_squared_error 0.045995\n", + "root_mean_squared_error 300.585541\n", + "symmetric_mean_absolute_percentage_error 0.269394" ] }, "execution_count": 6, @@ -283,7 +307,7 @@ } ], "source": [ - "metric_table" + "metric_table.transpose()" ] }, { @@ -325,8 +349,10 @@ "metadata": {}, "outputs": [], "source": [ - "candidate, benchmark = candidate.gval.homogenize(benchmark_map=benchmark,\n", - " target_map = \"candidate\")" + "candidate, benchmark = candidate.gval.homogenize(\n", + " benchmark_map=benchmark,\n", + " target_map = \"candidate\"\n", + ")" ] }, { @@ -362,7 +388,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -381,8 +407,10 @@ } ], "source": [ - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function=\"difference\")\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function=\"difference\"\n", + ")\n", "\n", "agreement_map.gval.cont_plot(title=\"Agreement Map\", figsize=(6, 3))" ] @@ -404,7 +432,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -430,8 +458,10 @@ "def multiply(c: Number, b: Number) -> Number:\n", " return c / b\n", "\n", - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function=\"divide\")\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function=\"divide\"\n", + ")\n", "\n", "agreement_map.gval.cont_plot(title=\"Agreement Map\", figsize=(6, 3))" ] @@ -506,9 +536,10 @@ } ], "source": [ - "_, metric_table = candidate.gval.continuous_compare(benchmark,\n", - " metrics=['mean_absolute_error', \n", - " 'mean_squared_error'])\n", + "_, metric_table = candidate.gval.continuous_compare(\n", + " benchmark,\n", + " 
metrics=['mean_absolute_error', 'mean_squared_error']\n", + ")\n", "\n", "metric_table" ] @@ -621,10 +652,10 @@ } ], "source": [ - "_, metric_table = candidate.gval.continuous_compare(benchmark,\n", - " metrics=['min_error', \n", - " 'median_error', \n", - " 'max_error'])\n", + "_, metric_table = candidate.gval.continuous_compare(\n", + " benchmark,\n", + " metrics=['min_error', 'median_error', 'max_error']\n", + ")\n", "\n", "metric_table" ] diff --git a/notebooks/Multi-Class Categorical Statistics.ipynb b/notebooks/Multi-Class Categorical Statistics.ipynb new file mode 100644 index 00000000..1f035af2 --- /dev/null +++ b/notebooks/Multi-Class Categorical Statistics.ipynb @@ -0,0 +1,1174 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "05d93248", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "4744f004", + "metadata": {}, + "source": [ + "# Multi-Class Categorical Comparisons" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "275a7087", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "import rioxarray as rxr\n", + "import gval\n", + "import numpy as np\n", + "import pandas as pd\n", + "import xarray as xr\n", + "from itertools import product\n", + "\n", + "pd.set_option('display.max_columns', None)" + ] + }, + { + "cell_type": "markdown", + "id": "34069943", + "metadata": {}, + "source": [ + "## Load Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "38473c06", + "metadata": {}, + "outputs": [], + "source": [ + "candidate = rxr.open_rasterio(\n", + " \"./candidate_map_multi_categorical.tif\", mask_and_scale=True\n", + ")\n", + "benchmark = rxr.open_rasterio(\n", + " \"./benchmark_map_multi_categorical.tif\", mask_and_scale=True\n", + ")\n", + "depth_raster = rxr.open_rasterio(\n", + " \"./candidate_raw_elevation_multi_categorical.tif\", mask_and_scale=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "fa522035", + 
"metadata": {}, + "source": [ + "## Homogenize Datasets and Make Agreement Map" + ] + }, + { + "cell_type": "markdown", + "id": "e3e5ca15", + "metadata": {}, + "source": [ + "Although one can call `candidate.gval.categorical_compare` to run the entire workflow, in this case homogenization and creation of an agreement map will be done separately to show more options for multi-class comparisons." + ] + }, + { + "cell_type": "markdown", + "id": "2ac66a26", + "metadata": {}, + "source": [ + "#### Homogenize" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "29375e17", + "metadata": {}, + "outputs": [], + "source": [ + "candidate_r, benchmark_r = candidate.gval.homogenize(benchmark)\n", + "depth_raster_r, arb = depth_raster.gval.homogenize(benchmark_r)\n", + "del arb" + ] + }, + { + "cell_type": "markdown", + "id": "4e9e1be1", + "metadata": {}, + "source": [ + "#### Agreement Map" + ] + }, + { + "cell_type": "markdown", + "id": "e2851c9b", + "metadata": {}, + "source": [ + "The following makes a pairing dictionary which maps combinations of values in the candidate and benchmark maps to unique values in the agreement map. In this case we will encode each value as concatenation of what the values are. Instead of making a pairing dictionary one can use the `szudzik` or `cantor` pairing functions to make unique values for each combination of candidate and benchmark map values. e.g. 12 represents a class 1 for the candidate and a class 2 for the benchmark." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "de894568", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 1): 11\n", + "(1, 2): 12\n", + "(1, 3): 13\n", + "(1, 4): 14\n", + "(1, 5): 15\n", + "(2, 1): 21\n" + ] + } + ], + "source": [ + "classes = [1, 2, 3, 4, 5]\n", + "pairing_dictionary = {(x, y): int(f'{x}{y}') for x, y in product(*([classes]*2))}\n", + "\n", + "# Showing the first 6 entries\n", + "print('\\n'.join([f'{k}: {v}' for k,v in pairing_dictionary.items()][:6]))" + ] + }, + { + "cell_type": "markdown", + "id": "44328dcf", + "metadata": {}, + "source": [ + "The benchmark map has an extra class 0 which is very similar to nodata so it will not be included in `allow_benchmark_values` in the following methods." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1dc16dd7", + "metadata": {}, + "outputs": [], + "source": [ + "agreement_map = candidate_r.gval.compute_agreement_map(\n", + " benchmark_r,\n", + " nodata=255,\n", + " encode_nodata=True,\n", + " comparison_function=\"pairing_dict\",\n", + " pairing_dict=pairing_dictionary,\n", + " allow_candidate_values=classes,\n", + " allow_benchmark_values=classes,\n", + ")\n", + "\n", + "crosstab = candidate_r.gval.compute_crosstab(\n", + " benchmark_r,\n", + " comparison_function=\"pairing_dict\",\n", + " pairing_dict=pairing_dictionary,\n", + " allow_candidate_values=classes,\n", + " allow_benchmark_values=classes,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "93fe86df", + "metadata": {}, + "source": [ + "The following only shows a small subset of the map for memory purposes:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "55606165", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "agreement_map.gval.cat_plot(\n", + " title='Agreement Map', \n", + " figsize=(8, 6),\n", + " colormap='tab20b'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "bdcbfb8e", + "metadata": {}, + "source": [ + "## Comparisons" + ] + }, + { + "cell_type": "markdown", + "id": "4a1f3ecc", + "metadata": {}, + "source": [ + "For multi-categorical statistics GVAL offers 4 methods of averaging:\n", + "\n", + "1. No Averaging which provides one vs. all metrics on a class basis\n", + "1. Micro Averaging which sums up the contingencies of each class defined as either positive or negative\n", + "3. Macro Averaging which sums up the contingencies of one class vs all and then averages them\n", + "4. Weighted Averaging which does macro averaging with the inclusion of weights to be applied to each positive category." + ] + }, + { + "cell_type": "markdown", + "id": "66235a0a", + "metadata": {}, + "source": [ + "### No Averaging" + ] + }, + { + "cell_type": "markdown", + "id": "4f258087", + "metadata": {}, + "source": [ + "Using `None` for the averaging argument runs a one class vs. all methodology for each class and reports their metrics on a class basis:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "936f2dea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
01234
band11111
positive_categories12345
fn6.01043.0318274.0516572.0364147.0
fp172762.0561004.0462496.03775.05.0
tn1043592.0653360.0422623.0693617.0852206.0
tp0.0953.012967.02396.02.0
accuracy0.8579630.5379270.3581090.572210.700622
balanced_accuracy0.4289840.5077410.2583110.4996020.5
critical_success_index0.00.0016930.0163370.0045840.000005
equitable_threat_score-0.0000050.000055-0.175401-0.000455-0.0
f_score0.00.003380.0321480.0091250.000011
false_discovery_rate1.00.9983040.9727280.6117320.714286
false_negative_rate1.00.5225450.9608530.9953830.999995
false_omission_rate0.0000060.0015940.4295790.4268520.299376
false_positive_rate0.1420330.4619740.5225240.0054130.000006
fowlkes_mallows_index0.00.0284550.0326750.0423390.001253
matthews_correlation_coefficient-0.0009040.001257-0.440983-0.005543-0.000072
negative_likelihood_ratio1.1655460.9712262.012361.0008011.0
negative_predictive_value0.9999940.9984060.5704210.5731480.700624
overall_bias28793.666667281.5415831.4353990.0118910.000019
positive_likelihood_ratio0.01.0335110.0749190.8529160.936112
positive_predictive_value0.00.0016960.0272720.3882680.285714
prevalence0.0000050.0016410.2723220.4266570.299376
prevalence_threshold1.00.495880.7851070.5198760.508252
true_negative_rate0.8579670.5380260.4774760.9945870.999994
true_positive_rate0.00.4774550.0391470.0046170.000005
\n", + "
" + ], + "text/plain": [ + " 0 1 2 \\\n", + "band 1 1 1 \n", + "positive_categories 1 2 3 \n", + "fn 6.0 1043.0 318274.0 \n", + "fp 172762.0 561004.0 462496.0 \n", + "tn 1043592.0 653360.0 422623.0 \n", + "tp 0.0 953.0 12967.0 \n", + "accuracy 0.857963 0.537927 0.358109 \n", + "balanced_accuracy 0.428984 0.507741 0.258311 \n", + "critical_success_index 0.0 0.001693 0.016337 \n", + "equitable_threat_score -0.000005 0.000055 -0.175401 \n", + "f_score 0.0 0.00338 0.032148 \n", + "false_discovery_rate 1.0 0.998304 0.972728 \n", + "false_negative_rate 1.0 0.522545 0.960853 \n", + "false_omission_rate 0.000006 0.001594 0.429579 \n", + "false_positive_rate 0.142033 0.461974 0.522524 \n", + "fowlkes_mallows_index 0.0 0.028455 0.032675 \n", + "matthews_correlation_coefficient -0.000904 0.001257 -0.440983 \n", + "negative_likelihood_ratio 1.165546 0.971226 2.01236 \n", + "negative_predictive_value 0.999994 0.998406 0.570421 \n", + "overall_bias 28793.666667 281.541583 1.435399 \n", + "positive_likelihood_ratio 0.0 1.033511 0.074919 \n", + "positive_predictive_value 0.0 0.001696 0.027272 \n", + "prevalence 0.000005 0.001641 0.272322 \n", + "prevalence_threshold 1.0 0.49588 0.785107 \n", + "true_negative_rate 0.857967 0.538026 0.477476 \n", + "true_positive_rate 0.0 0.477455 0.039147 \n", + "\n", + " 3 4 \n", + "band 1 1 \n", + "positive_categories 4 5 \n", + "fn 516572.0 364147.0 \n", + "fp 3775.0 5.0 \n", + "tn 693617.0 852206.0 \n", + "tp 2396.0 2.0 \n", + "accuracy 0.57221 0.700622 \n", + "balanced_accuracy 0.499602 0.5 \n", + "critical_success_index 0.004584 0.000005 \n", + "equitable_threat_score -0.000455 -0.0 \n", + "f_score 0.009125 0.000011 \n", + "false_discovery_rate 0.611732 0.714286 \n", + "false_negative_rate 0.995383 0.999995 \n", + "false_omission_rate 0.426852 0.299376 \n", + "false_positive_rate 0.005413 0.000006 \n", + "fowlkes_mallows_index 0.042339 0.001253 \n", + "matthews_correlation_coefficient -0.005543 -0.000072 \n", + "negative_likelihood_ratio 
1.000801 1.0 \n", + "negative_predictive_value 0.573148 0.700624 \n", + "overall_bias 0.011891 0.000019 \n", + "positive_likelihood_ratio 0.852916 0.936112 \n", + "positive_predictive_value 0.388268 0.285714 \n", + "prevalence 0.426657 0.299376 \n", + "prevalence_threshold 0.519876 0.508252 \n", + "true_negative_rate 0.994587 0.999994 \n", + "true_positive_rate 0.004617 0.000005 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "no_averaged_metrics = crosstab.gval.compute_categorical_metrics(\n", + " positive_categories=[1, 2, 3, 4, 5],\n", + " negative_categories=None,\n", + " average=None\n", + ")\n", + "no_averaged_metrics.transpose()" + ] + }, + { + "cell_type": "markdown", + "id": "d722dc68", + "metadata": {}, + "source": [ + "### Micro Averaging" + ] + }, + { + "cell_type": "markdown", + "id": "3bbb83cf", + "metadata": {}, + "source": [ + "The following is an example of using micro averaging to combine classes to process two-class categorical statistics. In this example we will use classes 1 and 2 as positive classes and classes 3, 4, and 5 as negative classes:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "538dfc49", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
band1
fn382.0
fp733099.0
tn481259.0
tp1620.0
accuracy0.396987
balanced_accuracy0.602749
critical_success_index0.002204
equitable_threat_score0.00056
f_score0.004398
false_discovery_rate0.997795
false_negative_rate0.190809
false_omission_rate0.000793
false_positive_rate0.603693
fowlkes_mallows_index0.04224
matthews_correlation_coefficient0.017033
negative_likelihood_ratio0.481468
negative_predictive_value0.999207
overall_bias366.992507
positive_likelihood_ratio1.340402
positive_predictive_value0.002205
prevalence0.001646
prevalence_threshold0.463444
true_negative_rate0.396307
true_positive_rate0.809191
\n", + "
" + ], + "text/plain": [ + " 0\n", + "band 1\n", + "fn 382.0\n", + "fp 733099.0\n", + "tn 481259.0\n", + "tp 1620.0\n", + "accuracy 0.396987\n", + "balanced_accuracy 0.602749\n", + "critical_success_index 0.002204\n", + "equitable_threat_score 0.00056\n", + "f_score 0.004398\n", + "false_discovery_rate 0.997795\n", + "false_negative_rate 0.190809\n", + "false_omission_rate 0.000793\n", + "false_positive_rate 0.603693\n", + "fowlkes_mallows_index 0.04224\n", + "matthews_correlation_coefficient 0.017033\n", + "negative_likelihood_ratio 0.481468\n", + "negative_predictive_value 0.999207\n", + "overall_bias 366.992507\n", + "positive_likelihood_ratio 1.340402\n", + "positive_predictive_value 0.002205\n", + "prevalence 0.001646\n", + "prevalence_threshold 0.463444\n", + "true_negative_rate 0.396307\n", + "true_positive_rate 0.809191" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "micro_averaged_metrics = crosstab.gval.compute_categorical_metrics(\n", + " positive_categories=[1, 2],\n", + " negative_categories=[3, 4, 5],\n", + " average=\"micro\"\n", + ")\n", + "micro_averaged_metrics.transpose()" + ] + }, + { + "cell_type": "markdown", + "id": "79761a73", + "metadata": {}, + "source": [ + "### Macro Averaging" + ] + }, + { + "cell_type": "markdown", + "id": "790c56df", + "metadata": {}, + "source": [ + "The following shows macro averaging and is equivalent to the values of shared columns in `no_averaged_metrics.mean()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7e64eb9b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
band1
accuracy0.605366
balanced_accuracy0.438927
critical_success_index0.004524
equitable_threat_score-0.035161
f_score0.008933
false_discovery_rate0.85941
false_negative_rate0.895755
false_omission_rate0.231481
false_positive_rate0.22639
fowlkes_mallows_index0.020944
matthews_correlation_coefficient-0.089249
negative_likelihood_ratio1.229986
negative_predictive_value0.768519
overall_bias5815.331112
positive_likelihood_ratio0.579492
positive_predictive_value0.14059
prevalence0.2
prevalence_threshold0.661823
true_negative_rate0.77361
true_positive_rate0.104245
\n", + "
" + ], + "text/plain": [ + " 0\n", + "band 1\n", + "accuracy 0.605366\n", + "balanced_accuracy 0.438927\n", + "critical_success_index 0.004524\n", + "equitable_threat_score -0.035161\n", + "f_score 0.008933\n", + "false_discovery_rate 0.85941\n", + "false_negative_rate 0.895755\n", + "false_omission_rate 0.231481\n", + "false_positive_rate 0.22639\n", + "fowlkes_mallows_index 0.020944\n", + "matthews_correlation_coefficient -0.089249\n", + "negative_likelihood_ratio 1.229986\n", + "negative_predictive_value 0.768519\n", + "overall_bias 5815.331112\n", + "positive_likelihood_ratio 0.579492\n", + "positive_predictive_value 0.14059\n", + "prevalence 0.2\n", + "prevalence_threshold 0.661823\n", + "true_negative_rate 0.77361\n", + "true_positive_rate 0.104245" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "macro_averaged_metrics = crosstab.gval.compute_categorical_metrics(\n", + " positive_categories=classes,\n", + " negative_categories=None,\n", + " average=\"macro\"\n", + ")\n", + "macro_averaged_metrics.transpose()" + ] + }, + { + "cell_type": "markdown", + "id": "ef8f72ab", + "metadata": {}, + "source": [ + "### Weighted Averaging" + ] + }, + { + "cell_type": "markdown", + "id": "e182a6f7", + "metadata": {}, + "source": [ + "To further enhance `macro-averaging`, we can apply weights to the classes of interest in order to appropriately change the strength of evaluations for each class. For instance, if we applied the following vector to the classes used in this notebook, `[1, 4, 1, 5, 1]`, classes 2 and 4 would have greater influence on the final averaging of the scores for all classes. (All weight values are relative to the other weight values, e.g. the vector `[5, 5, 5, 5, 5]` would cause no change in the averaging because each weight value is equivalent to all other weight values.) 
Let's use the first weight vector mentioned in weighted averaging:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "0eae1cbc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
band1
accuracy0.577454
balanced_accuracy0.476356
critical_success_index0.003836
equitable_threat_score-0.014789
f_score0.007609
false_discovery_rate0.811574
false_negative_rate0.835662
false_omission_rate0.239133
false_positive_rate0.211627
fowlkes_mallows_index0.029953
matthews_correlation_coefficient-0.03872
negative_likelihood_ratio1.088901
negative_predictive_value0.760867
overall_bias2493.443989
positive_likelihood_ratio0.784138
positive_predictive_value0.188426
prevalence0.225962
prevalence_threshold0.573022
true_negative_rate0.788373
true_positive_rate0.164338
\n", + "
" + ], + "text/plain": [ + " 0\n", + "band 1\n", + "accuracy 0.577454\n", + "balanced_accuracy 0.476356\n", + "critical_success_index 0.003836\n", + "equitable_threat_score -0.014789\n", + "f_score 0.007609\n", + "false_discovery_rate 0.811574\n", + "false_negative_rate 0.835662\n", + "false_omission_rate 0.239133\n", + "false_positive_rate 0.211627\n", + "fowlkes_mallows_index 0.029953\n", + "matthews_correlation_coefficient -0.03872\n", + "negative_likelihood_ratio 1.088901\n", + "negative_predictive_value 0.760867\n", + "overall_bias 2493.443989\n", + "positive_likelihood_ratio 0.784138\n", + "positive_predictive_value 0.188426\n", + "prevalence 0.225962\n", + "prevalence_threshold 0.573022\n", + "true_negative_rate 0.788373\n", + "true_positive_rate 0.164338" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weight_averaged_metrics = crosstab.gval.compute_categorical_metrics(\n", + " positive_categories=classes,\n", + " weights=[1, 4, 1, 5, 1],\n", + " negative_categories=None,\n", + " average=\"weighted\"\n", + ")\n", + "weight_averaged_metrics.transpose()" + ] + }, + { + "cell_type": "markdown", + "id": "8c567b77", + "metadata": {}, + "source": [ + "Regardless of the averaging methodology it seems as though the candidate does not agree with the benchmark. We can now save the output." 
+ ] + }, + { + "cell_type": "markdown", + "id": "0d5f7be8", + "metadata": {}, + "source": [ + "## Save Output" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "dff8f8a0", + "metadata": {}, + "outputs": [], + "source": [ + "# output agreement map\n", + "agreement_file = 'multi_categorical_agreement_map.tif'\n", + "metric_file = 'macro_averaged_metric_file.csv'\n", + "\n", + "agreement_map.rio.to_raster(agreement_file)\n", + "macro_averaged_metrics.to_csv(metric_file)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Tutorial.ipynb b/notebooks/Tutorial.ipynb index 8cb24a51..9a9d5b53 100644 --- a/notebooks/Tutorial.ipynb +++ b/notebooks/Tutorial.ipynb @@ -13,12 +13,12 @@ "id": "a403ee30", "metadata": {}, "source": [ - "# Categorical Comparisons" + "# Two-Class Categorical Comparisons" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "a9fa8470", "metadata": {}, "outputs": [], @@ -45,13 +45,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "f91c0b8c", "metadata": {}, "outputs": [], "source": [ - "candidate = rxr.open_rasterio('candidate_map_two_class_categorical.tif', mask_and_scale=True)\n", - "benchmark = rxr.open_rasterio('benchmark_map_two_class_categorical.tif', mask_and_scale=True)" + "candidate = rxr.open_rasterio(\n", + " 'candidate_map_two_class_categorical.tif', mask_and_scale=True\n", + ")\n", + "benchmark = rxr.open_rasterio(\n", + " 'benchmark_map_two_class_categorical.tif', mask_and_scale=True\n", + ")" ] }, { @@ -72,14 +76,16 @@ }, { "cell_type": "code", - 
"execution_count": 3, + "execution_count": 4, "id": "541857a7", "metadata": {}, "outputs": [], "source": [ - "agreement_map, crosstab_table, metric_table = candidate.gval.categorical_compare(benchmark,\n", - " positive_categories=[2],\n", - " negative_categories=[0, 1])" + "agreement_map, crosstab_table, metric_table = candidate.gval.categorical_compare(\n", + " benchmark,\n", + " positive_categories=[2],\n", + " negative_categories=[0, 1]\n", + ")" ] }, { @@ -156,17 +162,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "b1ef13a0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, @@ -203,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "fdc9df2b", "metadata": {}, "outputs": [ @@ -280,7 +286,7 @@ "3 1 2.0 2.0 24.0 2473405.0" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -307,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "16cb3626", "metadata": {}, "outputs": [ @@ -332,91 +338,150 @@ " \n", " \n", " \n", - " band\n", - " fn\n", - " fp\n", - " tn\n", - " tp\n", - " accuracy\n", - " critical_success_index\n", - " f_score\n", - " false_discovery_rate\n", - " false_negative_rate\n", - " ...\n", - " fowlkes_mallows_index\n", - " matthews_correlation_coefficient\n", - " negative_likelihood_ratio\n", - " negative_predictive_value\n", - " positive_likelihood_ratio\n", - " positive_predictive_value\n", - " prevalence\n", - " prevalence_threshold\n", - " true_negative_rate\n", - " true_positive_rate\n", + " 0\n", " \n", " \n", " \n", " \n", - " 0\n", + " band\n", " 1\n", + " \n", + " \n", + " fn\n", " 639227.0\n", + " \n", + " \n", + " fp\n", " 512277.0\n", + " \n", + " \n", + " tn\n", " 10345720.0\n", + " \n", + " \n", + " tp\n", " 2473405.0\n", + " \n", + " \n", + " 
accuracy\n", " 0.917577\n", + " \n", + " \n", + " balanced_accuracy\n", + " 0.873727\n", + " \n", + " \n", + " critical_success_index\n", " 0.682336\n", + " \n", + " \n", + " equitable_threat_score\n", + " 0.610939\n", + " \n", + " \n", + " f_score\n", " 0.811177\n", + " \n", + " \n", + " false_discovery_rate\n", " 0.171578\n", + " \n", + " \n", + " false_negative_rate\n", " 0.205365\n", - " ...\n", + " \n", + " \n", + " false_omission_rate\n", + " 0.058191\n", + " \n", + " \n", + " false_positive_rate\n", + " 0.04718\n", + " \n", + " \n", + " fowlkes_mallows_index\n", " 0.811352\n", + " \n", + " \n", + " matthews_correlation_coefficient\n", " 0.758757\n", + " \n", + " \n", + " negative_likelihood_ratio\n", " 0.215534\n", + " \n", + " \n", + " negative_predictive_value\n", " 0.941809\n", + " \n", + " \n", + " overall_bias\n", + " 0.959215\n", + " \n", + " \n", + " positive_likelihood_ratio\n", " 16.842723\n", + " \n", + " \n", + " positive_predictive_value\n", " 0.828422\n", - " 0.213711\n", + " \n", + " \n", + " prevalence\n", + " 0.222798\n", + " \n", + " \n", + " prevalence_threshold\n", " 0.195925\n", + " \n", + " \n", + " true_negative_rate\n", " 0.95282\n", + " \n", + " \n", + " true_positive_rate\n", " 0.794635\n", " \n", " \n", "\n", - "

1 rows × 22 columns

\n", "" ], "text/plain": [ - " band fn fp tn tp accuracy \\\n", - "0 1 639227.0 512277.0 10345720.0 2473405.0 0.917577 \n", - "\n", - " critical_success_index f_score false_discovery_rate \\\n", - "0 0.682336 0.811177 0.171578 \n", - "\n", - " false_negative_rate ... fowlkes_mallows_index \\\n", - "0 0.205365 ... 0.811352 \n", - "\n", - " matthews_correlation_coefficient negative_likelihood_ratio \\\n", - "0 0.758757 0.215534 \n", - "\n", - " negative_predictive_value positive_likelihood_ratio \\\n", - "0 0.941809 16.842723 \n", - "\n", - " positive_predictive_value prevalence prevalence_threshold \\\n", - "0 0.828422 0.213711 0.195925 \n", - "\n", - " true_negative_rate true_positive_rate \n", - "0 0.95282 0.794635 \n", - "\n", - "[1 rows x 22 columns]" + " 0\n", + "band 1\n", + "fn 639227.0\n", + "fp 512277.0\n", + "tn 10345720.0\n", + "tp 2473405.0\n", + "accuracy 0.917577\n", + "balanced_accuracy 0.873727\n", + "critical_success_index 0.682336\n", + "equitable_threat_score 0.610939\n", + "f_score 0.811177\n", + "false_discovery_rate 0.171578\n", + "false_negative_rate 0.205365\n", + "false_omission_rate 0.058191\n", + "false_positive_rate 0.04718\n", + "fowlkes_mallows_index 0.811352\n", + "matthews_correlation_coefficient 0.758757\n", + "negative_likelihood_ratio 0.215534\n", + "negative_predictive_value 0.941809\n", + "overall_bias 0.959215\n", + "positive_likelihood_ratio 16.842723\n", + "positive_predictive_value 0.828422\n", + "prevalence 0.222798\n", + "prevalence_threshold 0.195925\n", + "true_negative_rate 0.95282\n", + "true_positive_rate 0.794635" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "metric_table" + "metric_table.transpose()" ] }, { @@ -457,13 +522,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "7264ffc9", "metadata": {}, "outputs": [], "source": [ - "candidate, benchmark = candidate.gval.homogenize(benchmark_map=benchmark,\n", - 
" target_map = \"candidate\")" + "candidate, benchmark = candidate.gval.homogenize(\n", + " benchmark_map=benchmark,\n", + " target_map = \"candidate\"\n", + ")" ] }, { @@ -476,14 +543,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "e3917e34", "metadata": {}, "outputs": [], "source": [ "target_map = rxr.open_rasterio('target_map_two_class_categorical.tif')\n", - "candidate, benchmark = candidate.gval.homogenize(benchmark_map=benchmark,\n", - " target_map = target_map)" + "candidate, benchmark = candidate.gval.homogenize(\n", + " benchmark_map=benchmark,\n", + " target_map = target_map\n", + ")" ] }, { @@ -512,17 +581,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "c6e3c35c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, @@ -538,8 +607,10 @@ } ], "source": [ - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function='cantor')\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function='cantor'\n", + ")\n", "\n", "agreement_map.gval.cat_plot(title=\"Agreement Map\")" ] @@ -558,17 +629,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "a2310a98", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, @@ -598,9 +669,11 @@ " (2, 2): 8\n", "}\n", "\n", - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark,\n", - " comparison_function='pairing_dict',\n", - " pairing_dict=pairing_dict)\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark,\n", + " comparison_function='pairing_dict',\n", + " pairing_dict=pairing_dict\n", + ")\n", "\n", 
"agreement_map.gval.cat_plot(title=\"Agreement Map\", basemap=None)" ] @@ -615,17 +688,17 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "f6567376", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" }, @@ -641,10 +714,12 @@ } ], "source": [ - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function='pairing_dict',\n", - " allow_candidate_values=[1, 2],\n", - " allow_benchmark_values=[0, 2])\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function='pairing_dict',\n", + " allow_candidate_values=[1, 2],\n", + " allow_benchmark_values=[0, 2]\n", + ")\n", "\n", "agreement_map.gval.cat_plot(title=\"Agreement Map\")" ] @@ -667,17 +742,17 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "972f07aa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, @@ -700,8 +775,10 @@ "def multiply(c: Number, b: Number) -> Number:\n", " return c * b\n", "\n", - "agreement_map = candidate.gval.compute_agreement_map(benchmark_map=benchmark, \n", - " comparison_function=\"multi\")\n", + "agreement_map = candidate.gval.compute_agreement_map(\n", + " benchmark_map=benchmark, \n", + " comparison_function=\"multi\"\n", + ")\n", "\n", "agreement_map.gval.cat_plot(title=\"Agreement Map\")" ] @@ -732,7 +809,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "18b9c315", "metadata": {}, "outputs": [ @@ -791,17 +868,18 @@ "1 1 2.0 2.0 4.0 2624301.0" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "crosstab_table_allow = candidate.gval.compute_crosstab(benchmark,\n", - 
" allow_benchmark_values=[0, 2],\n", - " allow_candidate_values=[2],\n", - " comparison_function=\"multi\"\n", - " )\n", + "crosstab_table_allow = candidate.gval.compute_crosstab(\n", + " benchmark,\n", + " allow_benchmark_values=[0, 2],\n", + " allow_candidate_values=[2],\n", + " comparison_function=\"multi\"\n", + ")\n", "crosstab_table_allow" ] }, @@ -823,7 +901,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "2ba3fc06", "metadata": {}, "outputs": [ @@ -866,7 +944,7 @@ " 10345720.0\n", " 2473405.0\n", " 0.794635\n", - " 0.213711\n", + " 0.222798\n", " \n", " \n", "\n", @@ -877,18 +955,20 @@ "0 1 639227.0 512277.0 10345720.0 2473405.0 0.794635 \n", "\n", " prevalence \n", - "0 0.213711 " + "0 0.222798 " ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "metric_table_select = crosstab_table.gval.compute_categorical_metrics(negative_categories= [0, 1],\n", - " positive_categories = [2],\n", - " metrics=['true_positive_rate', 'prevalence'])\n", + "metric_table_select = crosstab_table.gval.compute_categorical_metrics(\n", + " negative_categories= [0, 1],\n", + " positive_categories = [2],\n", + " metrics=['true_positive_rate', 'prevalence']\n", + ")\n", "metric_table_select" ] }, @@ -902,7 +982,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "67938408", "metadata": {}, "outputs": [], @@ -924,7 +1004,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "1e8eeb59", "metadata": {}, "outputs": [], @@ -951,21 +1031,21 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "6a41eee3", "metadata": {}, "outputs": [], "source": [ - "metric_table_register = crosstab_table.gval.compute_categorical_metrics(negative_categories= None,\n", - " positive_categories = [2],\n", - " metrics=['error_balance', \n", - " 'arbitrary1', \n", - " 'arbitrary2'])" + 
"metric_table_register = crosstab_table.gval.compute_categorical_metrics(\n", + " negative_categories= None,\n", + " positive_categories = [2],\n", + " metrics=['error_balance', 'arbitrary1', 'arbitrary2']\n", + ")" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "6ab884b7", "metadata": {}, "outputs": [ @@ -1017,7 +1097,7 @@ "0 1 639227.0 512277.0 NaN 2473405.0 0.801401" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1044,7 +1124,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "899a1da9", "metadata": {}, "outputs": [], diff --git a/notebooks/benchmark_map_multi_categorical.tif b/notebooks/benchmark_map_multi_categorical.tif new file mode 100644 index 00000000..941e1f23 Binary files /dev/null and b/notebooks/benchmark_map_multi_categorical.tif differ diff --git a/notebooks/candidate_map_multi_categorical.tif b/notebooks/candidate_map_multi_categorical.tif new file mode 100644 index 00000000..2e334cf3 Binary files /dev/null and b/notebooks/candidate_map_multi_categorical.tif differ diff --git a/notebooks/candidate_raw_elevation_multi_categorical.tif b/notebooks/candidate_raw_elevation_multi_categorical.tif new file mode 100644 index 00000000..90ba5d3d Binary files /dev/null and b/notebooks/candidate_raw_elevation_multi_categorical.tif differ