From eca505acaae7001c64a3bcb7923b65b0a9623590 Mon Sep 17 00:00:00 2001 From: Greg Cocks Date: Tue, 29 Dec 2020 13:56:07 -0600 Subject: [PATCH 001/359] Fixed Typo Fixed small typo in add_crosswalk.py --- lib/add_crosswalk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/add_crosswalk.py b/lib/add_crosswalk.py index 6ae1c6980..5d66da70b 100755 --- a/lib/add_crosswalk.py +++ b/lib/add_crosswalk.py @@ -18,7 +18,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f if extent == 'FR': ## crosswalk using majority catchment method - # calculate majority catchemnts + # calculate majority catchments majority_calc = zonal_stats(input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True) input_majorities = gpd.GeoDataFrame.from_features(majority_calc) input_majorities = input_majorities.rename(columns={'majority' : 'feature_id'}) From 2ce8b6a0421c244e41072cdae790e7a37269930c Mon Sep 17 00:00:00 2001 From: MattLuck-NOAA Date: Thu, 31 Dec 2020 14:36:33 -0500 Subject: [PATCH 002/359] Modifications to build and run Docker image more reliably Modifications to build and run Docker image more reliably. Closes #178. Changes Changed to noninteractive install of grass. Fixes #182. Notes aggregate_vector_inputs.py doesn't work yet. Need to externally download required data to run fim_run.sh --- .gitignore | 1 + CHANGELOG.md | 13 ++++++- Dockerfile.dev | 3 +- install_grass.exp | 52 ---------------------------- lib/acquire_and_preprocess_inputs.py | 1 + lib/aggregate_vector_inputs.py | 8 ++--- lib/derive_headwaters.py | 2 +- 7 files changed, 20 insertions(+), 60 deletions(-) delete mode 100755 install_grass.exp diff --git a/.gitignore b/.gitignore index 370f4f97d..ec0085323 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ config/** !config/*default* !config/*calibrated* !config/symbology/ +.vscode/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 867435f65..bc1459cb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,18 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.0.1 - 2020-12-31 - [PR #184](https://github.com/NOAA-OWP/cahaba/pull/184) -## v3.0.0.0 - 2020-12-22 +Modifications to build and run Docker image more reliably. Cleanup on some pre-processing scripts. + +### Changes + + - Changed to noninteractive install of GRASS. + - Changed some paths from relative to absolute and cleaned up some python shebang lines. + +### Notes + - `aggregate_vector_inputs.py` doesn't work yet. Need to externally download required data to run fim_run.sh + +## v3.0.0.0 - 2020-12-22 - [PR #181](https://github.com/NOAA-OWP/cahaba/pull/181) The software released here builds on the flood inundation mapping capabilities demonstrated as part of the National Flood Interoperability Experiment, the Office of Water Prediction's Innovators Program and the National Water Center Summer Institute. The flood inundation mapping software implements the Height Above Nearest Drainage (HAND) algorithm and incorporates community feedback and lessons learned over several years. The software has been designed to meet the requirements set by stakeholders interested in flood prediction and has been developed in partnership with several entities across the water enterprise. 
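The v3.0.0.0 entry above summarizes the Height Above Nearest Drainage (HAND) approach. As orientation only, here is a minimal numpy sketch of the core relationship, assuming a precomputed relative-elevation (HAND) grid in meters, a single stage value per catchment, and a -9999 nodata convention; it is not the repository's inundation code.

import numpy as np

def inundate_from_hand(hand_m, stage_m, nodata=-9999.0):
    """Depth grid in meters: stage minus HAND, floored at zero; nodata preserved."""
    return np.where(hand_m == nodata, nodata, np.maximum(stage_m - hand_m, 0.0))

# Example: a 1.5 m stage over a small synthetic HAND grid
hand = np.array([[0.2, 1.0, 3.5],
                 [0.0, 2.0, -9999.0]])
print(inundate_from_hand(hand, 1.5))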
diff --git a/Dockerfile.dev b/Dockerfile.dev index a57389da8..626ef94ef 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -74,8 +74,7 @@ COPY --from=builder $depDir $depDir RUN apt update --fix-missing RUN apt install -y p7zip-full python3-pip time mpich=3.3.2-2build1 parallel=20161222-1.1 libgeos-dev=3.8.0-1build1 expect=5.45.4-2build1 -COPY install_grass.exp . -RUN ./install_grass.exp +RUN DEBIAN_FRONTEND=noninteractive apt install -y grass=7.8.2-1build3 grass-doc=7.8.2-1build3 RUN apt auto-remove diff --git a/install_grass.exp b/install_grass.exp deleted file mode 100755 index e1320ffb9..000000000 --- a/install_grass.exp +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/expect -f -# -# This Expect script was generated by autoexpect on Thu Oct 22 20:27:42 2020 -# Expect and autoexpect were both written by Don Libes, NIST. -# -# Note that autoexpect does not guarantee a working script. It -# necessarily has to guess about certain things. Two reasons a script -# might fail are: -# -# 1) timing - A surprising number of programs (rn, ksh, zsh, telnet, -# etc.) and devices discard or ignore keystrokes that arrive "too -# quickly" after prompts. If you find your new script hanging up at -# one spot, try adding a short sleep just before the previous send. -# Setting "force_conservative" to 1 (see below) makes Expect do this -# automatically - pausing briefly before sending each character. This -# pacifies every program I know of. The -c flag makes the script do -# this in the first place. The -C flag allows you to define a -# character to toggle this mode off and on. - -set force_conservative 0 ;# set to 1 to force conservative mode even if - ;# script wasn't run conservatively originally -if {$force_conservative} { - set send_slow {1 .1} - proc send {ignore arg} { - sleep .1 - exp_send -s -- $arg - } -} - -# -# 2) differing output - Some programs produce different output each time -# they run. The "date" command is an obvious example. Another is -# ftp, if it produces throughput statistics at the end of a file -# transfer. If this causes a problem, delete these patterns or replace -# them with wildcards. An alternative is to use the -p flag (for -# "prompt") which makes Expect only look for the last line of output -# (i.e., the prompt). The -P flag allows you to define a character to -# toggle this mode off and on. -# -# Read the man page for more info. 
-# -# -Don - - -set timeout -1 -spawn apt install -y grass=7.8.2-1build3 grass-doc=7.8.2-1build3 -match_max 100000 -expect -exact "Country of origin for the keyboard: " -send -- "31\r" -expect -exact "Keyboard layout: " -send -- "1\r" -expect eof diff --git a/lib/acquire_and_preprocess_inputs.py b/lib/acquire_and_preprocess_inputs.py index f3b200e94..446e6955e 100755 --- a/lib/acquire_and_preprocess_inputs.py +++ b/lib/acquire_and_preprocess_inputs.py @@ -8,6 +8,7 @@ from multiprocessing import Pool import geopandas as gp from urllib.error import HTTPError +from tqdm import tqdm from utils.shared_variables import (NHD_URL_PARENT, NHD_URL_PREFIX, diff --git a/lib/aggregate_vector_inputs.py b/lib/aggregate_vector_inputs.py index 6d9b2abc9..48dadecbf 100755 --- a/lib/aggregate_vector_inputs.py +++ b/lib/aggregate_vector_inputs.py @@ -1,4 +1,4 @@ -#!/usr/bin/env·python3 +#!/usr/bin/env python3 import os import geopandas as gpd @@ -8,9 +8,9 @@ from derive_headwaters import findHeadWaterPoints from tqdm import tqdm -in_dir ='data/inputs/nhdplus_vectors' -nhd_dir ='data/inputs/nhdplus_vectors_aggregate' -nwm_dir = 'data/inputs/nwm_hydrofabric' +in_dir ='/data/inputs/nhdplus_vectors' +nhd_dir ='/data/inputs/nhdplus_vectors_aggregate' +nwm_dir = '/data/inputs/nwm_hydrofabric' ## NWM Headwaters print ('deriving NWM headwater points') diff --git a/lib/derive_headwaters.py b/lib/derive_headwaters.py index e0c39f6b5..d29381c60 100644 --- a/lib/derive_headwaters.py +++ b/lib/derive_headwaters.py @@ -1,4 +1,4 @@ -#!/usr/bin/env·python3 +#!/usr/bin/env python3 import geopandas as gpd from shapely.geometry import Point From 68b1969bc28b123c7bc1044f19e5a258e54815be Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 6 Jan 2021 14:14:08 -0600 Subject: [PATCH 003/359] hotfix to address AHPSs mapping errors (#200) * handle dytpe and leading zeros for hydrotable * exit inundation.py when all stream segments are lakes --- lib/add_crosswalk.py | 5 +++++ lib/run_by_unit.sh | 2 +- tests/inundation.py | 7 ++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/add_crosswalk.py b/lib/add_crosswalk.py index 5d66da70b..1f51efb4d 100755 --- a/lib/add_crosswalk.py +++ b/lib/add_crosswalk.py @@ -135,8 +135,13 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_flows['HydroID'] = output_flows.HydroID.str.zfill(8) output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID']],how='left',on='HydroID') output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int) + output_hydro_table = output_hydro_table.rename(columns={'HUC8':'HUC'}) + if output_hydro_table.HUC.dtype != 'str': output_hydro_table.HUC = output_hydro_table.HUC.astype(str) + output_hydro_table.HUC = output_hydro_table.HUC.str.zfill(8) + output_hydro_table.drop(columns='fossid',inplace=True) + if output_hydro_table.feature_id.dtype != 'int': output_hydro_table.feature_id = output_hydro_table.feature_id.astype(int) if output_hydro_table.feature_id.dtype != 'str': output_hydro_table.feature_id = output_hydro_table.feature_id.astype(str) # write out based on mode diff --git a/lib/run_by_unit.sh b/lib/run_by_unit.sh index d2c63a208..7a19cd88b 100755 --- a/lib/run_by_unit.sh +++ b/lib/run_by_unit.sh @@ -59,7 +59,7 @@ echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv date -u Tstart [ ! 
-f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ] && \ -$libDir/snap_and_clip_to_nhd.py -d $hucNumber -w $input_NWM_Flows -f $input_NWM_Headwaters -s $input_NHD_Flowlines -l $input_NWM_Lakes -r $input_NLD -u $outputHucDataDir/wbd.gpkg -g $outputHucDataDir/wbd_buffered.gpkg -y $inputDataDir/ahp_sites/ahps_sites.gpkg -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -t $outputHucDataDir/nwm_headwaters_proj_subset.gpkg -m $input_NWM_Catchments -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -p $extent +$libDir/snap_and_clip_to_nhd.py -d $hucNumber -w $input_NWM_Flows -f $input_NWM_Headwaters -s $input_NHD_Flowlines -l $input_NWM_Lakes -r $input_NLD -u $outputHucDataDir/wbd.gpkg -g $outputHucDataDir/wbd_buffered.gpkg -y $inputDataDir/ahp_sites/nws_lid.gpkg -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -t $outputHucDataDir/nwm_headwaters_proj_subset.gpkg -m $input_NWM_Catchments -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -p $extent Tcount if [ "$extent" = "MS" ]; then diff --git a/tests/inundation.py b/tests/inundation.py index a07f8d96f..0496d8d1b 100755 --- a/tests/inundation.py +++ b/tests/inundation.py @@ -400,7 +400,7 @@ def __return_huc_in_hucSet(hucCode,hucSet): rem_array,window_transform = mask(rem,[shape(huc['geometry'])],crop=True,indexes=1) catchments_array,_ = mask(catchments,[shape(huc['geometry'])],crop=True,indexes=1) elif mask_type == "filter": - + # input catchments polygon if isinstance(catchment_poly,str): catchment_poly=gpd.read_file(catchment_poly) @@ -458,6 +458,11 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): ) hydroTable.set_index(['HUC','feature_id','HydroID'],inplace=True) hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. + + if hydroTable.empty: + print ("All stream segments in this HUC are within lake boundaries.") + sys.exit(0) + elif isinstance(hydroTable,pd.DataFrame): pass #consider checking for correct dtypes, indices, and columns else: From 9bffb885f32dfcd95978c7ccd2639f9df56ff829 Mon Sep 17 00:00:00 2001 From: FernandoSalas-NOAA Date: Thu, 7 Jan 2021 22:35:27 -0600 Subject: [PATCH 004/359] Update README.md Added to reference list --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 63c2a27f2..69a815b6d 100644 --- a/README.md +++ b/README.md @@ -112,8 +112,9 @@ NOAA's National Water Center welcomes anyone to contribute to the Cahaba reposit 2. National Flood Interoperability Experiment [(NFIE)](https://web.corral.tacc.utexas.edu/nfiedata/) 3. Garousi‐Nejad, I., Tarboton, D. G.,Aboutalebi, M., & Torres‐Rua, A.(2019). Terrain analysis enhancements to the Height Above Nearest Drainage flood inundation mapping method. Water Resources Research, 55 , 7983–8009. https://doi.org/10.1029/2019WR0248375. 4. Zheng, X., D.G. Tarboton, D.R. Maidment, Y.Y. Liu, and P. Passalacqua. 2018. 
“River Channel Geometry and Rating Curve Estimation Using Height above the Nearest Drainage.” Journal of the American Water Resources Association 54 (4): 785–806. https://doi.org/10.1111/1752-1688.12661. -5. Barnes, Richard. 2016. RichDEM: Terrain Analysis Software. http://github.com/r-barnes/richdem -6. [TauDEM](https://github.com/dtarb/TauDEM) -7. Federal Emergency Management Agency (FEMA) Base Level Engineering [(BLE)](https://webapps.usgs.gov/infrm/estBFE/) -8. Verdin, James; Verdin, Kristine; Mathis, Melissa; Magadzire, Tamuka; Kabuchanga, Eric; Woodbury, Mark; and Gadain, Hussein, 2016, A software tool for rapid flood inundation mapping: U.S. Geological Survey Open-File Report 2016–1038, 26 p., http://dx.doi.org/10.3133/ofr20161038. -9. United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR). https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution +5. Liu, Y. Y., D. R. Maidment, D. G. Tarboton, X. Zheng and S. Wang, (2018), "A CyberGIS Integration and Computation Framework for High-Resolution Continental-Scale Flood Inundation Mapping," JAWRA Journal of the American Water Resources Association, 54(4): 770-784, https://doi.org/10.1111/1752-1688.12660. +6. Barnes, Richard. 2016. RichDEM: Terrain Analysis Software. http://github.com/r-barnes/richdem +7. [TauDEM](https://github.com/dtarb/TauDEM) +8. Federal Emergency Management Agency (FEMA) Base Level Engineering [(BLE)](https://webapps.usgs.gov/infrm/estBFE/) +9. Verdin, James; Verdin, Kristine; Mathis, Melissa; Magadzire, Tamuka; Kabuchanga, Eric; Woodbury, Mark; and Gadain, Hussein, 2016, A software tool for rapid flood inundation mapping: U.S. Geological Survey Open-File Report 2016–1038, 26 p., http://dx.doi.org/10.3133/ofr20161038. +10. United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR). https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution From 474c209751942940e8943bc9d858c875b102c0a0 Mon Sep 17 00:00:00 2001 From: NickChadwick-NOAA Date: Fri, 8 Jan 2021 12:18:15 -0600 Subject: [PATCH 005/359] Update CHANGELOG.md --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc1459cb2..e17cbd453 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.0.2 - 2021-01-06 - [PR #200](https://github.com/NOAA-OWP/cahaba/pull/200) + +Patch to address AHPSs mapping errors. + +### Changes + + - Checks `dtype` of `hydroTable.csv` columns to resolve errors caused in `inundation.py` when joining to flow forecast. + - Exits `inundation.py` when all hydrotable HydroIDs are lake features. + - Updates path to latest AHPs site layer. + - Updated [readme](https://github.com/NOAA-OWP/cahaba/commit/9bffb885f32dfcd95978c7ccd2639f9df56ff829) + ## v3.0.0.1 - 2020-12-31 - [PR #184](https://github.com/NOAA-OWP/cahaba/pull/184) Modifications to build and run Docker image more reliably. Cleanup on some pre-processing scripts. 
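The v3.0.0.2 changes above hinge on join-key dtypes: HUC codes read as integers drop their leading zeros, and a string/int mismatch on feature_id silently empties the merge with the flow forecast. Below is a minimal pandas sketch of the normalization, with column names ("HUC", "feature_id", "discharge") assumed for illustration rather than taken verbatim from inundation.py.

import pandas as pd

hydro_table = pd.DataFrame({"HUC": [3020201, 12090301],        # leading zero already lost as int
                            "feature_id": [101, 102],
                            "HydroID": [30202010001, 120903010001]})
forecast = pd.DataFrame({"feature_id": ["101", "102"], "discharge": [5.0, 7.5]})

# Normalize before joining: zero-pad HUC back to 8 characters and put the join key
# into one consistent dtype on both sides, mirroring the hydrotable changes above.
hydro_table["HUC"] = hydro_table["HUC"].astype(str).str.zfill(8)
hydro_table["feature_id"] = hydro_table["feature_id"].astype(str)

print(hydro_table.merge(forecast, on="feature_id", how="inner"))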
From 0993052ebc51530af136e6dfc9c393aa090400bd Mon Sep 17 00:00:00 2001 From: RyanSpies-NOAA Date: Thu, 14 Jan 2021 09:08:45 -0600 Subject: [PATCH 006/359] Hotfix for handling nodata value in rasterized levee lines - Resolves bug for HUCs where $ndv > 0 (Great Lakes region) - Initialize the nld_rasterized_elev.tif using a value of -9999 instead of $ndv --- CHANGELOG.md | 9 +++++++++ lib/run_by_unit.sh | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e17cbd453..608c93979 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.0.3 - 2021-01-14 - [PR #210](https://github.com/NOAA-OWP/cahaba/pull/210) + +Hotfix for handling nodata value in rasterized levee lines. + +### Changes + + - Resolves bug for HUCs where `$ndv > 0` (Great Lakes region). + - Initialize the `nld_rasterized_elev.tif` using a value of `-9999` instead of `$ndv`. + ## v3.0.0.2 - 2021-01-06 - [PR #200](https://github.com/NOAA-OWP/cahaba/pull/200) Patch to address AHPSs mapping errors. diff --git a/lib/run_by_unit.sh b/lib/run_by_unit.sh index 7a19cd88b..81bc676da 100755 --- a/lib/run_by_unit.sh +++ b/lib/run_by_unit.sh @@ -96,7 +96,7 @@ echo -e $startDiv"Rasterize all NLD multilines using zelev vertices"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/nld_rasterized_elev.tif ] && [ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ -gdal_rasterize -l nld_subset_levees -3d -at -init $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif +gdal_rasterize -l nld_subset_levees -3d -at -init -9999 -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif Tcount ## CONVERT TO METERS ## @@ -138,7 +138,7 @@ echo -e $startDiv"Burn nld levees into dem & convert nld elev to meters (*Overwr date -u Tstart [ -f $outputHucDataDir/nld_rasterized_elev.tif ] && \ -gdal_calc.py --quiet --type=Float32 --overwrite --NoDataValue $ndv --co "BLOCKXSIZE=512" --co "BLOCKYSIZE=512" --co "TILED=YES" --co "COMPRESS=LZW" --co "BIGTIFF=YES" -A $outputHucDataDir/dem_meters.tif -B $outputHucDataDir/nld_rasterized_elev.tif --outfile="$outputHucDataDir/dem_meters.tif" --calc="maximum(A,(B*0.3048))" --NoDataValue=$ndv +gdal_calc.py --quiet --type=Float32 --overwrite --NoDataValue $ndv --co "BLOCKXSIZE=512" --co "BLOCKYSIZE=512" --co "TILED=YES" --co "COMPRESS=LZW" --co "BIGTIFF=YES" -A $outputHucDataDir/dem_meters.tif -B $outputHucDataDir/nld_rasterized_elev.tif --outfile="$outputHucDataDir/dem_meters.tif" --calc="maximum(A,((B>-9999)*0.3048))" --NoDataValue=$ndv Tcount ## DEM Reconditioning ## From d9c6a2afb517b39259d1df6279d96cdb114f8b14 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Wed, 20 Jan 2021 12:01:15 -0600 Subject: [PATCH 007/359] Changed the directory where the included_huc*.lst files are being read from. Changed the directory where the `included_huc*.lst` files are being read from. 
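For context on the change described above, a condensed sketch of the lookup that check_huc_inputs.py performs after this commit (see the diff that follows); it assumes one HUC code per line in each included_huc*.lst file and an inputDataDir environment variable pointing at the inputs directory.

import os
from glob import glob

def accepted_hucs(input_data_dir):
    accepted = set()
    # The list files are now read from the huc_lists subdirectory.
    for list_file in glob(os.path.join(input_data_dir, "huc_lists", "included_huc*.lst")):
        with open(list_file) as f:
            accepted.update(line.strip() for line in f if line.strip())
    return accepted

def check_hucs(requested_hucs):
    missing = set(requested_hucs) - accepted_hucs(os.environ["inputDataDir"])
    if missing:
        raise KeyError("HUCs not found in included_huc*.lst files: {}".format(sorted(missing)))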
--- CHANGELOG.md | 8 ++++++++ lib/check_huc_inputs.py | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 608c93979..5133f88e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.0.4 - 2021-01-20 - [PR #230](https://github.com/NOAA-OWP/cahaba/pull/230) + +Changed the directory where the `included_huc*.lst` files are being read from. + +### Changes + + - Changed the directory where the `included_huc*.lst` files are being read from. + ## v3.0.0.3 - 2021-01-14 - [PR #210](https://github.com/NOAA-OWP/cahaba/pull/210) Hotfix for handling nodata value in rasterized levee lines. diff --git a/lib/check_huc_inputs.py b/lib/check_huc_inputs.py index b1b83e7c0..1302e18ad 100755 --- a/lib/check_huc_inputs.py +++ b/lib/check_huc_inputs.py @@ -5,7 +5,7 @@ from glob import glob def __read_included_files(parent_dir_path): - + filename_patterns = glob(os.path.join(parent_dir_path,'included_huc*.lst')) accepted_hucs_set = set() @@ -23,7 +23,7 @@ def __read_included_files(parent_dir_path): def __read_input_hucs(hucs): - + hucs = [h.split() for h in hucs][0] if os.path.isfile(hucs[0]): with open(hucs[0],'r') as hucs_file: @@ -42,7 +42,7 @@ def __check_for_membership(hucs,accepted_hucs_set): def check_hucs(hucs): - accepted_hucs = __read_included_files(os.environ['inputDataDir']) + accepted_hucs = __read_included_files(os.path.join(os.environ['inputDataDir'],'huc_lists')) hucs = __read_input_hucs(hucs) __check_for_membership(hucs,accepted_hucs) @@ -54,6 +54,6 @@ def check_hucs(hucs): # extract to dictionary args = vars(parser.parse_args()) - + # call function check_hucs(**args) From 8b6eb18f75665b5b2bd06297a373078f1dca5825 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 21 Jan 2021 10:38:15 -0600 Subject: [PATCH 008/359] Preprocess MS and FR stream networks * headwater stream segments geometries are adjusted to align with with NWM streams * incoming streams are selected using intersection points between NWM streams and HUC4 boundaries * clip_vectors_to_wbd.py handles local headwaters * removes NHDPlus features categorized as coastline and underground conduit * added streams layer to production whitelist * fixed progress bar in lib/acquire_and_preprocess_inputs.py * added getDriver to shared functions.py * cleaned up variable names and types This addresses Github issues #58, #12, #118, #73, and partial resolution of #4 --- CHANGELOG.md | 15 + config/params_calibrated.env | 8 +- config/params_template.env | 8 +- fim_run.sh | 5 +- lib/acquire_and_preprocess_inputs.py | 2 +- lib/adjust_headwater_streams.py | 142 +++++++++ lib/aggregate_vector_inputs.py | 445 ++++++++++++++++++++++++--- lib/buildstreamtraversal.py | 93 +++--- lib/clip_vectors_to_wbd.py | 145 +++++++++ lib/output_cleanup.py | 16 +- lib/reduce_nhd_stream_density.py | 165 ++++++++++ lib/run_by_unit.sh | 26 +- lib/snap_and_clip_to_nhd.py | 298 ------------------ lib/split_flows.py | 3 + lib/utils/shared_functions.py | 43 +-- 15 files changed, 978 insertions(+), 436 deletions(-) create mode 100644 lib/adjust_headwater_streams.py create mode 100755 lib/clip_vectors_to_wbd.py create mode 100644 lib/reduce_nhd_stream_density.py delete mode 100755 lib/snap_and_clip_to_nhd.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5133f88e8..d028b692d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,21 @@ All notable changes to this project 
will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.1.0 - 2021-01-21 - [PR #206](https://github.com/NOAA-OWP/cahaba/pull/206) + +Preprocess MS and FR stream networks + +### Changes + + - Headwater stream segments geometries are adjusted to align with with NWM streams. + - Incoming streams are selected using intersection points between NWM streams and HUC4 boundaries. + - `clip_vectors_to_wbd.py` handles local headwaters. + - Removes NHDPlus features categorized as coastline and underground conduit. + - Added streams layer to production whitelist. + - Fixed progress bar in `lib/acquire_and_preprocess_inputs.py`. + - Added `getDriver` to shared `functions.py`. + - Cleaned up variable names and types. + ## v3.0.0.4 - 2021-01-20 - [PR #230](https://github.com/NOAA-OWP/cahaba/pull/230) Changed the directory where the `included_huc*.lst` files are being read from. diff --git a/config/params_calibrated.env b/config/params_calibrated.env index 09e7f0167..f150d0428 100644 --- a/config/params_calibrated.env +++ b/config/params_calibrated.env @@ -1,9 +1,9 @@ #!/bin/bash #### geospatial parameters #### -export negativeBurnValue=1000 -export buffer=70 -export maxSplitDistance_meters=1500 +export negative_burn_value=1000 +export agree_DEM_buffer=70 +export max_split_distance_meters=1500 export manning_n="/foss_fim/config/mannings_calibrated.json" export stage_min_meters=0 export stage_interval_meters=0.3048 @@ -15,7 +15,7 @@ export lakes_buffer_dist_meters=20 #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed export ncores_fd=1 # mpi number of cores for flow directions -export defaultMaxJobs=1 # default number of max concurrent jobs to run +export default_max_jobs=1 # default number of max concurrent jobs to run export memfree=0G # min free memory required to start a new job or keep youngest job alive #### logging parameters #### diff --git a/config/params_template.env b/config/params_template.env index f87b4c15a..04af26828 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -1,9 +1,9 @@ #!/bin/bash #### geospatial parameters #### -export negativeBurnValue=1000 -export buffer=70 -export maxSplitDistance_meters=1500 +export negative_burn_value=1000 +export agree_DEM_buffer=70 +export max_split_distance_meters=1500 export manning_n="/foss_fim/config/mannings_default.json" export stage_min_meters=0 export stage_interval_meters=0.3048 @@ -15,7 +15,7 @@ export lakes_buffer_dist_meters=20 #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed export ncores_fd=1 # mpi number of cores for flow directions -export defaultMaxJobs=1 # default number of max concurrent jobs to run +export default_max_jobs=1 # default number of max concurrent jobs to run export memfree=0G # min free memory required to start a new job or keep youngest job alive #### logging parameters #### diff --git a/fim_run.sh b/fim_run.sh index fb22bc8c2..a8763da60 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -115,7 +115,10 @@ export input_NWM_Lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg export input_NWM_Catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg export input_NWM_Flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg export input_NWM_Headwaters=$inputDataDir/nwm_hydrofabric/nwm_headwaters.gpkg -export input_NHD_Flowlines=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_wVAA.gpkg +export 
input_nhd_flowlines_fr=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_fr_adjusted.gpkg +export input_nhd_flowlines_ms=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_ms_adjusted.gpkg +export input_nhd_headwaters_fr=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_fr.gpkg +export input_nhd_headwaters_ms=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_ms.gpkg ## Input handling ## $libDir/check_huc_inputs.py -u "$hucList" diff --git a/lib/acquire_and_preprocess_inputs.py b/lib/acquire_and_preprocess_inputs.py index 446e6955e..01270f235 100755 --- a/lib/acquire_and_preprocess_inputs.py +++ b/lib/acquire_and_preprocess_inputs.py @@ -249,7 +249,7 @@ def build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory): wbd = gp.read_file(full_huc_gpkg, layer=huc_gpkg) # Loop through entries and compare against the huc4_list to get available HUCs within the geopackage domain. - for index, row in tqdm(wbd.iterrows()): + for index, row in tqdm(wbd.iterrows(),total=len(wbd)): huc = row["HUC" + huc_gpkg[-1]] huc_mask = wbd.loc[wbd[str("HUC" + huc_gpkg[-1])]==huc].geometry burnline = os.path.join(nhd_plus_vector_dir, huc[0:4], 'NHDPlusBurnLineEvent' + huc[0:4] + '.gpkg') diff --git a/lib/adjust_headwater_streams.py b/lib/adjust_headwater_streams.py new file mode 100644 index 000000000..e08bf3352 --- /dev/null +++ b/lib/adjust_headwater_streams.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 + +import geopandas as gpd +import pandas as pd +import numpy as np +from os.path import splitext +from tqdm import tqdm +import argparse +import pygeos +from shapely.geometry import Point,LineString +from shapely.ops import split +from shapely.wkb import dumps, loads +from utils.shared_variables import PREP_PROJECTION +from utils.shared_functions import getDriver + +def adjust_headwaters(huc,nhd_streams,headwaters,headwater_id): + + # identify true headwater segments + if nhd_streams['headwaters_id'].dtype=='int': + nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id > 0) & (nhd_streams.downstream_of_headwater == False),:].copy() + if headwaters[headwater_id].dtype != 'int': headwaters[headwater_id] = headwaters[headwater_id].astype(int) + else: + nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id.notna()) & (nhd_streams.downstream_of_headwater == False),:].copy() + + nhd_streams_adj = nhd_streams_adj.explode() + nhd_streams_adj = nhd_streams_adj.reset_index(drop=True) + + headwater_limited = headwaters.merge(nhd_streams_adj["headwaters_id"],left_on=headwater_id, right_on="headwaters_id",how='right') + + headwaterstreams = [] + referencedpoints = [] + + for index, point in headwater_limited.iterrows(): + + # convert headwaterpoint geometries to WKB representation + wkb_points = dumps(point.geometry) + + # create pygeos headwaterpoint geometries from WKB representation + pointbin_geom = pygeos.io.from_wkb(wkb_points) + + # Closest segment to headwater + closest_stream = nhd_streams_adj.loc[nhd_streams_adj["headwaters_id"]==point[headwater_id]] + + try: # seeing inconsistent geometry objects even after exploding nhd_streams_adj; not sure why this is + closest_stream =closest_stream.explode() + except: + pass + try: + wkb_closest_stream = dumps(closest_stream.geometry[0]) + except: + wkb_closest_stream = dumps(closest_stream.geometry[0][0]) + + streambin_geom = pygeos.io.from_wkb(wkb_closest_stream) + + # Linear reference headwater to closest stream segment + pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom) 
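+        # line_locate_point returns the distance along the candidate stream segment to the
+        # point on that segment nearest the headwater (a linear-referencing measure, not a coordinate);
+        # line_interpolate_point below turns that distance back into a point lying exactly on the
+        # segment, i.e. the headwater snapped onto the NHD flowline.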
+ referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) + + # convert geometries to wkb representation + bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) + + # convert to shapely geometries + shply_referencedpoint = loads(bin_referencedpoint) + shply_linestring = loads(wkb_closest_stream) + headpoint = Point(shply_referencedpoint.coords) + cumulative_line = [] + relativedistlst = [] + + # collect all nhd stream segment linestring verticies + for point in zip(*shply_linestring.coords.xy): + cumulative_line = cumulative_line + [point] + relativedist = shply_linestring.project(Point(point)) + relativedistlst = relativedistlst + [relativedist] + + # add linear referenced headwater point to closest nhd stream segment + if not headpoint in cumulative_line: + cumulative_line = cumulative_line + [headpoint] + relativedist = shply_linestring.project(headpoint) + relativedistlst = relativedistlst + [relativedist] + + # sort by relative line distance to place headwater point in linestring + sortline = pd.DataFrame({'geom' : cumulative_line, 'dist' : relativedistlst}).sort_values('dist') + shply_linestring = LineString(sortline.geom.tolist()) + referencedpoints = referencedpoints + [headpoint] + + # split the new linestring at the new headwater point + try: + line1,line2 = split(shply_linestring, headpoint) + headwaterstreams = headwaterstreams + [LineString(line1)] + nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1) + except: + line1 = split(shply_linestring, headpoint) + headwaterstreams = headwaterstreams + [LineString(line1[0])] + nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0]) + + nhd_streams = nhd_streams.drop(columns=['is_relevant_stream', 'headwaters_id', 'downstream_of_headwater']) + + try: + del nhd_streams_adj, headwaters, headwater_limited, headwaterstreams, referencedpoints, cumulative_line, relativedistlst + except: + print ('issue deleting adjusted stream variables for huc ' + str(huc)) + + ## identify ajusted nhd headwaters + # print('Identify NHD headwater points',flush=True) + nhd_headwater_streams_adj = nhd_streams.loc[nhd_streams['is_headwater'],:] + nhd_headwater_streams_adj = nhd_headwater_streams_adj.explode() + + hw_points = np.zeros(len(nhd_headwater_streams_adj),dtype=object) + for index,lineString in enumerate(nhd_headwater_streams_adj.geometry): + hw_point = [point for point in zip(*lineString.coords.xy)][-1] + hw_points[index] = Point(*hw_point) + + nhd_headwater_points_adj = gpd.GeoDataFrame({'NHDPlusID' : nhd_headwater_streams_adj['NHDPlusID'], + 'geometry' : hw_points},geometry='geometry',crs=PREP_PROJECTION) + + del nhd_headwater_streams_adj + + return(nhd_streams, nhd_headwater_points_adj) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='adjust headwater stream geometery based on headwater start points') + parser.add_argument('-f','--huc',help='huc number',required=True) + parser.add_argument('-l','--nhd-streams',help='NHDPlus HR geodataframe',required=True) + parser.add_argument('-p','--headwaters',help='Headwater points layer',required=True,type=str) + parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) + parser.add_argument('-s','--adj-headwater-points-fileName',help='Output adj headwater points layer name',required=False,type=str,default=None) + 
parser.add_argument('-g','--headwater-points-fileName',help='Output headwater points layer name',required=False,type=str,default=None) + parser.add_argument('-i','--headwater-id',help='Output headwaters points',required=True) + + args = vars(parser.parse_args()) + + adj_streams_gdf,adj_headwaters_gdf = adjust_headwaters(huc,nhd_streams,headwaters,headwater_id) + + if subset_nhd_streams_fileName is not None: + adj_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName']),index=False) + + if headwater_points_fileName is not None: + headwater_points_fileName.to_file(args['headwater_points_fileName'],driver=getDriver(args['headwater_points_fileName']),index=False) + + if adj_headwater_points_fileName is not None: + adj_headwaters_gdf.to_file(args['adj_headwater_points_fileName'],driver=getDriver(args['adj_headwater_points_fileName']),index=False) diff --git a/lib/aggregate_vector_inputs.py b/lib/aggregate_vector_inputs.py index 48dadecbf..d60cdc2f0 100755 --- a/lib/aggregate_vector_inputs.py +++ b/lib/aggregate_vector_inputs.py @@ -2,51 +2,410 @@ import os import geopandas as gpd -import pandas as pd -from os.path import splitext from utils.shared_variables import PREP_PROJECTION +from utils.shared_functions import getDriver from derive_headwaters import findHeadWaterPoints +from reduce_nhd_stream_density import subset_nhd_network +from adjust_headwater_streams import adjust_headwaters from tqdm import tqdm +from os.path import splitext +from shapely.geometry import Point +from concurrent.futures import ProcessPoolExecutor,as_completed +from collections import deque +import numpy as np +from shapely.wkb import dumps, loads +import pygeos + +in_dir ='data/inputs/nhdplus_vectors' +nwm_dir = 'data/inputs/nwm_hydrofabric' +wbd_dir = 'data/inputs/wbd' +ahps_dir = 'data/inputs/ahp_sites' +agg_dir = 'data/inputs/nhdplus_vectors_aggregate' + +wbd_filename = os.path.join(wbd_dir, 'WBD_National.gpkg') +nwm_streams_fr_filename = os.path.join(nwm_dir,'nwm_flows.gpkg') +nwm_streams_ms_filename = os.path.join(nwm_dir,'nwm_flows_ms.gpkg') +nwm_headwaters_filename = os.path.join(nwm_dir,'nwm_headwaters.gpkg') +nwm_huc4_intersections_ms_filename = os.path.join(nwm_dir,'nwm_ms_huc4_intersections.gpkg') +nwm_huc4_intersections_fr_filename = os.path.join(nwm_dir,'nwm_fr_huc4_intersections.gpkg') + +def subset_nwm_ms_streams(args): + nwm_streams_filename = args[0] + in_dir = args[1] + ahps_dir = args[2] + output_filename = args[3] + + # subset nwm network to ms + ahps_headwaters_filename = os.path.join(ahps_dir,'bed_lids.gpkg') + ahps_headwaters = gpd.read_file(ahps_headwaters_filename) + + nwm_streams = gpd.read_file(nwm_streams_filename) + + nwm_streams['is_headwater'] = False + nwm_streams['downstream_of_headwater'] = False + + nwm_streams.loc[nwm_streams.ID.isin(list(ahps_headwaters.nwm_featur)),'is_headwater'] = True + + ## subset NHDPlus HR + nwm_streams['is_relevant_stream'] = nwm_streams['is_headwater'].copy() + + nwm_streams = nwm_streams.explode() + + # trace down from headwaters + nwm_streams.set_index('ID',inplace=True,drop=False) + + Q = deque(nwm_streams.loc[nwm_streams['is_headwater'],'ID'].tolist()) + visited = set() + + while Q: + q = Q.popleft() + if q in visited: + continue + # + visited.add(q) + toNode = nwm_streams.loc[q,'to'] + # + if not toNode == 0: + # + nwm_streams.loc[nwm_streams.ID==toNode,'is_relevant_stream'] = True + # + if toNode not in visited: + Q.append(toNode) + + nwm_streams = 
nwm_streams.loc[nwm_streams['is_relevant_stream'],:] + + nwm_streams.reset_index(drop=True,inplace=True) + + nwm_streams.to_file(output_filename,getDriver(output_filename),index=False) + +def find_nwm_incoming_streams(args): + + nwm_streams_filename = args[0] + wbd_filename = args[1] + in_dir = args[2] + output_filename = args[3] + + wbd = gpd.read_file(wbd_filename, layer='WBDHU4') + + intersecting_points = [] + for index, row in tqdm(wbd.iterrows(),total=len(wbd)): + col_name = 'HUC4' + huc = row[col_name] + + huc_mask = wbd.loc[wbd[col_name]==str(huc)] + huc_mask = huc_mask.explode() + huc_mask = huc_mask.reset_index(drop=True) + + nwm_streams = gpd.read_file(nwm_streams_filename, mask=huc_mask) + nwm_streams = nwm_streams.explode() + nwm_streams = nwm_streams.reset_index(drop=True) + + for index, polygon in enumerate(huc_mask.geometry): + crosses=nwm_streams.crosses(polygon.exterior) + nwm_streams_subset =nwm_streams[crosses] + nwm_streams_subset = nwm_streams_subset.reset_index(drop=True) + + for index, linestring in enumerate(nwm_streams_subset.geometry): + distances = [] + # distance to each stream segment + for point in zip(*linestring.coords.xy): + distance = Point(point).distance(polygon.exterior) + distances = distances + [distance] + + # find minimum distance + min_index = np.argmin(distances) + + # Closest segment to headwater + closest_point = list(linestring.coords)[min_index] + last_node = Point(closest_point) + + # convert geometries to WKB representation + wkb_point = dumps(last_node) + wkb_poly = dumps(polygon.exterior) + + # create pygeos geometries from WKB representation + stream_point_geom = pygeos.io.from_wkb(wkb_point) + polybin_geom = pygeos.io.from_wkb(wkb_poly) + + # Linear reference end node to huc boundary + pointdistancetoline = pygeos.linear.line_locate_point(polybin_geom,stream_point_geom) + referencedpoint = pygeos.linear.line_interpolate_point(polybin_geom, pointdistancetoline) + + # convert geometries to wkb representation + bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) + + # convert to shapely geometries + shply_referencedpoint = loads(bin_referencedpoint) + + # collect all nhd stream segment linestring verticies + intersecting_points = intersecting_points + [shply_referencedpoint] + + huc_intersection = gpd.GeoDataFrame({'geometry' : intersecting_points},crs=nwm_streams.crs,geometry='geometry') + huc_intersection = huc_intersection.drop_duplicates() + huc_intersection.to_file(output_filename,getDriver(output_filename)) + +def collect_stream_attributes(args, huc): + print ('Starting huc: ' + str(huc)) + in_dir = args[0] + nwm_dir = args[1] + ahps_dir = args[2] + + print ('Collecting NHDPlus HR attributes') + burnline_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') + vaa_filename = os.path.join(in_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') + flowline_filename = os.path.join(in_dir,huc,'NHDFlowline' + str(huc) + '.gpkg') + + if os.path.exists(os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')): + + burnline = gpd.read_file(burnline_filename) + burnline = burnline[['NHDPlusID','ReachCode','geometry']] + + flowline = gpd.read_file(flowline_filename) + flowline = flowline[['NHDPlusID','FType','FCode']] + # flowline = flowline.loc[flowline["FType"].isin([334,420,428,460,558])] + flowline = flowline.loc[~flowline["FType"].isin([566,420])] + + nhd_streams_vaa = gpd.read_file(vaa_filename) + nhd_streams_vaa = 
nhd_streams_vaa[['FromNode','ToNode','NHDPlusID','StreamOrde','DnLevelPat','LevelPathI']] + nhd_streams = burnline.merge(nhd_streams_vaa,on='NHDPlusID',how='inner') + nhd_streams = nhd_streams.merge(flowline,on='NHDPlusID',how='inner') + + del burnline, flowline, nhd_streams_vaa + + nhd_streams = nhd_streams.to_crs(PREP_PROJECTION) + nhd_streams = nhd_streams.loc[nhd_streams.geometry!=None,:] # special case: remove segments without geometries + nhd_streams['HUC4'] = str(huc) + + # write out NHDPlus HR aggregated + nhd_streams_agg_fileName = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + nhd_streams.to_file(nhd_streams_agg_fileName,driver=getDriver(nhd_streams_agg_fileName),index=False) + del nhd_streams + + print ('finished huc: ' + str(huc)) -in_dir ='/data/inputs/nhdplus_vectors' -nhd_dir ='/data/inputs/nhdplus_vectors_aggregate' -nwm_dir = '/data/inputs/nwm_hydrofabric' - -## NWM Headwaters -print ('deriving NWM headwater points') -nwm_streams = gpd.read_file(os.path.join(nwm_dir,'nwm_flows.gpkg')) -nwm_headwaters = findHeadWaterPoints(nwm_streams) -nwm_headwaters.to_file(os.path.join(nwm_dir,'nwm_headwaters.gpkg'),driver='GPKG',index=False) - -## NHDPlus HR -print ('aggregating NHDPlus HR burnline layers') -nhd_streams_wVAA_fileName_pre=os.path.join(nhd_dir,'NHDPlusBurnLineEvent_wVAA.gpkg') - -schema = {'geometry': 'MultiLineString','properties': {'NHDPlusID': 'str','ReachCode': 'str', - 'FromNode': 'str','ToNode': 'str', - 'StreamOrde': 'str','DnLevelPat': 'str', - 'LevelPathI': 'str'}} - -for huc in tqdm(os.listdir(in_dir)): - if not huc[0]=='#': - burnline_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') - vaa_filename = os.path.join(in_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') - flowline_filename = os.path.join(in_dir,huc,'NHDFlowline' + str(huc) + '.gpkg') - if os.path.exists(os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')): - burnline = gpd.read_file(burnline_filename) - nhd_streams_vaa = gpd.read_file(vaa_filename) - flowline = gpd.read_file(flowline_filename) - burnline = burnline[['NHDPlusID','ReachCode','geometry']] - flowline = flowline[['NHDPlusID','FCode']] - nhd_streams_vaa = nhd_streams_vaa[['FromNode','ToNode','NHDPlusID','StreamOrde','DnLevelPat','LevelPathI']] - nhd_streams_withVAA = burnline.merge(nhd_streams_vaa,on='NHDPlusID',how='inner') - nhd_streams_fcode = nhd_streams_withVAA.merge(flowline,on='NHDPlusID',how='inner') - nhd_streams = nhd_streams_fcode.to_crs(PREP_PROJECTION) - if os.path.isfile(nhd_streams_wVAA_fileName_pre): - nhd_streams.to_file(nhd_streams_wVAA_fileName_pre,driver='GPKG',index=False, mode='a') - else: - nhd_streams.to_file(nhd_streams_wVAA_fileName_pre,driver='GPKG',index=False) - else: - print ('missing data for huc ' + str(huc)) else: - print ('skipping huc ' + str(huc)) + print ('missing data for huc ' + str(huc)) + +def subset_stream_networks(args, huc): + + nwm_dir = args[0] + ahps_dir = args[1] + wbd4 = args[2] + wbd8 = args[3] + in_dir = args[4] + nwm_huc4_intersect_fr_filename = args[5] + nwm_huc4_intersect_ms_filename = args[6] + + print("starting HUC " + str(huc),flush=True) + nwm_headwater_id = 'ID' + nwm_headwaters_filename = os.path.join(nwm_dir,'nwm_headwaters.gpkg') + ahps_headwater_id = 'nws_lid' + ahps_headwaters_filename = os.path.join(ahps_dir,'nws_lid.gpkg') + nhd_streams_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + + # subset to reduce footprint + selected_wbd4 = 
wbd4.loc[wbd4.HUC4.str.startswith(str(huc))] + del wbd4 + selected_wbd8 = wbd8.loc[wbd8.HUC8.str.startswith(huc)] + del wbd8 + + huc_mask = selected_wbd4.loc[selected_wbd4.HUC4.str.startswith(str(huc))] + huc_mask = huc_mask.explode() + huc_mask = huc_mask.reset_index(drop=True) + + if len(selected_wbd8.HUC8) > 0: + selected_wbd8 = selected_wbd8.reset_index(drop=True) + + # identify FR/NWM headwaters + nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersect_fr_filename) + + ## adjust FR/NWM headwater segments + nwm_headwaters = gpd.read_file(nwm_headwaters_filename, mask=huc_mask) + + if len(nwm_headwaters) > 0: + + adj_nhd_streams_fr, adj_nhd_headwater_points_fr = adjust_headwaters(str(huc),nhd_streams_fr,nwm_headwaters,nwm_headwater_id) + + nhd_streams_fr_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') + adj_nhd_headwaters_fr_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + + # write out FR adjusted + adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,getDriver(nhd_streams_fr_adjusted_fileName),index=False) + adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,getDriver(adj_nhd_headwaters_fr_fileName),index=False) + + del adj_nhd_streams_fr, adj_nhd_headwater_points_fr + else: + print ('skipping FR headwater adjustments for HUC: ' + str(huc)) + + del nhd_streams_fr + + ## identify MS/AHPs headwaters + nhd_streams_ms = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,ahps_headwaters_filename,ahps_headwater_id,nwm_huc4_intersect_ms_filename) + + ## adjust MS/AHPs headwater segments + ahps_headwaters = gpd.read_file(ahps_headwaters_filename, mask=huc_mask) + + if len(ahps_headwaters) > 0: + + adj_nhd_streams_ms, adj_nhd_headwater_points_ms = adjust_headwaters(str(huc),nhd_streams_ms,ahps_headwaters,ahps_headwater_id) + + nhd_streams_ms_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') + adj_nhd_headwaters_ms_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') + + # write out MS adjusted + adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,getDriver(nhd_streams_ms_adjusted_fileName),index=False) + adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,getDriver(adj_nhd_headwaters_ms_fileName),index=False) + + del adj_nhd_streams_ms, adj_nhd_headwater_points_ms + + else: + print ('skipping MS headwater adjustments for HUC: ' + str(huc)) + del nhd_streams_ms + +def aggregate_stream_networks(in_dir,agg_dir, huc_list): + + for huc in huc_list: + + ## FR adjusted + adj_nhd_headwaters_fr_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_fr.gpkg') + nhd_fr_adj_huc_subset = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') + nhd_streams_fr_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_fr_adjusted.gpkg') + nhd_fr_adj_headwaters_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + + if os.path.isfile(nhd_fr_adj_huc_subset): + adj_nhd_streams_fr = gpd.read_file(nhd_fr_adj_huc_subset) + + # write out FR adjusted + if os.path.isfile(nhd_streams_fr_adjusted_fileName): + adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False, mode='a') + else: + 
adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) + + del adj_nhd_streams_fr + + if os.path.isfile(nhd_fr_adj_headwaters_subset): + adj_nhd_headwater_points_fr = gpd.read_file(nhd_fr_adj_headwaters_subset) + + # write out FR adjusted + if os.path.isfile(adj_nhd_headwaters_fr_fileName): + adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False, mode='a') + else: + adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) + + del adj_nhd_headwater_points_fr + + ## MS adjusted + adj_nhd_headwaters_ms_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_ms.gpkg') + nhd_ms_adj_huc_subset = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') + nhd_streams_ms_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_ms_adjusted.gpkg') + nhd_ms_adj_headwater_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') + + if os.path.isfile(nhd_ms_adj_huc_subset): + adj_nhd_streams_ms = gpd.read_file(nhd_ms_adj_huc_subset) + + # write out ms adjusted + if os.path.isfile(nhd_streams_ms_adjusted_fileName): + adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False, mode='a') + else: + adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) + + del adj_nhd_streams_ms + + if os.path.isfile(nhd_ms_adj_headwater_subset): + adj_nhd_headwater_points_ms = gpd.read_file(nhd_ms_adj_headwater_subset) + + # write out ms adjusted + if os.path.isfile(adj_nhd_headwaters_ms_fileName): + adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False, mode='a') + else: + adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) + + del adj_nhd_headwater_points_ms + + +def clean_up_intermediate_files(in_dir): + + for huc in os.listdir(in_dir): + agg_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + fr_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr.gpkg') + fr_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') + ms_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms.gpkg') + ms_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') + ms_headwater_adj_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') + fr_headwater_adj_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + ms_headwater_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_ms.gpkg') + fr_headwater_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_fr.gpkg') + + if os.path.exists(agg_path): + os.remove(agg_path) + + if os.path.exists(fr_path): + os.remove(fr_path) + + if os.path.exists(fr_adj_path): + os.remove(fr_adj_path) + + if os.path.exists(ms_path): + os.remove(ms_path) + + if os.path.exists(ms_adj_path): + os.remove(ms_adj_path) + + if os.path.exists(ms_headwater_adj_path): + os.remove(ms_headwater_adj_path) + + if os.path.exists(fr_headwater_adj_path): + os.remove(fr_headwater_adj_path) + + if os.path.exists(ms_headwater_path): + os.remove(ms_headwater_path) + + if 
os.path.exists(fr_headwater_path): + os.remove(fr_headwater_path) + + + +if(__name__=='__main__'): + + ## generate NWM Headwaters + # print ('deriving nwm headwater points') + # nwm_headwaters = findHeadWaterPoints(nwm_streams_fr_filename) + # nwm_headwaters['ID'] = nwm_headwaters.index + 1 + # nwm_headwaters.to_file(nwm_headwaters_filename,getDriver(nwm_headwaters_filename),index=False) + # del nwm_headwaters, nwm_streams + + ## subset NWM MS Streams + # nwm_subset_ms_args = (nwm_streams_fr_filename,in_dir,ahps_dir,nwm_streams_ms_filename) + # print ('deriving nwm ms streams') + # subset_nwm_ms_streams(nwm_subset_ms_args) + + ## generate NWM intersection points with WBD4 boundaries + # ms_nwm_intersect_args = (nwm_streams_ms_filename,wbd_filename,in_dir,nwm_huc4_intersections_ms_filename) + # fr_nwm_intersect_args = (nwm_streams_fr_filename,wbd_filename,in_dir,nwm_huc4_intersections_fr_filename) + # print ('deriving nwm ms intersection points') + # find_nwm_incoming_streams(ms_nwm_intersect_args) + # print ('deriving nwm fr intersection points') + # find_nwm_incoming_streams(fr_nwm_intersect_args) + + print ('loading wb4') + wbd4 = gpd.read_file(wbd_filename, layer='WBDHU4') + print ('loading wb8') + wbd8 = gpd.read_file(wbd_filename, layer='WBDHU8') + + subset_arg_list = (nwm_dir,ahps_dir,wbd4,wbd8,in_dir,nwm_huc4_intersections_fr_filename,nwm_huc4_intersections_ms_filename) + collect_arg_list = (in_dir,nwm_dir,ahps_dir) + + num_workers=9 + + with ProcessPoolExecutor(max_workers=num_workers) as executor: + ## preprocess nhd hr and add attributes + collect_attributes = [executor.submit(collect_stream_attributes, collect_arg_list, str(huc)) for huc in os.listdir(in_dir)] + ## subset nhd hr network + subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in os.listdir(in_dir)] + + + ## aggregate fr and ms nhd netowrks for entire nwm domain + aggregate_stream_networks(in_dir,agg_dir, os.listdir(in_dir)) + + ## remove intermediate files + # clean_up_intermediate_files(in_dir) diff --git a/lib/buildstreamtraversal.py b/lib/buildstreamtraversal.py index 1fcb00d65..f7b2bcbe4 100644 --- a/lib/buildstreamtraversal.py +++ b/lib/buildstreamtraversal.py @@ -2,9 +2,9 @@ Description: This tool creates unique IDs for each segment and builds the To_Node, From_Node, and NextDownID columns to traverse the network Required Arguments: - modelstream = stream network - WBD8 = HUC8 boundary dataset - HYDROID = name of ID column (string) + streams = stream network + wbd8 = HUC8 boundary dataset + hydro_id = name of ID column (string) ''' import sys import datetime @@ -22,9 +22,9 @@ def trace(): synerror = traceback.format_exc().splitlines()[-1] return line, filename, synerror -FN_FROMNODE = "From_Node" -FN_TONODE = "To_Node" -FN_NEXTDOWNID = "NextDownID" +from_node = "From_Node" +to_node = "To_Node" +next_down_id = "NextDownID" class BuildStreamTraversalColumns(object): '''Tool class for updating the next down IDs of stream features.''' @@ -33,60 +33,60 @@ def __init__(self): self.label = 'Find Next Downstream Line' self.description = '''Finds next downstream line, retrieves its HydroID and stores it in the NextDownID field.''' - def execute(self, modelstream, WBD8, HYDROID): + def execute(self, streams, wbd8, hydro_id): try: split_code = 1 sOK = 'OK' # check for HydroID; Assign if it doesn't exist - if not HYDROID in modelstream.columns: - print ("Required field " + HYDROID + " does not exist in input. 
Generating..") + if not hydro_id in streams.columns: + print ("Required field " + hydro_id + " does not exist in input. Generating..") - stream_centroid = gpd.GeoDataFrame({'geometry':modelstream.geometry.centroid}, crs=modelstream.crs, geometry='geometry') - stream_wbdjoin = gpd.sjoin(stream_centroid, WBD8, how='left', op='within') + stream_centroid = gpd.GeoDataFrame({'geometry':streams.geometry.centroid}, crs=streams.crs, geometry='geometry') + stream_wbdjoin = gpd.sjoin(stream_centroid, wbd8, how='left', op='within') stream_wbdjoin = stream_wbdjoin.rename(columns={"geometry": "centroid", "index_right": "HUC8id"}) - modelstream = modelstream.join(stream_wbdjoin).drop(columns=['centroid']) + streams = streams.join(stream_wbdjoin).drop(columns=['centroid']) - modelstream['seqID'] = (modelstream.groupby('HUC8id').cumcount(ascending=True)+1).astype('str').str.zfill(4) - modelstream = modelstream.loc[modelstream['HUC8id'].notna(),:] - modelstream = modelstream.assign(HYDROID= lambda x: x.HUC8id + x.seqID) - modelstream = modelstream.rename(columns={"HYDROID": HYDROID}).sort_values(HYDROID) - modelstream = modelstream.drop(columns=['HUC8id', 'seqID']) - modelstream[HYDROID] = modelstream[HYDROID].astype(int) - print ('Generated ' + HYDROID) + streams['seqID'] = (streams.groupby('HUC8id').cumcount(ascending=True)+1).astype('str').str.zfill(4) + streams = streams.loc[streams['HUC8id'].notna(),:] + streams = streams.assign(hydro_id= lambda x: x.HUC8id + x.seqID) + streams = streams.rename(columns={"hydro_id": hydro_id}).sort_values(hydro_id) + streams = streams.drop(columns=['HUC8id', 'seqID']) + streams[hydro_id] = streams[hydro_id].astype(int) + print ('Generated ' + hydro_id) # Check for TO/From Nodes; Assign if doesnt exist bOK = True - if not FN_FROMNODE in modelstream.columns: - print ("Field " + FN_FROMNODE + " does not exist in input ") + if not from_node in streams.columns: + print ("Field " + from_node + " does not exist in input ") bOK = False - if not FN_TONODE in modelstream.columns: - print ("Field " + FN_TONODE + " does not exist in input. Generating..") + if not to_node in streams.columns: + print ("Field " + to_node + " does not exist in input. Generating..") bOK = False if(bOK==False): # Add fields if not they do not exist. 
- if not FN_FROMNODE in modelstream.columns: - modelstream[FN_FROMNODE] = '' + if not from_node in streams.columns: + streams[from_node] = '' - if not FN_TONODE in modelstream.columns: - modelstream[FN_TONODE] = '' + if not to_node in streams.columns: + streams[to_node] = '' - modelstream = modelstream.sort_values(by=[HYDROID], ascending=True).copy() + streams = streams.sort_values(by=[hydro_id], ascending=True).copy() xy_dict = {} bhasnullshape=False - for rows in modelstream[['geometry', FN_FROMNODE, FN_TONODE]].iterrows(): + for rows in streams[['geometry', from_node, to_node]].iterrows(): if rows[1][0]: # From Node firstx = round(rows[1][0].coords.xy[0][0], 7) firsty = round(rows[1][0].coords.xy[1][0], 7) from_key = '{},{}'.format(firstx, firsty) if from_key in xy_dict: - modelstream.at[rows[0], FN_FROMNODE,] = xy_dict[from_key] + streams.at[rows[0], from_node,] = xy_dict[from_key] else: xy_dict[from_key] = len(xy_dict) + 1 - modelstream.at[rows[0], FN_FROMNODE,] = xy_dict[from_key] + streams.at[rows[0], from_node,] = xy_dict[from_key] # To Node lastx = round(rows[1][0].coords.xy[0][-1], 7) @@ -94,27 +94,27 @@ def execute(self, modelstream, WBD8, HYDROID): to_key = '{},{}'.format(lastx, lasty) #if xy_dict.has_key(to_key): if to_key in xy_dict: - modelstream.at[rows[0], FN_TONODE] = xy_dict[to_key] + streams.at[rows[0], to_node] = xy_dict[to_key] else: xy_dict[to_key] = len(xy_dict) + 1 - modelstream.at[rows[0], FN_TONODE] = xy_dict[to_key] + streams.at[rows[0], to_node] = xy_dict[to_key] else: bhasnullshape=True if bhasnullshape==True: print ("Some of the input features have a null shape.") - print (FN_FROMNODE + " and " + FN_TONODE + " fields cannot be populated for those features.") + print (from_node + " and " + to_node + " fields cannot be populated for those features.") else: print ('Generated To/From Nodes') # Create NextDownID field - if not FN_NEXTDOWNID in modelstream.columns: - modelstream[FN_NEXTDOWNID] = '' + if not next_down_id in streams.columns: + streams[next_down_id] = '' - # Create dict to store FN_FROMNODE values for each HydroID + # Create dict to store from_node values for each HydroID dnodes=dict() lstHydroIDs=[] - for row in modelstream[[FN_FROMNODE,HYDROID]].iterrows(): + for row in streams[[from_node,hydro_id]].iterrows(): if (row[1][0] in dnodes)==False: lstHydroIDs=[row[1][1]] @@ -124,7 +124,7 @@ def execute(self, modelstream, WBD8, HYDROID): lstHydroIDs.append(row[1][1]) # for each stream segment, search dict for HydroID downstream and - for urow in modelstream[[FN_NEXTDOWNID, FN_TONODE, FN_FROMNODE, HYDROID]].iterrows(): + for urow in streams[[next_down_id, to_node, from_node, hydro_id]].iterrows(): tonodecol = urow[1][1] nextdownIDcol = urow[1][0] hydroIDcol = urow[1][3] @@ -152,9 +152,9 @@ def execute(self, modelstream, WBD8, HYDROID): if next_down_ids:del next_down_ids except: pass - modelstream.loc[modelstream[HYDROID]== hydroIDcol,[FN_NEXTDOWNID]] = nextdownIDcol + streams.loc[streams[hydro_id]== hydroIDcol,[next_down_id]] = nextdownIDcol - tReturns = (sOK, modelstream) + tReturns = (sOK, streams) except Exception: sOK = "{}".format(trace()) tReturns = (sOK, ) @@ -166,18 +166,15 @@ def execute(self, modelstream, WBD8, HYDROID): ap.add_argument("-p", "--parameters", nargs='+', default=[], required=True, help="list of parameters") args = ap.parse_args() - modelstream = args.parameters[0] - WBD8 = args.parameters[1] - HYDROID = args.parameters[2] + streams = args.parameters[0] + wbd8 = args.parameters[1] + hydro_id = args.parameters[2] oProcessor = 
BuildStreamTraversalColumns() - params = (modelstream, WBD8, HYDROID) + params = (streams, wbd8, hydro_id) tResults=None tResults = oProcessor.execute(params) del oProcessor except: print (str(trace())) - finally: - dt = datetime.datetime.now() - print ('Finished at ' + dt.strftime("%Y-%m-%d %H:%M:%S")) diff --git a/lib/clip_vectors_to_wbd.py b/lib/clip_vectors_to_wbd.py new file mode 100755 index 000000000..92ece11f8 --- /dev/null +++ b/lib/clip_vectors_to_wbd.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + +import sys +import geopandas as gpd +import argparse +from os.path import splitext +from shapely.geometry import MultiPolygon,Polygon,Point + +def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks=False,extent='FR'): + + hucUnitLength = len(str(hucCode)) + + # Get wbd buffer + wbd = gpd.read_file(wbd_filename) + wbd_buffer = gpd.read_file(wbd_buffer_filename) + projection = wbd_buffer.crs + + # Clip ocean water polygon for future masking ocean areas (where applicable) + landsea = gpd.read_file(landsea_filename, mask = wbd_buffer) + if not landsea.empty: + landsea.to_file(subset_landsea_filename,driver=getDriver(subset_landsea_filename),index=False) + del landsea + + # find intersecting lakes and writeout + print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nwm_lakes = gpd.read_file(nwm_lakes_filename, mask = wbd_buffer) + + if not nwm_lakes.empty: + # perform fill process to remove holes/islands in the NWM lake polygons + nwm_lakes = nwm_lakes.explode() + nwm_lakes_fill_holes=MultiPolygon(Polygon(p.exterior) for p in nwm_lakes['geometry']) # remove donut hole geometries + # loop through the filled polygons and insert the new geometry + for i in range(len(nwm_lakes_fill_holes)): + nwm_lakes.loc[i,'geometry'] = nwm_lakes_fill_holes[i] + nwm_lakes.to_file(subset_nwm_lakes_filename,driver=getDriver(subset_nwm_lakes_filename),index=False) + del nwm_lakes + + # find intersecting levee lines + print("Subsetting NLD levee lines for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nld_lines = gpd.read_file(nld_lines_filename, mask = wbd_buffer) + if not nld_lines.empty: + nld_lines.to_file(subset_nld_lines_filename,driver=getDriver(subset_nld_lines_filename),index=False) + del nld_lines + + # find intersecting nwm_catchments + print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) + nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) + del nwm_catchments + + # subset nhd headwaters + print("Subsetting NHD Headwater Points for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nhd_headwaters = gpd.read_file(nhd_headwaters_filename, mask = wbd_buffer) + + # subset nhd streams + print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd_buffer) + + ## identify local headwater stream segments + nhd_streams_subset = gpd.read_file(nhd_streams_filename, mask = wbd) + nhd_streams_subset = 
nhd_streams_subset.loc[~nhd_streams_subset.FromNode.isin(list(set(nhd_streams_subset.ToNode) & set(nhd_streams_subset.FromNode)))] + nhd_streams_subset = nhd_streams_subset[~nhd_streams_subset['is_headwater']] + + if not nhd_streams_subset.empty: + nhd_streams_subset = nhd_streams_subset.reset_index(drop=True) + start_coords = [] + NHDPlusIDs = [] + for index, linestring in enumerate(nhd_streams_subset.geometry): + start_coords = start_coords + [linestring.coords[-1]] + NHDPlusIDs = NHDPlusIDs + [nhd_streams_subset.iloc[index].NHDPlusID] + + start_geoms = [Point(point) for point in start_coords] + local_headwaters = gpd.GeoDataFrame({'NHDPlusID': NHDPlusIDs,'geometry': start_geoms}, crs=projection, geometry='geometry') + nhd_headwaters = nhd_headwaters.append(local_headwaters) + + # nhd_streams = nhd_streams.loc[~nhd_streams.NHDPlusID.isin(NHDPlusIDs)] + + nhd_streams.to_file(subset_nhd_streams_filename,driver=getDriver(subset_nhd_streams_filename),index=False) + + if len(nhd_headwaters) > 0: + nhd_headwaters.to_file(subset_nhd_headwaters_filename,driver=getDriver(subset_nhd_headwaters_filename),index=False) + del nhd_headwaters, nhd_streams + else: + print ("No headwater point(s) within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) + + # subset nwm streams + print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nwm_streams = gpd.read_file(nwm_streams_filename, mask = wbd_buffer) + nwm_streams.to_file(subset_nwm_streams_filename,driver=getDriver(subset_nwm_streams_filename),index=False) + del nwm_streams + +def getDriver(filename): + + driverDictionary = {'.gpkg' : 'GPKG','.geojson' : 'GeoJSON','.shp' : 'ESRI Shapefile'} + driver = driverDictionary[splitext(filename)[1]] + + return(driver) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Subset vector layers') + parser.add_argument('-d','--hucCode', help='HUC boundary ID', required=True,type=str) + parser.add_argument('-w','--nwm-streams', help='NWM flowlines', required=True) + parser.add_argument('-s','--nhd-streams',help='NHDPlus HR burnline',required=True) + parser.add_argument('-l','--nwm-lakes', help='NWM Lakes', required=True) + parser.add_argument('-r','--nld-lines', help='Levee vectors to use within project path', required=True) + parser.add_argument('-g','--wbd',help='HUC boundary',required=True) + parser.add_argument('-f','--wbd-buffer',help='Buffered HUC boundary',required=True) + parser.add_argument('-m','--nwm-catchments', help='NWM catchments', required=True) + parser.add_argument('-y','--nhd-headwaters',help='NHD headwaters',required=True) + parser.add_argument('-v','--landsea',help='LandSea - land boundary',required=True) + parser.add_argument('-c','--subset-nhd-streams',help='NHD streams subset',required=True) + parser.add_argument('-z','--subset-nld-lines',help='Subset of NLD levee vectors for HUC',required=True) + parser.add_argument('-a','--subset-lakes',help='NWM lake subset',required=True) + parser.add_argument('-n','--subset-catchments',help='NWM catchments subset',required=True) + parser.add_argument('-e','--subset-nhd-headwaters',help='NHD headwaters subset',required=True,default=None) + parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) + parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) + parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False) + parser.add_argument('-p','--extent',help='MS or FR 
extent',required=True) + + args = vars(parser.parse_args()) + + hucCode = args['hucCode'] + nwm_streams_filename = args['nwm_streams'] + nhd_streams_filename = args['nhd_streams'] + nwm_lakes_filename = args['nwm_lakes'] + nld_lines_filename = args['nld_lines'] + wbd_filename = args['wbd'] + wbd_buffer_filename = args['wbd_buffer'] + nwm_catchments_filename = args['nwm_catchments'] + nhd_headwaters_filename = args['nhd_headwaters'] + landsea_filename = args['landsea'] + subset_nhd_streams_filename = args['subset_nhd_streams'] + subset_nld_lines_filename = args['subset_nld_lines'] + subset_nwm_lakes_filename = args['subset_lakes'] + subset_nwm_catchments_filename = args['subset_catchments'] + subset_nhd_headwaters_filename = args['subset_nhd_headwaters'] + subset_nwm_streams_filename = args['subset_nwm_streams'] + subset_landsea_filename = args['subset_landsea'] + dissolveLinks = args['dissolve_links'] + extent = args['extent'] + + subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks,extent) diff --git a/lib/output_cleanup.py b/lib/output_cleanup.py index 98dcb9044..f4ea3ea19 100755 --- a/lib/output_cleanup.py +++ b/lib/output_cleanup.py @@ -27,6 +27,7 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod production_whitelist = [ 'rem_zeroed_masked.tif', 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', + 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', 'gw_catchments_reaches_filtered_addedAttributes.tif', 'hydroTable.csv', 'src.json' @@ -54,15 +55,15 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod src_data = {} with open(os.path.join(output_folder_path, 'src.json')) as jsonf: src_data = json.load(jsonf) - - with open(os.path.join(output_folder_path, 'hydroTable.csv')) as csvf: - csvReader = csv.DictReader(csvf) - for row in csvReader: + + with open(os.path.join(output_folder_path, 'hydroTable.csv')) as csvf: + csvReader = csv.DictReader(csvf) + for row in csvReader: if row['HydroID'].lstrip('0') in src_data and 'nwm_feature_id' not in src_data[row['HydroID'].lstrip('0')]: src_data[row['HydroID'].lstrip('0')]['nwm_feature_id'] = row['feature_id'] # Write src_data to JSON file - with open(os.path.join(output_folder_path, f'rating_curves_{huc_number}.json'), 'w') as jsonf: + with open(os.path.join(output_folder_path, f'rating_curves_{huc_number}.json'), 'w') as jsonf: json.dump(src_data, jsonf) # Step 3, copy files to desired names @@ -78,7 +79,7 @@ def whitelist_directory(directory_path, whitelist, additional_whitelist): directory = os.fsencode(directory_path) for file in os.listdir(directory_path): filename = os.fsdecode(file) - if filename not in whitelist: + if filename not in whitelist: os.remove(os.path.join(directory_path, filename)) @@ -100,7 +101,6 @@ def whitelist_directory(directory_path, whitelist, additional_whitelist): additional_whitelist = args['additional_whitelist'] is_production = args['is_production'] is_viz_post_processing = args['is_viz_post_processing'] - + # Run output_cleanup output_cleanup(huc_number, output_folder_path, additional_whitelist, is_production, is_viz_post_processing) - \ No newline at end of file 
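The new clip_vectors_to_wbd.py above applies one pattern to every national layer: read the layer with the buffered HUC boundary as a spatial mask, skip it if the result is empty, and write the subset with an OGR driver inferred from the output file extension. A minimal sketch of that pattern, using placeholder file names rather than actual repository paths:

    #!/usr/bin/env python3
    # Minimal sketch of the mask-and-subset pattern used in clip_vectors_to_wbd.py.
    # The input/output file names below are illustrative placeholders only.
    from os.path import splitext
    import geopandas as gpd

    def get_driver(filename):
        # Pick the OGR driver from the output file extension, as getDriver() does above
        return {'.gpkg': 'GPKG', '.geojson': 'GeoJSON', '.shp': 'ESRI Shapefile'}[splitext(filename)[1]]

    wbd_buffer = gpd.read_file('wbd_buffered.gpkg')            # buffered HUC boundary
    lakes = gpd.read_file('nwm_lakes.gpkg', mask=wbd_buffer)   # only features intersecting the mask are read

    if not lakes.empty:
        out = 'nwm_lakes_proj_subset.gpkg'
        lakes.to_file(out, driver=get_driver(out), index=False)

Reading with `mask=` filters features at read time, so the full national layers do not have to be loaded into memory before subsetting.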
diff --git a/lib/reduce_nhd_stream_density.py b/lib/reduce_nhd_stream_density.py new file mode 100644 index 000000000..cce2fa7ca --- /dev/null +++ b/lib/reduce_nhd_stream_density.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 + +import geopandas as gpd +import pandas as pd +import numpy as np +from os.path import splitext +from tqdm import tqdm +from collections import deque +import argparse +import pygeos +from shapely.wkb import dumps +from utils.shared_functions import getDriver + +def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwaters_filename,headwater_id,nwm_intersections_filename): + + headwater_streams = pd.DataFrame() + + nhd_streams = gpd.read_file(nhd_streams_filename) + + for index, row in selected_wbd8.iterrows(): + huc = row["HUC8"] + + if huc.startswith(str(huc4)): + huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8.str.startswith(huc)] + huc8_mask = huc8_mask.reset_index(drop=True) + + headwaters_mask = gpd.read_file(headwaters_filename, mask = huc8_mask) + headwaters_mask = headwaters_mask.reset_index(drop=True) + + streams_subset = gpd.read_file(nhd_streams_filename, mask = huc8_mask) + + if not streams_subset.empty: + streams_subset.loc[:,'is_headwater'] = False + streams_subset = streams_subset.reset_index(drop=True) + + streams_subset['b_geom'] = None + for index, linestring in enumerate(streams_subset.geometry): + streams_subset.at[index, 'b_geom'] = dumps(linestring) + + # create pygeos nhd stream geometries from WKB representation + streambin_geom = pygeos.io.from_wkb(streams_subset['b_geom']) + + streams_subset.loc[:,'HUC8'] = str(huc) + + if headwaters_mask[headwater_id].dtype=='int': + n = -1 + else: + n = '' + + streams_subset.loc[:,'headwaters_id'] = n + + # find stream segment closest to headwater point + for index, point in headwaters_mask.iterrows(): + + # convert headwaterpoint geometries to WKB representation + wkb_points = dumps(point.geometry) + + # create pygeos headwaterpoint geometries from WKB representation + pointbin_geom = pygeos.io.from_wkb(wkb_points) + + # distance to each stream segment + distances = pygeos.measurement.distance(streambin_geom, pointbin_geom) + + # find minimum distance + min_index = np.argmin(distances) + + # Closest segment to headwater + streams_subset.loc[min_index,'is_headwater'] = True + streams_subset.loc[min_index,'headwaters_id'] = point[headwater_id] + + headwater_streams = headwater_streams.append(streams_subset[['NHDPlusID','is_headwater','headwaters_id','HUC8']]) + + headwater_streams = headwater_streams.sort_values('is_headwater', ascending=False).drop_duplicates('NHDPlusID') # keeps headwater=True for conflicting duplicates + nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='inner') + + del selected_wbd8, streams_subset, headwater_streams + + huc4_mask_buffer = huc4_mask.buffer(10) + + # identify inflowing streams + nwm_intersections = gpd.read_file(nwm_intersections_filename, mask=huc4_mask_buffer) + + nhd_streams['downstream_of_headwater'] = False + nhd_streams = nhd_streams.explode() + nhd_streams = nhd_streams.reset_index(drop=True) + + # find stream segment closest to nwm intersection point + for index, point in nwm_intersections.iterrows(): + + # distance to each stream segment + distances = nhd_streams.distance(point.geometry) + + # find minimum distance + min_index = np.argmin(distances) + + # update attributes for incoming stream + nhd_streams.loc[min_index,'is_headwater'] = True + nhd_streams.loc[min_index,'downstream_of_headwater'] = True + + ## subset 
NHDPlus HR + nhd_streams['is_relevant_stream'] = nhd_streams['is_headwater'].copy() + + # trace down from headwaters + nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) + + nhd_streams = get_downstream_segments(nhd_streams, 'is_headwater') + + nhd_streams = nhd_streams.loc[nhd_streams['is_relevant_stream'],:] + nhd_streams.reset_index(drop=True,inplace=True) + + return(nhd_streams) + +def get_downstream_segments(streams, attribute): + + Q = deque(streams.loc[streams[attribute],'NHDPlusID'].tolist()) + visited = set() + + while Q: + q = Q.popleft() + if q in visited: + continue + + visited.add(q) + toNode,DnLevelPat = streams.loc[q,['ToNode','DnLevelPat']] + + try: + downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() + except ValueError: # 18050002 has duplicate nhd stream feature + if len(toNode.unique()) == 1: + toNode = toNode.iloc[0] + downstream_ids = streams.loc[streams['FromNode'] == toNode,:].index.tolist() + + # If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions) + if len(set(downstream_ids))>1: # special case: remove duplicate NHDPlusIDs + relevant_ids = [segment for segment in downstream_ids if DnLevelPat == streams.loc[segment,'LevelPathI']] + else: + relevant_ids = downstream_ids + + streams.loc[relevant_ids,'is_relevant_stream'] = True + streams.loc[relevant_ids,'downstream_of_headwater'] = True + + for i in relevant_ids: + if i not in visited: + Q.append(i) + + return(streams) + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Reduce NHDPlus HR network based on headwater points') + parser.add_argument('-n','--huc-number',help='HUC number',required=True,type=str) + parser.add_argument('-b','--huc4-mask',help='HUC4 mask',required=True) + parser.add_argument('-w','--selected-wbd8',help='WBD8 layer',required=True) + parser.add_argument('-t','--nhd-streams',help='NHDPlus HR geodataframe',required=True) + parser.add_argument('-a','--headwaters-filename',help='Headwaters points layer name',required=True,type=str) + parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) + parser.add_argument('-i','--headwater-id',help='Headwater points ID column',required=True) + parser.add_argument('-i','--nwm-intersections-filename',help='NWM HUC4 intersection points',required=True) + + args = vars(parser.parse_args()) + + subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id) + + if subset_nhd_streams_fileName is not None: + subset_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName']),index=False) diff --git a/lib/run_by_unit.sh b/lib/run_by_unit.sh index 81bc676da..d3733dd6f 100755 --- a/lib/run_by_unit.sh +++ b/lib/run_by_unit.sh @@ -5,8 +5,8 @@ T_total_start echo -e $startDiv"Parameter Values" echo -e "extent=$extent" -echo -e "negativeBurnValue=$negativeBurnValue" -echo -e "maxSplitDistance_meters=$maxSplitDistance_meters" +echo -e "negative_burn_value=$negative_burn_value" +echo -e "max_split_distance_meters=$max_split_distance_meters" echo -e "mannings_n=$manning_n" echo -e "stage_min_meters=$stage_min_meters" echo -e "stage_interval_meters=$stage_interval_meters" @@ -15,7 +15,7 @@ echo -e "slope_min=$slope_min" echo -e "ms_buffer_dist=$ms_buffer_dist" echo -e "ncores_gw=$ncores_gw" echo -e "ncores_fd=$ncores_fd" -echo -e "defaultMaxJobs=$defaultMaxJobs" 
+echo -e "default_max_jobs=$default_max_jobs" echo -e "memfree=$memfree"$stopDiv ## SET OUTPUT DIRECTORY FOR UNIT ## @@ -38,6 +38,14 @@ else input_LANDSEA=$inputDataDir/landsea/water_polygons_us.gpkg fi +if [ "$extent" = "MS" ]; then + input_nhd_flowlines=$input_nhd_flowlines_ms + input_nhd_headwaters=$input_nhd_headwaters_ms +else + input_nhd_flowlines=$input_nhd_flowlines_fr + input_nhd_headwaters=$input_nhd_headwaters_fr +fi + ## GET WBD ## echo -e $startDiv"Get WBD $hucNumber"$stopDiv date -u @@ -54,16 +62,16 @@ Tstart ogr2ogr -f GPKG -dialect sqlite -sql "select ST_buffer(geom, 5000) from 'WBDHU$hucUnitLength'" $outputHucDataDir/wbd_buffered.gpkg $outputHucDataDir/wbd.gpkg Tcount -## GET STREAMS ## +## Subset Vector Layers ## echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ] && \ -$libDir/snap_and_clip_to_nhd.py -d $hucNumber -w $input_NWM_Flows -f $input_NWM_Headwaters -s $input_NHD_Flowlines -l $input_NWM_Lakes -r $input_NLD -u $outputHucDataDir/wbd.gpkg -g $outputHucDataDir/wbd_buffered.gpkg -y $inputDataDir/ahp_sites/nws_lid.gpkg -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -t $outputHucDataDir/nwm_headwaters_proj_subset.gpkg -m $input_NWM_Catchments -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -p $extent +$libDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_NWM_Flows -s $input_nhd_flowlines -l $input_NWM_Lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_NWM_Catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -p $extent Tcount if [ "$extent" = "MS" ]; then - if [[ ! -f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ]] ; then + if [[ ! -f $outputHucDataDir/nhd_headwater_points_subset.gpkg ]] ; then echo "No AHPs point(s) within HUC $hucNumber boundaries. Aborting run_by_unit.sh" rm -rf $outputHucDataDir exit 0 @@ -145,11 +153,11 @@ Tcount # Using AGREE methodology, hydroenforce the DEM so that it is consistent # with the supplied stream network. This allows for more realistic catchment # delineation which is ultimately reflected in the output FIM mapping. -echo -e $startDiv"Creating AGREE DEM using $buffer meter buffer"$stopDiv +echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/dem_burned.tif ] && \ -$libDir/agreedem.py -r $outputHucDataDir/flows_grid_boolean.tif -d $outputHucDataDir/dem_meters.tif -w $outputHucDataDir -g $outputHucDataDir/temp_work -o $outputHucDataDir/dem_burned.tif -b $buffer -sm 10 -sh 1000 +$libDir/agreedem.py -r $outputHucDataDir/flows_grid_boolean.tif -d $outputHucDataDir/dem_meters.tif -w $outputHucDataDir -g $outputHucDataDir/temp_work -o $outputHucDataDir/dem_burned.tif -b $agree_DEM_buffer -sm 10 -sh 1000 Tcount ## PIT REMOVE BURNED DEM ## @@ -240,7 +248,7 @@ echo -e $startDiv"Split Derived Reaches $hucNumber"$stopDiv date -u Tstart [ ! 
-f $outputHucDataDir/demDerived_reaches_split.gpkg ] && \ -$libDir/split_flows.py $outputHucDataDir/demDerived_reaches.shp $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/demDerived_reaches_split_points.gpkg $maxSplitDistance_meters $slope_min $outputHucDataDir/wbd8_clp.gpkg $outputHucDataDir/nwm_lakes_proj_subset.gpkg $lakes_buffer_dist_meters +$libDir/split_flows.py $outputHucDataDir/demDerived_reaches.shp $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/demDerived_reaches_split_points.gpkg $max_split_distance_meters $slope_min $outputHucDataDir/wbd8_clp.gpkg $outputHucDataDir/nwm_lakes_proj_subset.gpkg $lakes_buffer_dist_meters Tcount if [[ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ]] ; then diff --git a/lib/snap_and_clip_to_nhd.py b/lib/snap_and_clip_to_nhd.py deleted file mode 100755 index 7c715e06f..000000000 --- a/lib/snap_and_clip_to_nhd.py +++ /dev/null @@ -1,298 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import geopandas as gpd -import pandas as pd -from collections import deque,Counter -import numpy as np -from tqdm import tqdm -import argparse -from os.path import splitext,isfile -from shapely.strtree import STRtree -from shapely.geometry import Point,MultiLineString,LineString,mapping,MultiPolygon,Polygon - -def subset_vector_layers(hucCode,nwm_streams_fileName,nwm_headwaters_fileName,nhd_streams_fileName,nwm_lakes_fileName,nld_lines_fileName,nwm_catchments_fileName,wbd_fileName,wbd_buffer_fileName,ahps_sites_fileName,landsea_filename,subset_nhd_streams_fileName,subset_nwm_lakes_fileName,subset_nld_lines_fileName,subset_nwm_headwaters_fileName,subset_nwm_catchments_fileName,subset_nwm_streams_fileName,subset_landsea_filename,subset_nhd_headwaters_fileName=None,dissolveLinks=False,extent='FR'): - - hucUnitLength = len(str(hucCode)) - - wbd = gpd.read_file(wbd_fileName) - wbd_buffer = gpd.read_file(wbd_buffer_fileName) - projection = wbd.crs - - # Clip ocean water polygon for future masking ocean areas (where applicable) - landsea_read = gpd.read_file(landsea_filename, mask = wbd_buffer) - landsea = gpd.clip(landsea_read, wbd_buffer) - if not landsea.empty: - landsea.to_file(subset_landsea_filename,driver=getDriver(subset_landsea_filename),index=False) - del landsea - - # find intersecting lakes and writeout - print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_lakes = gpd.read_file(nwm_lakes_fileName, mask = wbd_buffer) - - if not nwm_lakes.empty: - # perform fill process to remove holes/islands in the NWM lake polygons - nwm_lakes = nwm_lakes.explode() - nwm_lakes_fill_holes=MultiPolygon(Polygon(p.exterior) for p in nwm_lakes['geometry']) # remove donut hole geometries - # loop through the filled polygons and insert the new geometry - for i in range(len(nwm_lakes_fill_holes)): - nwm_lakes.loc[i,'geometry'] = nwm_lakes_fill_holes[i] - nwm_lakes.to_file(subset_nwm_lakes_fileName,driver=getDriver(subset_nwm_lakes_fileName),index=False) - del nwm_lakes - - # find intersecting levee lines - print("Subsetting NLD levee lines for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nld_lines = gpd.read_file(nld_lines_fileName, mask = wbd) - if not nld_lines.empty: - nld_lines.to_file(subset_nld_lines_fileName,driver=getDriver(subset_nld_lines_fileName),index=False) - del nld_lines - - # find intersecting nwm_catchments - print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - 
nwm_catchments = gpd.read_file(nwm_catchments_fileName, mask = wbd) - nwm_catchments.to_file(subset_nwm_catchments_fileName,driver=getDriver(subset_nwm_catchments_fileName),index=False) - del nwm_catchments - - # query nhd+HR streams for HUC code - print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nhd_streams = gpd.read_file(nhd_streams_fileName, mask = wbd_buffer) - nhd_streams = nhd_streams.explode() - - # find intersecting nwm_headwaters - print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_streams = gpd.read_file(nwm_streams_fileName, mask = wbd_buffer) - nwm_streams.to_file(subset_nwm_streams_fileName,driver=getDriver(subset_nwm_streams_fileName),index=False) - del nwm_streams - - # get nhd headwaters closest to nwm headwater points - print('Identify NHD Headwater streams nearest to NWM Headwater points',flush=True) - nhd_streams.loc[:,'is_nwm_headwater'] = False - # nhd_streams_tree = STRtree(nhd_streams.geometry) - - if extent == 'FR': - nwm_headwaters = gpd.read_file(nwm_headwaters_fileName, mask = wbd_buffer) - elif extent == 'MS': - nwm_headwaters = gpd.read_file(ahps_sites_fileName, mask = wbd) - - # check for incoming MS streams and convert to points - intersecting = nhd_streams.crosses(wbd.geometry[0]) - incoming_flows = nhd_streams.loc[intersecting,:] - incoming_points_list = [] - - if len(incoming_flows) > 0: - for i,linesting in enumerate(incoming_flows.geometry): - incoming_points_list = incoming_points_list + [linesting.coords[-1]] - - geometry = [Point(xy) for xy in zip(incoming_points_list)] - incoming_points = gpd.GeoDataFrame({'feature_id' : 0 ,'nwsid' : 'huc8_incoming' ,'geometry':geometry}, crs=nhd_streams.crs, geometry='geometry') - - if (len(nwm_headwaters) > 0) or (len(incoming_points) > 0): - - if len(nwm_headwaters) > 0: - print ("Snapping forecasting points to nhd stream network") - streamlines_union = nhd_streams.geometry.unary_union - snapped_geoms = [] - snappedpoints_df = pd.DataFrame(nwm_headwaters).drop(columns=['geometry']) - - # snap lines to streams - for i in range(len(nwm_headwaters)): - snapped_geoms.append(streamlines_union.interpolate(streamlines_union.project(nwm_headwaters.geometry[i]))) - - snappedpoints_df['geometry'] = snapped_geoms - snapped_points = gpd.GeoDataFrame(snappedpoints_df,crs=nhd_streams.crs) - - if (len(incoming_points) > 0) and (len(nwm_headwaters) > 0): - nwm_headwaters = snapped_points.append(incoming_points).reset_index(drop=True) - elif len(incoming_points) > 0: - nwm_headwaters = incoming_points.copy() - else: - print ("No AHPs point(s) within HUC " + str(hucCode) + " boundaries.") - sys.exit(0) - - for index, row in tqdm(nwm_headwaters.iterrows(),total=len(nwm_headwaters)): - distances = nhd_streams.distance(row['geometry']) - # nearestGeom = nhd_streams_tree.nearest(row['geometry']) - min_index = np.argmin(distances) - nhd_streams.loc[min_index,'is_nwm_headwater'] = True - - nhd_streams = nhd_streams.loc[nhd_streams.geometry!=None,:] # special case: remove segments without geometries - - # writeout nwm headwaters - if not nwm_headwaters.empty: - nwm_headwaters.to_file(subset_nwm_headwaters_fileName,driver=getDriver(subset_nwm_headwaters_fileName),index=False) - del nwm_headwaters - - # copy over headwater features to nwm streams - nhd_streams['is_nwm_stream'] = nhd_streams['is_nwm_headwater'].copy() - - # trace down from NWM Headwaters - print('Identify NHD streams downstream of relevant NHD Headwater streams',flush=True) 
- nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) - - Q = deque(nhd_streams.loc[nhd_streams['is_nwm_headwater'],'NHDPlusID'].tolist()) - visited = set() - - while Q: - q = Q.popleft() - if q in visited: - continue - visited.add(q) - toNode,DnLevelPat = nhd_streams.loc[q,['ToNode','DnLevelPat']] - try: - downstream_ids = nhd_streams.loc[nhd_streams['FromNode'] == toNode,:].index.tolist() - except ValueError: # 18050002 has duplicate nhd stream feature - if len(toNode.unique()) == 1: - toNode = toNode.iloc[0] - downstream_ids = nhd_streams.loc[nhd_streams['FromNode'] == toNode,:].index.tolist() - #If multiple downstream_ids are returned select the ids that are along the main flow path (i.e. exclude segments that are diversions) - if len(set(downstream_ids))>1: # special case: remove duplicate NHDPlusIDs - relevant_ids = [segment for segment in downstream_ids if DnLevelPat == nhd_streams.loc[segment,'LevelPathI']] - else: - relevant_ids = downstream_ids - nhd_streams.loc[relevant_ids,'is_nwm_stream'] = True - for i in relevant_ids: - if i not in visited: - Q.append(i) - - nhd_streams = nhd_streams.loc[nhd_streams['is_nwm_stream'],:] - - if dissolveLinks: - # remove multi-line strings - print("Dissolving NHD reaches to Links (reaches constrained to stream intersections)",flush=True) - - nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) - nhd_streams['before_confluence'] = nhd_streams.duplicated(subset='ToNode',keep=False) - - nhd_streams.loc[nhd_streams['is_nwm_headwater'],'linkNo'] = np.arange(1,nhd_streams['is_nwm_headwater'].sum()+1) - - Q = deque(nhd_streams.loc[nhd_streams['is_nwm_headwater'],'NHDPlusID'].tolist()) - visited = set() - linkNo = np.max(nhd_streams.loc[nhd_streams['is_nwm_headwater'],'linkNo']) + 1 - link_geometries = dict() - - # adds all headwaters to link_geometries - for q in Q: - link_geometries[nhd_streams.loc[q,'linkNo']] = [p for p in zip(*nhd_streams.loc[q,'geometry'].coords.xy)][::-1] - - # Do BFS - while Q: - q = Q.popleft() - - if q in visited: - continue - - visited.add(q) - - toNode = nhd_streams.loc[q,'ToNode'] - - downstream_ids = nhd_streams.loc[nhd_streams['FromNode'] == toNode,:].index.tolist() - numberOfDownstreamIDs = len(downstream_ids) - - for i in downstream_ids: - if i not in visited: - Q.append(i) - - if nhd_streams.loc[q,'before_confluence'] or (numberOfDownstreamIDs > 1): - # do not dissolve - linkNo += 1 - nhd_streams.loc[i,'linkNo'] = linkNo - - next_stream_geometry = [p for p in zip(*nhd_streams.loc[i,'geometry'].coords.xy)][::-1] - - link_geometries[linkNo] = next_stream_geometry - - else: - nhd_streams.loc[i,'linkNo'] = nhd_streams.loc[q,'linkNo'] - - next_stream_geometry = [p for p in zip(*nhd_streams.loc[i,'geometry'].coords.xy)][::-1] - - link_geometries[nhd_streams.loc[i,'linkNo']] = link_geometries[nhd_streams.loc[i,'linkNo']] + next_stream_geometry - - - # convert dictionary to lists for keys (linkNos) and values (geometry linestrings) - output_links = [] ; output_geometries = [] - for ln_no, ln_geom in link_geometries.items(): - output_links = output_links + [ln_no] - output_geometries = output_geometries + [LineString(ln_geom)] - - nhd_streams = gpd.GeoDataFrame({'linkNO' : output_links,'geometry': output_geometries},geometry='geometry',crs=projection) - - # write to files - nhd_streams.reset_index(drop=True,inplace=True) - nhd_streams.to_file(subset_nhd_streams_fileName,driver=getDriver(subset_nhd_streams_fileName),index=False) - - if subset_nhd_headwaters_fileName is not None: - # identify all nhd headwaters - 
print('Identify NHD headwater points',flush=True) - nhd_headwater_streams = nhd_streams.loc[nhd_streams['is_nwm_headwater'],:] - nhd_headwater_streams = nhd_headwater_streams.explode() - - hw_points = np.zeros(len(nhd_headwater_streams),dtype=object) - for index,lineString in enumerate(nhd_headwater_streams.geometry): - hw_point = [point for point in zip(*lineString.coords.xy)][-1] - hw_points[index] = Point(*hw_point) - - nhd_headwater_points = gpd.GeoDataFrame({'NHDPlusID' : nhd_headwater_streams['NHDPlusID'], - 'geometry' : hw_points},geometry='geometry',crs=projection) - - nhd_headwater_points.to_file(subset_nhd_headwaters_fileName,driver=getDriver(subset_nhd_headwaters_fileName),index=False) - del nhd_headwater_streams, nhd_headwater_points - -def getDriver(fileName): - - driverDictionary = {'.gpkg' : 'GPKG','.geojson' : 'GeoJSON','.shp' : 'ESRI Shapefile'} - driver = driverDictionary[splitext(fileName)[1]] - - return(driver) - - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Subset vector layers') - parser.add_argument('-d','--hucCode', help='HUC boundary ID', required=True,type=str) - parser.add_argument('-w','--nwm-streams', help='NWM flowlines', required=True) - parser.add_argument('-f','--nwm-headwaters', help='NWM headwater points', required=True) - parser.add_argument('-s','--nhd-streams',help='NHDPlus HR burnline',required=True) - parser.add_argument('-l','--nwm-lakes', help='NWM Lakes', required=True) - parser.add_argument('-r','--nld-lines', help='Levee vectors to use within project path', required=True) - parser.add_argument('-m','--nwm-catchments', help='NWM catchments', required=True) - parser.add_argument('-u','--wbd',help='HUC boundary',required=True) - parser.add_argument('-g','--wbd-buffer',help='Buffered HUC boundary',required=True) - parser.add_argument('-y','--ahps-sites',help='Buffered HUC boundary',required=True) - parser.add_argument('-v','--landsea',help='LandSea - land boundary',required=True) - parser.add_argument('-c','--subset-nhd-streams',help='NHD streams subset',required=True) - parser.add_argument('-a','--subset-lakes',help='NWM lake subset',required=True) - parser.add_argument('-t','--subset-nwm-headwaters',help='NWM headwaters subset',required=True) - parser.add_argument('-z','--subset-nld-lines',help='Subset of NLD levee vectors for HUC',required=True) - parser.add_argument('-e','--subset-nhd-headwaters',help='NHD headwaters subset',required=True,default=None) - parser.add_argument('-n','--subset-catchments',help='NWM catchments subset',required=True) - parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) - parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) - parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False) - parser.add_argument('-p','--extent',help='MS or FR extent',required=True) - - args = vars(parser.parse_args()) - - hucCode = args['hucCode'] - nwm_streams_fileName = args['nwm_streams'] - nwm_headwaters_fileName = args['nwm_headwaters'] - nhd_streams_fileName = args['nhd_streams'] - nwm_lakes_fileName = args['nwm_lakes'] - nld_lines_fileName = args['nld_lines'] - nwm_catchments_fileName = args['nwm_catchments'] - wbd_fileName = args['wbd'] - wbd_buffer_fileName = args['wbd_buffer'] - ahps_sites_fileName = args['ahps_sites'] - landsea_fileName = args['landsea'] - subset_nhd_streams_fileName = args['subset_nhd_streams'] - subset_nwm_lakes_fileName = args['subset_lakes'] - 
subset_nwm_headwaters_fileName = args['subset_nwm_headwaters'] - subset_nld_lines_fileName = args['subset_nld_lines'] - subset_nwm_catchments_fileName = args['subset_catchments'] - subset_nhd_headwaters_fileName = args['subset_nhd_headwaters'] - subset_nwm_streams_fileName = args['subset_nwm_streams'] - subset_landsea_filename = args['subset_landsea'] - dissolveLinks = args['dissolve_links'] - extent = args['extent'] - - subset_vector_layers(hucCode,nwm_streams_fileName,nwm_headwaters_fileName,nhd_streams_fileName,nwm_lakes_fileName,nld_lines_fileName,nwm_catchments_fileName,wbd_fileName,wbd_buffer_fileName,ahps_sites_fileName,landsea_fileName,subset_nhd_streams_fileName,subset_nwm_lakes_fileName,subset_nld_lines_fileName,subset_nwm_headwaters_fileName,subset_nwm_catchments_fileName,subset_nwm_streams_fileName,subset_landsea_filename,subset_nhd_headwaters_fileName,dissolveLinks,extent) diff --git a/lib/split_flows.py b/lib/split_flows.py index 0687ea0cd..db11d6d4a 100755 --- a/lib/split_flows.py +++ b/lib/split_flows.py @@ -161,6 +161,9 @@ else: split_flows_gdf['LakeID'] = -999 +# need to figure out why so many duplicate stream segments for 04010101 FR +split_flows_gdf = split_flows_gdf.drop_duplicates() + # Create Ids and Network Traversal Columns addattributes = buildstreamtraversal.BuildStreamTraversalColumns() tResults=None diff --git a/lib/utils/shared_functions.py b/lib/utils/shared_functions.py index fa643d7fe..6ea7b0a74 100644 --- a/lib/utils/shared_functions.py +++ b/lib/utils/shared_functions.py @@ -1,62 +1,69 @@ #!/usr/bin/env python3 import os +from os.path import splitext +def getDriver(fileName): + + driverDictionary = {'.gpkg' : 'GPKG','.geojson' : 'GeoJSON','.shp' : 'ESRI Shapefile'} + driver = driverDictionary[splitext(fileName)[1]] + + return(driver) def pull_file(url, full_pulled_filepath): """ This helper function pulls a file and saves it to a specified path. - + Args: url (str): The full URL to the file to download. full_pulled_filepath (str): The full system path where the downloaded file will be saved. """ import urllib.request - + print("Pulling " + url) urllib.request.urlretrieve(url, full_pulled_filepath) - + def delete_file(file_path): """ This helper function deletes a file. - + Args: file_path (str): System path to a file to be deleted. """ - + try: os.remove(file_path) except FileNotFoundError: pass - - + + def run_system_command(args): """ This helper function takes a system command and runs it. This function is designed for use in multiprocessing. - + Args: args (list): A single-item list, the first and only item being a system command string. """ - + # Parse system command. command = args[0] - + # Run system command. os.system(command) - - + + def subset_wbd_gpkg(wbd_gpkg, multilayer_wbd_geopackage): - + import geopandas as gp from utils.shared_variables import CONUS_STATE_LIST, PREP_PROJECTION - + print("Subsetting " + wbd_gpkg + "...") # Read geopackage into dataframe. wbd = gp.read_file(wbd_gpkg) gdf = gp.GeoDataFrame(wbd) - + for index, row in gdf.iterrows(): state = row["STATES"] if state != None: # Some polygons are empty in the STATES field. @@ -75,8 +82,4 @@ def subset_wbd_gpkg(wbd_gpkg, multilayer_wbd_geopackage): # Overwrite geopackage. 
layer_name = os.path.split(wbd_gpkg)[1].strip('.gpkg') gdf.crs = PREP_PROJECTION - gdf.to_file(multilayer_wbd_geopackage, layer=layer_name, driver='GPKG') - - - - \ No newline at end of file + gdf.to_file(multilayer_wbd_geopackage, layer=layer_name,driver='GPKG',index=False) From b017e3e086bef9da42ae22e909a541430231ffcb Mon Sep 17 00:00:00 2001 From: NickChadwick-NOAA Date: Mon, 25 Jan 2021 08:54:05 -0600 Subject: [PATCH 009/359] Initial Version of the FIM API Addition of an API service to schedule, run and manage fim_run jobs through a user-friendly web interface. * Added api folder that contains all the codebase for the new service. --- CHANGELOG.md | 8 + Dockerfile.prod | 5 +- api/.gitignore | 1 + api/README.md | 0 api/frontend/.env-template | 2 + api/frontend/docker-compose-dev.yml | 52 +++ api/frontend/docker-compose-prod.yml | 47 +++ api/frontend/gui/Dockerfile | 14 + api/frontend/gui/entrypoint.sh | 5 + api/frontend/gui/gui.py | 16 + api/frontend/gui/requirements.txt | 4 + api/frontend/gui/templates/index.html | 395 ++++++++++++++++++ api/frontend/gui/wsgi.py | 4 + api/frontend/nginx-dev.conf | 72 ++++ api/frontend/nginx-prod.conf | 55 +++ api/frontend/output_handler/Dockerfile | 12 + api/frontend/output_handler/entrypoint.sh | 6 + api/frontend/output_handler/output_handler.py | 94 +++++ api/frontend/output_handler/requirements.txt | 2 + api/node/.env-template | 5 + api/node/connector/Dockerfile | 16 + api/node/connector/connector.py | 169 ++++++++ api/node/connector/entrypoint.sh | 5 + api/node/connector/requirements.txt | 3 + api/node/docker-compose-dev.yml | 38 ++ api/node/docker-compose-prod.yml | 51 +++ api/node/nginx.conf | 64 +++ api/node/updater/Dockerfile | 14 + api/node/updater/entrypoint.sh | 5 + api/node/updater/requirements.txt | 2 + api/node/updater/updater.py | 147 +++++++ 31 files changed, 1311 insertions(+), 2 deletions(-) create mode 100644 api/.gitignore create mode 100644 api/README.md create mode 100644 api/frontend/.env-template create mode 100644 api/frontend/docker-compose-dev.yml create mode 100644 api/frontend/docker-compose-prod.yml create mode 100644 api/frontend/gui/Dockerfile create mode 100755 api/frontend/gui/entrypoint.sh create mode 100644 api/frontend/gui/gui.py create mode 100644 api/frontend/gui/requirements.txt create mode 100644 api/frontend/gui/templates/index.html create mode 100644 api/frontend/gui/wsgi.py create mode 100644 api/frontend/nginx-dev.conf create mode 100644 api/frontend/nginx-prod.conf create mode 100644 api/frontend/output_handler/Dockerfile create mode 100755 api/frontend/output_handler/entrypoint.sh create mode 100644 api/frontend/output_handler/output_handler.py create mode 100644 api/frontend/output_handler/requirements.txt create mode 100644 api/node/.env-template create mode 100644 api/node/connector/Dockerfile create mode 100644 api/node/connector/connector.py create mode 100755 api/node/connector/entrypoint.sh create mode 100644 api/node/connector/requirements.txt create mode 100644 api/node/docker-compose-dev.yml create mode 100644 api/node/docker-compose-prod.yml create mode 100644 api/node/nginx.conf create mode 100644 api/node/updater/Dockerfile create mode 100755 api/node/updater/entrypoint.sh create mode 100644 api/node/updater/requirements.txt create mode 100644 api/node/updater/updater.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d028b692d..eed3cd415 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ All notable changes to this project will be documented in this file. 
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.2.0 - 2021-01-25 - [PR #218](https://github.com/NOAA-OWP/cahaba/pull/218) + +Addition of an API service to schedule, run and manage `fim_run` jobs through a user-friendly web interface. + +### Additions + + - `api` folder that contains all the codebase for the new service. + ## v3.0.1.0 - 2021-01-21 - [PR #206](https://github.com/NOAA-OWP/cahaba/pull/206) Preprocess MS and FR stream networks diff --git a/Dockerfile.prod b/Dockerfile.prod index 86fd4ec31..79c50bdaf 100644 --- a/Dockerfile.prod +++ b/Dockerfile.prod @@ -84,8 +84,9 @@ ENV PYTHONUNBUFFERED=TRUE ENV PATH="$projectDir:${PATH}" ## install python 3 modules ## -COPY requirements.txt . -RUN pip3 install --no-binary shapely --no-binary pygeos -r requirements.txt --no-cache-dir +COPY Pipfile . +COPY Pipfile.lock . +RUN pip3 install pipenv && PIP_NO_CACHE_DIR=off PIP_NO_BINARY=shapely,pygeos pipenv install --system --deploy --ignore-pipfile ## Copy the source code to the image COPY . $projectDir/ diff --git a/api/.gitignore b/api/.gitignore new file mode 100644 index 000000000..2eea525d8 --- /dev/null +++ b/api/.gitignore @@ -0,0 +1 @@ +.env \ No newline at end of file diff --git a/api/README.md b/api/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/api/frontend/.env-template b/api/frontend/.env-template new file mode 100644 index 000000000..afd955443 --- /dev/null +++ b/api/frontend/.env-template @@ -0,0 +1,2 @@ +DATA_PATH= +SOCKET_URL= \ No newline at end of file diff --git a/api/frontend/docker-compose-dev.yml b/api/frontend/docker-compose-dev.yml new file mode 100644 index 000000000..1f7b0ca2d --- /dev/null +++ b/api/frontend/docker-compose-dev.yml @@ -0,0 +1,52 @@ +version: '3.5' +services: + fim_frontend_gui: + image: fim_frontend_gui + build: + context: ./gui + container_name: fim_frontend_gui + restart: always + env_file: + - .env + expose: + - "5000" + networks: + - fim + volumes: + - ./gui/templates/:/opt/gui/templates/ + - ./gui/gui.py:/opt/gui/gui.py + fim_frontend_output_handler: + image: fim_frontend_output_handler + build: + context: ./output_handler + container_name: fim_frontend_output_handler + restart: always + external_links: + - fim_node_connector + env_file: + - .env + networks: + - fim + volumes: + - ${DATA_PATH}:/data/ + - ./output_handler/output_handler.py:/opt/output_handler/output_handler.py + fim_nginx: + image: nginx + container_name: fim_nginx + restart: always + depends_on: + - fim_frontend_gui + external_links: + - fim_node_connector + ports: + - "80:80" + volumes: + - ./nginx-dev.conf:/etc/nginx/nginx.conf:ro + networks: + - fim + command: [nginx] + +networks: + fim: + name: fim + \ No newline at end of file diff --git a/api/frontend/docker-compose-prod.yml b/api/frontend/docker-compose-prod.yml new file mode 100644 index 000000000..b408fe054 --- /dev/null +++ b/api/frontend/docker-compose-prod.yml @@ -0,0 +1,47 @@ +version: '3.5' +services: + fim_frontend_gui: + image: fim_frontend_gui + build: + context: ./gui + container_name: fim_frontend_gui + restart: always + env_file: + - .env + expose: + - "5000" + networks: + - fim + fim_frontend_output_handler: + image: fim_frontend_output_handler + build: + context: ./output_handler + container_name: fim_frontend_output_handler + restart: always + external_links: + - fim_node_connector + env_file: + - .env + networks: + - fim + volumes: + - ${DATA_PATH}:/data/ + fim_nginx: + image: nginx + container_name: fim_nginx + restart: always + depends_on: + - 
fim_frontend_gui + external_links: + - fim_node_connector + ports: + - "80:80" + volumes: + - ./nginx-prod.conf:/etc/nginx/nginx.conf:ro + networks: + - fim + command: [nginx] + +networks: + fim: + name: fim \ No newline at end of file diff --git a/api/frontend/gui/Dockerfile b/api/frontend/gui/Dockerfile new file mode 100644 index 000000000..fb5d0750e --- /dev/null +++ b/api/frontend/gui/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.8.5-slim-buster + +ENV PYTHONUNBUFFERED 1 +RUN mkdir -p /opt/gui +WORKDIR /opt/gui + +COPY requirements.txt . +RUN pip install -r requirements.txt --no-cache-dir +COPY . /opt/gui + +EXPOSE 5000 + +RUN chmod +x /opt/gui/entrypoint.sh +ENTRYPOINT ["/opt/gui/entrypoint.sh"] diff --git a/api/frontend/gui/entrypoint.sh b/api/frontend/gui/entrypoint.sh new file mode 100755 index 000000000..fbf7d3145 --- /dev/null +++ b/api/frontend/gui/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +cd /opt/gui/ +echo "Starting Gunicorn" +exec gunicorn --bind 0.0.0.0:5000 --reload wsgi:app \ No newline at end of file diff --git a/api/frontend/gui/gui.py b/api/frontend/gui/gui.py new file mode 100644 index 000000000..456622d72 --- /dev/null +++ b/api/frontend/gui/gui.py @@ -0,0 +1,16 @@ +import os +from gevent import monkey +monkey.patch_all() + +from flask import Flask, render_template, request + +SOCKET_URL = os.environ.get('SOCKET_URL') + +app = Flask(__name__) + +@app.route('/') +def main(): + return render_template('index.html', socket_url=SOCKET_URL) + +if __name__ == '__main__': + app.run("0.0.0.0", port=5000) \ No newline at end of file diff --git a/api/frontend/gui/requirements.txt b/api/frontend/gui/requirements.txt new file mode 100644 index 000000000..a78a884a3 --- /dev/null +++ b/api/frontend/gui/requirements.txt @@ -0,0 +1,4 @@ +flask==1.1.2 +flask-socketio==5.0.0 +gevent==20.9.0 +gunicorn==20.0.4 \ No newline at end of file diff --git a/api/frontend/gui/templates/index.html b/api/frontend/gui/templates/index.html new file mode 100644 index 000000000..abccdd7cc --- /dev/null +++ b/api/frontend/gui/templates/index.html @@ -0,0 +1,395 @@ + + + Cahaba API + + + + + + + +
+ [The body of api/frontend/gui/templates/index.html — a 395-line HTML/JavaScript template for the Cahaba API web GUI — is not legible here; its markup was lost in extraction. The surviving text shows the job-type tabs "FIM Run", "Calibration", and "Pre-processing"; "Basic", "Configuration", and "Extent" form sections; a "Not Connected" connection-status indicator; and a jobs table with the columns Name, Time Elapsed, Status, and Outputs Saved.]
+ + + + + + \ No newline at end of file diff --git a/api/frontend/gui/wsgi.py b/api/frontend/gui/wsgi.py new file mode 100644 index 000000000..b9303d13e --- /dev/null +++ b/api/frontend/gui/wsgi.py @@ -0,0 +1,4 @@ +from gui import app + +if __name__ == "__main__": + app.run() \ No newline at end of file diff --git a/api/frontend/nginx-dev.conf b/api/frontend/nginx-dev.conf new file mode 100644 index 000000000..97e37c1f0 --- /dev/null +++ b/api/frontend/nginx-dev.conf @@ -0,0 +1,72 @@ +user nginx; +worker_processes 1; +pid /var/run/nginx.pid; +daemon off; + +events { + worker_connections 512; + # multi_accept on; +} + +http { + sendfile on; + tcp_nopush on; + tcp_nodelay on; + proxy_connect_timeout 300; + proxy_send_timeout 300; + proxy_read_timeout 90m; + send_timeout 300; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format upstream_time '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"' + 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"'; + + access_log /var/log/nginx/access.log upstream_time; + error_log /var/log/nginx/error.log warn; + + server { + listen 80; + client_header_buffer_size 64k; + large_client_header_buffers 4 64k; + server_name _; + root /var/www/; + + gzip on; + gzip_types application/json; + proxy_http_version 1.1; + + location /stats/nginx { + stub_status on; + } + + # Node side (these should only be used if the frontend is on the same machine as the connector) + location / { + proxy_pass http://fim_node_connector:6000/; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /socket.io { + proxy_http_version 1.1; + proxy_buffering off; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "Upgrade"; + proxy_pass http://fim_node_connector:6000/socket.io; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Frontend Side + location /api { + proxy_pass http://fim_frontend_gui:5000/; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + } +} diff --git a/api/frontend/nginx-prod.conf b/api/frontend/nginx-prod.conf new file mode 100644 index 000000000..878d423fc --- /dev/null +++ b/api/frontend/nginx-prod.conf @@ -0,0 +1,55 @@ +user nginx; +worker_processes 1; +pid /var/run/nginx.pid; +daemon off; + +events { + worker_connections 512; + # multi_accept on; +} + +http { + sendfile on; + tcp_nopush on; + tcp_nodelay on; + proxy_connect_timeout 300; + proxy_send_timeout 300; + proxy_read_timeout 90m; + send_timeout 300; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format upstream_time '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"' + 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"'; + + access_log /var/log/nginx/access.log upstream_time; + error_log /var/log/nginx/error.log warn; + + server { + listen 80; + client_header_buffer_size 64k; + large_client_header_buffers 4 64k; + server_name _; + root /var/www/; + + gzip on; + gzip_types application/json; + proxy_http_version 1.1; + + location /stats/nginx { + stub_status on; + } + + # Frontend Side + location /api { + proxy_pass http://fim_frontend_gui:5000/; + proxy_set_header 
Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + } +} diff --git a/api/frontend/output_handler/Dockerfile b/api/frontend/output_handler/Dockerfile new file mode 100644 index 000000000..68498a6d3 --- /dev/null +++ b/api/frontend/output_handler/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.8.5-slim-buster + +ENV PYTHONUNBUFFERED 1 +RUN mkdir -p /opt/output_handler +WORKDIR /opt/output_handler + +COPY requirements.txt . +RUN pip install -r requirements.txt --no-cache-dir +COPY . /opt/output_handler + +RUN chmod +x /opt/output_handler/entrypoint.sh +ENTRYPOINT ["/opt/output_handler/entrypoint.sh"] diff --git a/api/frontend/output_handler/entrypoint.sh b/api/frontend/output_handler/entrypoint.sh new file mode 100755 index 000000000..248541b09 --- /dev/null +++ b/api/frontend/output_handler/entrypoint.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +umask 002 +cd /opt/output_handler/ +echo "Starting Output Handler" +python ./output_handler.py \ No newline at end of file diff --git a/api/frontend/output_handler/output_handler.py b/api/frontend/output_handler/output_handler.py new file mode 100644 index 000000000..1d7de4a88 --- /dev/null +++ b/api/frontend/output_handler/output_handler.py @@ -0,0 +1,94 @@ +import os +import time + +import socketio + +SOCKET_URL = os.environ.get('SOCKET_URL') + +pending_files = {} + +def handle_outputs(data): + name = f"{data['job_name']}_{data['file_name']}" + if name not in pending_files: + pending_files[name] = { + 'locked': False, + 'current_index': 0, + 'nice_name': data['nice_name'], + 'job_name': data['job_name'], + 'directory_path': data['directory_path'], + 'file_name': data['file_name'] + } + + pending_files[name][data['chunk_index']] = data['file_chunk'] + + + work_to_do = True + while work_to_do: + work_to_do = False + + nice_name = pending_files[name]['nice_name'] + job_name = pending_files[name]['job_name'] + directory_path = pending_files[name]['directory_path'] + file_name = pending_files[name]['file_name'] + + # If the last chunk just got added, waiting to write any potentially missing data to file + if data['file_chunk'] == None and pending_files[name]['locked']: + while name in pending_files and pending_files[name]['locked']: + print("EOF, waiting till not locked") + sio.sleep(0.5) + if not name in pending_files: + return + + # To ensure that the files are being written in the correct order, use current_index + # to write the correct file chunk. 
+ if not pending_files[name]['locked'] and pending_files[name]['current_index'] in pending_files[name]: + pending_files[name]['locked'] = True + file_chunk = pending_files[name].pop(pending_files[name]['current_index']) + + # End of file + if file_chunk == None: + if sio.connected: + sio.emit('output_handler_finished_file', {'job_name': job_name, 'file_path': f"{directory_path}/{file_name}"}) + print("finished with file", name, directory_path, file_name) + # files_to_delete.append(name) + pending_files.pop(name) + continue + else: + # Not end of file, keep looping till you can't do more work + work_to_do = True + + # Create folder if it doesn't yet exist and set writing mode + mode = 'ab' + if pending_files[name]['current_index'] == 0: + mode = 'wb' + try: + os.makedirs(f"/data/outputs/{nice_name}/{directory_path}") + except: + pass + + # Write binary data to file + with open(f"/data/outputs/{nice_name}/{directory_path}/{file_name}", mode) as binary_file: + print(f"Writing chunk {pending_files[name]['current_index']} for file {directory_path}/{file_name}") + binary_file.write(file_chunk) + + # Remove current chunk from list + pending_files[name]['current_index'] += 1 + pending_files[name]['locked'] = False + +sio = socketio.Client() + +@sio.event +def connect(): + print("Output Handler Connected!") + sio.emit('output_handler_connected') + + +@sio.event +def disconnect(): + print('disconnected from server') + +@sio.on('new_job_outputs') +def ws_new_job_outputs(data): + handle_outputs(data) + +sio.connect(SOCKET_URL) \ No newline at end of file diff --git a/api/frontend/output_handler/requirements.txt b/api/frontend/output_handler/requirements.txt new file mode 100644 index 000000000..a2217b4e9 --- /dev/null +++ b/api/frontend/output_handler/requirements.txt @@ -0,0 +1,2 @@ +python-engineio[client]==4.0.0 +python-socketio[client]==5.0.3 \ No newline at end of file diff --git a/api/node/.env-template b/api/node/.env-template new file mode 100644 index 000000000..6df26095b --- /dev/null +++ b/api/node/.env-template @@ -0,0 +1,5 @@ +DATA_PATH= +DOCKER_IMAGE_PATH= +SOCKET_URL= +FRONTEND_URL= +GITHUB_REPO=https://github.com/NOAA-OWP/cahaba.git \ No newline at end of file diff --git a/api/node/connector/Dockerfile b/api/node/connector/Dockerfile new file mode 100644 index 000000000..ffdb1581e --- /dev/null +++ b/api/node/connector/Dockerfile @@ -0,0 +1,16 @@ +FROM docker:19.03.14-dind + +RUN apk add --no-cache python3 python3-dev py3-pip build-base openssl-dev libffi-dev git + +ENV PYTHONUNBUFFERED 1 +RUN mkdir -p /opt/connector +WORKDIR /opt/connector + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --no-cache-dir +COPY . 
/opt/connector + +EXPOSE 6000 + +RUN chmod +x /opt/connector/entrypoint.sh +ENTRYPOINT ["/opt/connector/entrypoint.sh"] diff --git a/api/node/connector/connector.py b/api/node/connector/connector.py new file mode 100644 index 000000000..0148947c2 --- /dev/null +++ b/api/node/connector/connector.py @@ -0,0 +1,169 @@ +import eventlet +eventlet.monkey_patch() + +import os +import re +import random +import logging +import subprocess +from datetime import date + +from flask import Flask, request +from flask_socketio import SocketIO, emit + +DATA_PATH = os.environ.get('DATA_PATH') +DOCKER_IMAGE_PATH = os.environ.get('DOCKER_IMAGE_PATH') +SOCKET_URL = os.environ.get('SOCKET_URL') +FRONTEND_URL = os.environ.get('FRONTEND_URL') +GITHUB_REPO = os.environ.get('GITHUB_REPO') + +app = Flask(__name__) +socketio = SocketIO(app, cors_allowed_origins=[SOCKET_URL, FRONTEND_URL, "http://fim_node_connector:6000"]) + +shared_data = { + 'handler_sid': None, + 'updater_sid': None +} + +@app.route('/') +def main(): + return '
Nothing to see here....
' + +@socketio.on('connect') +def ws_conn(): + print('user connected!') + emit('is_connected', True) + +@socketio.on('disconnect') +def ws_disconn(): + print('user disconnected!') + emit('is_connected', False) + +@socketio.on('update') +def ws_update(current_jobs): + emit('client_update', current_jobs, broadcast=True) + +@socketio.on('output_handler_connected') +def ws_output_handler_connected(): + print('handler_sid: ', request.sid) + shared_data['handler_sid'] = request.sid + emit('retry_saving_files', room=shared_data['updater_sid']) + +@socketio.on('updater_connected') +def ws_updater_connected(): + print('updater_sid: ', request.sid) + shared_data['updater_sid'] = request.sid + emit('retry_saving_files', room=shared_data['updater_sid']) + +@socketio.on('ready_for_output_handler') +def ws_ready_for_output_handler(data): + nice_name = data['nice_name'] + job_name = data['job_name'] + path = data['path'] + + print(f"handler_sid: {shared_data['handler_sid']}") + + if shared_data['handler_sid'] == None: + print("output handler not connected!") + emit('retry_saving_files') + return + + # Split up path into parts for the output handler + path_parts = re.search(rf"/data/outputs/{job_name}/(.+)/(.+)", path) + directory_path = path_parts.group(1) + file_name = path_parts.group(2) + + with open(path, "rb") as binary_file: + print("Sending to output handler", path) + + # Read and emit file chunk by chunk (50MB at a time) + chunk_index = 0 + file_chunk = binary_file.read(52428800) + # file_chunk = binary_file.read(104857600) + while file_chunk: + print("Sending to output handler", path, "Chunk:", chunk_index) + emit('new_job_outputs', { + 'nice_name': nice_name, + 'job_name': job_name, + 'directory_path': directory_path, + 'file_name': file_name, + 'file_chunk': file_chunk, + 'chunk_index': chunk_index + }, room=shared_data['handler_sid']) + + chunk_index += 1 + file_chunk = binary_file.read(52428800) + # file_chunk = binary_file.read(104857600) + + # Send None to indicate end of file + print("Sending to output handler", path, "Chunk:", chunk_index, "EOF") + emit('new_job_outputs', { + 'nice_name': nice_name, + 'job_name': job_name, + 'directory_path': directory_path, + 'file_name': file_name, + 'file_chunk': None, + 'chunk_index': chunk_index + }, room=shared_data['handler_sid']) + +@socketio.on('output_handler_finished_file') +def ws_output_handler_finished_file(data): + job_name = data['job_name'] + file_path = data['file_path'] + + print('done saving', job_name, file_path) + emit('file_saved', { + 'job_name': job_name, + 'file_path': f"/data/outputs/{job_name}/{file_path}" + }, room=shared_data['updater_sid']) + +@socketio.on('new_job') +def ws_new_job(job_params): + validation_errors = [] + + # Validate Hucs Name Option + hucs = ' '.join(job_params['hucs'].replace(',', ' ').split()) + invalid_hucs = re.search('[a-zA-Z]', hucs) + if invalid_hucs: validation_errors.append('Invalid Huc(s)') + + # Validate Git Branch Option + branch = '' + branch_exists = subprocess.run(['git', 'ls-remote', '--heads', GITHUB_REPO, job_params['git_branch'].replace(' ', '_')], stdout=subprocess.PIPE).stdout.decode('utf-8') + if branch_exists: branch = job_params['git_branch'].replace(' ', '_') + else: validation_errors.append('Git Branch Does Not Exist') + + # Validate Job Name Option + job_name = f"apijob_{job_params['job_name'].replace(' ', '_')[0:50]}_apijob_{branch}_{date.today().strftime('%d%m%Y')}_{random.randint(0, 99999)}" + + # Validate Extent Option + extent = '' + if job_params['extent'] == 'FR': extent 
= 'FR' + elif job_params['extent'] == 'MS': extent = 'MS' + else: validation_errors.append('Invalid Extent Option') + + # Validate Configuration Option + config_path = '' + if job_params['configuration'] == 'default': config_path = './foss_fim/config/params_template.env' + elif job_params['configuration'] == 'calibrated': config_path = './foss_fim/config/params_calibrated.env' + else: validation_errors.append('Invalid Configuration Option') + + # Validate Dev Run Option + if job_params['dev_run'] : dev_run = True + else: dev_run = False + + if len(validation_errors) == 0: + # Clone github repo, with specific branch, to a temp folder + print(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name}') + subprocess.call(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name}', shell=True) + + # TODO: instead of starting the job right away, add it to a queue until there are enough resources to run it. Also track things like huc count and huc type (6 or 8) + + # Kick off the new job as a docker container with the new cloned repo as the volume + print(f"docker run -d --rm --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {job_name} -o {'' if dev_run else '-p'}") + subprocess.call(f"docker run -d --rm --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {job_name} -o {'' if dev_run else '-p'}", shell=True) + emit('job_started', 'fim_run') + else: + emit('validation_errors', validation_errors) + +if __name__ == '__main__': + socketio.run(app, host="0.0.0.0", port="6000") diff --git a/api/node/connector/entrypoint.sh b/api/node/connector/entrypoint.sh new file mode 100755 index 000000000..d6d853d6a --- /dev/null +++ b/api/node/connector/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +cd /opt/connector/ +echo "Starting Connector" +python3 ./connector.py diff --git a/api/node/connector/requirements.txt b/api/node/connector/requirements.txt new file mode 100644 index 000000000..9d8c9d17f --- /dev/null +++ b/api/node/connector/requirements.txt @@ -0,0 +1,3 @@ +flask==1.1.2 +flask-socketio==5.0.0 +eventlet==0.30.0 \ No newline at end of file diff --git a/api/node/docker-compose-dev.yml b/api/node/docker-compose-dev.yml new file mode 100644 index 000000000..9823afad9 --- /dev/null +++ b/api/node/docker-compose-dev.yml @@ -0,0 +1,38 @@ +version: '3.5' +services: + fim_node_connector: + image: fim_node_connector + build: + context: ./connector + container_name: fim_node_connector + env_file: + - .env + restart: always + expose: + - "6000" + networks: + fim: + aliases: + - fimnodeconnector + volumes: + - ${DATA_PATH}:/data/ + - /var/run/docker.sock:/var/run/docker.sock + - ./connector/connector.py:/opt/connector/connector.py + fim_node_updater: + image: fim_node_updater + build: + context: ./updater + container_name: fim_node_updater + restart: always + depends_on: + - fim_node_connector + networks: + - fim + volumes: + - ${DATA_PATH}:/data/ + - /var/run/docker.sock:/var/run/docker.sock + - ./updater/updater.py:/opt/updater/updater.py +networks: + fim: + name: fim + \ No newline at end of file diff --git a/api/node/docker-compose-prod.yml b/api/node/docker-compose-prod.yml new file mode 100644 index 000000000..3e1ee654a --- /dev/null +++ b/api/node/docker-compose-prod.yml @@ -0,0 +1,51 @@ +version: '3.5' +services: + fim_node_connector: + image: fim_node_connector + build: + context: 
./connector + container_name: fim_node_connector + env_file: + - .env + restart: always + expose: + - "6000" + networks: + fim: + aliases: + - fimnodeconnector + volumes: + - ${DATA_PATH}:/data/ + - /var/run/docker.sock:/var/run/docker.sock + fim_node_updater: + image: fim_node_updater + build: + context: ./updater + container_name: fim_node_updater + restart: always + depends_on: + - fim_node_connector + networks: + - fim + volumes: + - ${DATA_PATH}:/data/ + - /var/run/docker.sock:/var/run/docker.sock + fim_nginx: + image: nginx + container_name: fim_nginx + restart: always + depends_on: + - fim_node_connector + ports: + - "80:80" + volumes: + - ./nginx.conf:/etc/nginx/nginx.conf:ro + networks: + - fim + command: [nginx] + +networks: + fim: + name: fim + + \ No newline at end of file diff --git a/api/node/nginx.conf b/api/node/nginx.conf new file mode 100644 index 000000000..c2fc935bd --- /dev/null +++ b/api/node/nginx.conf @@ -0,0 +1,64 @@ +user nginx; +worker_processes 1; +pid /var/run/nginx.pid; +daemon off; + +events { + worker_connections 512; + # multi_accept on; +} + +http { + sendfile on; + tcp_nopush on; + tcp_nodelay on; + proxy_connect_timeout 300; + proxy_send_timeout 300; + proxy_read_timeout 90m; + send_timeout 300; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format upstream_time '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"' + 'rt=$request_time uct="$upstream_connect_time" uht="$upstream_header_time" urt="$upstream_response_time"'; + + access_log /var/log/nginx/access.log upstream_time; + error_log /var/log/nginx/error.log warn; + + server { + listen 80; + client_header_buffer_size 64k; + large_client_header_buffers 4 64k; + server_name _; + root /var/www/; + + gzip on; + gzip_types application/json; + proxy_http_version 1.1; + + location /stats/nginx { + stub_status on; + } + + location / { + proxy_pass http://fim_node_connector:6000/; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /socket.io { + proxy_http_version 1.1; + proxy_buffering off; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "Upgrade"; + proxy_pass http://fim_node_connector:6000/socket.io; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Proto $scheme; + } + } +} \ No newline at end of file diff --git a/api/node/updater/Dockerfile b/api/node/updater/Dockerfile new file mode 100644 index 000000000..e179f3a42 --- /dev/null +++ b/api/node/updater/Dockerfile @@ -0,0 +1,14 @@ +FROM docker:19.03.14-dind + +RUN apk add --no-cache python3 python3-dev py3-pip + +ENV PYTHONUNBUFFERED 1 +RUN mkdir -p /opt/updater +WORKDIR /opt/updater + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --no-cache-dir +COPY . 
/opt/updater + +RUN chmod +x /opt/updater/entrypoint.sh +ENTRYPOINT ["/opt/updater/entrypoint.sh"] diff --git a/api/node/updater/entrypoint.sh b/api/node/updater/entrypoint.sh new file mode 100755 index 000000000..0f74f2541 --- /dev/null +++ b/api/node/updater/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +cd /opt/updater/ +echo "Starting Update Loop" +python3 ./updater.py diff --git a/api/node/updater/requirements.txt b/api/node/updater/requirements.txt new file mode 100644 index 000000000..a2217b4e9 --- /dev/null +++ b/api/node/updater/requirements.txt @@ -0,0 +1,2 @@ +python-engineio[client]==4.0.0 +python-socketio[client]==5.0.3 \ No newline at end of file diff --git a/api/node/updater/updater.py b/api/node/updater/updater.py new file mode 100644 index 000000000..7bc65836c --- /dev/null +++ b/api/node/updater/updater.py @@ -0,0 +1,147 @@ +import os +import re +import math +import time +import json +import shutil +import logging + +import socketio + +DATA_PATH = os.environ.get('DATA_PATH') + +connected = False +shared_data = { + 'connected': False +} + +current_jobs = {} +if os.path.exists('/data/outputs/current_jobs.json'): + with open('/data/outputs/current_jobs.json') as f: + current_jobs = json.load(f) + +# Get all the current running jobs from the list of docker containers, store that data in a dictionary +# along with any other needed metadata (like if it's still running, doing post processing, copying outputs +# to its destination, etc), and then update the websocket server of the status of the jobs. +def update_loop(): + while True: + # Get list of current docker containers that are fim run jobs + job_names = os.popen("docker container ls --filter=name=apijob --format '{{.Names}}'").read().splitlines() + for job_name in job_names: + if job_name not in current_jobs: + # If it's a new job, add it to the dictionary + current_jobs[job_name] = { + 'job_name': job_name, + 'nice_name': re.search(r"apijob_(.+)_apijob.+", job_name).group(1), + 'status': 'In Progress', + 'time_started': time.time(), + 'time_elapsed': 0, + 'output_files_saved': {} + } + + jobs_to_delete = [] + for job_name in current_jobs.keys(): + # Update the time elapsed for all jobs that are currently in progress or saving outputs + if current_jobs[job_name]['status'] == 'In Progress' or current_jobs[job_name]['status'] == 'Ready to Save File'\ + or current_jobs[job_name]['status'] == 'Saving File': + current_jobs[job_name]['time_elapsed'] = math.ceil(time.time() - current_jobs[job_name]['time_started']) + + # TODO: While job is in progress, keep track of how many hucs are done and overall progress % + + # Once the Docker container is done, set the job as ready to save output + if current_jobs[job_name]['status'] == 'In Progress' and job_name not in job_names: + for path, folders, files in os.walk(f"/data/outputs/{job_name}"): + for file in files: + current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = False + + current_jobs[job_name]['status'] = 'Ready to Save File' + # TODO: Possible check the completed job's log for its exit code + + # Trigger connector to transmit the outputs to the output_handler + # If the output_handler is offline, it will keep retrying until the output_handler is online + if current_jobs[job_name]['status'] == 'Ready to Save File': + print(f"{job_name} ready for output handler") + outputs_to_save = [] + for path in current_jobs[job_name]['output_files_saved']: + if current_jobs[job_name]['output_files_saved'][path] == False: + outputs_to_save.append(path) + + if 
len(outputs_to_save) > 0: + if shared_data['connected']: + sio.emit('ready_for_output_handler', { + 'nice_name': current_jobs[job_name]['nice_name'], + 'job_name': job_name, + 'path': outputs_to_save[0] + }) + current_jobs[job_name]['status'] = 'Saving File' + + # Once the output_handler is done getting the outputs and the connector deletes the temp repo source, + # mark as completed + if current_jobs[job_name]['status'] == 'Saving File': + is_done = True + for path in current_jobs[job_name]['output_files_saved']: + if current_jobs[job_name]['output_files_saved'][path] == False: + is_done = False + break + + if is_done: + print("output_handler finished, deleted temp source files and output files") + temp_path = f"/data/temp/{job_name}" + if os.path.isdir(temp_path): + shutil.rmtree(temp_path) + + outputs_path = f"/data/outputs/{job_name}" + if os.path.isdir(outputs_path): + shutil.rmtree(outputs_path) + + current_jobs[job_name]['status'] = 'Completed' + print(f"{job_name} completed") + # TODO: Insert Slack notification here for finished job + + # Remove job from list after it's been completed for more than 15 minutes + if current_jobs[job_name]['status'] == 'Completed' and \ + time.time() >= current_jobs[job_name]['time_started'] + current_jobs[job_name]['time_elapsed'] + 900: + print(f"{job_name} removed from job list") + jobs_to_delete.append(job_name) + + for job in jobs_to_delete: + del current_jobs[job] + + # Send updates to the connector and write job progress to file + if shared_data['connected']: sio.emit('update', current_jobs) + with open('/data/outputs/current_jobs.json', 'w') as f: + json.dump(current_jobs, f) + + time.sleep(1) + +sio = socketio.Client() + +@sio.event +def connect(): + print("Update Loop Connected!") + sio.emit('updater_connected') + shared_data['connected'] = True + +@sio.event +def disconnect(): + print('disconnected from server') + shared_data['connected'] = False + +# If the output_handler is offline, try the saving process again +@sio.on('retry_saving_files') +def ws_retry_saving_files(): + print('saving files failed, retrying') + for job_name in current_jobs: + if current_jobs[job_name]['status'] == "Saving File": + current_jobs[job_name]['status'] = 'Ready to Save File' + +@sio.on('file_saved') +def ws_file_saved(data): + job_name = data['job_name'] + file_path = data['file_path'] + + current_jobs[job_name]['output_files_saved'][file_path] = True + current_jobs[job_name]['status'] = 'Ready to Save File' + +sio.connect('http://fim_node_connector:6000/') +update_loop() \ No newline at end of file From 2ded12aa2ada7da5599f21193fa6c31f694fbfdf Mon Sep 17 00:00:00 2001 From: Brad Date: Thu, 28 Jan 2021 10:22:57 -0600 Subject: [PATCH 010/359] Updated point of contact on README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 69a815b6d..5b2e3c781 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ Please see the issue tracker on GitHub for known issues and for getting help. ## Getting Involved -NOAA's National Water Center welcomes anyone to contribute to the Cahaba repository to improve flood inundation mapping capabilities. Please contact Fernando Aristizabal (fernando.aristizabal@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) to get started. +NOAA's National Water Center welcomes anyone to contribute to the Cahaba repository to improve flood inundation mapping capabilities. Please contact Brad Bates (bradford.bates@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) to get started. 
---- From dda9e22e927816dc359e70382543c249314de462 Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 2 Feb 2021 13:12:47 -0600 Subject: [PATCH 011/359] Updated README.md credits Updated README.md with Esri Arc Hydro credit to acknowledge the knowledge gained and discoveries made through collaboration with Esri. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5b2e3c781..47d3561ab 100644 --- a/README.md +++ b/README.md @@ -118,3 +118,4 @@ NOAA's National Water Center welcomes anyone to contribute to the Cahaba reposit 8. Federal Emergency Management Agency (FEMA) Base Level Engineering [(BLE)](https://webapps.usgs.gov/infrm/estBFE/) 9. Verdin, James; Verdin, Kristine; Mathis, Melissa; Magadzire, Tamuka; Kabuchanga, Eric; Woodbury, Mark; and Gadain, Hussein, 2016, A software tool for rapid flood inundation mapping: U.S. Geological Survey Open-File Report 2016–1038, 26 p., http://dx.doi.org/10.3133/ofr20161038. 10. United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR). https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution +11. Esri Arc Hydro. https://www.esri.com/library/fliers/pdfs/archydro.pdf From d657b985b4442e31ba8a4f730a59774f2f0717e7 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 4 Feb 2021 09:50:31 -0600 Subject: [PATCH 012/359] Post-process to aggregate FIM outputs to HUC6 scale Additions * viz outputs aggregated to HUC6 scale; saves outputs to aggregate_fim_outputs folder Changes * split_flows.py now splits streams at HUC8 boundaries to ensure consistent catchment boundaries along edges * aggregate_fim_outputs.sh has been depreciated but remains in the repo for potential FIM 4 development * replaced geopandas driver arg with getDriver throughout repo * organized parameters in environment files by group * cleaned up variable names in split_flows.py and build_stream_traversal.py * build_stream_traversal.py is now assigning HydroID by midpoint instead centroid * cleanup of clip_vectors_to_wbd.py resolves #161 and #4 --- CHANGELOG.md | 18 +++ config/params_calibrated.env | 11 +- config/params_template.env | 11 +- fim_run.sh | 7 +- lib/acquire_and_preprocess_inputs.py | 7 +- lib/add_crosswalk.py | 5 +- lib/aggregate_fim_outputs.py | 106 ++++++++++++++++++ lib/aggregate_vector_inputs.py | 17 +-- ...traversal.py => build_stream_traversal.py} | 20 +++- lib/clip_vectors_to_wbd.py | 20 ++-- lib/derive_headwaters.py | 7 +- lib/reachID_grid_to_vector_points.py | 4 +- lib/run_by_unit.sh | 10 +- lib/split_flows.py | 36 +++--- 14 files changed, 215 insertions(+), 64 deletions(-) create mode 100644 lib/aggregate_fim_outputs.py rename lib/{buildstreamtraversal.py => build_stream_traversal.py} (90%) diff --git a/CHANGELOG.md b/CHANGELOG.md index eed3cd415..fd90d4713 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,24 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.3.0 - 2021-02-04 - [PR #227](https://github.com/NOAA-OWP/cahaba/pull/227) + +Post-process to aggregate FIM outputs to HUC6 scale. + +### Additions + + - Viz outputs aggregated to HUC6 scale; saves outputs to `aggregate_fim_outputs` folder. + +### Changes + + - `split_flows.py` now splits streams at HUC8 boundaries to ensure consistent catchment boundaries along edges. + - `aggregate_fim_outputs.sh` has been depreciated but remains in the repo for potential FIM 4 development. 
+ - Replaced geopandas driver arg with getDriver throughout repo. + - Organized parameters in environment files by group. + - Cleaned up variable names in `split_flows.py` and `build_stream_traversal.py`. + - `build_stream_traversal.py` is now assigning HydroID by midpoint instead centroid. + - Cleanup of `clip_vectors_to_wbd.py`. + ## v3.0.2.0 - 2021-01-25 - [PR #218](https://github.com/NOAA-OWP/cahaba/pull/218) Addition of an API service to schedule, run and manage `fim_run` jobs through a user-friendly web interface. diff --git a/config/params_calibrated.env b/config/params_calibrated.env index f150d0428..ff0da17b0 100644 --- a/config/params_calibrated.env +++ b/config/params_calibrated.env @@ -1,16 +1,21 @@ #!/bin/bash -#### geospatial parameters #### +#### hydroconditioning parameters #### export negative_burn_value=1000 export agree_DEM_buffer=70 +export wbd_buffer=5000 + +#### geospatial parameters #### export max_split_distance_meters=1500 +export ms_buffer_dist=7000 +export lakes_buffer_dist_meters=20 + +#### rating curve parameters #### export manning_n="/foss_fim/config/mannings_calibrated.json" export stage_min_meters=0 export stage_interval_meters=0.3048 export stage_max_meters=25 export slope_min=0.001 -export ms_buffer_dist=7000 -export lakes_buffer_dist_meters=20 #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed diff --git a/config/params_template.env b/config/params_template.env index 04af26828..b250de4bf 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -1,16 +1,21 @@ #!/bin/bash -#### geospatial parameters #### +#### hydroconditioning parameters #### export negative_burn_value=1000 export agree_DEM_buffer=70 +export wbd_buffer=5000 + +#### geospatial parameters #### export max_split_distance_meters=1500 +export ms_buffer_dist=7000 +export lakes_buffer_dist_meters=20 + +#### rating curve parameters #### export manning_n="/foss_fim/config/mannings_default.json" export stage_min_meters=0 export stage_interval_meters=0.3048 export stage_max_meters=25 export slope_min=0.001 -export ms_buffer_dist=7000 -export lakes_buffer_dist_meters=20 #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed diff --git a/fim_run.sh b/fim_run.sh index a8763da60..b2efa0ec5 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -147,5 +147,8 @@ else fi fi -# aggregate outputs -bash /foss_fim/lib/aggregate_fim_outputs.sh $outputRunDataDir +echo "$viz" +if [[ "$viz" -eq 1 ]]; then + # aggregate outputs + python3 /foss_fim/lib/aggregate_fim_outputs.py -d $outputRunDataDir +fi diff --git a/lib/acquire_and_preprocess_inputs.py b/lib/acquire_and_preprocess_inputs.py index 01270f235..bd1eda194 100755 --- a/lib/acquire_and_preprocess_inputs.py +++ b/lib/acquire_and_preprocess_inputs.py @@ -23,7 +23,7 @@ OVERWRITE_NHD, OVERWRITE_ALL) -from utils.shared_functions import pull_file, run_system_command, subset_wbd_gpkg, delete_file +from utils.shared_functions import pull_file, run_system_command, subset_wbd_gpkg, delete_file, getDriver NHDPLUS_VECTORS_DIRNAME = 'nhdplus_vectors' NHDPLUS_RASTERS_DIRNAME = 'nhdplus_rasters' @@ -82,10 +82,9 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_ fossids = [str(item).zfill(4) for item in list(range(1, 1 + len(wbd_hu8)))] wbd_hu8[FOSS_ID] = fossids wbd_hu8 = wbd_hu8.to_crs(PREP_PROJECTION) # Project. - #wbd_hu8.to_file(os.path.join(wbd_directory, 'WBDHU8.gpkg'), driver='GPKG') # Save. 
wbd_hu8 = subset_wbd_to_nwm_domain(wbd_hu8,nwm_file_to_use) wbd_hu8.geometry = wbd_hu8.buffer(0) - wbd_hu8.to_file(multilayer_wbd_geopackage, driver='GPKG',layer='WBDHU8') # Save. + wbd_hu8.to_file(multilayer_wbd_geopackage,layer='WBDHU8',driver=getDriver(multilayer_wbd_geopackage),index=False) # Save. wbd_hu8.HUC8.to_csv(nwm_huc_list_file_template.format('8'),index=False,header=False) #wbd_gpkg_list.append(os.path.join(wbd_directory, 'WBDHU8.gpkg')) # Append to wbd_gpkg_list for subsetting later. del wbd_hu8 @@ -99,7 +98,7 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_ wbd = wbd.rename(columns={'huc'+wbd_layer_num : 'HUC' + wbd_layer_num}) wbd = subset_wbd_to_nwm_domain(wbd,nwm_file_to_use) wbd.geometry = wbd.buffer(0) - wbd.to_file(multilayer_wbd_geopackage,driver="GPKG",layer=wbd_layer) + wbd.to_file(multilayer_wbd_geopackage,layer=wbd_layer,driver=getDriver(multilayer_wbd_geopackage),index=False) wbd['HUC{}'.format(wbd_layer_num)].to_csv(nwm_huc_list_file_template.format(wbd_layer_num),index=False,header=False) #output_gpkg = os.path.join(wbd_directory, wbd_layer + '.gpkg') #wbd_gpkg_list.append(output_gpkg) diff --git a/lib/add_crosswalk.py b/lib/add_crosswalk.py index 1f51efb4d..c5ed0dd5a 100755 --- a/lib/add_crosswalk.py +++ b/lib/add_crosswalk.py @@ -7,6 +7,7 @@ import json import argparse import sys +from utils.shared_functions import getDriver def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,calibration_mode=False): @@ -163,8 +164,8 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_src_json[str(hid)] = { 'q_list' : q_list , 'stage_list' : stage_list } # write out - output_catchments.to_file(output_catchments_fileName, driver="GPKG",index=False) - output_flows.to_file(output_flows_fileName, driver="GPKG", index=False) + output_catchments.to_file(output_catchments_fileName,driver=getDriver(output_catchments_fileName),index=False) + output_flows.to_file(output_flows_fileName,driver=getDriver(output_flows_fileName),index=False) output_src.to_csv(output_src_fileName,index=False) output_crosswalk.to_csv(output_crosswalk_fileName,index=False) output_hydro_table.to_csv(output_hydro_table_fileName,index=False) diff --git a/lib/aggregate_fim_outputs.py b/lib/aggregate_fim_outputs.py new file mode 100644 index 000000000..1296fb8cc --- /dev/null +++ b/lib/aggregate_fim_outputs.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 + +import os +import argparse +import pandas as pd +import json +import rasterio +from rasterio.merge import merge +from utils.shared_variables import PREP_PROJECTION + + +def aggregate_fim_outputs(fim_out_dir): + + print ("aggregating outputs to HUC6 scale") + drop_folders = ['logs'] + huc_list = [huc for huc in os.listdir(fim_out_dir) if huc not in drop_folders] + huc6_list = [str(huc[0:6]) for huc in os.listdir(fim_out_dir) if huc not in drop_folders] + huc6_list = list(set(huc6_list)) + + for huc in huc_list: + + os.makedirs(os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6])), exist_ok=True) + + # merge hydrotable + aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6]),'hydroTable.csv') + hydrotable = pd.read_csv(os.path.join(fim_out_dir,huc,'hydroTable.csv')) + 
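+ # NOTE: each HUC8's rating curves are appended into a single HUC6 hydroTable.csv below; the CSV header is only written when the aggregate file does not exist yet.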
+ # write out hydrotable + if os.path.isfile(aggregate_hydrotable): + hydrotable.to_csv(aggregate_hydrotable,index=False, mode='a',header=False) + else: + hydrotable.to_csv(aggregate_hydrotable,index=False) + + del hydrotable + + # merge src + aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6]),'src.json') + src = open(os.path.join(fim_out_dir,huc,'src.json')) + src = json.load(src) + + # write out src + if os.path.isfile(aggregate_src): + with open(aggregate_src, 'a') as outfile: + json.dump(src, outfile) + else: + with open(aggregate_src, 'w') as outfile: + json.dump(src, outfile) + + del src + + for huc6 in huc6_list: + huc6_dir = os.path.join(fim_out_dir,'aggregate_fim_outputs',huc6) + + huc6_filter = [path.startswith(huc6) for path in huc_list] + subset_huc6_list = [i for (i, v) in zip(huc_list, huc6_filter) if v] + + # aggregate and mosaic rem + rem_list = [os.path.join(fim_out_dir,huc,'rem_zeroed_masked.tif') for huc in subset_huc6_list] + + rem_files_to_mosaic = [] + + for rem in rem_list: + rem_src = rasterio.open(rem) + rem_files_to_mosaic.append(rem_src) + + mosaic, out_trans = merge(rem_files_to_mosaic) + out_meta = rem_src.meta.copy() + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION}) + + rem_mosaic = os.path.join(huc6_dir,'rem_zeroed_masked.tif') + with rasterio.open(rem_mosaic, "w", **out_meta) as dest: + dest.write(mosaic) + + del rem_files_to_mosaic,rem_src,out_meta,mosaic + + # aggregate and mosaic catchments + catchment_list = [os.path.join(fim_out_dir,huc,'gw_catchments_reaches_filtered_addedAttributes.tif') for huc in subset_huc6_list] + + cat_files_to_mosaic = [] + + for cat in catchment_list: + cat_src = rasterio.open(cat) + cat_files_to_mosaic.append(cat_src) + + mosaic, out_trans = merge(cat_files_to_mosaic) + out_meta = cat_src.meta.copy() + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION}) + + catchment_mosaic = os.path.join(huc6_dir,'gw_catchments_reaches_filtered_addedAttributes.tif') + with rasterio.open(catchment_mosaic, "w", **out_meta) as dest: + dest.write(mosaic) + + del cat_files_to_mosaic,cat_src,out_meta,mosaic + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Aggregate layers buy HUC6') + parser.add_argument('-d','--fim-outputs-directory', help='FIM outputs directory', required=True) + + + args = vars(parser.parse_args()) + + fim_outputs_directory = args['fim_outputs_directory'] + + aggregate_fim_outputs(fim_outputs_directory) diff --git a/lib/aggregate_vector_inputs.py b/lib/aggregate_vector_inputs.py index d60cdc2f0..eb4d3e4f5 100755 --- a/lib/aggregate_vector_inputs.py +++ b/lib/aggregate_vector_inputs.py @@ -76,7 +76,7 @@ def subset_nwm_ms_streams(args): nwm_streams.reset_index(drop=True,inplace=True) - nwm_streams.to_file(output_filename,getDriver(output_filename),index=False) + nwm_streams.to_file(output_filename,driver=getDriver(output_filename),index=False) def find_nwm_incoming_streams(args): @@ -142,7 +142,8 @@ def find_nwm_incoming_streams(args): huc_intersection = gpd.GeoDataFrame({'geometry' : intersecting_points},crs=nwm_streams.crs,geometry='geometry') huc_intersection = huc_intersection.drop_duplicates() - huc_intersection.to_file(output_filename,getDriver(output_filename)) + huc_intersection.to_file(output_filename,driver=getDriver(output_filename)) 
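The new `aggregate_fim_outputs.py` above groups each HUC8 output folder by its first six digits and mosaics the per-HUC8 REM and catchment rasters into one HUC6 raster with `rasterio.merge`. A minimal sketch of that mosaicking pattern, reusing the `PREP_PROJECTION` constant from the script (the `mosaic_huc8_rasters` helper and its arguments are illustrative names, not part of the repo):

import rasterio
from rasterio.merge import merge
from utils.shared_variables import PREP_PROJECTION

def mosaic_huc8_rasters(huc8_raster_paths, out_path):
    # Open every HUC8 tile and let rasterio.merge handle extents and overlaps
    sources = [rasterio.open(p) for p in huc8_raster_paths]
    mosaic, out_trans = merge(sources)  # mosaic shape is (bands, height, width)
    meta = sources[0].meta.copy()
    meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2],
                 "dtype": str(mosaic.dtype), "transform": out_trans, "crs": PREP_PROJECTION})
    with rasterio.open(out_path, "w", **meta) as dest:
        dest.write(mosaic)
    for src in sources:
        src.close()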
+ def collect_stream_attributes(args, huc): print ('Starting huc: ' + str(huc)) @@ -230,8 +231,8 @@ def subset_stream_networks(args, huc): adj_nhd_headwaters_fr_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') # write out FR adjusted - adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,getDriver(nhd_streams_fr_adjusted_fileName),index=False) - adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,getDriver(adj_nhd_headwaters_fr_fileName),index=False) + adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) + adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) del adj_nhd_streams_fr, adj_nhd_headwater_points_fr else: @@ -253,8 +254,8 @@ def subset_stream_networks(args, huc): adj_nhd_headwaters_ms_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') # write out MS adjusted - adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,getDriver(nhd_streams_ms_adjusted_fileName),index=False) - adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,getDriver(adj_nhd_headwaters_ms_fileName),index=False) + adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) + adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) del adj_nhd_streams_ms, adj_nhd_headwater_points_ms @@ -322,7 +323,6 @@ def aggregate_stream_networks(in_dir,agg_dir, huc_list): del adj_nhd_headwater_points_ms - def clean_up_intermediate_files(in_dir): for huc in os.listdir(in_dir): @@ -371,7 +371,8 @@ def clean_up_intermediate_files(in_dir): # print ('deriving nwm headwater points') # nwm_headwaters = findHeadWaterPoints(nwm_streams_fr_filename) # nwm_headwaters['ID'] = nwm_headwaters.index + 1 - # nwm_headwaters.to_file(nwm_headwaters_filename,getDriver(nwm_headwaters_filename),index=False) + # nwm_headwaters.to_file(nwm_headwaters_filename,driver=getDriver(nwm_headwaters_filename),index=False) + # del nwm_headwaters, nwm_streams ## subset NWM MS Streams diff --git a/lib/buildstreamtraversal.py b/lib/build_stream_traversal.py similarity index 90% rename from lib/buildstreamtraversal.py rename to lib/build_stream_traversal.py index f7b2bcbe4..a30fe04ed 100644 --- a/lib/buildstreamtraversal.py +++ b/lib/build_stream_traversal.py @@ -26,7 +26,7 @@ def trace(): to_node = "To_Node" next_down_id = "NextDownID" -class BuildStreamTraversalColumns(object): +class build_stream_traversal_columns(object): '''Tool class for updating the next down IDs of stream features.''' def __init__(self): '''Define tool properties (tool name is the class name).''' @@ -42,13 +42,21 @@ def execute(self, streams, wbd8, hydro_id): if not hydro_id in streams.columns: print ("Required field " + hydro_id + " does not exist in input. 
Generating..") - stream_centroid = gpd.GeoDataFrame({'geometry':streams.geometry.centroid}, crs=streams.crs, geometry='geometry') - stream_wbdjoin = gpd.sjoin(stream_centroid, wbd8, how='left', op='within') - stream_wbdjoin = stream_wbdjoin.rename(columns={"geometry": "centroid", "index_right": "HUC8id"}) - streams = streams.join(stream_wbdjoin).drop(columns=['centroid']) + # Get stream midpoint + stream_midpoint = [] + for i,lineString in enumerate(streams.geometry): + stream_midpoint = stream_midpoint + [lineString.interpolate(0.05,normalized=True)] + + stream_md_gpd = gpd.GeoDataFrame({'geometry':stream_midpoint}, crs=streams.crs, geometry='geometry') + stream_wbdjoin = gpd.sjoin(stream_md_gpd, wbd8, how='left', op='within') + stream_wbdjoin = stream_wbdjoin.rename(columns={"geometry": "midpoint", "index_right": "HUC8id"}) + streams = streams.join(stream_wbdjoin).drop(columns=['midpoint']) streams['seqID'] = (streams.groupby('HUC8id').cumcount(ascending=True)+1).astype('str').str.zfill(4) streams = streams.loc[streams['HUC8id'].notna(),:] + if streams.HUC8id.dtype != 'str': streams.HUC8id = streams.HUC8id.astype(str) + if streams.seqID.dtype != 'str': streams.seqID = streams.seqID.astype(str) + streams = streams.assign(hydro_id= lambda x: x.HUC8id + x.seqID) streams = streams.rename(columns={"hydro_id": hydro_id}).sort_values(hydro_id) streams = streams.drop(columns=['HUC8id', 'seqID']) @@ -170,7 +178,7 @@ def execute(self, streams, wbd8, hydro_id): wbd8 = args.parameters[1] hydro_id = args.parameters[2] - oProcessor = BuildStreamTraversalColumns() + oProcessor = build_stream_traversal_columns() params = (streams, wbd8, hydro_id) tResults=None tResults = oProcessor.execute(params) diff --git a/lib/clip_vectors_to_wbd.py b/lib/clip_vectors_to_wbd.py index 92ece11f8..654fe6e4f 100755 --- a/lib/clip_vectors_to_wbd.py +++ b/lib/clip_vectors_to_wbd.py @@ -5,8 +5,9 @@ import argparse from os.path import splitext from shapely.geometry import MultiPolygon,Polygon,Point +from utils.shared_functions import getDriver -def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks=False,extent='FR'): +def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks=False): hucUnitLength = len(str(hucCode)) @@ -75,7 +76,11 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l # nhd_streams = nhd_streams.loc[~nhd_streams.NHDPlusID.isin(NHDPlusIDs)] - nhd_streams.to_file(subset_nhd_streams_filename,driver=getDriver(subset_nhd_streams_filename),index=False) + if len(nhd_streams) > 0: + nhd_streams.to_file(subset_nhd_streams_filename,driver=getDriver(subset_nhd_streams_filename),index=False) + else: + print ("No NHD streams within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) if len(nhd_headwaters) > 0: 
nhd_headwaters.to_file(subset_nhd_headwaters_filename,driver=getDriver(subset_nhd_headwaters_filename),index=False) @@ -90,13 +95,6 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l nwm_streams.to_file(subset_nwm_streams_filename,driver=getDriver(subset_nwm_streams_filename),index=False) del nwm_streams -def getDriver(filename): - - driverDictionary = {'.gpkg' : 'GPKG','.geojson' : 'GeoJSON','.shp' : 'ESRI Shapefile'} - driver = driverDictionary[splitext(filename)[1]] - - return(driver) - if __name__ == '__main__': parser = argparse.ArgumentParser(description='Subset vector layers') @@ -118,7 +116,6 @@ def getDriver(filename): parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False) - parser.add_argument('-p','--extent',help='MS or FR extent',required=True) args = vars(parser.parse_args()) @@ -140,6 +137,5 @@ def getDriver(filename): subset_nwm_streams_filename = args['subset_nwm_streams'] subset_landsea_filename = args['subset_landsea'] dissolveLinks = args['dissolve_links'] - extent = args['extent'] - subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks,extent) + subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks) diff --git a/lib/derive_headwaters.py b/lib/derive_headwaters.py index d29381c60..9fb7c1020 100644 --- a/lib/derive_headwaters.py +++ b/lib/derive_headwaters.py @@ -2,6 +2,7 @@ import geopandas as gpd from shapely.geometry import Point +from utils.shared_functions import getDriver def findHeadWaterPoints(flows): @@ -32,12 +33,6 @@ def findHeadWaterPoints(flows): return(hw_gdf) -def getDriver(fileName): - - driverDictionary = {'.gpkg' : 'GPKG','.geojson' : 'GeoJSON','.shp' : 'ESRI Shapefile'} - driver = driverDictionary[splitext(fileName)[1]] - - return(driver) if __name__ == '__main__': diff --git a/lib/reachID_grid_to_vector_points.py b/lib/reachID_grid_to_vector_points.py index c4a54b2ec..5dadd43c1 100755 --- a/lib/reachID_grid_to_vector_points.py +++ b/lib/reachID_grid_to_vector_points.py @@ -12,6 +12,7 @@ import geopandas as gpd from shapely.geometry import Point from raster import Raster +from utils.shared_functions import getDriver """ USAGE: @@ -78,8 +79,7 @@ i += 1 pointGDF = gpd.GeoDataFrame({'id' : id, 'geometry' : points},crs=boolean.proj,geometry='geometry') -pointGDF.to_file(outputFileName,driver='GPKG',index=False) +pointGDF.to_file(outputFileName,driver=getDriver(outputFileName),index=False) print("Complete") #shapeData.Destroy() - diff --git a/lib/run_by_unit.sh b/lib/run_by_unit.sh index d3733dd6f..7715304e3 100755 --- a/lib/run_by_unit.sh +++ b/lib/run_by_unit.sh @@ -5,6 +5,10 @@ T_total_start 
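This PR removes the per-script copies of `getDriver` (deleted from `clip_vectors_to_wbd.py` and `derive_headwaters.py` above) in favor of the shared helper imported from `utils.shared_functions`. Judging from those removed copies, the helper is just an extension-to-OGR-driver lookup along these lines (a sketch; the shared version may add error handling):

from os.path import splitext

def getDriver(filename):
    # Map the output file extension to the OGR driver name geopandas expects
    driverDictionary = {'.gpkg': 'GPKG', '.geojson': 'GeoJSON', '.shp': 'ESRI Shapefile'}
    return driverDictionary[splitext(filename)[1]]

# e.g. gdf.to_file('nwm_subset_streams.gpkg', driver=getDriver('nwm_subset_streams.gpkg'), index=False)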
echo -e $startDiv"Parameter Values" echo -e "extent=$extent" +echo -e "agree_DEM_buffer=$agree_DEM_buffer" +echo -e "wbd_buffer=$wbd_buffer" +echo -e "ms_buffer_dist=$ms_buffer_dist" +echo -e "lakes_buffer_dist_meters=$lakes_buffer_dist_meters" echo -e "negative_burn_value=$negative_burn_value" echo -e "max_split_distance_meters=$max_split_distance_meters" echo -e "mannings_n=$manning_n" @@ -30,6 +34,7 @@ huc2Identifier=${hucNumber:0:2} input_NHD_WBHD_layer=WBDHU$hucUnitLength input_DEM=$inputDataDir/nhdplus_rasters/HRNHDPlusRasters"$huc4Identifier"/elev_cm.tif input_NLD=$inputDataDir/nld_vectors/huc2_levee_lines/nld_preprocessed_"$huc2Identifier".gpkg + # Define the landsea water body mask using either Great Lakes or Ocean polygon input # if [[ $huc2Identifier == "04" ]] ; then input_LANDSEA=$inputDataDir/landsea/gl_water_polygons.gpkg @@ -38,6 +43,7 @@ else input_LANDSEA=$inputDataDir/landsea/water_polygons_us.gpkg fi +# Define streams and headwaters based on extent # if [ "$extent" = "MS" ]; then input_nhd_flowlines=$input_nhd_flowlines_ms input_nhd_headwaters=$input_nhd_headwaters_ms @@ -59,7 +65,7 @@ echo -e $startDiv"Buffer WBD $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/wbd_buffered.gpkg ] && \ -ogr2ogr -f GPKG -dialect sqlite -sql "select ST_buffer(geom, 5000) from 'WBDHU$hucUnitLength'" $outputHucDataDir/wbd_buffered.gpkg $outputHucDataDir/wbd.gpkg +ogr2ogr -f GPKG -dialect sqlite -sql "select ST_buffer(geom, $wbd_buffer) from 'WBDHU$hucUnitLength'" $outputHucDataDir/wbd_buffered.gpkg $outputHucDataDir/wbd.gpkg Tcount ## Subset Vector Layers ## @@ -67,7 +73,7 @@ echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ] && \ -$libDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_NWM_Flows -s $input_nhd_flowlines -l $input_NWM_Lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_NWM_Catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -p $extent +$libDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_NWM_Flows -s $input_nhd_flowlines -l $input_NWM_Lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_NWM_Catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg Tcount if [ "$extent" = "MS" ]; then diff --git a/lib/split_flows.py b/lib/split_flows.py index db11d6d4a..c8de19b34 100755 --- a/lib/split_flows.py +++ b/lib/split_flows.py @@ -21,7 +21,8 @@ from os.path import isfile from os import remove from collections import OrderedDict -import buildstreamtraversal +import build_stream_traversal +from utils.shared_functions import getDriver flows_fileName = sys.argv[1] dem_fileName = sys.argv[2] @@ -29,10 +30,12 @@ split_points_fileName = sys.argv[4] maxLength = float(sys.argv[5]) slope_min = float(sys.argv[6]) -huc8_filename = 
sys.argv[7] +wbd8_clp_filename = sys.argv[7] lakes_filename = sys.argv[8] lakes_buffer_input = float(sys.argv[9]) +wbd = gpd.read_file(wbd8_clp_filename) + toMetersConversion = 1e-3 print('Loading data ...') @@ -42,7 +45,7 @@ print ("No relevant streams within HUC boundaries.") sys.exit(0) -WBD8 = gpd.read_file(huc8_filename) +wbd8 = gpd.read_file(wbd8_clp_filename) #dem = Raster(dem_fileName) dem = rasterio.open(dem_fileName,'r') if isfile(lakes_filename): @@ -50,17 +53,21 @@ else: lakes = None -WBD8 = WBD8.filter(items=['fossid', 'geometry']) -WBD8 = WBD8.set_index('fossid') +wbd8 = wbd8.filter(items=['fossid', 'geometry']) +wbd8 = wbd8.set_index('fossid') flows = flows.explode() # temp -flows = flows.to_crs(WBD8.crs) +flows = flows.to_crs(wbd8.crs) split_flows = [] slopes = [] -HYDROID = 'HydroID' -split_endpoints = OrderedDict() +hydro_id = 'HydroID' + +# split at HUC8 boundaries +print ('splitting stream segments at HUC8 boundaries') +flows = gpd.overlay(flows, wbd8, how='union').explode().reset_index(drop=True) + # check for lake features if lakes is not None: if len(lakes) > 0: @@ -165,9 +172,9 @@ split_flows_gdf = split_flows_gdf.drop_duplicates() # Create Ids and Network Traversal Columns -addattributes = buildstreamtraversal.BuildStreamTraversalColumns() +addattributes = build_stream_traversal.build_stream_traversal_columns() tResults=None -tResults = addattributes.execute(split_flows_gdf, WBD8, HYDROID) +tResults = addattributes.execute(split_flows_gdf, wbd8, hydro_id) if tResults[0] == 'OK': split_flows_gdf = tResults[1] else: @@ -176,14 +183,14 @@ # Get Outlet Point Only #outlet = OrderedDict() #for i,segment in split_flows_gdf.iterrows(): -# outlet[segment.geometry.coords[-1]] = segment[HYDROID] +# outlet[segment.geometry.coords[-1]] = segment[hydro_id] #hydroIDs_points = [hidp for hidp in outlet.values()] #split_points = [Point(*point) for point in outlet] # Get all vertices split_points = OrderedDict() -for row in split_flows_gdf[['geometry',HYDROID, 'NextDownID']].iterrows(): +for row in split_flows_gdf[['geometry',hydro_id, 'NextDownID']].iterrows(): lineString = row[1][0] for point in zip(*lineString.coords.xy): @@ -199,12 +206,13 @@ split_points = [Point(*point) for point in split_points] split_points_gdf = gpd.GeoDataFrame({'id': hydroIDs_points , 'geometry':split_points}, crs=flows.crs, geometry='geometry') + print('Writing outputs ...') if isfile(split_flows_fileName): remove(split_flows_fileName) -split_flows_gdf.to_file(split_flows_fileName,driver='GPKG',index=False) +split_flows_gdf.to_file(split_flows_fileName,driver=getDriver(split_flows_fileName),index=False) if isfile(split_points_fileName): remove(split_points_fileName) -split_points_gdf.to_file(split_points_fileName,driver='GPKG',index=False) +split_points_gdf.to_file(split_points_fileName,driver=getDriver(split_points_fileName),index=False) From abbe0bd4bf3b03c5000476d0dfdcd70ec17b20eb Mon Sep 17 00:00:00 2001 From: MattLuck-NOAA Date: Thu, 4 Feb 2021 11:07:24 -0500 Subject: [PATCH 013/359] Minor bug fixes Changes - corrected variable name in fim_run.sh - acquire_and_preprocess_inputs.py now creates huc_lists folder and updates file file path Fixes #252 --- CHANGELOG.md | 11 ++++++++++- fim_run.sh | 2 +- lib/acquire_and_preprocess_inputs.py | 9 ++++++--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fd90d4713..14b2a0fe1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ All notable changes to this project will be documented in this file. 
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.3.1 - 2021-02-04 - [PR #253](https://github.com/NOAA-OWP/cahaba/pull/253) + +Bug fixes to correct mismatched variable name and file path. + +### Changes + + - Corrected variable name in `fim_run.sh`. + - `acquire_and_preprocess_inputs.py` now creates `huc_lists` folder and updates file path. + ## v3.0.3.0 - 2021-02-04 - [PR #227](https://github.com/NOAA-OWP/cahaba/pull/227) Post-process to aggregate FIM outputs to HUC6 scale. @@ -18,7 +27,7 @@ Post-process to aggregate FIM outputs to HUC6 scale. - Cleaned up variable names in `split_flows.py` and `build_stream_traversal.py`. - `build_stream_traversal.py` is now assigning HydroID by midpoint instead centroid. - Cleanup of `clip_vectors_to_wbd.py`. - + ## v3.0.2.0 - 2021-01-25 - [PR #218](https://github.com/NOAA-OWP/cahaba/pull/218) Addition of an API service to schedule, run and manage `fim_run` jobs through a user-friendly web interface. diff --git a/fim_run.sh b/fim_run.sh index b2efa0ec5..a66ecb312 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -98,7 +98,7 @@ source $libDir/bash_functions.env # default values if [ "$jobLimit" = "" ] ; then - jobLimit=$defaultMaxJobs + jobLimit=$default_max_jobs fi ## Define Outputs Data Dir & Log File## diff --git a/lib/acquire_and_preprocess_inputs.py b/lib/acquire_and_preprocess_inputs.py index bd1eda194..bace41e19 100755 --- a/lib/acquire_and_preprocess_inputs.py +++ b/lib/acquire_and_preprocess_inputs.py @@ -262,9 +262,12 @@ def build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory): huc6_list = set(huc6_list) # Write huc lists to appropriate .lst files. - included_huc4_file = os.path.join(path_to_saved_data_parent_dir, 'included_huc4.lst') - included_huc6_file = os.path.join(path_to_saved_data_parent_dir, 'included_huc6.lst') - included_huc8_file = os.path.join(path_to_saved_data_parent_dir, 'included_huc8.lst') + huc_lists_dir = os.path.join(path_to_saved_data_parent_dir, 'huc_lists') + if not os.path.exists(huc_lists_dir): + os.mkdir(huc_lists_dir) + included_huc4_file = os.path.join(huc_lists_dir, 'included_huc4.lst') + included_huc6_file = os.path.join(huc_lists_dir, 'included_huc6.lst') + included_huc8_file = os.path.join(huc_lists_dir, 'included_huc8.lst') # Overly verbose file writing loops. Doing this in a pinch. with open(included_huc4_file, 'w') as f: From 2ffd97b7e9749c984697d8778859467afa26bfb6 Mon Sep 17 00:00:00 2001 From: RyanSpies-NOAA Date: Wed, 10 Feb 2021 09:46:29 -0600 Subject: [PATCH 014/359] New python script "wrappers" for using inundation.py. - Created inundation_wrapper_nwm_flows.py to produce inundation outputs using NWM recurrence flows: 1.5 year, 5 year, 10 year. - Created inundation_wrapper_custom_flow.py to produce inundation outputs with user-created flow file. - Created new tools parent directory to store inundation_wrapper_nwm_flows.py and inundation_wrapper_custom_flow.py. This resolves #220. --- CHANGELOG.md | 19 +++ tools/inundation_wrapper_custom_flow.py | 136 ++++++++++++++++++++ tools/inundation_wrapper_nwm_flows.py | 160 ++++++++++++++++++++++++ 3 files changed, 315 insertions(+) create mode 100644 tools/inundation_wrapper_custom_flow.py create mode 100755 tools/inundation_wrapper_nwm_flows.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 14b2a0fe1..4002936b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. 
+## v3.0.4.0 - 2021-02-10 - [PR #256](https://github.com/NOAA-OWP/cahaba/pull/256) + +New python script "wrappers" for using `inundation.py`. + +### Additions + + - Created `inundation_wrapper_nwm_flows.py` to produce inundation outputs using NWM recurrence flows: 1.5 year, 5 year, 10 year. + - Created `inundation_wrapper_custom_flow.py` to produce inundation outputs with user-created flow file. + - Created new `tools` parent directory to store `inundation_wrapper_nwm_flows.py` and `inundation_wrapper_custom_flow.py`. + +
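For orientation, both wrappers added by this PR are thin drivers around `inundation.inundate()`. Going by the `run_recurr_test` signature and argparse options shown in `inundation_wrapper_custom_flow.py` further below, a run with a user-supplied flow file looks roughly like this (paths and file names are illustrative placeholders, and the direct call assumes the script's module-level requirements, such as the `outputDataDir` environment variable, are satisfied):

# Command-line form, using the flags defined in the script's argparse setup:
#   python3 tools/inundation_wrapper_custom_flow.py \
#       -r /data/outputs/dev_abc/12345678/12345678 -b dev_abc -t 12345678 -y my_flows.csv
from inundation_wrapper_custom_flow import run_recurr_test

run_recurr_test(
    fim_run_dir='/data/outputs/dev_abc/12345678/12345678',  # fim_run.sh outputs for one HUC8
    branch_name='dev_abc',                                   # names the inundation output folder
    huc_id='12345678',
    input_flow_csv='my_flows.csv',  # feature_id/discharge (cms) CSV placed in _input_flow_files/
    mask_type='huc',
)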

## v3.0.3.1 - 2021-02-04 - [PR #253](https://github.com/NOAA-OWP/cahaba/pull/253) Bug fixes to correct mismatched variable name and file path. @@ -10,6 +21,7 @@ Bug fixes to correct mismatched variable name and file path. - Corrected variable name in `fim_run.sh`. - `acquire_and_preprocess_inputs.py` now creates `huc_lists` folder and updates file path. +

## v3.0.3.0 - 2021-02-04 - [PR #227](https://github.com/NOAA-OWP/cahaba/pull/227) Post-process to aggregate FIM outputs to HUC6 scale. @@ -28,6 +40,7 @@ Post-process to aggregate FIM outputs to HUC6 scale. - `build_stream_traversal.py` is now assigning HydroID by midpoint instead centroid. - Cleanup of `clip_vectors_to_wbd.py`. +

## v3.0.2.0 - 2021-01-25 - [PR #218](https://github.com/NOAA-OWP/cahaba/pull/218) Addition of an API service to schedule, run and manage `fim_run` jobs through a user-friendly web interface. @@ -36,6 +49,7 @@ Addition of an API service to schedule, run and manage `fim_run` jobs through a - `api` folder that contains all the codebase for the new service. +

## v3.0.1.0 - 2021-01-21 - [PR #206](https://github.com/NOAA-OWP/cahaba/pull/206) Preprocess MS and FR stream networks @@ -51,6 +65,7 @@ Preprocess MS and FR stream networks - Added `getDriver` to shared `functions.py`. - Cleaned up variable names and types. +

## v3.0.0.4 - 2021-01-20 - [PR #230](https://github.com/NOAA-OWP/cahaba/pull/230) Changed the directory where the `included_huc*.lst` files are being read from. @@ -59,6 +74,7 @@ Changed the directory where the `included_huc*.lst` files are being read from. - Changed the directory where the `included_huc*.lst` files are being read from. +

## v3.0.0.3 - 2021-01-14 - [PR #210](https://github.com/NOAA-OWP/cahaba/pull/210) Hotfix for handling nodata value in rasterized levee lines. @@ -68,6 +84,7 @@ Hotfix for handling nodata value in rasterized levee lines. - Resolves bug for HUCs where `$ndv > 0` (Great Lakes region). - Initialize the `nld_rasterized_elev.tif` using a value of `-9999` instead of `$ndv`. +

## v3.0.0.2 - 2021-01-06 - [PR #200](https://github.com/NOAA-OWP/cahaba/pull/200) Patch to address AHPSs mapping errors. @@ -79,6 +96,7 @@ Patch to address AHPSs mapping errors. - Updates path to latest AHPs site layer. - Updated [readme](https://github.com/NOAA-OWP/cahaba/commit/9bffb885f32dfcd95978c7ccd2639f9df56ff829) +

## v3.0.0.1 - 2020-12-31 - [PR #184](https://github.com/NOAA-OWP/cahaba/pull/184) Modifications to build and run Docker image more reliably. Cleanup on some pre-processing scripts. @@ -91,6 +109,7 @@ Modifications to build and run Docker image more reliably. Cleanup on some pre-p ### Notes - `aggregate_vector_inputs.py` doesn't work yet. Need to externally download required data to run fim_run.sh +

## v3.0.0.0 - 2020-12-22 - [PR #181](https://github.com/NOAA-OWP/cahaba/pull/181) The software released here builds on the flood inundation mapping capabilities demonstrated as part of the National Flood Interoperability Experiment, the Office of Water Prediction's Innovators Program and the National Water Center Summer Institute. The flood inundation mapping software implements the Height Above Nearest Drainage (HAND) algorithm and incorporates community feedback and lessons learned over several years. The software has been designed to meet the requirements set by stakeholders interested in flood prediction and has been developed in partnership with several entities across the water enterprise. diff --git a/tools/inundation_wrapper_custom_flow.py b/tools/inundation_wrapper_custom_flow.py new file mode 100644 index 000000000..8602bb008 --- /dev/null +++ b/tools/inundation_wrapper_custom_flow.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 + +# Created: 1/11/2021 +# Primary developer(s): ryan.spies@noaa.gov +# Purpose: This script provides the user to input a customized flow entry to produce +# inundation outputs using outputs from fim_run. Note that the flow csv must be +# formatted with "feature_id" & "discharge" columns. Flow must be in cubic m/s + +import os +import sys +import pandas as pd +import geopandas as gpd +import rasterio +import json +import csv +import argparse +import shutil + +# insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) +sys.path.insert(1, 'foss_fim/tests') +from utils.shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table +from inundation import inundate + +TEST_CASES_DIR = r'/data/inundation_review/inundation_custom_flow/' # Will update. +INPUTS_DIR = r'/data/inputs' +OUTPUTS_DIR = os.environ['outputDataDir'] + +ENDC = '\033[m' +TGREEN_BOLD = '\033[32;1m' +TGREEN = '\033[32m' +TRED_BOLD = '\033[31;1m' +TWHITE = '\033[37m' +WHITE_BOLD = '\033[37;1m' +CYAN_BOLD = '\033[36;1m' + +def run_recurr_test(fim_run_dir, branch_name, huc_id, input_flow_csv, mask_type='huc'): + + # Construct paths to development test results if not existent. + huc_id_dir_parent = os.path.join(TEST_CASES_DIR, huc_id) + if not os.path.exists(huc_id_dir_parent): + os.mkdir(huc_id_dir_parent) + branch_test_case_dir_parent = os.path.join(TEST_CASES_DIR, huc_id, branch_name) + + # Delete the entire directory if it already exists. + if os.path.exists(branch_test_case_dir_parent): + shutil.rmtree(branch_test_case_dir_parent) + + print("Running the NWM recurrence intervals for HUC: " + huc_id + ", " + branch_name + "...") + + assert os.path.exists(fim_run_dir), "Cannot locate " + fim_run_dir + + # Create paths to fim_run outputs for use in inundate(). + if "previous_fim" in fim_run_dir and "fim_2" in fim_run_dir: + rem = os.path.join(fim_run_dir, 'rem_clipped_zeroed_masked.tif') + catchments = os.path.join(fim_run_dir, 'gw_catchments_reaches_clipped_addedAttributes.tif') + else: + rem = os.path.join(fim_run_dir, 'rem_zeroed_masked.tif') + catchments = os.path.join(fim_run_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') + if mask_type == 'huc': + catchment_poly = '' + else: + catchment_poly = os.path.join(fim_run_dir, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + hydro_table = os.path.join(fim_run_dir, 'hydroTable.csv') + + # Map necessary inputs for inundation(). 
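+ # inundate() (imported from foss_fim/tests/inundation.py above) needs the REM and catchment
+ # rasters, the hydroTable.csv rating curves, the forecast CSV of feature_id/discharge (cms),
+ # and the national WBD HUC8 layer used to restrict mapping to the requested HUC; the
+ # inundation extent raster it writes is defined just below.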
+ hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' + + #benchmark_category = huc_id.split('_')[1] + current_huc = huc_id.split('_')[0] # Break off HUC ID and assign to variable. + + if not os.path.exists(branch_test_case_dir_parent): + os.mkdir(branch_test_case_dir_parent) + + + #branch_test_case_dir = os.path.join(branch_test_case_dir_parent) + + #os.makedirs(branch_test_case_dir) # Make output directory for branch. + + # Define paths to inundation_raster and forecast file. + inundation_raster = os.path.join(branch_test_case_dir_parent, branch_name + '_inund_extent.tif') + forecast = os.path.join(TEST_CASES_DIR,"_input_flow_files", input_flow_csv) + + # Copy forecast flow file into the outputs directory to all viewer to reference the flows used to create inundation_raster + shutil.copyfile(forecast,os.path.join(branch_test_case_dir_parent,input_flow_csv)) + + # Run inundate. + print("-----> Running inundate() to produce modeled inundation extent for the " + input_flow_csv) + inundate( + rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, + depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True + ) + + print("-----> Inundation mapping complete.") + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Inundation mapping for FOSS FIM using a user supplied flow data file. Inundation outputs are stored in the /inundation_review/inundation_custom_flow/ directory.') + parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh (e.g. data/ouputs/dev_abc/12345678_dev/12345678)',required=True) + parser.add_argument('-b', '--branch-name',help='The name of the working branch in which features are being tested (used to name the output inundation directory) -> type=str',required=True,default="") + parser.add_argument('-t', '--huc-id',help='The huc id to use (single huc). Format as: xxxxxxxx, e.g. 12345678',required=True,default="") + parser.add_argument('-m', '--mask-type', help='Optional: specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") + parser.add_argument('-y', '--input-flow-csv',help='Filename of the user generated (customized) csv. Must contain nwm feature ids and flow value(s) (units: cms) --> put this file in the "_input_flow_files" directory',required=True, default="") + + + # Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + valid_huc_id_list = ['nwm_recurr'] + + exit_flag = False # Default to False. + print() + + # Ensure fim_run_dir exists. + if not os.path.exists(args['fim_run_dir']): + print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided fim_run_dir (-r) " + CYAN_BOLD + args['fim_run_dir'] + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) + print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. data/outputs/123456/123456." + ENDC) + print() + exit_flag = True + + + if args['input_flow_csv'] == '': + print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided input_flow_csv (-y) " + CYAN_BOLD + args['input_flow_csv'] + WHITE_BOLD + " is not provided. 
Please provide a csv file with nwm featureid and flow values" + ENDC) + exit_flag = True + + + if exit_flag: + print() + sys.exit() + + + else: + + run_recurr_test(**args) diff --git a/tools/inundation_wrapper_nwm_flows.py b/tools/inundation_wrapper_nwm_flows.py new file mode 100755 index 000000000..1854bb5c3 --- /dev/null +++ b/tools/inundation_wrapper_nwm_flows.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 + +# Created: 1/10/2021 +# Primary developer(s): ryan.spies@noaa.gov +# Purpose: This script provides the user to generate inundation outputs using +# the NWM Recurrence Interval flow data for 1.5yr, 5yr, & 10yr events. + +import os +import sys +import pandas as pd +import geopandas as gpd +import rasterio +import json +import csv +import argparse +import shutil + +# insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) +sys.path.insert(1, 'foss_fim/tests') +from utils.shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table +from inundation import inundate + +TEST_CASES_DIR = r'/data/inundation_review/inundation_nwm_recurr/' # Will update. +INPUTS_DIR = r'/data/inputs' +OUTPUTS_DIR = os.environ['outputDataDir'] + +ENDC = '\033[m' +TGREEN_BOLD = '\033[32;1m' +TGREEN = '\033[32m' +TRED_BOLD = '\033[31;1m' +TWHITE = '\033[37m' +WHITE_BOLD = '\033[37;1m' +CYAN_BOLD = '\033[36;1m' + +def run_recurr_test(fim_run_dir, branch_name, huc_id, magnitude, mask_type='huc'): + + # Construct paths to development test results if not existent. + huc_id_dir_parent = os.path.join(TEST_CASES_DIR, huc_id) + if not os.path.exists(huc_id_dir_parent): + os.mkdir(huc_id_dir_parent) + branch_test_case_dir_parent = os.path.join(TEST_CASES_DIR, huc_id, branch_name) + + # Delete the entire directory if it already exists. + if os.path.exists(branch_test_case_dir_parent): + shutil.rmtree(branch_test_case_dir_parent) + + print("Running the NWM recurrence intervals for huc_id: " + huc_id + ", " + branch_name + "...") + + fim_run_parent = os.path.join(fim_run_dir) + assert os.path.exists(fim_run_parent), "Cannot locate " + fim_run_parent + + # Create paths to fim_run outputs for use in inundate(). + if "previous_fim" in fim_run_parent and "fim_2" in fim_run_parent: + rem = os.path.join(fim_run_parent, 'rem_clipped_zeroed_masked.tif') + catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_clipped_addedAttributes.tif') + else: + rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') + catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') + if mask_type == 'huc': + catchment_poly = '' + else: + catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') + hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') + + # Map necessary inputs for inundation(). + hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' + + #benchmark_category = huc_id.split('_')[1] + current_huc = huc_id.split('_')[0] # Break off HUC ID and assign to variable. + + if not os.path.exists(branch_test_case_dir_parent): + os.mkdir(branch_test_case_dir_parent) + + # Check if magnitude is list of magnitudes or single value. + magnitude_list = magnitude + if type(magnitude_list) != list: + magnitude_list = [magnitude_list] + + for magnitude in magnitude_list: + # Construct path to validation raster and forecast file. 
+ + branch_test_case_dir = os.path.join(branch_test_case_dir_parent, magnitude) + + os.makedirs(branch_test_case_dir) # Make output directory for branch. + + # Define paths to inundation_raster and forecast file. + inundation_raster = os.path.join(branch_test_case_dir, branch_name + '_inund_extent.tif') + forecast = os.path.join(TEST_CASES_DIR, 'nwm_recurr_flow_data', 'recurr_' + magnitude + '_cms.csv') + + # Run inundate. + print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") + inundate( + rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, + depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True + ) + + print("-----> Inundation mapping complete.") + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Inundation mapping for FOSS FIM using streamflow recurrence interflow data. Inundation outputs are stored in the /inundation_review/inundation_nwm_recurr/ directory.') + parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh (e.g. data/ouputs/dev_abc/12345678_dev_test)',required=True) + parser.add_argument('-b', '--branch-name',help='The name of the working branch in which features are being tested (used to name the output inundation directory) -> type=str',required=True,default="") + parser.add_argument('-t', '--huc-id',help='Provide either a single hucid (Format as: xxxxxxxx, e.g. 12345678) or a filepath to a list of hucids',required=True,default="") + parser.add_argument('-m', '--mask-type', help='Optional: specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") + parser.add_argument('-y', '--magnitude',help='The magnitude (reccur interval) to run. Leave blank to use default intervals (options: 1_5, 5_0, 10_0).',required=False, default="") + + + # Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + valid_test_id_list = ['nwm_recurr'] + + exit_flag = False # Default to False. + print() + + # check if user provided a single huc_id or a file path to a list of huc ids + if args['huc_id'].isdigit(): + huc_list = [args['huc_id']] + elif os.path.exists(args['huc_id']): # check if provided str is a valid path + with open(args['huc_id'],newline='') as list_file: + read_list = csv.reader(list_file) + huc_list=[i for row in read_list for i in row] + else: + print(TRED_BOLD + "Warning: " + WHITE_BOLD + "Invalid huc-id entry: " + CYAN_BOLD + args['fim_run_dir'] + WHITE_BOLD + " --> check that huc_id number or list file is valid") + exit_flag = True + print(huc_list) + if exit_flag: + print() + sys.exit() + + for huc_id in huc_list: + args['huc_id'] = huc_id + # Ensure fim_run_dir exists. + fim_run_dir = args['fim_run_dir'] + os.sep + huc_id + if not os.path.exists(fim_run_dir): + print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided fim_run_dir (-r) " + CYAN_BOLD + fim_run_dir + WHITE_BOLD + " could not be located in the 'outputs' directory." + ENDC) + print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. data/outputs/123456/123456." 
+ ENDC) + print() + exit_flag = True + + # Ensure valid flow recurr intervals + default_flow_intervals = ['1_5','5_0','10_0'] + if args['magnitude'] == '': + args['magnitude'] = default_flow_intervals + print(TRED_BOLD + "Using default flow reccurence intervals: " + WHITE_BOLD + str(default_flow_intervals)[1:-1]) + else: + if set(default_flow_intervals).issuperset(set(args['magnitude'])) == False: + print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. NWM Recurrence Interval options include: " + str(default_flow_intervals)[1:-1] + ENDC) + exit_flag = True + + if exit_flag: + print() + sys.exit() + + else: + run_recurr_test(fim_run_dir,args['branch_name'],huc_id,args['magnitude'],args['mask_type']) From b147f45c9b88343361a6dc10d5922442f666090f Mon Sep 17 00:00:00 2001 From: Brad Date: Wed, 10 Feb 2021 14:00:16 -0600 Subject: [PATCH 015/359] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 47d3561ab..c2fca221e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ### Cahaba: Flood Inundation Mapping for U.S. National Water Model -Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Weather Service (NWS). Software enables inundation mapping capability by generating Relative Elevation Models (REMs) and Synthetic Rating Curves (SRCs). Included are tests to evaluate skill and computational efficiency as well as functions to generate inundation maps. +Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Weather Service (NWS). This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs) and Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIM). This repository also includes functionality to generate FIMs as well as tests to evaluate FIM prediction skill. ## Dependencies From 57cb283c8d194893a0b96af1b6c95b333b651fe1 Mon Sep 17 00:00:00 2001 From: Brad Date: Wed, 10 Feb 2021 14:01:24 -0600 Subject: [PATCH 016/359] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c2fca221e..5cf6f92ca 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ### Cahaba: Flood Inundation Mapping for U.S. National Water Model -Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Weather Service (NWS). This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs) and Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIM). This repository also includes functionality to generate FIMs as well as tests to evaluate FIM prediction skill. +Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Weather Service (NWS). 
This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIM). This repository also includes functionality to generate FIMs as well as tests to evaluate FIM prediction skill. ## Dependencies From ad89e6d36691f8fa9a4a743e15faee8e8e10c20c Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Fri, 12 Feb 2021 08:57:03 -0600 Subject: [PATCH 017/359] Updated MS Crosswalk method to address gaps in FIM. - Fixed typo in stream midpoint calculation in split_flows.py and add_crosswalk.py. - add_crosswalk.py now restricts the MS crosswalk to NWM MS catchments. - add_crosswalk.py now performs a secondary MS crosswalk selection by nearest NWM MS catchment. This resolves #232. --- CHANGELOG.md | 11 +++++++++++ fim_run.sh | 6 ++++-- lib/add_crosswalk.py | 18 +++++++++++++++++- lib/build_stream_traversal.py | 2 +- lib/run_by_unit.sh | 4 ++++ 5 files changed, 37 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4002936b9..d89843748 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.4.1 - 2021-02-12 - [PR #261](https://github.com/NOAA-OWP/cahaba/pull/261) + +Updated MS Crosswalk method to address gaps in FIM. + +### Changes + +- Fixed typo in stream midpoint calculation in `split_flows.py` and `add_crosswalk.py`. +- `add_crosswalk.py` now restricts the MS crosswalk to NWM MS catchments. +- `add_crosswalk.py` now performs a secondary MS crosswalk selection by nearest NWM MS catchment. + +

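A minimal sketch of why the midpoint typo mattered (reach coordinates are hypothetical): with shapely, `interpolate(0.05, normalized=True)` samples a point only 5% along the reach, near the upstream end, while `0.5` returns the true midpoint used for the crosswalk spatial join.

from shapely.geometry import LineString

# Hypothetical 1 km reach running due east.
reach = LineString([(0, 0), (1000, 0)])

# Old behavior: 5% along the reach, near the upstream end.
near_upstream = reach.interpolate(0.05, normalized=True)  # POINT (50 0)

# Fixed behavior: the true midpoint used for the sjoin against NWM catchments.
midpoint = reach.interpolate(0.5, normalized=True)        # POINT (500 0)

print(near_upstream, midpoint)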
## v3.0.4.0 - 2021-02-10 - [PR #256](https://github.com/NOAA-OWP/cahaba/pull/256) New python script "wrappers" for using `inundation.py`. diff --git a/fim_run.sh b/fim_run.sh index a66ecb312..8c6862dae 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -112,8 +112,10 @@ logFile=$outputRunDataDir/logs/summary.log ## Define inputs export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg export input_NWM_Lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg -export input_NWM_Catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg -export input_NWM_Flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg +export input_NWM_Catchments_fr=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg +export input_NWM_Catchments_ms=$inputDataDir/nwm_hydrofabric/nwm_catchments_ms.gpkg +export input_NWM_Flows_fr=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg +export input_NWM_Flows_ms=$inputDataDir/nwm_hydrofabric/nwm_flows_ms.gpkg export input_NWM_Headwaters=$inputDataDir/nwm_hydrofabric/nwm_headwaters.gpkg export input_nhd_flowlines_fr=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_fr_adjusted.gpkg export input_nhd_flowlines_ms=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_ms_adjusted.gpkg diff --git a/lib/add_crosswalk.py b/lib/add_crosswalk.py index c5ed0dd5a..d3f052c3a 100755 --- a/lib/add_crosswalk.py +++ b/lib/add_crosswalk.py @@ -58,7 +58,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f hydroID = [] for i,lineString in enumerate(input_flows.geometry): hydroID = hydroID + [input_flows.loc[i,'HydroID']] - stream_midpoint = stream_midpoint + [lineString.interpolate(0.05,normalized=True)] + stream_midpoint = stream_midpoint + [lineString.interpolate(0.5,normalized=True)] input_flows_midpoint = gpd.GeoDataFrame({'HydroID':hydroID, 'geometry':stream_midpoint}, crs=input_flows.crs, geometry='geometry') input_flows_midpoint = input_flows_midpoint.set_index('HydroID') @@ -66,6 +66,22 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f # Create crosswalk crosswalk = gpd.sjoin(input_flows_midpoint, input_nwmcat, how='left', op='within').reset_index() crosswalk = crosswalk.rename(columns={"index_right": "feature_id"}) + + # fill in missing ms + crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()] + for index, stream in crosswalk_missing.iterrows(): + + # find closest nwm catchment by distance + distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry] + min_dist = min(distances) + nwmcat_index=distances.index(min_dist) + + # update crosswalk + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'feature_id'] = input_nwmcat.iloc[nwmcat_index].name + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'AreaSqKM'] = input_nwmcat.iloc[nwmcat_index].AreaSqKM + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Length'] = input_nwmcat.iloc[nwmcat_index].Shape_Length + crosswalk.loc[crosswalk.HydroID==stream.HydroID,'Shape_Area'] = input_nwmcat.iloc[nwmcat_index].Shape_Area + crosswalk = crosswalk.filter(items=['HydroID', 'feature_id']) crosswalk = crosswalk.merge(input_nwmflows[['feature_id','order_']],on='feature_id') diff --git a/lib/build_stream_traversal.py b/lib/build_stream_traversal.py index a30fe04ed..daca019f2 100644 --- a/lib/build_stream_traversal.py +++ b/lib/build_stream_traversal.py @@ -45,7 +45,7 @@ def execute(self, streams, wbd8, hydro_id): # Get stream midpoint stream_midpoint = [] for i,lineString in enumerate(streams.geometry): - stream_midpoint = stream_midpoint + 
[lineString.interpolate(0.05,normalized=True)] + stream_midpoint = stream_midpoint + [lineString.interpolate(0.5,normalized=True)] stream_md_gpd = gpd.GeoDataFrame({'geometry':stream_midpoint}, crs=streams.crs, geometry='geometry') stream_wbdjoin = gpd.sjoin(stream_md_gpd, wbd8, how='left', op='within') diff --git a/lib/run_by_unit.sh b/lib/run_by_unit.sh index 7715304e3..9f34bc222 100755 --- a/lib/run_by_unit.sh +++ b/lib/run_by_unit.sh @@ -47,9 +47,13 @@ fi if [ "$extent" = "MS" ]; then input_nhd_flowlines=$input_nhd_flowlines_ms input_nhd_headwaters=$input_nhd_headwaters_ms + input_NWM_Flows=$input_NWM_Flows_ms + input_NWM_Catchments=$input_NWM_Catchments_ms else input_nhd_flowlines=$input_nhd_flowlines_fr input_nhd_headwaters=$input_nhd_headwaters_fr + input_NWM_Flows=$input_NWM_Flows_fr + input_NWM_Catchments=$input_NWM_Catchments_fr fi ## GET WBD ## From f474736240f6d27c7a1d5637efd88134214db87b Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Fri, 12 Feb 2021 15:35:11 -0600 Subject: [PATCH 018/359] Addresses issue when running on HUC6 scale * src should be fixed and slightly smaller by removing whitespace * rasters are about the same size as running fim as huc6 (compressed and tiled; aggregated are slightly larger) * naming convention and feature id attribute are only added to the aggregated hucs (this bullet and the next one assume that Viz will not use the huc8 layer outputs) * HydroIDs are different for huc6 vs aggregated huc8s mostly due to forced split at huc boundaries (so long we use consistent workflow it shouldn't matter) * known issue where sometimes an incoming stream is not included in the final selection (resolves #238) will affect aggregate outputs --- CHANGELOG.md | 19 +++- lib/aggregate_fim_outputs.py | 164 +++++++++++++++++++++++++---------- lib/output_cleanup.py | 20 ----- 3 files changed, 132 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d89843748..65cdeb4f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,28 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.4.2 - 2021-02-12 - [PR #255](https://github.com/NOAA-OWP/cahaba/pull/255) + +Addresses issue when running on HUC6 scale. + +### Changes + + - `src.json` should be fixed and slightly smaller by removing whitespace. + - Rasters are about the same size as running fim as huc6 (compressed and tiled; aggregated are slightly larger). + - Naming convention and feature id attribute are only added to the aggregated hucs. + - HydroIDs are different for huc6 vs aggregated huc8s mostly due to forced split at huc boundaries (so long we use consistent workflow it shouldn't matter). + - Fixed known issue where sometimes an incoming stream is not included in the final selection will affect aggregate outputs. + +

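A minimal sketch of the HUC6 grouping this aggregation relies on (folder names are hypothetical): HUC8 unit outputs are bucketed by their first six digits so hydrotables can be appended and rasters mosaicked per HUC6.

from collections import defaultdict

# Hypothetical fim_run.sh output folders; a HUC6-scale run can appear alongside HUC8s.
huc_list = ['12090301', '12090302', '12100101', '121002']

huc6_outputs = defaultdict(list)
for huc in huc_list:
    huc6_outputs[huc[0:6]].append(huc)

print(dict(huc6_outputs))
# {'120903': ['12090301', '12090302'], '121001': ['12100101'], '121002': ['121002']}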
## v3.0.4.1 - 2021-02-12 - [PR #261](https://github.com/NOAA-OWP/cahaba/pull/261) Updated MS Crosswalk method to address gaps in FIM. ### Changes -- Fixed typo in stream midpoint calculation in `split_flows.py` and `add_crosswalk.py`. -- `add_crosswalk.py` now restricts the MS crosswalk to NWM MS catchments. -- `add_crosswalk.py` now performs a secondary MS crosswalk selection by nearest NWM MS catchment. + - Fixed typo in stream midpoint calculation in `split_flows.py` and `add_crosswalk.py`. + - `add_crosswalk.py` now restricts the MS crosswalk to NWM MS catchments. + - `add_crosswalk.py` now performs a secondary MS crosswalk selection by nearest NWM MS catchment.

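A small sketch of the secondary crosswalk fallback noted above, using toy catchment polygons and hypothetical feature_ids: when a stream midpoint falls outside every NWM MS catchment, the nearest catchment by distance supplies the feature_id.

import geopandas as gpd
from shapely.geometry import Point, Polygon

# Toy NWM MS catchments; the midpoint below sits in the gap between them,
# so a 'within' spatial join would leave its feature_id unassigned.
input_nwmcat = gpd.GeoDataFrame(
    {'feature_id': [101, 102]},
    geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
              Polygon([(2, 0), (3, 0), (3, 1), (2, 1)])])
midpoint = Point(1.4, 0.5)

# Fallback: assign the catchment with the smallest distance to the midpoint.
distances = [midpoint.distance(poly) for poly in input_nwmcat.geometry]
nearest_index = distances.index(min(distances))
print(input_nwmcat.iloc[nearest_index]['feature_id'])  # 101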
## v3.0.4.0 - 2021-02-10 - [PR #256](https://github.com/NOAA-OWP/cahaba/pull/256) diff --git a/lib/aggregate_fim_outputs.py b/lib/aggregate_fim_outputs.py index 1296fb8cc..edafd93a3 100644 --- a/lib/aggregate_fim_outputs.py +++ b/lib/aggregate_fim_outputs.py @@ -6,12 +6,15 @@ import json import rasterio from rasterio.merge import merge +import shutil +import csv from utils.shared_variables import PREP_PROJECTION def aggregate_fim_outputs(fim_out_dir): print ("aggregating outputs to HUC6 scale") + drop_folders = ['logs'] huc_list = [huc for huc in os.listdir(fim_out_dir) if huc not in drop_folders] huc6_list = [str(huc[0:6]) for huc in os.listdir(fim_out_dir) if huc not in drop_folders] @@ -21,76 +24,141 @@ def aggregate_fim_outputs(fim_out_dir): os.makedirs(os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6])), exist_ok=True) - # merge hydrotable + # original file paths + hydrotable_filename = os.path.join(fim_out_dir,huc,'hydroTable.csv') + src_filename = os.path.join(fim_out_dir,huc,'src.json') + + # aggregate file name paths aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6]),'hydroTable.csv') - hydrotable = pd.read_csv(os.path.join(fim_out_dir,huc,'hydroTable.csv')) + aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6]),f'rating_curves_{huc[0:6]}.json') - # write out hydrotable - if os.path.isfile(aggregate_hydrotable): - hydrotable.to_csv(aggregate_hydrotable,index=False, mode='a',header=False) - else: - hydrotable.to_csv(aggregate_hydrotable,index=False) + if len(huc)> 6: - del hydrotable + # open hydrotable + hydrotable = pd.read_csv(hydrotable_filename) - # merge src - aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6]),'src.json') - src = open(os.path.join(fim_out_dir,huc,'src.json')) - src = json.load(src) + # write/append aggregate hydrotable + if os.path.isfile(aggregate_hydrotable): + hydrotable.to_csv(aggregate_hydrotable,index=False, mode='a',header=False) + else: + hydrotable.to_csv(aggregate_hydrotable,index=False) - # write out src - if os.path.isfile(aggregate_src): - with open(aggregate_src, 'a') as outfile: - json.dump(src, outfile) - else: - with open(aggregate_src, 'w') as outfile: - json.dump(src, outfile) + del hydrotable + + # open src + src = open(src_filename) + src = json.load(src) + + # write/append aggregate src + if os.path.isfile(aggregate_src): + + with open(aggregate_src, "r+") as file: + data = json.load(file) + data.update(src) + + with open(aggregate_src, 'w') as outfile: + json.dump(data, outfile) + else: + with open(aggregate_src, 'w') as outfile: + json.dump(src, outfile) - del src + del src + + else: + shutil.copy(hydrotable_filename, aggregate_hydrotable) + shutil.copy(src_filename, aggregate_src) for huc6 in huc6_list: + + ## add feature_id to aggregate src + aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),'hydroTable.csv') + aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),f'rating_curves_{huc6}.json') + + # Open aggregate src for writing feature_ids to + src_data = {} + with open(aggregate_src) as jsonf: + src_data = json.load(jsonf) + + with open(aggregate_hydrotable) as csvf: + csvReader = csv.DictReader(csvf) + + for row in csvReader: + if row['HydroID'].lstrip('0') in src_data and 'nwm_feature_id' not in src_data[row['HydroID'].lstrip('0')]: + src_data[row['HydroID'].lstrip('0')]['nwm_feature_id'] = row['feature_id'] + + # Write src_data to JSON file + with open(aggregate_src, 'w') as 
jsonf: + json.dump(src_data, jsonf) + + ## aggregate rasters huc6_dir = os.path.join(fim_out_dir,'aggregate_fim_outputs',huc6) - huc6_filter = [path.startswith(huc6) for path in huc_list] - subset_huc6_list = [i for (i, v) in zip(huc_list, huc6_filter) if v] + # aggregate file paths + rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}.tif') + catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}.tif') + + if huc6 not in huc_list: + + huc6_filter = [path.startswith(huc6) for path in huc_list] + subset_huc6_list = [i for (i, v) in zip(huc_list, huc6_filter) if v] + + # aggregate and mosaic rem + rem_list = [os.path.join(fim_out_dir,huc,'rem_zeroed_masked.tif') for huc in subset_huc6_list] + + if len(rem_list) > 1: + + rem_files_to_mosaic = [] + + for rem in rem_list: - # aggregate and mosaic rem - rem_list = [os.path.join(fim_out_dir,huc,'rem_zeroed_masked.tif') for huc in subset_huc6_list] + rem_src = rasterio.open(rem) + rem_files_to_mosaic.append(rem_src) - rem_files_to_mosaic = [] + mosaic, out_trans = merge(rem_files_to_mosaic) + out_meta = rem_src.meta.copy() + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) - for rem in rem_list: - rem_src = rasterio.open(rem) - rem_files_to_mosaic.append(rem_src) + with rasterio.open(rem_mosaic, "w", **out_meta, tiled=True, blockxsize=256, blockysize=256, BIGTIFF='YES') as dest: + dest.write(mosaic) - mosaic, out_trans = merge(rem_files_to_mosaic) - out_meta = rem_src.meta.copy() - out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION}) + del rem_files_to_mosaic,rem_src,out_meta,mosaic - rem_mosaic = os.path.join(huc6_dir,'rem_zeroed_masked.tif') - with rasterio.open(rem_mosaic, "w", **out_meta) as dest: - dest.write(mosaic) + elif len(rem_list)==1: - del rem_files_to_mosaic,rem_src,out_meta,mosaic + shutil.copy(rem_list[0], rem_mosaic) - # aggregate and mosaic catchments - catchment_list = [os.path.join(fim_out_dir,huc,'gw_catchments_reaches_filtered_addedAttributes.tif') for huc in subset_huc6_list] + # aggregate and mosaic catchments + catchment_list = [os.path.join(fim_out_dir,huc,'gw_catchments_reaches_filtered_addedAttributes.tif') for huc in subset_huc6_list] - cat_files_to_mosaic = [] + if len(catchment_list) > 1: - for cat in catchment_list: - cat_src = rasterio.open(cat) - cat_files_to_mosaic.append(cat_src) + cat_files_to_mosaic = [] - mosaic, out_trans = merge(cat_files_to_mosaic) - out_meta = cat_src.meta.copy() - out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION}) + for cat in catchment_list: + cat_src = rasterio.open(cat) + cat_files_to_mosaic.append(cat_src) - catchment_mosaic = os.path.join(huc6_dir,'gw_catchments_reaches_filtered_addedAttributes.tif') - with rasterio.open(catchment_mosaic, "w", **out_meta) as dest: - dest.write(mosaic) + mosaic, out_trans = merge(cat_files_to_mosaic) + out_meta = cat_src.meta.copy() + + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) + + with rasterio.open(catchment_mosaic, "w", **out_meta, tiled=True, blockxsize=256, blockysize=256, BIGTIFF='YES') as dest: + dest.write(mosaic) + + del 
cat_files_to_mosaic,cat_src,out_meta,mosaic + + elif len(catchment_list)==1: + + shutil.copy(catchment_list[0], catchment_mosaic) + + else: + # original file paths + rem_filename = os.path.join(fim_out_dir,huc6,'rem_zeroed_masked.tif') + catchment_filename = os.path.join(fim_out_dir,huc6,'gw_catchments_reaches_filtered_addedAttributes.tif') - del cat_files_to_mosaic,cat_src,out_meta,mosaic + shutil.copy(rem_filename, rem_mosaic) + shutil.copy(catchment_filename, catchment_mosaic) if __name__ == '__main__': diff --git a/lib/output_cleanup.py b/lib/output_cleanup.py index f4ea3ea19..b253598cf 100755 --- a/lib/output_cleanup.py +++ b/lib/output_cleanup.py @@ -50,26 +50,6 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod # Step 1, keep only files that Viz needs whitelist_directory(output_folder_path, viz_whitelist, additional_whitelist) - # Step 2, add feature_id to src.json and rename file - # Open src.json for writing feature_ids to - src_data = {} - with open(os.path.join(output_folder_path, 'src.json')) as jsonf: - src_data = json.load(jsonf) - - with open(os.path.join(output_folder_path, 'hydroTable.csv')) as csvf: - csvReader = csv.DictReader(csvf) - for row in csvReader: - if row['HydroID'].lstrip('0') in src_data and 'nwm_feature_id' not in src_data[row['HydroID'].lstrip('0')]: - src_data[row['HydroID'].lstrip('0')]['nwm_feature_id'] = row['feature_id'] - - # Write src_data to JSON file - with open(os.path.join(output_folder_path, f'rating_curves_{huc_number}.json'), 'w') as jsonf: - json.dump(src_data, jsonf) - - # Step 3, copy files to desired names - shutil.copy(os.path.join(output_folder_path, 'rem_zeroed_masked.tif'), os.path.join(output_folder_path, f'hand_grid_{huc_number}.tif')) - shutil.copy(os.path.join(output_folder_path, 'gw_catchments_reaches_filtered_addedAttributes.tif'), os.path.join(output_folder_path, f'catchments_{huc_number}.tif')) - def whitelist_directory(directory_path, whitelist, additional_whitelist): # Add any additional files to the whitelist that the user wanted to keep if additional_whitelist: From d615a6ca459e5de6d26c17bfa83b234c01dab8a9 Mon Sep 17 00:00:00 2001 From: RyanSpies-NOAA Date: Fri, 12 Feb 2021 15:59:00 -0600 Subject: [PATCH 019/359] Modified rem.py with a new function to output HAND reference elev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Function "make_catchment_hydroid_dict" creates a df of pixel catchment ids and overlapping hydroids - Merge hydroid df and thalweg minimum elevation df - Produces new output containing all catchment ids and min thalweg elevation value → “hand_ref_elev_table.csv” - Overwrites the demDerived_reaches_split.gpk layer --> adding additional attribute: “Min_Thal_Elev_meters” to view the elevation value for each hydroid --- CHANGELOG.md | 12 +++++ lib/rem.py | 106 ++++++++++++++++++++++++++++++++++++--------- lib/run_by_unit.sh | 2 +- 3 files changed, 99 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65cdeb4f9..7b469c5c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,18 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.4.3 - 2021-02-12 - [PR #254](https://github.com/NOAA-OWP/cahaba/pull/254) + +Modified `rem.py` with a new function to output HAND reference elev. + +### Changes + + - Function `make_catchment_hydroid_dict` creates a df of pixel catchment ids and overlapping hydroids. 
+ - Merges the hydroid df and the thalweg minimum elevation df. + - Produces a new output containing all catchment ids and min thalweg elevation values, named `hand_ref_elev_table.csv`. + - Overwrites the `demDerived_reaches_split.gpkg` layer by adding the additional attribute `Min_Thal_Elev_meters` to view the elevation value for each hydroid. +

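A toy version (hypothetical ids and elevations) of the table the new `rem.py` code assembles: pixel-catchment ids are mapped to both a HydroID and a minimum thalweg elevation, merged, written to `hand_ref_elev_table.csv`, and summarized per HydroID.

import pandas as pd

# Toy stand-ins for the two dictionaries built while windowing the rasters.
catchment_hydroid_dict = {101: 7170001, 102: 7170001, 103: 7170002}
catchment_min_dict = {101: 231.4, 102: 230.9, 103: 228.7}

hydroid_df = pd.DataFrame.from_dict(catchment_hydroid_dict, orient='index', columns=['HydroID'])
min_elev_df = pd.DataFrame.from_dict(catchment_min_dict, orient='index', columns=['Min_Thal_Elev_meters'])

merge_df = hydroid_df.merge(min_elev_df, left_index=True, right_index=True)
merge_df.index.name = 'pixelcatch_id'

# Median of the pixel-catchment minimums per HydroID, as joined back onto the
# demDerived_reaches_split layer.
print(merge_df.groupby('HydroID').median())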
## v3.0.4.2 - 2021-02-12 - [PR #255](https://github.com/NOAA-OWP/cahaba/pull/255) Addresses issue when running on HUC6 scale. diff --git a/lib/rem.py b/lib/rem.py index aa7faeafa..411380070 100755 --- a/lib/rem.py +++ b/lib/rem.py @@ -5,13 +5,16 @@ import numpy as np import argparse import os +import pandas as pd from osgeo import ogr, gdal +import geopandas as gpd +from utils.shared_functions import getDriver -def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster): +def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, hand_ref_elev_fileName, dem_reaches_filename): """ Calculates REM/HAND/Detrended DEM - + Parameters ---------- dem_fileName : str @@ -20,56 +23,112 @@ def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raste File name of stream pixel watersheds raster. rem_fileName : str File name of output relative elevation raster. + hydroid_fileName : str + File name of the hydroid raster (i.e. gw_catchments_reaches.tif) + hand_ref_elev_fileName + File name of the output csv containing list of hydroid values and HAND zero/reference elev + dem_reaches_filename + File name of the reaches layer to populate HAND elevation attribute values and overwrite as output """ - + # ------------------------------------------- Get catchment_hydroid_dict --------------------------------------------------- # + # The following creates a dictionary of the catchment ids (key) and their hydroid along the thalweg (value). + # This is needed to produce a HAND zero reference elevation by hydroid dataframe (helpful for evaluating rating curves & bathy properties) + @njit + def make_catchment_hydroid_dict(flat_value_raster, catchment_hydroid_dict, flat_catchments, thalweg_window): + + for i,cm in enumerate(flat_catchments): + if thalweg_window[i] == 1: # Only allow reference hydroid to be within thalweg. + catchment_hydroid_dict[cm] = flat_value_raster[i] + return(catchment_hydroid_dict) + + # Open the masked gw_catchments_pixels_masked, hydroid_raster, and dem_thalwegCond_masked. + gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName) + hydroid_pixels_object = rasterio.open(hydroid_fileName) + thalweg_raster_object = rasterio.open(thalweg_raster) + + # Specify raster object metadata. + meta = hydroid_pixels_object.meta.copy() + meta['tiled'], meta['compress'] = True, 'lzw' + + # -- Create catchment_hydroid_dict -- # + catchment_hydroid_dict = typed.Dict.empty(types.int64,types.int64) # Initialize an empty dictionary to store the catchment hydroid. + # Update catchment_hydroid_dict with each pixel sheds hydroid. + # Creating dictionary containing catchment ids (key) and corresponding hydroid within the thalweg... + for ji, window in hydroid_pixels_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template. + hydroid_window = hydroid_pixels_object.read(1,window=window).ravel() # Define hydroid_window. + catchments_window = gw_catchments_pixels_masked_object.read(1,window=window).ravel() # Define catchments_window. + thalweg_window = thalweg_raster_object.read(1, window=window).ravel() # Define cost_window. + + # Call numba-optimized function to update catchment_hydroid_dict with pixel sheds overlapping hydroid. 
+ catchment_hydroid_dict = make_catchment_hydroid_dict(hydroid_window, catchment_hydroid_dict, catchments_window, thalweg_window) + + hydroid_pixels_object.close() + gw_catchments_pixels_masked_object.close() + thalweg_raster_object.close() # ------------------------------------------- Get catchment_min_dict --------------------------------------------------- # # The following creates a dictionary of the catchment ids (key) and their elevation along the thalweg (value). @njit def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalweg_window): - + for i,cm in enumerate(flat_catchments): if thalweg_window[i] == 1: # Only allow reference elevation to be within thalweg. # If the catchment really exists in the dictionary, compare elevation values. if (cm in catchment_min_dict): if (flat_dem[i] < catchment_min_dict[cm]): # If the flat_dem's elevation value is less than the catchment_min_dict min, update the catchment_min_dict min. - catchment_min_dict[cm] = flat_dem[i] + catchment_min_dict[cm] = flat_dem[i] else: - catchment_min_dict[cm] = flat_dem[i] + catchment_min_dict[cm] = flat_dem[i] return(catchment_min_dict) # Open the masked gw_catchments_pixels_masked and dem_thalwegCond_masked. gw_catchments_pixels_masked_object = rasterio.open(pixel_watersheds_fileName) dem_thalwegCond_masked_object = rasterio.open(dem_fileName) thalweg_raster_object = rasterio.open(thalweg_raster) - + # Specify raster object metadata. meta = dem_thalwegCond_masked_object.meta.copy() meta['tiled'], meta['compress'] = True, 'lzw' - + # -- Create catchment_min_dict -- # - catchment_min_dict = typed.Dict.empty(types.int32,types.float32) # Initialize an empty dictionary to store the catchment minimums. + catchment_min_dict = typed.Dict.empty(types.int64,types.float32) # Initialize an empty dictionary to store the catchment minimums. # Update catchment_min_dict with pixel sheds minimum. + # Creating dictionary containing catchment ids (key) and corresponding elevation within the thalweg (value)... for ji, window in dem_thalwegCond_masked_object.block_windows(1): # Iterate over windows, using dem_rasterio_object as template. dem_window = dem_thalwegCond_masked_object.read(1,window=window).ravel() # Define dem_window. catchments_window = gw_catchments_pixels_masked_object.read(1,window=window).ravel() # Define catchments_window. thalweg_window = thalweg_raster_object.read(1, window=window).ravel() # Define cost_window. - + # Call numba-optimized function to update catchment_min_dict with pixel sheds minimum. 
catchment_min_dict = make_catchment_min_dict(dem_window, catchment_min_dict, catchments_window, thalweg_window) dem_thalwegCond_masked_object.close() gw_catchments_pixels_masked_object.close() thalweg_raster_object.close() + +############################################### + # Merge and export dictionary to to_csv + catchment_min_dict_df = pd.DataFrame.from_dict(catchment_min_dict, orient='index') # convert dict to dataframe + catchment_min_dict_df.columns = ['Min_Thal_Elev_meters'] + catchment_hydroid_dict_df = pd.DataFrame.from_dict(catchment_hydroid_dict, orient='index') # convert dict to dataframe + catchment_hydroid_dict_df.columns = ['HydroID'] + merge_df = catchment_hydroid_dict_df.merge(catchment_min_dict_df, left_index=True, right_index=True) + merge_df.index.name = 'pixelcatch_id' + merge_df.to_csv(hand_ref_elev_fileName,index=True) # export dataframe to csv file + + # Merge the HAND reference elvation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) + merge_df = merge_df.groupby(['HydroID']).median() # median value of all Min_Thal_Elev_meters for pixel catchments in each HydroID reach + input_reaches = gpd.read_file(dem_reaches_filename) + input_reaches = input_reaches.merge(merge_df, on='HydroID') # merge dataframes by HydroID variable + input_reaches.to_file(dem_reaches_filename,driver=getDriver(dem_reaches_filename),index=False) # ------------------------------------------------------------------------------------------------------------------------ # - - + + # ------------------------------------------- Produce relative elevation model ------------------------------------------- # @njit def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): - rem_window = np.zeros(len(flat_dem),dtype=np.float32) for i,cm in enumerate(flat_catchments): if cm in catchmentMinDict: @@ -83,19 +142,20 @@ def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): rem_rasterio_object = rasterio.open(rem_fileName,'w',**meta) # Open rem_rasterio_object for writing to rem_fileName. pixel_catchments_rasterio_object = rasterio.open(pixel_watersheds_fileName) # Open pixel_catchments_rasterio_object dem_rasterio_object = rasterio.open(dem_fileName) - + + # Producing relative elevation model raster for ji, window in dem_rasterio_object.block_windows(1): dem_window = dem_rasterio_object.read(1,window=window) window_shape = dem_window.shape dem_window = dem_window.ravel() catchments_window = pixel_catchments_rasterio_object.read(1,window=window).ravel() - + rem_window = calculate_rem(dem_window, catchment_min_dict, catchments_window, meta['nodata']) rem_window = rem_window.reshape(window_shape).astype(np.float32) - + rem_rasterio_object.write(rem_window, window=window, indexes=1) - + dem_rasterio_object.close() pixel_catchments_rasterio_object.close() rem_rasterio_object.close() @@ -110,8 +170,11 @@ def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): parser.add_argument('-w','--watersheds',help='Pixel based watersheds raster to use within project path',required=True) parser.add_argument('-t','--thalweg-raster',help='A binary raster representing the thalweg. 
1 for thalweg, 0 for non-thalweg.',required=True) parser.add_argument('-o','--rem',help='Output REM raster',required=True) - - + parser.add_argument('-i','--hydroid', help='HydroID raster to use within project path', required=True) + parser.add_argument('-r','--hand_ref_elev_table',help='Output table of HAND reference elev by catchment',required=True) + parser.add_argument('-s','--dem_reaches_in_out',help='DEM derived reach layer to join HAND reference elevation attribute',required=True) + + # extract to dictionary args = vars(parser.parse_args()) @@ -120,5 +183,8 @@ def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): pixel_watersheds_fileName = args['watersheds'] rem_fileName = args['rem'] thalweg_raster = args['thalweg_raster'] + hydroid_fileName = args['hydroid'] + hand_ref_elev_fileName = args['hand_ref_elev_table'] + dem_reaches_filename = args['dem_reaches_in_out'] - rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster) + rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, hand_ref_elev_fileName, dem_reaches_filename) diff --git a/lib/run_by_unit.sh b/lib/run_by_unit.sh index 9f34bc222..3501bfdbd 100755 --- a/lib/run_by_unit.sh +++ b/lib/run_by_unit.sh @@ -321,7 +321,7 @@ echo -e $startDiv"D8 REM $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/rem.tif ] && \ -$libDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels +$libDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels -i $outputHucDataDir/gw_catchments_reaches.tif -r $outputHucDataDir/hand_ref_elev_table.csv -s $outputHucDataDir/demDerived_reaches_split.gpkg Tcount ## DINF DISTANCE DOWN ## From 0412e412c096edb8e773b8a9be18866ecae56496 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Fri, 19 Feb 2021 12:29:17 -0600 Subject: [PATCH 020/359] Rating curves for short stream segments are replaced with rating curves from upstream/downstream segments. - Short stream segments are identified and are reassigned the channel geometry from upstream/downstream segment. - fossid renamed to fimid and the attribute's starting value is now 1000 to avoid HydroIDs with leading zeroes. - Addresses issue where HydroIDs were not included in final hydrotable. - Added import sys to inundation.py (missing from previous feature branch). - Variable names and general workflow are cleaned up. This resolves #100. --- CHANGELOG.md | 13 +++ config/params_calibrated.env | 2 + config/params_template.env | 2 + lib/acquire_and_preprocess_inputs.py | 26 ++--- lib/add_crosswalk.py | 103 +++++++++++++++++--- lib/build_stream_traversal.py | 1 - lib/filter_catchments_and_add_attributes.py | 13 ++- lib/fr_to_ms_raster_mask.py | 48 ++++----- lib/output_cleanup.py | 3 +- lib/run_by_unit.sh | 6 +- lib/split_flows.py | 29 +++--- lib/utils/shared_variables.py | 10 +- tests/inundation.py | 5 +- 13 files changed, 176 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b469c5c2..cdcc09070 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,19 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.4.4 - 2021-02-19 - [PR #266](https://github.com/NOAA-OWP/cahaba/pull/266) + +Rating curves for short stream segments are replaced with rating curves from upstream/downstream segments. 
+ +### Changes + + - Short stream segments are identified and reassigned the channel geometry from the upstream/downstream segment. + - `fossid` renamed to `fimid` and the attribute's starting value is now 1000 to avoid HydroIDs with leading zeroes. + - Addresses an issue where HydroIDs were not included in the final hydrotable. + - Added `import sys` to `inundation.py` (missing from previous feature branch). + - Variable names and general workflow are cleaned up. +

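A small sketch (hypothetical HydroIDs and discharges) of the rating-curve replacement described above: the short reach keeps its stages but takes the discharges of the neighboring reach it was reassigned to.

import pandas as pd

# Hypothetical synthetic rating curves; 2001 is the short reach, 2002 its neighbor.
output_src = pd.DataFrame({
    'HydroID': [2001, 2001, 2002, 2002],
    'Stage': [0.0, 0.3, 0.0, 0.3],
    'Discharge (m3s-1)': [0.0, 1.2, 0.0, 4.8],
})

short_id, update_id = 2001, 2002
new_values = output_src.loc[output_src['HydroID'] == update_id, ['Stage', 'Discharge (m3s-1)']]

for _, row in new_values.iterrows():
    output_src.loc[(output_src['HydroID'] == short_id) & (output_src['Stage'] == row['Stage']),
                   'Discharge (m3s-1)'] = row['Discharge (m3s-1)']

print(output_src)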
## v3.0.4.3 - 2021-02-12 - [PR #254](https://github.com/NOAA-OWP/cahaba/pull/254) Modified `rem.py` with a new function to output HAND reference elev. diff --git a/config/params_calibrated.env b/config/params_calibrated.env index ff0da17b0..c5b040215 100644 --- a/config/params_calibrated.env +++ b/config/params_calibrated.env @@ -16,6 +16,8 @@ export stage_min_meters=0 export stage_interval_meters=0.3048 export stage_max_meters=25 export slope_min=0.001 +export min_catchment_area=0.25 +export min_stream_length=0.5 #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed diff --git a/config/params_template.env b/config/params_template.env index b250de4bf..d6c9e2865 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -16,6 +16,8 @@ export stage_min_meters=0 export stage_interval_meters=0.3048 export stage_max_meters=25 export slope_min=0.001 +export min_catchment_area=0.25 +export min_stream_length=0.5 #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed diff --git a/lib/acquire_and_preprocess_inputs.py b/lib/acquire_and_preprocess_inputs.py index bace41e19..30ce7974a 100755 --- a/lib/acquire_and_preprocess_inputs.py +++ b/lib/acquire_and_preprocess_inputs.py @@ -6,7 +6,7 @@ import sys import shutil from multiprocessing import Pool -import geopandas as gp +import geopandas as gpd from urllib.error import HTTPError from tqdm import tqdm @@ -18,7 +18,7 @@ NHD_VECTOR_EXTRACTION_SUFFIX, PREP_PROJECTION, WBD_NATIONAL_URL, - FOSS_ID, + FIM_ID, OVERWRITE_WBD, OVERWRITE_NHD, OVERWRITE_ALL) @@ -32,7 +32,7 @@ def subset_wbd_to_nwm_domain(wbd,nwm_file_to_use): - intersecting_indices = [not (gp.read_file(nwm_file_to_use,mask=b).empty) for b in wbd.geometry] + intersecting_indices = [not (gpd.read_file(nwm_file_to_use,mask=b).empty) for b in wbd.geometry] return(wbd[intersecting_indices]) @@ -71,16 +71,16 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_ procs_list, wbd_gpkg_list = [], [] multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg') - # Add fossid to HU8, project, and convert to geopackage. Code block from Brian Avant. + # Add fimid to HU8, project, and convert to geopackage. if os.path.isfile(multilayer_wbd_geopackage): os.remove(multilayer_wbd_geopackage) print("Making National WBD GPKG...") print("\tWBDHU8") - wbd_hu8 = gp.read_file(wbd_gdb_path, layer='WBDHU8') + wbd_hu8 = gpd.read_file(wbd_gdb_path, layer='WBDHU8') wbd_hu8 = wbd_hu8.rename(columns={'huc8':'HUC8'}) # rename column to caps wbd_hu8 = wbd_hu8.sort_values('HUC8') - fossids = [str(item).zfill(4) for item in list(range(1, 1 + len(wbd_hu8)))] - wbd_hu8[FOSS_ID] = fossids + fimids = [str(item).zfill(4) for item in list(range(1000, 1000 + len(wbd_hu8)))] + wbd_hu8[FIM_ID] = fimids wbd_hu8 = wbd_hu8.to_crs(PREP_PROJECTION) # Project. 
wbd_hu8 = subset_wbd_to_nwm_domain(wbd_hu8,nwm_file_to_use) wbd_hu8.geometry = wbd_hu8.buffer(0) @@ -93,7 +93,7 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_ for wbd_layer_num in ['4', '6']: wbd_layer = 'WBDHU' + wbd_layer_num print("\t{}".format(wbd_layer)) - wbd = gp.read_file(wbd_gdb_path,layer=wbd_layer) + wbd = gpd.read_file(wbd_gdb_path,layer=wbd_layer) wbd = wbd.to_crs(PREP_PROJECTION) wbd = wbd.rename(columns={'huc'+wbd_layer_num : 'HUC' + wbd_layer_num}) wbd = subset_wbd_to_nwm_domain(wbd,nwm_file_to_use) @@ -204,15 +204,15 @@ def pull_and_prepare_nhd_data(args): huc = os.path.split(nhd_vector_extraction_parent)[1] # Parse HUC. os.system("7za x {nhd_vector_extraction_path} -o{nhd_vector_extraction_parent}".format(nhd_vector_extraction_path=nhd_vector_extraction_path, nhd_vector_extraction_parent=nhd_vector_extraction_parent)) # extract input stream network - nhd = gp.read_file(nhd_gdb,layer='NHDPlusBurnLineEvent') + nhd = gpd.read_file(nhd_gdb,layer='NHDPlusBurnLineEvent') nhd = nhd.to_crs(PREP_PROJECTION) nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusBurnLineEvent' + huc + '.gpkg'),driver='GPKG') # extract flowlines for FType attributes - nhd = gp.read_file(nhd_gdb,layer='NHDFlowline') + nhd = gpd.read_file(nhd_gdb,layer='NHDFlowline') nhd = nhd.to_crs(PREP_PROJECTION) nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDFlowline' + huc + '.gpkg'),driver='GPKG') # extract attributes - nhd = gp.read_file(nhd_gdb,layer='NHDPlusFlowLineVAA') + nhd = gpd.read_file(nhd_gdb,layer='NHDPlusFlowLineVAA') nhd.to_file(os.path.join(nhd_vector_extraction_parent, 'NHDPlusFlowLineVAA' + huc + '.gpkg'),driver='GPKG') # -- Project and convert NHDPlusBurnLineEvent and NHDPlusFlowLineVAA vectors to geopackage -- # #for nhd_layer in ['NHDPlusBurnLineEvent', 'NHDPlusFlowlineVAA']: @@ -245,7 +245,7 @@ def build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory): huc_gpkg = 'WBDHU8' # The WBDHU4 are handled by the nhd_plus_raster_dir name. # Open geopackage. - wbd = gp.read_file(full_huc_gpkg, layer=huc_gpkg) + wbd = gpd.read_file(full_huc_gpkg, layer=huc_gpkg) # Loop through entries and compare against the huc4_list to get available HUCs within the geopackage domain. for index, row in tqdm(wbd.iterrows(),total=len(wbd)): @@ -253,7 +253,7 @@ def build_huc_list_files(path_to_saved_data_parent_dir, wbd_directory): huc_mask = wbd.loc[wbd[str("HUC" + huc_gpkg[-1])]==huc].geometry burnline = os.path.join(nhd_plus_vector_dir, huc[0:4], 'NHDPlusBurnLineEvent' + huc[0:4] + '.gpkg') if os.path.exists(burnline): - nhd_test = len(gp.read_file(burnline, mask = huc_mask)) # this is slow, iterates through 2000+ HUC8s + nhd_test = len(gpd.read_file(burnline, mask = huc_mask)) # this is slow, iterates through 2000+ HUC8s # Append huc to huc8 list. 
if (str(huc[:4]) in huc4_list) & (nhd_test>0): huc8_list.append(huc) diff --git a/lib/add_crosswalk.py b/lib/add_crosswalk.py index d3f052c3a..a0e57c7de 100755 --- a/lib/add_crosswalk.py +++ b/lib/add_crosswalk.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import os import geopandas as gpd import pandas as pd from numpy import unique @@ -8,13 +9,16 @@ import argparse import sys from utils.shared_functions import getDriver +from utils.shared_variables import FIM_ID -def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,calibration_mode=False): +def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode=False): input_catchments = gpd.read_file(input_catchments_fileName) input_flows = gpd.read_file(input_flows_fileName) input_huc = gpd.read_file(input_huc_fileName) input_nwmflows = gpd.read_file(input_nwmflows_fileName) + min_catchment_area = float(os.environ['min_catchment_area']) + min_stream_length = float(os.environ['min_stream_length']) if extent == 'FR': ## crosswalk using majority catchment method @@ -33,15 +37,15 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f relevant_input_nwmflows = input_nwmflows[input_nwmflows['feature_id'].isin(input_majorities['feature_id'])] relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id','order_']) - if calibration_mode == False: - if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(input_majorities[['HydroID','feature_id']],on='HydroID') - output_catchments = output_catchments.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') + if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(input_majorities[['HydroID','feature_id']],on='HydroID') + output_catchments = output_catchments.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) output_flows = input_flows.merge(input_majorities[['HydroID','feature_id']],on='HydroID') if output_flows.HydroID.dtype != 'int': output_flows.HydroID = output_flows.HydroID.astype(int) output_flows = output_flows.merge(relevant_input_nwmflows[['order_','feature_id']],on='feature_id') + output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') elif extent == 'MS': ## crosswalk using stream segment midpoint method @@ -89,12 +93,12 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f print ("No relevant streams within HUC boundaries.") sys.exit(0) - if calibration_mode == False: - if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(crosswalk,on='HydroID') + if 
input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(crosswalk,on='HydroID') if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) output_flows = input_flows.merge(crosswalk,on='HydroID') + output_flows = output_flows.merge(output_catchments.filter(items=['HydroID','areasqkm']),on='HydroID') # read in manning's n values if calibration_mode == False: @@ -108,6 +112,57 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_flows['ManningN'] = output_flows['order_'].astype(str).map(mannings_dict) + if output_flows.NextDownID.dtype != 'int': output_flows.NextDownID = output_flows.NextDownID.astype(int) + + # Adjust short model reach rating curves + print("Adjusting model reach rating curves") + sml_segs = pd.DataFrame() + + # replace small segment geometry with neighboring stream + for stream_index in output_flows.index: + + if output_flows["areasqkm"][stream_index] < min_catchment_area and output_flows["LengthKm"][stream_index] < min_stream_length and output_flows["LakeID"][stream_index] < 0: + + short_id = output_flows['HydroID'][stream_index] + to_node = output_flows['To_Node'][stream_index] + from_node = output_flows['From_Node'][stream_index] + + # multiple upstream segments + if len(output_flows.loc[output_flows['NextDownID'] == short_id]['HydroID']) > 1: + max_order = max(output_flows.loc[output_flows['NextDownID'] == short_id]['order_']) # drainage area would be better than stream order but we would need to calculate + + if len(output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID']) == 1: + update_id = output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID'].item() + + else: + update_id = output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID'].values[0] # get the first one (same stream order, without drainage area info it is hard to know which is the main channel) + + # single upstream segments + elif len(output_flows.loc[output_flows['NextDownID'] == short_id]['HydroID']) == 1: + update_id = output_flows.loc[output_flows.To_Node==from_node]['HydroID'].item() + + # no upstream segments; multiple downstream segments + elif len(output_flows.loc[output_flows.From_Node==to_node]['HydroID']) > 1: + max_order = max(output_flows.loc[output_flows.From_Node==to_node]['HydroID']['order_']) # drainage area would be better than stream order but we would need to calculate + + if len(output_flows.loc[(output_flows['NextDownID'] == short_id) & (output_flows['order_'] == max_order)]['HydroID']) == 1: + update_id = output_flows.loc[(output_flows.From_Node==to_node) & (output_flows['order_'] == max_order)]['HydroID'].item() + + else: + update_id = output_flows.loc[(output_flows.From_Node==to_node) & (output_flows['order_'] == max_order)]['HydroID'].values[0] # get the first one (same stream order, without drainage area info it is hard to know which is the main channel) + + # no upstream segments; single downstream segment + elif len(output_flows.loc[output_flows.From_Node==to_node]['HydroID']) == 1: + update_id = output_flows.loc[output_flows.From_Node==to_node]['HydroID'].item() + + else: + update_id = output_flows.loc[output_flows.HydroID==short_id]['HydroID'].item() + + str_order = output_flows.loc[output_flows.HydroID==short_id]['order_'].item() + sml_segs = 
sml_segs.append({'short_id':short_id, 'update_id':update_id, 'str_order':str_order}, ignore_index=True) + + print("Number of short reaches [{} < {} and {} < {}] = {}".format("areasqkm", min_catchment_area, "LengthKm", min_stream_length, len(sml_segs))) + # calculate src_full input_src_base = pd.read_csv(input_srcbase_fileName, dtype= object) if input_src_base.CatchId.dtype != 'int': input_src_base.CatchId = input_src_base.CatchId.astype(int) @@ -131,6 +186,21 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_src = input_src_base.drop(columns=['CatchId']) if output_src.HydroID.dtype != 'int': output_src.HydroID = output_src.HydroID.astype(int) + # update rating curves + if len(sml_segs) > 0: + + sml_segs.to_csv(small_segments_filename,index=False) + print("Update rating curves for short reaches.") + + for index, segment in sml_segs.iterrows(): + + short_id = segment[0] + update_id= segment[1] + new_values = output_src.loc[output_src['HydroID'] == update_id][['Stage', 'Discharge (m3s-1)']] + + for src_index, src_stage in new_values.iterrows(): + output_src.loc[(output_src['HydroID']== short_id) & (output_src['Stage']== src_stage[0]),['Discharge (m3s-1)']] = src_stage[1] + if extent == 'FR': output_src = output_src.merge(input_majorities[['HydroID','feature_id']],on='HydroID') elif extent == 'MS': @@ -142,22 +212,21 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f # make hydroTable output_hydro_table = output_src.loc[:,['HydroID','feature_id','Stage','Discharge (m3s-1)']] output_hydro_table.rename(columns={'Stage' : 'stage','Discharge (m3s-1)':'discharge_cms'},inplace=True) + if output_hydro_table.HydroID.dtype != 'str': output_hydro_table.HydroID = output_hydro_table.HydroID.astype(str) - output_hydro_table['HydroID'] = output_hydro_table.HydroID.str.zfill(8) - output_hydro_table['fossid'] = output_hydro_table.loc[:,'HydroID'].apply(lambda x : str(x)[0:4]) - if input_huc.fossid.dtype != 'str': input_huc.fossid = input_huc.fossid.astype(str) + output_hydro_table[FIM_ID] = output_hydro_table.loc[:,'HydroID'].apply(lambda x : str(x)[0:4]) + + if input_huc[FIM_ID].dtype != 'str': input_huc[FIM_ID] = input_huc[FIM_ID].astype(str) + output_hydro_table = output_hydro_table.merge(input_huc.loc[:,[FIM_ID,'HUC8']],how='left',on=FIM_ID) - output_hydro_table = output_hydro_table.merge(input_huc.loc[:,['fossid','HUC8']],how='left',on='fossid') if output_flows.HydroID.dtype != 'str': output_flows.HydroID = output_flows.HydroID.astype(str) - output_flows['HydroID'] = output_flows.HydroID.str.zfill(8) output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID']],how='left',on='HydroID') output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int) output_hydro_table = output_hydro_table.rename(columns={'HUC8':'HUC'}) if output_hydro_table.HUC.dtype != 'str': output_hydro_table.HUC = output_hydro_table.HUC.astype(str) - output_hydro_table.HUC = output_hydro_table.HUC.str.zfill(8) - output_hydro_table.drop(columns='fossid',inplace=True) + output_hydro_table.drop(columns=FIM_ID,inplace=True) if output_hydro_table.feature_id.dtype != 'int': output_hydro_table.feature_id = output_hydro_table.feature_id.astype(int) if output_hydro_table.feature_id.dtype != 'str': output_hydro_table.feature_id = output_hydro_table.feature_id.astype(str) @@ -207,6 +276,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f parser.add_argument('-m','--mannings-n',help='Mannings n. 
Accepts single parameter set or list of parameter set in calibration mode. Currently input as csv.',required=True) parser.add_argument('-z','--input-nwmcat-fileName',help='NWM catchment polygon',required=True) parser.add_argument('-p','--extent',help='MS or FR extent',required=True) + parser.add_argument('-k','--small-segments-filename',help='output list of short segments',required=True) parser.add_argument('-c','--calibration-mode',help='Mannings calibration flag',required=False,action='store_true') args = vars(parser.parse_args()) @@ -226,6 +296,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f mannings_n = args['mannings_n'] input_nwmcat_fileName = args['input_nwmcat_fileName'] extent = args['extent'] + small_segments_filename = args['small_segments_filename'] calibration_mode = args['calibration_mode'] - add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,calibration_mode) + add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode) diff --git a/lib/build_stream_traversal.py b/lib/build_stream_traversal.py index daca019f2..672fe8a01 100644 --- a/lib/build_stream_traversal.py +++ b/lib/build_stream_traversal.py @@ -7,7 +7,6 @@ hydro_id = name of ID column (string) ''' import sys -import datetime import pandas as pd import argparse import geopandas as gpd diff --git a/lib/filter_catchments_and_add_attributes.py b/lib/filter_catchments_and_add_attributes.py index ad9e6e543..104bc4275 100755 --- a/lib/filter_catchments_and_add_attributes.py +++ b/lib/filter_catchments_and_add_attributes.py @@ -5,6 +5,7 @@ import numpy as np import argparse import sys +from utils.shared_variables import FIM_ID input_catchments_fileName = sys.argv[1] input_flows_fileName = sys.argv[2] @@ -16,30 +17,28 @@ input_catchments = gpd.read_file(input_catchments_fileName) wbd = gpd.read_file(wbd_fileName) input_flows = gpd.read_file(input_flows_fileName) -# must drop leading zeroes -select_flows = tuple(map(str,map(int,wbd[wbd.HUC8.str.contains(hucCode)].fossid))) + +# filter segments within huc boundary +select_flows = tuple(map(str,map(int,wbd[wbd.HUC8.str.contains(hucCode)][FIM_ID]))) if input_flows.HydroID.dtype != 'str': input_flows.HydroID = input_flows.HydroID.astype(str) output_flows = input_flows[input_flows.HydroID.str.startswith(select_flows)].copy() if output_flows.HydroID.dtype != 'int': output_flows.HydroID = output_flows.HydroID.astype(int) if len(output_flows) > 0: + # merges input flows attributes and filters hydroids if input_catchments.HydroID.dtype != 'int': input_catchments.HydroID = input_catchments.HydroID.astype(int) output_catchments = input_catchments.merge(output_flows.drop(['geometry'],axis=1),on='HydroID') # filter out smaller duplicate features duplicateFeatures = np.where(np.bincount(output_catchments['HydroID'])>1)[0] - # print(duplicateFeatures) for dp in duplicateFeatures: - # print(dp) + indices_of_duplicate = np.where(output_catchments['HydroID'] 
== dp)[0] - # print(indices_of_duplicate) areas = output_catchments.iloc[indices_of_duplicate,:].geometry.area - # print(areas) indices_of_smaller_duplicates = indices_of_duplicate[np.where(areas != np.amax(areas))[0]] - # print(indices_of_smaller_duplicates) output_catchments = output_catchments.drop(output_catchments.index[indices_of_smaller_duplicates]) # add geometry column diff --git a/lib/fr_to_ms_raster_mask.py b/lib/fr_to_ms_raster_mask.py index 3f4d6e9b2..567b5cf19 100755 --- a/lib/fr_to_ms_raster_mask.py +++ b/lib/fr_to_ms_raster_mask.py @@ -9,30 +9,30 @@ import geopandas as gpd import rasterio.mask -split_flows_fileName = sys.argv[1] -fdrFR = sys.argv[2] -nhddemFR = sys.argv[3] -slpFR = sys.argv[4] -fdrMSname = sys.argv[5] -nhddemMSname = sys.argv[6] -slpMSname = sys.argv[7] -strpixelFR = sys.argv[8] -strpixelMSname = sys.argv[9] -floodAOIbuf = sys.argv[10] +split_flows_fileName = sys.argv[1] +fdr_fr = sys.argv[2] +dem_fr = sys.argv[3] +slope_fr = sys.argv[4] +fdr_ms_filename = sys.argv[5] +dem_ms_filename = sys.argv[6] +slope_ms_filename = sys.argv[7] +str_pixel_fr = sys.argv[8] +str_pixel_ms_filename = sys.argv[9] +ms_buffer_dist = int(os.environ['ms_buffer_dist']) # create output layer names split_flows = gpd.read_file(split_flows_fileName) # Limit the rasters to the buffer distance around the draft streams. -print ("Limiting rasters to buffer area ({} meters) around model streams".format(str(floodAOIbuf))) +print ("Limiting rasters to buffer area ({} meters) around model streams".format(str(ms_buffer_dist))) -MSsplit_flows_gdf_buffered = split_flows.unary_union.buffer(int(floodAOIbuf)) +split_flows_ms_buffer = split_flows.unary_union.buffer(ms_buffer_dist) print('Writing raster outputs ...') # Mask nhddem -with rasterio.open(nhddemFR) as src: - out_image, out_transform = rasterio.mask.mask(src, [MSsplit_flows_gdf_buffered], crop=True) +with rasterio.open(dem_fr) as src: + out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) out_meta = src.meta out_meta.update({"driver": "GTiff", @@ -40,12 +40,12 @@ "width": out_image.shape[2], "transform": out_transform}) -with rasterio.open(os.path.join(os.path.dirname(nhddemFR), nhddemMSname), "w", **out_meta) as dest: +with rasterio.open(os.path.join(os.path.dirname(dem_fr), dem_ms_filename), "w", **out_meta) as dest: dest.write(out_image) # Mask fdr -with rasterio.open(fdrFR) as src: - out_image, out_transform = rasterio.mask.mask(src, [MSsplit_flows_gdf_buffered], crop=True) +with rasterio.open(fdr_fr) as src: + out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) out_meta = src.meta out_meta.update({"driver": "GTiff", @@ -53,12 +53,12 @@ "width": out_image.shape[2], "transform": out_transform}) -with rasterio.open(os.path.join(os.path.dirname(fdrFR), fdrMSname), "w", **out_meta) as dest: +with rasterio.open(os.path.join(os.path.dirname(fdr_fr), fdr_ms_filename), "w", **out_meta) as dest: dest.write(out_image) # Mask slope -with rasterio.open(slpFR) as src: - out_image, out_transform = rasterio.mask.mask(src, [MSsplit_flows_gdf_buffered], crop=True) +with rasterio.open(slope_fr) as src: + out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) out_meta = src.meta out_meta.update({"driver": "GTiff", @@ -66,12 +66,12 @@ "width": out_image.shape[2], "transform": out_transform}) -with rasterio.open(os.path.join(os.path.dirname(slpFR), slpMSname), "w", **out_meta) as dest: +with rasterio.open(os.path.join(os.path.dirname(slope_fr), 
slope_ms_filename), "w", **out_meta) as dest: dest.write(out_image) # Mask stream pixels -with rasterio.open(strpixelFR) as src: - out_image, out_transform = rasterio.mask.mask(src, [MSsplit_flows_gdf_buffered], crop=True) +with rasterio.open(str_pixel_fr) as src: + out_image, out_transform = rasterio.mask.mask(src, [split_flows_ms_buffer], crop=True) out_meta = src.meta out_meta.update({"driver": "GTiff", @@ -79,5 +79,5 @@ "width": out_image.shape[2], "transform": out_transform}) -with rasterio.open(os.path.join(os.path.dirname(strpixelFR), strpixelMSname), "w", **out_meta) as dest: +with rasterio.open(os.path.join(os.path.dirname(str_pixel_fr), str_pixel_ms_filename), "w", **out_meta) as dest: dest.write(out_image) diff --git a/lib/output_cleanup.py b/lib/output_cleanup.py index b253598cf..7e211bdc5 100755 --- a/lib/output_cleanup.py +++ b/lib/output_cleanup.py @@ -30,7 +30,8 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', 'gw_catchments_reaches_filtered_addedAttributes.tif', 'hydroTable.csv', - 'src.json' + 'src.json', + 'small_segments.csv' ] # List of files that will be saved during a viz run diff --git a/lib/run_by_unit.sh b/lib/run_by_unit.sh index 3501bfdbd..2ba820a32 100755 --- a/lib/run_by_unit.sh +++ b/lib/run_by_unit.sh @@ -258,7 +258,7 @@ echo -e $startDiv"Split Derived Reaches $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ] && \ -$libDir/split_flows.py $outputHucDataDir/demDerived_reaches.shp $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/demDerived_reaches_split_points.gpkg $max_split_distance_meters $slope_min $outputHucDataDir/wbd8_clp.gpkg $outputHucDataDir/nwm_lakes_proj_subset.gpkg $lakes_buffer_dist_meters +$libDir/split_flows.py $outputHucDataDir/demDerived_reaches.shp $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/demDerived_reaches_split_points.gpkg $outputHucDataDir/wbd8_clp.gpkg $outputHucDataDir/nwm_lakes_proj_subset.gpkg Tcount if [[ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ]] ; then @@ -272,7 +272,7 @@ if [ "$extent" = "MS" ]; then echo -e $startDiv"Mask Rasters with Stream Buffer $hucNumber"$stopDiv date -u Tstart - $libDir/fr_to_ms_raster_mask.py $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/flowdir_d8_burned_filled.tif $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/slopes_d8_dem_meters.tif $outputHucDataDir/flowdir_d8_MS.tif $outputHucDataDir/dem_thalwegCond_MS.tif $outputHucDataDir/slopes_d8_dem_metersMS.tif $outputHucDataDir/demDerived_streamPixels.tif $outputHucDataDir/demDerived_streamPixelsMS.tif $ms_buffer_dist + $libDir/fr_to_ms_raster_mask.py $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/flowdir_d8_burned_filled.tif $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/slopes_d8_dem_meters.tif $outputHucDataDir/flowdir_d8_MS.tif $outputHucDataDir/dem_thalwegCond_MS.tif $outputHucDataDir/slopes_d8_dem_metersMS.tif $outputHucDataDir/demDerived_streamPixels.tif $outputHucDataDir/demDerived_streamPixelsMS.tif Tcount if [[ ! -f $outputHucDataDir/dem_thalwegCond_MS.tif ]] ; then @@ -429,7 +429,7 @@ echo -e $startDiv"Finalize catchments and model streams $hucNumber"$stopDiv date -u Tstart [ ! 
-f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg ] && \ -$libDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent +$libDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent -k $outputHucDataDir/small_segments.csv Tcount ## CLEANUP OUTPUTS ## diff --git a/lib/split_flows.py b/lib/split_flows.py index c8de19b34..67b69f7e9 100755 --- a/lib/split_flows.py +++ b/lib/split_flows.py @@ -4,7 +4,7 @@ Description: 1) split stream segments based on lake boundaries and input threshold distance 2) calculate channel slope, manning's n, and LengthKm for each segment - 3) create unique ids using HUC8 boundaries (and unique 'fossid' column) + 3) create unique ids using HUC8 boundaries (and unique FIM_ID column) 4) create network traversal attribute columns (To_Node, From_Node, NextDownID) 5) create points layer with segment verticies encoded with HydroID's (used for catchment delineation in next step) ''' @@ -19,20 +19,21 @@ from tqdm import tqdm import time from os.path import isfile -from os import remove +from os import remove,environ from collections import OrderedDict import build_stream_traversal from utils.shared_functions import getDriver +from utils.shared_variables import FIM_ID flows_fileName = sys.argv[1] dem_fileName = sys.argv[2] split_flows_fileName = sys.argv[3] split_points_fileName = sys.argv[4] -maxLength = float(sys.argv[5]) -slope_min = float(sys.argv[6]) -wbd8_clp_filename = sys.argv[7] -lakes_filename = sys.argv[8] -lakes_buffer_input = float(sys.argv[9]) +wbd8_clp_filename = sys.argv[5] +lakes_filename = sys.argv[6] +max_length = float(environ['max_split_distance_meters']) +slope_min = float(environ['slope_min']) +lakes_buffer_input = float(environ['lakes_buffer_dist_meters']) wbd = gpd.read_file(wbd8_clp_filename) @@ -46,15 +47,15 @@ sys.exit(0) wbd8 = gpd.read_file(wbd8_clp_filename) -#dem = Raster(dem_fileName) dem = rasterio.open(dem_fileName,'r') + if isfile(lakes_filename): lakes = gpd.read_file(lakes_filename) else: lakes = None -wbd8 = wbd8.filter(items=['fossid', 'geometry']) -wbd8 = wbd8.set_index('fossid') +wbd8 = wbd8.filter(items=[FIM_ID, 'geometry']) +wbd8 = wbd8.set_index(FIM_ID) flows = flows.explode() # temp @@ -79,7 +80,7 @@ lakes_buffer = lakes.copy() 
lakes_buffer['geometry'] = lakes.buffer(lakes_buffer_input) # adding X meter buffer for spatial join comparison (currently using 20meters) -print ('splitting ' + str(len(flows)) + ' stream segments based on ' + str(maxLength) + ' m max length') +print ('splitting ' + str(len(flows)) + ' stream segments based on ' + str(max_length) + ' m max length') # remove empty geometries flows = flows.loc[~flows.is_empty,:] @@ -92,8 +93,8 @@ if lineString.length == 0: continue - # existing reaches of less than maxLength - if lineString.length < maxLength: + # existing reaches of less than max_length + if lineString.length < max_length: split_flows = split_flows + [lineString] line_points = [point for point in zip(*lineString.coords.xy)] @@ -106,7 +107,7 @@ slopes = slopes + [slope] continue - splitLength = lineString.length / np.ceil(lineString.length / maxLength) + splitLength = lineString.length / np.ceil(lineString.length / max_length) cumulative_line = [] line_points = [] diff --git a/lib/utils/shared_variables.py b/lib/utils/shared_variables.py index cf75c733c..40a8feacb 100644 --- a/lib/utils/shared_variables.py +++ b/lib/utils/shared_variables.py @@ -21,13 +21,13 @@ NHD_VECTOR_EXTRACTION_SUFFIX = '_HU4_GDB.zip' # -- Field Names -- # -FOSS_ID = 'fossid' +FIM_ID = 'fimid' # -- Other -- # -CONUS_STATE_LIST = {"AL", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", - "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", - "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", - "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "PR", "RI", "SC", +CONUS_STATE_LIST = {"AL", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", + "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", + "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", + "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"} OVERWRITE_WBD = 'OVERWRITE_WBD' diff --git a/tests/inundation.py b/tests/inundation.py index 0496d8d1b..679102432 100755 --- a/tests/inundation.py +++ b/tests/inundation.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import sys import numpy as np import pandas as pd from numba import njit, typeof, typed, types @@ -456,11 +457,13 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): 'HydroID':str,'stage':float, 'discharge_cms':float,'LakeID' : int} ) + hydroTable.set_index(['HUC','feature_id','HydroID'],inplace=True) + hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. if hydroTable.empty: - print ("All stream segments in this HUC are within lake boundaries.") + print ("All stream segments in HUC are within lake boundaries.") sys.exit(0) elif isinstance(hydroTable,pd.DataFrame): From 584f4a36d2b39fe6b62b46b830a3de4263a10f43 Mon Sep 17 00:00:00 2001 From: Brad Date: Mon, 22 Feb 2021 09:22:43 -0600 Subject: [PATCH 021/359] Enhancements to allow for evaluation at AHPS sites, the generation of a query-optimized metrics CSV, and the generation of categorical FIM. Enhancements to allow for evaluation at AHPS sites, the generation of a query-optimized metrics CSV, and the generation of categorical FIM. This merge requires that the `/test_cases` directory be updated for all machines performing evaluation. - `generate_categorical_fim.py` was added to allow production of NWS Flood Categorical HAND FIM (CatFIM) source data. More changes on this script are to follow in subsequent branches. 
- `ble_autoeval.sh` and `all_ble_stats_comparison.py` were deleted because `synthesize_test_cases.py` now handles the merging of metrics. - The code block in `run_test_case.py` that was responsible for printing the colored metrics to screen has been commented out because of the new scale of evaluations (formerly in `run_test_case.py`, now in `shared_functions.py`) - Remove unused imports from inundation wrappers in `/tools`. - Updated `synthesize_test_cases.py` to allow for AHPS site evaluations. - Reorganized `run_test_case.py` by moving more functions into `shared_functions.py`. - Created more shared variables in `shared_variables.py` and updated import statements in relevant scripts. This resolves #258 and #259. --- CHANGELOG.md | 22 ++ tests/aggregate_metrics.py | 167 ++++++++- tests/all_ble_stats_comparison.py | 54 --- tests/ble_autoeval.sh | 89 ----- tests/cache_metrics.py | 88 +++-- tests/run_test_case.py | 452 +++++++----------------- tests/synthesize_test_cases.py | 297 +++++++++++++--- tests/utils/shared_functions.py | 234 ++++++++++++ tests/utils/shared_variables.py | 20 ++ tools/generate_categorical_fim.py | 155 ++++++++ tools/inundation_wrapper_custom_flow.py | 6 - tools/inundation_wrapper_nwm_flows.py | 5 - 12 files changed, 1004 insertions(+), 585 deletions(-) delete mode 100755 tests/all_ble_stats_comparison.py delete mode 100755 tests/ble_autoeval.sh create mode 100644 tests/utils/shared_variables.py create mode 100644 tools/generate_categorical_fim.py diff --git a/CHANGELOG.md b/CHANGELOG.md index cdcc09070..f7f97b7dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,28 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.5.0 - 2021-02-22 - [PR #267](https://github.com/NOAA-OWP/cahaba/pull/267) + +Enhancements to allow for evaluation at AHPS sites, the generation of a query-optimized metrics CSV, and the generation of categorical FIM. This merge requires that the `/test_cases` directory be updated for all machines performing evaluation. + +### Additions + + - `generate_categorical_fim.py` was added to allow production of NWS Flood Categorical HAND FIM (CatFIM) source data. More changes on this script are to follow in subsequent branches. + +### Removals + + - `ble_autoeval.sh` and `all_ble_stats_comparison.py` were deleted because `synthesize_test_cases.py` now handles the merging of metrics. + - The code block in `run_test_case.py` that was responsible for printing the colored metrics to screen has been commented out because of the new scale of evaluations (formerly in `run_test_case.py`, now in `shared_functions.py`) + - Remove unused imports from inundation wrappers in `/tools`. + +### Changes + + - Updated `synthesize_test_cases.py` to allow for AHPS site evaluations. + - Reorganized `run_test_case.py` by moving more functions into `shared_functions.py`. + - Created more shared variables in `shared_variables.py` and updated import statements in relevant scripts. + +

+ ## v3.0.4.4 - 2021-02-19 - [PR #266](https://github.com/NOAA-OWP/cahaba/pull/266) Rating curves for short stream segments are replaced with rating curves from upstream/downstream segments. diff --git a/tests/aggregate_metrics.py b/tests/aggregate_metrics.py index 98134da59..d8a462d5b 100644 --- a/tests/aggregate_metrics.py +++ b/tests/aggregate_metrics.py @@ -3,15 +3,178 @@ import json import os import csv - + import argparse -TEST_CASES_DIR = r'/data/test_cases/' +TEST_CASES_DIR = r'/data/test_cases_new/' # TEMP = r'/data/temp' # Search through all previous_versions in test_cases from utils.shared_functions import compute_stats_from_contingency_table +def create_master_metrics_csv(): + + # Construct header + metrics_to_write = ['true_negatives_count', + 'false_negatives_count', + 'true_positives_count', + 'false_positives_count', + 'contingency_tot_count', + 'cell_area_m2', + 'TP_area_km2', + 'FP_area_km2', + 'TN_area_km2', + 'FN_area_km2', + 'contingency_tot_area_km2', + 'predPositive_area_km2', + 'predNegative_area_km2', + 'obsPositive_area_km2', + 'obsNegative_area_km2', + 'positiveDiff_area_km2', + 'CSI', + 'FAR', + 'TPR', + 'TNR', + 'PPV', + 'NPV', + 'ACC', + 'Bal_ACC', + 'MCC', + 'EQUITABLE_THREAT_SCORE', + 'PREVALENCE', + 'BIAS', + 'F1_SCORE', + 'TP_perc', + 'FP_perc', + 'TN_perc', + 'FN_perc', + 'predPositive_perc', + 'predNegative_perc', + 'obsPositive_perc', + 'obsNegative_perc', + 'positiveDiff_perc', + 'masked_count', + 'masked_perc', + 'masked_area_km2' + ] + + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] + list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source']] + + for benchmark_type in ['ble', 'ahps']: + + if benchmark_type == 'ble': + + test_cases = r'/data/test_cases' + test_cases_list = os.listdir(test_cases) + # AHPS test_ids + versions_to_aggregate = ['fim_1_0_0', 'fim_2_3_3', 'fim_3_0_0_3_fr_c'] + + for test_case in test_cases_list: + try: + int(test_case.split('_')[0]) + + huc = test_case.split('_')[0] + previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') + + for magnitude in ['100yr', '500yr']: + for version in versions_to_aggregate: + version_dir = os.path.join(previous_versions, version) + magnitude_dir = os.path.join(version_dir, magnitude) + + if os.path.exists(magnitude_dir): + + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f: + flow = 'NA' + nws_lid = "NA" + benchmark_source = 'ble' + sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + if os.path.exists(full_json_path): + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + + list_to_write.append(sub_list_to_append) + + except ValueError: + pass + + if benchmark_type == 'ahps': + + test_cases = r'/data/test_cases_ahps_testing' + test_cases_list = os.listdir(test_cases) + # AHPS test_ids + versions_to_aggregate = ['fim_1_0_0_nws_1_21_2021', 'fim_1_0_0_usgs_1_21_2021', + 'fim_2_x_ms_nws_1_21_2021', 'fim_2_x_ms_usgs_1_21_2021', + 'fim_3_0_0_3_ms_c_nws_1_21_2021', 'fim_3_0_0_3_ms_c_usgs_1_21_2021', + 'ms_xwalk_fill_missing_cal_nws', 'ms_xwalk_fill_missing_cal_usgs'] + + for test_case in test_cases_list: + try: + int(test_case.split('_')[0]) + + huc = 
test_case.split('_')[0] + previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') + + for magnitude in ['action', 'minor', 'moderate', 'major']: + for version in versions_to_aggregate: + + if 'nws' in version: + benchmark_source = 'ahps_nws' + if 'usgs' in version: + benchmark_source = 'ahps_usgs' + + version_dir = os.path.join(previous_versions, version) + magnitude_dir = os.path.join(version_dir, magnitude) + + if os.path.exists(magnitude_dir): + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f and 'total_area' not in f: + nws_lid = f[:5] + sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + flow = '' + if os.path.exists(full_json_path): + # Get flow used to map. + if 'usgs' in version: + parent_dir = 'usgs_1_21_2021' + if 'nws' in version: + parent_dir = 'nws_1_21_2021' + + flow_file = os.path.join(test_cases, parent_dir, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') + if os.path.exists(flow_file): + with open(flow_file, newline='') as csv_file: + reader = csv.reader(csv_file) + next(reader) + for row in reader: + flow = row[1] + if nws_lid == 'mcc01': + print(flow) + + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + list_to_write.append(sub_list_to_append) + + except ValueError: + pass + + with open(output_csv, 'w', newline='') as csvfile: + csv_writer = csv.writer(csvfile) + csv_writer.writerows(list_to_write) + + + def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfolder=""): # Read hucs into list. 
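# Illustrative sketch: the CSV assembled above by create_master_metrics_csv() is "query-optimized" --
# one row per version / nws_lid / magnitude / huc with the contingency metrics as columns -- so it can
# be sliced directly with pandas. The file path below is hypothetical; the column names are the ones
# built from additional_header_info_prefix and metrics_to_write above.
import pandas as pd

metrics = pd.read_csv('master_metrics.csv')  # hypothetical path for the CSV written by create_master_metrics_csv()
ble_100yr = metrics[(metrics['benchmark_source'] == 'ble') & (metrics['magnitude'] == '100yr')]
print(ble_100yr.groupby('version')['CSI'].mean())  # e.g. compare mean CSI across FIM versions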
diff --git a/tests/all_ble_stats_comparison.py b/tests/all_ble_stats_comparison.py deleted file mode 100755 index c4683c4e1..000000000 --- a/tests/all_ble_stats_comparison.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 - -import os -import pandas as pd -import argparse - - -def subset_vector_layers(huclist, branch, current_dev, outfolder): - - test_cases= r'data/test_cases' - ble_sitelist = [str(line.rstrip('\n')) for line in open(huclist)] - stat_list = ['fim_1_0_0', 'fim_2_3_3',str(current_dev), 'new_feature','eval'] - eval_all = pd.DataFrame([]) - - # stat_list = stat_list + [branch] - for site in ble_sitelist: - eval_100_path=os.path.join(test_cases,str(site) + '_ble', 'performance_archive', 'development_versions', branch, '100yr','stats_summary.csv') - eval_500_path=os.path.join(test_cases,str(site) + '_ble', 'performance_archive', 'development_versions', branch, '500yr','stats_summary.csv') - - if os.path.exists(eval_100_path) and os.path.exists(eval_500_path): - eval_100 = pd.read_csv(eval_100_path,index_col=0) - eval_100['eval'] = '100yr' - - eval_500 = pd.read_csv(eval_500_path,index_col=0) - eval_500['eval'] = '500yr' - - eval_combined = eval_100.append(eval_500) - eval_combined.columns = ['new_feature' if x==str(branch) else x for x in eval_combined.columns] - eval_combined = eval_combined.filter(items=stat_list) - eval_combined = eval_combined.reindex(columns=stat_list) - eval_combined['site'] = str(site) - eval_combined['branch'] = str(branch) - eval_all = eval_all.append(eval_combined) - - if not os.path.exists(outfolder): - os.makedirs(outfolder) - eval_all.to_csv(os.path.join(outfolder,'ble_stats_comparison.csv')) - -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Collect eval stats for BLE sites') - parser.add_argument('-b','--huclist', help='list of ble sites to test', required=True,type=str) - parser.add_argument('-e','--branch', help='list of outfolder(s)', required=False,type=str) - parser.add_argument('-d','--current-dev',help='name of current dev stat column',required=True,type=str) - parser.add_argument('-f','--outfolder',help='output folder',required=True,type=str) - - args = vars(parser.parse_args()) - - huclist = args['huclist'] - branch = args['branch'] - current_dev = args['current_dev'] - outfolder = args['outfolder'] - - subset_vector_layers(huclist,branch,current_dev,outfolder) diff --git a/tests/ble_autoeval.sh b/tests/ble_autoeval.sh deleted file mode 100755 index ea8af2d94..000000000 --- a/tests/ble_autoeval.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash -e -: -usage () -{ - echo 'Automate runs of run_test_case.py and the aggregation of metrics for list of BLE sites.' - echo 'Usage : ble_auto_eval.sh [REQ: -f -b -d -s -v ] [OPT: -h -j ]' - echo '' - echo 'REQUIRED:' - echo ' -f/--fim-outfolder : fim output directory(s)' - echo ' -b/--ble-list : list of ble sites to evaluate' - echo ' -d/--current-dev : current archived dev stats column name' - echo ' -s/--outfolder : outfolder name' - echo ' -v/--version : version eval results. options are Options: "DEV" or "PREV"' - echo '' - echo 'OPTIONS:' - echo ' -h/--help : help file' - echo ' -j/--jobLimit : max number of concurrent jobs to run. Default 1 job at time. 1 outputs' - echo ' stdout and stderr to terminal and logs. 
With >1 outputs progress and logs the rest' - exit -} - -if [ "$#" -lt 7 ] -then - usage -fi - -while [ "$1" != "" ]; do -case $1 -in - -f|--fim_outfolder) - shift - fim_outfolder="$1" - ;; - -b|--ble_list) - shift - ble_list="$1" - ;; - -d|--current_dev) - shift - current_dev="$1" - ;; - -s|--outfolder) - shift - outfolder="$1" - ;; - -v|--version) - shift - version="$1" - ;; - -j|--jobLimit) - shift - jobLimit=$1 - ;; - -h|--help) - shift - usage - ;; - esac - shift -done - -export testDir='foss_fim/tests' - - -for branch in $fim_outfolder -do - echo "processing feature branch: $branch" - - while read p; do - # Run Eval - if [ -d "/data/outputs/$branch/$p" ] - then - echo "processing ble for $branch/$p" - python3 /$testDir/run_test_case.py -r $branch/$p -t $p"_ble" -b $branch -c - fi - - if [ -d "/data/outputs/$branch/$(echo $p| cut -b 1-6)" ] - then - echo "processing ble for $branch/$(echo $p| cut -b 1-6)" - python3 /$testDir/run_test_case.py -r $branch/$(echo $p| cut -b 1-6) -t $p"_ble" -b $branch -c - fi - done <$ble_list -done - -echo "combining ble metrics" -python3 /$testDir/all_ble_stats_comparison.py -b $ble_list -e "$fim_outfolder" -d $current_dev -f $outfolder - -echo "calculating aggregate metrics" -python3 /$testDir/aggregate_metrics.py -c $version -b "$fim_outfolder" -u $ble_list -f $outfolder diff --git a/tests/cache_metrics.py b/tests/cache_metrics.py index 63571977f..3f601cc5c 100644 --- a/tests/cache_metrics.py +++ b/tests/cache_metrics.py @@ -2,11 +2,12 @@ import os import argparse +import traceback from run_test_case import run_alpha_test from multiprocessing import Pool -TEST_CASES_DIR = r'/data/test_cases/' +TEST_CASES_DIR = r'/data/test_cases_new/' #TODO remove "_new" PREVIOUS_FIM_DIR = r'/data/previous_fim' OUTPUTS_DIR = r'/data/outputs' @@ -14,7 +15,7 @@ def process_alpha_test(args): fim_run_dir = args[0] - branch_name = args[1] + version = args[1] test_id = args[2] magnitude = args[3] archive_results = args[4] @@ -27,9 +28,9 @@ def process_alpha_test(args): compare_to_previous = False try: - run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type) - except Exception as e: - print(e) + run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type) + except Exception: + traceback.print_exc() if __name__ == '__main__': @@ -40,7 +41,7 @@ def process_alpha_test(args): parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") - parser.add_argument('-b','--benchmark-category',help='Options include ble or ahps. Defaults to process both.',required=False, default=['ble', 'ahps']) + parser.add_argument('-b','--benchmark-category',help='Options include ble or ahps. Defaults to process both.',required=False, default=None) test_cases_dir_list = os.listdir(TEST_CASES_DIR) @@ -64,44 +65,53 @@ def process_alpha_test(args): else: print('Config (-c) option incorrectly set. 
Use "DEV" or "PREV"') - if type(benchmark_category) != list: - benchmark_category = [benchmark_category] + benchmark_category_list = [] + if benchmark_category == None: + for d in test_cases_dir_list: + if 'test_cases' in d: + benchmark_category_list.append(d.replace('_test_cases', '')) + else: + benchmark_category_list = [benchmark_category] + procs_list = [] - for test_id in test_cases_dir_list: - if 'validation' and 'other' not in test_id: - - current_huc = test_id.split('_')[0] - - if test_id.split('_')[1] in benchmark_category: - - for branch_name in previous_fim_list: - - if config == 'DEV': - fim_run_dir = os.path.join(OUTPUTS_DIR, branch_name, current_huc) - elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, branch_name, current_huc) - - if os.path.exists(fim_run_dir): + for bench_cat in benchmark_category_list: + bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases') + + bench_cat_test_case_list = os.listdir(bench_cat_test_case_dir) + + for test_id in bench_cat_test_case_list: + if 'validation' and 'other' not in test_id: + + current_huc = test_id.split('_')[0] + if test_id.split('_')[1] in bench_cat: + + for version in previous_fim_list: - if special_string != "": - branch_name = branch_name + '_' + special_string + if config == 'DEV': + fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc) + elif config == 'PREV': + fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc) + + if not os.path.exists(fim_run_dir): + fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) # For previous versions of HAND computed at HUC6 scale - if 'ble' in test_id: - magnitude = ['100yr', '500yr'] - elif 'ahps' in test_id: - magnitude = ['action', 'minor', 'moderate', 'major'] - else: - continue - - print("Adding " + test_id + " to list of test_ids to process...") - if job_number > 1: - procs_list.append([fim_run_dir, branch_name, test_id, magnitude, archive_results]) - else: - process_alpha_test([fim_run_dir, branch_name, test_id, magnitude, archive_results]) + if os.path.exists(fim_run_dir): + if special_string != "": + version = version + '_' + special_string - else: - print("No test_ids were found for the provided benchmark category: " + str(benchmark_category)) + if 'ble' in test_id: + magnitude = ['100yr', '500yr'] + elif 'usgs' or 'nws' in test_id: + magnitude = ['action', 'minor', 'moderate', 'major'] + else: + continue + + print("Adding " + test_id + " to list of test_ids to process...") + if job_number > 1: + procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results]) + else: + process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results]) if job_number > 1: pool = Pool(job_number) diff --git a/tests/run_test_case.py b/tests/run_test_case.py index 16f50b882..a11fa9ab5 100755 --- a/tests/run_test_case.py +++ b/tests/run_test_case.py @@ -2,152 +2,36 @@ import os import sys -import pandas as pd -import geopandas as gpd -import rasterio -import json -import csv -import argparse import shutil +import argparse -from utils.shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table +from utils.shared_functions import compute_contingency_stats_from_rasters +from utils.shared_variables import (TEST_CASES_DIR, INPUTS_DIR, ENDC, TRED_BOLD, WHITE_BOLD, CYAN_BOLD, AHPS_BENCHMARK_CATEGORIES) from inundation import inundate -TEST_CASES_DIR = r'/data/test_cases/' # Will update. 
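# Sketch of what the new tests/utils/shared_variables.py module is assumed to provide. The paths and
# ANSI color codes mirror the module-level constants removed from this file; the exact values kept in
# shared_variables.py, and the membership of AHPS_BENCHMARK_CATEGORIES, are assumptions inferred from
# how the imports are used below (AHPS test_ids end in _usgs or _nws).
TEST_CASES_DIR = r'/data/test_cases/'
INPUTS_DIR = r'/data/inputs'
ENDC = '\033[m'
TRED_BOLD = '\033[31;1m'
WHITE_BOLD = '\033[37;1m'
CYAN_BOLD = '\033[36;1m'
AHPS_BENCHMARK_CATEGORIES = ['usgs', 'nws']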
-INPUTS_DIR = r'/data/inputs' -PRINTWORTHY_STATS = ['CSI', 'TPR', 'TNR', 'FAR', 'MCC', 'TP_area_km2', 'FP_area_km2', 'TN_area_km2', 'FN_area_km2', 'contingency_tot_area_km2', 'TP_perc', 'FP_perc', 'TN_perc', 'FN_perc'] -GO_UP_STATS = ['CSI', 'TPR', 'MCC', 'TN_area_km2', 'TP_area_km2', 'TN_perc', 'TP_perc', 'TNR'] -GO_DOWN_STATS = ['FAR', 'FN_area_km2', 'FP_area_km2', 'FP_perc', 'FN_perc'] -OUTPUTS_DIR = os.environ['outputDataDir'] - -ENDC = '\033[m' -TGREEN_BOLD = '\033[32;1m' -TGREEN = '\033[32m' -TRED_BOLD = '\033[31;1m' -TWHITE = '\033[37m' -WHITE_BOLD = '\033[37;1m' -CYAN_BOLD = '\033[36;1m' - - -def profile_test_case_archive(archive_to_check, magnitude, stats_mode): - """ - This function searches multiple directories and locates previously produced performance statistics. - - Args: - archive_to_check (str): The directory path to search. - magnitude (str): Because a benchmark dataset may have multiple magnitudes, this argument defines - which magnitude is to be used when searching for previous statistics. - Returns: - archive_dictionary (dict): A dictionary of available statistics for previous versions of the domain and magnitude. - {version: {agreement_raster: agreement_raster_path, stats_csv: stats_csv_path, stats_json: stats_json_path}} - *Will only add the paths to files that exist. - - """ - - archive_dictionary = {} - - # List through previous version and check for available stats and maps. If available, add to dictionary. - available_versions_list = os.listdir(archive_to_check) - - if len(available_versions_list) == 0: - print("Cannot compare with -c flag because there are no data in the previous_versions directory.") - return - - for version in available_versions_list: - version_magnitude_dir = os.path.join(archive_to_check, version, magnitude) - stats_json = os.path.join(version_magnitude_dir, stats_mode + '_stats.json') - - if os.path.exists(stats_json): - archive_dictionary.update({version: {'stats_json': stats_json}}) - - return archive_dictionary - - -def compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster=None, stats_csv=None, stats_json=None, mask_values=None, stats_modes_list=['total_area'], test_id='', mask_dict={}): - """ - This function contains FIM-specific logic to prepare raster datasets for use in the generic get_contingency_table_from_binary_rasters() function. - This function also calls the generic compute_stats_from_contingency_table() function and writes the results to CSV and/or JSON, depending on user input. - - Args: - predicted_raster_path (str): The path to the predicted, or modeled, FIM extent raster. - benchmark_raster_path (str): The path to the benchmark, or truth, FIM extent raster. - agreement_raster (str): Optional. An agreement raster will be written to this path. 0: True Negatives, 1: False Negative, 2: False Positive, 3: True Positive. - stats_csv (str): Optional. Performance statistics will be written to this path. CSV allows for readability and other tabular processes. - stats_json (str): Optional. Performance statistics will be written to this path. JSON allows for quick ingestion into Python dictionary in other processes. - - Returns: - stats_dictionary (dict): A dictionary of statistics produced by compute_stats_from_contingency_table(). Statistic names are keys and statistic values are the values. - """ - - # Get cell size of benchmark raster. 
- raster = rasterio.open(predicted_raster_path) - t = raster.transform - cell_x = t[0] - cell_y = t[4] - cell_area = abs(cell_x*cell_y) - - # Get contingency table from two rasters. - contingency_table_dictionary = get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster, mask_values=mask_values, mask_dict=mask_dict) - - stats_dictionary = {} - - for stats_mode in contingency_table_dictionary: - true_negatives = contingency_table_dictionary[stats_mode]['true_negatives'] - false_negatives = contingency_table_dictionary[stats_mode]['false_negatives'] - false_positives = contingency_table_dictionary[stats_mode]['false_positives'] - true_positives = contingency_table_dictionary[stats_mode]['true_positives'] - masked_count = contingency_table_dictionary[stats_mode]['masked_count'] - file_handle = contingency_table_dictionary[stats_mode]['file_handle'] - - # Produce statistics from continency table and assign to dictionary. cell_area argument optional (defaults to None). - mode_stats_dictionary = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area, masked_count) - - # Write the mode_stats_dictionary to the stats_csv. - if stats_csv != None: - stats_csv = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.csv') - df = pd.DataFrame.from_dict(mode_stats_dictionary, orient="index", columns=['value']) - df.to_csv(stats_csv) - - # Write the mode_stats_dictionary to the stats_json. - if stats_json != None: - stats_json = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.json') - with open(stats_json, "w") as outfile: - json.dump(mode_stats_dictionary, outfile) - - stats_dictionary.update({stats_mode: mode_stats_dictionary}) - - return stats_dictionary - - -def check_for_regression(stats_json_to_test, previous_version, previous_version_stats_json_path, regression_test_csv=None): - - difference_dict = {} - - # Compare stats_csv to previous_version_stats_file - stats_dict_to_test = json.load(open(stats_json_to_test)) - previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) - - for stat, value in stats_dict_to_test.items(): - previous_version_value = previous_version_stats_dict[stat] - stat_value_diff = value - previous_version_value - difference_dict.update({stat + '_diff': stat_value_diff}) - - return difference_dict - - -def run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_previous=False, archive_results=False, mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False): +def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=False, archive_results=False, mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, overwrite=True): + + benchmark_category = test_id.split('_')[1] # Parse benchmark_category from test_id. + current_huc = test_id.split('_')[0] # Break off HUC ID and assign to variable. + # Construct paths to development test results if not existent. 
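# For orientation, the assumed on-disk layout built below (the root path per TEST_CASES_DIR is an assumption):
#   /data/test_cases/<benchmark>_test_cases/<HUC>_<benchmark>/
#       official_versions/<version>/<magnitude>/   <- used when archive_results is True
#       testing_versions/<version>/<magnitude>/    <- used otherwise (development runs)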
if archive_results: - branch_test_case_dir_parent = os.path.join(TEST_CASES_DIR, test_id, 'performance_archive', 'previous_versions', branch_name) + version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'official_versions', version) else: - branch_test_case_dir_parent = os.path.join(TEST_CASES_DIR, test_id, 'performance_archive', 'development_versions', branch_name) + version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'testing_versions', version) # Delete the entire directory if it already exists. - if os.path.exists(branch_test_case_dir_parent): - shutil.rmtree(branch_test_case_dir_parent) + if os.path.exists(version_test_case_dir_parent): + if overwrite == True: + shutil.rmtree(version_test_case_dir_parent) + else: + print("Metrics for ({version}: {test_id}) already exist. Use overwrite flag (-o) to overwrite metrics.".format(version=version, test_id=test_id)) + return + + os.mkdir(version_test_case_dir_parent) - print("Running the alpha test for test_id: " + test_id + ", " + branch_name + "...") + print("Running the alpha test for test_id: " + test_id + ", " + version + "...") stats_modes_list = ['total_area'] fim_run_parent = os.path.join(os.environ['outputDataDir'], fim_run_dir) @@ -155,7 +39,11 @@ def run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_prev # Create paths to fim_run outputs for use in inundate(). rem = os.path.join(fim_run_parent, 'rem_zeroed_masked.tif') + if not os.path.exists(rem): + rem = os.path.join(fim_run_parent, 'rem_clipped_zeroed_masked.tif') catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes.tif') + if not os.path.exists(catchments): + catchments = os.path.join(fim_run_parent, 'gw_catchments_reaches_clipped_addedAttributes.tif') if mask_type == 'huc': catchment_poly = '' else: @@ -164,9 +52,6 @@ def run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_prev # Map necessary inputs for inundation(). hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' - - benchmark_category = test_id.split('_')[1] - current_huc = test_id.split('_')[0] # Break off HUC ID and assign to variable. # Create list of shapefile paths to use as exclusion areas. zones_dir = os.path.join(TEST_CASES_DIR, 'other', 'zones') @@ -182,41 +67,6 @@ def run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_prev }, } - if not os.path.exists(branch_test_case_dir_parent): - os.mkdir(branch_test_case_dir_parent) - - # If the test_id is AHPS, then identify possible inclusion zones in the HUC. - if benchmark_category == 'ahps': - - ahps_inclusion_zones_dir = os.path.join(branch_test_case_dir_parent, 'ahps_domains') - print(ahps_inclusion_zones_dir) - if not os.path.exists(ahps_inclusion_zones_dir): - os.mkdir(ahps_inclusion_zones_dir) - - ahps_domain_shapefile = os.path.join(TEST_CASES_DIR, 'other', 'zones', 'ahps_domains.shp') - - # Open shapefile, determine the polys in the huc, create a different shapefile for each poly--name according to AHPS. - ahps_domain_obj = gpd.read_file(ahps_domain_shapefile) - ahps_domain_gdf = gpd.GeoDataFrame(ahps_domain_obj) - - # Loop through entries and compare against the huc4_list to get available HUCs within the geopackage domain. 
- for index, row in ahps_domain_gdf.iterrows(): - huc8_code = row['huc8_code'] - ahps = row['ahps_code'] - - if huc8_code == current_huc: - ahps_domain_subset = ahps_domain_obj[ahps_domain_obj.ahps_code == ahps] - - #.query("ahps_code=='{ahps_code}'".format(ahps_code=ahps_code)) - ahps_domain_subset_output = os.path.join(ahps_inclusion_zones_dir, ahps + '.shp') - ahps_domain_subset.to_file(ahps_domain_subset_output,driver='ESRI Shapefile') - - mask_dict.update({ahps: - {'path': ahps_domain_subset_output, - 'buffer': None, - 'operation': 'include'} - }) - if inclusion_area != '': inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name mask_dict.update({inclusion_area_name: {'path': inclusion_area, @@ -232,167 +82,107 @@ def run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_prev if type(magnitude_list) != list: magnitude_list = [magnitude_list] + # Get path to validation_data_{benchmark} directory and huc_dir. + validation_data_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category) + for magnitude in magnitude_list: - # Construct path to validation raster and forecast file. - - benchmark_raster_path = os.path.join(TEST_CASES_DIR, 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_depth_' + magnitude + '.tif') - if not os.path.exists(benchmark_raster_path): # Skip loop instance if the benchmark raster doesn't exist. - continue - - branch_test_case_dir = os.path.join(branch_test_case_dir_parent, magnitude) - - os.makedirs(branch_test_case_dir) # Make output directory for branch. - - # Define paths to inundation_raster and forecast file. - inundation_raster = os.path.join(branch_test_case_dir, 'inundation_extent.tif') - forecast = os.path.join(TEST_CASES_DIR, 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_flows_' + magnitude + '.csv') - - # Run inundate. - print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") - inundate( - rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, - depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True - ) - - print("-----> Inundation mapping complete.") - predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - - # Define outputs for agreement_raster, stats_json, and stats_csv. - - agreement_raster, stats_json, stats_csv = os.path.join(branch_test_case_dir, 'total_area_agreement.tif'), os.path.join(branch_test_case_dir, 'stats.json'), os.path.join(branch_test_case_dir, 'stats.csv') - - test_version_dictionary = compute_contingency_stats_from_rasters(predicted_raster_path, - benchmark_raster_path, - agreement_raster, - stats_csv=stats_csv, - stats_json=stats_json, - mask_values=[], - stats_modes_list=stats_modes_list, - test_id=test_id, - mask_dict=mask_dict, - ) - print(" ") - print("Evaluation complete. 
All metrics for " + test_id + ", " + branch_name + ", " + magnitude + " are available at " + CYAN_BOLD + branch_test_case_dir + ENDC) - print(" ") - - if compare_to_previous: - text_block = [] - # Compare to previous stats files that are available. - archive_to_check = os.path.join(TEST_CASES_DIR, test_id, 'performance_archive', 'previous_versions') - for stats_mode in stats_modes_list: - archive_dictionary = profile_test_case_archive(archive_to_check, magnitude, stats_mode) - - if archive_dictionary == {}: - break - - # Create header for section. - header = [stats_mode] - for previous_version, paths in archive_dictionary.items(): - header.append(previous_version) - header.append(branch_name) - text_block.append(header) - - # Loop through stats in PRINTWORTHY_STATS for left. - for stat in PRINTWORTHY_STATS: - stat_line = [stat] - for previous_version, paths in archive_dictionary.items(): - # Load stats for previous version. - previous_version_stats_json_path = paths['stats_json'] - if os.path.exists(previous_version_stats_json_path): - previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) + version_test_case_dir = os.path.join(version_test_case_dir_parent, magnitude) + if not os.path.exists(version_test_case_dir): + os.mkdir(version_test_case_dir) - # Append stat for the version to state_line. - stat_line.append(previous_version_stats_dict[stat]) - - - # Append stat for the current version to stat_line. - stat_line.append(test_version_dictionary[stats_mode][stat]) - - text_block.append(stat_line) - - text_block.append([" "]) - - regression_report_csv = os.path.join(branch_test_case_dir, 'stats_summary.csv') - with open(regression_report_csv, 'w', newline='') as csvfile: - csv_writer = csv.writer(csvfile) - csv_writer.writerows(text_block) + # Construct path to validation raster and forecast file. 
+ if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + benchmark_raster_path_list, forecast_list = [], [] + lid_dir_list = os.listdir(os.path.join(validation_data_path, current_huc)) + lid_list, inundation_raster_list, extent_file_list = [], [], [] + + for lid in lid_dir_list: + lid_dir = os.path.join(validation_data_path, current_huc, lid) + benchmark_raster_path_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_depth_' + magnitude + '.tif')) # TEMP + forecast_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_flows_' + magnitude + '.csv')) # TEMP + lid_list.append(lid) + inundation_raster_list.append(os.path.join(version_test_case_dir, lid + '_inundation_extent.tif')) + extent_file_list.append(os.path.join(lid_dir, lid + '_extent.shp')) + + ahps_inclusion_zones_dir = os.path.join(version_test_case_dir_parent, 'ahps_domains') + + if not os.path.exists(ahps_inclusion_zones_dir): + os.mkdir(ahps_inclusion_zones_dir) - print() - print("--------------------------------------------------------------------------------------------------") + else: + benchmark_raster_file = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_depth_' + magnitude + '.tif') + benchmark_raster_path_list = [benchmark_raster_file] + forecast_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_flows_' + magnitude + '.csv') + forecast_list = [forecast_path] + inundation_raster_list = [os.path.join(version_test_case_dir, 'inundation_extent.tif')] + + for index in range(0, len(benchmark_raster_path_list)): + benchmark_raster_path = benchmark_raster_path_list[index] + forecast = forecast_list[index] + inundation_raster = inundation_raster_list[index] - stats_mode = stats_modes_list[0] + # Only need to define ahps_lid and ahps_extent_file for AHPS_BENCHMARK_CATEGORIES. + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + ahps_lid = lid_list[index] + ahps_extent_file = extent_file_list[index] + mask_dict.update({ahps_lid: + {'path': ahps_extent_file, + 'buffer': None, + 'operation': 'include'} + }) + + if not os.path.exists(benchmark_raster_path) or not os.path.exists(ahps_extent_file) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. + continue + else: # If not in AHPS_BENCHMARK_CATEGORIES. + if not os.path.exists(benchmark_raster_path) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. + continue + + # Run inundate. + print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") try: - last_version_index = text_block[0].index('dev_latest') - except ValueError: - try: - last_version_index = text_block[0].index('fim_2_3_3') - except ValueError: - try: - last_version_index = text_block[0].index('fim_1_0_0') - except ValueError: - print(TRED_BOLD + "Warning: " + ENDC + "Cannot compare " + branch_name + " to a previous version because no authoritative versions were found in previous_versions directory. 
Future version of run_test_case may allow for comparisons between dev branches.") - print() - continue - + inundate( + rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, + depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True + ) - - for line in text_block: - first_item = line[0] - if first_item in stats_modes_list: - current_version_index = line.index(branch_name) - if first_item != stats_mode: # Update the stats_mode and print a separator. - print() - print() - print("--------------------------------------------------------------------------------------------------") - print() - stats_mode = first_item - print(CYAN_BOLD + current_huc + ": " + magnitude.upper(), ENDC) - print(CYAN_BOLD + stats_mode.upper().replace('_', ' ') + " METRICS" + ENDC) - print() - - color = WHITE_BOLD - metric_name = ' '.center(len(max(PRINTWORTHY_STATS, key=len))) - percent_change_header = '% CHG' - difference_header = 'DIFF' - current_version_header = line[current_version_index].upper() - last_version_header = line[last_version_index].upper() - # Print Header. - print(color + metric_name + " " + percent_change_header.center((7)) + " " + difference_header.center((15)) + " " + current_version_header.center(18) + " " + last_version_header.center(18), ENDC) - # Format and print stat row. - elif first_item in PRINTWORTHY_STATS: - stat_name = first_item.upper().center(len(max(PRINTWORTHY_STATS, key=len))).replace('_', ' ') - current_version = round((line[current_version_index]), 3) - last_version = round((line[last_version_index]) + 0.000, 3) - difference = round(current_version - last_version, 3) - if difference > 0: - symbol = '+' - if first_item in GO_UP_STATS: - color = TGREEN_BOLD - elif first_item in GO_DOWN_STATS: - color = TRED_BOLD - else: - color = TWHITE - if difference < 0: - symbol = '-' - if first_item in GO_UP_STATS: - color = TRED_BOLD - elif first_item in GO_DOWN_STATS: - color = TGREEN_BOLD - else: - color = TWHITE - - if difference == 0 : - symbol, color = '+', TGREEN - percent_change = round((difference / last_version)*100,2) - - print(WHITE_BOLD + stat_name + ENDC + " " + color + (symbol + " {:5.2f}".format(abs(percent_change)) + " %").rjust(len(percent_change_header)), ENDC + " " + color + ("{:12.3f}".format((difference))).rjust(len(difference_header)), ENDC + " " + "{:15.3f}".format(current_version).rjust(len(current_version_header)) + " " + "{:15.3f}".format(last_version).rjust(len(last_version_header)) + " ") - - print() - print() - print() - print("--------------------------------------------------------------------------------------------------") - print() + print("-----> Inundation mapping complete.") + predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. + + # Define outputs for agreement_raster, stats_json, and stats_csv. 
+ if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + else: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + + compute_contingency_stats_from_rasters(predicted_raster_path, + benchmark_raster_path, + agreement_raster, + stats_csv=stats_csv, + stats_json=stats_json, + mask_values=[], + stats_modes_list=stats_modes_list, + test_id=test_id, + mask_dict=mask_dict, + ) + + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + del mask_dict[ahps_lid] + + print(" ") + print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) + print(" ") + except Exception as e: + print(e) + + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + # -- Delete temp files -- # + # List all files in the output directory. + output_file_list = os.listdir(version_test_case_dir) + for output_file in output_file_list: + if "total_area" in output_file: + full_output_file_path = os.path.join(version_test_case_dir, output_file) + os.remove(full_output_file_path) if __name__ == '__main__': @@ -400,7 +190,7 @@ def run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_prev # Parse arguments. parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) - parser.add_argument('-b', '--branch-name',help='The name of the working branch in which features are being tested',required=True,default="") + parser.add_argument('-b', '--version-name',help='The name of the working version in which features are being tested',required=True,default="") parser.add_argument('-t', '--test-id',help='The test_id to use. Format as: HUC_BENCHMARKTYPE, e.g. 12345678_ble.',required=True,default="") parser.add_argument('-m', '--mask-type', help='Specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") parser.add_argument('-y', '--magnitude',help='The magnitude to run.',required=False, default="") @@ -409,6 +199,7 @@ def run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_prev parser.add_argument('-i', '--inclusion-area', help='Path to shapefile. Contingency metrics will be produced from pixels inside of shapefile extent.', required=False, default="") parser.add_argument('-ib','--inclusion-area-buffer', help='Buffer to use when masking contingency metrics with inclusion area.', required=False, default="0") parser.add_argument('-l', '--light-run', help='Using the light_run option will result in only stat files being written, and NOT grid files.', required=False, action='store_true') + parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, default=False) # Extract to dictionary and assign to variables. 
args = vars(parser.parse_args()) @@ -454,12 +245,9 @@ def run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_prev print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. ble options include: 100yr, 500yr. ahps options include action, minor, moderate, major." + ENDC) exit_flag = True - if exit_flag: print() sys.exit() - else: - run_alpha_test(**args) diff --git a/tests/synthesize_test_cases.py b/tests/synthesize_test_cases.py index 913ffd6da..65c241336 100644 --- a/tests/synthesize_test_cases.py +++ b/tests/synthesize_test_cases.py @@ -1,26 +1,182 @@ #!/usr/bin/env python3 - import os import argparse from multiprocessing import Pool +import json +import csv from run_test_case import run_alpha_test -from all_ble_stats_comparison import subset_vector_layers -from aggregate_metrics import aggregate_metrics +from utils.shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR -TEST_CASES_DIR = r'/data/test_cases/' -PREVIOUS_FIM_DIR = r'/data/previous_fim' -OUTPUTS_DIR = r'/data/outputs' + +def create_master_metrics_csv(master_metrics_csv_output): + + # Construct header + metrics_to_write = ['true_negatives_count', + 'false_negatives_count', + 'true_positives_count', + 'false_positives_count', + 'contingency_tot_count', + 'cell_area_m2', + 'TP_area_km2', + 'FP_area_km2', + 'TN_area_km2', + 'FN_area_km2', + 'contingency_tot_area_km2', + 'predPositive_area_km2', + 'predNegative_area_km2', + 'obsPositive_area_km2', + 'obsNegative_area_km2', + 'positiveDiff_area_km2', + 'CSI', + 'FAR', + 'TPR', + 'TNR', + 'PPV', + 'NPV', + 'ACC', + 'Bal_ACC', + 'MCC', + 'EQUITABLE_THREAT_SCORE', + 'PREVALENCE', + 'BIAS', + 'F1_SCORE', + 'TP_perc', + 'FP_perc', + 'TN_perc', + 'FN_perc', + 'predPositive_perc', + 'predNegative_perc', + 'obsPositive_perc', + 'obsNegative_perc', + 'positiveDiff_perc', + 'masked_count', + 'masked_perc', + 'masked_area_km2' + ] + + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] + list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source'] + ['extent_config']] + + versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) + + for benchmark_source in ['ble', 'nws', 'usgs']: + + benchmark_test_case_dir = os.path.join(TEST_CASES_DIR, benchmark_source + '_test_cases') + + if benchmark_source == 'ble': + test_cases_list = os.listdir(benchmark_test_case_dir) + + for test_case in test_cases_list: + try: + int(test_case.split('_')[0]) + + huc = test_case.split('_')[0] + official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') + + for magnitude in ['100yr', '500yr']: + for version in versions_to_aggregate: + if '_fr_' in version: + extent_config = 'FR' + if '_ms_' in version: + extent_config = 'MS' + if '_fr_' or '_ms_' not in version: + extent_config = 'FR' + version_dir = os.path.join(official_versions, version) + magnitude_dir = os.path.join(version_dir, magnitude) + + if os.path.exists(magnitude_dir): + + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f: + flow = 'NA' + nws_lid = "NA" + benchmark_source = 'ble' + sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + if os.path.exists(full_json_path): + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + 
sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + sub_list_to_append.append(extent_config) + + list_to_write.append(sub_list_to_append) + except ValueError: + pass + + if benchmark_source in ['nws', 'usgs']: + test_cases_list = os.listdir(TEST_CASES_DIR) + + for test_case in test_cases_list: + try: + int(test_case.split('_')[0]) + + huc = test_case.split('_')[0] + official_versions = os.path.join(benchmark_test_case_dir, test_case, 'performance_archive', 'previous_versions') + + for magnitude in ['action', 'minor', 'moderate', 'major']: + for version in versions_to_aggregate: + if '_fr_' in version: + extent_config = 'FR' + if '_ms_' in version: + extent_config = 'MS' + if '_fr_' or '_ms_' not in version: + extent_config = 'FR' + + version_dir = os.path.join(official_versions, version) + magnitude_dir = os.path.join(version_dir, magnitude) + + if os.path.exists(magnitude_dir): + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f and 'total_area' not in f: + nws_lid = f[:5] + sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + flow = '' + if os.path.exists(full_json_path): + + # Get flow used to map. + flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') + if os.path.exists(flow_file): + with open(flow_file, newline='') as csv_file: + reader = csv.reader(csv_file) + next(reader) + for row in reader: + flow = row[1] + if nws_lid == 'mcc01': + print(flow) + + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + sub_list_to_append.append(extent_config) + + list_to_write.append(sub_list_to_append) + except ValueError: + pass + + with open(master_metrics_csv_output, 'w', newline='') as csvfile: + csv_writer = csv.writer(csvfile) + csv_writer.writerows(list_to_write) def process_alpha_test(args): fim_run_dir = args[0] - branch_name = args[1] + version = args[1] test_id = args[2] magnitude = args[3] archive_results = args[4] + overwrite = args[5] mask_type = 'huc' @@ -30,7 +186,7 @@ def process_alpha_test(args): compare_to_previous = False try: - run_alpha_test(fim_run_dir, branch_name, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type) + run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=compare_to_previous, archive_results=archive_results, mask_type=mask_type, overwrite=overwrite) except Exception as e: print(e) @@ -40,80 +196,105 @@ def process_alpha_test(args): # Parse arguments. parser = argparse.ArgumentParser(description='Caches metrics from previous versions of HAND.') parser.add_argument('-c','--config',help='Save outputs to development_versions or previous_versions? Options: "DEV" or "PREV"',required=True) - parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=True) + parser.add_argument('-v','--fim-version',help='Name of fim version to cache.',required=False, default="all") parser.add_argument('-j','--job-number',help='Number of processes to use. 
Default is 1.',required=False, default="1") parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") - parser.add_argument('-b','--benchmark-category',help='Options include ble or ahps. Defaults to process both.',required=False, default=['ble', 'ahps']) - parser.add_argument('-l','--huc8-list',help='A list of HUC8s to synthesize.',required=True) - parser.add_argument('-d','--current-dev',help='The current dev id.',required=True) - parser.add_argument('-o','--output-folder',help='The directory where synthesis outputs will be written.',required=True) + parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. Defaults to process all categories.',required=False, default="all") + parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true") + parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=True) - test_cases_dir_list = os.listdir(TEST_CASES_DIR) - + # Assign variables from arguments. args = vars(parser.parse_args()) - config = args['config'] - branch_name = args['fim_version'] + fim_version = args['fim_version'] job_number = int(args['job_number']) special_string = args['special_string'] benchmark_category = args['benchmark_category'] + overwrite = args['overwrite'] + master_metrics_csv = args['master_metrics_csv'] + + # Default to processing all possible versions in PREVIOUS_FIM_DIR. Otherwise, process only the user-supplied version. + if fim_version != "all": + previous_fim_list = [fim_version] + else: + if config == 'PREV': + previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) + elif config == 'DEV': + previous_fim_list = os.listdir(OUTPUTS_DIR) - + # Define whether or not to archive metrics in "official_versions" or "testing_versions" for each test_id. if config == 'PREV': archive_results = True elif config == 'DEV': archive_results = False else: print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') - - if type(benchmark_category) != list: - benchmark_category = [benchmark_category] + + # List all available benchmark categories and test_cases. + test_cases_dir_list = os.listdir(TEST_CASES_DIR) + benchmark_category_list = [] + if benchmark_category == "all": + for d in test_cases_dir_list: + if 'test_cases' in d: + benchmark_category_list.append(d.replace('_test_cases', '')) + else: + benchmark_category_list = [benchmark_category] + # Loop through benchmark categories. procs_list = [] - for test_id in test_cases_dir_list: - if not any(x in test_id for x in ['validation','other','.lst']):#if 'validation' and 'other' not in test_id: - - current_huc = test_id.split('_')[0] - print(current_huc) - if test_id.split('_')[1] in benchmark_category: - - - if config == 'DEV': - fim_run_dir = os.path.join(OUTPUTS_DIR, branch_name, current_huc) - elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, branch_name, current_huc) - - if os.path.exists(fim_run_dir): - - if special_string != "": - branch_name = branch_name + '_' + special_string - - if 'ble' in test_id: - magnitude = ['100yr', '500yr'] - elif 'ahps' in test_id: - magnitude = ['action', 'minor', 'moderate', 'major'] - else: - continue + for bench_cat in benchmark_category_list: + + # Map path to appropriate test_cases folder and list test_ids into bench_cat_id_list. 
+ bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases')
+ bench_cat_id_list = os.listdir(bench_cat_test_case_dir)
+
+ # Loop through test_ids in bench_cat_id_list.
+ for test_id in bench_cat_id_list:
+ if 'validation' not in test_id and 'other' not in test_id:
+ current_huc = test_id.split('_')[0]
+ if test_id.split('_')[1] in bench_cat:
 
- print("Adding " + test_id + " to list of test_ids to process...")
- if job_number > 1:
- procs_list.append([fim_run_dir, branch_name, test_id, magnitude, archive_results])
- else:
- process_alpha_test([fim_run_dir, branch_name, test_id, magnitude, archive_results])
+ # Loop through versions.
+ for version in previous_fim_list:
+ if config == 'DEV':
+ fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc)
+ elif config == 'PREV':
+ fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc)
+
+ # For previous versions of HAND computed at HUC6 scale
+ if not os.path.exists(fim_run_dir):
+ if config == 'DEV':
+ fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc[:6])
+ print(fim_run_dir)
+ elif config == 'PREV':
+ fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6])
 
- else:
- print("No test_ids were found for the provided benchmark category: " + str(test_id.split('_')[1]))
+ if os.path.exists(fim_run_dir):
+
+ # If a user supplies a special_string (-s), then add it to the end of the created dirs.
+ if special_string != "":
+ version = version + '_' + special_string
+
+ # Define the magnitude lists to use, depending on test_id.
+ if 'ble' in test_id:
+ magnitude = ['100yr', '500yr']
+ elif 'usgs' in test_id or 'nws' in test_id:
+ magnitude = ['action', 'minor', 'moderate', 'major']
+ else:
+ continue
+
+ # Either add to list to multiprocess or process serially, depending on user specification.
+ if job_number > 1:
+ procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results, overwrite])
+ else:
+ process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results, overwrite])
 
 # Multiprocess alpha test runs.
 if job_number > 1:
 pool = Pool(job_number)
 pool.map(process_alpha_test, procs_list)
-
- # Do all_ble_stats_comparison.
- subset_vector_layers(args['huc8_list'], branch_name, args['current_dev'], args['output_folder'])
- # Do aggregate_metrics.
- aggregate_metrics(config=config, branch=branch_name, hucs=args['huc8_list'], special_string=args['special_string'], outfolder=args['output_folder']) - - + print("Creating master metrics CSV...") + create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv) \ No newline at end of file diff --git a/tests/utils/shared_functions.py b/tests/utils/shared_functions.py index 777575f02..d36c22814 100644 --- a/tests/utils/shared_functions.py +++ b/tests/utils/shared_functions.py @@ -1,5 +1,239 @@ #!/usr/bin/env python3 +import os +import json +import csv +import rasterio +import pandas as pd +from utils.shared_variables import (TEST_CASES_DIR, PRINTWORTHY_STATS, GO_UP_STATS, GO_DOWN_STATS, + ENDC, TGREEN_BOLD, TGREEN, TRED_BOLD, TWHITE, WHITE_BOLD, CYAN_BOLD) + +def check_for_regression(stats_json_to_test, previous_version, previous_version_stats_json_path, regression_test_csv=None): + + difference_dict = {} + + # Compare stats_csv to previous_version_stats_file + stats_dict_to_test = json.load(open(stats_json_to_test)) + previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) + + for stat, value in stats_dict_to_test.items(): + previous_version_value = previous_version_stats_dict[stat] + stat_value_diff = value - previous_version_value + difference_dict.update({stat + '_diff': stat_value_diff}) + + return difference_dict + + +def compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster=None, stats_csv=None, stats_json=None, mask_values=None, stats_modes_list=['total_area'], test_id='', mask_dict={}): + """ + This function contains FIM-specific logic to prepare raster datasets for use in the generic get_contingency_table_from_binary_rasters() function. + This function also calls the generic compute_stats_from_contingency_table() function and writes the results to CSV and/or JSON, depending on user input. + + Args: + predicted_raster_path (str): The path to the predicted, or modeled, FIM extent raster. + benchmark_raster_path (str): The path to the benchmark, or truth, FIM extent raster. + agreement_raster (str): Optional. An agreement raster will be written to this path. 0: True Negatives, 1: False Negative, 2: False Positive, 3: True Positive. + stats_csv (str): Optional. Performance statistics will be written to this path. CSV allows for readability and other tabular processes. + stats_json (str): Optional. Performance statistics will be written to this path. JSON allows for quick ingestion into Python dictionary in other processes. + + Returns: + stats_dictionary (dict): A dictionary of statistics produced by compute_stats_from_contingency_table(). Statistic names are keys and statistic values are the values. + """ + + # Get cell size of benchmark raster. + raster = rasterio.open(predicted_raster_path) + t = raster.transform + cell_x = t[0] + cell_y = t[4] + cell_area = abs(cell_x*cell_y) + + # Get contingency table from two rasters. 
+ contingency_table_dictionary = get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_raster_path, agreement_raster, mask_values=mask_values, mask_dict=mask_dict) + + stats_dictionary = {} + + for stats_mode in contingency_table_dictionary: + true_negatives = contingency_table_dictionary[stats_mode]['true_negatives'] + false_negatives = contingency_table_dictionary[stats_mode]['false_negatives'] + false_positives = contingency_table_dictionary[stats_mode]['false_positives'] + true_positives = contingency_table_dictionary[stats_mode]['true_positives'] + masked_count = contingency_table_dictionary[stats_mode]['masked_count'] + file_handle = contingency_table_dictionary[stats_mode]['file_handle'] + + # Produce statistics from continency table and assign to dictionary. cell_area argument optional (defaults to None). + mode_stats_dictionary = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area, masked_count) + + # Write the mode_stats_dictionary to the stats_csv. + if stats_csv != None: + stats_csv = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.csv') + df = pd.DataFrame.from_dict(mode_stats_dictionary, orient="index", columns=['value']) + df.to_csv(stats_csv) + + # Write the mode_stats_dictionary to the stats_json. + if stats_json != None: + stats_json = os.path.join(os.path.split(stats_csv)[0], file_handle + '_stats.json') + with open(stats_json, "w") as outfile: + json.dump(mode_stats_dictionary, outfile) + + stats_dictionary.update({stats_mode: mode_stats_dictionary}) + + return stats_dictionary + + +def profile_test_case_archive(archive_to_check, magnitude, stats_mode): + """ + This function searches multiple directories and locates previously produced performance statistics. + + Args: + archive_to_check (str): The directory path to search. + magnitude (str): Because a benchmark dataset may have multiple magnitudes, this argument defines + which magnitude is to be used when searching for previous statistics. + Returns: + archive_dictionary (dict): A dictionary of available statistics for previous versions of the domain and magnitude. + {version: {agreement_raster: agreement_raster_path, stats_csv: stats_csv_path, stats_json: stats_json_path}} + *Will only add the paths to files that exist. + + """ + + archive_dictionary = {} + + # List through previous version and check for available stats and maps. If available, add to dictionary. + available_versions_list = os.listdir(archive_to_check) + + if len(available_versions_list) == 0: + print("Cannot compare with -c flag because there are no data in the previous_versions directory.") + return + + for version in available_versions_list: + version_magnitude_dir = os.path.join(archive_to_check, version, magnitude) + stats_json = os.path.join(version_magnitude_dir, stats_mode + '_stats.json') + + if os.path.exists(stats_json): + archive_dictionary.update({version: {'stats_json': stats_json}}) + + return archive_dictionary + + +#def compare_to_previous(benchmark_category, test_id, stats_modes_list, magnitude, version, test_version_dictionary, version_test_case_dir): +# text_block = [] +# # Compare to previous stats files that are available. 
+# archive_to_check = os.path.join(TEST_CASES_DIR, benchmark_category + 'test_cases', test_id, 'official_versions') +# for stats_mode in stats_modes_list: +# archive_dictionary = profile_test_case_archive(archive_to_check, magnitude, stats_mode) +# +# if archive_dictionary == {}: +# break +# +# # Create header for section. +# header = [stats_mode] +# for previous_version, paths in archive_dictionary.items(): +# header.append(previous_version) +# header.append(version) +# text_block.append(header) +# +# # Loop through stats in PRINTWORTHY_STATS for left. +# for stat in PRINTWORTHY_STATS: +# stat_line = [stat] +# for previous_version, paths in archive_dictionary.items(): +# # Load stats for previous version. +# previous_version_stats_json_path = paths['stats_json'] +# if os.path.exists(previous_version_stats_json_path): +# previous_version_stats_dict = json.load(open(previous_version_stats_json_path)) +# +# # Append stat for the version to state_line. +# stat_line.append(previous_version_stats_dict[stat]) +# +# +# # Append stat for the current version to stat_line. +# stat_line.append(test_version_dictionary[stats_mode][stat]) +# +# text_block.append(stat_line) +# +# text_block.append([" "]) +# +# regression_report_csv = os.path.join(version_test_case_dir, 'stats_summary.csv') +# with open(regression_report_csv, 'w', newline='') as csvfile: +# csv_writer = csv.writer(csvfile) +# csv_writer.writerows(text_block) +# +# print() +# print("--------------------------------------------------------------------------------------------------") +# +# stats_mode = stats_modes_list[0] +# try: +# last_version_index = text_block[0].index('dev_latest') +# except ValueError: +# try: +# last_version_index = text_block[0].index('fim_2_3_3') +# except ValueError: +# try: +# last_version_index = text_block[0].index('fim_1_0_0') +# except ValueError: +# print(TRED_BOLD + "Warning: " + ENDC + "Cannot compare " + version + " to a previous version because no authoritative versions were found in previous_versions directory. Future version of run_test_case may allow for comparisons between dev versions.") +# print() +# continue +# +# +# +# for line in text_block: +# first_item = line[0] +# if first_item in stats_modes_list: +# current_version_index = line.index(version) +# if first_item != stats_mode: # Update the stats_mode and print a separator. +# print() +# print() +# print("--------------------------------------------------------------------------------------------------") +# print() +# stats_mode = first_item +# print(CYAN_BOLD + current_huc + ": " + magnitude.upper(), ENDC) +# print(CYAN_BOLD + stats_mode.upper().replace('_', ' ') + " METRICS" + ENDC) +# print() +# +# color = WHITE_BOLD +# metric_name = ' '.center(len(max(PRINTWORTHY_STATS, key=len))) +# percent_change_header = '% CHG' +# difference_header = 'DIFF' +# current_version_header = line[current_version_index].upper() +# last_version_header = line[last_version_index].upper() +# # Print Header. +# print(color + metric_name + " " + percent_change_header.center((7)) + " " + difference_header.center((15)) + " " + current_version_header.center(18) + " " + last_version_header.center(18), ENDC) +# # Format and print stat row. 
+# elif first_item in PRINTWORTHY_STATS: +# stat_name = first_item.upper().center(len(max(PRINTWORTHY_STATS, key=len))).replace('_', ' ') +# current_version = round((line[current_version_index]), 3) +# last_version = round((line[last_version_index]) + 0.000, 3) +# difference = round(current_version - last_version, 3) +# if difference > 0: +# symbol = '+' +# if first_item in GO_UP_STATS: +# color = TGREEN_BOLD +# elif first_item in GO_DOWN_STATS: +# color = TRED_BOLD +# else: +# color = TWHITE +# if difference < 0: +# symbol = '-' +# if first_item in GO_UP_STATS: +# color = TRED_BOLD +# elif first_item in GO_DOWN_STATS: +# color = TGREEN_BOLD +# else: +# color = TWHITE +# +# if difference == 0 : +# symbol, color = '+', TGREEN +# percent_change = round((difference / last_version)*100,2) +# +# print(WHITE_BOLD + stat_name + ENDC + " " + color + (symbol + " {:5.2f}".format(abs(percent_change)) + " %").rjust(len(percent_change_header)), ENDC + " " + color + ("{:12.3f}".format((difference))).rjust(len(difference_header)), ENDC + " " + "{:15.3f}".format(current_version).rjust(len(current_version_header)) + " " + "{:15.3f}".format(last_version).rjust(len(last_version_header)) + " ") +# +# print() +# print() +# print() +# print("--------------------------------------------------------------------------------------------------") +# print() +# + + def compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=None, masked_count=None): """ This generic function takes contingency table metrics as arguments and returns a dictionary of contingency table statistics. diff --git a/tests/utils/shared_variables.py b/tests/utils/shared_variables.py new file mode 100644 index 000000000..4f844e3e9 --- /dev/null +++ b/tests/utils/shared_variables.py @@ -0,0 +1,20 @@ +import os + +TEST_CASES_DIR = r'/data/test_cases_new/' # TODO remove "_new" +PREVIOUS_FIM_DIR = r'/data/previous_fim' +OUTPUTS_DIR = os.environ['outputDataDir'] +INPUTS_DIR = r'/data/inputs' +PRINTWORTHY_STATS = ['CSI', 'TPR', 'TNR', 'FAR', 'MCC', 'TP_area_km2', 'FP_area_km2', 'TN_area_km2', 'FN_area_km2', 'contingency_tot_area_km2', 'TP_perc', 'FP_perc', 'TN_perc', 'FN_perc'] +GO_UP_STATS = ['CSI', 'TPR', 'MCC', 'TN_area_km2', 'TP_area_km2', 'TN_perc', 'TP_perc', 'TNR'] +GO_DOWN_STATS = ['FAR', 'FN_area_km2', 'FP_area_km2', 'FP_perc', 'FN_perc'] +AHPS_BENCHMARK_CATEGORIES = ['usgs', 'ble'] + + + +ENDC = '\033[m' +TGREEN_BOLD = '\033[32;1m' +TGREEN = '\033[32m' +TRED_BOLD = '\033[31;1m' +TWHITE = '\033[37m' +WHITE_BOLD = '\033[37;1m' +CYAN_BOLD = '\033[36;1m' diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py new file mode 100644 index 000000000..9423f6c81 --- /dev/null +++ b/tools/generate_categorical_fim.py @@ -0,0 +1,155 @@ +import os +from multiprocessing import Pool +import argparse +import traceback +import sys + +sys.path.insert(1, 'foss_fim/tests') +from inundation import inundate + +INPUTS_DIR = r'/data/inputs' + +# Define necessary variables for inundation(). +hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' +mask_type, catchment_poly = 'huc', '' + + +def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, job_number, gpkg, extif, depthtif): + + # Create output directory and log directory. 
+ if not os.path.exists(output_cat_fim_dir): + os.mkdir(output_cat_fim_dir) + log_dir = os.path.join(output_cat_fim_dir, 'logs') + if not os.path.exists(log_dir): + os.mkdir(log_dir) + + no_data_list = [] + procs_list = [] + + # Loop through huc directories in the source_flow directory. + source_flow_dir_list = os.listdir(source_flow_dir) + for huc in source_flow_dir_list: + if "." not in huc: + + # Get list of AHPS site directories. + ahps_site_dir = os.path.join(source_flow_dir, huc) + ahps_site_dir_list = os.listdir(ahps_site_dir) + + # Map paths to HAND files needed for inundation(). + fim_run_huc_dir = os.path.join(fim_run_dir, huc) + rem = os.path.join(fim_run_huc_dir, 'rem_zeroed_masked.tif') + catchments = os.path.join(fim_run_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') + hydroTable = os.path.join(fim_run_huc_dir, 'hydroTable.csv') + + exit_flag = False # Default to False. + + # Check if necessary data exist; set exit_flag to True if they don't exist. + for f in [rem, catchments, hydroTable]: + if not os.path.exists(f): + print(f) + no_data_list.append(f) + exit_flag = True + + # Log "Missing data" if missing TODO improve this. + if exit_flag == True: + f = open(os.path.join(log_dir, huc + '.txt'), 'w') + f.write("Missing data") + continue + + # Map path to huc directory inside out output_cat_fim_dir. + cat_fim_huc_dir = os.path.join(output_cat_fim_dir, huc) + if not os.path.exists(cat_fim_huc_dir): + os.mkdir(cat_fim_huc_dir) + + # Loop through AHPS sites. + for ahps_site in ahps_site_dir_list: + # Map parent directory for AHPS source data dir and list AHPS thresholds (act, min, mod, maj). + ahps_site_parent = os.path.join(ahps_site_dir, ahps_site) + thresholds_dir_list = os.listdir(ahps_site_parent) + + # Map parent directory for all inundation output filesoutput files. + cat_fim_huc_ahps_dir = os.path.join(cat_fim_huc_dir, ahps_site) + if not os.path.exists(cat_fim_huc_ahps_dir): + os.mkdir(cat_fim_huc_ahps_dir) + + # Loop through thresholds/magnitudes and define inundation output files paths + for magnitude in thresholds_dir_list: + if "." not in magnitude: + magnitude_flows_csv = os.path.join(ahps_site_parent, magnitude, 'ahps_' + ahps_site + '_huc_' + huc + '_flows_' + magnitude + '.csv') + if os.path.exists(magnitude_flows_csv): + if gpkg: + output_extent_gpkg = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.gpkg') + else: + output_extent_gpkg = None + if extif: + output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') + else: + output_extent_grid = None + if depthtif: + output_depth_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_depth.tif') + else: + output_depth_grid = None + + # Append necessary variables to list for multiprocessing. + procs_list.append([rem, catchments, catchment_poly, magnitude_flows_csv, huc, hydroTable, output_extent_gpkg, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_dir]) + # Initiate multiprocessing. + pool = Pool(job_number) + pool.map(run_inundation, procs_list) + + +def run_inundation(args): + + # Parse args. 
+ rem = args[0] + catchments = args[1] + catchment_poly = args[2] + magnitude_flows_csv = args[3] + huc = args[4] + hydroTable = args[5] + output_extent_gpkg = args[6] + output_extent_grid = args[7] + output_depth_grid = args[8] + ahps_site = args[9] + magnitude = args[10] + log_dir = args[11] + + print("Running inundation for " + str(os.path.split(os.path.split(output_extent_gpkg)[0])[0])) + try: + inundate( + rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=output_extent_gpkg, + depths=output_depth_grid,out_raster_profile=None,out_vector_profile=None,quiet=True + ) + except Exception: + # Log errors and their tracebacks. + f = open(os.path.join(log_dir, huc + "_" + ahps_site + "_" + magnitude + '.txt'), 'w') + f.write(traceback.format_exc()) + f.close() + + +if __name__ == '__main__': + + # Parse arguments. + parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') + parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) + parser.add_argument('-s', '--source-flow-dir',help='Path to directory containing flow CSVs to use to generate categorical FIM.',required=True, default="") + parser.add_argument('-o', '--output-cat-fim-dir',help='Path to directory where categorical FIM outputs will be written.',required=True, default="") + parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") + parser.add_argument('-gpkg','--write-geopackage',help='Using this option will write a geopackage.',required=False, action='store_true') + parser.add_argument('-extif','--write-extent-tiff',help='Using this option will write extent TIFFs. This is the default.',required=False, action='store_true') + parser.add_argument('-depthtif','--write-depth-tiff',help='Using this option will write depth TIFFs.',required=False, action='store_true') + + args = vars(parser.parse_args()) + + fim_run_dir = args['fim_run_dir'] + source_flow_dir = args['source_flow_dir'] + output_cat_fim_dir = args['output_cat_fim_dir'] + job_number = int(args['job_number']) + gpkg = args['write_geopackage'] + extif = args['write_extent_tiff'] + depthtif = args['write_depth_tiff'] + + generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, job_number, gpkg, extif, depthtif) + + + diff --git a/tools/inundation_wrapper_custom_flow.py b/tools/inundation_wrapper_custom_flow.py index 8602bb008..e82a474e6 100644 --- a/tools/inundation_wrapper_custom_flow.py +++ b/tools/inundation_wrapper_custom_flow.py @@ -8,17 +8,11 @@ import os import sys -import pandas as pd -import geopandas as gpd -import rasterio -import json -import csv import argparse import shutil # insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) sys.path.insert(1, 'foss_fim/tests') -from utils.shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table from inundation import inundate TEST_CASES_DIR = r'/data/inundation_review/inundation_custom_flow/' # Will update. 
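For reference only, not part of the patches above: the wrapper scripts in this series all import `inundate()` from `foss_fim/tests` and call it with the same keyword layout used by `run_test_case.py` and `generate_categorical_fim.py`. The sketch below mirrors that call; every path, HUC, and output name is a hypothetical placeholder.

```python
#!/usr/bin/env python3
# A minimal sketch of a direct inundate() call, mirroring the keyword layout used in this
# patch series. All paths below are hypothetical placeholders, not real data locations.
import sys

sys.path.insert(1, 'foss_fim/tests')  # same runtime path insert the wrapper scripts use
from inundation import inundate

huc = '12090301'
fim_run_huc_dir = '/data/outputs/example_version/' + huc  # outputs of fim_run.sh for one HUC
rem = fim_run_huc_dir + '/rem_zeroed_masked.tif'
catchments = fim_run_huc_dir + '/gw_catchments_reaches_filtered_addedAttributes.tif'
hydro_table = fim_run_huc_dir + '/hydroTable.csv'
forecast = '/data/example_flows_' + huc + '.csv'  # hypothetical flow forecast CSV
hucs, hucs_layerName = '/data/inputs/wbd/WBD_National.gpkg', 'WBDHU8'

inundate(
    rem, catchments, '', hydro_table, forecast, 'huc',
    hucs=hucs, hucs_layerName=hucs_layerName, subset_hucs=huc,
    num_workers=1, aggregate=False,
    inundation_raster='/data/outputs/example_inundation_extent.tif',
    inundation_polygon=None, depths=None,
    out_raster_profile=None, out_vector_profile=None, quiet=True
)
```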
diff --git a/tools/inundation_wrapper_nwm_flows.py b/tools/inundation_wrapper_nwm_flows.py index 1854bb5c3..f6d158a79 100755 --- a/tools/inundation_wrapper_nwm_flows.py +++ b/tools/inundation_wrapper_nwm_flows.py @@ -7,17 +7,12 @@ import os import sys -import pandas as pd -import geopandas as gpd -import rasterio -import json import csv import argparse import shutil # insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) sys.path.insert(1, 'foss_fim/tests') -from utils.shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table from inundation import inundate TEST_CASES_DIR = r'/data/inundation_review/inundation_nwm_recurr/' # Will update. From 7db12841dd579ff7b9e2e3f59c49422c3d49c7c5 Mon Sep 17 00:00:00 2001 From: Brad Date: Mon, 22 Feb 2021 09:27:01 -0600 Subject: [PATCH 022/359] Removed "_new" from test_cases dir --- tests/utils/shared_variables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/shared_variables.py b/tests/utils/shared_variables.py index 4f844e3e9..ffcf4bfd0 100644 --- a/tests/utils/shared_variables.py +++ b/tests/utils/shared_variables.py @@ -1,6 +1,6 @@ import os -TEST_CASES_DIR = r'/data/test_cases_new/' # TODO remove "_new" +TEST_CASES_DIR = r'/data/test_cases/' PREVIOUS_FIM_DIR = r'/data/previous_fim' OUTPUTS_DIR = os.environ['outputDataDir'] INPUTS_DIR = r'/data/inputs' From 8e2ccc21c7f07cf69d13117ba959e4af35841c56 Mon Sep 17 00:00:00 2001 From: Brad Date: Mon, 22 Feb 2021 10:29:23 -0600 Subject: [PATCH 023/359] Update CHANGELOG.md --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7f97b7dd..0b7aa15cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.5.1 - 2021-02-22 + +Fixed `TEST_CASES_DIR` path in `tests/utils/shared_variables.py`. + +### Changes + + - Removed `"_new"` from `TEST_CASES_DIR` variable. + ## v3.0.5.0 - 2021-02-22 - [PR #267](https://github.com/NOAA-OWP/cahaba/pull/267) Enhancements to allow for evaluation at AHPS sites, the generation of a query-optimized metrics CSV, and the generation of categorical FIM. This merge requires that the `/test_cases` directory be updated for all machines performing evaluation. From 6df54a5c787e3ed6b484412d83ef09aece699778 Mon Sep 17 00:00:00 2001 From: Brad Date: Mon, 22 Feb 2021 17:08:00 -0600 Subject: [PATCH 024/359] Update README.md --- README.md | 60 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 5cf6f92ca..e4fc1bf6f 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,12 @@ Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Weather Service (NWS). This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIM). This repository also includes functionality to generate FIMs as well as tests to evaluate FIM prediction skill. +

## Dependencies [Docker](https://docs.docker.com/get-docker/) +

## Installation 1. Install Docker : [Docker](https://docs.docker.com/get-docker/) @@ -15,35 +17,42 @@ Flood inundation mapping software configured to work with the U.S. National Wate 4. Change group ownership of repo (needs to be redone when a new file occurs in the repo): - Linux: `chgrp -R fim ` +

## Configuration Software is configurable via parameters found in config directory. Copy files before editing and remove "template" pattern from the filename. Make sure to set the config folder group to 'fim' recursively using the chown command. Each development version will include a calibrated parameter set of manning’s n values. -- params_template.env -- mannings_default.json - - must change filepath in params_template.env under "manning_n" variable name -- params_calibrated.env - - runs calibrated mannings parameters from mannings_calibrated.json +- `params_template.env` +- `mannings_default.json` + - must change filepath in `params_template.env` under "manning_n" variable name +- `params_calibrated.env` + - runs calibrated mannings parameters from `mannings_calibrated.json` +
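Not part of the patches: since the copied params file must point at a Manning's n JSON through the `manning_n` variable described above, a quick check that the file parses can catch a bad path before a run. The location below is an assumption for illustration only.

```python
# A hedged sketch: sanity-check the Manning's n JSON that the "manning_n" variable in a
# copied params file points to. The path is a hypothetical example, not a guaranteed location.
import json

mannings_json = '/foss_fim/config/mannings_default.json'  # assumed location of the file named above

with open(mannings_json) as f:
    mannings_n = json.load(f)

print("Loaded {} Manning's n entries from {}".format(len(mannings_n), mannings_json))
```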

## Input Data
 The following input data sources should be downloaded and preprocessed prior to executing the preprocessing & hydrofabric generation code:
 
-USACE National Levee Database:
--Access here: https://levees.sec.usace.army.mil/
--Recommend downloading the “Full GeoJSON” file for the area of interest
--Unzip data and then use the preprocessing scripts to filter data and fix geometries where needed
-AHPs site locations for MS extent (currently not available to public)
-NHDPlus HR datasets
--Acquire_and_preprocess_inputs.py
--aggregate_nhd_hr_streams.py
-NWM Hydrofabric
--nwm_flows.gpkg (currently not available to public)
--nwm_catchments.gpkg (currently not available to public)
--nwm_lakes.gpkg (currently not available to public)
--nwm_headwaters.gpkg - derived
-
-NOTE: Some of the input data is not easy to acquire and will need to be shared with outside users. We are currently working on providing this functionality and should be available soon.
+### USACE National Levee Database:
+- Access here: https://levees.sec.usace.army.mil/
+- Recommend downloading the “Full GeoJSON” file for the area of interest
+- Unzip data and then use the preprocessing scripts to filter data and fix geometries where needed
+### NHDPlus HR datasets
+- `acquire_and_preprocess_inputs.py`
+- `aggregate_nhd_hr_streams.py`
+
+### AHPs Site Locations for MS Extent (See Note Below)
+
+### NWM Hydrofabric (See Note Below)
+- `nwm_flows.gpkg`
+- `nwm_catchments.gpkg`
+- `nwm_lakes.gpkg`
+- `nwm_headwaters.gpkg`
+
+**NOTE:** We are currently working on a long-term data sharing solution. Until then, please fill out this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSf4jkg3Fcfgl-zTCeuTzKleiM_5tE5qwwUvVUQrjC9DBa7Ulg/viewform) to notify us that you would like to obtain the AHPS Site Locations NWM Hydrofabric.
+
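Not part of the patches: before kicking off `fim_run.sh`, it can be worth confirming that the NWM hydrofabric layers listed above open cleanly with geopandas, which this repository already uses. The staging directory below is an assumption; point it at wherever the shared inputs live.

```python
# A hedged sketch: confirm the NWM hydrofabric inputs listed above open cleanly.
# The staging directory is an assumption, not a documented path.
import os
import geopandas as gpd

nwm_dir = '/data/inputs/nwm_hydrofabric'  # hypothetical staging directory

for layer in ['nwm_flows.gpkg', 'nwm_catchments.gpkg', 'nwm_lakes.gpkg', 'nwm_headwaters.gpkg']:
    path = os.path.join(nwm_dir, layer)
    gdf = gpd.read_file(path)
    print('{}: {} features, CRS {}'.format(layer, len(gdf), gdf.crs))
```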

## Usage 1. Run Docker Container : `docker run --rm -it -v :/data -v :/foss_fim :` @@ -57,8 +66,9 @@ NOTE: Some of the input data is not easy to acquire and will need to be shared w i. To run entire domain of available data use one of the `/data/inputs/included_huc[4,6,8].lst` files - Outputs can be found under `/data/outputs/` -## Evaluate FIM output to a Benchmark Dataset -Once the hydrofabric has been generated from fim_run.sh for, evaluation against a benchmark dataset can be performed using binary contingency statistics. One benchmark dataset that can be used for evaluations are Base Level Engineering studies available on the FEMA Base Flood Elevation Viewer. To acquire FEMA datasets go to the FEMA Base Flood Elevation Viewer (https://webapps.usgs.gov/infrm/estbfe/) and download the file geodatabase and depth grids for a HUC. To perform an evaluation a flow forecast file is required and benchmark grids are preprocessed prior to running run_test_case.py. +

+## Evaluate Inundation Map Performance +Once the hydrofabric has been generated from fim_run.sh for, evaluation against a benchmark dataset can be performed using binary contingency statistics. One benchmark dataset that can be used for evaluations are Base Level Engineering studies available on the FEMA Base Flood Elevation Viewer. To acquire FEMA datasets go to the FEMA Base Flood Elevation Viewer (https://webapps.usgs.gov/infrm/estbfe/) and download the file geodatabase and depth grids for a HUC. To perform an evaluation a flow forecast file is required and benchmark grids are preprocessed prior to running `run_test_case.py`. 1. Flow Forecast File Creation `/foss_fim/tests/preprocess/create_flow_forecast_file.py -b -n -o -xs -hu -huid -l -f ` @@ -72,6 +82,7 @@ For HUC 12090301, the benchmark datasets (-b) are the 100 year (“BLE_DEP01PCT 3. Run hydrologic evaluation (from inside Docker container): `/foss_fim/tests/run_test_case.py -r -b -t ` - More information can be found by running `/foss_fim/tests/run_test_case.py --help` +

## Dependencies
 
 Dependencies are managed via [Pipenv](https://pipenv.pypa.io/en/latest/). To add new dependencies, from the project's top-level directory:
@@ -90,23 +101,26 @@ and include both `Pipfile` and `Pipfile.lock` in your commits. The docker image
 
 If you are on a machine that has a particularly slow internet connection, you may need to increase the timeout of pipenv. To do this simply add `PIPENV_INSTALL_TIMEOUT=10000000` in front of any of your pipenv commands.
 
-
+

## Known Issues & Getting Help Please see the issue tracker on GitHub for known issues and for getting help. +

## Getting Involved NOAA's National Water Center welcomes anyone to contribute to the Cahaba repository to improve flood inundation mapping capabilities. Please contact Brad Bates (bradford.bates@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) to get started. ---- +

## Open Source Licensing Info 1. [TERMS](TERMS.md) 2. [LICENSE](LICENSE) ---- +

## Credits and References 1. Office of Water Prediction [(OWP)](https://water.noaa.gov/) 2. National Flood Interoperability Experiment [(NFIE)](https://web.corral.tacc.utexas.edu/nfiedata/) From 08a4b9868e014ed1d84ceaeca428bc13a8473f19 Mon Sep 17 00:00:00 2001 From: Brad Date: Mon, 22 Feb 2021 17:09:36 -0600 Subject: [PATCH 025/359] Update README.md --- README.md | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/README.md b/README.md index e4fc1bf6f..1327ab3e4 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,10 @@ Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Weather Service (NWS). This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIM). This repository also includes functionality to generate FIMs as well as tests to evaluate FIM prediction skill. -

## Dependencies [Docker](https://docs.docker.com/get-docker/) -

## Installation 1. Install Docker : [Docker](https://docs.docker.com/get-docker/) @@ -17,7 +15,6 @@ Flood inundation mapping software configured to work with the U.S. National Wate 4. Change group ownership of repo (needs to be redone when a new file occurs in the repo): - Linux: `chgrp -R fim ` -

## Configuration Software is configurable via parameters found in config directory. Copy files before editing and remove "template" pattern from the filename. @@ -28,7 +25,6 @@ Make sure to set the config folder group to 'fim' recursively using the chown co - `params_calibrated.env` - runs calibrated mannings parameters from `mannings_calibrated.json` -

## Input Data The following input data sources should be downloaded and preprocessed prior to executing the preprocessing & hydrofabric generation code: @@ -52,7 +48,6 @@ The following input data sources should be downloaded and preprocessed prior to **NOTE:** We are currently working on a long-term data sharing solution. Until then, please fill out this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSf4jkg3Fcfgl-zTCeuTzKleiM_5tE5qwwUvVUQrjC9DBa7Ulg/viewform) to notify us that you would like to obtain the AHPS Site Locations NWM Hydrofabric. -

## Usage 1. Run Docker Container : `docker run --rm -it -v :/data -v :/foss_fim :` @@ -66,7 +61,6 @@ The following input data sources should be downloaded and preprocessed prior to i. To run entire domain of available data use one of the `/data/inputs/included_huc[4,6,8].lst` files - Outputs can be found under `/data/outputs/` -

## Evaluate Inundation Map Performance Once the hydrofabric has been generated from fim_run.sh for, evaluation against a benchmark dataset can be performed using binary contingency statistics. One benchmark dataset that can be used for evaluations are Base Level Engineering studies available on the FEMA Base Flood Elevation Viewer. To acquire FEMA datasets go to the FEMA Base Flood Elevation Viewer (https://webapps.usgs.gov/infrm/estbfe/) and download the file geodatabase and depth grids for a HUC. To perform an evaluation a flow forecast file is required and benchmark grids are preprocessed prior to running `run_test_case.py`. @@ -82,7 +76,6 @@ For HUC 12090301, the benchmark datasets (-b) are the 100 year (“BLE_DEP01PCT 3. Run hydrologic evaluation (from inside Docker container): `/foss_fim/tests/run_test_case.py -r -b -t ` - More information can be found by running `/foss_fim/tests/run_test_case.py --help` -

## Dependencies
 
 Dependencies are managed via [Pipenv](https://pipenv.pypa.io/en/latest/). To add new dependencies, from the project's top-level directory:
@@ -101,26 +94,22 @@ and include both `Pipfile` and `Pipfile.lock` in your commits. The docker image
 
 If you are on a machine that has a particularly slow internet connection, you may need to increase the timeout of pipenv. To do this simply add `PIPENV_INSTALL_TIMEOUT=10000000` in front of any of your pipenv commands.
 
-

## Known Issues & Getting Help Please see the issue tracker on GitHub for known issues and for getting help. -

## Getting Involved NOAA's National Water Center welcomes anyone to contribute to the Cahaba repository to improve flood inundation mapping capabilities. Please contact Brad Bates (bradford.bates@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) to get started. ---- -

## Open Source Licensing Info 1. [TERMS](TERMS.md) 2. [LICENSE](LICENSE) ---- -

## Credits and References 1. Office of Water Prediction [(OWP)](https://water.noaa.gov/) 2. National Flood Interoperability Experiment [(NFIE)](https://web.corral.tacc.utexas.edu/nfiedata/) From 6d17e9a9f78adbd983d73402ebd96e34cee009a2 Mon Sep 17 00:00:00 2001 From: Brad Date: Mon, 22 Feb 2021 18:33:34 -0600 Subject: [PATCH 026/359] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1327ab3e4..d71154b0f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ ### Cahaba: Flood Inundation Mapping for U.S. National Water Model -Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Weather Service (NWS). This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIM). This repository also includes functionality to generate FIMs as well as tests to evaluate FIM prediction skill. +Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Water Center (NWC). + +This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIMs). This repository also includes functionality to generate FIMs and tests to evaluate FIM prediction skill. ## Dependencies From 21d05c602130ae2b689cb7a8f076ac15292d60b6 Mon Sep 17 00:00:00 2001 From: Brad Date: Mon, 22 Feb 2021 18:52:45 -0600 Subject: [PATCH 027/359] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d71154b0f..fad922c31 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -### Cahaba: Flood Inundation Mapping for U.S. National Water Model +## Cahaba: Flood Inundation Mapping for U.S. National Water Model Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Water Center (NWC). @@ -48,7 +48,7 @@ The following input data sources should be downloaded and preprocessed prior to - `nwm_lakes.gpkg` - `nwm_headwaters.gpkg` -**NOTE:** We are currently working on a long-term data sharing solution. Until then, please fill out this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSf4jkg3Fcfgl-zTCeuTzKleiM_5tE5qwwUvVUQrjC9DBa7Ulg/viewform) to notify us that you would like to obtain the AHPS Site Locations NWM Hydrofabric. +**NOTE:** We are currently working on a long-term data sharing solution. Until then, please fill out this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSf4jkg3Fcfgl-zTCeuTzKleiM_5tE5qwwUvVUQrjC9DBa7Ulg/viewform) to notify us that you would like to obtain the AHPS Site Locations NWM Hydrofabric data. 
## Usage From 0347e63bfa3e694a48c22b06bd16751a4c312cf8 Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 07:52:17 -0600 Subject: [PATCH 028/359] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fad922c31..53ac82607 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ The following input data sources should be downloaded and preprocessed prior to - `nwm_lakes.gpkg` - `nwm_headwaters.gpkg` -**NOTE:** We are currently working on a long-term data sharing solution. Until then, please fill out this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSf4jkg3Fcfgl-zTCeuTzKleiM_5tE5qwwUvVUQrjC9DBa7Ulg/viewform) to notify us that you would like to obtain the AHPS Site Locations NWM Hydrofabric data. +**NOTE:** We are currently working on a long-term data sharing solution. Until then, please contact Brad Bates (bradford.bates@noaa.gov) for data access. ## Usage From 90dc6951c69d83e0efdb0aec223e87d5b39141a1 Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 08:11:44 -0600 Subject: [PATCH 029/359] Update README.md --- README.md | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 53ac82607..9c8b4a340 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ This software uses the Height Above Nearest Drainage (HAND) method to generate R ## Configuration -Software is configurable via parameters found in config directory. Copy files before editing and remove "template" pattern from the filename. +This software is configurable via parameters found in `config` directory. Copy files before editing and remove "template" pattern from the filename. Make sure to set the config folder group to 'fim' recursively using the chown command. Each development version will include a calibrated parameter set of manning’s n values. - `params_template.env` - `mannings_default.json` @@ -63,20 +63,11 @@ The following input data sources should be downloaded and preprocessed prior to i. To run entire domain of available data use one of the `/data/inputs/included_huc[4,6,8].lst` files - Outputs can be found under `/data/outputs/` -## Evaluate Inundation Map Performance -Once the hydrofabric has been generated from fim_run.sh for, evaluation against a benchmark dataset can be performed using binary contingency statistics. One benchmark dataset that can be used for evaluations are Base Level Engineering studies available on the FEMA Base Flood Elevation Viewer. To acquire FEMA datasets go to the FEMA Base Flood Elevation Viewer (https://webapps.usgs.gov/infrm/estbfe/) and download the file geodatabase and depth grids for a HUC. To perform an evaluation a flow forecast file is required and benchmark grids are preprocessed prior to running `run_test_case.py`. +## Evaluating Inundation Map Performance +After `fim_run.sh` completes, you are ready to evaluate the model's skill. The following information can be used to preprocess the appropriate benchmark data against which model comparisons can be performed. Please note that you will need access to the test_cases benchmark data. Similar to the other non-publicly available datasets, you can acquire the benchmark data from Brad Bates (bradford.bates). (A longer term data sharing solution is in the works). -1. 
Flow Forecast File Creation -`/foss_fim/tests/preprocess/create_flow_forecast_file.py -b -n -o -xs -hu -huid -l -f ` -For example, if HUC 12090301 were downloaded from the FEMA BFE viewer the geodatabase, “BLE_LowColoradoCummins.gdb”, contains a HUC Layer “S_HUC_Ar” (-hu) and a cross section layer “XS” (-xs). The HUC ID corresponds to the “HUC_CODE” field (-huid) within the “S_HUC_AR” layer. Additionally, the National Water Model geodatabase (-n) will be required with the stream layer (-l) along with the ID field (-f) in the attribute table. Instructions on how to obtain the National Water Model GIS layers will be forthcoming. - -2. Process benchmark grid data -`/foss_fim/tests/preprocess/preprocess_benchmark.py -b -r -o ` -For HUC 12090301, the benchmark datasets (-b) are the 100 year (“BLE_DEP01PCT”) and 500 year (“BLE_DEP0_2PCT”) depth grids converted to Geotiff format. An example of a reference dataset (-r) is the “rem_zeroed_masked.tif” produced as part of the hydrofabric from fim_run.sh. The output raster name (if doing ble data) should be `ble_huc__depth_.tif` where event is '100yr' or '500yr'. Once the flow file and benchmark grids are processed, the output files are then placed in this folder (from inside a Docker container): -`/foss_fim/tests_cases/validation_data_ble///` where event is ‘100yr’ or ‘500yr’ - -3. Run hydrologic evaluation (from inside Docker container): `/foss_fim/tests/run_test_case.py -r -b -t ` - - More information can be found by running `/foss_fim/tests/run_test_case.py --help` +`/foss_fim/tests/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs]` + - More information can be found by running `/foss_fim/tests/synthesize_test_cases.py --help` ## Dependencies From 83f9a3b21828cedc4abbb12ee3d17d8292720160 Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 08:12:46 -0600 Subject: [PATCH 030/359] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9c8b4a340..d1600180f 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ The following input data sources should be downloaded and preprocessed prior to - `nwm_lakes.gpkg` - `nwm_headwaters.gpkg` -**NOTE:** We are currently working on a long-term data sharing solution. Until then, please contact Brad Bates (bradford.bates@noaa.gov) for data access. +**Please note:** We are currently working on a long-term data sharing solution. Until then, please contact Brad Bates (bradford.bates@noaa.gov) for data access. ## Usage @@ -64,7 +64,7 @@ The following input data sources should be downloaded and preprocessed prior to - Outputs can be found under `/data/outputs/` ## Evaluating Inundation Map Performance -After `fim_run.sh` completes, you are ready to evaluate the model's skill. The following information can be used to preprocess the appropriate benchmark data against which model comparisons can be performed. Please note that you will need access to the test_cases benchmark data. Similar to the other non-publicly available datasets, you can acquire the benchmark data from Brad Bates (bradford.bates). (A longer term data sharing solution is in the works). +After `fim_run.sh` completes, you are ready to evaluate the model's skill. The following information can be used to preprocess the appropriate benchmark data against which model comparisons can be performed. **Please note:** You will need access to the test_cases benchmark data. 
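The preprocessing step described above aligns a benchmark depth grid to a reference raster (for example `rem_zeroed_masked.tif`) before `run_test_case.py` is run. Below is only a conceptual sketch of that alignment using rasterio, not the repository's `preprocess_benchmark.py`; the file names are placeholders based on the HUC 12090301 example, and the real script may handle nodata, clipping, and output naming differently.

```python
# Conceptual sketch: resample a benchmark depth grid onto the reference raster's grid.
# Not the actual preprocess_benchmark.py; file names are placeholders.
import numpy as np
import rasterio
from rasterio.warp import reproject, Resampling

with rasterio.open("BLE_DEP01PCT.tif") as bench, rasterio.open("rem_zeroed_masked.tif") as ref:
    aligned = np.full((ref.height, ref.width), -9999.0, dtype="float32")
    reproject(
        source=bench.read(1),
        destination=aligned,
        src_transform=bench.transform, src_crs=bench.crs, src_nodata=bench.nodata,
        dst_transform=ref.transform, dst_crs=ref.crs, dst_nodata=-9999.0,
        resampling=Resampling.bilinear,
    )
    profile = ref.profile.copy()
    profile.update(dtype="float32", nodata=-9999.0, count=1)

with rasterio.open("ble_huc_12090301_depth_100yr.tif", "w", **profile) as dst:
    dst.write(aligned, 1)
```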
Similar to the other non-publicly available datasets, you can acquire the benchmark data from Brad Bates (bradford.bates). (A longer term data sharing solution is in the works). `/foss_fim/tests/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs]` - More information can be found by running `/foss_fim/tests/synthesize_test_cases.py --help` From 747deaf6fba191403c49ad46f440e98b4e84eed4 Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 08:19:36 -0600 Subject: [PATCH 031/359] Update README.md --- README.md | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index d1600180f..5f4393b80 100644 --- a/README.md +++ b/README.md @@ -40,15 +40,18 @@ The following input data sources should be downloaded and preprocessed prior to - `acquire_and_preprocess_inputs.py` - `aggregate_nhd_hr_streams.py` -### AHPs Site Locations for MS Extent (See Note Below) - -### NWM Hydrofabric (See Note Below) -- `nwm_flows.gpkg` -- `nwm_catchments.gpkg` -- `nwm_lakes.gpkg` -- `nwm_headwaters.gpkg` - -**Please note:** We are currently working on a long-term data sharing solution. Until then, please contact Brad Bates (bradford.bates@noaa.gov) for data access. +
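Once the hydrofabric GeoPackage layers listed above are obtained, a quick spot check with GeoPandas (already used elsewhere in the project) can confirm they read cleanly. The directory below is a placeholder, not a documented location.

```python
# Hypothetical sanity check of downloaded NWM hydrofabric layers; path is a placeholder.
import geopandas as gpd

for name in ["nwm_flows.gpkg", "nwm_catchments.gpkg", "nwm_lakes.gpkg", "nwm_headwaters.gpkg"]:
    layer = gpd.read_file(f"/data/inputs/nwm_hydrofabric/{name}")
    print(name, len(layer), layer.crs)
```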

+**Please note:** For the following two datasets, please contact Brad Bates (bradford.bates@noaa.gov). We are currently working on a long-term data sharing solution for the in-house NOAA data. + +- **NWM Hydrofabric** + - `nwm_flows.gpkg` + - `nwm_catchments.gpkg` + - `nwm_lakes.gpkg` + - `nwm_headwaters.gpkg` + +- **AHPS Site Locations (For Mainstem Configuration)** + - `nws_lid.gpkg` + - `ms_segs.gpkg` ## Usage From 6c1e6a36282d55b456166e6e4df6bfc98aa0774a Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 08:21:43 -0600 Subject: [PATCH 032/359] Update README.md --- README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 5f4393b80..cdac59744 100644 --- a/README.md +++ b/README.md @@ -39,19 +39,18 @@ The following input data sources should be downloaded and preprocessed prior to ### NHDPlus HR datasets - `acquire_and_preprocess_inputs.py` - `aggregate_nhd_hr_streams.py` - -

+---- **Please note:** For the following two datasets, please contact Brad Bates (bradford.bates@noaa.gov). We are currently working on a long-term data sharing solution for the in-house NOAA data. -- **NWM Hydrofabric** - - `nwm_flows.gpkg` - - `nwm_catchments.gpkg` - - `nwm_lakes.gpkg` - - `nwm_headwaters.gpkg` +### NWM Hydrofabric +- `nwm_flows.gpkg` +- `nwm_catchments.gpkg` +- `nwm_lakes.gpkg` +- `nwm_headwaters.gpkg` -- **AHPS Site Locations (For Mainstem Configuration)** - - `nws_lid.gpkg` - - `ms_segs.gpkg` +### AHPS Site Locations (For Mainstem Configuration) +- `nws_lid.gpkg` +- `ms_segs.gpkg` ## Usage From 1640e26add539395af8db7f53719838b8db89626 Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 08:23:50 -0600 Subject: [PATCH 033/359] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cdac59744..d964d6595 100644 --- a/README.md +++ b/README.md @@ -66,10 +66,11 @@ The following input data sources should be downloaded and preprocessed prior to - Outputs can be found under `/data/outputs/` ## Evaluating Inundation Map Performance -After `fim_run.sh` completes, you are ready to evaluate the model's skill. The following information can be used to preprocess the appropriate benchmark data against which model comparisons can be performed. **Please note:** You will need access to the test_cases benchmark data. Similar to the other non-publicly available datasets, you can acquire the benchmark data from Brad Bates (bradford.bates). (A longer term data sharing solution is in the works). +After `fim_run.sh` completes, you are ready to evaluate the model's skill. **Please note:** You will need access to the test_cases benchmark data. Similar to the other non-publicly available datasets, you can acquire the benchmark data from Brad Bates (bradford.bates). (A longer term data sharing solution is in the works). -`/foss_fim/tests/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs]` - - More information can be found by running `/foss_fim/tests/synthesize_test_cases.py --help` +`/foss_fim/tests/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs]` + +More information can be found by running `/foss_fim/tests/synthesize_test_cases.py --help` ## Dependencies From 4d42ccc92552e5781eddb8721f130d24f2450151 Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 08:26:01 -0600 Subject: [PATCH 034/359] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d964d6595..6bbbb1cad 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ This software uses the Height Above Nearest Drainage (HAND) method to generate R ## Configuration -This software is configurable via parameters found in `config` directory. Copy files before editing and remove "template" pattern from the filename. +This software is configurable via parameters found in the `config` directory. Copy files before editing and remove "template" pattern from the filename. Make sure to set the config folder group to 'fim' recursively using the chown command. Each development version will include a calibrated parameter set of manning’s n values. 
- `params_template.env` - `mannings_default.json` From ad97abb5970ec8cb88833d80271cafb84a574ed9 Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 08:50:21 -0600 Subject: [PATCH 035/359] Update README.md --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6bbbb1cad..a9c2204c8 100644 --- a/README.md +++ b/README.md @@ -66,11 +66,14 @@ The following input data sources should be downloaded and preprocessed prior to - Outputs can be found under `/data/outputs/` ## Evaluating Inundation Map Performance -After `fim_run.sh` completes, you are ready to evaluate the model's skill. **Please note:** You will need access to the test_cases benchmark data. Similar to the other non-publicly available datasets, you can acquire the benchmark data from Brad Bates (bradford.bates). (A longer term data sharing solution is in the works). +After `fim_run.sh` completes, you are ready to evaluate the model's skill. -`/foss_fim/tests/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs]` +**Please note:** You will need access to the test_cases benchmark data. Similar to the other non-publicly available datasets, you can acquire the benchmark data from Brad Bates (bradford.bates@noaa.gov). (A longer term data sharing solution is in the works). -More information can be found by running `/foss_fim/tests/synthesize_test_cases.py --help` +To evaluate model skill, run the following: +`python /foss_fim/tests/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs]` + +More information can be found by running `python /foss_fim/tests/synthesize_test_cases.py --help` ## Dependencies From a683d8b33c5a62fcd0bd4d03edb1c23b5a84283f Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 08:59:04 -0600 Subject: [PATCH 036/359] Update README.md --- README.md | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index a9c2204c8..bc3ea9964 100644 --- a/README.md +++ b/README.md @@ -54,26 +54,47 @@ The following input data sources should be downloaded and preprocessed prior to ## Usage -1. Run Docker Container : `docker run --rm -it -v :/data -v :/foss_fim :` -2. Acquire and Prepare Data : `/foss_fim/lib/acquire_and_preprocess_inputs.py -u ` - - `-u` can be a single HUC4, series of HUC4s (e.g. 1209 1210), path to line-delimited file with HUC4s. - - Please run `/foss_fim/lib/acquire_and_preprocess_inputs.py --help` for more information. - - See United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR) [site](https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution) for more information -3. Aggregate NHD HR streams and create NWM headwater points : /foss_fim/lib/aggregate_vector_inputs.py -4. Produce Hydrofabric : `fim_run.sh -u -c /foss_fim/config/ -n ` - - `-u` can be a single huc, a series passed in quotes, or a line-delimited file - i. To run entire domain of available data use one of the `/data/inputs/included_huc[4,6,8].lst` files - - Outputs can be found under `/data/outputs/` +### Run Docker Container +``` +docker run --rm -it -v :/data -v :/foss_fim : +``` + +### Acquire and Prepare Data +``` +/foss_fim/lib/acquire_and_preprocess_inputs.py -u +``` +- `-u` can be a single HUC4, series of HUC4s (e.g. 1209 1210), path to line-delimited file with HUC4s. +- Please run `/foss_fim/lib/acquire_and_preprocess_inputs.py --help` for more information. 
+- See United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR) [site](https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution) for more information + +### Aggregate NHD HR streams and create NWM headwater points +``` +/foss_fim/lib/aggregate_vector_inputs.py +``` +### Produce Hydrofabric +``` +fim_run.sh -u -c /foss_fim/config/ -n +``` +- `-u` can be a single huc, a series passed in quotes, or a line-delimited file + i. To run entire domain of available data use one of the ```/data/inputs/included_huc[4,6,8].lst``` files +- Outputs can be found under ```/data/outputs/``` + +---- ## Evaluating Inundation Map Performance After `fim_run.sh` completes, you are ready to evaluate the model's skill. -**Please note:** You will need access to the test_cases benchmark data. Similar to the other non-publicly available datasets, you can acquire the benchmark data from Brad Bates (bradford.bates@noaa.gov). (A longer term data sharing solution is in the works). +**Please note:** You will need access to the test_cases benchmark data. You can acquire the benchmark data from Brad Bates (bradford.bates@noaa.gov). As mentioned before, a long term data sharing solution is still in the works. To evaluate model skill, run the following: -`python /foss_fim/tests/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs]` +``` +python /foss_fim/tests/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs] +``` -More information can be found by running `python /foss_fim/tests/synthesize_test_cases.py --help` +More information can be found by running: +``` +python /foss_fim/tests/synthesize_test_cases.py --help +``` ## Dependencies From e0276ad9f3f9b3703b6d563bef195eda65291b9a Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 09:00:23 -0600 Subject: [PATCH 037/359] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bc3ea9964..f78721b15 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ docker run --rm -it -v :/data -v :/foss_fim Date: Tue, 23 Feb 2021 09:02:23 -0600 Subject: [PATCH 038/359] Update README.md --- README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f78721b15..1c3470d54 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ Flood inundation mapping software configured to work with the U.S. National Wate This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIMs). This repository also includes functionality to generate FIMs and tests to evaluate FIM prediction skill. 
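The usage steps above are run by hand from inside the container; chained together, they amount to producing the hydrofabric and then scoring it. A hypothetical driver is sketched below; every argument value (HUC list, config file, run name, job count) is a placeholder, and only the flags documented above are used.

```python
# Hypothetical driver for the documented steps; all argument values are placeholders.
import subprocess

huc_list = "/data/inputs/included_huc8.lst"          # placeholder HUC list
config = "/foss_fim/config/params_calibrated.env"    # placeholder config choice
run_name = "example_run"                             # placeholder run/version name

# Produce the hydrofabric for the requested HUCs.
subprocess.run(["fim_run.sh", "-u", huc_list, "-c", config, "-n", run_name], check=True)

# Evaluate the run against the benchmark test cases and write a metrics CSV.
subprocess.run(["python", "/foss_fim/tests/synthesize_test_cases.py",
                "-c", "DEV", "-v", run_name,
                "-m", f"/data/outputs/{run_name}_metrics.csv",
                "-j", "4"], check=True)
```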
+---- ## Dependencies [Docker](https://docs.docker.com/get-docker/) @@ -27,6 +28,7 @@ Make sure to set the config folder group to 'fim' recursively using the chown co - `params_calibrated.env` - runs calibrated mannings parameters from `mannings_calibrated.json` +---- ## Input Data The following input data sources should be downloaded and preprocessed prior to executing the preprocessing & hydrofabric generation code: @@ -36,10 +38,11 @@ The following input data sources should be downloaded and preprocessed prior to - Unzip data and then use the preprocessing scripts to filter data and fix geometries where needed - Unzip data and then use the preprocessing scripts to filter data and fix geometries where needed +---- ### NHDPlus HR datasets - `acquire_and_preprocess_inputs.py` - `aggregate_nhd_hr_streams.py` ----- + **Please note:** For the following two datasets, please contact Brad Bates (bradford.bates@noaa.gov). We are currently working on a long-term data sharing solution for the in-house NOAA data. ### NWM Hydrofabric @@ -52,6 +55,7 @@ The following input data sources should be downloaded and preprocessed prior to - `nws_lid.gpkg` - `ms_segs.gpkg` +---- ## Usage ### Run Docker Container @@ -80,7 +84,6 @@ fim_run.sh -u -c /foss_fim/config/ -n ``` ---- - ## Evaluating Inundation Map Performance After `fim_run.sh` completes, you are ready to evaluate the model's skill. @@ -96,6 +99,7 @@ More information can be found by running: python /foss_fim/tests/synthesize_test_cases.py --help ``` +---- ## Dependencies Dependencies are managed via [Pipenv](https://pipenv.pypa.io/en/latest/). To add new dependencies, from the projects's top-level directory: @@ -114,22 +118,22 @@ and include both `Pipfile` and `Pipfile.lock` in your commits. The docker image If you are on a machine that has a particularly slow internet connection, you may need to increase the timeout of pipenv. To do this simply add `PIPENV_INSTALL_TIMEOUT=10000000` in front of any of your pipenv commands. +---- ## Known Issues & Getting Help Please see the issue tracker on GitHub for known issues and for getting help. +---- ## Getting Involved NOAA's National Water Center welcomes anyone to contribute to the Cahaba repository to improve flood inundation mapping capabilities. Please contact Brad Bates (bradford.bates@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) to get started. ---- - ## Open Source Licensing Info 1. [TERMS](TERMS.md) 2. [LICENSE](LICENSE) ---- - ## Credits and References 1. Office of Water Prediction [(OWP)](https://water.noaa.gov/) 2. National Flood Interoperability Experiment [(NFIE)](https://web.corral.tacc.utexas.edu/nfiedata/) From 48e23597713190e429e021e31a3df341aa8c9f0d Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 09:03:31 -0600 Subject: [PATCH 039/359] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1c3470d54..0d352a787 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ This software uses the Height Above Nearest Drainage (HAND) method to generate R [Docker](https://docs.docker.com/get-docker/) +---- ## Installation 1. Install Docker : [Docker](https://docs.docker.com/get-docker/) @@ -18,6 +19,7 @@ This software uses the Height Above Nearest Drainage (HAND) method to generate R 4. Change group ownership of repo (needs to be redone when a new file occurs in the repo): - Linux: `chgrp -R fim ` +---- ## Configuration This software is configurable via parameters found in the `config` directory. 
Copy files before editing and remove "template" pattern from the filename. @@ -38,7 +40,6 @@ The following input data sources should be downloaded and preprocessed prior to - Unzip data and then use the preprocessing scripts to filter data and fix geometries where needed - Unzip data and then use the preprocessing scripts to filter data and fix geometries where needed ----- ### NHDPlus HR datasets - `acquire_and_preprocess_inputs.py` - `aggregate_nhd_hr_streams.py` From 31af057baa8dcb367e041f6994fb70260e02967d Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 09:04:19 -0600 Subject: [PATCH 040/359] Update README.md --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 0d352a787..49e09b95a 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,10 @@ Flood inundation mapping software configured to work with the U.S. National Wate This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIMs). This repository also includes functionality to generate FIMs and tests to evaluate FIM prediction skill. ----- ## Dependencies [Docker](https://docs.docker.com/get-docker/) ----- ## Installation 1. Install Docker : [Docker](https://docs.docker.com/get-docker/) @@ -19,7 +17,6 @@ This software uses the Height Above Nearest Drainage (HAND) method to generate R 4. Change group ownership of repo (needs to be redone when a new file occurs in the repo): - Linux: `chgrp -R fim ` ----- ## Configuration This software is configurable via parameters found in the `config` directory. Copy files before editing and remove "template" pattern from the filename. From 1898e934dcc6d464d9b5796dd7df04a7109deb3a Mon Sep 17 00:00:00 2001 From: Brad Date: Tue, 23 Feb 2021 09:05:15 -0600 Subject: [PATCH 041/359] Update README.md --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 49e09b95a..a19932c17 100644 --- a/README.md +++ b/README.md @@ -121,17 +121,14 @@ If you are on a machine that has a particularly slow internet connection, you ma Please see the issue tracker on GitHub for known issues and for getting help. ----- ## Getting Involved NOAA's National Water Center welcomes anyone to contribute to the Cahaba repository to improve flood inundation mapping capabilities. Please contact Brad Bates (bradford.bates@noaa.gov) or Fernando Salas (fernando.salas@noaa.gov) to get started. ----- ## Open Source Licensing Info 1. [TERMS](TERMS.md) 2. [LICENSE](LICENSE) ----- ## Credits and References 1. Office of Water Prediction [(OWP)](https://water.noaa.gov/) 2. National Flood Interoperability Experiment [(NFIE)](https://web.corral.tacc.utexas.edu/nfiedata/) From ffa0a006b3247cccca8f566252bee8f3f7ec9a27 Mon Sep 17 00:00:00 2001 From: RyanSpies-NOAA Date: Tue, 23 Feb 2021 13:59:17 -0600 Subject: [PATCH 042/359] Adding HAND SRC datum elev values to hydroTable.csv output (#272) - Adding HAND SRC datum elev values to hydroTable.csv output - Renamed hydroid attribute to "Median_Thal_Elev_m" --- CHANGELOG.md | 9 +++++++++ lib/add_crosswalk.py | 4 ++-- lib/rem.py | 4 ++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b7aa15cf..ac7cf3ae1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. 
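The `rem.py` change in this patch (shown below) carries a median thalweg elevation per HydroID into the reaches and hydrotable. The same pandas pattern is shown standalone here with made-up elevations, purely as an illustration.

```python
# Standalone illustration of the per-HydroID median used by this patch (made-up values).
import pandas as pd

pixel_catchments = pd.DataFrame({
    "HydroID": [101, 101, 101, 102, 102],
    "Median_Thal_Elev_m": [321.4, 322.0, 321.7, 287.9, 288.3],
})

per_reach = pixel_catchments.groupby("HydroID").median()
print(per_reach)   # 101 -> 321.7, 102 -> 288.1
```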
+## v3.0.5.2 - 2021-02-23 + +Adding HAND SRC datum elev values to `hydroTable.csv` output + +### Changes + + - Updated `add_crosswalk.py` to included "Median_Thal_Elev_m" variable outputs in hydroTable.csv + - Renamed hydroid attribute in `rem.py` to "Median" in case we want to include other statistics in the future (e.g. min, max, range etc.) + ## v3.0.5.1 - 2021-02-22 Fixed `TEST_CASES_DIR` path in `tests/utils/shared_variables.py`. diff --git a/lib/add_crosswalk.py b/lib/add_crosswalk.py index a0e57c7de..eb4198cb3 100755 --- a/lib/add_crosswalk.py +++ b/lib/add_crosswalk.py @@ -220,9 +220,9 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_hydro_table = output_hydro_table.merge(input_huc.loc[:,[FIM_ID,'HUC8']],how='left',on=FIM_ID) if output_flows.HydroID.dtype != 'str': output_flows.HydroID = output_flows.HydroID.astype(str) - output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID']],how='left',on='HydroID') + output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID','Median_Thal_Elev_m']],how='left',on='HydroID') output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int) - + output_hydro_table['Median_Thal_Elev_m'] = output_hydro_table['Median_Thal_Elev_m'].astype(float).round(2) output_hydro_table = output_hydro_table.rename(columns={'HUC8':'HUC'}) if output_hydro_table.HUC.dtype != 'str': output_hydro_table.HUC = output_hydro_table.HUC.astype(str) diff --git a/lib/rem.py b/lib/rem.py index 411380070..403edf9db 100755 --- a/lib/rem.py +++ b/lib/rem.py @@ -111,7 +111,7 @@ def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalw ############################################### # Merge and export dictionary to to_csv catchment_min_dict_df = pd.DataFrame.from_dict(catchment_min_dict, orient='index') # convert dict to dataframe - catchment_min_dict_df.columns = ['Min_Thal_Elev_meters'] + catchment_min_dict_df.columns = ['Median_Thal_Elev_m'] catchment_hydroid_dict_df = pd.DataFrame.from_dict(catchment_hydroid_dict, orient='index') # convert dict to dataframe catchment_hydroid_dict_df.columns = ['HydroID'] merge_df = catchment_hydroid_dict_df.merge(catchment_min_dict_df, left_index=True, right_index=True) @@ -119,7 +119,7 @@ def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalw merge_df.to_csv(hand_ref_elev_fileName,index=True) # export dataframe to csv file # Merge the HAND reference elvation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) - merge_df = merge_df.groupby(['HydroID']).median() # median value of all Min_Thal_Elev_meters for pixel catchments in each HydroID reach + merge_df = merge_df.groupby(['HydroID']).median() # median value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach input_reaches = gpd.read_file(dem_reaches_filename) input_reaches = input_reaches.merge(merge_df, on='HydroID') # merge dataframes by HydroID variable input_reaches.to_file(dem_reaches_filename,driver=getDriver(dem_reaches_filename),index=False) From e2ae250c2a0867d8e74a8c2c00be2b7df6c45627 Mon Sep 17 00:00:00 2001 From: Brad Date: Wed, 24 Feb 2021 10:58:31 -0600 Subject: [PATCH 043/359] Bug fixes to new evaluation code and README.md cleanup - Fixed a bug in synthesize_test_cases.py where the extent (MS/FR) was not being written to merged metrics file properly. - Fixed a bug in synthesize_test_cases.py where only BLE test cases were being written to merged metrics file. 
- Removed unused imports from inundation.py. - Updated README.md This resolves #270. --- CHANGELOG.md | 22 ++++++++++++++---- README.md | 5 ++-- tests/inundation.py | 12 ++++------ tests/run_test_case.py | 23 ++++++++---------- tests/synthesize_test_cases.py | 41 +++++++++++++++++++++------------ tests/utils/shared_variables.py | 6 ++--- 6 files changed, 62 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac7cf3ae1..dc768902a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,28 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. -## v3.0.5.2 - 2021-02-23 +## v3.0.5.3 - 2021-02-23 - [PR #275](https://github.com/NOAA-OWP/cahaba/pull/275) -Adding HAND SRC datum elev values to `hydroTable.csv` output +Bug fixes to new evaluation code. ### Changes - - Updated `add_crosswalk.py` to included "Median_Thal_Elev_m" variable outputs in hydroTable.csv - - Renamed hydroid attribute in `rem.py` to "Median" in case we want to include other statistics in the future (e.g. min, max, range etc.) + - Fixed a bug in `synthesize_test_cases.py` where the extent (MS/FR) was not being written to merged metrics file properly. + - Fixed a bug in `synthesize_test_cases.py` where only BLE test cases were being written to merged metrics file. + - Removed unused imports from `inundation.py`. + - Updated README.md +

+## v3.0.5.2 - 2021-02-23 - [PR #272](https://github.com/NOAA-OWP/cahaba/pull/272) + +Adds HAND synthetic rating curve (SRC) datum elevation values to `hydroTable.csv` output. + +### Changes + + - Updated `add_crosswalk.py` to included "Median_Thal_Elev_m" variable outputs in `hydroTable.cs`v. + - Renamed hydroid attribute in `rem.py` to "Median" in case we want to include other statistics in the future (e.g. min, max, range etc.). + +

## v3.0.5.1 - 2021-02-22 Fixed `TEST_CASES_DIR` path in `tests/utils/shared_variables.py`. @@ -18,6 +31,7 @@ Fixed `TEST_CASES_DIR` path in `tests/utils/shared_variables.py`. - Removed `"_new"` from `TEST_CASES_DIR` variable. +

## v3.0.5.0 - 2021-02-22 - [PR #267](https://github.com/NOAA-OWP/cahaba/pull/267) Enhancements to allow for evaluation at AHPS sites, the generation of a query-optimized metrics CSV, and the generation of categorical FIM. This merge requires that the `/test_cases` directory be updated for all machines performing evaluation. diff --git a/README.md b/README.md index a19932c17..cb091529a 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Flood inundation mapping software configured to work with the U.S. National Water Model operated and maintained by the National Oceanic and Atmospheric Administration (NOAA) National Water Center (NWC). -This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids, which together are used to produce flood inundation maps (FIMs). This repository also includes functionality to generate FIMs and tests to evaluate FIM prediction skill. +This software uses the Height Above Nearest Drainage (HAND) method to generate Relative Elevation Models (REMs), Synthetic Rating Curves (SRCs), and catchment grids. This repository also includes functionality to generate flood inundation maps (FIMs) and evaluate FIM accuracy. ## Dependencies @@ -33,8 +33,7 @@ Make sure to set the config folder group to 'fim' recursively using the chown co The following input data sources should be downloaded and preprocessed prior to executing the preprocessing & hydrofabric generation code: ### USACE National Levee Database: - Access here: https://levees.sec.usace.army.mil/ -- Recommend downloading the “Full GeoJSON” file for the area of interest -- Unzip data and then use the preprocessing scripts to filter data and fix geometries where needed +- Download the “Full GeoJSON” file for the area of interest - Unzip data and then use the preprocessing scripts to filter data and fix geometries where needed ### NHDPlus HR datasets diff --git a/tests/inundation.py b/tests/inundation.py index 679102432..b4db4fa49 100755 --- a/tests/inundation.py +++ b/tests/inundation.py @@ -1,27 +1,23 @@ #!/usr/bin/env python3 -import sys import numpy as np import pandas as pd -from numba import njit, typeof, typed, types +from numba import njit, typed, types from concurrent.futures import ThreadPoolExecutor,as_completed from subprocess import run from os.path import splitext import rasterio import fiona -import shapely from shapely.geometry import shape -from fiona.crs import to_string -from rasterio.errors import WindowError from rasterio.mask import mask from rasterio.io import DatasetReader,DatasetWriter -from rasterio.features import shapes,geometry_window,dataset_features -from rasterio.windows import transform,Window from collections import OrderedDict import argparse from warnings import warn from gdal import BuildVRT import geopandas as gpd + + def inundate( rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=None,hucs_layerName=None, subset_hucs=None,num_workers=1,aggregate=False,inundation_raster=None,inundation_polygon=None, @@ -464,7 +460,7 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): if hydroTable.empty: print ("All stream segments in HUC are within lake boundaries.") - sys.exit(0) + return elif isinstance(hydroTable,pd.DataFrame): pass #consider checking for correct dtypes, indices, and columns diff --git a/tests/run_test_case.py b/tests/run_test_case.py index a11fa9ab5..5beac4f16 100755 --- a/tests/run_test_case.py +++ b/tests/run_test_case.py @@ 
-103,11 +103,6 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous lid_list.append(lid) inundation_raster_list.append(os.path.join(version_test_case_dir, lid + '_inundation_extent.tif')) extent_file_list.append(os.path.join(lid_dir, lid + '_extent.shp')) - - ahps_inclusion_zones_dir = os.path.join(version_test_case_dir_parent, 'ahps_domains') - - if not os.path.exists(ahps_inclusion_zones_dir): - os.mkdir(ahps_inclusion_zones_dir) else: benchmark_raster_file = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_depth_' + magnitude + '.tif') @@ -190,7 +185,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous # Parse arguments. parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) - parser.add_argument('-b', '--version-name',help='The name of the working version in which features are being tested',required=True,default="") + parser.add_argument('-b', '--version',help='The name of the working version in which features are being tested',required=True,default="") parser.add_argument('-t', '--test-id',help='The test_id to use. Format as: HUC_BENCHMARKTYPE, e.g. 12345678_ble.',required=True,default="") parser.add_argument('-m', '--mask-type', help='Specify \'huc\' (FIM < 3) or \'filter\' (FIM >= 3) masking method', required=False,default="huc") parser.add_argument('-y', '--magnitude',help='The magnitude to run.',required=False, default="") @@ -210,14 +205,14 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous print() # Ensure test_id is valid. - if args['test_id'] not in valid_test_id_list: - print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided test_id (-t) " + CYAN_BOLD + args['test_id'] + WHITE_BOLD + " is not available." + ENDC) - print(WHITE_BOLD + "Available test_ids include: " + ENDC) - for test_id in valid_test_id_list: - if 'validation' not in test_id.split('_') and 'ble' in test_id.split('_'): - print(CYAN_BOLD + test_id + ENDC) - print() - exit_flag = True +# if args['test_id'] not in valid_test_id_list: +# print(TRED_BOLD + "Warning: " + WHITE_BOLD + "The provided test_id (-t) " + CYAN_BOLD + args['test_id'] + WHITE_BOLD + " is not available." + ENDC) +# print(WHITE_BOLD + "Available test_ids include: " + ENDC) +# for test_id in valid_test_id_list: +# if 'validation' not in test_id.split('_') and 'ble' in test_id.split('_'): +# print(CYAN_BOLD + test_id + ENDC) +# print() +# exit_flag = True # Ensure fim_run_dir exists. 
if not os.path.exists(os.path.join(os.environ['outputDataDir'], args['fim_run_dir'])): diff --git a/tests/synthesize_test_cases.py b/tests/synthesize_test_cases.py index 65c241336..1fdb0a4dc 100644 --- a/tests/synthesize_test_cases.py +++ b/tests/synthesize_test_cases.py @@ -7,7 +7,7 @@ import csv from run_test_case import run_alpha_test -from utils.shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR +from utils.shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES def create_master_metrics_csv(master_metrics_csv_output): @@ -57,7 +57,7 @@ def create_master_metrics_csv(master_metrics_csv_output): ] additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] - list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source'] + ['extent_config']] + list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source'] + ['extent_config'] + ["calibrated"]] versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) @@ -77,17 +77,20 @@ def create_master_metrics_csv(master_metrics_csv_output): for magnitude in ['100yr', '500yr']: for version in versions_to_aggregate: - if '_fr_' in version: + if '_fr' in version: extent_config = 'FR' - if '_ms_' in version: + elif '_ms' in version: extent_config = 'MS' - if '_fr_' or '_ms_' not in version: + else: extent_config = 'FR' + if "_c" in version and version.split('_c')[1] == "": + calibrated = "yes" + else: + calibrated = "no" version_dir = os.path.join(official_versions, version) magnitude_dir = os.path.join(version_dir, magnitude) if os.path.exists(magnitude_dir): - magnitude_dir_list = os.listdir(magnitude_dir) for f in magnitude_dir_list: if '.json' in f: @@ -104,33 +107,37 @@ def create_master_metrics_csv(master_metrics_csv_output): sub_list_to_append.append(flow) sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) + sub_list_to_append.append(calibrated) list_to_write.append(sub_list_to_append) except ValueError: pass - if benchmark_source in ['nws', 'usgs']: - test_cases_list = os.listdir(TEST_CASES_DIR) + if benchmark_source in AHPS_BENCHMARK_CATEGORIES: + test_cases_list = os.listdir(benchmark_test_case_dir) for test_case in test_cases_list: try: int(test_case.split('_')[0]) huc = test_case.split('_')[0] - official_versions = os.path.join(benchmark_test_case_dir, test_case, 'performance_archive', 'previous_versions') + official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') for magnitude in ['action', 'minor', 'moderate', 'major']: for version in versions_to_aggregate: - if '_fr_' in version: + if '_fr' in version: extent_config = 'FR' - if '_ms_' in version: + elif '_ms' in version: extent_config = 'MS' - if '_fr_' or '_ms_' not in version: + else: extent_config = 'FR' + if "_c" in version and version.split('_c')[1] == "": + calibrated = "yes" + else: + calibrated = "no" version_dir = os.path.join(official_versions, version) magnitude_dir = os.path.join(version_dir, magnitude) - if os.path.exists(magnitude_dir): magnitude_dir_list = os.listdir(magnitude_dir) for f in magnitude_dir_list: @@ -159,6 +166,7 @@ def create_master_metrics_csv(master_metrics_csv_output): sub_list_to_append.append(flow) sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) + sub_list_to_append.append(calibrated) list_to_write.append(sub_list_to_append) except ValueError: @@ -202,7 +210,7 @@ def 
process_alpha_test(args): parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. Defaults to process all categories.',required=False, default="all") parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true") parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=True) - + # Assign variables from arguments. args = vars(parser.parse_args()) config = args['config'] @@ -212,6 +220,10 @@ def process_alpha_test(args): benchmark_category = args['benchmark_category'] overwrite = args['overwrite'] master_metrics_csv = args['master_metrics_csv'] + + if overwrite: + if input("Are you sure you want to overwrite metrics? y/n: ") == "n": + quit # Default to processing all possible versions in PREVIOUS_FIM_DIR. Otherwise, process only the user-supplied version. if fim_version != "all": @@ -265,7 +277,6 @@ def process_alpha_test(args): if not os.path.exists(fim_run_dir): if config == 'DEV': fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc[:6]) - print(fim_run_dir) elif config == 'PREV': fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) diff --git a/tests/utils/shared_variables.py b/tests/utils/shared_variables.py index ffcf4bfd0..292f55de1 100644 --- a/tests/utils/shared_variables.py +++ b/tests/utils/shared_variables.py @@ -1,16 +1,16 @@ import os +# Environmental variables and constants. TEST_CASES_DIR = r'/data/test_cases/' PREVIOUS_FIM_DIR = r'/data/previous_fim' OUTPUTS_DIR = os.environ['outputDataDir'] INPUTS_DIR = r'/data/inputs' +AHPS_BENCHMARK_CATEGORIES = ['usgs', 'nws'] PRINTWORTHY_STATS = ['CSI', 'TPR', 'TNR', 'FAR', 'MCC', 'TP_area_km2', 'FP_area_km2', 'TN_area_km2', 'FN_area_km2', 'contingency_tot_area_km2', 'TP_perc', 'FP_perc', 'TN_perc', 'FN_perc'] GO_UP_STATS = ['CSI', 'TPR', 'MCC', 'TN_area_km2', 'TP_area_km2', 'TN_perc', 'TP_perc', 'TNR'] GO_DOWN_STATS = ['FAR', 'FN_area_km2', 'FP_area_km2', 'FP_perc', 'FN_perc'] -AHPS_BENCHMARK_CATEGORIES = ['usgs', 'ble'] - - +# Colors. ENDC = '\033[m' TGREEN_BOLD = '\033[32;1m' TGREEN = '\033[32m' From ca68f2b492e3e643cc4a254a09770789858d5e0e Mon Sep 17 00:00:00 2001 From: Brad Date: Thu, 25 Feb 2021 08:37:21 -0600 Subject: [PATCH 044/359] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc768902a..48dc68648 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ Adds HAND synthetic rating curve (SRC) datum elevation values to `hydroTable.csv ### Changes - - Updated `add_crosswalk.py` to included "Median_Thal_Elev_m" variable outputs in `hydroTable.cs`v. + - Updated `add_crosswalk.py` to included "Median_Thal_Elev_m" variable outputs in `hydroTable.csv`. - Renamed hydroid attribute in `rem.py` to "Median" in case we want to include other statistics in the future (e.g. min, max, range etc.).
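The `synthesize_test_cases.py` hunk in the bug-fix patch above infers the extent configuration and a calibration flag from the version string. Restated as a small standalone helper for clarity (the script itself keeps this logic inline):

```python
# Standalone restatement of the version-name parsing added in the patch above.
def parse_version_name(version):
    """Infer (extent_config, calibrated) from a FIM version string."""
    if '_fr' in version:
        extent_config = 'FR'
    elif '_ms' in version:
        extent_config = 'MS'
    else:
        extent_config = 'FR'   # default when neither marker is present
    calibrated = "yes" if "_c" in version and version.split('_c')[1] == "" else "no"
    return extent_config, calibrated

print(parse_version_name("fim_3_0_5_3_ms_c"))   # ('MS', 'yes')
print(parse_version_name("fim_3_0_5_3_fr"))     # ('FR', 'no')
```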

From a92212db5612c8c5b0bec0227be7f6de20c5f302 Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Thu, 25 Feb 2021 12:45:26 -0600 Subject: [PATCH 045/359] Enhancement that creates metric plots and summary statistics. Enhancement that creates metric plots and summary statistics using metrics compiled by synthesize_test_cases.py. Additions - Added eval_plots.py, which produces: - Boxplots of CSI, FAR, and POD/TPR - Barplot of aggregated CSI scores - Scatterplot of CSI comparing two FIM versions - CSV of aggregated statistics (CSI, FAR, POD/TPR) - CSV of analyzed data and analyzed sites This resolves #70. --- CHANGELOG.md | 13 + tests/plots/eval_plots.py | 343 ++++++++++++++++++++++++++ tests/plots/utils/__init__.py | 0 tests/plots/utils/shared_functions.py | 321 ++++++++++++++++++++++++ 4 files changed, 677 insertions(+) create mode 100644 tests/plots/eval_plots.py create mode 100644 tests/plots/utils/__init__.py create mode 100644 tests/plots/utils/shared_functions.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 48dc68648..8e3d7b31f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,19 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +

+## v3.0.6.0 - 2021-02-25 - [PR #276](https://github.com/NOAA-OWP/cahaba/pull/276) + +Enhancement that creates metric plots and summary statistics using metrics compiled by `synthesize_test_cases.py`. +### Additions + - Added `eval_plots.py`, which produces: + - Boxplots of CSI, FAR, and POD/TPR + - Barplot of aggregated CSI scores + - Scatterplot of CSI comparing two FIM versions + - CSV of aggregated statistics (CSI, FAR, POD/TPR) + - CSV of analyzed data and analyzed sites + +
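The new `eval_plots.py` added below is typically driven from its command line, but the underlying function can also be called directly. A hypothetical call is shown here; the paths, version filters, and import path are placeholders (the module lives under `tests/plots/`). For reference, the aggregate scores it writes are computed from the summed `*_area_km2` fields as CSI = TP/(TP+FP+FN), FAR = FP/(TP+FP), and POD = TP/(TP+FN).

```python
# Hypothetical direct use of the new plotting entry point; paths and versions are
# placeholders, and tests/plots is assumed to be on PYTHONPATH.
from eval_plots import eval_plots

eval_plots(
    metrics_csv="/data/test_cases/master_metrics.csv",   # from synthesize_test_cases.py
    workspace="/data/outputs/eval_plots_example",
    versions=["fim_2", "fim_3"],      # "startswith" filters, per the docstring
    stats=["CSI", "FAR", "TPR"],
)
```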

## v3.0.5.3 - 2021-02-23 - [PR #275](https://github.com/NOAA-OWP/cahaba/pull/275) Bug fixes to new evaluation code. diff --git a/tests/plots/eval_plots.py b/tests/plots/eval_plots.py new file mode 100644 index 000000000..9c29087e1 --- /dev/null +++ b/tests/plots/eval_plots.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python3 +import pandas as pd +from pathlib import Path +import argparse +from natsort import natsorted +import geopandas as gpd +from utils.shared_functions import filter_dataframe, boxplot, scatterplot, barplot +def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False): + + ''' + Creates plots and summary statistics using metrics compiled from + synthesize_test_cases. Required inputs are metrics_csv and workspace. + Outputs include: + aggregate__.csv: this csv + contains the aggregated total statistics (i.e. CSI, FAR, POD) + using the summed area_sq_km fields + __common_sites.csv: this csv + contains the unique sites (e.g usgs/nws: nws_lid; ble: huc08) + considered for aggregation/plots for each magnitude. The selected + sites occur in all versions analyzed. For example, if FIM 1, + FIM 2, FIM 3.0.0.3 were versions analyzed, the common sites + would be those that had data for ALL versions. This + analysis is then redone for each magnitude. As such, the number + of sites may vary with magnitude. The number of sites for each + magnitude is annotated on generated plots. + __analyzed_data.csv: this is the + dataset used to create plots and aggregate statistics. It is + a subset of the input metrics file and consists of the common + sites. + csi_aggr__.png: bar plot of the + aggregated CSI scores. Number of common sites is annotated + (see list of sites listed in *_*_common_sites.csv). + csi__.png: box plot of CSI scores + (sites weighted equally). Number of common sites is annotated + (see list of sites listed in *_*_common_sites.csv). + far__*.png: box plot of FAR scores + (sites weighted equally). Number of common sites is annotated + (see list of sites listed in *_*_common_sites.csv). + tpr__*.png: box plot of TPR/POD + scores (sites weighted equally). Number of common sites is + annotated (see list of sites listed in *_*_common_sites.csv). + csi_scatter__*.png: scatter plot comparing + two versions for a given magnitude. This is only generated if + there are exactly two versions analyzed. + + Parameters + ---------- + metrics_csv : STRING + Path to csv produced as part of synthesize_test_cases containing + all metrics across all versions. + workspace : STRING + Path to the output workspace. Subdirectories will be created + reflecting the evaluation datasets. + versions: LIST + A list of versions to be aggregated/plotted. Uses the "startswith" + approach. Versions should be supplied in the order they are to + be plotted. For example: ['fim_', 'fb']; This will evaluate all + versions that start with fim_ (e.g. fim_1, fim_2, fim_3) and any + feature branch that starts with "fb". To esbalish version order, + the fim versions are naturally sorted and then fb versions + (naturally sorted) are appended. These versions are also used to + filter the input metric csv as only these versions are retained + for analysis. + stats: LIST + A list of statistics to be plotted. Must be identical to column + field in metrics_csv. CSI, POD, TPR are currently calculated, if + additional statistics are desired formulas would need to be coded. + alternate_ahps_query : STRING, optional + The default is false. 
Currently the default ahps query is same + as done for apg goals. If a different query is desired it can be + supplied and it will supercede the default query. + spatial_ahps : DICTIONARY, optional + The default is false. A dictionary with keys as follows: + 'static': Path to AHPS point file created during creation of + FIM 3 static libraries. + 'evaluated': Path to extent file created during the creation + of the NWS/USGS AHPS preprocessing. + 'metadata': Path to previously created file that contains + metadata about each site (feature_id, wfo, rfc and etc). + No spatial layers will be created if set to False, if a dictionary + is supplied then a spatial layer is produced. + fim_1_ms: BOOL + Default is false. If True then fim_1 rows are duplicated with + extent_config set to MS. This allows for FIM 1 to be included + in MS plots/stats (helpful for nws/usgs ahps comparisons). + + Returns + ------- + all_datasets : DICT + Dictionary containing all datasets generated. + Keys: (benchmark_source, extent_config), + Values: (filtered dataframe, common sites) + + ''' + + #Import metrics csv as DataFrame and initialize all_datasets dictionary + csv_df = pd.read_csv(metrics_csv) + + #fim_1_ms flag enables FIM 1 to be shown on MS plots/stats + if fim_1_ms: + #Query FIM 1 rows based on version beginning with "fim_1" + fim_1_rows = csv_df.query('version.str.startswith("fim_1")').copy() + #Set extent configuration to MS (instead of FR) + fim_1_rows['extent_config'] = 'MS' + #Append duplicate FIM 1 rows to original dataframe + csv_df = csv_df.append(fim_1_rows, ignore_index = True) + + #If versions are supplied then filter out + if versions: + #Filter out versions based on supplied version list + metrics = csv_df.query('version.str.startswith(tuple(@versions))') + else: + metrics = csv_df + + #Group by benchmark source + benchmark_by_source = metrics.groupby(['benchmark_source', 'extent_config']) + + #Iterate through benchmark_by_source. Pre-filter metrics dataframe + #as needed (e.g. usgs/nws filter query). Then further filtering to + #discard all hucs/nws_lid that are not present across all analyzed + #versions for a given magnitude. The final filtered dataset is written + #to a dictionary with the key (benchmark source, extent config) + #and values (filtered dataframe, common sites). + all_datasets = {} + for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: + + #If source is usgs/nws define the base resolution and query + #(use alternate query if passed). Append filtered datasets to + #all_datasets dictionary. + if benchmark_source in ['usgs','nws']: + + #Set the base processing unit for the ahps runs. + base_resolution = 'nws_lid' + + #Default query (used for APG) it could be that bad_sites should be modified. If so pass an alternate query using the "alternate_ahps_query" + bad_sites = ['grfi2','ksdm7','hohn4','rwdn4'] + query = "not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" + + #If alternate ahps evaluation query argument is passed, use that. + if alternate_ahps_query: + query = alternate_ahps_query + + #Filter the dataset based on query + ahps_metrics = benchmark_metrics.query(query) + + #Filter out all instances where the base_resolution doesn't + #exist across all desired fim versions for a given magnitude. 
+ all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(ahps_metrics, base_resolution) + + #If source is 'ble', set base_resolution and append ble dataset + #to all_datasets dictionary + elif benchmark_source == 'ble': + + #Set the base processing unit for ble runs + base_resolution = 'huc' + + #Filter out all instances where base_resolution doesn't exist + #across all desired fim versions for a given magnitude. + all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(benchmark_metrics, base_resolution) + + #For each dataset in all_datasets, generate plots and aggregate statistics. + for (dataset_name,configuration), (dataset, sites) in all_datasets.items(): + + #Define and create the output workspace as a subfolder within + #the supplied workspace + output_workspace = Path(workspace) / dataset_name / configuration.lower() + output_workspace.mkdir(parents = True, exist_ok = True) + + #Write out the filtered dataset and common sites to file + dataset.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_analyzed_data.csv'), index = False) + sites_pd = pd.DataFrame.from_dict(sites, orient = 'index').transpose() + sites_pd.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_common_sites.csv'), index = False) + + #set the order of the magnitudes and define base resolution. + if dataset_name == 'ble': + magnitude_order = ['100yr', '500yr'] + base_resolution = 'huc' + elif dataset_name in ['usgs','nws']: + magnitude_order = ['action','minor','moderate','major'] + base_resolution = 'nws_lid' + + #Calculate aggregated metrics based on total_sq_km fields. + dataset_sums = dataset.groupby(['version', 'magnitude'])[['TP_area_km2','FP_area_km2','FN_area_km2']].sum() + dataset_sums['csi'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2'] + dataset_sums['FN_area_km2']) + dataset_sums['far'] = dataset_sums['FP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2']) + dataset_sums['pod'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FN_area_km2']) + dataset_sums = dataset_sums.reset_index() + + #Write aggregated metrics to file. + dataset_sums.to_csv(output_workspace / f'aggregate_{dataset_name}_{configuration.lower()}.csv', index = False ) + + #This section naturally orders analyzed versions which defines + #the hue order for the generated plots. + #Get all versions in dataset + all_versions = list(dataset.version.unique()) + version_order = [] + #If versions are not specified then use all available versions + #and assign to versions_list + if not versions: + versions_list = all_versions + #if versions are supplied assign to versions_list + else: + versions_list = versions + #For each version supplied by the user + for version in versions_list: + #Select all the versions that start with the supplied version. + selected_versions = [sel_version for sel_version in all_versions if sel_version.startswith(version)] + #Naturally sort selected_versions + selected_versions = natsorted(selected_versions) + #Populate version order based on the sorted subsets. + version_order.extend(selected_versions) + + #Define textbox which will contain the counts of each magnitude. 
+ textbox = [] + for magnitude in sites: + count = len(sites[magnitude]) + line_text = f'{magnitude.title()} Sites = {count}' + textbox.append(line_text) + textbox = '\n'.join(textbox) + + #Create aggregate barplot + aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png') + barplot(dataframe = dataset_sums, x_field = 'magnitude', x_order = magnitude_order, y_field = 'csi', hue_field = 'version', ordered_hue = version_order, title_text = f'Aggregate {dataset_name.upper()} FIM Scores', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = aggregate_file) + + #Create box plots for each metric in supplied stats. + for stat in stats: + output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png') + boxplot(dataframe = dataset, x_field = 'magnitude', x_order = magnitude_order, y_field = stat, hue_field = 'version', ordered_hue = version_order, title_text = f'{dataset_name.upper()} FIM Sites', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = output_file) + + #Get the last 2 versions from the version order for scatter plot. + if len(version_order) == 2: + x_version, y_version = version_order + for magnitude in magnitude_order: + #Scatterplot comparison between last 2 versions. + x_csi = dataset.query(f'version == "{x_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] + y_csi = dataset.query(f'version == "{y_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] + plotdf = pd.merge(x_csi, y_csi, on = base_resolution, suffixes = (f"_{x_version}",f"_{y_version}")) + #Define arguments for scatterplot function. + title_text = f'CSI {magnitude}' + dest_file = output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}.png' + scatterplot(dataframe = plotdf, x_field = f'CSI_{x_version}', y_field = f'CSI_{y_version}', title_text = title_text, annotate = False, dest_file = dest_file) + + + ####################################################################### + #Create spatial layers with threshold and mapping information + ######################################################################## + if spatial_ahps: + + #Read in supplied shapefile layers + #Layer containing metadata for each site (feature_id, wfo, etc). + #Convert nws_lid to lower case. + ahps_metadata = gpd.read_file(spatial_ahps['metadata']) + ahps_metadata['nws_lid'] = ahps_metadata['nws_lid'].str.lower() + metadata_crs = ahps_metadata.crs + + #Extent layer generated from preprocessing NWS/USGS datasets + evaluated_ahps_extent = gpd.read_file(spatial_ahps['evaluated']) + + #Extent layer generated from static ahps library preprocessing + static_library = gpd.read_file(spatial_ahps['static']) + + #Fields to keep + #Get list of fields to keep in merge + preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))] + #Get list of fields to keep in merge. 
+ preserved_evaluated_ahps_fields = ['nws_lid', 'source', 'geometry'] + [i for i in evaluated_ahps_extent.columns if i.startswith(('action','minor','moderate','major'))] + + #Join tables to evaluated_ahps_extent + evaluated_ahps_extent = evaluated_ahps_extent[preserved_evaluated_ahps_fields] + evaluated_ahps_extent = evaluated_ahps_extent.merge(ahps_metadata, on = 'nws_lid') + evaluated_ahps_extent['geometry'] = evaluated_ahps_extent['geometry_y'] + evaluated_ahps_extent.drop(columns = ['geometry_y','geometry_x'], inplace = True) + evaluated_ahps_extent = evaluated_ahps_extent.merge(static_library[preserved_static_library_fields], on = 'nws_lid') + + #Join dataset metrics to evaluated_ahps_extent data. + final_join = pd.DataFrame() + for (dataset_name, configuration), (dataset, sites) in all_datasets.items(): + #Only select ahps from dataset if config is MS + if dataset_name in ['usgs','nws'] and configuration == 'MS': + #Select records from evaluated_ahps_extent that match the dataset name + subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"') + #Join to dataset + dataset_with_subset = dataset.merge(subset, on = 'nws_lid') + #Append rows to final_join dataframe + final_join = final_join.append(dataset_with_subset) + + #Modify version field + final_join['version'] = final_join.version.str.split('_nws|_usgs').str[0] + + #Write geodataframe to file + gdf = gpd.GeoDataFrame(final_join, geometry = final_join['geometry'], crs = metadata_crs) + output_shapefile = Path(workspace) / 'nws_usgs_site_info.shp' + gdf.to_file(output_shapefile) + + + +####################################################################### +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Plot and aggregate statistics for benchmark datasets (BLE/AHPS libraries)') + parser.add_argument('-m','--metrics_csv', help = 'Metrics csv created from synthesize test cases.', required = True) + parser.add_argument('-w', '--workspace', help = 'Output workspace', required = True) + parser.add_argument('-v', '--versions', help = 'List of versions to be plotted/aggregated. Versions are filtered using the "startswith" approach. For example, ["fim_","fb1"] would retain all versions that began with "fim_" (e.g. fim_1..., fim_2..., fim_3...) as well as any feature branch that began with "fb". An other example ["fim_3","fb"] would result in all fim_3 versions being plotted along with the fb.', nargs = '+', default = []) + parser.add_argument('-s', '--stats', help = 'List of statistics (abbrev to 3 letters) to be plotted/aggregated', nargs = '+', default = ['CSI','TPR','FAR'], required = False) + parser.add_argument('-q', '--alternate_ahps_query',help = 'Alternate filter query for AHPS. Default is: "not nws_lid.isnull() & not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" where bad_sites are (grfi2,ksdm7,hohn4,rwdn4)', default = False, required = False) + parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False) + parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) + + #Extract to dictionary and assign to variables. 
+ args = vars(parser.parse_args()) + + #If errors occur reassign error to True + error = False + #Create dictionary if file specified for spatial_ahps + if args['spatial_ahps']: + #Create dictionary + spatial_dict = {} + with open(args['spatial_ahps']) as file: + for line in file: + key, value = line.strip('\n').split(',') + spatial_dict[key] = Path(value) + args['spatial_ahps'] = spatial_dict + #Check that all required keys are present and overwrite args with spatial_dict + required_keys = set(['metadata', 'evaluated', 'static']) + if required_keys - spatial_dict.keys(): + print('\n Required keys are: metadata, evaluated, static') + error = True + else: + args['spatial_ahps'] = spatial_dict + + + #Finalize Variables + m = args['metrics_csv'] + w = args['workspace'] + v = args['versions'] + s = args['stats'] + q = args['alternate_ahps_query'] + sp= args['spatial_ahps'] + f = args['fim_1_ms'] + + #Run eval_plots function + if not error: + eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f) \ No newline at end of file diff --git a/tests/plots/utils/__init__.py b/tests/plots/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/plots/utils/shared_functions.py b/tests/plots/utils/shared_functions.py new file mode 100644 index 000000000..60342059e --- /dev/null +++ b/tests/plots/utils/shared_functions.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import re +######################################################################### +#Create boxplot +######################################################################### +def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False): + ''' + Create boxplots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis + x_order : List + Order to arrange the x-axis. + y_field : STR + Field to use for the y-axis + hue_field : STR + Field to use for hue (typically FIM version) + title_text : STR + Text for plot title. + fim_configuration: STR + Configuration of FIM (FR or MS or Composite). + simplify_legend : BOOL, optional + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + The default is False. + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. The default is False. + + Returns + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + #Use seaborn to plot the boxplot + axes=sns.boxplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') + #set title of plot + axes.set_title(f'{title_text} ({y_field})',fontsize=20, weight = 'bold') + #Set yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + for index,ytick in enumerate(axes.get_yticks()): + plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) + #Define y axis label and x axis label. + axes.set_ylabel(f'{y_field}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel('',fontsize=0,weight = 'bold') + #Set sizes of ticks and legend. 
+ axes.tick_params(labelsize = 'xx-large') + axes.legend(markerscale = 2, fontsize =20, loc = 'lower left') + + #If simple legend desired + if simplify_legend: + #trim labels to FIM 1, FIM 2, and the FIM 3 version + handles, org_labels = axes.get_legend_handles_labels() + label_dict = {} + for label in org_labels: + if 'fim_1' in label: + label_dict[label] = 'FIM 1' + elif 'fim_2' in label: + label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() + elif 'fim_3' in label: + label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + if label.endswith('_c'): + label_dict[label] = label_dict[label] + ' c' + else: + label_dict[label] = label + ' ' + fim_configuration.lower() + #Define simplified labels as a list. + new_labels = [label_dict[label] for label in org_labels] + #Define legend location. FAR needs to be in different location than CSI/POD. + if y_field == 'FAR': + legend_location = 'upper right' + else: + legend_location = 'lower left' + #rename legend labels to the simplified labels. + axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = legend_location, ncol = int(np.ceil(len(new_labels)/7))) + #Print textbox if supplied + if textbox_str: + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=14, verticalalignment='top', bbox=box_props) + + #If figure to be saved to disk, then do so, otherwise return figure + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig + +######################################################################### +#Create scatter plot +######################################################################### +def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annotate = False, dest_file = False): + ''' + Create scatter plots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis (Assumes FIM 2) + y_field : STR + Field to use for the y-axis (Assumes FIM 3) + title_text : STR + Text for plot title. + stats_text : STR or BOOL + Text for stats to place on chart. Default is false (no stats printed) + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. The default is False. + + Returns + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + + #Use seaborn to plot the scatterplot + axes=sns.scatterplot(data=dataframe, x=x_field, y=y_field, color = 'black', s = 150) + + #Set xticks and yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + axes.set(xlim=(0.0,1.0),xticks = np.arange(0,1.1,0.1)) + axes.grid(b=True, which='major', axis='both') + + #Set sizes of ticks and legend. + axes.tick_params(labelsize = 'xx-large') + + #Define y axis label and x axis label. 
+ axes.set_ylabel(f'{y_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel(f'{x_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') + + #Plot diagonal line + diag_range = [0,1] + axes.plot(diag_range, diag_range, color='gray', transform=axes.transAxes) + + + #set title of plot + axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') + + if annotate: + #Set text for labels + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + textbox_str = 'Target Better' + axes.text(0.3, 0.6, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') + textbox_str = 'Baseline Better' + axes.text(0.5, 0.2, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') + + if stats_text: + #Add statistics textbox + axes.text(0.01, 0.80, stats_text, transform=axes.transAxes, fontsize=24, verticalalignment='top', bbox=box_props) + + #If figure to be saved to disk, then do so, otherwise return fig + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig +######################################################################### +#Create barplot +######################################################################### +def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False): + ''' + Create barplots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis + x_order : List + Order to arrange the x-axis. + y_field : STR + Field to use for the y-axis + hue_field : STR + Field to use for hue (typically FIM version) + title_text : STR + Text for plot title. + fim_configuration: STR + Configuration of FIM (FR or MS or Composite). + simplify_legend : BOOL, optional + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + Default is False. + display_values : BOOL, optional + If True, Y values will be displayed above bars. + Default is False. + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. Default is False. + + Returns + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + #Use seaborn to plot the boxplot + axes=sns.barplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') + #set title of plot + axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') + #Set yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + for index,ytick in enumerate(axes.get_yticks()): + plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) + #Define y axis label and x axis label. + axes.set_ylabel(f'{y_field.upper()}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel('',fontsize=0,weight = 'bold') + #Set sizes of ticks and legend. 
+ axes.tick_params(labelsize = 'xx-large') + axes.legend(markerscale = 2, fontsize =20, loc = 'upper right') + #If simple legend desired + if simplify_legend: + #trim labels to FIM 1, FIM 2, FIM 3 + handles, org_labels = axes.get_legend_handles_labels() + label_dict = {} + for label in org_labels: + if 'fim_1' in label: + label_dict[label] = 'FIM 1' + elif 'fim_2' in label: + label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() + elif 'fim_3' in label: + label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + if label.endswith('_c'): + label_dict[label] = label_dict[label] + ' c' + else: + label_dict[label] = label + ' ' + fim_configuration.lower() + #Define simplified labels as a list. + new_labels = [label_dict[label] for label in org_labels] + #rename legend labels to the simplified labels. + axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = 'upper right', ncol = int(np.ceil(len(new_labels)/7))) + #Add Textbox + if textbox_str: + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=18, verticalalignment='top', bbox=box_props) + + #Display Y values above bars + if display_values: + #Add values of bars directly above bar. + for patch in axes.patches: + value = round(patch.get_height(),3) + axes.text(patch.get_x()+patch.get_width()/2., + patch.get_height(), + '{:1.3f}'.format(value), + ha="center", fontsize=18) + + #If figure to be saved to disk, then do so, otherwise return fig + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig +####################################################################### +#Filter dataframe generated from csv file from run_test_case aggregation +######################################################################## +def filter_dataframe(dataframe, unique_field): + ''' + + This script will filter out the sites (or hucs) which are not consistently + found for all versions for a given magnitude. For example, an AHPS + lid site must have output for all 3 versions (fim1, fim2, fim3) for + a given magnitude (eg action) otherwise that lid is filtered out. + Likewise for a BLE a huc must have output for all 3 versions + (fim1, fim2, fim3) for a given magnitude (eg 100yr) otherwise it is + filtered out. + + Parameters + ---------- + dataframe : Pandas DataFrame + Containing the input metrics originating from synthesize_test_cases + unique_field : STR + base resolution for each benchmark source: 'nws'/'usgs' (nws_lid) + ble (huc). + + Returns + ------- + final_filtered_dataframe : Pandas Dataframe + Filtered dataframe that contains only common sites (lids or hucs) between versions for each magnitude. For example, for AHPS all sites which were run for each version for a given magnitude will be kept or for ble, all hucs which ran for all versions for a given magnitude. + unique_sites: DICT + The sites that were included in the dataframe for each magnitude. + + ''' + + #Get lists of sites for each magnitude/version + unique_sites = dataframe.groupby(['magnitude','version'])[unique_field].agg('unique') + #Get unique magnitudes + magnitudes = dataframe.magnitude.unique() + #Create new dataframe to hold metrics for the common sites as well as the actual lists of common sites. 
+ final_filtered_dataframe = pd.DataFrame() + all_unique_sites = {} + #Cycle through each magnitude + for magnitude in magnitudes: + #Compile a list of sets containing unique lids pertaining to each threshold. List contains 3 unique sets [{fim1:unique lids},{fim2: unique lids},{fim3: unique lids}] + sites_per_magnitude=[set(a) for a in unique_sites[magnitude]] + #Intersect the sets to get the common lids per threshold then convert to list. + common_sites_per_magnitude = list(set.intersection(*sites_per_magnitude)) + #Write common sites to dataframe + all_unique_sites[magnitude] = common_sites_per_magnitude + #Query filtered dataframe and only include data associated with the common sites for that magnitude + filtered_common_sites = dataframe.query(f'magnitude == "{magnitude}" & {unique_field} in @common_sites_per_magnitude') + #Append the data for each magnitude to a final dataframe that will contain data for all common sites for all magnitudes. + final_filtered_dataframe = final_filtered_dataframe.append(filtered_common_sites, ignore_index = True) + + return final_filtered_dataframe, all_unique_sites + From 71a57b76a806a20f059ad4a9e84a2d6ee329c167 Mon Sep 17 00:00:00 2001 From: NickChadwick-NOAA Date: Mon, 1 Mar 2021 14:19:09 -0600 Subject: [PATCH 046/359] Restructured the repository Restructured the repository. This has no impact on hydrological code and simply moves files and renaming directories. - Moved the contents of the lib folder to a new folder called src. - Moved the contents of the tests folder to the tools folder. - Changed any instance of lib or libDir to src or srcDir This resolves #283. --- .gitignore | 2 ++ CHANGELOG.md | 11 ++++++++ Dockerfile.dev | 4 +-- Dockerfile.prod | 4 +-- README.md | 6 ++-- INSTALL.md => docs/INSTALL.md | 0 fim_run.sh | 14 +++++----- lib/time_and_tee_run_by_unit.sh | 5 ---- {lib => src}/__init__.py | 0 {lib => src}/acquire_and_preprocess_inputs.py | 0 {lib => src}/add_crosswalk.py | 0 {lib => src}/adjust_headwater_streams.py | 0 {lib => src}/adjust_thalweg_lateral.py | 0 {lib => src}/aggregate_fim_outputs.py | 0 {lib => src}/aggregate_fim_outputs.sh | 0 {lib => src}/aggregate_vector_inputs.py | 0 {lib => src}/agreedem.py | 0 {lib => src}/bash_functions.env | 0 {lib => src}/build_stream_traversal.py | 0 {lib => src}/check_huc_inputs.py | 0 {lib => src}/clip_vectors_to_wbd.py | 0 {lib => src}/derive_headwaters.py | 0 {lib => src}/entrypoint.sh | 0 .../filter_catchments_and_add_attributes.py | 0 {lib => src}/fr_to_ms_raster_mask.py | 0 {lib => src}/getRasterInfoNative.py | 0 {lib => src}/get_all_huc_in_inputs.py | 0 {lib => src}/make_stages_and_catchlist.py | 0 {lib => src}/output_cleanup.py | 0 {lib => src}/r_grow_distance.py | 0 {lib => src}/raster.py | 0 {lib => src}/reachID_grid_to_vector_points.py | 0 {lib => src}/reduce_nhd_stream_density.py | 0 {lib => src}/rem.py | 0 {lib => src}/run_by_unit.sh | 28 +++++++++---------- {lib => src}/split_flows.py | 0 src/time_and_tee_run_by_unit.sh | 5 ++++ {lib => src}/unique_pixel_and_allocation.py | 0 {lib => src}/utils/__init__.py | 0 {lib => src}/utils/archive_cleanup.py | 0 {lib => src}/utils/shared_functions.py | 0 {lib => src}/utils/shared_variables.py | 0 {tests => tools}/__init__.py | 0 .../aggregate_mannings_calibration.py | 0 {tests => tools}/aggregate_metrics.py | 0 {tests => tools}/cache_metrics.py | 0 {tests => tools}/comparing_src.py | 0 {tests => tools}/inundation.py | 0 {tests => tools}/mannings_calibration_run.sh | 0 {tests => tools}/mannings_run_by_set.sh | 2 +- {tests => 
tools}/plots/eval_plots.py | 0 {tests => tools}/plots/utils/__init__.py | 0 .../plots/utils/shared_functions.py | 0 .../preprocess/create_flow_forecast_file.py | 2 ++ .../preprocess/preprocess_benchmark.py | 2 ++ .../preprocess/preprocess_fimx.py | 0 {tests => tools}/run_test_case.py | 0 {tests => tools}/run_test_case_calibration.py | 0 {tests => tools}/synthesize_test_cases.py | 0 .../time_and_tee_mannings_calibration.sh | 0 {tests => tools}/utils/__init__.py | 0 {tests => tools}/utils/shapefile_to_raster.py | 0 {tests => tools}/utils/shared_functions.py | 0 {tests => tools}/utils/shared_variables.py | 0 64 files changed, 51 insertions(+), 34 deletions(-) rename INSTALL.md => docs/INSTALL.md (100%) delete mode 100755 lib/time_and_tee_run_by_unit.sh rename {lib => src}/__init__.py (100%) rename {lib => src}/acquire_and_preprocess_inputs.py (100%) rename {lib => src}/add_crosswalk.py (100%) rename {lib => src}/adjust_headwater_streams.py (100%) rename {lib => src}/adjust_thalweg_lateral.py (100%) rename {lib => src}/aggregate_fim_outputs.py (100%) rename {lib => src}/aggregate_fim_outputs.sh (100%) rename {lib => src}/aggregate_vector_inputs.py (100%) rename {lib => src}/agreedem.py (100%) rename {lib => src}/bash_functions.env (100%) rename {lib => src}/build_stream_traversal.py (100%) rename {lib => src}/check_huc_inputs.py (100%) rename {lib => src}/clip_vectors_to_wbd.py (100%) rename {lib => src}/derive_headwaters.py (100%) rename {lib => src}/entrypoint.sh (100%) rename {lib => src}/filter_catchments_and_add_attributes.py (100%) rename {lib => src}/fr_to_ms_raster_mask.py (100%) rename {lib => src}/getRasterInfoNative.py (100%) rename {lib => src}/get_all_huc_in_inputs.py (100%) rename {lib => src}/make_stages_and_catchlist.py (100%) rename {lib => src}/output_cleanup.py (100%) rename {lib => src}/r_grow_distance.py (100%) rename {lib => src}/raster.py (100%) rename {lib => src}/reachID_grid_to_vector_points.py (100%) rename {lib => src}/reduce_nhd_stream_density.py (100%) rename {lib => src}/rem.py (100%) rename {lib => src}/run_by_unit.sh (95%) rename {lib => src}/split_flows.py (100%) create mode 100755 src/time_and_tee_run_by_unit.sh rename {lib => src}/unique_pixel_and_allocation.py (100%) rename {lib => src}/utils/__init__.py (100%) rename {lib => src}/utils/archive_cleanup.py (100%) rename {lib => src}/utils/shared_functions.py (100%) rename {lib => src}/utils/shared_variables.py (100%) rename {tests => tools}/__init__.py (100%) rename {tests => tools}/aggregate_mannings_calibration.py (100%) rename {tests => tools}/aggregate_metrics.py (100%) rename {tests => tools}/cache_metrics.py (100%) rename {tests => tools}/comparing_src.py (100%) rename {tests => tools}/inundation.py (100%) rename {tests => tools}/mannings_calibration_run.sh (100%) rename {tests => tools}/mannings_run_by_set.sh (93%) rename {tests => tools}/plots/eval_plots.py (100%) rename {tests => tools}/plots/utils/__init__.py (100%) rename {tests => tools}/plots/utils/shared_functions.py (100%) rename {tests => tools}/preprocess/create_flow_forecast_file.py (99%) rename {tests => tools}/preprocess/preprocess_benchmark.py (99%) rename {tests => tools}/preprocess/preprocess_fimx.py (100%) rename {tests => tools}/run_test_case.py (100%) rename {tests => tools}/run_test_case_calibration.py (100%) rename {tests => tools}/synthesize_test_cases.py (100%) rename {tests => tools}/time_and_tee_mannings_calibration.sh (100%) rename {tests => tools}/utils/__init__.py (100%) rename {tests => tools}/utils/shapefile_to_raster.py 
(100%) rename {tests => tools}/utils/shared_functions.py (100%) rename {tests => tools}/utils/shared_variables.py (100%) diff --git a/.gitignore b/.gitignore index ec0085323..54bcfb6a6 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ config/** !config/*calibrated* !config/symbology/ .vscode/ +**/.DS_Store +**/*_pytest.py \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e3d7b31f..64bc07129 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+ +## v3.0.7.0 - 2021-03-01 - [PR #288](https://github.com/NOAA-OWP/cahaba/pull/288) + +Restructured the repository. This has no impact on hydrological work done in the codebase and is simply moving files and renaming directories. + +### Changes + - Moved the contents of the `lib` folder to a new folder called `src`. + - Moved the contents of the `tests` folder to the `tools` folder. + - Changed any instance of `lib` or `libDir` to `src` or `srcDir`. + +
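For orientation, a minimal sketch of what the `libDir` → `srcDir` rename looks like in practice (the two calls below mirror the `fim_run.sh` hunk further down in this patch; `$srcDir` is exported by the Dockerfiles as `$projectDir/src`):

```bash
# Helper scripts are now resolved through $srcDir rather than the old $libDir.
source $srcDir/bash_functions.env
$srcDir/check_huc_inputs.py -u "$hucList"
```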

## v3.0.6.0 - 2021-02-25 - [PR #276](https://github.com/NOAA-OWP/cahaba/pull/276) Enhancement that creates metric plots and summary statistics using metrics compiled by `synthesize_test_cases.py`. diff --git a/Dockerfile.dev b/Dockerfile.dev index 626ef94ef..dea61c348 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -54,7 +54,7 @@ ARG projectDir=/foss_fim ARG depDir=/dependencies ENV inputDataDir=$dataDir/inputs ENV outputDataDir=$dataDir/outputs -ENV libDir=$projectDir/lib +ENV srcDir=$projectDir/src ENV taudemDir=$depDir/taudem/bin ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin @@ -92,5 +92,5 @@ COPY Pipfile.lock . RUN pip3 install pipenv && PIP_NO_CACHE_DIR=off PIP_NO_BINARY=shapely,pygeos pipenv install --system --deploy --ignore-pipfile ## RUN UMASK TO CHANGE DEFAULT PERMISSIONS ## -ADD ./lib/entrypoint.sh / +ADD ./src/entrypoint.sh / ENTRYPOINT ["/bin/bash", "/entrypoint.sh"] diff --git a/Dockerfile.prod b/Dockerfile.prod index 79c50bdaf..23d0980ec 100644 --- a/Dockerfile.prod +++ b/Dockerfile.prod @@ -54,7 +54,7 @@ ARG projectDir=/foss_fim ARG depDir=/dependencies ENV inputDataDir=$dataDir/inputs ENV outputDataDir=$dataDir/outputs -ENV libDir=$projectDir/lib +ENV srcDir=$projectDir/src ENV taudemDir=$depDir/taudem/bin ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin @@ -95,6 +95,6 @@ COPY . $projectDir/ USER root:$GroupName # RUN UMASK TO CHANGE DEFAULT PERMISSIONS ## -ADD ./lib/entrypoint.sh / +ADD ./src/entrypoint.sh / ENTRYPOINT ["/bin/bash", "/entrypoint.sh"] diff --git a/README.md b/README.md index cb091529a..311c29cde 100644 --- a/README.md +++ b/README.md @@ -62,15 +62,15 @@ docker run --rm -it -v :/data -v :/foss_fim +/foss_fim/src/acquire_and_preprocess_inputs.py -u ``` - `-u` can be a single HUC4, series of HUC4s (e.g. 1209 1210), path to line-delimited file with HUC4s. -- Please run `/foss_fim/lib/acquire_and_preprocess_inputs.py --help` for more information. +- Please run `/foss_fim/src/acquire_and_preprocess_inputs.py --help` for more information. 
- See United States Geological Survey (USGS) National Hydrography Dataset Plus High Resolution (NHDPlusHR) [site](https://www.usgs.gov/core-science-systems/ngp/national-hydrography/nhdplus-high-resolution) for more information ### Aggregate NHDHR Streams and Create NWM Headwater Points ``` -/foss_fim/lib/aggregate_vector_inputs.py +/foss_fim/src/aggregate_vector_inputs.py ``` ### Produce Hydrofabric ``` diff --git a/INSTALL.md b/docs/INSTALL.md similarity index 100% rename from INSTALL.md rename to docs/INSTALL.md diff --git a/fim_run.sh b/fim_run.sh index 8c6862dae..5acdeff71 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -94,7 +94,7 @@ fi ## SOURCE ENV FILE AND FUNCTIONS ## source $envFile -source $libDir/bash_functions.env +source $srcDir/bash_functions.env # default values if [ "$jobLimit" = "" ] ; then @@ -123,7 +123,7 @@ export input_nhd_headwaters_fr=$inputDataDir/nhdplus_vectors_aggregate/nhd_headw export input_nhd_headwaters_ms=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_ms.gpkg ## Input handling ## -$libDir/check_huc_inputs.py -u "$hucList" +$srcDir/check_huc_inputs.py -u "$hucList" ## Make output and data directories ## if [ -d "$outputRunDataDir" ] && [ "$overwrite" -eq 1 ]; then @@ -137,20 +137,20 @@ mkdir -p $outputRunDataDir/logs ## RUN ## if [ -f "$hucList" ]; then if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit --joblog $logFile -- $libDir/time_and_tee_run_by_unit.sh :::: $hucList + parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh :::: $hucList else - parallel --eta -j $jobLimit --joblog $logFile -- $libDir/time_and_tee_run_by_unit.sh :::: $hucList + parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh :::: $hucList fi else if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit --joblog $logFile -- $libDir/time_and_tee_run_by_unit.sh ::: $hucList + parallel --verbose --lb -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh ::: $hucList else - parallel --eta -j $jobLimit --joblog $logFile -- $libDir/time_and_tee_run_by_unit.sh ::: $hucList + parallel --eta -j $jobLimit --joblog $logFile -- $srcDir/time_and_tee_run_by_unit.sh ::: $hucList fi fi echo "$viz" if [[ "$viz" -eq 1 ]]; then # aggregate outputs - python3 /foss_fim/lib/aggregate_fim_outputs.py -d $outputRunDataDir + python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir fi diff --git a/lib/time_and_tee_run_by_unit.sh b/lib/time_and_tee_run_by_unit.sh deleted file mode 100755 index c1eca97fc..000000000 --- a/lib/time_and_tee_run_by_unit.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -e - -/usr/bin/time -v $libDir/run_by_unit.sh $1 |& tee $outputRunDataDir/logs/$1.log -exit ${PIPESTATUS[0]} - diff --git a/lib/__init__.py b/src/__init__.py similarity index 100% rename from lib/__init__.py rename to src/__init__.py diff --git a/lib/acquire_and_preprocess_inputs.py b/src/acquire_and_preprocess_inputs.py similarity index 100% rename from lib/acquire_and_preprocess_inputs.py rename to src/acquire_and_preprocess_inputs.py diff --git a/lib/add_crosswalk.py b/src/add_crosswalk.py similarity index 100% rename from lib/add_crosswalk.py rename to src/add_crosswalk.py diff --git a/lib/adjust_headwater_streams.py b/src/adjust_headwater_streams.py similarity index 100% rename from lib/adjust_headwater_streams.py rename to src/adjust_headwater_streams.py diff --git a/lib/adjust_thalweg_lateral.py b/src/adjust_thalweg_lateral.py similarity index 100% rename from 
lib/adjust_thalweg_lateral.py rename to src/adjust_thalweg_lateral.py diff --git a/lib/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py similarity index 100% rename from lib/aggregate_fim_outputs.py rename to src/aggregate_fim_outputs.py diff --git a/lib/aggregate_fim_outputs.sh b/src/aggregate_fim_outputs.sh similarity index 100% rename from lib/aggregate_fim_outputs.sh rename to src/aggregate_fim_outputs.sh diff --git a/lib/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py similarity index 100% rename from lib/aggregate_vector_inputs.py rename to src/aggregate_vector_inputs.py diff --git a/lib/agreedem.py b/src/agreedem.py similarity index 100% rename from lib/agreedem.py rename to src/agreedem.py diff --git a/lib/bash_functions.env b/src/bash_functions.env similarity index 100% rename from lib/bash_functions.env rename to src/bash_functions.env diff --git a/lib/build_stream_traversal.py b/src/build_stream_traversal.py similarity index 100% rename from lib/build_stream_traversal.py rename to src/build_stream_traversal.py diff --git a/lib/check_huc_inputs.py b/src/check_huc_inputs.py similarity index 100% rename from lib/check_huc_inputs.py rename to src/check_huc_inputs.py diff --git a/lib/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py similarity index 100% rename from lib/clip_vectors_to_wbd.py rename to src/clip_vectors_to_wbd.py diff --git a/lib/derive_headwaters.py b/src/derive_headwaters.py similarity index 100% rename from lib/derive_headwaters.py rename to src/derive_headwaters.py diff --git a/lib/entrypoint.sh b/src/entrypoint.sh similarity index 100% rename from lib/entrypoint.sh rename to src/entrypoint.sh diff --git a/lib/filter_catchments_and_add_attributes.py b/src/filter_catchments_and_add_attributes.py similarity index 100% rename from lib/filter_catchments_and_add_attributes.py rename to src/filter_catchments_and_add_attributes.py diff --git a/lib/fr_to_ms_raster_mask.py b/src/fr_to_ms_raster_mask.py similarity index 100% rename from lib/fr_to_ms_raster_mask.py rename to src/fr_to_ms_raster_mask.py diff --git a/lib/getRasterInfoNative.py b/src/getRasterInfoNative.py similarity index 100% rename from lib/getRasterInfoNative.py rename to src/getRasterInfoNative.py diff --git a/lib/get_all_huc_in_inputs.py b/src/get_all_huc_in_inputs.py similarity index 100% rename from lib/get_all_huc_in_inputs.py rename to src/get_all_huc_in_inputs.py diff --git a/lib/make_stages_and_catchlist.py b/src/make_stages_and_catchlist.py similarity index 100% rename from lib/make_stages_and_catchlist.py rename to src/make_stages_and_catchlist.py diff --git a/lib/output_cleanup.py b/src/output_cleanup.py similarity index 100% rename from lib/output_cleanup.py rename to src/output_cleanup.py diff --git a/lib/r_grow_distance.py b/src/r_grow_distance.py similarity index 100% rename from lib/r_grow_distance.py rename to src/r_grow_distance.py diff --git a/lib/raster.py b/src/raster.py similarity index 100% rename from lib/raster.py rename to src/raster.py diff --git a/lib/reachID_grid_to_vector_points.py b/src/reachID_grid_to_vector_points.py similarity index 100% rename from lib/reachID_grid_to_vector_points.py rename to src/reachID_grid_to_vector_points.py diff --git a/lib/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py similarity index 100% rename from lib/reduce_nhd_stream_density.py rename to src/reduce_nhd_stream_density.py diff --git a/lib/rem.py b/src/rem.py similarity index 100% rename from lib/rem.py rename to src/rem.py diff --git a/lib/run_by_unit.sh 
b/src/run_by_unit.sh similarity index 95% rename from lib/run_by_unit.sh rename to src/run_by_unit.sh index 2ba820a32..68866729d 100755 --- a/lib/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -77,7 +77,7 @@ echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ] && \ -$libDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_NWM_Flows -s $input_nhd_flowlines -l $input_NWM_Lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_NWM_Catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg +$srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_NWM_Flows -s $input_nhd_flowlines -l $input_NWM_Lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_NWM_Catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg Tcount if [ "$extent" = "MS" ]; then @@ -107,7 +107,7 @@ Tcount echo -e $startDiv"Get DEM Metadata $hucNumber"$stopDiv date -u Tstart -read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($libDir/getRasterInfoNative.py $outputHucDataDir/dem.tif) +read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/dem.tif) ## RASTERIZE NLD MULTILINES ## echo -e $startDiv"Rasterize all NLD multilines using zelev vertices"$stopDiv @@ -167,7 +167,7 @@ echo -e $startDiv"Creating AGREE DEM using $agree_DEM_buffer meter buffer"$stopD date -u Tstart [ ! 
-f $outputHucDataDir/dem_burned.tif ] && \ -$libDir/agreedem.py -r $outputHucDataDir/flows_grid_boolean.tif -d $outputHucDataDir/dem_meters.tif -w $outputHucDataDir -g $outputHucDataDir/temp_work -o $outputHucDataDir/dem_burned.tif -b $agree_DEM_buffer -sm 10 -sh 1000 +$srcDir/agreedem.py -r $outputHucDataDir/flows_grid_boolean.tif -d $outputHucDataDir/dem_meters.tif -w $outputHucDataDir -g $outputHucDataDir/temp_work -o $outputHucDataDir/dem_burned.tif -b $agree_DEM_buffer -sm 10 -sh 1000 Tcount ## PIT REMOVE BURNED DEM ## @@ -212,14 +212,14 @@ Tcount echo -e $startDiv"Preprocessing for lateral thalweg adjustment $hucNumber"$stopDiv date -u Tstart -$libDir/unique_pixel_and_allocation.py -s $outputHucDataDir/demDerived_streamPixels.tif -o $outputHucDataDir/demDerived_streamPixels_ids.tif -g $outputHucDataDir/temp_grass +$srcDir/unique_pixel_and_allocation.py -s $outputHucDataDir/demDerived_streamPixels.tif -o $outputHucDataDir/demDerived_streamPixels_ids.tif -g $outputHucDataDir/temp_grass Tcount ## ADJUST THALWEG MINIMUM USING LATERAL ZONAL MINIMUM ## echo -e $startDiv"Performing lateral thalweg adjustment $hucNumber"$stopDiv date -u Tstart -$libDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif +$srcDir/adjust_thalweg_lateral.py -e $outputHucDataDir/dem_meters.tif -s $outputHucDataDir/demDerived_streamPixels.tif -a $outputHucDataDir/demDerived_streamPixels_ids_allo.tif -d $outputHucDataDir/demDerived_streamPixels_ids_dist.tif -t 50 -o $outputHucDataDir/dem_lateral_thalweg_adj.tif Tcount ## MASK BURNED DEM FOR STREAMS ONLY ### @@ -258,7 +258,7 @@ echo -e $startDiv"Split Derived Reaches $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/demDerived_reaches_split.gpkg ] && \ -$libDir/split_flows.py $outputHucDataDir/demDerived_reaches.shp $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/demDerived_reaches_split_points.gpkg $outputHucDataDir/wbd8_clp.gpkg $outputHucDataDir/nwm_lakes_proj_subset.gpkg +$srcDir/split_flows.py $outputHucDataDir/demDerived_reaches.shp $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/demDerived_reaches_split_points.gpkg $outputHucDataDir/wbd8_clp.gpkg $outputHucDataDir/nwm_lakes_proj_subset.gpkg Tcount if [[ ! 
-f $outputHucDataDir/demDerived_reaches_split.gpkg ]] ; then @@ -272,7 +272,7 @@ if [ "$extent" = "MS" ]; then echo -e $startDiv"Mask Rasters with Stream Buffer $hucNumber"$stopDiv date -u Tstart - $libDir/fr_to_ms_raster_mask.py $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/flowdir_d8_burned_filled.tif $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/slopes_d8_dem_meters.tif $outputHucDataDir/flowdir_d8_MS.tif $outputHucDataDir/dem_thalwegCond_MS.tif $outputHucDataDir/slopes_d8_dem_metersMS.tif $outputHucDataDir/demDerived_streamPixels.tif $outputHucDataDir/demDerived_streamPixelsMS.tif + $srcDir/fr_to_ms_raster_mask.py $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/flowdir_d8_burned_filled.tif $outputHucDataDir/dem_thalwegCond.tif $outputHucDataDir/slopes_d8_dem_meters.tif $outputHucDataDir/flowdir_d8_MS.tif $outputHucDataDir/dem_thalwegCond_MS.tif $outputHucDataDir/slopes_d8_dem_metersMS.tif $outputHucDataDir/demDerived_streamPixels.tif $outputHucDataDir/demDerived_streamPixelsMS.tif Tcount if [[ ! -f $outputHucDataDir/dem_thalwegCond_MS.tif ]] ; then @@ -305,7 +305,7 @@ echo -e $startDiv"Vectorize Pixel Centroids $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/flows_points_pixels.gpkg ] && \ -$libDir/reachID_grid_to_vector_points.py $demDerived_streamPixels $outputHucDataDir/flows_points_pixels.gpkg featureID +$srcDir/reachID_grid_to_vector_points.py $demDerived_streamPixels $outputHucDataDir/flows_points_pixels.gpkg featureID Tcount ## GAGE WATERSHED FOR PIXELS ## @@ -321,7 +321,7 @@ echo -e $startDiv"D8 REM $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/rem.tif ] && \ -$libDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels -i $outputHucDataDir/gw_catchments_reaches.tif -r $outputHucDataDir/hand_ref_elev_table.csv -s $outputHucDataDir/demDerived_reaches_split.gpkg +$srcDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels -i $outputHucDataDir/gw_catchments_reaches.tif -r $outputHucDataDir/hand_ref_elev_table.csv -s $outputHucDataDir/demDerived_reaches_split.gpkg Tcount ## DINF DISTANCE DOWN ## @@ -353,7 +353,7 @@ echo -e $startDiv"Process catchments and model streams step 1 $hucNumber"$stopDi date -u Tstart [ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg ] && \ -$libDir/filter_catchments_and_add_attributes.py $outputHucDataDir/gw_catchments_reaches.gpkg $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg $outputHucDataDir/demDerived_reaches_split_filtered.gpkg $outputHucDataDir/wbd8_clp.gpkg $hucNumber +$srcDir/filter_catchments_and_add_attributes.py $outputHucDataDir/gw_catchments_reaches.gpkg $outputHucDataDir/demDerived_reaches_split.gpkg $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg $outputHucDataDir/demDerived_reaches_split_filtered.gpkg $outputHucDataDir/wbd8_clp.gpkg $hucNumber if [[ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg ]] ; then echo "No relevant streams within HUC $hucNumber boundaries. 
Aborting run_by_unit.sh" @@ -366,7 +366,7 @@ Tcount echo -e $startDiv"Get Clipped Raster Metadata $hucNumber"$stopDiv date -u Tstart -read fsize ncols nrows ndv_clipped xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($libDir/getRasterInfoNative.py $outputHucDataDir/gw_catchments_reaches.tif) +read fsize ncols nrows ndv_clipped xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/gw_catchments_reaches.tif) Tcount ## RASTERIZE NEW CATCHMENTS AGAIN ## @@ -413,7 +413,7 @@ Tcount echo -e $startDiv"Generate Catchment List and Stage List Files $hucNumber"$stopDiv date -u Tstart -$libDir/make_stages_and_catchlist.py $outputHucDataDir/demDerived_reaches_split_filtered.gpkg $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg $outputHucDataDir/stage.txt $outputHucDataDir/catchment_list.txt $stage_min_meters $stage_interval_meters $stage_max_meters +$srcDir/make_stages_and_catchlist.py $outputHucDataDir/demDerived_reaches_split_filtered.gpkg $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg $outputHucDataDir/stage.txt $outputHucDataDir/catchment_list.txt $stage_min_meters $stage_interval_meters $stage_max_meters Tcount ## HYDRAULIC PROPERTIES ## @@ -429,7 +429,7 @@ echo -e $startDiv"Finalize catchments and model streams $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg ] && \ -$libDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent -k $outputHucDataDir/small_segments.csv +$srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent -k $outputHucDataDir/small_segments.csv Tcount ## CLEANUP OUTPUTS ## @@ -440,5 +440,5 @@ args=() (( viz == 1 )) && args+=( '-v' ) date -u Tstart -$libDir/output_cleanup.py $hucNumber $outputHucDataDir "${args[@]}" +$srcDir/output_cleanup.py $hucNumber $outputHucDataDir "${args[@]}" Tcount diff --git a/lib/split_flows.py b/src/split_flows.py similarity index 100% rename from lib/split_flows.py rename to src/split_flows.py diff --git a/src/time_and_tee_run_by_unit.sh b/src/time_and_tee_run_by_unit.sh new file mode 100755 index 000000000..25dffd967 --- /dev/null +++ b/src/time_and_tee_run_by_unit.sh @@ -0,0 +1,5 @@ +#!/bin/bash -e + +/usr/bin/time -v 
$srcDir/run_by_unit.sh $1 |& tee $outputRunDataDir/logs/$1.log +exit ${PIPESTATUS[0]} + diff --git a/lib/unique_pixel_and_allocation.py b/src/unique_pixel_and_allocation.py similarity index 100% rename from lib/unique_pixel_and_allocation.py rename to src/unique_pixel_and_allocation.py diff --git a/lib/utils/__init__.py b/src/utils/__init__.py similarity index 100% rename from lib/utils/__init__.py rename to src/utils/__init__.py diff --git a/lib/utils/archive_cleanup.py b/src/utils/archive_cleanup.py similarity index 100% rename from lib/utils/archive_cleanup.py rename to src/utils/archive_cleanup.py diff --git a/lib/utils/shared_functions.py b/src/utils/shared_functions.py similarity index 100% rename from lib/utils/shared_functions.py rename to src/utils/shared_functions.py diff --git a/lib/utils/shared_variables.py b/src/utils/shared_variables.py similarity index 100% rename from lib/utils/shared_variables.py rename to src/utils/shared_variables.py diff --git a/tests/__init__.py b/tools/__init__.py similarity index 100% rename from tests/__init__.py rename to tools/__init__.py diff --git a/tests/aggregate_mannings_calibration.py b/tools/aggregate_mannings_calibration.py similarity index 100% rename from tests/aggregate_mannings_calibration.py rename to tools/aggregate_mannings_calibration.py diff --git a/tests/aggregate_metrics.py b/tools/aggregate_metrics.py similarity index 100% rename from tests/aggregate_metrics.py rename to tools/aggregate_metrics.py diff --git a/tests/cache_metrics.py b/tools/cache_metrics.py similarity index 100% rename from tests/cache_metrics.py rename to tools/cache_metrics.py diff --git a/tests/comparing_src.py b/tools/comparing_src.py similarity index 100% rename from tests/comparing_src.py rename to tools/comparing_src.py diff --git a/tests/inundation.py b/tools/inundation.py similarity index 100% rename from tests/inundation.py rename to tools/inundation.py diff --git a/tests/mannings_calibration_run.sh b/tools/mannings_calibration_run.sh similarity index 100% rename from tests/mannings_calibration_run.sh rename to tools/mannings_calibration_run.sh diff --git a/tests/mannings_run_by_set.sh b/tools/mannings_run_by_set.sh similarity index 93% rename from tests/mannings_run_by_set.sh rename to tools/mannings_run_by_set.sh index 9ee875e2a..917672a63 100755 --- a/tests/mannings_run_by_set.sh +++ b/tools/mannings_run_by_set.sh @@ -10,6 +10,6 @@ mannings_value="${array[$mannings_row]}" subdir=$outdir/$huc"_"$strorder"_"$mannings_value mkdir -p $subdir -$libDir/add_crosswalk.py -d $hucdir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $hucdir/demDerived_reaches_split_filtered.gpkg -s $hucdir/src_base.csv -l $subdir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $subdir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $subdir/src_full_crosswalked.csv -j $subdir/src.json -x $subdir/crosswalk_table.csv -t $subdir/hydroTable.csv -w $hucdir/wbd8_clp.gpkg -b $hucdir/nwm_subset_streams.gpkg -y $hucdir/nwm_catchments_proj_subset.tif -m $param_set -z $input_NWM_Catchments -p FR -c +$srcDir/add_crosswalk.py -d $hucdir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $hucdir/demDerived_reaches_split_filtered.gpkg -s $hucdir/src_base.csv -l $subdir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $subdir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $subdir/src_full_crosswalked.csv -j $subdir/src.json -x $subdir/crosswalk_table.csv -t $subdir/hydroTable.csv -w 
$hucdir/wbd8_clp.gpkg -b $hucdir/nwm_subset_streams.gpkg -y $hucdir/nwm_catchments_proj_subset.tif -m $param_set -z $input_NWM_Catchments -p FR -c python3 foss_fim/tests/run_test_case_calibration.py -r $fimdir/$huc -d $subdir -t $huc"_ble" -b "mannings_calibration"/$strorder/$mannings_value diff --git a/tests/plots/eval_plots.py b/tools/plots/eval_plots.py similarity index 100% rename from tests/plots/eval_plots.py rename to tools/plots/eval_plots.py diff --git a/tests/plots/utils/__init__.py b/tools/plots/utils/__init__.py similarity index 100% rename from tests/plots/utils/__init__.py rename to tools/plots/utils/__init__.py diff --git a/tests/plots/utils/shared_functions.py b/tools/plots/utils/shared_functions.py similarity index 100% rename from tests/plots/utils/shared_functions.py rename to tools/plots/utils/shared_functions.py diff --git a/tests/preprocess/create_flow_forecast_file.py b/tools/preprocess/create_flow_forecast_file.py similarity index 99% rename from tests/preprocess/create_flow_forecast_file.py rename to tools/preprocess/create_flow_forecast_file.py index e7df52e15..9de7abfe5 100644 --- a/tests/preprocess/create_flow_forecast_file.py +++ b/tools/preprocess/create_flow_forecast_file.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + # -*- coding: utf-8 -*- """ Created on Wed Jul 29 11:48:37 2020 diff --git a/tests/preprocess/preprocess_benchmark.py b/tools/preprocess/preprocess_benchmark.py similarity index 99% rename from tests/preprocess/preprocess_benchmark.py rename to tools/preprocess/preprocess_benchmark.py index 4d6d860b0..02f8e5ea8 100644 --- a/tests/preprocess/preprocess_benchmark.py +++ b/tools/preprocess/preprocess_benchmark.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + # -*- coding: utf-8 -*- """ Created on Thu Jul 23 15:17:04 2020 diff --git a/tests/preprocess/preprocess_fimx.py b/tools/preprocess/preprocess_fimx.py similarity index 100% rename from tests/preprocess/preprocess_fimx.py rename to tools/preprocess/preprocess_fimx.py diff --git a/tests/run_test_case.py b/tools/run_test_case.py similarity index 100% rename from tests/run_test_case.py rename to tools/run_test_case.py diff --git a/tests/run_test_case_calibration.py b/tools/run_test_case_calibration.py similarity index 100% rename from tests/run_test_case_calibration.py rename to tools/run_test_case_calibration.py diff --git a/tests/synthesize_test_cases.py b/tools/synthesize_test_cases.py similarity index 100% rename from tests/synthesize_test_cases.py rename to tools/synthesize_test_cases.py diff --git a/tests/time_and_tee_mannings_calibration.sh b/tools/time_and_tee_mannings_calibration.sh similarity index 100% rename from tests/time_and_tee_mannings_calibration.sh rename to tools/time_and_tee_mannings_calibration.sh diff --git a/tests/utils/__init__.py b/tools/utils/__init__.py similarity index 100% rename from tests/utils/__init__.py rename to tools/utils/__init__.py diff --git a/tests/utils/shapefile_to_raster.py b/tools/utils/shapefile_to_raster.py similarity index 100% rename from tests/utils/shapefile_to_raster.py rename to tools/utils/shapefile_to_raster.py diff --git a/tests/utils/shared_functions.py b/tools/utils/shared_functions.py similarity index 100% rename from tests/utils/shared_functions.py rename to tools/utils/shared_functions.py diff --git a/tests/utils/shared_variables.py b/tools/utils/shared_variables.py similarity index 100% rename from tests/utils/shared_variables.py rename to tools/utils/shared_variables.py From 7f65a0ab6f040775674f1fecc3f1948d243a8d48 Mon Sep 17 00:00:00 2001 
From: Brad Date: Tue, 2 Mar 2021 10:58:38 -0600 Subject: [PATCH 047/359] Renaming of benchmark data and associated variables in evaluation scripts Renamed benchmark layers in test_cases and updated variable names in evaluation scripts. - Updated run_test_case.py with new benchmark layer names. - Updated run_test_case_calibration.py with new benchmark layer names. This resolves #284. --- CHANGELOG.md | 9 +++++++++ tools/run_test_case.py | 21 +++++++++------------ tools/run_test_case_calibration.py | 2 +- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64bc07129..3f26caaa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v3.0.7.1 - 2021-03-02 - [PR #290](https://github.com/NOAA-OWP/cahaba/pull/290) + +Renamed benchmark layers in `test_cases` and updated variable names in evaluation scripts. + +### Changes + - Updated `run_test_case.py` with new benchmark layer names. + - Updated `run_test_case_calibration.py` with new benchmark layer names. + +
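For orientation, a minimal sketch of the renamed layer pattern (the HUC, magnitude, and base path below are hypothetical placeholders; only the file-name convention comes from this change):

```python
import os

# Hypothetical example values; the naming pattern is the point here.
validation_data_path = '/data/test_cases/ble_test_cases/validation_data_ble'
huc, magnitude, benchmark_category = '12090301', '100yr', 'ble'

# Benchmark rasters are now *_extent_* (previously *_depth_*) ...
benchmark_raster = os.path.join(validation_data_path, huc, magnitude,
                                f'{benchmark_category}_huc_{huc}_extent_{magnitude}.tif')
# ... and AHPS site boundaries are now <lid>_domain.shp (previously <lid>_extent.shp).
```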

## v3.0.7.0 - 2021-03-01 - [PR #288](https://github.com/NOAA-OWP/cahaba/pull/288) Restructured the repository. This has no impact on hydrological work done in the codebase and is simply moving files and renaming directories. diff --git a/tools/run_test_case.py b/tools/run_test_case.py index 5beac4f16..2a0a279c5 100755 --- a/tools/run_test_case.py +++ b/tools/run_test_case.py @@ -84,7 +84,6 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous # Get path to validation_data_{benchmark} directory and huc_dir. validation_data_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category) - for magnitude in magnitude_list: version_test_case_dir = os.path.join(version_test_case_dir_parent, magnitude) if not os.path.exists(version_test_case_dir): @@ -94,18 +93,18 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous if benchmark_category in AHPS_BENCHMARK_CATEGORIES: benchmark_raster_path_list, forecast_list = [], [] lid_dir_list = os.listdir(os.path.join(validation_data_path, current_huc)) - lid_list, inundation_raster_list, extent_file_list = [], [], [] + lid_list, inundation_raster_list, domain_file_list = [], [], [] for lid in lid_dir_list: lid_dir = os.path.join(validation_data_path, current_huc, lid) - benchmark_raster_path_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_depth_' + magnitude + '.tif')) # TEMP + benchmark_raster_path_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_extent_' + magnitude + '.tif')) # TEMP forecast_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_flows_' + magnitude + '.csv')) # TEMP lid_list.append(lid) inundation_raster_list.append(os.path.join(version_test_case_dir, lid + '_inundation_extent.tif')) - extent_file_list.append(os.path.join(lid_dir, lid + '_extent.shp')) + domain_file_list.append(os.path.join(lid_dir, lid + '_domain.shp')) else: - benchmark_raster_file = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_depth_' + magnitude + '.tif') + benchmark_raster_file = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_extent_' + magnitude + '.tif') benchmark_raster_path_list = [benchmark_raster_file] forecast_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_flows_' + magnitude + '.csv') forecast_list = [forecast_path] @@ -115,23 +114,21 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous benchmark_raster_path = benchmark_raster_path_list[index] forecast = forecast_list[index] inundation_raster = inundation_raster_list[index] - # Only need to define ahps_lid and ahps_extent_file for AHPS_BENCHMARK_CATEGORIES. 
if benchmark_category in AHPS_BENCHMARK_CATEGORIES: ahps_lid = lid_list[index] - ahps_extent_file = extent_file_list[index] + ahps_domain_file = domain_file_list[index] mask_dict.update({ahps_lid: - {'path': ahps_extent_file, + {'path': ahps_domain_file, 'buffer': None, 'operation': 'include'} }) - if not os.path.exists(benchmark_raster_path) or not os.path.exists(ahps_extent_file) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. + if not os.path.exists(benchmark_raster_path) or not os.path.exists(ahps_domain_file) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. continue else: # If not in AHPS_BENCHMARK_CATEGORIES. if not os.path.exists(benchmark_raster_path) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. continue - # Run inundate. print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") try: @@ -194,7 +191,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous parser.add_argument('-i', '--inclusion-area', help='Path to shapefile. Contingency metrics will be produced from pixels inside of shapefile extent.', required=False, default="") parser.add_argument('-ib','--inclusion-area-buffer', help='Buffer to use when masking contingency metrics with inclusion area.', required=False, default="0") parser.add_argument('-l', '--light-run', help='Using the light_run option will result in only stat files being written, and NOT grid files.', required=False, action='store_true') - parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, default=False) + parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, default=False, action='store_true') # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) @@ -234,7 +231,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous if args['magnitude'] == '': if 'ble' in args['test_id'].split('_'): args['magnitude'] = ['100yr', '500yr'] - elif 'ahps' in args['test_id'].split('_'): + elif 'nws' or 'usgs' in args['test_id'].split('_'): args['magnitude'] = ['action', 'minor', 'moderate', 'major'] else: print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. ble options include: 100yr, 500yr. ahps options include action, minor, moderate, major." + ENDC) diff --git a/tools/run_test_case_calibration.py b/tools/run_test_case_calibration.py index cefe80594..f630360b0 100755 --- a/tools/run_test_case_calibration.py +++ b/tools/run_test_case_calibration.py @@ -232,7 +232,7 @@ def run_alpha_test(fim_run_dir, calib_dir, branch_name, test_id, mask_type, retu for return_interval in return_interval_list: # Construct path to validation raster and forecast file. 
benchmark_category = test_id.split('_')[1] - benchmark_raster_path = os.path.join(TEST_CASES_DIR, 'validation_data_' + benchmark_category, current_huc, return_interval, benchmark_category + '_huc_' + current_huc + '_depth_' + return_interval + '.tif') + benchmark_raster_path = os.path.join(TEST_CASES_DIR, 'validation_data_' + benchmark_category, current_huc, return_interval, benchmark_category + '_huc_' + current_huc + '_extent_' + return_interval + '.tif') if not os.path.exists(benchmark_raster_path): # Skip loop instance if the benchmark raster doesn't exist. continue From f909134fe570eb1a6ab361013cc1c8dbf675eac4 Mon Sep 17 00:00:00 2001 From: MattLuck-NOAA Date: Wed, 3 Mar 2021 13:28:24 -0500 Subject: [PATCH 048/359] Update README.md Updates the move from tests/ to tools/synthesize_test_cases.py --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 311c29cde..9f44d94ad 100644 --- a/README.md +++ b/README.md @@ -88,12 +88,12 @@ After `fim_run.sh` completes, you are ready to evaluate the model's skill. To evaluate model skill, run the following: ``` -python /foss_fim/tests/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs] +python /foss_fim/tools/synthesize_test_cases.py -c DEV -v -m -j [num_of_jobs] ``` More information can be found by running: ``` -python /foss_fim/tests/synthesize_test_cases.py --help +python /foss_fim/tools/synthesize_test_cases.py --help ``` ---- From 30f401a8066291aec65cdb9629d7551a059995c3 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Tue, 9 Mar 2021 10:30:11 -0600 Subject: [PATCH 049/359] Convert CatFIM pipeline to open source Refactored CatFIM pipeline to open source. - Added VIZ_PROJECTION to shared_variables.py. - Added missing library referenced in inundation.py. - Cleaned up and converted evaluation scripts in generate_categorical_fim.py to open source. - Removed util folders under tools directory. This resolves #279. 
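A minimal sketch of how the new `VIZ_PROJECTION` constant might be used when exporting a layer for visualization (the file paths are placeholders and the import path assumes the `src/utils` package layout; only the constant itself comes from this commit):

```python
import geopandas as gpd
from utils.shared_variables import VIZ_PROJECTION  # assumed import path (src/utils/shared_variables.py)

# Hypothetical CatFIM layer reprojected to the Web Mercator definition added here.
sites = gpd.read_file('/data/catfim/catfim_sites.gpkg')
sites.to_crs(VIZ_PROJECTION).to_file('/data/catfim/catfim_sites_viz.gpkg', driver='GPKG')
```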
--- CHANGELOG.md | 11 + src/utils/shared_variables.py | 2 +- tools/__init__.py | 0 tools/aggregate_mannings_calibration.py | 1 - tools/aggregate_metrics.py | 64 ++-- tools/cache_metrics.py | 0 tools/comparing_src.py | 2 + tools/generate_categorical_fim.py | 336 +++++++++++++----- tools/inundation.py | 1 + tools/inundation_wrapper_custom_flow.py | 3 - tools/inundation_wrapper_nwm_flows.py | 3 - tools/mannings_calibration_run.sh | 18 +- tools/mannings_run_by_set.sh | 2 +- tools/plots/{utils => }/__init__.py | 0 tools/plots/eval_plots.py | 288 +++++++-------- .../shared_functions.py => plot_functions.py} | 0 tools/preprocess/create_flow_forecast_file.py | 67 ++-- tools/preprocess/preprocess_benchmark.py | 51 ++- tools/preprocess/preprocess_fimx.py | 74 ++-- tools/run_test_case.py | 48 +-- tools/run_test_case_calibration.py | 2 +- tools/{utils => }/shapefile_to_raster.py | 11 +- tools/synthesize_test_cases.py | 83 +++-- tools/time_and_tee_mannings_calibration.sh | 2 +- ...functions.py => tools_shared_functions.py} | 0 ...variables.py => tools_shared_variables.py} | 0 tools/utils/__init__.py | 0 27 files changed, 606 insertions(+), 463 deletions(-) mode change 100644 => 100755 tools/__init__.py mode change 100644 => 100755 tools/aggregate_metrics.py mode change 100644 => 100755 tools/cache_metrics.py mode change 100644 => 100755 tools/generate_categorical_fim.py mode change 100644 => 100755 tools/inundation_wrapper_custom_flow.py rename tools/plots/{utils => }/__init__.py (100%) mode change 100644 => 100755 mode change 100644 => 100755 tools/plots/eval_plots.py rename tools/plots/{utils/shared_functions.py => plot_functions.py} (100%) mode change 100644 => 100755 mode change 100644 => 100755 tools/preprocess/create_flow_forecast_file.py mode change 100644 => 100755 tools/preprocess/preprocess_benchmark.py mode change 100644 => 100755 tools/preprocess/preprocess_fimx.py rename tools/{utils => }/shapefile_to_raster.py (88%) mode change 100644 => 100755 mode change 100644 => 100755 tools/synthesize_test_cases.py rename tools/{utils/shared_functions.py => tools_shared_functions.py} (100%) mode change 100644 => 100755 rename tools/{utils/shared_variables.py => tools_shared_variables.py} (100%) mode change 100644 => 100755 delete mode 100644 tools/utils/__init__.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f26caaa9..da0e1cb32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,17 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v3.0.8.0 - 2021-03-09 - [PR #279](https://github.com/NOAA-OWP/cahaba/pull/279) + +Refactored NWS Flood Categorical HAND FIM (CatFIM) pipeline to open source. + +### Changes + - Added `VIZ_PROJECTION` to `shared_variables.py`. + - Added missing library referenced in `inundation.py`. + - Cleaned up and converted evaluation scripts in `generate_categorical_fim.py` to open source. + - Removed `util` folders under `tools` directory. + +
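A short sketch of how the new `VIZ_PROJECTION` constant can be consumed, assuming the code runs inside the project container where `/foss_fim/src` holds `utils/shared_variables.py`; the layer paths are hypothetical.

```
# Reproject an output layer to the Web Mercator definition added in this release.
import sys
import geopandas as gpd

sys.path.append('/foss_fim/src')
from utils.shared_variables import VIZ_PROJECTION

gdf = gpd.read_file('catfim_site_extent.gpkg')  # hypothetical CatFIM polygon layer
gdf_viz = gdf.to_crs(VIZ_PROJECTION)            # the constant is a WKT string geopandas accepts
gdf_viz.to_file('catfim_site_extent_viz.gpkg', driver='GPKG')
```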

## v3.0.7.1 - 2021-03-02 - [PR #290](https://github.com/NOAA-OWP/cahaba/pull/290) Renamed benchmark layers in `test_cases` and updated variable names in evaluation scripts. diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index 40a8feacb..244a12d2b 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -3,7 +3,7 @@ # Projections. #PREP_PROJECTION = "+proj=aea +datum=NAD83 +x_0=0.0 +y_0=0.0 +lon_0=96dW +lat_0=23dN +lat_1=29d30'N +lat_2=45d30'N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs +units=m" PREP_PROJECTION = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.2572221010042,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4269"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_center",23],PARAMETER["longitude_of_center",-96],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]]]' - +VIZ_PROJECTION ='PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]]' # -- Data URLs-- # NHD_URL_PARENT = r'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/' NWM_HYDROFABRIC_URL = r'http://www.nohrsc.noaa.gov/pub/staff/keicher/NWM_live/web/data_tools/NWM_channel_hydrofabric.tar.gz' # Temporary diff --git a/tools/__init__.py b/tools/__init__.py old mode 100644 new mode 100755 diff --git a/tools/aggregate_mannings_calibration.py b/tools/aggregate_mannings_calibration.py index f94b1d025..c57b17776 100755 --- a/tools/aggregate_mannings_calibration.py +++ b/tools/aggregate_mannings_calibration.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import os import pandas as pd import csv diff --git a/tools/aggregate_metrics.py b/tools/aggregate_metrics.py old mode 100644 new mode 100755 index d8a462d5b..7cc5951b5 --- a/tools/aggregate_metrics.py +++ b/tools/aggregate_metrics.py @@ -3,17 +3,17 @@ import json import os import csv - + import argparse TEST_CASES_DIR = r'/data/test_cases_new/' # TEMP = r'/data/temp' # Search through all previous_versions in test_cases -from utils.shared_functions import compute_stats_from_contingency_table +from tools_shared_functions import compute_stats_from_contingency_table def create_master_metrics_csv(): - + # Construct header metrics_to_write = ['true_negatives_count', 'false_negatives_count', @@ -57,33 +57,33 @@ def create_master_metrics_csv(): 'masked_perc', 'masked_area_km2' ] - + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source']] - + for benchmark_type in ['ble', 'ahps']: - + if benchmark_type == 'ble': - + test_cases = r'/data/test_cases' test_cases_list = os.listdir(test_cases) # AHPS test_ids versions_to_aggregate = ['fim_1_0_0', 'fim_2_3_3', 'fim_3_0_0_3_fr_c'] - + for test_case 
in test_cases_list: try: int(test_case.split('_')[0]) - + huc = test_case.split('_')[0] previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') - + for magnitude in ['100yr', '500yr']: for version in versions_to_aggregate: version_dir = os.path.join(previous_versions, version) magnitude_dir = os.path.join(version_dir, magnitude) if os.path.exists(magnitude_dir): - + magnitude_dir_list = os.listdir(magnitude_dir) for f in magnitude_dir_list: if '.json' in f: @@ -99,40 +99,40 @@ def create_master_metrics_csv(): sub_list_to_append.append(full_json_path) sub_list_to_append.append(flow) sub_list_to_append.append(benchmark_source) - + list_to_write.append(sub_list_to_append) - + except ValueError: pass - + if benchmark_type == 'ahps': - + test_cases = r'/data/test_cases_ahps_testing' test_cases_list = os.listdir(test_cases) # AHPS test_ids - versions_to_aggregate = ['fim_1_0_0_nws_1_21_2021', 'fim_1_0_0_usgs_1_21_2021', + versions_to_aggregate = ['fim_1_0_0_nws_1_21_2021', 'fim_1_0_0_usgs_1_21_2021', 'fim_2_x_ms_nws_1_21_2021', 'fim_2_x_ms_usgs_1_21_2021', 'fim_3_0_0_3_ms_c_nws_1_21_2021', 'fim_3_0_0_3_ms_c_usgs_1_21_2021', 'ms_xwalk_fill_missing_cal_nws', 'ms_xwalk_fill_missing_cal_usgs'] - + for test_case in test_cases_list: try: int(test_case.split('_')[0]) - + huc = test_case.split('_')[0] previous_versions = os.path.join(test_cases, test_case, 'performance_archive', 'previous_versions') - + for magnitude in ['action', 'minor', 'moderate', 'major']: for version in versions_to_aggregate: - + if 'nws' in version: benchmark_source = 'ahps_nws' if 'usgs' in version: benchmark_source = 'ahps_usgs' - + version_dir = os.path.join(previous_versions, version) magnitude_dir = os.path.join(version_dir, magnitude) - + if os.path.exists(magnitude_dir): magnitude_dir_list = os.listdir(magnitude_dir) for f in magnitude_dir_list: @@ -147,7 +147,7 @@ def create_master_metrics_csv(): parent_dir = 'usgs_1_21_2021' if 'nws' in version: parent_dir = 'nws_1_21_2021' - + flow_file = os.path.join(test_cases, parent_dir, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') if os.path.exists(flow_file): with open(flow_file, newline='') as csv_file: @@ -157,7 +157,7 @@ def create_master_metrics_csv(): flow = row[1] if nws_lid == 'mcc01': print(flow) - + stats_dict = json.load(open(full_json_path)) for metric in metrics_to_write: sub_list_to_append.append(stats_dict[metric]) @@ -165,10 +165,10 @@ def create_master_metrics_csv(): sub_list_to_append.append(flow) sub_list_to_append.append(benchmark_source) list_to_write.append(sub_list_to_append) - + except ValueError: pass - + with open(output_csv, 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerows(list_to_write) @@ -201,7 +201,7 @@ def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfo for magnitude in ['100yr', '500yr', 'action', 'minor', 'moderate', 'major']: huc_path_list = [['huc', 'path']] true_positives, true_negatives, false_positives, false_negatives, cell_area, masked_count = 0, 0, 0, 0, 0, 0 - + for test_case in test_cases_dir_list: if test_case not in ['other', 'validation_data_ble', 'validation_data_legacy', 'validation_data_ahps']: @@ -227,11 +227,11 @@ def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfo cell_area = json_dict['cell_area_m2'] huc_path_list.append([huc, stats_json_path]) - - + + if cell_area == 0: continue - + # Pass all sums to shared function to calculate metrics. 
stats_dict = compute_stats_from_contingency_table(true_negatives, false_negatives, false_positives, true_positives, cell_area=cell_area, masked_count=masked_count) @@ -239,7 +239,7 @@ def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfo for stat in stats_dict: list_to_write.append([stat, stats_dict[stat]]) - + # Map path to output directory for aggregate metrics. output_file = os.path.join(aggregate_output_dir, branch + '_aggregate_metrics_' + magnitude + special_string + '.csv') @@ -249,7 +249,7 @@ def aggregate_metrics(config="DEV", branch="", hucs="", special_string="", outfo csv_writer.writerows(list_to_write) csv_writer.writerow([]) csv_writer.writerows(huc_path_list) - + print() print("Finished aggregating for the '" + magnitude + "' magnitude. Aggregated metrics over " + str(len(huc_path_list)-1) + " test cases.") print() diff --git a/tools/cache_metrics.py b/tools/cache_metrics.py old mode 100644 new mode 100755 diff --git a/tools/comparing_src.py b/tools/comparing_src.py index a9c8a1c8a..977b05794 100755 --- a/tools/comparing_src.py +++ b/tools/comparing_src.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import matplotlib.pyplot as plt import numpy as np import json diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py old mode 100644 new mode 100755 index 9423f6c81..290a44704 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -1,155 +1,315 @@ +#!/usr/bin/env python3 + +import sys import os from multiprocessing import Pool import argparse import traceback -import sys - -sys.path.insert(1, 'foss_fim/tests') +import rasterio +import geopandas as gpd +import pandas as pd +import shutil +from rasterio.features import shapes +from shapely.geometry.polygon import Polygon +from shapely.geometry.multipolygon import MultiPolygon from inundation import inundate +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION +from utils.shared_functions import getDriver INPUTS_DIR = r'/data/inputs' +magnitude_list = ['action', 'minor', 'moderate','major'] -# Define necessary variables for inundation(). +# Map path to points with attributes +all_mapped_ahps_conus_hipr = os.path.join(INPUTS_DIR, 'ahp_sites', 'all_mapped_ahps_reformatted.csv') + +# Define necessary variables for inundation() hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' mask_type, catchment_poly = 'huc', '' - -def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, job_number, gpkg, extif, depthtif): - - # Create output directory and log directory. - if not os.path.exists(output_cat_fim_dir): - os.mkdir(output_cat_fim_dir) - log_dir = os.path.join(output_cat_fim_dir, 'logs') - if not os.path.exists(log_dir): - os.mkdir(log_dir) - + +def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif, log_file): + no_data_list = [] procs_list = [] - - # Loop through huc directories in the source_flow directory. + source_flow_dir_list = os.listdir(source_flow_dir) - for huc in source_flow_dir_list: + output_flow_dir_list = os.listdir(fim_run_dir) + + # Log missing hucs + missing_hucs = list(set(source_flow_dir_list) - set(output_flow_dir_list)) + missing_hucs = [huc for huc in missing_hucs if "." 
not in huc] + if len(missing_hucs) > 0: + f = open(log_file, 'a+') + f.write(f"Missing hucs from output directory: {', '.join(missing_hucs)}\n") + f.close() + + # Loop through matching huc directories in the source_flow directory + matching_hucs = list(set(output_flow_dir_list) & set(source_flow_dir_list)) + for huc in matching_hucs: + if "." not in huc: - - # Get list of AHPS site directories. + + # Get list of AHPS site directories ahps_site_dir = os.path.join(source_flow_dir, huc) ahps_site_dir_list = os.listdir(ahps_site_dir) - - # Map paths to HAND files needed for inundation(). + + # Map paths to HAND files needed for inundation() fim_run_huc_dir = os.path.join(fim_run_dir, huc) rem = os.path.join(fim_run_huc_dir, 'rem_zeroed_masked.tif') catchments = os.path.join(fim_run_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') hydroTable = os.path.join(fim_run_huc_dir, 'hydroTable.csv') - + exit_flag = False # Default to False. - - # Check if necessary data exist; set exit_flag to True if they don't exist. + + # Check if necessary data exist; set exit_flag to True if they don't exist for f in [rem, catchments, hydroTable]: if not os.path.exists(f): - print(f) no_data_list.append(f) exit_flag = True - - # Log "Missing data" if missing TODO improve this. + + # Log missing data if exit_flag == True: - f = open(os.path.join(log_dir, huc + '.txt'), 'w') - f.write("Missing data") - continue - - # Map path to huc directory inside out output_cat_fim_dir. + f = open(log_file, 'a+') + f.write(f"Missing data for: {fim_run_huc_dir}\n") + f.close() + + # Map path to huc directory inside out output_cat_fim_dir cat_fim_huc_dir = os.path.join(output_cat_fim_dir, huc) if not os.path.exists(cat_fim_huc_dir): os.mkdir(cat_fim_huc_dir) - - # Loop through AHPS sites. + + # Loop through AHPS sites for ahps_site in ahps_site_dir_list: - # Map parent directory for AHPS source data dir and list AHPS thresholds (act, min, mod, maj). + # map parent directory for AHPS source data dir and list AHPS thresholds (act, min, mod, maj) ahps_site_parent = os.path.join(ahps_site_dir, ahps_site) thresholds_dir_list = os.listdir(ahps_site_parent) - + # Map parent directory for all inundation output filesoutput files. cat_fim_huc_ahps_dir = os.path.join(cat_fim_huc_dir, ahps_site) if not os.path.exists(cat_fim_huc_ahps_dir): os.mkdir(cat_fim_huc_ahps_dir) - + # Loop through thresholds/magnitudes and define inundation output files paths for magnitude in thresholds_dir_list: + if "." not in magnitude: + magnitude_flows_csv = os.path.join(ahps_site_parent, magnitude, 'ahps_' + ahps_site + '_huc_' + huc + '_flows_' + magnitude + '.csv') + if os.path.exists(magnitude_flows_csv): - if gpkg: - output_extent_gpkg = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.gpkg') - else: - output_extent_gpkg = None - if extif: - output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') - else: - output_extent_grid = None + + output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') + if depthtif: output_depth_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_depth.tif') else: output_depth_grid = None - + # Append necessary variables to list for multiprocessing. - procs_list.append([rem, catchments, catchment_poly, magnitude_flows_csv, huc, hydroTable, output_extent_gpkg, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_dir]) - # Initiate multiprocessing. 
- pool = Pool(job_number) + procs_list.append([rem, catchments, catchment_poly, magnitude_flows_csv, huc, hydroTable, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_file]) + + # Initiate multiprocessing + print(f"Running inundation for {len(procs_list)} sites using {number_of_jobs} jobs") + pool = Pool(number_of_jobs) pool.map(run_inundation, procs_list) def run_inundation(args): - - # Parse args. - rem = args[0] - catchments = args[1] - catchment_poly = args[2] + + rem = args[0] + catchments = args[1] + catchment_poly = args[2] magnitude_flows_csv = args[3] - huc = args[4] - hydroTable = args[5] - output_extent_gpkg = args[6] - output_extent_grid = args[7] - output_depth_grid = args[8] - ahps_site = args[9] - magnitude = args[10] - log_dir = args[11] - - print("Running inundation for " + str(os.path.split(os.path.split(output_extent_gpkg)[0])[0])) + huc = args[4] + hydroTable = args[5] + output_extent_grid = args[6] + output_depth_grid = args[7] + ahps_site = args[8] + magnitude = args[9] + log_file = args[10] + try: - inundate( - rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=output_extent_gpkg, + inundate(rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=None, depths=output_depth_grid,out_raster_profile=None,out_vector_profile=None,quiet=True ) + except Exception: - # Log errors and their tracebacks. - f = open(os.path.join(log_dir, huc + "_" + ahps_site + "_" + magnitude + '.txt'), 'w') - f.write(traceback.format_exc()) + # Log errors and their tracebacks + f = open(log_file, 'a+') + f.write(f"{output_extent_grid} - inundation error: {traceback.format_exc()}\n") f.close() - - + + +def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, log_file): + + # Create workspace + gpkg_dir = os.path.join(output_cat_fim_dir, 'gpkg') + if not os.path.exists(gpkg_dir): + os.mkdir(gpkg_dir) + + fim_version = os.path.basename(output_cat_fim_dir) + merged_layer = os.path.join(output_cat_fim_dir, 'catfim_library.gpkg') + + if not os.path.exists(merged_layer): # prevents appending to existing output + + huc_ahps_dir_list = os.listdir(output_cat_fim_dir) + skip_list=['errors','logs','gpkg',merged_layer] + + for magnitude in magnitude_list: + + procs_list = [] + + # Loop through all categories + for huc in huc_ahps_dir_list: + + if huc not in skip_list: + + huc_dir = os.path.join(output_cat_fim_dir, huc) + ahps_dir_list = os.listdir(huc_dir) + + # Loop through ahps sites + for ahps_lid in ahps_dir_list: + ahps_lid_dir = os.path.join(huc_dir, ahps_lid) + + extent_grid = os.path.join(ahps_lid_dir, ahps_lid + '_' + magnitude + '_extent_' + huc + '.tif') + + if os.path.exists(extent_grid): + procs_list.append([ahps_lid, extent_grid, gpkg_dir, fim_version, huc, magnitude]) + + else: + try: + f = open(log_file, 'a+') + f.write(f"Missing layers: {extent_gpkg}\n") + f.close() + except: + pass + + # Multiprocess with instructions + pool = Pool(number_of_jobs) + pool.map(reformat_inundation_maps, procs_list) + + # Merge all layers + print(f"Merging {len(os.listdir(gpkg_dir))} layers...") + + for layer in os.listdir(gpkg_dir): + + diss_extent_filename = os.path.join(gpkg_dir, layer) + + # Open diss_extent + diss_extent = gpd.read_file(diss_extent_filename) 
+ + # Write/append aggregate diss_extent + if os.path.isfile(merged_layer): + diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False, mode='a') + else: + diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False) + + del diss_extent + + shutil.rmtree(gpkg_dir) + + else: + print(f"{merged_layer} already exists.") + + +def reformat_inundation_maps(args): + + try: + lid = args[0] + grid_path = args[1] + gpkg_dir = args[2] + fim_version = args[3] + huc = args[4] + magnitude = args[5] + + # Convert raster to to shapes + with rasterio.open(grid_path) as src: + image = src.read(1) + mask = image > 0 + + # Aggregate shapes + results = ({'properties': {'extent': 1}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=mask,transform=src.transform))) + + # convert list of shapes to polygon + extent_poly = gpd.GeoDataFrame.from_features(list(results), crs=PREP_PROJECTION) + + # Dissolve polygons + extent_poly_diss = extent_poly.dissolve(by='extent') + + # Update attributes + extent_poly_diss = extent_poly_diss.reset_index(drop=True) + extent_poly_diss['ahps_lid'] = lid + extent_poly_diss['magnitude'] = magnitude + extent_poly_diss['version'] = fim_version + extent_poly_diss['huc'] = huc + + # Project to Web Mercator + extent_poly = extent_poly.to_crs(VIZ_PROJECTION) + + # Join attributes + all_mapped_ahps_conus_hipr_fl = pd.read_table(all_mapped_ahps_conus_hipr, sep=",") + all_mapped_ahps_conus_hipr_fl = all_mapped_ahps_conus_hipr_fl.loc[(all_mapped_ahps_conus_hipr_fl.magnitude==magnitude) & (all_mapped_ahps_conus_hipr_fl.nws_lid==lid)] + + extent_poly_diss = extent_poly_diss.merge(all_mapped_ahps_conus_hipr_fl, left_on=['ahps_lid','magnitude'], right_on=['nws_lid','magnitude']) + + extent_poly_diss = extent_poly_diss.drop(columns='nws_lid') + + # Save dissolved multipolygon + handle = os.path.split(grid_path)[1].replace('.tif', '') + + diss_extent_filename = os.path.join(gpkg_dir, handle + "_dissolved.gpkg") + + extent_poly_diss["geometry"] = [MultiPolygon([feature]) if type(feature) == Polygon else feature for feature in extent_poly_diss["geometry"]] + + if not extent_poly_diss.empty: + + extent_poly_diss.to_file(diss_extent_filename,driver=getDriver(diss_extent_filename),index=False) + + except Exception as e: + # Log and clean out the gdb so it's not merged in later + try: + f = open(log_file, 'a+') + f.write(str(diss_extent_filename) + " - dissolve error: " + str(e)) + f.close() + except: + pass + + if __name__ == '__main__': - - # Parse arguments. - parser = argparse.ArgumentParser(description='Inundation mapping and regression analysis for FOSS FIM. Regression analysis results are stored in the test directory.') + + # Parse arguments + parser = argparse.ArgumentParser(description='Categorical inundation mapping for FOSS FIM.') parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) parser.add_argument('-s', '--source-flow-dir',help='Path to directory containing flow CSVs to use to generate categorical FIM.',required=True, default="") parser.add_argument('-o', '--output-cat-fim-dir',help='Path to directory where categorical FIM outputs will be written.',required=True, default="") - parser.add_argument('-j','--job-number',help='Number of processes to use. 
Default is 1.',required=False, default="1") - parser.add_argument('-gpkg','--write-geopackage',help='Using this option will write a geopackage.',required=False, action='store_true') - parser.add_argument('-extif','--write-extent-tiff',help='Using this option will write extent TIFFs. This is the default.',required=False, action='store_true') + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) parser.add_argument('-depthtif','--write-depth-tiff',help='Using this option will write depth TIFFs.',required=False, action='store_true') - + args = vars(parser.parse_args()) - + fim_run_dir = args['fim_run_dir'] source_flow_dir = args['source_flow_dir'] output_cat_fim_dir = args['output_cat_fim_dir'] - job_number = int(args['job_number']) - gpkg = args['write_geopackage'] - extif = args['write_extent_tiff'] + number_of_jobs = int(args['number_of_jobs']) depthtif = args['write_depth_tiff'] - - generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, job_number, gpkg, extif, depthtif) - - - + + + # Create output directory + if not os.path.exists(output_cat_fim_dir): + os.mkdir(output_cat_fim_dir) + + # Create log directory + log_dir = os.path.join(output_cat_fim_dir, 'logs') + if not os.path.exists(log_dir): + os.mkdir(log_dir) + + # Create error log path + log_file = os.path.join(log_dir, 'errors.log') + + print("Generating Categorical FIM") + generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif,log_file) + + print("Aggregating Categorical FIM") + post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir,log_file) diff --git a/tools/inundation.py b/tools/inundation.py index b4db4fa49..e7c600510 100755 --- a/tools/inundation.py +++ b/tools/inundation.py @@ -11,6 +11,7 @@ from shapely.geometry import shape from rasterio.mask import mask from rasterio.io import DatasetReader,DatasetWriter +from rasterio.features import shapes from collections import OrderedDict import argparse from warnings import warn diff --git a/tools/inundation_wrapper_custom_flow.py b/tools/inundation_wrapper_custom_flow.py old mode 100644 new mode 100755 index e82a474e6..6867bea5f --- a/tools/inundation_wrapper_custom_flow.py +++ b/tools/inundation_wrapper_custom_flow.py @@ -10,9 +10,6 @@ import sys import argparse import shutil - -# insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) -sys.path.insert(1, 'foss_fim/tests') from inundation import inundate TEST_CASES_DIR = r'/data/inundation_review/inundation_custom_flow/' # Will update. diff --git a/tools/inundation_wrapper_nwm_flows.py b/tools/inundation_wrapper_nwm_flows.py index f6d158a79..f2b641e83 100755 --- a/tools/inundation_wrapper_nwm_flows.py +++ b/tools/inundation_wrapper_nwm_flows.py @@ -10,9 +10,6 @@ import csv import argparse import shutil - -# insert python path at runtime for accessing scripts in foss_fim/tests dir (e.g. inundation.py) -sys.path.insert(1, 'foss_fim/tests') from inundation import inundate TEST_CASES_DIR = r'/data/inundation_review/inundation_nwm_recurr/' # Will update. 
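The refactor above fans per-site inundation out over a worker pool and appends failures to a shared error log rather than stopping the run. A minimal sketch of that pattern follows; `run_one_site` and the site tuples are illustrative stand-ins for the script's `run_inundation` worker and its `procs_list`.

```
# Fan work out with multiprocessing.Pool and log per-task failures to one file.
import traceback
from multiprocessing import Pool

log_file = 'logs/errors.log'  # hypothetical log path

def run_one_site(args):
    site_id, flows_csv, output_tif = args
    try:
        # The real worker calls inundate() here with the HUC's REM, catchments,
        # hydroTable and the site's flow CSV; a print stands in for that call.
        print(f'inundating {site_id} from {flows_csv} -> {output_tif}')
    except Exception:
        # Failures are appended to a shared log instead of stopping the pool
        with open(log_file, 'a+') as f:
            f.write(f'{output_tif} - inundation error: {traceback.format_exc()}\n')

if __name__ == '__main__':
    procs_list = [
        ('site_a', 'site_a_flows.csv', 'site_a_extent.tif'),
        ('site_b', 'site_b_flows.csv', 'site_b_extent.tif'),
    ]
    with Pool(2) as pool:
        pool.map(run_one_site, procs_list)
```

In the script itself the pool size comes from `-j/--number-of-jobs` and errors land in `logs/errors.log` under the output directory.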
diff --git a/tools/mannings_calibration_run.sh b/tools/mannings_calibration_run.sh index 8dc737ee4..89d54cd17 100755 --- a/tools/mannings_calibration_run.sh +++ b/tools/mannings_calibration_run.sh @@ -71,7 +71,7 @@ fi export input_NWM_Catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg export outdir=$outdir -export testdir="/foss_fim/tests" +export toolsdir="/foss_fim/tools" if [ -f "$huclist" ]; then @@ -84,15 +84,15 @@ if [ -f "$huclist" ]; then ## RUN ## if [ -f "$paramfile" ]; then if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile + parallel --verbose --lb -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh :::: $paramfile else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile + parallel --eta -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh :::: $paramfile fi else if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile + parallel --verbose --lb -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh ::: $paramfile else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile + parallel --eta -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh ::: $paramfile fi fi done <$huclist @@ -108,15 +108,15 @@ else ## RUN ## if [ -f "$paramfile" ]; then if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile + parallel --verbose --lb -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh :::: $paramfile else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh :::: $paramfile + parallel --eta -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh :::: $paramfile fi else if [ "$jobLimit" -eq 1 ]; then - parallel --verbose --lb -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile + parallel --verbose --lb -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh ::: $paramfile else - parallel --eta -j $jobLimit -- $testdir/time_and_tee_mannings_calibration.sh ::: $paramfile + parallel --eta -j $jobLimit -- $toolsdir/time_and_tee_mannings_calibration.sh ::: $paramfile fi fi done diff --git a/tools/mannings_run_by_set.sh b/tools/mannings_run_by_set.sh index 917672a63..8394b3a5f 100755 --- a/tools/mannings_run_by_set.sh +++ b/tools/mannings_run_by_set.sh @@ -12,4 +12,4 @@ mkdir -p $subdir $srcDir/add_crosswalk.py -d $hucdir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $hucdir/demDerived_reaches_split_filtered.gpkg -s $hucdir/src_base.csv -l $subdir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $subdir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $subdir/src_full_crosswalked.csv -j $subdir/src.json -x $subdir/crosswalk_table.csv -t $subdir/hydroTable.csv -w $hucdir/wbd8_clp.gpkg -b $hucdir/nwm_subset_streams.gpkg -y $hucdir/nwm_catchments_proj_subset.tif -m $param_set -z $input_NWM_Catchments -p FR -c -python3 foss_fim/tests/run_test_case_calibration.py -r $fimdir/$huc -d $subdir -t $huc"_ble" -b "mannings_calibration"/$strorder/$mannings_value +python3 foss_fim/tools/run_test_case_calibration.py -r $fimdir/$huc -d $subdir -t $huc"_ble" -b "mannings_calibration"/$strorder/$mannings_value diff --git a/tools/plots/utils/__init__.py b/tools/plots/__init__.py old mode 100644 new mode 100755 similarity index 100% 
rename from tools/plots/utils/__init__.py rename to tools/plots/__init__.py diff --git a/tools/plots/eval_plots.py b/tools/plots/eval_plots.py old mode 100644 new mode 100755 index 9c29087e1..f18390f5e --- a/tools/plots/eval_plots.py +++ b/tools/plots/eval_plots.py @@ -1,45 +1,48 @@ #!/usr/bin/env python3 + import pandas as pd from pathlib import Path import argparse from natsort import natsorted import geopandas as gpd -from utils.shared_functions import filter_dataframe, boxplot, scatterplot, barplot +from plot_functions import filter_dataframe, boxplot, scatterplot, barplot + + def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False): ''' - Creates plots and summary statistics using metrics compiled from - synthesize_test_cases. Required inputs are metrics_csv and workspace. + Creates plots and summary statistics using metrics compiled from + synthesize_test_cases. Required inputs are metrics_csv and workspace. Outputs include: - aggregate__.csv: this csv + aggregate__.csv: this csv contains the aggregated total statistics (i.e. CSI, FAR, POD) using the summed area_sq_km fields - __common_sites.csv: this csv - contains the unique sites (e.g usgs/nws: nws_lid; ble: huc08) + __common_sites.csv: this csv + contains the unique sites (e.g usgs/nws: nws_lid; ble: huc08) considered for aggregation/plots for each magnitude. The selected sites occur in all versions analyzed. For example, if FIM 1, - FIM 2, FIM 3.0.0.3 were versions analyzed, the common sites - would be those that had data for ALL versions. This + FIM 2, FIM 3.0.0.3 were versions analyzed, the common sites + would be those that had data for ALL versions. This analysis is then redone for each magnitude. As such, the number of sites may vary with magnitude. The number of sites for each magnitude is annotated on generated plots. - __analyzed_data.csv: this is the - dataset used to create plots and aggregate statistics. It is + __analyzed_data.csv: this is the + dataset used to create plots and aggregate statistics. It is a subset of the input metrics file and consists of the common sites. - csi_aggr__.png: bar plot of the + csi_aggr__.png: bar plot of the aggregated CSI scores. Number of common sites is annotated (see list of sites listed in *_*_common_sites.csv). - csi__.png: box plot of CSI scores - (sites weighted equally). Number of common sites is annotated + csi__.png: box plot of CSI scores + (sites weighted equally). Number of common sites is annotated (see list of sites listed in *_*_common_sites.csv). far__*.png: box plot of FAR scores - (sites weighted equally). Number of common sites is annotated + (sites weighted equally). Number of common sites is annotated (see list of sites listed in *_*_common_sites.csv). - tpr__*.png: box plot of TPR/POD - scores (sites weighted equally). Number of common sites is + tpr__*.png: box plot of TPR/POD + scores (sites weighted equally). Number of common sites is annotated (see list of sites listed in *_*_common_sites.csv). - csi_scatter__*.png: scatter plot comparing + csi_scatter__*.png: scatter plot comparing two versions for a given magnitude. This is only generated if there are exactly two versions analyzed. @@ -49,54 +52,54 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' Path to csv produced as part of synthesize_test_cases containing all metrics across all versions. workspace : STRING - Path to the output workspace. 
Subdirectories will be created + Path to the output workspace. Subdirectories will be created reflecting the evaluation datasets. versions: LIST - A list of versions to be aggregated/plotted. Uses the "startswith" - approach. Versions should be supplied in the order they are to - be plotted. For example: ['fim_', 'fb']; This will evaluate all + A list of versions to be aggregated/plotted. Uses the "startswith" + approach. Versions should be supplied in the order they are to + be plotted. For example: ['fim_', 'fb']; This will evaluate all versions that start with fim_ (e.g. fim_1, fim_2, fim_3) and any feature branch that starts with "fb". To esbalish version order, - the fim versions are naturally sorted and then fb versions - (naturally sorted) are appended. These versions are also used to - filter the input metric csv as only these versions are retained - for analysis. + the fim versions are naturally sorted and then fb versions + (naturally sorted) are appended. These versions are also used to + filter the input metric csv as only these versions are retained + for analysis. stats: LIST - A list of statistics to be plotted. Must be identical to column - field in metrics_csv. CSI, POD, TPR are currently calculated, if + A list of statistics to be plotted. Must be identical to column + field in metrics_csv. CSI, POD, TPR are currently calculated, if additional statistics are desired formulas would need to be coded. alternate_ahps_query : STRING, optional - The default is false. Currently the default ahps query is same - as done for apg goals. If a different query is desired it can be - supplied and it will supercede the default query. + The default is false. Currently the default ahps query is same + as done for apg goals. If a different query is desired it can be + supplied and it will supercede the default query. spatial_ahps : DICTIONARY, optional - The default is false. A dictionary with keys as follows: + The default is false. A dictionary with keys as follows: 'static': Path to AHPS point file created during creation of FIM 3 static libraries. 'evaluated': Path to extent file created during the creation of the NWS/USGS AHPS preprocessing. - 'metadata': Path to previously created file that contains + 'metadata': Path to previously created file that contains metadata about each site (feature_id, wfo, rfc and etc). No spatial layers will be created if set to False, if a dictionary is supplied then a spatial layer is produced. fim_1_ms: BOOL - Default is false. If True then fim_1 rows are duplicated with - extent_config set to MS. This allows for FIM 1 to be included + Default is false. If True then fim_1 rows are duplicated with + extent_config set to MS. This allows for FIM 1 to be included in MS plots/stats (helpful for nws/usgs ahps comparisons). Returns ------- all_datasets : DICT - Dictionary containing all datasets generated. - Keys: (benchmark_source, extent_config), + Dictionary containing all datasets generated. 
+ Keys: (benchmark_source, extent_config), Values: (filtered dataframe, common sites) ''' - - #Import metrics csv as DataFrame and initialize all_datasets dictionary + + # Import metrics csv as DataFrame and initialize all_datasets dictionary csv_df = pd.read_csv(metrics_csv) - #fim_1_ms flag enables FIM 1 to be shown on MS plots/stats + # fim_1_ms flag enables FIM 1 to be shown on MS plots/stats if fim_1_ms: #Query FIM 1 rows based on version beginning with "fim_1" fim_1_rows = csv_df.query('version.str.startswith("fim_1")').copy() @@ -104,74 +107,72 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' fim_1_rows['extent_config'] = 'MS' #Append duplicate FIM 1 rows to original dataframe csv_df = csv_df.append(fim_1_rows, ignore_index = True) - - #If versions are supplied then filter out + + # If versions are supplied then filter out if versions: #Filter out versions based on supplied version list metrics = csv_df.query('version.str.startswith(tuple(@versions))') else: metrics = csv_df - - #Group by benchmark source + + # Group by benchmark source benchmark_by_source = metrics.groupby(['benchmark_source', 'extent_config']) - #Iterate through benchmark_by_source. Pre-filter metrics dataframe - #as needed (e.g. usgs/nws filter query). Then further filtering to - #discard all hucs/nws_lid that are not present across all analyzed - #versions for a given magnitude. The final filtered dataset is written - #to a dictionary with the key (benchmark source, extent config) - #and values (filtered dataframe, common sites). + ''' Iterate through benchmark_by_source. Pre-filter metrics dataframe + as needed (e.g. usgs/nws filter query). Then further filtering to + discard all hucs/nws_lid that are not present across all analyzed + versions for a given magnitude. The final filtered dataset is written + to a dictionary with the key (benchmark source, extent config) + and values (filtered dataframe, common sites). ''' + all_datasets = {} - for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: - - #If source is usgs/nws define the base resolution and query - #(use alternate query if passed). Append filtered datasets to - #all_datasets dictionary. + for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: + + '''If source is usgs/nws define the base resolution and query + (use alternate query if passed). Append filtered datasets to + all_datasets dictionary.''' + if benchmark_source in ['usgs','nws']: - - #Set the base processing unit for the ahps runs. + + # Set the base processing unit for the ahps runs. base_resolution = 'nws_lid' - + #Default query (used for APG) it could be that bad_sites should be modified. If so pass an alternate query using the "alternate_ahps_query" bad_sites = ['grfi2','ksdm7','hohn4','rwdn4'] query = "not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" - #If alternate ahps evaluation query argument is passed, use that. + # If alternate ahps evaluation query argument is passed, use that. if alternate_ahps_query: query = alternate_ahps_query - #Filter the dataset based on query + # Filter the dataset based on query ahps_metrics = benchmark_metrics.query(query) - - #Filter out all instances where the base_resolution doesn't - #exist across all desired fim versions for a given magnitude. 
+ + # Filter out all instances where the base_resolution doesn't exist across all desired fim versions for a given magnitude all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(ahps_metrics, base_resolution) - - #If source is 'ble', set base_resolution and append ble dataset - #to all_datasets dictionary + + # If source is 'ble', set base_resolution and append ble dataset to all_datasets dictionary elif benchmark_source == 'ble': - - #Set the base processing unit for ble runs + + # Set the base processing unit for ble runs base_resolution = 'huc' - - #Filter out all instances where base_resolution doesn't exist - #across all desired fim versions for a given magnitude. + + # Filter out all instances where base_resolution doesn't exist across all desired fim versions for a given magnitude all_datasets[(benchmark_source, extent_configuration)] = filter_dataframe(benchmark_metrics, base_resolution) - - #For each dataset in all_datasets, generate plots and aggregate statistics. + + # For each dataset in all_datasets, generate plots and aggregate statistics for (dataset_name,configuration), (dataset, sites) in all_datasets.items(): - - #Define and create the output workspace as a subfolder within - #the supplied workspace + + # Define and create the output workspace as a subfolder within the supplied workspace output_workspace = Path(workspace) / dataset_name / configuration.lower() - output_workspace.mkdir(parents = True, exist_ok = True) - - #Write out the filtered dataset and common sites to file + output_workspace.mkdir(parents = True, exist_ok = True) + + # Write out the filtered dataset and common sites to file dataset.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_analyzed_data.csv'), index = False) sites_pd = pd.DataFrame.from_dict(sites, orient = 'index').transpose() sites_pd.to_csv(output_workspace / (f'{dataset_name}_{configuration.lower()}_common_sites.csv'), index = False) - - #set the order of the magnitudes and define base resolution. + + # Set the order of the magnitudes and define base resolution if dataset_name == 'ble': magnitude_order = ['100yr', '500yr'] base_resolution = 'huc' @@ -179,29 +180,28 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' magnitude_order = ['action','minor','moderate','major'] base_resolution = 'nws_lid' - #Calculate aggregated metrics based on total_sq_km fields. + # Calculate aggregated metrics based on total_sq_km fields dataset_sums = dataset.groupby(['version', 'magnitude'])[['TP_area_km2','FP_area_km2','FN_area_km2']].sum() dataset_sums['csi'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2'] + dataset_sums['FN_area_km2']) dataset_sums['far'] = dataset_sums['FP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FP_area_km2']) dataset_sums['pod'] = dataset_sums['TP_area_km2']/(dataset_sums['TP_area_km2'] + dataset_sums['FN_area_km2']) dataset_sums = dataset_sums.reset_index() - - #Write aggregated metrics to file. + + # Write aggregated metrics to file dataset_sums.to_csv(output_workspace / f'aggregate_{dataset_name}_{configuration.lower()}.csv', index = False ) - #This section naturally orders analyzed versions which defines - #the hue order for the generated plots. 
- #Get all versions in dataset - all_versions = list(dataset.version.unique()) - version_order = [] - #If versions are not specified then use all available versions - #and assign to versions_list + ## This section naturally orders analyzed versions which defines the hue order for the generated plots + # Get all versions in dataset + all_versions = list(dataset.version.unique()) + version_order = [] + + # If versions are not specified then use all available versions and assign to versions_list if not versions: versions_list = all_versions - #if versions are supplied assign to versions_list + # If versions are supplied assign to versions_list else: - versions_list = versions - #For each version supplied by the user + versions_list = versions + # For each version supplied by the user for version in versions_list: #Select all the versions that start with the supplied version. selected_versions = [sel_version for sel_version in all_versions if sel_version.startswith(version)] @@ -209,8 +209,8 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' selected_versions = natsorted(selected_versions) #Populate version order based on the sorted subsets. version_order.extend(selected_versions) - - #Define textbox which will contain the counts of each magnitude. + + # Define textbox which will contain the counts of each magnitude textbox = [] for magnitude in sites: count = len(sites[magnitude]) @@ -218,85 +218,85 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' textbox.append(line_text) textbox = '\n'.join(textbox) - #Create aggregate barplot - aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png') + # Create aggregate barplot + aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png') barplot(dataframe = dataset_sums, x_field = 'magnitude', x_order = magnitude_order, y_field = 'csi', hue_field = 'version', ordered_hue = version_order, title_text = f'Aggregate {dataset_name.upper()} FIM Scores', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = aggregate_file) - - #Create box plots for each metric in supplied stats. + + # Create box plots for each metric in supplied stats for stat in stats: - output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png') + output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png') boxplot(dataframe = dataset, x_field = 'magnitude', x_order = magnitude_order, y_field = stat, hue_field = 'version', ordered_hue = version_order, title_text = f'{dataset_name.upper()} FIM Sites', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = output_file) - - #Get the last 2 versions from the version order for scatter plot. - if len(version_order) == 2: + + # Get the last 2 versions from the version order for scatter plot + if len(version_order) == 2: x_version, y_version = version_order for magnitude in magnitude_order: - #Scatterplot comparison between last 2 versions. + # Scatterplot comparison between last 2 versions x_csi = dataset.query(f'version == "{x_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] y_csi = dataset.query(f'version == "{y_version}" & magnitude == "{magnitude}"')[[base_resolution, 'CSI']] plotdf = pd.merge(x_csi, y_csi, on = base_resolution, suffixes = (f"_{x_version}",f"_{y_version}")) - #Define arguments for scatterplot function. 
+ # Define arguments for scatterplot function title_text = f'CSI {magnitude}' dest_file = output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}.png' scatterplot(dataframe = plotdf, x_field = f'CSI_{x_version}', y_field = f'CSI_{y_version}', title_text = title_text, annotate = False, dest_file = dest_file) - + ####################################################################### #Create spatial layers with threshold and mapping information ######################################################################## if spatial_ahps: - #Read in supplied shapefile layers - #Layer containing metadata for each site (feature_id, wfo, etc). - #Convert nws_lid to lower case. + # Read in supplied shapefile layers + # Layer containing metadata for each site (feature_id, wfo, etc) + # Convert nws_lid to lower case ahps_metadata = gpd.read_file(spatial_ahps['metadata']) ahps_metadata['nws_lid'] = ahps_metadata['nws_lid'].str.lower() metadata_crs = ahps_metadata.crs - - #Extent layer generated from preprocessing NWS/USGS datasets + + # Extent layer generated from preprocessing NWS/USGS datasets evaluated_ahps_extent = gpd.read_file(spatial_ahps['evaluated']) - - #Extent layer generated from static ahps library preprocessing + + # Extent layer generated from static ahps library preprocessing static_library = gpd.read_file(spatial_ahps['static']) - - #Fields to keep - #Get list of fields to keep in merge - preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))] - #Get list of fields to keep in merge. + + # Fields to keep + # Get list of fields to keep in merge + preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))] + # Get list of fields to keep in merge preserved_evaluated_ahps_fields = ['nws_lid', 'source', 'geometry'] + [i for i in evaluated_ahps_extent.columns if i.startswith(('action','minor','moderate','major'))] - #Join tables to evaluated_ahps_extent + # Join tables to evaluated_ahps_extent evaluated_ahps_extent = evaluated_ahps_extent[preserved_evaluated_ahps_fields] evaluated_ahps_extent = evaluated_ahps_extent.merge(ahps_metadata, on = 'nws_lid') evaluated_ahps_extent['geometry'] = evaluated_ahps_extent['geometry_y'] evaluated_ahps_extent.drop(columns = ['geometry_y','geometry_x'], inplace = True) evaluated_ahps_extent = evaluated_ahps_extent.merge(static_library[preserved_static_library_fields], on = 'nws_lid') - - #Join dataset metrics to evaluated_ahps_extent data. 
+ + # Join dataset metrics to evaluated_ahps_extent data final_join = pd.DataFrame() for (dataset_name, configuration), (dataset, sites) in all_datasets.items(): - #Only select ahps from dataset if config is MS + # Only select ahps from dataset if config is MS if dataset_name in ['usgs','nws'] and configuration == 'MS': - #Select records from evaluated_ahps_extent that match the dataset name - subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"') - #Join to dataset + # Select records from evaluated_ahps_extent that match the dataset name + subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"') + # Join to dataset dataset_with_subset = dataset.merge(subset, on = 'nws_lid') - #Append rows to final_join dataframe + # Append rows to final_join dataframe final_join = final_join.append(dataset_with_subset) - - #Modify version field + + # Modify version field final_join['version'] = final_join.version.str.split('_nws|_usgs').str[0] - - #Write geodataframe to file + + # Write geodataframe to file gdf = gpd.GeoDataFrame(final_join, geometry = final_join['geometry'], crs = metadata_crs) output_shapefile = Path(workspace) / 'nws_usgs_site_info.shp' - gdf.to_file(output_shapefile) - + gdf.to_file(output_shapefile) + ####################################################################### if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Plot and aggregate statistics for benchmark datasets (BLE/AHPS libraries)') parser.add_argument('-m','--metrics_csv', help = 'Metrics csv created from synthesize test cases.', required = True) parser.add_argument('-w', '--workspace', help = 'Output workspace', required = True) @@ -305,22 +305,22 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' parser.add_argument('-q', '--alternate_ahps_query',help = 'Alternate filter query for AHPS. Default is: "not nws_lid.isnull() & not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" where bad_sites are (grfi2,ksdm7,hohn4,rwdn4)', default = False, required = False) parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False) parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) - - #Extract to dictionary and assign to variables. 
+ + # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - - #If errors occur reassign error to True + + # If errors occur reassign error to True error = False - #Create dictionary if file specified for spatial_ahps + # Create dictionary if file specified for spatial_ahps if args['spatial_ahps']: - #Create dictionary + # Create dictionary spatial_dict = {} with open(args['spatial_ahps']) as file: for line in file: key, value = line.strip('\n').split(',') spatial_dict[key] = Path(value) args['spatial_ahps'] = spatial_dict - #Check that all required keys are present and overwrite args with spatial_dict + # Check that all required keys are present and overwrite args with spatial_dict required_keys = set(['metadata', 'evaluated', 'static']) if required_keys - spatial_dict.keys(): print('\n Required keys are: metadata, evaluated, static') @@ -329,7 +329,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' args['spatial_ahps'] = spatial_dict - #Finalize Variables + # Finalize Variables m = args['metrics_csv'] w = args['workspace'] v = args['versions'] @@ -338,6 +338,6 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' sp= args['spatial_ahps'] f = args['fim_1_ms'] - #Run eval_plots function - if not error: - eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f) \ No newline at end of file + # Run eval_plots function + if not error: + eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f) diff --git a/tools/plots/utils/shared_functions.py b/tools/plots/plot_functions.py old mode 100644 new mode 100755 similarity index 100% rename from tools/plots/utils/shared_functions.py rename to tools/plots/plot_functions.py diff --git a/tools/preprocess/create_flow_forecast_file.py b/tools/preprocess/create_flow_forecast_file.py old mode 100644 new mode 100755 index 9de7abfe5..bb8833343 --- a/tools/preprocess/create_flow_forecast_file.py +++ b/tools/preprocess/create_flow_forecast_file.py @@ -1,10 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Jul 29 11:48:37 2020 -@author: Fernando Aristizabal with edits by Trevor Grout -""" import os import geopandas as gpd import argparse @@ -21,54 +16,54 @@ def create_flow_forecast_file(ble_geodatabase, nwm_geodatabase, output_parent_di Path to nwm geodatabase. output_parent_dir : STRING Output parent directory of output. Flow files will be output to subdirectories within parent directory. - ble_xs_layer_name : STRING - The cross section layer in the ble geodatabase to be imported. Default is 'XS' (sometimes it is 'XS_1D') - ble_huc_layer_name : STRING + ble_xs_layer_name : STRING + The cross section layer in the ble geodatabase to be imported. Default is 'XS' (sometimes it is 'XS_1D') + ble_huc_layer_name : STRING The huc layer in the ble geodatabase. Default is 'S_HUC_Ar' (sometimes it is 'S_HUC_ar' ) - ble_huc_id_field : STRING + ble_huc_id_field : STRING The attribute field within the ble_huc_layer_name containing the huc code. Default is 'HUC_CODE'. Assumes only 1 unique code. - nwm_stream_layer_name : STRING + nwm_stream_layer_name : STRING The stream centerline layer name (or partial layer name) for the NWM geodatabase. Default is 'RouteLink_FL_2020_04_07'. - nwm_feature_id_field : STRING + nwm_feature_id_field : STRING The feature id of the nwm segments. 
Default is 'ID' (applicable if nwmv2.1 is used) Returns ------- None. ''' - #Read the ble xs layer into a geopandas dataframe. + # Read the ble xs layer into a geopandas dataframe. xs_layer = gpd.read_file(ble_geodatabase,layer = ble_xs_layer_name) - #Read ble huc layer into a geopandas dataframe and extract the huc code. By default it assumes only one HUC in the layer (typically always the case). + # Read ble huc layer into a geopandas dataframe and extract the huc code. By default it assumes only one HUC in the layer (typically always the case). huc_layer = gpd.read_file(ble_geodatabase, layer = ble_huc_layer_name) [huc] = huc_layer[ble_huc_id_field].unique() - - #Read in the NWM stream layer into a geopandas dataframe using the bounding box option based on the extents of the BLE XS layer. + + # Read in the NWM stream layer into a geopandas dataframe using the bounding box option based on the extents of the BLE XS layer. nwm_river_layer = gpd.read_file(nwm_geodatabase, bbox = xs_layer, layer = nwm_stream_layer_name) - - #Make sure xs_layer is in same projection as nwm_river_layer. + + # Make sure xs_layer is in same projection as nwm_river_layer. xs_layer_proj = xs_layer.to_crs(nwm_river_layer.crs) - - #Perform an intersection of the BLE layers and the NWM layers, using the keep_geom_type set to False produces a point output. + + # Perform an intersection of the BLE layers and the NWM layers, using the keep_geom_type set to False produces a point output. intersection = gpd.overlay(xs_layer_proj, nwm_river_layer, how = 'intersection', keep_geom_type = False) - #Create the flow forecast files - #define fields containing flow (typically these won't change for BLE) + ## Create the flow forecast files + # Define fields containing flow (typically these won't change for BLE) flow_fields = ['E_Q_01PCT','E_Q_0_2PCT'] - #define return period associated with flow_fields (in same order as flow_fields). These will also serve as subdirectory names. + # Define return period associated with flow_fields (in same order as flow_fields). These will also serve as subdirectory names. 
return_period = ['100yr','500yr'] - #Conversion factor from CFS to CMS - dischargeMultiplier = 0.3048 ** 3 - - #Write individual flow csv files + # Conversion factor from CFS to CMS + dischargeMultiplier = 0.3048 ** 3 + + # Write individual flow csv files for i,flow in enumerate(flow_fields): - #Write dataframe with just ID and single flow event + # Write dataframe with just ID and single flow event forecast = intersection[[nwm_feature_id_field,flow]] - #Rename field names and re-define datatypes + # Rename field names and re-define datatypes forecast = forecast.rename(columns={nwm_feature_id_field :'feature_id',flow : 'discharge'}) forecast = forecast.astype({'feature_id' : int , 'discharge' : float}) @@ -76,18 +71,18 @@ def create_flow_forecast_file(ble_geodatabase, nwm_geodatabase, output_parent_di forecast = forecast.groupby('feature_id').median() forecast = forecast.reset_index(level=0) - #Convert CFS to CMS + # Convert CFS to CMS forecast['discharge'] = forecast['discharge'] * dischargeMultiplier - #Set paths and write file + # Set paths and write file output_dir = os.path.join(output_parent_dir, huc) dir_of_csv = os.path.join(output_dir,return_period[i]) os.makedirs(dir_of_csv,exist_ok = True) path_to_csv = os.path.join(dir_of_csv,"ble_huc_{}_flows_{}.csv".format(huc,return_period[i])) - forecast.to_csv(path_to_csv,index=False) - + forecast.to_csv(path_to_csv,index=False) + if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Produce forecast flow files from BLE datasets') parser.add_argument('-b', '--ble-geodatabase', help = 'BLE geodatabase (.gdb file extension). Will look for layer with "XS" in name. It is assumed the 100 year flow field is "E_Q_01PCT" and the 500 year flow field is "E_Q_0_2_PCT" as these are the default field names.', required = True) parser.add_argument('-n', '--nwm-geodatabase', help = 'NWM geodatabase (.gdb file extension).', required = True) @@ -97,9 +92,7 @@ def create_flow_forecast_file(ble_geodatabase, nwm_geodatabase, output_parent_di parser.add_argument('-huid', '--ble-huc-id-field', help = 'BLE id field in the ble-huc-layer-name. Default field is "HUC_CODE".', required = False, default = 'HUC_CODE') parser.add_argument('-l', '--nwm-stream-layer-name', help = 'NWM streams layer. Default layer is "RouteLink_FL_2020_04_07")', required = False, default = 'RouteLink_FL_2020_04_07') parser.add_argument('-f', '--nwm-feature-id-field', help = 'id field for nwm streams. Not required if NWM v2.1 is used (default id field is "ID")', required = False, default = 'ID') - #Extract to dictionary and assign to variables. + # Extract to dictionary and assign to variables. args = vars(parser.parse_args()) - #Run create_flow_forecast_file + # Run create_flow_forecast_file create_flow_forecast_file(**args) - - diff --git a/tools/preprocess/preprocess_benchmark.py b/tools/preprocess/preprocess_benchmark.py old mode 100644 new mode 100755 index 02f8e5ea8..81a65db2d --- a/tools/preprocess/preprocess_benchmark.py +++ b/tools/preprocess/preprocess_benchmark.py @@ -1,12 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Thu Jul 23 15:17:04 2020 - -@author: trevor.grout -""" - import rasterio from rasterio.warp import calculate_default_transform, reproject, Resampling import rasterio.mask @@ -15,7 +8,7 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_path = None): ''' - This function will preprocess a benchmark dataset for purposes of evaluating FIM output. 
A benchmark dataset will be transformed using properties (CRS, resolution) from an input reference dataset. The benchmark raster will also be converted to a boolean (True/False) raster with inundated areas (True or 1) and dry areas (False or 0). + This function will preprocess a benchmark dataset for purposes of evaluating FIM output. A benchmark dataset will be transformed using properties (CRS, resolution) from an input reference dataset. The benchmark raster will also be converted to a boolean (True/False) raster with inundated areas (True or 1) and dry areas (False or 0). Parameters ---------- @@ -34,59 +27,59 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p Raster profile information for the preprocessed benchmark array (required for writing to output dataset). ''' - #Open and read raster and benchmark rasters + # Open and read raster and benchmark rasters reference = rasterio.open(reference_raster) benchmark = rasterio.open(benchmark_raster) - benchmark_arr = benchmark.read(1) + benchmark_arr = benchmark.read(1) - #Set arbitrary no data value that is not possible value of the benchmark dataset. This will be reassigned later. + # Set arbitrary no data value that is not possible value of the benchmark dataset. This will be reassigned later nodata_value = -2147483648 - - #Determine the new transform and dimensions of reprojected/resampled raster. + + # Determine the new transform and dimensions of reprojected/resampled raster new_transform, new_width, new_height = calculate_default_transform(benchmark.crs, reference.crs, benchmark.width, benchmark.height, *benchmark.bounds, resolution = reference.res) - #Define an empty array that is same dimensions as output by the "calculate_default_transform" command. + # Define an empty array that is same dimensions as output by the "calculate_default_transform" command benchmark_projected = np.empty((new_height,new_width), dtype=np.int32) - #Reproject and resample the benchmark dataset. Bilinear resampling due to continuous depth data. - reproject(benchmark_arr, + # Reproject and resample the benchmark dataset. Bilinear resampling due to continuous depth data + reproject(benchmark_arr, destination = benchmark_projected, - src_transform = benchmark.transform, + src_transform = benchmark.transform, src_crs = benchmark.crs, src_nodata = benchmark.nodata, - dst_transform = new_transform, + dst_transform = new_transform, dst_crs = reference.crs, dst_nodata = nodata_value, dst_resolution = reference.res, resampling = Resampling.bilinear) - #Convert entire depth grid to boolean (1 = Flood, 0 = No Flood) + # Convert entire depth grid to boolean (1 = Flood, 0 = No Flood) boolean_benchmark = np.where(benchmark_projected != nodata_value, 1, 0) - #Update profile (data type, NODATA, transform, width/height). + #Update profile (data type, NODATA, transform, width/height) profile = reference.profile profile.update(transform = new_transform) profile.update(dtype = rasterio.int8) - profile.update(nodata = 2) #Update NODATA to some integer so we can keep int8 datatype. There are no NODATA in the raster dataset. + profile.update(nodata = 2) #Update NODATA to some integer so we can keep int8 datatype. There are no NODATA in the raster dataset profile.update (width = new_width) profile.update(height = new_height) - #Write out preprocessed benchmark array to raster if path is supplied - if out_raster_path is not None: - with rasterio.Env(): - #Write out reassigned values to raster dataset. 
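# Editor's sketch (not part of the patch): the boolean conversion above, in miniature.
# Every cell that is not the arbitrary nodata fill becomes 1 (flood) and nodata cells become
# 0 (no flood); the array values are invented.
import numpy as np
nodata_demo = -2147483648
depths_demo = np.array([[0.4, 2.1, nodata_demo],
                        [0.0, nodata_demo, 5.3]])
boolean_demo = np.where(depths_demo != nodata_demo, 1, 0).astype('int8')  # [[1,1,0],[1,0,1]]
# The output profile then sets nodata to 2 so the raster can stay int8 without colliding with 0/1.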
+ # Write out preprocessed benchmark array to raster if path is supplied + if out_raster_path is not None: + with rasterio.Env(): + # Write out reassigned values to raster dataset with rasterio.open(out_raster_path, 'w', **profile) as dst: - dst.write(boolean_benchmark.astype('int8'),1) + dst.write(boolean_benchmark.astype('int8'),1) return boolean_benchmark.astype('int8'), profile if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Preprocess BLE grids (in tiff format) for use in run_test_cast.py. Preprocessing includes reprojecting and converting to boolean raster (1 = Flooding, 0 = No Flooding)') parser.add_argument('-b','--benchmark-raster', help = 'BLE depth or water surface elevation grid (in GTiff format).', required = True) parser.add_argument('-r', '--reference-raster', help = 'Benchmark will use reference raster to set CRS and resolution to reference raster CRS.', required = True) parser.add_argument('-o', '--out-raster-path', help = 'Output raster path (include name and extension).', required = True) - #Extract to dictionary and assign to variables. + # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - #Run preprocess benchmark function + # Run preprocess benchmark function preprocess_benchmark_static(**args) diff --git a/tools/preprocess/preprocess_fimx.py b/tools/preprocess/preprocess_fimx.py old mode 100644 new mode 100755 index 344fecf7d..cad6058d0 --- a/tools/preprocess/preprocess_fimx.py +++ b/tools/preprocess/preprocess_fimx.py @@ -1,9 +1,5 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Jul 24 13:50:59 2020 +#!/usr/bin/env python3 -@author: trevor.grout -""" import rasterio from rasterio.warp import calculate_default_transform, reproject, Resampling from rasterio import features @@ -47,74 +43,75 @@ def fimx_to_fim3(catchments_path, raster_value_field, hand_raster_path, template Preprocessed catchment raster profile. ''' - - - #Read in template raster as band object. + + + # Read in template raster as band object reference = rasterio.open(template_raster) - - #Step 1: Convert HAND grid - #Read in the hand raster + + ## Step 1: Convert HAND grid + # Read in the hand raster hand = rasterio.open(hand_raster_path) hand_arr = hand.read(1) - #Determine the new transform and dimensions of reprojected raster (CRS = reference raster). + #Determine the new transform and dimensions of reprojected raster (CRS = reference raster) new_transform, new_width, new_height = calculate_default_transform(hand.crs, reference.crs, hand.width, hand.height, *hand.bounds) - #Define an empty array that is same dimensions as output by the "calculate_default_transform" command. - hand_proj = np.empty((new_height,new_width), dtype=np.float) - #Reproject to target dataset (resample method is bilinear due to elevation type data). 
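# Editor's note (sketch, not part of the patch): the resampling rule followed in these
# preprocessing scripts, stated once. Continuous surfaces such as depth or HAND grids are
# resampled bilinearly, whereas a categorical grid (e.g. catchment IDs) would use nearest
# neighbour so class values are never blended into values that do not exist. The dictionary
# below is illustrative only; its name is hypothetical.
from rasterio.warp import Resampling
RESAMPLING_BY_KIND = {'depth_or_hand': Resampling.bilinear,   # continuous data
                      'categorical_ids': Resampling.nearest}  # class/ID data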
+ # Define an empty array that is same dimensions as output by the "calculate_default_transform" command + hand_proj = np.empty((new_height,new_width), dtype=np.float) + # Reproject to target dataset (resample method is bilinear due to elevation type data) hand_nodata_value = -2147483648 - reproject(hand_arr, + reproject(hand_arr, destination = hand_proj, - src_transform = hand.transform, + src_transform = hand.transform, src_crs = hand.crs, src_nodata = hand.nodata, - dst_transform = new_transform, + dst_transform = new_transform, dst_crs = reference.crs, dst_nodata = hand_nodata_value, dst_resolution = hand.res, resampling = Resampling.bilinear) - #Update profile data type and no data value. + + # Update profile data type and no data value hand_profile = reference.profile hand_profile.update(dtype = rasterio.float32) hand_profile.update(nodata = hand_nodata_value) hand_profile.update(width = new_width) hand_profile.update(height = new_height) hand_profile.update(transform = new_transform) - - #Step 2: Catchments to Polygons (same extent as the HAND raster) - #Read in the catchment layer to geopandas dataframe and convert to same CRS as reference raster. + + ## Step 2: Catchments to Polygons (same extent as the HAND raster) + # Read in the catchment layer to geopandas dataframe and convert to same CRS as reference raster gdbpath, layername = os.path.split(catchments_path) gdb_layer=gpd.read_file(gdbpath, driver='FileGDB', layer=layername) proj_gdb_layer = gdb_layer.to_crs(reference.crs) - #Prepare vector data to be written to raster. - shapes = list(zip(proj_gdb_layer['geometry'],proj_gdb_layer[raster_value_field].astype('int32'))) - #Write vector data to raster image. Fill raster with zeros for areas that do not have data. We will set nodata to be zero later. - catchment_proj = features.rasterize(((geometry, value) for geometry, value in shapes), fill = 0, out_shape=hand_proj.shape, transform=hand_profile['transform'], dtype = 'int32' ) - #Save raster image to in-memory dataset. Reset dtype and nodata values. + # Prepare vector data to be written to raster + shapes = list(zip(proj_gdb_layer['geometry'],proj_gdb_layer[raster_value_field].astype('int32'))) + # Write vector data to raster image. Fill raster with zeros for areas that do not have data. We will set nodata to be zero later + catchment_proj = features.rasterize(((geometry, value) for geometry, value in shapes), fill = 0, out_shape=hand_proj.shape, transform=hand_profile['transform'], dtype = 'int32' ) + # Save raster image to in-memory dataset. Reset dtype and nodata values. catchment_profile = hand_profile.copy() catchment_profile.update(dtype = 'int32') catchment_profile.update(nodata=0) - - #Step 3: Union of NODATA locations applied to both HAND and Catchment grids. + + ## Step 3: Union of NODATA locations applied to both HAND and Catchment grids catchment_masked = np.where(np.logical_or(hand_proj == hand_profile['nodata'], catchment_proj == catchment_profile['nodata']), catchment_profile['nodata'],catchment_proj) - #Assign NODATA to hand where both catchment and hand have NODATA else assign hand values. + # Assign NODATA to hand where both catchment and hand have NODATA else assign hand values. 
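# Editor's sketch (not part of the patch): the NODATA union applied to the two grids, in
# miniature. A cell survives only where BOTH the HAND and catchment grids hold valid data;
# the small arrays below are invented.
import numpy as np
hand_nd_demo, cat_nd_demo = -2147483648, 0
hand_demo = np.array([[1.5, hand_nd_demo], [2.0, 3.0]])
cat_demo = np.array([[7, 7], [cat_nd_demo, 7]])
either_nodata = np.logical_or(hand_demo == hand_nd_demo, cat_demo == cat_nd_demo)
cat_masked_demo = np.where(either_nodata, cat_nd_demo, cat_demo)     # [[7, 0], [0, 7]]
hand_masked_demo = np.where(either_nodata, hand_nd_demo, hand_demo)  # nodata wherever either grid is missing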
hand_masked = np.where(np.logical_or(hand_proj == hand_profile['nodata'], catchment_proj == catchment_profile['nodata']), hand_profile['nodata'],hand_proj) - #Step 4: Write out hand and catchment rasters to file if path is specified + ## Step 4: Write out hand and catchment rasters to file if path is specified if out_hand_path is not None: - os.makedirs(os.path.split(out_hand_path)[0], exist_ok = True) + os.makedirs(os.path.split(out_hand_path)[0], exist_ok = True) with rasterio.Env(): with rasterio.open(out_hand_path, 'w', **hand_profile) as hnd_dst: hnd_dst.write(hand_masked.astype('float32'),1) if out_catchment_path is not None: - os.makedirs(os.path.split(out_catchment_path)[0], exist_ok = True) + os.makedirs(os.path.split(out_catchment_path)[0], exist_ok = True) with rasterio.Env(): with rasterio.open(out_catchment_path, 'w', **catchment_profile) as cat_dst: - cat_dst.write(catchment_masked.astype('int32'),1) - + cat_dst.write(catchment_masked.astype('int32'),1) + return hand_masked, hand_profile, catchment_masked, catchment_profile if __name__ == '__main__': - #Parse arguments + # Parse arguments parser = argparse.ArgumentParser(description = 'Preprocess FIM 1 and FIM 2 HAND and Catchment grids to be compatible with FIM 3.') parser.add_argument('-c','--catchments-path', help = 'Path to catchments vector file', required = True) parser.add_argument('-f', '--raster-value-field', help = 'Attribute ID field from which raster values will be assigned. Typically this will be "HydroID" for FIM2 and "feature_ID" for fim 1.', required = True) @@ -122,8 +119,7 @@ def fimx_to_fim3(catchments_path, raster_value_field, hand_raster_path, template parser.add_argument('-t', '--template-raster', help = 'Path to a template raster. Properties (CRS, resolution) of the template raster will be used to preprocess HAND and Catchments grids', required = True) parser.add_argument('-oh', '--out-hand-path', help = 'Path to the output HAND raster. Raster must be named "rem_clipped_zeroed_masked.tif', required = True) parser.add_argument('-oc', '--out-catchment-path', help = 'Path to the output Catchment raster. Raster must be named "gw_catchments_reaches_clipped_addedAttributes.tif"', required = True) - #Extract to dictionary and assign to variables. + # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - #Run fimx to fim3 function. + # Run fimx to fim3 function fimx_to_fim3(**args) - diff --git a/tools/run_test_case.py b/tools/run_test_case.py index 2a0a279c5..e3168a422 100755 --- a/tools/run_test_case.py +++ b/tools/run_test_case.py @@ -5,16 +5,16 @@ import shutil import argparse -from utils.shared_functions import compute_contingency_stats_from_rasters -from utils.shared_variables import (TEST_CASES_DIR, INPUTS_DIR, ENDC, TRED_BOLD, WHITE_BOLD, CYAN_BOLD, AHPS_BENCHMARK_CATEGORIES) +from tools_shared_functions import compute_contingency_stats_from_rasters +from tools_shared_variables import (TEST_CASES_DIR, INPUTS_DIR, ENDC, TRED_BOLD, WHITE_BOLD, CYAN_BOLD, AHPS_BENCHMARK_CATEGORIES) from inundation import inundate def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous=False, archive_results=False, mask_type='huc', inclusion_area='', inclusion_area_buffer=0, light_run=False, overwrite=True): - + benchmark_category = test_id.split('_')[1] # Parse benchmark_category from test_id. current_huc = test_id.split('_')[0] # Break off HUC ID and assign to variable. - + # Construct paths to development test results if not existent. 
if archive_results: version_test_case_dir_parent = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', test_id, 'official_versions', version) @@ -28,7 +28,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous else: print("Metrics for ({version}: {test_id}) already exist. Use overwrite flag (-o) to overwrite metrics.".format(version=version, test_id=test_id)) return - + os.mkdir(version_test_case_dir_parent) print("Running the alpha test for test_id: " + test_id + ", " + version + "...") @@ -49,13 +49,13 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous else: catchment_poly = os.path.join(fim_run_parent, 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg') hydro_table = os.path.join(fim_run_parent, 'hydroTable.csv') - + # Map necessary inputs for inundation(). hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' # Create list of shapefile paths to use as exclusion areas. zones_dir = os.path.join(TEST_CASES_DIR, 'other', 'zones') - mask_dict = {'levees': + mask_dict = {'levees': {'path': os.path.join(zones_dir, 'leveed_areas_conus.shp'), 'buffer': None, 'operation': 'exclude' @@ -66,7 +66,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous 'operation': 'exclude', }, } - + if inclusion_area != '': inclusion_area_name = os.path.split(inclusion_area)[1].split('.')[0] # Get layer name mask_dict.update({inclusion_area_name: {'path': inclusion_area, @@ -75,7 +75,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous # Append the concatenated inclusion_area_name and buffer. if inclusion_area_buffer == None: inclusion_area_buffer = 0 - stats_modes_list.append(inclusion_area_name + '_b' + str(inclusion_area_buffer) + 'm') + stats_modes_list.append(inclusion_area_name + '_b' + str(inclusion_area_buffer) + 'm') # Check if magnitude is list of magnitudes or single value. magnitude_list = magnitude @@ -88,13 +88,13 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous version_test_case_dir = os.path.join(version_test_case_dir_parent, magnitude) if not os.path.exists(version_test_case_dir): os.mkdir(version_test_case_dir) - + # Construct path to validation raster and forecast file. 
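# Editor's sketch (not part of the patch): the shape of the entry appended to mask_dict when
# an inclusion area is passed to run_alpha_test. The layer name, path, and buffer are
# hypothetical; the buffer is in metres and is echoed in the stats mode name (e.g. '_b10m').
mask_dict_example = {'my_study_area': {'path': '/data/inclusion/my_study_area.shp',
                                       'buffer': 10,
                                       'operation': 'include'}}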
if benchmark_category in AHPS_BENCHMARK_CATEGORIES: benchmark_raster_path_list, forecast_list = [], [] lid_dir_list = os.listdir(os.path.join(validation_data_path, current_huc)) lid_list, inundation_raster_list, domain_file_list = [], [], [] - + for lid in lid_dir_list: lid_dir = os.path.join(validation_data_path, current_huc, lid) benchmark_raster_path_list.append(os.path.join(lid_dir, magnitude, 'ahps_' + lid + '_huc_' + current_huc + '_extent_' + magnitude + '.tif')) # TEMP @@ -109,7 +109,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous forecast_path = os.path.join(TEST_CASES_DIR, benchmark_category + '_test_cases', 'validation_data_' + benchmark_category, current_huc, magnitude, benchmark_category + '_huc_' + current_huc + '_flows_' + magnitude + '.csv') forecast_list = [forecast_path] inundation_raster_list = [os.path.join(version_test_case_dir, 'inundation_extent.tif')] - + for index in range(0, len(benchmark_raster_path_list)): benchmark_raster_path = benchmark_raster_path_list[index] forecast = forecast_list[index] @@ -123,7 +123,7 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous 'buffer': None, 'operation': 'include'} }) - + if not os.path.exists(benchmark_raster_path) or not os.path.exists(ahps_domain_file) or not os.path.exists(forecast): # Skip loop instance if the benchmark raster doesn't exist. continue else: # If not in AHPS_BENCHMARK_CATEGORIES. @@ -137,16 +137,16 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True ) - + print("-----> Inundation mapping complete.") predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - + # Define outputs for agreement_raster, stats_json, and stats_csv. if benchmark_category in AHPS_BENCHMARK_CATEGORIES: agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') else: agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - + compute_contingency_stats_from_rasters(predicted_raster_path, benchmark_raster_path, agreement_raster, @@ -157,16 +157,16 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous test_id=test_id, mask_dict=mask_dict, ) - + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: del mask_dict[ahps_lid] - + print(" ") print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) print(" ") except Exception as e: - print(e) - + print(e) + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: # -- Delete temp files -- # # List all files in the output directory. @@ -217,12 +217,12 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous print(WHITE_BOLD + "Please provide the parent directory name for fim_run.sh outputs. These outputs are usually written in a subdirectory, e.g. outputs/123456/123456." 
+ ENDC) print() exit_flag = True - + # Ensure inclusion_area path exists. if args['inclusion_area'] != "" and not os.path.exists(args['inclusion_area']): print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided inclusion_area (-i) " + CYAN_BOLD + args['inclusion_area'] + WHITE_BOLD + " could not be located." + ENDC) exit_flag = True - + try: inclusion_buffer = int(args['inclusion_area_buffer']) except ValueError: @@ -235,8 +235,8 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous args['magnitude'] = ['action', 'minor', 'moderate', 'major'] else: print(TRED_BOLD + "Error: " + WHITE_BOLD + "The provided magnitude (-y) " + CYAN_BOLD + args['magnitude'] + WHITE_BOLD + " is invalid. ble options include: 100yr, 500yr. ahps options include action, minor, moderate, major." + ENDC) - exit_flag = True - + exit_flag = True + if exit_flag: print() sys.exit() diff --git a/tools/run_test_case_calibration.py b/tools/run_test_case_calibration.py index f630360b0..728b87abe 100755 --- a/tools/run_test_case_calibration.py +++ b/tools/run_test_case_calibration.py @@ -9,7 +9,7 @@ import argparse import shutil -from utils.shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table +from tools_shared_functions import get_contingency_table_from_binary_rasters, compute_stats_from_contingency_table from inundation import inundate TEST_CASES_DIR = r'/data/test_cases/' # Will update. diff --git a/tools/utils/shapefile_to_raster.py b/tools/shapefile_to_raster.py old mode 100644 new mode 100755 similarity index 88% rename from tools/utils/shapefile_to_raster.py rename to tools/shapefile_to_raster.py index 4d1a61ed9..fc1689954 --- a/tools/utils/shapefile_to_raster.py +++ b/tools/shapefile_to_raster.py @@ -1,9 +1,4 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Jul 14 16:19:26 2020 - -@author: bradford.bates -""" +#!/usr/bin/env python3 # A script to rasterise a shapefile to the same projection & pixel resolution as a reference image. 
from osgeo import ogr, gdal @@ -28,7 +23,7 @@ print("Rasterising shapefile...") Output = gdal.GetDriverByName(gdalformat).Create(OutputImage, Image.RasterXSize, Image.RasterYSize, 1, datatype, options=['COMPRESS=DEFLATE']) Output.SetProjection(Image.GetProjectionRef()) -Output.SetGeoTransform(Image.GetGeoTransform()) +Output.SetGeoTransform(Image.GetGeoTransform()) # Write data to band 1 Band = Output.GetRasterBand(1) @@ -43,4 +38,4 @@ # Build image overviews subprocess.call("gdaladdo --config COMPRESS_OVERVIEW DEFLATE "+OutputImage+" 2 4 8 16 32 64", shell=True) -print("Done.") \ No newline at end of file +print("Done.") diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py old mode 100644 new mode 100755 index 1fdb0a4dc..f3d02192c --- a/tools/synthesize_test_cases.py +++ b/tools/synthesize_test_cases.py @@ -7,11 +7,11 @@ import csv from run_test_case import run_alpha_test -from utils.shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES +from tools_shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES def create_master_metrics_csv(master_metrics_csv_output): - + # Construct header metrics_to_write = ['true_negatives_count', 'false_negatives_count', @@ -55,26 +55,26 @@ def create_master_metrics_csv(master_metrics_csv_output): 'masked_perc', 'masked_area_km2' ] - + additional_header_info_prefix = ['version', 'nws_lid', 'magnitude', 'huc'] list_to_write = [additional_header_info_prefix + metrics_to_write + ['full_json_path'] + ['flow'] + ['benchmark_source'] + ['extent_config'] + ["calibrated"]] - + versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) - + for benchmark_source in ['ble', 'nws', 'usgs']: - + benchmark_test_case_dir = os.path.join(TEST_CASES_DIR, benchmark_source + '_test_cases') - + if benchmark_source == 'ble': test_cases_list = os.listdir(benchmark_test_case_dir) - + for test_case in test_cases_list: try: int(test_case.split('_')[0]) - + huc = test_case.split('_')[0] official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') - + for magnitude in ['100yr', '500yr']: for version in versions_to_aggregate: if '_fr' in version: @@ -108,21 +108,21 @@ def create_master_metrics_csv(master_metrics_csv_output): sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) sub_list_to_append.append(calibrated) - + list_to_write.append(sub_list_to_append) except ValueError: pass - + if benchmark_source in AHPS_BENCHMARK_CATEGORIES: test_cases_list = os.listdir(benchmark_test_case_dir) for test_case in test_cases_list: try: int(test_case.split('_')[0]) - + huc = test_case.split('_')[0] official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') - + for magnitude in ['action', 'minor', 'moderate', 'major']: for version in versions_to_aggregate: if '_fr' in version: @@ -135,7 +135,7 @@ def create_master_metrics_csv(master_metrics_csv_output): calibrated = "yes" else: calibrated = "no" - + version_dir = os.path.join(official_versions, version) magnitude_dir = os.path.join(version_dir, magnitude) if os.path.exists(magnitude_dir): @@ -147,8 +147,8 @@ def create_master_metrics_csv(master_metrics_csv_output): full_json_path = os.path.join(magnitude_dir, f) flow = '' if os.path.exists(full_json_path): - - # Get flow used to map. + + # Get flow used to map. 
flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') if os.path.exists(flow_file): with open(flow_file, newline='') as csv_file: @@ -158,7 +158,7 @@ def create_master_metrics_csv(master_metrics_csv_output): flow = row[1] if nws_lid == 'mcc01': print(flow) - + stats_dict = json.load(open(full_json_path)) for metric in metrics_to_write: sub_list_to_append.append(stats_dict[metric]) @@ -167,27 +167,27 @@ def create_master_metrics_csv(master_metrics_csv_output): sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) sub_list_to_append.append(calibrated) - + list_to_write.append(sub_list_to_append) except ValueError: pass - + with open(master_metrics_csv_output, 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile) csv_writer.writerows(list_to_write) def process_alpha_test(args): - + fim_run_dir = args[0] version = args[1] test_id = args[2] magnitude = args[3] archive_results = args[4] overwrite = args[5] - - mask_type = 'huc' - + + mask_type = 'huc' + if archive_results == False: compare_to_previous = True else: @@ -210,7 +210,7 @@ def process_alpha_test(args): parser.add_argument('-b','--benchmark-category',help='A benchmark category to specify. Defaults to process all categories.',required=False, default="all") parser.add_argument('-o','--overwrite',help='Overwrite all metrics or only fill in missing metrics.',required=False, action="store_true") parser.add_argument('-m','--master-metrics-csv',help='Define path for master metrics CSV file.',required=True) - + # Assign variables from arguments. args = vars(parser.parse_args()) config = args['config'] @@ -220,11 +220,11 @@ def process_alpha_test(args): benchmark_category = args['benchmark_category'] overwrite = args['overwrite'] master_metrics_csv = args['master_metrics_csv'] - + if overwrite: if input("Are you sure you want to overwrite metrics? y/n: ") == "n": quit - + # Default to processing all possible versions in PREVIOUS_FIM_DIR. Otherwise, process only the user-supplied version. if fim_version != "all": previous_fim_list = [fim_version] @@ -233,7 +233,7 @@ def process_alpha_test(args): previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) elif config == 'DEV': previous_fim_list = os.listdir(OUTPUTS_DIR) - + # Define whether or not to archive metrics in "official_versions" or "testing_versions" for each test_id. if config == 'PREV': archive_results = True @@ -241,7 +241,7 @@ def process_alpha_test(args): archive_results = False else: print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') - + # List all available benchmark categories and test_cases. test_cases_dir_list = os.listdir(TEST_CASES_DIR) benchmark_category_list = [] @@ -251,41 +251,41 @@ def process_alpha_test(args): benchmark_category_list.append(d.replace('_test_cases', '')) else: benchmark_category_list = [benchmark_category] - + # Loop through benchmark categories. procs_list = [] for bench_cat in benchmark_category_list: - + # Map path to appropriate test_cases folder and list test_ids into bench_cat_id_list. bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases') bench_cat_id_list = os.listdir(bench_cat_test_case_dir) - + # Loop through test_ids in bench_cat_id_list. for test_id in bench_cat_id_list: if 'validation' and 'other' not in test_id: current_huc = test_id.split('_')[0] if test_id.split('_')[1] in bench_cat: - + # Loop through versions. 
for version in previous_fim_list: if config == 'DEV': fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc) elif config == 'PREV': fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc) - + # For previous versions of HAND computed at HUC6 scale if not os.path.exists(fim_run_dir): if config == 'DEV': fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc[:6]) elif config == 'PREV': - fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) - + fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) + if os.path.exists(fim_run_dir): - + # If a user supplies a specia_string (-s), then add it to the end of the created dirs. if special_string != "": version = version + '_' + special_string - + # Define the magnitude lists to use, depending on test_id. if 'ble' in test_id: magnitude = ['100yr', '500yr'] @@ -293,19 +293,18 @@ def process_alpha_test(args): magnitude = ['action', 'minor', 'moderate', 'major'] else: continue - + # Either add to list to multiprocess or process serially, depending on user specification. if job_number > 1: procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) - else: + else: process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results, overwrite]) # Multiprocess alpha test runs. if job_number > 1: pool = Pool(job_number) pool.map(process_alpha_test, procs_list) - + # Do aggregate_metrics. print("Creating master metrics CSV...") create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv) - \ No newline at end of file diff --git a/tools/time_and_tee_mannings_calibration.sh b/tools/time_and_tee_mannings_calibration.sh index d45976cc8..7a1c06cea 100755 --- a/tools/time_and_tee_mannings_calibration.sh +++ b/tools/time_and_tee_mannings_calibration.sh @@ -1,4 +1,4 @@ #!/bin/bash -e -/usr/bin/time -v $testdir/mannings_run_by_set.sh $1 |& tee +/usr/bin/time -v $toolsdir/mannings_run_by_set.sh $1 |& tee exit ${PIPESTATUS[0]} diff --git a/tools/utils/shared_functions.py b/tools/tools_shared_functions.py old mode 100644 new mode 100755 similarity index 100% rename from tools/utils/shared_functions.py rename to tools/tools_shared_functions.py diff --git a/tools/utils/shared_variables.py b/tools/tools_shared_variables.py old mode 100644 new mode 100755 similarity index 100% rename from tools/utils/shared_variables.py rename to tools/tools_shared_variables.py diff --git a/tools/utils/__init__.py b/tools/utils/__init__.py deleted file mode 100644 index e69de29bb..000000000 From 18afaefeaa01b1212f131c4826d9669dcd647aa3 Mon Sep 17 00:00:00 2001 From: Brad Date: Wed, 10 Mar 2021 12:18:15 -0600 Subject: [PATCH 050/359] [1pt] PR: Patch import in tools_shared_functions.py (#302) Minor patch to tools_shared_functions.py where tools_shared_functions.py was not imported correctly. --- CHANGELOG.md | 8 ++++++++ tools/tools_shared_functions.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da0e1cb32..7b9bce771 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v3.0.8.1 - 2021-03-10 - [PR #302](https://github.com/NOAA-OWP/cahaba/pull/302) + +Patched import issue in `tools_shared_functions.py`. + +### Changes + - Changed `utils.` to `tools_` in `tools_shared_functions.py` after recent structural change to `tools` directory. + +
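As a concrete illustration of the change this entry describes (a sketch, not part of the patch): with the former `tools/utils/` modules now flattened into `tools/`, the package-style import is replaced by the flat module name, for example

    from utils.shared_variables import TEST_CASES_DIR     # pre-3.0.8.1 layout
    from tools_shared_variables import TEST_CASES_DIR     # 3.0.8.1 layout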

## v3.0.8.0 - 2021-03-09 - [PR #279](https://github.com/NOAA-OWP/cahaba/pull/279) Refactored NWS Flood Categorical HAND FIM (CatFIM) pipeline to open source. diff --git a/tools/tools_shared_functions.py b/tools/tools_shared_functions.py index d36c22814..094f5e2a5 100755 --- a/tools/tools_shared_functions.py +++ b/tools/tools_shared_functions.py @@ -5,7 +5,7 @@ import csv import rasterio import pandas as pd -from utils.shared_variables import (TEST_CASES_DIR, PRINTWORTHY_STATS, GO_UP_STATS, GO_DOWN_STATS, +from tools_shared_variables import (TEST_CASES_DIR, PRINTWORTHY_STATS, GO_UP_STATS, GO_DOWN_STATS, ENDC, TGREEN_BOLD, TGREEN, TRED_BOLD, TWHITE, WHITE_BOLD, CYAN_BOLD) def check_for_regression(stats_json_to_test, previous_version, previous_version_stats_json_path, regression_test_csv=None): From 4364dc4899fee694e19e2960ce4dc1c537b97a3b Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 11 Mar 2021 11:36:42 -0600 Subject: [PATCH 051/359] Enhancements to post-processing for Viz-related use-cases. - Aggregate grids are projected to Web Mercator during -v runs in fim_run.sh. - HUC6 aggregation is parallelized. - Aggregate grid blocksize is changed from 256 to 1024 for faster postprocessing. This resolves #294 and resolves #295. --- CHANGELOG.md | 10 ++ fim_run.sh | 2 +- src/aggregate_fim_outputs.py | 206 +++++++++++++++++++++-------------- 3 files changed, 138 insertions(+), 80 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b9bce771..38b9f4980 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v3.0.8.2 - 2021-03-11 - [PR #296](https://github.com/NOAA-OWP/cahaba/pull/296) + +Enhancements to post-processing for Viz-related use-cases. + +### Changes + - Aggregate grids are projected to Web Mercator during `-v` runs in `fim_run.sh`. + - HUC6 aggregation is parallelized. + - Aggregate grid blocksize is changed from 256 to 1024 for faster postprocessing. + +
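A minimal sketch of what the first bullet amounts to (not part of the patch), assuming `VIZ_PROJECTION` in `utils/shared_variables.py` corresponds to Web Mercator (EPSG:3857) and using a hypothetical HUC6 file name; it mirrors the `reproject_raster` helper added below, including the larger 1024-pixel block size:

    import rasterio
    from rasterio.crs import CRS
    from rasterio.warp import calculate_default_transform, reproject, Resampling

    WEB_MERCATOR = CRS.from_epsg(3857)  # assumed value of VIZ_PROJECTION

    with rasterio.open('hand_grid_120903_unprj.tif') as src:   # hypothetical input
        transform, width, height = calculate_default_transform(
            src.crs, WEB_MERCATOR, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update(crs=WEB_MERCATOR, transform=transform, width=width,
                      height=height, compress='lzw')
        with rasterio.open('hand_grid_120903.tif', 'w', **kwargs, tiled=True,
                           blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dst:
            reproject(source=rasterio.band(src, 1), destination=rasterio.band(dst, 1),
                      src_transform=src.transform, src_crs=src.crs,
                      dst_transform=transform, dst_crs=WEB_MERCATOR,
                      resampling=Resampling.nearest)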

## v3.0.8.1 - 2021-03-10 - [PR #302](https://github.com/NOAA-OWP/cahaba/pull/302) Patched import issue in `tools_shared_functions.py`. diff --git a/fim_run.sh b/fim_run.sh index 5acdeff71..42a5d022e 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -152,5 +152,5 @@ fi echo "$viz" if [[ "$viz" -eq 1 ]]; then # aggregate outputs - python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir + python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 4 fi diff --git a/src/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py index edafd93a3..9d8676364 100644 --- a/src/aggregate_fim_outputs.py +++ b/src/aggregate_fim_outputs.py @@ -2,36 +2,37 @@ import os import argparse +from multiprocessing import Pool import pandas as pd import json import rasterio from rasterio.merge import merge +from rasterio.warp import calculate_default_transform, reproject, Resampling import shutil import csv -from utils.shared_variables import PREP_PROJECTION +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION +def aggregate_fim_outputs(args): -def aggregate_fim_outputs(fim_out_dir): + fim_out_dir = args[0] + huc6 = args[1] + huc_list = args[2] - print ("aggregating outputs to HUC6 scale") + print(f"aggregating {huc6}") - drop_folders = ['logs'] - huc_list = [huc for huc in os.listdir(fim_out_dir) if huc not in drop_folders] - huc6_list = [str(huc[0:6]) for huc in os.listdir(fim_out_dir) if huc not in drop_folders] - huc6_list = list(set(huc6_list)) + huc6_dir = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6)) + os.makedirs(huc6_dir, exist_ok=True) - for huc in huc_list: + # aggregate file name paths + aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),'hydroTable.csv') + aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),f'rating_curves_{huc6}.json') - os.makedirs(os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6])), exist_ok=True) + for huc in huc_list: # original file paths hydrotable_filename = os.path.join(fim_out_dir,huc,'hydroTable.csv') src_filename = os.path.join(fim_out_dir,huc,'src.json') - # aggregate file name paths - aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6]),'hydroTable.csv') - aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc[0:6]),f'rating_curves_{huc[0:6]}.json') - if len(huc)> 6: # open hydrotable @@ -68,107 +69,154 @@ def aggregate_fim_outputs(fim_out_dir): shutil.copy(hydrotable_filename, aggregate_hydrotable) shutil.copy(src_filename, aggregate_src) - for huc6 in huc6_list: - - ## add feature_id to aggregate src - aggregate_hydrotable = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),'hydroTable.csv') - aggregate_src = os.path.join(fim_out_dir,'aggregate_fim_outputs',str(huc6),f'rating_curves_{huc6}.json') - - # Open aggregate src for writing feature_ids to - src_data = {} - with open(aggregate_src) as jsonf: - src_data = json.load(jsonf) - - with open(aggregate_hydrotable) as csvf: - csvReader = csv.DictReader(csvf) - - for row in csvReader: - if row['HydroID'].lstrip('0') in src_data and 'nwm_feature_id' not in src_data[row['HydroID'].lstrip('0')]: - src_data[row['HydroID'].lstrip('0')]['nwm_feature_id'] = row['feature_id'] - - # Write src_data to JSON file - with open(aggregate_src, 'w') as jsonf: - json.dump(src_data, jsonf) + ## add feature_id to aggregate src + # Open aggregate src for writing feature_ids to + src_data = {} + with open(aggregate_src) as jsonf: + src_data = json.load(jsonf) - ## aggregate 
rasters - huc6_dir = os.path.join(fim_out_dir,'aggregate_fim_outputs',huc6) + with open(aggregate_hydrotable) as csvf: + csvReader = csv.DictReader(csvf) - # aggregate file paths - rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}.tif') - catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}.tif') + for row in csvReader: + if row['HydroID'].lstrip('0') in src_data and 'nwm_feature_id' not in src_data[row['HydroID'].lstrip('0')]: + src_data[row['HydroID'].lstrip('0')]['nwm_feature_id'] = row['feature_id'] - if huc6 not in huc_list: + # Write src_data to JSON file + with open(aggregate_src, 'w') as jsonf: + json.dump(src_data, jsonf) - huc6_filter = [path.startswith(huc6) for path in huc_list] - subset_huc6_list = [i for (i, v) in zip(huc_list, huc6_filter) if v] + ## aggregate rasters + # aggregate file paths + rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}_unprj.tif') + catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}_unprj.tif') - # aggregate and mosaic rem - rem_list = [os.path.join(fim_out_dir,huc,'rem_zeroed_masked.tif') for huc in subset_huc6_list] + if huc6 not in huc_list: - if len(rem_list) > 1: + huc6_filter = [path.startswith(huc6) for path in huc_list] + subset_huc6_list = [i for (i, v) in zip(huc_list, huc6_filter) if v] - rem_files_to_mosaic = [] + # aggregate and mosaic rem + rem_list = [os.path.join(fim_out_dir,huc,'rem_zeroed_masked.tif') for huc in subset_huc6_list] - for rem in rem_list: + if len(rem_list) > 1: - rem_src = rasterio.open(rem) - rem_files_to_mosaic.append(rem_src) + rem_files_to_mosaic = [] - mosaic, out_trans = merge(rem_files_to_mosaic) - out_meta = rem_src.meta.copy() - out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) + for rem in rem_list: - with rasterio.open(rem_mosaic, "w", **out_meta, tiled=True, blockxsize=256, blockysize=256, BIGTIFF='YES') as dest: - dest.write(mosaic) + rem_src = rasterio.open(rem) + rem_files_to_mosaic.append(rem_src) - del rem_files_to_mosaic,rem_src,out_meta,mosaic + mosaic, out_trans = merge(rem_files_to_mosaic) + out_meta = rem_src.meta.copy() + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) - elif len(rem_list)==1: + with rasterio.open(rem_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: + dest.write(mosaic) - shutil.copy(rem_list[0], rem_mosaic) + del rem_files_to_mosaic,rem_src,out_meta,mosaic - # aggregate and mosaic catchments - catchment_list = [os.path.join(fim_out_dir,huc,'gw_catchments_reaches_filtered_addedAttributes.tif') for huc in subset_huc6_list] + elif len(rem_list)==1: - if len(catchment_list) > 1: + shutil.copy(rem_list[0], rem_mosaic) - cat_files_to_mosaic = [] + # aggregate and mosaic catchments + catchment_list = [os.path.join(fim_out_dir,huc,'gw_catchments_reaches_filtered_addedAttributes.tif') for huc in subset_huc6_list] - for cat in catchment_list: - cat_src = rasterio.open(cat) - cat_files_to_mosaic.append(cat_src) + if len(catchment_list) > 1: - mosaic, out_trans = merge(cat_files_to_mosaic) - out_meta = cat_src.meta.copy() + cat_files_to_mosaic = [] - out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) + for cat in 
catchment_list: + cat_src = rasterio.open(cat) + cat_files_to_mosaic.append(cat_src) - with rasterio.open(catchment_mosaic, "w", **out_meta, tiled=True, blockxsize=256, blockysize=256, BIGTIFF='YES') as dest: - dest.write(mosaic) + mosaic, out_trans = merge(cat_files_to_mosaic) + out_meta = cat_src.meta.copy() - del cat_files_to_mosaic,cat_src,out_meta,mosaic + out_meta.update({"driver": "GTiff", "height": mosaic.shape[1], "width": mosaic.shape[2], "dtype": str(mosaic.dtype), "transform": out_trans,"crs": PREP_PROJECTION,'compress': 'lzw'}) - elif len(catchment_list)==1: + with rasterio.open(catchment_mosaic, "w", **out_meta, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dest: + dest.write(mosaic) - shutil.copy(catchment_list[0], catchment_mosaic) + del cat_files_to_mosaic,cat_src,out_meta,mosaic - else: - # original file paths - rem_filename = os.path.join(fim_out_dir,huc6,'rem_zeroed_masked.tif') - catchment_filename = os.path.join(fim_out_dir,huc6,'gw_catchments_reaches_filtered_addedAttributes.tif') + elif len(catchment_list)==1: - shutil.copy(rem_filename, rem_mosaic) - shutil.copy(catchment_filename, catchment_mosaic) + shutil.copy(catchment_list[0], catchment_mosaic) + else: + # original file paths + rem_filename = os.path.join(fim_out_dir,huc6,'rem_zeroed_masked.tif') + catchment_filename = os.path.join(fim_out_dir,huc6,'gw_catchments_reaches_filtered_addedAttributes.tif') + + shutil.copy(rem_filename, rem_mosaic) + shutil.copy(catchment_filename, catchment_mosaic) + + ## reproject rasters + reproject_raster(rem_mosaic) + os.remove(rem_mosaic) + + reproject_raster(catchment_mosaic) + os.remove(catchment_mosaic) + + +def reproject_raster(raster_name): + + with rasterio.open(raster_name) as src: + transform, width, height = calculate_default_transform( + src.crs, VIZ_PROJECTION, src.width, src.height, *src.bounds) + kwargs = src.meta.copy() + kwargs.update({ + 'crs': VIZ_PROJECTION, + 'transform': transform, + 'width': width, + 'height': height, + 'compress': 'lzw' + }) + + raster_proj_rename = os.path.split(raster_name)[1].replace('_unprj.tif', '.tif') + raster_proj_dir = os.path.join(os.path.dirname(raster_name), raster_proj_rename) + + with rasterio.open(raster_proj_dir, 'w', **kwargs, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dst: + # for i in range(1, src.count + 1): + reproject( + source=rasterio.band(src, 1), + destination=rasterio.band(dst, 1), + src_transform=src.transform, + src_crs=src.crs, + dst_transform=transform, + dst_crs=VIZ_PROJECTION, + resampling=Resampling.nearest) + del src, dst if __name__ == '__main__': parser = argparse.ArgumentParser(description='Aggregate layers buy HUC6') parser.add_argument('-d','--fim-outputs-directory', help='FIM outputs directory', required=True) + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. 
Default is 1.',required=False, default="1",type=int) args = vars(parser.parse_args()) fim_outputs_directory = args['fim_outputs_directory'] + number_of_jobs = int(args['number_of_jobs']) + + drop_folders = ['logs'] + huc_list = [huc for huc in os.listdir(fim_outputs_directory) if huc not in drop_folders] + huc6_list = [str(huc[0:6]) for huc in os.listdir(fim_outputs_directory) if huc not in drop_folders] + huc6_list = list(set(huc6_list)) + + + procs_list = [] + + for huc6 in huc6_list: + + limited_huc_list = [huc for huc in huc_list if huc.startswith(huc6)] + + procs_list.append([fim_outputs_directory,huc6,limited_huc_list]) - aggregate_fim_outputs(fim_outputs_directory) + print(f"aggregating {len(huc_list)} hucs to HUC6 scale using {number_of_jobs} jobs") + pool = Pool(number_of_jobs) + pool.map(aggregate_fim_outputs, procs_list) From 5fa77f1e33230e1480d0aef1b1feb4411ceaa7a4 Mon Sep 17 00:00:00 2001 From: NickChadwick-NOAA Date: Fri, 12 Mar 2021 11:02:25 -0600 Subject: [PATCH 052/359] Additional Enhancements for the FIM API Enhancements to FIM API. - fim_run.sh can now be run with jobs in parallel. - Viz post-processing can now be selected in API interface. - Jobs table shows jobs that end with errors. - HUC preset lists can now be selected in interface. - Better output_handler file writing. - Overall better restart and retry handlers for networking problems. - Jobs can now be canceled in API interface. - Both FR and MS configs can be selected for a single job. Resolves issues #231 and #265 --- CHANGELOG.md | 15 ++ Dockerfile.dev | 3 +- api/frontend/gui/templates/index.html | 113 ++++++-- api/frontend/output_handler/output_handler.py | 92 ++----- api/node/.env-template | 3 +- api/node/connector/Dockerfile | 2 +- api/node/connector/connector.py | 139 +++++----- api/node/docker-compose-prod.yml | 2 + api/node/updater/Dockerfile | 4 +- api/node/updater/updater.py | 243 +++++++++++++++--- 10 files changed, 434 insertions(+), 182 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38b9f4980..177bcfc03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,21 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v3.0.9.0 - 2021-03-12 - [PR #297](https://github.com/NOAA-OWP/cahaba/pull/297) + +Enhancements to FIM API. + +### Changes + - `fim_run.sh` can now be run with jobs in parallel. + - Viz post-processing can now be selected in API interface. + - Jobs table shows jobs that end with errors. + - HUC preset lists can now be selected in interface. + - Better `output_handler` file writing. + - Overall better restart and retry handlers for networking problems. + - Jobs can now be canceled in API interface. + - Both FR and MS configs can be selected for a single job. + +
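As a small illustration of the new cancel capability (a sketch, not part of the patch): the GUI emits a `cancel_job` event over Socket.IO, so a Python client pointed at the connector could do the same; the URL and job name below are hypothetical.

    import socketio

    sio = socketio.Client()
    sio.connect('http://localhost:6000')                        # hypothetical connector URL
    sio.emit('cancel_job', {'job_name': 'apijob_example_run'})  # hypothetical job name
    sio.disconnect()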

## v3.0.8.2 - 2021-03-11 - [PR #296](https://github.com/NOAA-OWP/cahaba/pull/296) Enhancements to post-processing for Viz-related use-cases. diff --git a/Dockerfile.dev b/Dockerfile.dev index dea61c348..d48d3f0d0 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -71,8 +71,7 @@ RUN mkdir -p $depDir COPY --from=builder $depDir $depDir -RUN apt update --fix-missing -RUN apt install -y p7zip-full python3-pip time mpich=3.3.2-2build1 parallel=20161222-1.1 libgeos-dev=3.8.0-1build1 expect=5.45.4-2build1 +RUN apt update --fix-missing && apt install -y p7zip-full python3-pip time mpich=3.3.2-2build1 parallel=20161222-1.1 libgeos-dev=3.8.0-1build1 expect=5.45.4-2build1 RUN DEBIAN_FRONTEND=noninteractive apt install -y grass=7.8.2-1build3 grass-doc=7.8.2-1build3 diff --git a/api/frontend/gui/templates/index.html b/api/frontend/gui/templates/index.html index abccdd7cc..4dd2c2fc3 100644 --- a/api/frontend/gui/templates/index.html +++ b/api/frontend/gui/templates/index.html @@ -2,6 +2,7 @@ Cahaba API + @@ -216,7 +233,8 @@

Basic

- + +

Configuration

@@ -233,17 +251,21 @@

Configuration

Extent

- +
- +
- + +
+
+ +
@@ -266,6 +288,8 @@

Extent

Time Elapsed Status Outputs Saved + + @@ -296,27 +320,43 @@

Extent

} }) - socket.on('client_update', jobs => { - let jobsTableBody = '' + socket.on('client_update', ({jobUpdates, presetsList}) => { + if(document.querySelector('#presets-list').innerHTML === '') { + let presetsSelectBody = '' + if (Object.keys(presetsList).length === 0) + document.querySelector('#presets-list').style.display = 'none' + else + document.querySelector('#presets-list').style.display = 'block' + Object.values(presetsList).forEach(p => { + presetsSelectBody += `` + }) + presetsSelectBody += `` + document.querySelector('#presets-list').innerHTML = presetsSelectBody + } + - if (Object.keys(jobs).length === 0) + let jobsTableBody = '' + if (jobUpdates.length === 0) document.querySelector('#job-list-wrapper').style.display = 'none' else document.querySelector('#job-list-wrapper').style.display = 'block' - - Object.values(jobs).forEach(j => { + jobUpdates.forEach(j => { let outputs_saved = "N/A" - if (Object.keys(j['output_files_saved']).length > 0) { - const total = Object.keys(j['output_files_saved']).length - const current = Object.values(j['output_files_saved']).filter(v => v === true).length - outputs_saved = `${current}/${total}` + if (j['total_output_files_length']) { + outputs_saved = `${j['current_output_files_saved_length']}/${j['total_output_files_length']}` } - jobsTableBody += ` + statusClass = '' + if (j['status'] === 'Completed') statusClass = 'class="job-completed"' + if (j['status'] === 'Error' || j['exit_code'] !== 0) statusClass = 'class="job-error"' + + jobsTableBody += ` ${j['nice_name'].replace(/_/g, ' ')} - ${j['time_elapsed']} Seconds + ${new Date(j['time_elapsed'] * 1000).toISOString().substr(11, 8)} ${j['status']} ${outputs_saved} + + ` }) document.querySelector('#job-list').innerHTML = jobsTableBody @@ -327,32 +367,40 @@

Extent

document.querySelector('#fim-run-errors').innerHTML = validation_errors.map(e => `${e}`).join('') }) - socket.on('job_started', job_type => { + socket.on('job_added', job_type => { document.querySelector('#request-container').classList.remove('loading') if (job_type === 'fim_run') { document.querySelector('#job-name').value = '' document.querySelector('#hucs').value = '' document.querySelector('#git-branch').value = '' document.querySelector("input[name='dev-run']").checked = false + document.querySelector("input[name='dev-run']").disabled = false + document.querySelector("input[name='viz-run']").checked = false document.querySelector("input[name='configuration'][value='default']").checked = true document.querySelector("input[name='extent'][value='FR']").checked = true + document.querySelector("input[name='extent'][value='MS']").checked = false document.querySelector('#fim-run-errors').innerHTML = '' } }) + socket.on('job_canceled', () => { + console.log("Job has been cancelled") + }) }) // Fim_run related code const submitRequest = requestName => { let validation_errors = [] const job_name = document.querySelector('#job-name').value + const preset = document.querySelector('#presets-list').value const hucs = document.querySelector('#hucs').value const git_branch = document.querySelector('#git-branch').value const dev_run = document.querySelector("input[name='dev-run']").checked + const viz_run = document.querySelector("input[name='viz-run']").checked const configuration = Array.from(document.querySelectorAll("input[name='configuration']")).filter(c => c.checked)[0].value - const extent = Array.from(document.querySelectorAll("input[name='extent']")).filter(c => c.checked)[0].value + const extents = Array.from(document.querySelectorAll("input[name='extent']")).filter(c => c.checked).map(c => c.value) if (job_name === '') validation_errors.push('Job Name Cannot Be Empty') - if (hucs === '') validation_errors.push('Huc(s) Cannot Be Empty') + if (preset === 'custom' && hucs === '') validation_errors.push('Huc(s) Cannot Be Empty') if (git_branch === '') validation_errors.push('Git Branch Cannot Be Empty') if (validation_errors.length > 0){ @@ -364,14 +412,41 @@

Extent

socket.emit('new_job', { job_name, + preset, hucs, git_branch, dev_run, + viz_run, configuration, - extent + extents }) } + const disableDevRun = e => { + if (e.target.checked){ + document.querySelector("input[name='dev-run']").checked = false + document.querySelector("input[name='dev-run']").disabled = true + } else { + document.querySelector("input[name='dev-run']").disabled = false + } + } + + const presetListSelected = e => { + if (e.target.value === 'custom') + document.querySelector('#hucs').style.display = 'flex' + else + document.querySelector('#hucs').style.display = 'none' + } + + const getJobInfo = jobName => { + console.log(jobName) + } + + const cancelJob = jobName => { + console.log(jobName) + socket.emit('cancel_job', {job_name: jobName}) + } + const selectForm = (e, formName) => { // Close all forms and remove active from all tabs const forms = document.getElementsByClassName('request-form') diff --git a/api/frontend/output_handler/output_handler.py b/api/frontend/output_handler/output_handler.py index 1d7de4a88..f665c5669 100644 --- a/api/frontend/output_handler/output_handler.py +++ b/api/frontend/output_handler/output_handler.py @@ -5,83 +5,37 @@ SOCKET_URL = os.environ.get('SOCKET_URL') -pending_files = {} - def handle_outputs(data): - name = f"{data['job_name']}_{data['file_name']}" - if name not in pending_files: - pending_files[name] = { - 'locked': False, - 'current_index': 0, - 'nice_name': data['nice_name'], - 'job_name': data['job_name'], - 'directory_path': data['directory_path'], - 'file_name': data['file_name'] - } - - pending_files[name][data['chunk_index']] = data['file_chunk'] - - - work_to_do = True - while work_to_do: - work_to_do = False - - nice_name = pending_files[name]['nice_name'] - job_name = pending_files[name]['job_name'] - directory_path = pending_files[name]['directory_path'] - file_name = pending_files[name]['file_name'] - - # If the last chunk just got added, waiting to write any potentially missing data to file - if data['file_chunk'] == None and pending_files[name]['locked']: - while name in pending_files and pending_files[name]['locked']: - print("EOF, waiting till not locked") - sio.sleep(0.5) - if not name in pending_files: - return - - # To ensure that the files are being written in the correct order, use current_index - # to write the correct file chunk. 
- if not pending_files[name]['locked'] and pending_files[name]['current_index'] in pending_files[name]: - pending_files[name]['locked'] = True - file_chunk = pending_files[name].pop(pending_files[name]['current_index']) - - # End of file - if file_chunk == None: - if sio.connected: - sio.emit('output_handler_finished_file', {'job_name': job_name, 'file_path': f"{directory_path}/{file_name}"}) - print("finished with file", name, directory_path, file_name) - # files_to_delete.append(name) - pending_files.pop(name) - continue - else: - # Not end of file, keep looping till you can't do more work - work_to_do = True - - # Create folder if it doesn't yet exist and set writing mode - mode = 'ab' - if pending_files[name]['current_index'] == 0: - mode = 'wb' - try: - os.makedirs(f"/data/outputs/{nice_name}/{directory_path}") - except: - pass - - # Write binary data to file - with open(f"/data/outputs/{nice_name}/{directory_path}/{file_name}", mode) as binary_file: - print(f"Writing chunk {pending_files[name]['current_index']} for file {directory_path}/{file_name}") - binary_file.write(file_chunk) + nice_name = data['nice_name'] + job_name = data['job_name'] + directory_path = data['directory_path'] + file_name = data['file_name'] + file_chunk = data['file_chunk'] + chunk_index = data['chunk_index'] + + # Create folder if it doesn't yet exist and set writing mode + mode = 'ab' + if chunk_index == 0: + mode = 'wb' + try: + os.makedirs(f"/data/outputs/{nice_name}/{directory_path}") + except: + pass + + # Write binary data to file + with open(f"/data/outputs/{nice_name}/{directory_path}/{file_name}", mode) as binary_file: + print(f"Writing chunk {chunk_index} for file {directory_path}/{file_name}") + binary_file.write(file_chunk) + + # TODO: Write an emit that will trigger the next chunk to be sent. 
+ sio.emit('output_handler_finished_file_chunk', {'job_name': job_name, 'file_path': f"{directory_path}/{file_name}"}) - # Remove current chunk from list - pending_files[name]['current_index'] += 1 - pending_files[name]['locked'] = False - sio = socketio.Client() @sio.event def connect(): print("Output Handler Connected!") sio.emit('output_handler_connected') - @sio.event def disconnect(): diff --git a/api/node/.env-template b/api/node/.env-template index 6df26095b..352f2d2ca 100644 --- a/api/node/.env-template +++ b/api/node/.env-template @@ -2,4 +2,5 @@ DATA_PATH= DOCKER_IMAGE_PATH= SOCKET_URL= FRONTEND_URL= -GITHUB_REPO=https://github.com/NOAA-OWP/cahaba.git \ No newline at end of file +GITHUB_REPO=https://github.com/NOAA-OWP/cahaba.git +MAX_ALLOWED_CPU_CORES= diff --git a/api/node/connector/Dockerfile b/api/node/connector/Dockerfile index ffdb1581e..091fdb364 100644 --- a/api/node/connector/Dockerfile +++ b/api/node/connector/Dockerfile @@ -1,4 +1,4 @@ -FROM docker:19.03.14-dind +FROM docker:20.10.2-dind RUN apk add --no-cache python3 python3-dev py3-pip build-base openssl-dev libffi-dev git diff --git a/api/node/connector/connector.py b/api/node/connector/connector.py index 0148947c2..992f0f0f5 100644 --- a/api/node/connector/connector.py +++ b/api/node/connector/connector.py @@ -3,6 +3,7 @@ import os import re +import time import random import logging import subprocess @@ -40,8 +41,8 @@ def ws_disconn(): emit('is_connected', False) @socketio.on('update') -def ws_update(current_jobs): - emit('client_update', current_jobs, broadcast=True) +def ws_update(data): + emit('client_update', data, broadcast=True) @socketio.on('output_handler_connected') def ws_output_handler_connected(): @@ -60,8 +61,7 @@ def ws_ready_for_output_handler(data): nice_name = data['nice_name'] job_name = data['job_name'] path = data['path'] - - print(f"handler_sid: {shared_data['handler_sid']}") + chunk_index = data['chunk_index'] if shared_data['handler_sid'] == None: print("output handler not connected!") @@ -73,58 +73,60 @@ def ws_ready_for_output_handler(data): directory_path = path_parts.group(1) file_name = path_parts.group(2) + file_read_start = time.time() with open(path, "rb") as binary_file: - print("Sending to output handler", path) - # Read and emit file chunk by chunk (50MB at a time) - chunk_index = 0 + binary_file.seek(chunk_index * 52428800) file_chunk = binary_file.read(52428800) - # file_chunk = binary_file.read(104857600) - while file_chunk: - print("Sending to output handler", path, "Chunk:", chunk_index) - emit('new_job_outputs', { - 'nice_name': nice_name, + + if len(file_chunk) == 0: + print('End of File') + emit('file_saved', { 'job_name': job_name, - 'directory_path': directory_path, - 'file_name': file_name, - 'file_chunk': file_chunk, - 'chunk_index': chunk_index - }, room=shared_data['handler_sid']) - - chunk_index += 1 - file_chunk = binary_file.read(52428800) - # file_chunk = binary_file.read(104857600) - - # Send None to indicate end of file - print("Sending to output handler", path, "Chunk:", chunk_index, "EOF") - emit('new_job_outputs', { - 'nice_name': nice_name, - 'job_name': job_name, - 'directory_path': directory_path, - 'file_name': file_name, - 'file_chunk': None, - 'chunk_index': chunk_index - }, room=shared_data['handler_sid']) - -@socketio.on('output_handler_finished_file') -def ws_output_handler_finished_file(data): + 'file_path': path + }, room=shared_data['updater_sid']) + return + + print("Sending to output handler", path, "Chunk:", chunk_index) + 
emit('new_job_outputs', { + 'nice_name': nice_name, + 'job_name': job_name, + 'directory_path': directory_path, + 'file_name': file_name, + 'file_chunk': file_chunk, + 'chunk_index': chunk_index + }, room=shared_data['handler_sid']) + +@socketio.on('output_handler_finished_file_chunk') +def output_handler_finished_file_chunk(data): job_name = data['job_name'] file_path = data['file_path'] - print('done saving', job_name, file_path) - emit('file_saved', { + print('done saving chunk', job_name, file_path) + emit('file_chunk_saved', { 'job_name': job_name, - 'file_path': f"/data/outputs/{job_name}/{file_path}" + 'file_path': f"/data/outputs/{job_name}/{file_path}", }, room=shared_data['updater_sid']) @socketio.on('new_job') def ws_new_job(job_params): validation_errors = [] + # Get Preset Option + preset = job_params['preset'] + # Validate Hucs Name Option - hucs = ' '.join(job_params['hucs'].replace(',', ' ').split()) - invalid_hucs = re.search('[a-zA-Z]', hucs) - if invalid_hucs: validation_errors.append('Invalid Huc(s)') + if preset == 'custom': + hucs_raw = job_params['hucs'].replace(',', ' ').split() + parallel_jobs = len(hucs_raw) + hucs_type = len(hucs_raw[0]) + hucs = ' '.join(hucs_raw) + invalid_hucs = re.search('[^0-9 ]', hucs) + if invalid_hucs: validation_errors.append('Invalid Huc(s)') + else: + hucs = f"/data/inputs/huc_lists/{preset}" + parallel_jobs = 0 + hucs_type = 0 # Validate Git Branch Option branch = '' @@ -132,14 +134,14 @@ def ws_new_job(job_params): if branch_exists: branch = job_params['git_branch'].replace(' ', '_') else: validation_errors.append('Git Branch Does Not Exist') - # Validate Job Name Option - job_name = f"apijob_{job_params['job_name'].replace(' ', '_')[0:50]}_apijob_{branch}_{date.today().strftime('%d%m%Y')}_{random.randint(0, 99999)}" - # Validate Extent Option - extent = '' - if job_params['extent'] == 'FR': extent = 'FR' - elif job_params['extent'] == 'MS': extent = 'MS' - else: validation_errors.append('Invalid Extent Option') + valid_extents = ['FR', 'MS'] + extents = [] + for extent in job_params['extents']: + if extent in valid_extents: + extents.append(extent) + else: + validation_errors.append('Invalid Extent Option') # Validate Configuration Option config_path = '' @@ -151,19 +153,40 @@ def ws_new_job(job_params): if job_params['dev_run'] : dev_run = True else: dev_run = False - if len(validation_errors) == 0: - # Clone github repo, with specific branch, to a temp folder - print(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name}') - subprocess.call(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name}', shell=True) - - # TODO: instead of starting the job right away, add it to a queue until there are enough resources to run it. 
Also track things like huc count and huc type (6 or 8) + # Validate Viz Run Option + if job_params['viz_run'] : viz_run = True + else: viz_run = False - # Kick off the new job as a docker container with the new cloned repo as the volume - print(f"docker run -d --rm --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {job_name} -o {'' if dev_run else '-p'}") - subprocess.call(f"docker run -d --rm --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {job_name} -o {'' if dev_run else '-p'}", shell=True) - emit('job_started', 'fim_run') + if len(validation_errors) == 0: + for extent in extents: + # Validate Job Name Option + job_name = f"apijob_{job_params['job_name'].replace(' ', '_')[0:50]}_{extent}_apijob_{branch}_{date.today().strftime('%d%m%Y')}_{random.randint(0, 99999)}" + print(f"adding job {job_name} {branch} {preset} {hucs} {parallel_jobs} {hucs_type} {extent} {config_path} {dev_run} {viz_run}") + emit('add_job_to_queue', { + 'job_name': job_name, + 'branch': branch, + 'hucs': hucs, + 'parallel_jobs': parallel_jobs, + 'hucs_type': hucs_type, + 'extent': extent, + 'config_path': config_path, + 'dev_run': dev_run, + 'viz_run': viz_run, + }, room=shared_data['updater_sid']) + print('job added') + emit('job_added', 'fim_run') else: emit('validation_errors', validation_errors) + + @socketio.on('cancel_job') + def ws_cancel_job(job_params): + # Validate Job Name Option + job_name = job_params['job_name'] + + emit('remove_job_from_queue', {'job_name': job_name}, room=shared_data['updater_sid']) + print('job canceled') + emit('job_canceled', 'fim_run') + if __name__ == '__main__': socketio.run(app, host="0.0.0.0", port="6000") diff --git a/api/node/docker-compose-prod.yml b/api/node/docker-compose-prod.yml index 3e1ee654a..f9787ab59 100644 --- a/api/node/docker-compose-prod.yml +++ b/api/node/docker-compose-prod.yml @@ -22,6 +22,8 @@ services: build: context: ./updater container_name: fim_node_updater + env_file: + - .env restart: always depends_on: - fim_node_connector diff --git a/api/node/updater/Dockerfile b/api/node/updater/Dockerfile index e179f3a42..d62a77652 100644 --- a/api/node/updater/Dockerfile +++ b/api/node/updater/Dockerfile @@ -1,6 +1,6 @@ -FROM docker:19.03.14-dind +FROM docker:20.10.2-dind -RUN apk add --no-cache python3 python3-dev py3-pip +RUN apk add --no-cache python3 python3-dev py3-pip build-base openssl-dev libffi-dev git ENV PYTHONUNBUFFERED 1 RUN mkdir -p /opt/updater diff --git a/api/node/updater/updater.py b/api/node/updater/updater.py index 7bc65836c..ea162f50d 100644 --- a/api/node/updater/updater.py +++ b/api/node/updater/updater.py @@ -5,42 +5,78 @@ import json import shutil import logging +import subprocess import socketio DATA_PATH = os.environ.get('DATA_PATH') +DOCKER_IMAGE_PATH = os.environ.get('DOCKER_IMAGE_PATH') +GITHUB_REPO = os.environ.get('GITHUB_REPO') +MAX_ALLOWED_CPU_CORES = int(os.environ.get('MAX_ALLOWED_CPU_CORES')) -connected = False shared_data = { - 'connected': False + 'connected': False, + 'current_saving_job': '' } +buffer_jobs = [] +buffer_remove_jobs = [] current_jobs = {} if os.path.exists('/data/outputs/current_jobs.json'): with open('/data/outputs/current_jobs.json') as f: current_jobs = json.load(f) + for job_name in current_jobs.keys(): + if 'is_actively_saving' in current_jobs[job_name] and 
current_jobs[job_name]['is_actively_saving'] == True: + shared_data['current_saving_job'] = current_jobs[job_name] + # Get all the current running jobs from the list of docker containers, store that data in a dictionary # along with any other needed metadata (like if it's still running, doing post processing, copying outputs # to its destination, etc), and then update the websocket server of the status of the jobs. def update_loop(): while True: + # If there are no current jobs, just check every 10 seconds till there is + if len(current_jobs.keys()) == 0: sio.sleep(10) + + while len(buffer_jobs) > 0: + new_job = buffer_jobs.pop() + current_jobs[new_job['job_name']] = new_job + + while len(buffer_remove_jobs) > 0: + job_to_remove = buffer_remove_jobs.pop() + current_jobs[job_to_remove['job_name']]['status'] = 'Cancelled' + # Get list of current docker containers that are fim run jobs - job_names = os.popen("docker container ls --filter=name=apijob --format '{{.Names}}'").read().splitlines() - for job_name in job_names: - if job_name not in current_jobs: - # If it's a new job, add it to the dictionary - current_jobs[job_name] = { - 'job_name': job_name, - 'nice_name': re.search(r"apijob_(.+)_apijob.+", job_name).group(1), - 'status': 'In Progress', - 'time_started': time.time(), - 'time_elapsed': 0, - 'output_files_saved': {} - } + # docker ps --all --filter=name=apijob --format='{{.Names}} {{.State}}' + containers_raw = os.popen("docker ps --all --filter=name=apijob --format='{{.Names}} {{.State}}'").read().splitlines() + containers_split = [ line.split() for line in containers_raw ] + container_states = { name: state for (name, state) in containers_split } jobs_to_delete = [] for job_name in current_jobs.keys(): + sio.sleep(0) + if job_name in container_states: + current_jobs[job_name]['container_state'] = container_states[job_name] + + # If the user chooses to cancel the job early + if current_jobs[job_name]['status'] == 'Cancelled': + # If the docker container is running, stop and remove it + if current_jobs[job_name]['time_elapsed'] > 0 and current_jobs[job_name]['container_state'] != 'exited': + subprocess.call(f"docker container stop {job_name}", shell=True) + subprocess.call(f"docker container rm {job_name}", shell=True) + + print("output_handler finished, deleted temp source files and output files") + temp_path = f"/data/temp/{job_name}" + if os.path.isdir(temp_path): + shutil.rmtree(temp_path) + + outputs_path = f"/data/outputs/{job_name}" + if os.path.isdir(outputs_path): + shutil.rmtree(outputs_path) + + jobs_to_delete.append(job_name) + + # Update the time elapsed for all jobs that are currently in progress or saving outputs if current_jobs[job_name]['status'] == 'In Progress' or current_jobs[job_name]['status'] == 'Ready to Save File'\ or current_jobs[job_name]['status'] == 'Saving File': @@ -48,30 +84,96 @@ def update_loop(): # TODO: While job is in progress, keep track of how many hucs are done and overall progress % + # Once resources become available, start a new job that is in queue + if current_jobs[job_name]['status'] == 'In Queue': + # TODO: Start Docker containers here and set start time. + current_jobs[job_name]['time_started'] = time.time() + + total_active_cores = 0 + for j in current_jobs.keys(): + if current_jobs[j]['status'] == 'In Progress': + # This is to account for the fact that HUC6's take a lot more resources to run. 
+ # (not necessarily cpu cores but rather RAM, so this artificially reduces how many jobs can run when HUC6's + # are running) + # HACK: this is more of a temporary solution until we no longer need to run HUC6's + if current_jobs[j]['hucs_type'] == '6': + total_active_cores += current_jobs[j]['parallel_jobs'] * 5 + else: + total_active_cores += current_jobs[j]['parallel_jobs'] + + # Machine has enough resources to run a new job + potential_active_cores = 0 + if current_jobs[job_name]['hucs_type'] == '6': + potential_active_cores = current_jobs[job_name]['parallel_jobs'] * 5 + total_active_cores + else: + potential_active_cores = current_jobs[job_name]['parallel_jobs'] + total_active_cores + + # print(f"Checking whether a new job can start {potential_active_cores} <= {MAX_ALLOWED_CPU_CORES}") + # print(potential_active_cores <= MAX_ALLOWED_CPU_CORES) + if potential_active_cores <= MAX_ALLOWED_CPU_CORES: + job_name = current_jobs[job_name]['job_name'] + branch = current_jobs[job_name]['branch'] + hucs = current_jobs[job_name]['hucs'] + parallel_jobs = current_jobs[job_name]['parallel_jobs'] + extent = current_jobs[job_name]['extent'] + config_path = current_jobs[job_name]['config_path'] + dev_run = current_jobs[job_name]['dev_run'] + viz_run = current_jobs[job_name]['viz_run'] + + # Clone github repo, with specific branch, to a temp folder + print(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name}') + subprocess.call(f'cd /data/temp && git clone -b {branch} {GITHUB_REPO} {job_name}', shell=True) + + # Kick off the new job as a docker container with the new cloned repo as the volume + print(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {job_name} -o {'' if dev_run else '-p'} {'-v' if viz_run else ''} -j {parallel_jobs}") + subprocess.call(f"docker run -d --name {job_name} -v {DATA_PATH}:/data/ -v {DATA_PATH}temp/{job_name}/:/foss_fim {DOCKER_IMAGE_PATH} fim_run.sh -u \"{hucs}\" -e {extent} -c {config_path} -n {job_name} -o {'' if dev_run else '-p'} {'-v' if viz_run else ''} -j {parallel_jobs}", shell=True) + current_jobs[job_name]['status'] = 'In Progress' + # Once the Docker container is done, set the job as ready to save output - if current_jobs[job_name]['status'] == 'In Progress' and job_name not in job_names: + if current_jobs[job_name]['status'] == 'In Progress' and current_jobs[job_name]['container_state'] == 'exited': + + # Get container exit code, get the docker log, and then remove container + exit_code_raw = os.popen(f"docker inspect {job_name}" + " --format='{{.State.ExitCode}}'").read().splitlines() + + print("Exit code") + print(exit_code_raw) + print(exit_code_raw[0]) + try: + print(int(exit_code_raw[0])) + except: + pass + + exit_code = int(exit_code_raw[0]) + current_jobs[job_name]['exit_code'] = exit_code + subprocess.call(f"docker logs {job_name} >& /data/outputs/{job_name}/logs/docker.log", shell=True) + subprocess.call(f"docker container rm {job_name}", shell=True) + for path, folders, files in os.walk(f"/data/outputs/{job_name}"): for file in files: - current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = False + current_jobs[job_name]['output_files_saved'][os.path.join(path, file)] = 0 + current_jobs[job_name]['total_output_files_length'] = len(current_jobs[job_name]['output_files_saved'].keys()) current_jobs[job_name]['status'] = 'Ready to Save File' - # TODO: Possible check the completed job's log for its exit 
code # Trigger connector to transmit the outputs to the output_handler # If the output_handler is offline, it will keep retrying until the output_handler is online - if current_jobs[job_name]['status'] == 'Ready to Save File': + if current_jobs[job_name]['status'] == 'Ready to Save File' and (shared_data['current_saving_job'] == '' or shared_data['current_saving_job'] == current_jobs[job_name]): print(f"{job_name} ready for output handler") - outputs_to_save = [] + + shared_data['current_saving_job'] = current_jobs[job_name] + current_jobs[job_name]['is_actively_saving'] = True + output_to_save = {} for path in current_jobs[job_name]['output_files_saved']: - if current_jobs[job_name]['output_files_saved'][path] == False: - outputs_to_save.append(path) + if current_jobs[job_name]['output_files_saved'][path] != -1: + output_to_save = {'path': path, 'chunk_index': current_jobs[job_name]['output_files_saved'][path]} - if len(outputs_to_save) > 0: + if output_to_save != {}: if shared_data['connected']: sio.emit('ready_for_output_handler', { 'nice_name': current_jobs[job_name]['nice_name'], 'job_name': job_name, - 'path': outputs_to_save[0] + 'path': output_to_save['path'], + 'chunk_index': output_to_save['chunk_index'] }) current_jobs[job_name]['status'] = 'Saving File' @@ -80,7 +182,7 @@ def update_loop(): if current_jobs[job_name]['status'] == 'Saving File': is_done = True for path in current_jobs[job_name]['output_files_saved']: - if current_jobs[job_name]['output_files_saved'][path] == False: + if current_jobs[job_name]['output_files_saved'][path] != -1: is_done = False break @@ -94,12 +196,14 @@ def update_loop(): if os.path.isdir(outputs_path): shutil.rmtree(outputs_path) - current_jobs[job_name]['status'] = 'Completed' + current_jobs[job_name]['status'] = 'Completed' if current_jobs[job_name]['exit_code'] == 0 else 'Error' + shared_data['current_saving_job'] = '' + current_jobs[job_name]['is_actively_saving'] = False print(f"{job_name} completed") # TODO: Insert Slack notification here for finished job # Remove job from list after it's been completed for more than 15 minutes - if current_jobs[job_name]['status'] == 'Completed' and \ + if (current_jobs[job_name]['status'] == 'Completed' or current_jobs[job_name]['status'] == 'Error') and \ time.time() >= current_jobs[job_name]['time_started'] + current_jobs[job_name]['time_elapsed'] + 900: print(f"{job_name} removed from job list") jobs_to_delete.append(job_name) @@ -107,13 +211,27 @@ def update_loop(): for job in jobs_to_delete: del current_jobs[job] + # TODO: Get Preset Lists here + presets_list = [] + for path, folders, files in os.walk(f"/data/inputs/huc_lists"): + for file in files: + presets_list.append(file) + # Send updates to the connector and write job progress to file - if shared_data['connected']: sio.emit('update', current_jobs) + job_updates = [ { + 'job_name': job['job_name'], + 'nice_name': job['nice_name'], + 'status': job['status'], + 'exit_code': job['exit_code'], + 'time_elapsed': job['time_elapsed'], + 'total_output_files_length': job['total_output_files_length'], + 'current_output_files_saved_length': job['current_output_files_saved_length'], + } for job in current_jobs.values()] + + if shared_data['connected']: sio.emit('update', {'jobUpdates': job_updates, 'presetsList': presets_list}) with open('/data/outputs/current_jobs.json', 'w') as f: json.dump(current_jobs, f) - time.sleep(1) - sio = socketio.Client() @sio.event @@ -127,20 +245,85 @@ def disconnect(): print('disconnected from server') 
shared_data['connected'] = False +@sio.on('add_job_to_queue') +def ws_add_job_to_queue(data): + job_name = data['job_name'] + branch = data['branch'] + hucs = data['hucs'] + parallel_jobs = data['parallel_jobs'] + hucs_type = data['hucs_type'] + extent = data['extent'] + config_path = data['config_path'] + dev_run = data['dev_run'] + viz_run = data['viz_run'] + + # This is a preset list instead of a custom list of hucs + if hucs_type == 0: + if os.path.exists(hucs): + with open(hucs, "r") as preset_file: + hucs_raw = preset_file.read().splitlines() + parallel_jobs = len(hucs_raw) + hucs_type = len(hucs_raw[0]) + print(f"{hucs} {parallel_jobs} {hucs_type}") + + + parallel_jobs = parallel_jobs if parallel_jobs <= MAX_ALLOWED_CPU_CORES else MAX_ALLOWED_CPU_CORES + + buffer_jobs.append({ + 'job_name': job_name, + 'branch': branch, + 'hucs': hucs, + 'parallel_jobs': parallel_jobs, + 'hucs_type': hucs_type, + 'extent': extent, + 'config_path': config_path, + 'dev_run': dev_run, + 'viz_run': viz_run, + 'nice_name': re.search(r"apijob_(.+)_apijob.+", job_name).group(1), + 'status': 'In Queue', + 'time_started': 0, + 'time_elapsed': 0, + 'output_files_saved': {}, + 'total_output_files_length': 0, + 'current_output_files_saved_length': 0, + 'output_files_saved': {}, + 'container_state': 'running', + 'exit_code': 0, + 'is_actively_saving': False + }) + +@sio.on('remove_job_from_queue') +def ws_remove_job_from_queue(data): + job_name = data['job_name'] + buffer_remove_jobs.append({'job_name': job_name}) + # If the output_handler is offline, try the saving process again @sio.on('retry_saving_files') def ws_retry_saving_files(): print('saving files failed, retrying') for job_name in current_jobs: if current_jobs[job_name]['status'] == "Saving File": + for path in current_jobs[job_name]['output_files_saved']: + if current_jobs[job_name]['output_files_saved'][path] != -1: + current_jobs[job_name]['output_files_saved'][path] = 0 + current_jobs[job_name]['status'] = 'Ready to Save File' +@sio.on('file_chunk_saved') +def ws_file_chunk_saved(data): + job_name = data['job_name'] + file_path = data['file_path'] + + current_jobs[job_name]['output_files_saved'][file_path] += 1 + current_jobs[job_name]['status'] = 'Ready to Save File' + @sio.on('file_saved') def ws_file_saved(data): job_name = data['job_name'] file_path = data['file_path'] - current_jobs[job_name]['output_files_saved'][file_path] = True + current_jobs[job_name]['output_files_saved'][file_path] = -1 + current_jobs[job_name]['current_output_files_saved_length'] += 1 current_jobs[job_name]['status'] = 'Ready to Save File' sio.connect('http://fim_node_connector:6000/') From 665f53488432ac598f0b4142228843dcb05cc81f Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Fri, 12 Mar 2021 14:24:40 -0600 Subject: [PATCH 053/359] Preprocessing of flow files for Categorical FIM. - Generate Categorical FIM flow files for each category (action, minor, moderate, major). - Generate point shapefile of Categorical FIM sites. - Generate csv of attribute data in shapefile. - Aggregate all shapefiles and csv files into one file in parent directory. - Add flood of record category. - Stability fixes to generate_categorical_fim.py. This resolves #243 and resolves #274. 
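As a rough illustration of the aggregation step listed in this commit message, here is a minimal sketch using the pandas/geopandas stack already pinned in the Pipfile. The workspace path, the `*_attributes.csv` / `*_site.shp` naming patterns, and the output filenames are assumptions made for illustration only; they are not taken from `generate_categorical_fim_flows.py`.

```python
# Hypothetical sketch: gather the per-site attribute CSVs and point shapefiles
# assumed to be written under <workspace>/<huc>/ and combine them into single
# files in the parent directory. All paths and names below are assumptions.
from pathlib import Path

import pandas as pd
import geopandas as gpd

workspace = Path('/data/catfim')  # assumed parent output directory

# Aggregate the per-site attribute tables into one CSV.
csv_paths = sorted(workspace.glob('*/*_attributes.csv'))
if csv_paths:
    attributes = pd.concat((pd.read_csv(p, dtype={'huc': str}) for p in csv_paths),
                           ignore_index=True)
    attributes.to_csv(workspace / 'nws_lid_attributes.csv', index=False)

# Aggregate the per-site point shapefiles into one shapefile.
shp_paths = sorted(workspace.glob('*/*_site.shp'))
if shp_paths:
    frames = [gpd.read_file(p) for p in shp_paths]
    sites = gpd.GeoDataFrame(pd.concat(frames, ignore_index=True))
    sites.crs = frames[0].crs
    sites.to_file(str(workspace / 'catfim_sites.shp'))
```

The single `pd.concat` per file type mirrors the "aggregate all shapefiles and csv files into one file in parent directory" wording above; the script shipped in this patch may structure the directory walk and output names differently.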
--- .gitignore | 3 +- CHANGELOG.md | 13 + Dockerfile.dev | 2 +- Pipfile | 3 + Pipfile.lock | 393 +++++++++-------- src/add_crosswalk.py | 4 +- tools/.env.template | 3 + tools/.gitignore | 1 + tools/generate_categorical_fim.py | 34 +- tools/generate_categorical_fim_flows.py | 236 ++++++++++ tools/tools_shared_functions.py | 545 ++++++++++++++++++++---- 11 files changed, 951 insertions(+), 286 deletions(-) mode change 100644 => 100755 Pipfile create mode 100644 tools/.env.template create mode 100644 tools/.gitignore create mode 100755 tools/generate_categorical_fim_flows.py diff --git a/.gitignore b/.gitignore index 54bcfb6a6..e18bb6432 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,5 @@ config/** !config/symbology/ .vscode/ **/.DS_Store -**/*_pytest.py \ No newline at end of file +**/*_pytest.py + diff --git a/CHANGELOG.md b/CHANGELOG.md index 177bcfc03..5601ba729 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,19 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v3.0.10.0 - 2021-03-12 - [PR #298](https://github.com/NOAA-OWP/cahaba/pull/298) + +Preprocessing of flow files for Categorical FIM. + +### Additions + - Generate Categorical FIM flow files for each category (action, minor, moderate, major). + - Generate point shapefile of Categorical FIM sites. + - Generate csv of attribute data in shapefile. + - Aggregate all shapefiles and csv files into one file in parent directory. + - Add flood of record category. +### Changes + - Stability fixes to `generate_categorical_fim.py`. +

## v3.0.9.0 - 2021-03-12 - [PR #297](https://github.com/NOAA-OWP/cahaba/pull/297) diff --git a/Dockerfile.dev b/Dockerfile.dev index d48d3f0d0..638ab46d6 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/* RUN git clone https://github.com/dtarb/taudem.git RUN git clone https://github.com/fernandoa123/cybergis-toolkit.git taudem_accelerated_flowDirections -RUN apt-get update && apt-get install -y cmake mpich \ +RUN apt-get update --fix-missing && apt-get install -y cmake mpich \ libgtest-dev libboost-test-dev libnetcdf-dev && rm -rf /var/lib/apt/lists/* ## Compile Main taudem repo ## diff --git a/Pipfile b/Pipfile old mode 100644 new mode 100755 index c8fbfc0f6..219f69ba2 --- a/Pipfile +++ b/Pipfile @@ -7,6 +7,7 @@ verify_ssl = true ipython = "*" [packages] +fiona = "==1.8.17" geopandas = "==0.8.1" numba = "==0.50.1" pandas = "==1.0.5" @@ -18,6 +19,8 @@ tqdm = "==4.48.0" Shapely = "==1.7.0" grass-session = "==0.5" seaborn = "==0.11.0" +python-dotenv = "*" +natsort = "*" [requires] python_version = "3.8" diff --git a/Pipfile.lock b/Pipfile.lock index 5de3d9856..1250dcc4b 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "5e7e798bab631b1e9012e79e84031f1b4852178692adc10bb8a071c9994c9d56" + "sha256": "f74c6d08c7c900c6cea86a5ea4661791356aac573ed738c9c3a1405303a4200c" }, "pipfile-spec": 6, "requires": { @@ -25,18 +25,18 @@ }, "attrs": { "hashes": [ - "sha256:26b54ddbbb9ee1d34d5d3668dd37d6cf74990ab23c828c2888dccdceee395594", - "sha256:fce7fc47dfc976152e82d53ff92fa0407700c21acd20886a13777a0d20e655dc" + "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", + "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.2.0" + "version": "==20.3.0" }, "certifi": { "hashes": [ - "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", - "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" ], - "version": "==2020.6.20" + "version": "==2020.12.5" }, "click": { "hashes": [ @@ -55,12 +55,11 @@ }, "cligj": { "hashes": [ - "sha256:2bf2042a81be581d707f726aef5efbbd935a62af85d5521305026dabeb798f5d", - "sha256:394a0905fe6f36821b82f086bf8cc12fef20d99d0a3c26a8a92a9207a18b70c6", - "sha256:9881e3b71ff450a83412fadee026347ca15b99c623b9485593da4929cf884ca9" + "sha256:07171c1e287f45511f97df4ea071abc5d19924153413d5683a8e4866369bc676", + "sha256:b2f1f7247d59a5387bd3013a08b9ed6829e96fafa4a6e6292341efdb46fe6220" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4'", - "version": "==0.7.0" + "version": "==0.7.1" }, "cycler": { "hashes": [ @@ -82,6 +81,7 @@ "sha256:d38a6ef59087b5a20ad7298608c5392e37705ff14d27b44435e04072bbf6632c", "sha256:fcfd8b67403de9b1cc53c045c72542e9f30cb15e617c89c41b928046a9b27daa" ], + "index": "pypi", "version": "==1.8.17" }, "geopandas": { @@ -102,28 +102,41 @@ }, "kiwisolver": { "hashes": [ - "sha256:03662cbd3e6729f341a97dd2690b271e51a67a68322affab12a5b011344b973c", - "sha256:18d749f3e56c0480dccd1714230da0f328e6e4accf188dd4e6884bdd06bf02dd", - "sha256:247800260cd38160c362d211dcaf4ed0f7816afb5efe56544748b21d6ad6d17f", - 
"sha256:38d05c9ecb24eee1246391820ed7137ac42a50209c203c908154782fced90e44", - "sha256:443c2320520eda0a5b930b2725b26f6175ca4453c61f739fef7a5847bd262f74", - "sha256:4eadb361baf3069f278b055e3bb53fa189cea2fd02cb2c353b7a99ebb4477ef1", - "sha256:556da0a5f60f6486ec4969abbc1dd83cf9b5c2deadc8288508e55c0f5f87d29c", - "sha256:603162139684ee56bcd57acc74035fceed7dd8d732f38c0959c8bd157f913fec", - "sha256:60a78858580761fe611d22127868f3dc9f98871e6fdf0a15cc4203ed9ba6179b", - "sha256:63f55f490b958b6299e4e5bdac66ac988c3d11b7fafa522800359075d4fa56d1", - "sha256:7cc095a4661bdd8a5742aaf7c10ea9fac142d76ff1770a0f84394038126d8fc7", - "sha256:be046da49fbc3aa9491cc7296db7e8d27bcf0c3d5d1a40259c10471b014e4e0c", - "sha256:c31bc3c8e903d60a1ea31a754c72559398d91b5929fcb329b1c3a3d3f6e72113", - "sha256:c955791d80e464da3b471ab41eb65cf5a40c15ce9b001fdc5bbc241170de58ec", - "sha256:d069ef4b20b1e6b19f790d00097a5d5d2c50871b66d10075dab78938dc2ee2cf", - "sha256:d52b989dc23cdaa92582ceb4af8d5bcc94d74b2c3e64cd6785558ec6a879793e", - "sha256:e586b28354d7b6584d8973656a7954b1c69c93f708c0c07b77884f91640b7657", - "sha256:efcf3397ae1e3c3a4a0a0636542bcad5adad3b1dd3e8e629d0b6e201347176c8", - "sha256:fccefc0d36a38c57b7bd233a9b485e2f1eb71903ca7ad7adacad6c28a56d62d2" + "sha256:0cd53f403202159b44528498de18f9285b04482bab2a6fc3f5dd8dbb9352e30d", + "sha256:1e1bc12fb773a7b2ffdeb8380609f4f8064777877b2225dec3da711b421fda31", + "sha256:225e2e18f271e0ed8157d7f4518ffbf99b9450fca398d561eb5c4a87d0986dd9", + "sha256:232c9e11fd7ac3a470d65cd67e4359eee155ec57e822e5220322d7b2ac84fbf0", + "sha256:31dfd2ac56edc0ff9ac295193eeaea1c0c923c0355bf948fbd99ed6018010b72", + "sha256:33449715e0101e4d34f64990352bce4095c8bf13bed1b390773fc0a7295967b3", + "sha256:401a2e9afa8588589775fe34fc22d918ae839aaaf0c0e96441c0fdbce6d8ebe6", + "sha256:44a62e24d9b01ba94ae7a4a6c3fb215dc4af1dde817e7498d901e229aaf50e4e", + "sha256:50af681a36b2a1dee1d3c169ade9fdc59207d3c31e522519181e12f1b3ba7000", + "sha256:563c649cfdef27d081c84e72a03b48ea9408c16657500c312575ae9d9f7bc1c3", + "sha256:5989db3b3b34b76c09253deeaf7fbc2707616f130e166996606c284395da3f18", + "sha256:5a7a7dbff17e66fac9142ae2ecafb719393aaee6a3768c9de2fd425c63b53e21", + "sha256:5c3e6455341008a054cccee8c5d24481bcfe1acdbc9add30aa95798e95c65621", + "sha256:5f6ccd3dd0b9739edcf407514016108e2280769c73a85b9e59aa390046dbf08b", + "sha256:72c99e39d005b793fb7d3d4e660aed6b6281b502e8c1eaf8ee8346023c8e03bc", + "sha256:78751b33595f7f9511952e7e60ce858c6d64db2e062afb325985ddbd34b5c131", + "sha256:834ee27348c4aefc20b479335fd422a2c69db55f7d9ab61721ac8cd83eb78882", + "sha256:8be8d84b7d4f2ba4ffff3665bcd0211318aa632395a1a41553250484a871d454", + "sha256:950a199911a8d94683a6b10321f9345d5a3a8433ec58b217ace979e18f16e248", + "sha256:a357fd4f15ee49b4a98b44ec23a34a95f1e00292a139d6015c11f55774ef10de", + "sha256:a53d27d0c2a0ebd07e395e56a1fbdf75ffedc4a05943daf472af163413ce9598", + "sha256:acef3d59d47dd85ecf909c359d0fd2c81ed33bdff70216d3956b463e12c38a54", + "sha256:b38694dcdac990a743aa654037ff1188c7a9801ac3ccc548d3341014bc5ca278", + "sha256:b9edd0110a77fc321ab090aaa1cfcaba1d8499850a12848b81be2222eab648f6", + "sha256:c08e95114951dc2090c4a630c2385bef681cacf12636fb0241accdc6b303fd81", + "sha256:c5518d51a0735b1e6cee1fdce66359f8d2b59c3ca85dc2b0813a8aa86818a030", + "sha256:c8fd0f1ae9d92b42854b2979024d7597685ce4ada367172ed7c09edf2cef9cb8", + "sha256:ca3820eb7f7faf7f0aa88de0e54681bddcb46e485beb844fcecbcd1c8bd01689", + "sha256:cf8b574c7b9aa060c62116d4181f3a1a4e821b2ec5cbfe3775809474113748d4", + "sha256:d3155d828dec1d43283bd24d3d3e0d9c7c350cdfcc0bd06c0ad1209c1bbc36d0", + 
"sha256:f8d6f8db88049a699817fd9178782867bf22283e3813064302ac59f61d95be05", + "sha256:fd34fbbfbc40628200730bc1febe30631347103fc8d3d4fa012c21ab9c11eca9" ], "markers": "python_version >= '3.6'", - "version": "==1.2.0" + "version": "==1.3.1" }, "llvmlite": { "hashes": [ @@ -149,27 +162,34 @@ }, "matplotlib": { "hashes": [ - "sha256:06866c138d81a593b535d037b2727bec9b0818cadfe6a81f6ec5715b8dd38a89", - "sha256:16b241c3d17be786966495229714de37de04472da472277869b8d5b456a8df00", - "sha256:27f9de4784ae6fb97679556c5542cf36c0751dccb4d6407f7c62517fa2078868", - "sha256:2f5eefc17dc2a71318d5a3496313be5c351c0731e8c4c6182c9ac3782cfc4076", - "sha256:371518c769d84af8ec9b7dcb871ac44f7a67ef126dd3a15c88c25458e6b6d205", - "sha256:3d2edbf59367f03cd9daf42939ca06383a7d7803e3993eb5ff1bee8e8a3fbb6b", - "sha256:3fb0409754b26f48045bacd6818e44e38ca9338089f8ba689e2f9344ff2847c7", - "sha256:548cfe81476dbac44db96e9c0b074b6fb333b4d1f12b1ae68dbed47e45166384", - "sha256:57be9e21073fc367237b03ecac0d9e4b8ddbe38e86ec4a316857d8d93ac9286c", - "sha256:5ccecb5f78b51b885f0028b646786889f49c54883e554fca41a2a05998063f23", - "sha256:69cf76d673682140f46c6cb5e073332c1f1b2853c748dc1cb04f7d00023567f7", - "sha256:793e061054662aa27acaff9201cdd510a698541c6e8659eeceb31d66c16facc6", - "sha256:799c421bc245a0749c1515b6dea6dc02db0a8c1f42446a0f03b3b82a60a900dc", - "sha256:8bc1d3284dee001f41ec98f59675f4d723683e1cc082830b440b5f081d8e0ade", - "sha256:a522de31e07ed7d6f954cda3fbd5ca4b8edbfc592a821a7b00291be6f843292e", - "sha256:be2f0ec62e0939a9dcfd3638c140c5a74fc929ee3fd1f31408ab8633db6e1523", - "sha256:c5d0c2ae3e3ed4e9f46b7c03b40d443601012ffe8eb8dfbb2bd6b2d00509f797", - "sha256:f0268613073df055bcc6a490de733012f2cf4fe191c1adb74e41cec8add1a165" + "sha256:1de0bb6cbfe460725f0e97b88daa8643bcf9571c18ba90bb8e41432aaeca91d6", + "sha256:1e850163579a8936eede29fad41e202b25923a0a8d5ffd08ce50fc0a97dcdc93", + "sha256:215e2a30a2090221a9481db58b770ce56b8ef46f13224ae33afe221b14b24dc1", + "sha256:348e6032f666ffd151b323342f9278b16b95d4a75dfacae84a11d2829a7816ae", + "sha256:3d2eb9c1cc254d0ffa90bc96fde4b6005d09c2228f99dfd493a4219c1af99644", + "sha256:3e477db76c22929e4c6876c44f88d790aacdf3c3f8f3a90cb1975c0bf37825b0", + "sha256:451cc89cb33d6652c509fc6b588dc51c41d7246afdcc29b8624e256b7663ed1f", + "sha256:46b1a60a04e6d884f0250d5cc8dc7bd21a9a96c584a7acdaab44698a44710bab", + "sha256:5f571b92a536206f7958f7cb2d367ff6c9a1fa8229dc35020006e4cdd1ca0acd", + "sha256:672960dd114e342b7c610bf32fb99d14227f29919894388b41553217457ba7ef", + "sha256:7310e353a4a35477c7f032409966920197d7df3e757c7624fd842f3eeb307d3d", + "sha256:746a1df55749629e26af7f977ea426817ca9370ad1569436608dc48d1069b87c", + "sha256:7c155437ae4fd366e2700e2716564d1787700687443de46bcb895fe0f84b761d", + "sha256:9265ae0fb35e29f9b8cc86c2ab0a2e3dcddc4dd9de4b85bf26c0f63fe5c1c2ca", + "sha256:94bdd1d55c20e764d8aea9d471d2ae7a7b2c84445e0fa463f02e20f9730783e1", + "sha256:9a79e5dd7bb797aa611048f5b70588b23c5be05b63eefd8a0d152ac77c4243db", + "sha256:a17f0a10604fac7627ec82820439e7db611722e80c408a726cd00d8c974c2fb3", + "sha256:a1acb72f095f1d58ecc2538ed1b8bca0b57df313b13db36ed34b8cdf1868e674", + "sha256:aa49571d8030ad0b9ac39708ee77bd2a22f87815e12bdee52ecaffece9313ed8", + "sha256:c24c05f645aef776e8b8931cb81e0f1632d229b42b6d216e30836e2e145a2b40", + "sha256:cf3a7e54eff792f0815dbbe9b85df2f13d739289c93d346925554f71d484be78", + "sha256:d738acfdfb65da34c91acbdb56abed46803db39af259b7f194dc96920360dbe4", + "sha256:e15fa23d844d54e7b3b7243afd53b7567ee71c721f592deb0727ee85e668f96a", + "sha256:ed4a9e6dcacba56b17a0a9ac22ae2c72a35b7f0ef0693aa68574f0b2df607a89", + 
"sha256:f44149a0ef5b4991aaef12a93b8e8d66d6412e762745fea1faa61d98524e0ba9" ], "markers": "python_version >= '3.6'", - "version": "==3.3.2" + "version": "==3.3.4" }, "munch": { "hashes": [ @@ -178,6 +198,14 @@ ], "version": "==2.5.0" }, + "natsort": { + "hashes": [ + "sha256:00c603a42365830c4722a2eb7663a25919551217ec09a243d3399fa8dd4ac403", + "sha256:d0f4fc06ca163fa4a5ef638d9bf111c67f65eedcc7920f98dec08e489045b67e" + ], + "index": "pypi", + "version": "==7.1.1" + }, "numba": { "hashes": [ "sha256:24852c21fbf7edf9e000eeec9fbd1b24d1ca17c86ae449b06a3707bcdec95479", @@ -205,35 +233,33 @@ }, "numpy": { "hashes": [ - "sha256:04c7d4ebc5ff93d9822075ddb1751ff392a4375e5885299445fcebf877f179d5", - "sha256:0bfd85053d1e9f60234f28f63d4a5147ada7f432943c113a11afcf3e65d9d4c8", - "sha256:0c66da1d202c52051625e55a249da35b31f65a81cb56e4c69af0dfb8fb0125bf", - "sha256:0d310730e1e793527065ad7dde736197b705d0e4c9999775f212b03c44a8484c", - "sha256:1669ec8e42f169ff715a904c9b2105b6640f3f2a4c4c2cb4920ae8b2785dac65", - "sha256:2117536e968abb7357d34d754e3733b0d7113d4c9f1d921f21a3d96dec5ff716", - "sha256:3733640466733441295b0d6d3dcbf8e1ffa7e897d4d82903169529fd3386919a", - "sha256:4339741994c775396e1a274dba3609c69ab0f16056c1077f18979bec2a2c2e6e", - "sha256:51ee93e1fac3fe08ef54ff1c7f329db64d8a9c5557e6c8e908be9497ac76374b", - "sha256:54045b198aebf41bf6bf4088012777c1d11703bf74461d70cd350c0af2182e45", - "sha256:58d66a6b3b55178a1f8a5fe98df26ace76260a70de694d99577ddeab7eaa9a9d", - "sha256:59f3d687faea7a4f7f93bd9665e5b102f32f3fa28514f15b126f099b7997203d", - "sha256:62139af94728d22350a571b7c82795b9d59be77fc162414ada6c8b6a10ef5d02", - "sha256:7118f0a9f2f617f921ec7d278d981244ba83c85eea197be7c5a4f84af80a9c3c", - "sha256:7c6646314291d8f5ea900a7ea9c4261f834b5b62159ba2abe3836f4fa6705526", - "sha256:967c92435f0b3ba37a4257c48b8715b76741410467e2bdb1097e8391fccfae15", - "sha256:9a3001248b9231ed73894c773142658bab914645261275f675d86c290c37f66d", - "sha256:aba1d5daf1144b956bc87ffb87966791f5e9f3e1f6fab3d7f581db1f5b598f7a", - "sha256:addaa551b298052c16885fc70408d3848d4e2e7352de4e7a1e13e691abc734c1", - "sha256:b594f76771bc7fc8a044c5ba303427ee67c17a09b36e1fa32bde82f5c419d17a", - "sha256:c35a01777f81e7333bcf276b605f39c872e28295441c265cd0c860f4b40148c1", - "sha256:cebd4f4e64cfe87f2039e4725781f6326a61f095bc77b3716502bed812b385a9", - "sha256:d526fa58ae4aead839161535d59ea9565863bb0b0bdb3cc63214613fb16aced4", - "sha256:d7ac33585e1f09e7345aa902c281bd777fdb792432d27fca857f39b70e5dd31c", - "sha256:e6ddbdc5113628f15de7e4911c02aed74a4ccff531842c583e5032f6e5a179bd", - "sha256:eb25c381d168daf351147713f49c626030dcff7a393d5caa62515d415a6071d8" + "sha256:032be656d89bbf786d743fee11d01ef318b0781281241997558fa7950028dd29", + "sha256:104f5e90b143dbf298361a99ac1af4cf59131218a045ebf4ee5990b83cff5fab", + "sha256:125a0e10ddd99a874fd357bfa1b636cd58deb78ba4a30b5ddb09f645c3512e04", + "sha256:12e4ba5c6420917571f1a5becc9338abbde71dd811ce40b37ba62dec7b39af6d", + "sha256:13adf545732bb23a796914fe5f891a12bd74cf3d2986eed7b7eba2941eea1590", + "sha256:2d7e27442599104ee08f4faed56bb87c55f8b10a5494ac2ead5c98a4b289e61f", + "sha256:3bc63486a870294683980d76ec1e3efc786295ae00128f9ea38e2c6e74d5a60a", + "sha256:3d3087e24e354c18fb35c454026af3ed8997cfd4997765266897c68d724e4845", + "sha256:4ed8e96dc146e12c1c5cdd6fb9fd0757f2ba66048bf94c5126b7efebd12d0090", + "sha256:60759ab15c94dd0e1ed88241fd4fa3312db4e91d2c8f5a2d4cf3863fad83d65b", + "sha256:65410c7f4398a0047eea5cca9b74009ea61178efd78d1be9847fac1d6716ec1e", + "sha256:66b467adfcf628f66ea4ac6430ded0614f5cc06ba530d09571ea404789064adc", + 
"sha256:7199109fa46277be503393be9250b983f325880766f847885607d9b13848f257", + "sha256:72251e43ac426ff98ea802a931922c79b8d7596480300eb9f1b1e45e0543571e", + "sha256:89e5336f2bec0c726ac7e7cdae181b325a9c0ee24e604704ed830d241c5e47ff", + "sha256:89f937b13b8dd17b0099c7c2e22066883c86ca1575a975f754babc8fbf8d69a9", + "sha256:9c94cab5054bad82a70b2e77741271790304651d584e2cdfe2041488e753863b", + "sha256:9eb551d122fadca7774b97db8a112b77231dcccda8e91a5bc99e79890797175e", + "sha256:a1d7995d1023335e67fb070b2fae6f5968f5be3802b15ad6d79d81ecaa014fe0", + "sha256:ae61f02b84a0211abb56462a3b6cd1e7ec39d466d3160eb4e1da8bf6717cdbeb", + "sha256:b9410c0b6fed4a22554f072a86c361e417f0258838957b78bd063bde2c7f841f", + "sha256:c26287dfc888cf1e65181f39ea75e11f42ffc4f4529e5bd19add57ad458996e2", + "sha256:c91ec9569facd4757ade0888371eced2ecf49e7982ce5634cc2cf4e7331a4b14", + "sha256:ecb5b74c702358cdc21268ff4c37f7466357871f53a30e6f84c686952bef16a9" ], - "markers": "python_version >= '3.6'", - "version": "==1.19.2" + "markers": "python_version >= '3.7'", + "version": "==1.20.1" }, "pandas": { "hashes": [ @@ -259,37 +285,42 @@ }, "pillow": { "hashes": [ - "sha256:006de60d7580d81f4a1a7e9f0173dc90a932e3905cc4d47ea909bc946302311a", - "sha256:0a2e8d03787ec7ad71dc18aec9367c946ef8ef50e1e78c71f743bc3a770f9fae", - "sha256:0eeeae397e5a79dc088d8297a4c2c6f901f8fb30db47795113a4a605d0f1e5ce", - "sha256:11c5c6e9b02c9dac08af04f093eb5a2f84857df70a7d4a6a6ad461aca803fb9e", - "sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140", - "sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb", - "sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021", - "sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6", - "sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302", - "sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c", - "sha256:6c1aca8231625115104a06e4389fcd9ec88f0c9befbabd80dc206c35561be271", - "sha256:795e91a60f291e75de2e20e6bdd67770f793c8605b553cb6e4387ce0cb302e09", - "sha256:7ba0ba61252ab23052e642abdb17fd08fdcfdbbf3b74c969a30c58ac1ade7cd3", - "sha256:7c9401e68730d6c4245b8e361d3d13e1035cbc94db86b49dc7da8bec235d0015", - "sha256:81f812d8f5e8a09b246515fac141e9d10113229bc33ea073fec11403b016bcf3", - "sha256:895d54c0ddc78a478c80f9c438579ac15f3e27bf442c2a9aa74d41d0e4d12544", - "sha256:8de332053707c80963b589b22f8e0229f1be1f3ca862a932c1bcd48dafb18dd8", - "sha256:92c882b70a40c79de9f5294dc99390671e07fc0b0113d472cbea3fde15db1792", - "sha256:95edb1ed513e68bddc2aee3de66ceaf743590bf16c023fb9977adc4be15bd3f0", - "sha256:b63d4ff734263ae4ce6593798bcfee6dbfb00523c82753a3a03cbc05555a9cc3", - "sha256:bd7bf289e05470b1bc74889d1466d9ad4a56d201f24397557b6f65c24a6844b8", - "sha256:cc3ea6b23954da84dbee8025c616040d9aa5eaf34ea6895a0a762ee9d3e12e11", - "sha256:cc9ec588c6ef3a1325fa032ec14d97b7309db493782ea8c304666fb10c3bd9a7", - "sha256:d3d07c86d4efa1facdf32aa878bd508c0dc4f87c48125cc16b937baa4e5b5e11", - "sha256:d8a96747df78cda35980905bf26e72960cba6d355ace4780d4bdde3b217cdf1e", - "sha256:e38d58d9138ef972fceb7aeec4be02e3f01d383723965bfcef14d174c8ccd039", - "sha256:eb472586374dc66b31e36e14720747595c2b265ae962987261f044e5cce644b5", - "sha256:fbd922f702582cb0d71ef94442bfca57624352622d75e3be7a1e7e9360b07e72" + "sha256:15306d71a1e96d7e271fd2a0737038b5a92ca2978d2e38b6ced7966583e3d5af", + "sha256:1940fc4d361f9cc7e558d6f56ff38d7351b53052fd7911f4b60cd7bc091ea3b1", + "sha256:1f93f2fe211f1ef75e6f589327f4d4f8545d5c8e826231b042b483d8383e8a7c", + 
"sha256:30d33a1a6400132e6f521640dd3f64578ac9bfb79a619416d7e8802b4ce1dd55", + "sha256:328240f7dddf77783e72d5ed79899a6b48bc6681f8d1f6001f55933cb4905060", + "sha256:46c2bcf8e1e75d154e78417b3e3c64e96def738c2a25435e74909e127a8cba5e", + "sha256:5762ebb4436f46b566fc6351d67a9b5386b5e5de4e58fdaa18a1c83e0e20f1a8", + "sha256:5a2d957eb4aba9d48170b8fe6538ec1fbc2119ffe6373782c03d8acad3323f2e", + "sha256:5cf03b9534aca63b192856aa601c68d0764810857786ea5da652581f3a44c2b0", + "sha256:5daba2b40782c1c5157a788ec4454067c6616f5a0c1b70e26ac326a880c2d328", + "sha256:63cd413ac52ee3f67057223d363f4f82ce966e64906aea046daf46695e3c8238", + "sha256:6efac40344d8f668b6c4533ae02a48d52fd852ef0654cc6f19f6ac146399c733", + "sha256:71b01ee69e7df527439d7752a2ce8fb89e19a32df484a308eca3e81f673d3a03", + "sha256:71f31ee4df3d5e0b366dd362007740106d3210fb6a56ec4b581a5324ba254f06", + "sha256:72027ebf682abc9bafd93b43edc44279f641e8996fb2945104471419113cfc71", + "sha256:74cd9aa648ed6dd25e572453eb09b08817a1e3d9f8d1bd4d8403d99e42ea790b", + "sha256:81b3716cc9744ffdf76b39afb6247eae754186838cedad0b0ac63b2571253fe6", + "sha256:8565355a29655b28fdc2c666fd9a3890fe5edc6639d128814fafecfae2d70910", + "sha256:87f42c976f91ca2fc21a3293e25bd3cd895918597db1b95b93cbd949f7d019ce", + "sha256:89e4c757a91b8c55d97c91fa09c69b3677c227b942fa749e9a66eef602f59c28", + "sha256:8c4e32218c764bc27fe49b7328195579581aa419920edcc321c4cb877c65258d", + "sha256:903293320efe2466c1ab3509a33d6b866dc850cfd0c5d9cc92632014cec185fb", + "sha256:90882c6f084ef68b71bba190209a734bf90abb82ab5e8f64444c71d5974008c6", + "sha256:98afcac3205d31ab6a10c5006b0cf040d0026a68ec051edd3517b776c1d78b09", + "sha256:a01da2c266d9868c4f91a9c6faf47a251f23b9a862dce81d2ff583135206f5be", + "sha256:aeab4cd016e11e7aa5cfc49dcff8e51561fa64818a0be86efa82c7038e9369d0", + "sha256:b07c660e014852d98a00a91adfbe25033898a9d90a8f39beb2437d22a203fc44", + "sha256:bead24c0ae3f1f6afcb915a057943ccf65fc755d11a1410a909c1fefb6c06ad1", + "sha256:d1d6bca39bb6dd94fba23cdb3eeaea5e30c7717c5343004d900e2a63b132c341", + "sha256:e2cd8ac157c1e5ae88b6dd790648ee5d2777e76f1e5c7d184eaddb2938594f34", + "sha256:e5739ae63636a52b706a0facec77b2b58e485637e1638202556156e424a02dc2", + "sha256:f36c3ff63d6fc509ce599a2f5b0d0732189eed653420e7294c039d342c6e204a", + "sha256:f91b50ad88048d795c0ad004abbe1390aa1882073b1dca10bfd55d0b8cf18ec5" ], "markers": "python_version >= '3.6'", - "version": "==8.0.1" + "version": "==8.1.2" }, "pygeos": { "hashes": [ @@ -323,32 +354,34 @@ }, "pyproj": { "hashes": [ - "sha256:2518d1606e2229b82318e704b40290e02a2a52d77b40cdcb2978973d6fc27b20", - "sha256:33a5d1cfbb40a019422eb80709a0e270704390ecde7278fdc0b88f3647c56a39", - "sha256:33c1c2968a4f4f87d517c4275a18b557e5c13907cf2609371fadea8463c3ba05", - "sha256:3fef83a01c1e86dd9fa99d8214f749837cfafc34d9d6230b4b0a998fa7a68a1a", - "sha256:451a3d1c563b672458029ebc04acbb3266cd8b3025268eb871a9176dc3638911", - "sha256:457ad3856014ac26af1d86def6dc8cf69c1fa377b6e2fd6e97912d51cf66bdbe", - "sha256:4f5b02b4abbd41610397c635b275a8ee4a2b5bc72a75572b98ac6ae7befa471e", - "sha256:6a212d0e5c7efa33d039f0c8b0a489e2204fcd28b56206567852ad7f5f2a653e", - "sha256:6f3f36440ea61f5f6da4e6beb365dddcbe159815450001d9fb753545affa45ff", - "sha256:93cbad7b699e8e80def7de80c350617f35e6a0b82862f8ce3c014657c25fdb3c", - "sha256:9f097e8f341a162438918e908be86d105a28194ff6224633b2e9616c5031153f", - "sha256:a13e5731b3a360ee7fbd1e9199ec9203fafcece8ebd0b1351f16d0a90cad6828", - "sha256:a6ac4861979cd05a0f5400fefa41d26c0269a5fb8237618aef7c998907db39e1", - "sha256:a8b7c8accdc61dac8e91acab7c1f7b4590d1e102f2ee9b1f1e6399fad225958e", - 
"sha256:adacb67a9f71fb54ca1b887a6ab20f32dd536fcdf2acec84a19e25ad768f7965", - "sha256:bc2f3a15d065e206d63edd2cc4739aa0a35c05338ee276ab1dc72f56f1944bda", - "sha256:cbf6ccf990860b06c5262ff97c4b78e1d07883981635cd53a6aa438a68d92945", - "sha256:d87836be6b720fb4d9c112136aa47621b6ca09a554e645c1081561eb8e2fa1f4", - "sha256:d90a5d1fdd066b0e9b22409b0f5e81933469918fa04c2cf7f9a76ce84cb29dad", - "sha256:daf2998e3f5bcdd579a18faf009f37f53538e9b7d0a252581a610297d31e8536", - "sha256:e015f900b4b84e908f8035ab16ebf02d67389c1c216c17a2196fc2e515c00762", - "sha256:e50d5d20b87758acf8f13f39a3b3eb21d5ef32339d2bc8cdeb8092416e0051df", - "sha256:f5a8015c74ec8f6508aebf493b58ba20ccb4da8168bf05f0c2a37faccb518da9" + "sha256:09bead60769e69b592e8cb3ac51b5215f75e9bb9c213ce575031961deb48d6da", + "sha256:09db64a8088b23f001e574d92bcc3080bf7de44ddca152d0282a2b50c918a64a", + "sha256:1be7d54900eb7e2d1e637319080b3a047c70d1fb2f3c12d3400c0fa8a90cf440", + "sha256:36ba436675f9dea4ab3db7d9a32d3ff11c2fbb4d6690a83454d2f3c5c0b54041", + "sha256:3e7e851e6d58c16ac2cd920a1bacb7fbb24758a6fcd7f234d594a88ebae04ec9", + "sha256:489a96da87d8846c34c90da90e637544e4f4f50a13589b5aac54297f5ee1b01d", + "sha256:4a333f3e46fe8b2eb4647a3daa3a2cec52ddc6c107c653b45880526114942ee8", + "sha256:708d6e01b9ff3d6dc62a5ad2d2ba1264a863eaa657c1a9bf713a10cc35d34553", + "sha256:7ae8e7052f18fde1884574da449010e94fa205ad27aeeaa34a097f49a1ed6a2b", + "sha256:7bfaa34e8bb0510d4380310374deecd9e4328b9cf556925cfb45b5a94d5bbdbe", + "sha256:81c06df20d09d621e52791c19ce3c880695fb430061e59c2472fa5467e890391", + "sha256:86ef2fcd584a3222bf73e2befc24b2badd139b3371f4a1e88649978ef7649540", + "sha256:9666d01faf4e758ac68f2c16695c90de49c3170e3760988bf76a34aae11f4e15", + "sha256:9b845510255f9580d7e226dd3321a51c468cefb7be24e46415caf67caa4287c4", + "sha256:9e2ef75401f17062166d3fe53c555cd62c9577697a2f5ded916b23c54e5db497", + "sha256:a3805e026a5547be205a5e322c08e3069f0a48c63bbd53dbc7a8e3499bc66d58", + "sha256:a3a8ab19232bf4f4bb2590536538881b7bd0c07df23e0c2a792402ca2476c197", + "sha256:aa0a2981b25145523ca17a643c5be077fe13e514fdca9b6d1c412a95d723a5a5", + "sha256:bc70b6adcfa713d89bc561673cb57af5fb3a1718cd7d57ec537430cd1007a864", + "sha256:bfbac35490dd17f706700673506eeb8170f8a2a63fb5878171d4e6eef242d141", + "sha256:c658afc8a6115b58b02aa53d27bf2a67c1b00b55067edb1b7711c6c7391cfaa9", + "sha256:cba99e171d744969e13a865ad28fa9c949c4400b0e9c431a802cdd804f52f632", + "sha256:d27d40ec541ef69a5107bfcd85f40170e9e122ceb6315ce508cd44d199983d41", + "sha256:f942a976ea3de6a519cf48be30a12f465e44d0ac0c38a0d820ab3acfcc0a48a6", + "sha256:fee7517bd389a1db7b8bebb18838d04dedca9eaacda01d353d98f5ee421f263e" ], - "markers": "python_version >= '3.5'", - "version": "==2.6.1.post1" + "markers": "python_version >= '3.6'", + "version": "==3.0.1" }, "python-dateutil": { "hashes": [ @@ -358,12 +391,20 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.1" }, + "python-dotenv": { + "hashes": [ + "sha256:0c8d1b80d1a1e91717ea7d526178e3882732420b03f08afea0406db6402e220e", + "sha256:587825ed60b1711daea4832cf37524dfd404325b7db5e25ebe88c495c9f807a0" + ], + "index": "pypi", + "version": "==0.15.0" + }, "pytz": { "hashes": [ - "sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed", - "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048" + "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da", + "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798" ], - "version": "==2020.1" + "version": "==2021.1" }, "rasterio": { 
"hashes": [ @@ -410,28 +451,28 @@ }, "scipy": { "hashes": [ - "sha256:07b083128beae040f1129bd8a82b01804f5e716a7fd2962c1053fa683433e4ab", - "sha256:0edd67e8a00903aaf7a29c968555a2e27c5a69fea9d1dcfffda80614281a884f", - "sha256:12fdcbfa56cac926a0a9364a30cbf4ad03c2c7b59f75b14234656a5e4fd52bf3", - "sha256:1fee28b6641ecbff6e80fe7788e50f50c5576157d278fa40f36c851940eb0aff", - "sha256:33e6a7439f43f37d4c1135bc95bcd490ffeac6ef4b374892c7005ce2c729cf4a", - "sha256:5163200ab14fd2b83aba8f0c4ddcc1fa982a43192867264ab0f4c8065fd10d17", - "sha256:66ec29348444ed6e8a14c9adc2de65e74a8fc526dc2c770741725464488ede1f", - "sha256:8cc5c39ed287a8b52a5509cd6680af078a40b0e010e2657eca01ffbfec929468", - "sha256:a1a13858b10d41beb0413c4378462b43eafef88a1948d286cb357eadc0aec024", - "sha256:a3db1fe7c6cb29ca02b14c9141151ebafd11e06ffb6da8ecd330eee5c8283a8a", - "sha256:aebb69bcdec209d874fc4b0c7ac36f509d50418a431c1422465fa34c2c0143ea", - "sha256:b9751b39c52a3fa59312bd2e1f40144ee26b51404db5d2f0d5259c511ff6f614", - "sha256:bc0e63daf43bf052aefbbd6c5424bc03f629d115ece828e87303a0bcc04a37e4", - "sha256:d5e3cc60868f396b78fc881d2c76460febccfe90f6d2f082b9952265c79a8788", - "sha256:ddae76784574cc4c172f3d5edd7308be16078dd3b977e8746860c76c195fa707", - "sha256:e2602f79c85924e4486f684aa9bbab74afff90606100db88d0785a0088be7edb", - "sha256:e527c9221b6494bcd06a17f9f16874406b32121385f9ab353b8a9545be458f0b", - "sha256:f574558f1b774864516f3c3fe072ebc90a29186f49b720f60ed339294b7f32ac", - "sha256:ffcbd331f1ffa82e22f1d408e93c37463c9a83088243158635baec61983aaacf" + "sha256:0c8a51d33556bf70367452d4d601d1742c0e806cd0194785914daf19775f0e67", + "sha256:0e5b0ccf63155d90da576edd2768b66fb276446c371b73841e3503be1d63fb5d", + "sha256:2481efbb3740977e3c831edfd0bd9867be26387cacf24eb5e366a6a374d3d00d", + "sha256:33d6b7df40d197bdd3049d64e8e680227151673465e5d85723b3b8f6b15a6ced", + "sha256:5da5471aed911fe7e52b86bf9ea32fb55ae93e2f0fac66c32e58897cfb02fa07", + "sha256:5f331eeed0297232d2e6eea51b54e8278ed8bb10b099f69c44e2558c090d06bf", + "sha256:5fa9c6530b1661f1370bcd332a1e62ca7881785cc0f80c0d559b636567fab63c", + "sha256:6725e3fbb47da428794f243864f2297462e9ee448297c93ed1dcbc44335feb78", + "sha256:68cb4c424112cd4be886b4d979c5497fba190714085f46b8ae67a5e4416c32b4", + "sha256:794e768cc5f779736593046c9714e0f3a5940bc6dcc1dba885ad64cbfb28e9f0", + "sha256:83bf7c16245c15bc58ee76c5418e46ea1811edcc2e2b03041b804e46084ab627", + "sha256:8e403a337749ed40af60e537cc4d4c03febddcc56cd26e774c9b1b600a70d3e4", + "sha256:a15a1f3fc0abff33e792d6049161b7795909b40b97c6cc2934ed54384017ab76", + "sha256:a423533c55fec61456dedee7b6ee7dce0bb6bfa395424ea374d25afa262be261", + "sha256:a5193a098ae9f29af283dcf0041f762601faf2e595c0db1da929875b7570353f", + "sha256:bd50daf727f7c195e26f27467c85ce653d41df4358a25b32434a50d8870fc519", + "sha256:c4fceb864890b6168e79b0e714c585dbe2fd4222768ee90bc1aa0f8218691b11", + "sha256:e79570979ccdc3d165456dd62041d9556fb9733b86b4b6d818af7a0afc15f092", + "sha256:f46dd15335e8a320b0fb4685f58b7471702234cba8bb3442b69a3e1dc329c345" ], - "markers": "python_version >= '3.6'", - "version": "==1.5.3" + "markers": "python_version >= '3.7'", + "version": "==1.6.1" }, "seaborn": { "hashes": [ @@ -558,11 +599,11 @@ }, "ipython": { "hashes": [ - "sha256:2e22c1f74477b5106a6fb301c342ab8c64bb75d702e350f05a649e8cb40a0fb8", - "sha256:a331e78086001931de9424940699691ad49dfb457cea31f5471eae7b78222d5e" + "sha256:04323f72d5b85b606330b6d7e2dc8d2683ad46c3905e955aa96ecc7a99388e70", + "sha256:34207ffb2f653bced2bc8e3756c1db86e7d93e44ed049daae9814fed66d408ec" ], "index": "pypi", - "version": "==7.18.1" + "version": 
"==7.21.0" }, "ipython-genutils": { "hashes": [ @@ -573,19 +614,19 @@ }, "jedi": { "hashes": [ - "sha256:86ed7d9b750603e4ba582ea8edc678657fb4007894a12bcf6f4bb97892f31d20", - "sha256:98cc583fa0f2f8304968199b01b6b4b94f469a1f4a74c1560506ca2a211378b5" + "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93", + "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==0.17.2" + "markers": "python_version >= '3.6'", + "version": "==0.18.0" }, "parso": { "hashes": [ - "sha256:97218d9159b2520ff45eb78028ba8b50d2bc61dcc062a9682666f2dc4bd331ea", - "sha256:caba44724b994a8a5e086460bb212abc5a8bc46951bf4a9a1210745953622eb9" + "sha256:15b00182f472319383252c18d5913b69269590616c947747bc50bf4ac768f410", + "sha256:8519430ad07087d4c997fda3a7918f7cfa27cb58972a8c89c2a0295a1c940e9e" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==0.7.1" + "markers": "python_version >= '3.6'", + "version": "==0.8.1" }, "pexpect": { "hashes": [ @@ -604,26 +645,26 @@ }, "prompt-toolkit": { "hashes": [ - "sha256:25c95d2ac813909f813c93fde734b6e44406d1477a9faef7c915ff37d39c0a8c", - "sha256:7debb9a521e0b1ee7d2fe96ee4bd60ef03c6492784de0547337ca4433e46aa63" + "sha256:0fa02fa80363844a4ab4b8d6891f62dd0645ba672723130423ca4037b80c1974", + "sha256:62c811e46bd09130fb11ab759012a4ae385ce4fb2073442d1898867a824183bd" ], "markers": "python_full_version >= '3.6.1'", - "version": "==3.0.8" + "version": "==3.0.16" }, "ptyprocess": { "hashes": [ - "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0", - "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f" + "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", + "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" ], - "version": "==0.6.0" + "version": "==0.7.0" }, "pygments": { "hashes": [ - "sha256:307543fe65c0947b126e83dd5a61bd8acbd84abec11f43caebaf5534cbc17998", - "sha256:926c3f319eda178d1bd90851e4317e6d8cdb5e292a3386aac9bd75eca29cf9c7" + "sha256:2656e1a6edcdabf4275f9a3640db59fd5de107d88e8663c5d4e9a0fa62f77f94", + "sha256:534ef71d539ae97d4c3a4cf7d6f110f214b0e687e92f9cb9d2a3b0d3101289c8" ], "markers": "python_version >= '3.5'", - "version": "==2.7.1" + "version": "==2.8.1" }, "traitlets": { "hashes": [ diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index eb4198cb3..2958c2882 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -260,12 +260,12 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Subset vector layers') + parser = argparse.ArgumentParser(description='Crosswalk for MS/FR networks; calculate synthetic rating curves; update short rating curves') parser.add_argument('-d','--input-catchments-fileName', help='DEM derived catchments', required=True) parser.add_argument('-a','--input-flows-fileName', help='DEM derived streams', required=True) parser.add_argument('-s','--input-srcbase-fileName', help='Base synthetic rating curve table', required=True) parser.add_argument('-l','--output-catchments-fileName', help='Subset crosswalked catchments', required=True) - parser.add_argument('-f','--output-flows-fileName', help='Subset crosswalked streams', required=True) + parser.add_argument('-f','--output-flows-fileName', help='Subset crosswalked streams', required=True) 
parser.add_argument('-r','--output-src-fileName', help='Output crosswalked synthetic rating curve table', required=True) parser.add_argument('-j','--output-src-json-fileName',help='Output synthetic rating curve json',required=True) parser.add_argument('-x','--output-crosswalk-fileName',help='Crosswalk table',required=True) diff --git a/tools/.env.template b/tools/.env.template new file mode 100644 index 000000000..462d9c556 --- /dev/null +++ b/tools/.env.template @@ -0,0 +1,3 @@ +API_BASE_URL= +EVALUATED_SITES_CSV= +WBD_LAYER= diff --git a/tools/.gitignore b/tools/.gitignore new file mode 100644 index 000000000..4c49bd78f --- /dev/null +++ b/tools/.gitignore @@ -0,0 +1 @@ +.env diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py index 290a44704..bec059960 100755 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -18,10 +18,7 @@ from utils.shared_functions import getDriver INPUTS_DIR = r'/data/inputs' -magnitude_list = ['action', 'minor', 'moderate','major'] - -# Map path to points with attributes -all_mapped_ahps_conus_hipr = os.path.join(INPUTS_DIR, 'ahp_sites', 'all_mapped_ahps_reformatted.csv') +magnitude_list = ['action', 'minor', 'moderate','major', 'record'] # Define necessary variables for inundation() hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' @@ -142,7 +139,7 @@ def run_inundation(args): f.close() -def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, log_file): +def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, nws_lid_attributes_filename, log_file): # Create workspace gpkg_dir = os.path.join(output_cat_fim_dir, 'gpkg') @@ -176,7 +173,7 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, log_file): extent_grid = os.path.join(ahps_lid_dir, ahps_lid + '_' + magnitude + '_extent_' + huc + '.tif') if os.path.exists(extent_grid): - procs_list.append([ahps_lid, extent_grid, gpkg_dir, fim_version, huc, magnitude]) + procs_list.append([ahps_lid, extent_grid, gpkg_dir, fim_version, huc, magnitude, nws_lid_attributes_filename]) else: try: @@ -217,12 +214,13 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, log_file): def reformat_inundation_maps(args): try: - lid = args[0] - grid_path = args[1] - gpkg_dir = args[2] - fim_version = args[3] - huc = args[4] - magnitude = args[5] + lid = args[0] + grid_path = args[1] + gpkg_dir = args[2] + fim_version = args[3] + huc = args[4] + magnitude = args[5] + nws_lid_attributes_filename = args[6] # Convert raster to to shapes with rasterio.open(grid_path) as src: @@ -249,10 +247,11 @@ def reformat_inundation_maps(args): extent_poly = extent_poly.to_crs(VIZ_PROJECTION) # Join attributes - all_mapped_ahps_conus_hipr_fl = pd.read_table(all_mapped_ahps_conus_hipr, sep=",") - all_mapped_ahps_conus_hipr_fl = all_mapped_ahps_conus_hipr_fl.loc[(all_mapped_ahps_conus_hipr_fl.magnitude==magnitude) & (all_mapped_ahps_conus_hipr_fl.nws_lid==lid)] + nws_lid_attributes_table = pd.read_csv(nws_lid_attributes_filename, dtype={'huc':str}) + nws_lid_attributes_table = nws_lid_attributes_table.loc[(nws_lid_attributes_table.magnitude==magnitude) & (nws_lid_attributes_table.nws_lid==lid)] + - extent_poly_diss = extent_poly_diss.merge(all_mapped_ahps_conus_hipr_fl, left_on=['ahps_lid','magnitude'], right_on=['nws_lid','magnitude']) + extent_poly_diss = extent_poly_diss.merge(nws_lid_attributes_table, left_on=['ahps_lid','magnitude','huc'], right_on=['nws_lid','magnitude','huc']) extent_poly_diss 
= extent_poly_diss.drop(columns='nws_lid') @@ -308,8 +307,11 @@ def reformat_inundation_maps(args): # Create error log path log_file = os.path.join(log_dir, 'errors.log') + # Map path to points with attributes + nws_lid_attributes_filename = os.path.join(source_flow_dir, 'nws_lid_attributes.csv') + print("Generating Categorical FIM") generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif,log_file) print("Aggregating Categorical FIM") - post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir,log_file) + post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir,nws_lid_attributes_filename,log_file) diff --git a/tools/generate_categorical_fim_flows.py b/tools/generate_categorical_fim_flows.py new file mode 100755 index 000000000..562b3e8e5 --- /dev/null +++ b/tools/generate_categorical_fim_flows.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +from pathlib import Path +import geopandas as gpd +import pandas as pd +import time +from tools_shared_functions import aggregate_wbd_hucs, mainstem_nwm_segs, get_thresholds, flow_data, get_metadata, get_nwm_segs, flow_data +import argparse +from dotenv import load_dotenv +import os +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION + +load_dotenv() +#import variables from .env file +API_BASE_URL = os.getenv("API_BASE_URL") +EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") +WBD_LAYER = os.getenv("WBD_LAYER") + +def static_flow_lids(workspace, nwm_us_search, nwm_ds_search): + ''' + This will create static flow files for all nws_lids and save to the + workspace directory with the following format: + huc code + nws_lid_code + threshold (action/minor/moderate/major if they exist/are defined by WRDS) + flow file (ahps_{lid code}_huc_{huc 8 code}_flows_{threshold}.csv) + + This will use the WRDS API to get the nwm segments as well as the flow + values for each threshold at each nws_lid and then create the necessary + flow file to use for inundation mapping. + + Parameters + ---------- + workspace : STR + Location where output flow files will exist. + nwm_us_search : STR + Upstream distance (in miles) for walking up NWM network. + nwm_ds_search : STR + Downstream distance (in miles) for walking down NWM network. + wbd_path : STR + Location of HUC geospatial data (geopackage). + + Returns + ------- + None. + + ''' + all_start = time.time() + #Define workspace and wbd_path as a pathlib Path. Convert search distances to integer. + workspace = Path(workspace) + nwm_us_search = int(nwm_us_search) + nwm_ds_search = int(nwm_ds_search) + metadata_url = f'{API_BASE_URL}/metadata' + threshold_url = f'{API_BASE_URL}/threshold' + ################################################################### + #Create workspace + workspace.mkdir(exist_ok = True) + + #Return dictionary of huc (key) and sublist of ahps(value) as well as geodataframe of sites. 
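A minimal sketch of the tools/.env file that load_dotenv() reads here; the three variable names come from tools/.env.template added in this patch, while the values below are hypothetical placeholders rather than real endpoints or paths:

    # Hypothetical tools/.env contents (placeholders only):
    #   API_BASE_URL=https://wrds.example/api/location/v3.0
    #   EVALUATED_SITES_CSV=/data/inputs/ahps_sites/evaluated_ahps_sites.csv
    #   WBD_LAYER=/data/inputs/wbd/WBD_National.gpkg
    import os
    from dotenv import load_dotenv

    load_dotenv()
    for var in ('API_BASE_URL', 'EVALUATED_SITES_CSV', 'WBD_LAYER'):
        # Fail fast if tools/.env is missing a required variable.
        if os.getenv(var) is None:
            raise RuntimeError(f'tools/.env is missing {var}')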
+ print('Retrieving metadata...') + #Get metadata for 'CONUS' + conus_list, conus_dataframe = get_metadata(metadata_url, select_by = 'nws_lid', selector = ['all'], must_include = 'nws_data.rfc_forecast_point', upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search ) + + #Get metadata for Islands + islands_list, islands_dataframe = get_metadata(metadata_url, select_by = 'state', selector = ['HI','PR'] , must_include = None, upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + + #Append the dataframes and lists + all_lists = conus_list + islands_list + all_dataframe = conus_dataframe.append(islands_dataframe) + + print('Determining HUC using WBD layer...') + #Assign FIM HUC to GeoDataFrame and export to shapefile all candidate sites. + agg_start = time.time() + huc_dictionary, out_gdf = aggregate_wbd_hucs(metadata_list = all_lists, wbd_huc8_path = WBD_LAYER) + viz_out_gdf = out_gdf.to_crs(VIZ_PROJECTION) + viz_out_gdf.to_file(workspace / f'candidate_sites.shp') + agg_end = time.time() + print(f'agg time is {(agg_end - agg_start)/60} minutes') + #Get all possible mainstem segments + print('Getting list of mainstem segments') + #Import list of evaluated sites + list_of_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].to_list() + #The entire routine to get mainstems is harcoded in this function. + ms_segs = mainstem_nwm_segs(metadata_url, list_of_sites) + + #Loop through each huc unit + all_messages = [] + for huc in huc_dictionary: + print(f'Iterating through {huc}') + #Get list of nws_lids + nws_lids = huc_dictionary[huc] + #Loop through each lid in list to create flow file + for lid in nws_lids: + #In some instances the lid is not assigned a name, skip over these. + if not isinstance(lid,str): + print(f'{lid} is {type(lid)}') + continue + #Convert lid to lower case + lid = lid.lower() + #Get stages and flows for each threshold from the WRDS API. Priority given to USGS calculated flows. + stages, flows = get_thresholds(threshold_url = threshold_url, location_ids = lid, physical_element = 'all', threshold = 'all', bypass_source_flag = False) + #If stages/flows don't exist write message and exit out. + if not (stages and flows): + message = f'{lid} no thresholds' + all_messages.append(message) + continue + + #find lid metadata from master list of metadata dictionaries (line 66). + metadata = next((item for item in all_lists if item['identifiers']['nws_lid'] == lid.upper()), False) + + #Get mainstem segments of LID by intersecting LID segments with known mainstem segments. + segments = get_nwm_segs(metadata) + site_ms_segs = set(segments).intersection(ms_segs) + segments = list(site_ms_segs) + #if no segments, write message and exit out + if not segments: + print(f'{lid} no segments') + message = f'{lid} no segments' + all_messages.append(message) + continue + #For each flood category + for category in ['action', 'minor', 'moderate', 'major', 'record']: + #Get the flow + flow = flows[category] + #If there is a valid flow value, write a flow file. + if flow: + #round flow to nearest hundredth + flow = round(flow,2) + #Create the guts of the flow file. 
+                    flow_info = flow_data(segments,flow)
+                    #Define destination path and create folders
+                    output_file = workspace / huc / lid / category / (f'ahps_{lid}_huc_{huc}_flows_{category}.csv')
+                    output_file.parent.mkdir(parents = True, exist_ok = True)
+                    #Write flow file to file
+                    flow_info.to_csv(output_file, index = False)
+                else:
+                    message = f'{lid}_{category}_no flow'
+                    all_messages.append(message)
+            #This section will produce a point file of the LID location
+            #Get various attributes of the site.
+            lat = float(metadata['usgs_data']['latitude'])
+            lon = float(metadata['usgs_data']['longitude'])
+            wfo = metadata['nws_data']['wfo']
+            rfc = metadata['nws_data']['rfc']
+            state = metadata['nws_data']['state']
+            county = metadata['nws_data']['county']
+            name = metadata['nws_data']['name']
+            q_act = flows['action']
+            q_min = flows['minor']
+            q_mod = flows['moderate']
+            q_maj = flows['major']
+            q_rec = flows['record']
+            flow_units = flows['units']
+            flow_source = flows['source']
+            s_act = stages['action']
+            s_min = stages['minor']
+            s_mod = stages['moderate']
+            s_maj = stages['major']
+            s_rec = stages['record']
+            stage_units = stages['units']
+            stage_source = stages['source']
+            wrds_timestamp = stages['wrds_timestamp']
+            #Create a DataFrame using the collected attributes
+            df = pd.DataFrame({'nws_lid': [lid], 'name':name, 'WFO': wfo, 'rfc':rfc, 'huc':[huc], 'state':state, 'county':county, 'q_act':q_act, 'q_min':q_min, 'q_mod':q_mod, 'q_maj':q_maj, 'q_rec':q_rec, 'q_uni':flow_units, 'q_src':flow_source, 'stage_act':s_act, 'stage_min':s_min, 'stage_mod':s_mod, 'stage_maj':s_maj, 'stage_rec':s_rec, 'stage_uni':stage_units, 's_src':stage_source, 'wrds_time':wrds_timestamp, 'lat':[lat], 'lon':[lon]})
+            #Round stages and flows to nearest hundredth
+            df = df.round({'q_act':2,'q_min':2,'q_mod':2,'q_maj':2,'q_rec':2,'stage_act':2,'stage_min':2,'stage_mod':2,'stage_maj':2,'stage_rec':2})
+
+            #Create a geodataframe using usgs lat/lon property from WRDS then reproject to WGS84.
+            #Define EPSG codes for possible usgs latlon datum names (NAD83WGS84 assigned NAD83)
+            crs_lookup = {'NAD27':'EPSG:4267', 'NAD83':'EPSG:4269', 'NAD83WGS84': 'EPSG:4269'}
+            #Get horizontal datum (from dataframe) and assign appropriate EPSG code, assume NAD83 if not assigned.
+            h_datum = metadata['usgs_data']['latlon_datum_name']
+            src_crs = crs_lookup.get(h_datum, 'EPSG:4269')
+            gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs = src_crs)
+            #Reproject to VIZ_PROJECTION
+            viz_gdf = gdf.to_crs(VIZ_PROJECTION)
+
+            #Create a csv with same info as shapefile
+            csv_df = pd.DataFrame()
+            for threshold in ['action', 'minor', 'moderate', 'major', 'record']:
+                line_df = pd.DataFrame({'nws_lid': [lid], 'name':name, 'WFO': wfo, 'rfc':rfc, 'huc':[huc], 'state':state, 'county':county, 'magnitude': threshold, 'q':flows[threshold], 'q_uni':flows['units'], 'q_src':flow_source, 'stage':stages[threshold], 'stage_uni':stages['units'], 's_src':stage_source, 'wrds_time':wrds_timestamp, 'lat':[lat], 'lon':[lon]})
+                csv_df = csv_df.append(line_df)
+            #Round flow and stage columns to 2 decimal places.
+            csv_df = csv_df.round({'q':2,'stage':2})
+
+            #If a site folder exists (i.e. a flow file was written) save files containing site attributes.
+ try: + #Save GeoDataFrame to shapefile format and export csv containing attributes + output_dir = workspace / huc / lid + viz_gdf.to_file(output_dir / f'{lid}_location.shp' ) + csv_df.to_csv(output_dir / f'{lid}_attributes.csv', index = False) + except: + print(f'{lid} missing all flows') + message = f'{lid} missing all flows' + all_messages.append(message) + #Write out messages to file + messages_df = pd.DataFrame(all_messages, columns = ['message']) + messages_df.to_csv(workspace / f'all_messages.csv', index = False) + + #Recursively find all location shapefiles + locations_files = list(workspace.rglob('*_location.shp')) + spatial_layers = gpd.GeoDataFrame() + #Append all shapefile info to a geodataframe + for location in locations_files: + location_gdf = gpd.read_file(location) + spatial_layers = spatial_layers.append(location_gdf) + #Write appended spatial data to disk. + output_file = workspace /'all_mapped_ahps.shp' + spatial_layers.to_file(output_file) + + #Recursively find all *_info csv files and append + csv_files = list(workspace.rglob('*_attributes.csv')) + all_csv_df = pd.DataFrame() + for csv in csv_files: + temp_df = pd.read_csv(csv, dtype={'huc':str}) + all_csv_df = all_csv_df.append(temp_df, ignore_index = True) + #Write appended _info.csvs to file + all_info_csv = workspace / 'nws_lid_attributes.csv' + all_csv_df.to_csv(all_info_csv, index = False) + all_end = time.time() + print(f'total time is {(all_end - all_start)/60} minutes') + + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Create forecast files for all nws_lid sites') + parser.add_argument('-w', '--workspace', help = 'Workspace where all data will be stored.', required = True) + parser.add_argument('-u', '--nwm_us_search', help = 'Walk upstream on NWM network this many miles', required = True) + parser.add_argument('-d', '--nwm_ds_search', help = 'Walk downstream on NWM network this many miles', required = True) + #Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + + #Run create_flow_forecast_file + static_flow_lids(**args) diff --git a/tools/tools_shared_functions.py b/tools/tools_shared_functions.py index 094f5e2a5..aed515f5c 100755 --- a/tools/tools_shared_functions.py +++ b/tools/tools_shared_functions.py @@ -5,6 +5,9 @@ import csv import rasterio import pandas as pd +import geopandas as gpd +import requests + from tools_shared_variables import (TEST_CASES_DIR, PRINTWORTHY_STATS, GO_UP_STATS, GO_DOWN_STATS, ENDC, TGREEN_BOLD, TGREEN, TRED_BOLD, TWHITE, WHITE_BOLD, CYAN_BOLD) @@ -158,7 +161,7 @@ def profile_test_case_archive(archive_to_check, magnitude, stats_mode): # # print() # print("--------------------------------------------------------------------------------------------------") -# +# # stats_mode = stats_modes_list[0] # try: # last_version_index = text_block[0].index('dev_latest') @@ -173,7 +176,7 @@ def profile_test_case_archive(archive_to_check, magnitude, stats_mode): # print() # continue # -# +# # # for line in text_block: # first_item = line[0] @@ -238,7 +241,7 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ """ This generic function takes contingency table metrics as arguments and returns a dictionary of contingency table statistics. Much of the calculations below were taken from older Python files. This is evident in the inconsistent use of case. - + Args: true_negatives (int): The true negatives from a contingency table. 
false_negatives (int): The false negatives from a contingency table. @@ -246,37 +249,37 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ true_positives (int): The true positives from a contingency table. cell_area (float or None): This optional argument allows for area-based statistics to be calculated, in the case that contingency table metrics were derived from areal analysis. - + Returns: stats_dictionary (dict): A dictionary of statistics. Statistic names are keys and statistic values are the values. Refer to dictionary definition in bottom of function for statistic names. - + """ - + import numpy as np - + total_population = true_negatives + false_negatives + false_positives + true_positives - + # Basic stats. # Percent_correct = ((true_positives + true_negatives) / total_population) * 100 # pod = true_positives / (true_positives + false_negatives) - + try: FAR = false_positives / (true_positives + false_positives) except ZeroDivisionError: FAR = "NA" - + try: CSI = true_positives / (true_positives + false_positives + false_negatives) except ZeroDivisionError: CSI = "NA" - + try: BIAS = (true_positives + false_positives) / (true_positives + false_negatives) except ZeroDivisionError: BIAS = "NA" - - # Compute equitable threat score (ETS) / Gilbert Score. + + # Compute equitable threat score (ETS) / Gilbert Score. try: a_ref = ((true_positives + false_positives)*(true_positives + false_negatives)) / total_population EQUITABLE_THREAT_SCORE = (true_positives - a_ref) / (true_positives - a_ref + false_positives + false_negatives) @@ -290,12 +293,12 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ FP_perc = (false_positives / total_population) * 100 TN_perc = (true_negatives / total_population) * 100 FN_perc = (false_negatives / total_population) * 100 - + predPositive = true_positives + false_positives predNegative = true_negatives + false_negatives obsPositive = true_positives + false_negatives obsNegative = true_negatives + false_positives - + TP = float(true_positives) TN = float(true_negatives) FN = float(false_negatives) @@ -304,7 +307,7 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ MCC = (TP*TN - FP*FN)/ np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)) except ZeroDivisionError: MCC = "NA" - + if masked_count != None: total_pop_and_mask_pop = total_population + masked_count if total_pop_and_mask_pop == 0: @@ -313,23 +316,23 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ masked_perc = (masked_count / total_pop_and_mask_pop) * 100 else: masked_perc = None - + # This checks if a cell_area has been provided, thus making areal calculations possible. 
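A worked illustration of the scores defined above, using small hypothetical counts that are not taken from any benchmark run:

    # Hypothetical contingency counts, for illustration only
    true_positives, false_positives, false_negatives, true_negatives = 80, 20, 10, 890
    total_population = true_positives + false_positives + false_negatives + true_negatives  # 1000
    CSI = true_positives / (true_positives + false_positives + false_negatives)             # 80/110 ~ 0.727
    FAR = false_positives / (true_positives + false_positives)                              # 20/100 = 0.200
    BIAS = (true_positives + false_positives) / (true_positives + false_negatives)          # 100/90 ~ 1.111
    a_ref = ((true_positives + false_positives) * (true_positives + false_negatives)) / total_population  # 9.0
    ETS = (true_positives - a_ref) / (true_positives - a_ref + false_positives + false_negatives)         # 71/101 ~ 0.703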
sq_km_converter = 1000000 - + if cell_area != None: TP_area = (true_positives * cell_area) / sq_km_converter FP_area = (false_positives * cell_area) / sq_km_converter TN_area = (true_negatives * cell_area) / sq_km_converter FN_area = (false_negatives * cell_area) / sq_km_converter area = (total_population * cell_area) / sq_km_converter - + predPositive_area = (predPositive * cell_area) / sq_km_converter predNegative_area = (predNegative * cell_area) / sq_km_converter obsPositive_area = (obsPositive * cell_area) / sq_km_converter obsNegative_area = (obsNegative * cell_area) / sq_km_converter positiveDiff_area = predPositive_area - obsPositive_area - + if masked_count != None: masked_area = (masked_count * cell_area) / sq_km_converter else: @@ -342,14 +345,14 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ TN_area = None FN_area = None area = None - + predPositive_area = None predNegative_area = None obsPositive_area = None obsNegative_area = None positiveDiff_area = None MCC = None - + if total_population == 0: predPositive_perc, predNegative_perc, obsPositive_perc, obsNegative_perc , positiveDiff_perc = "NA", "NA", "NA", "NA", "NA" else: @@ -357,32 +360,32 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ predNegative_perc = (predNegative / total_population) * 100 obsPositive_perc = (obsPositive / total_population) * 100 obsNegative_perc = (obsNegative / total_population) * 100 - + positiveDiff_perc = predPositive_perc - obsPositive_perc - + if total_population == 0: prevalence = "NA" else: prevalence = (true_positives + false_negatives) / total_population - + try: PPV = true_positives / predPositive except ZeroDivisionError: PPV = "NA" - + try: NPV = true_negatives / predNegative except ZeroDivisionError: NPV = "NA" - + try: TNR = true_negatives / obsNegative except ZeroDivisionError: TNR = "NA" - + try: TPR = true_positives / obsPositive - + except ZeroDivisionError: TPR = "NA" @@ -390,12 +393,12 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ Bal_ACC = np.mean([TPR,TNR]) except TypeError: Bal_ACC = "NA" - + if total_population == 0: ACC = "NA" else: ACC = (true_positives + true_negatives) / total_population - + try: F1_score = (2*true_positives) / (2*true_positives + false_positives + false_negatives) except ZeroDivisionError: @@ -407,7 +410,7 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ 'false_positives_count': int(false_positives), 'contingency_tot_count': int(total_population), 'cell_area_m2': cell_area, - + 'TP_area_km2': TP_area, 'FP_area_km2': FP_area, 'TN_area_km2': TN_area, @@ -422,15 +425,15 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ 'CSI': CSI, 'FAR': FAR, - 'TPR': TPR, - 'TNR': TNR, - + 'TPR': TPR, + 'TNR': TNR, + 'PPV': PPV, 'NPV': NPV, 'ACC': ACC, 'Bal_ACC': Bal_ACC, 'MCC': MCC, - 'EQUITABLE_THREAT_SCORE': EQUITABLE_THREAT_SCORE, + 'EQUITABLE_THREAT_SCORE': EQUITABLE_THREAT_SCORE, 'PREVALENCE': prevalence, 'BIAS': BIAS, 'F1_SCORE': F1_score, @@ -444,11 +447,11 @@ def compute_stats_from_contingency_table(true_negatives, false_negatives, false_ 'obsPositive_perc': obsPositive_perc, 'obsNegative_perc': obsNegative_perc, 'positiveDiff_perc': positiveDiff_perc, - + 'masked_count': int(masked_count), 'masked_perc': masked_perc, 'masked_area_km2': masked_area, - + } return stats_dictionary @@ -461,15 +464,15 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r 1: 
False Negative 2: False Positive 3: True Positive - + Args: benchmark_raster_path (str): Path to the binary benchmark raster. 0 = phenomena not present, 1 = phenomena present, NoData = NoData. predicted_raster_path (str): Path to the predicted raster. 0 = phenomena not present, 1 = phenomena present, NoData = NoData. - + Returns: contingency_table_dictionary (dict): A Python dictionary of a contingency table. Key/value pair formatted as: {true_negatives: int, false_negatives: int, false_positives: int, true_positives: int} - + """ from rasterio.warp import reproject, Resampling import rasterio @@ -478,72 +481,72 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r import rasterio.mask import geopandas as gpd from shapely.geometry import box - + print("-----> Evaluating performance across the total area...") # Load rasters. benchmark_src = rasterio.open(benchmark_raster_path) predicted_src = rasterio.open(predicted_raster_path) predicted_array = predicted_src.read(1) - + benchmark_array_original = benchmark_src.read(1) - + if benchmark_array_original.shape != predicted_array.shape: benchmark_array = np.empty(predicted_array.shape, dtype=np.int8) - - reproject(benchmark_array_original, + + reproject(benchmark_array_original, destination = benchmark_array, - src_transform = benchmark_src.transform, + src_transform = benchmark_src.transform, src_crs = benchmark_src.crs, src_nodata = benchmark_src.nodata, - dst_transform = predicted_src.transform, + dst_transform = predicted_src.transform, dst_crs = predicted_src.crs, dst_nodata = benchmark_src.nodata, dst_resolution = predicted_src.res, resampling = Resampling.nearest) - + predicted_array_raw = predicted_src.read(1) - + # Align the benchmark domain to the modeled domain. benchmark_array = np.where(predicted_array==predicted_src.nodata, 10, benchmark_array) - - # Ensure zeros and ones for binary comparison. Assume that positive values mean flooding and 0 or negative values mean dry. + + # Ensure zeros and ones for binary comparison. Assume that positive values mean flooding and 0 or negative values mean dry. predicted_array = np.where(predicted_array==predicted_src.nodata, 10, predicted_array) # Reclassify NoData to 10 predicted_array = np.where(predicted_array<0, 0, predicted_array) predicted_array = np.where(predicted_array>0, 1, predicted_array) - - benchmark_array = np.where(benchmark_array==benchmark_src.nodata, 10, benchmark_array) # Reclassify NoData to 10 + + benchmark_array = np.where(benchmark_array==benchmark_src.nodata, 10, benchmark_array) # Reclassify NoData to 10 agreement_array = np.add(benchmark_array, 2*predicted_array) agreement_array = np.where(agreement_array>4, 10, agreement_array) - + del benchmark_src, benchmark_array, predicted_array, predicted_array_raw # Loop through exclusion masks and mask the agreement_array. if mask_dict != {}: for poly_layer in mask_dict: - + operation = mask_dict[poly_layer]['operation'] - + if operation == 'exclude': - + poly_path = mask_dict[poly_layer]['path'] buffer_val = mask_dict[poly_layer]['buffer'] - + reference = predicted_src - + bounding_box = gpd.GeoDataFrame({'geometry': box(*reference.bounds)}, index=[0], crs=reference.crs) - #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). + #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). 
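A small sketch, with illustrative values only, of how the benchmark + 2*predicted encoding above produces the agreement classes:

    import numpy as np

    benchmark = np.array([0, 1, 0, 1])  # 0 = dry, 1 = wet in the benchmark raster
    predicted = np.array([0, 0, 1, 1])  # 0 = dry, 1 = wet in the modeled raster
    agreement = np.add(benchmark, 2 * predicted)
    # agreement == [0, 1, 2, 3] -> true negative, false negative, false positive, true positive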
poly_all = gpd.read_file(poly_path, bbox = bounding_box) - + # Make sure features are present in bounding box area before projecting. Continue to next layer if features are absent. if poly_all.empty: continue - + print("-----> Masking at " + poly_layer + "...") #Project layer to reference crs. poly_all_proj = poly_all.to_crs(reference.crs) # check if there are any lakes within our reference raster extent. - if poly_all_proj.empty: + if poly_all_proj.empty: #If no features within reference raster extent, create a zero array of same shape as reference raster. poly_mask = np.zeros(reference.shape) else: @@ -554,7 +557,7 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r else: #If features are present and a buffer is passed, assign buffered geometry to variable. geometry = poly_all_proj.buffer(buffer_val) - + #Perform mask operation on the reference raster and using the previously declared geometry geoseries. Invert set to true as we want areas outside of poly areas to be False and areas inside poly areas to be True. in_poly,transform,c = rasterio.mask.raster_geometry_mask(reference, geometry, invert = True) #Write mask array, areas inside polys are set to 1 and areas outside poly are set to 0. @@ -562,12 +565,12 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r # Perform mask. masked_agreement_array = np.where(poly_mask == 1, 4, agreement_array) - + # Get rid of masked values outside of the modeled domain. agreement_array = np.where(agreement_array == 10, 10, masked_agreement_array) - + contingency_table_dictionary = {} # Initialize empty dictionary. - + # Only write the agreement raster if user-specified. if agreement_raster != None: with rasterio.Env(): @@ -575,15 +578,15 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r profile.update(nodata=10) with rasterio.open(agreement_raster, 'w', **profile) as dst: dst.write(agreement_array, 1) - + # Write legend text file legend_txt = os.path.join(os.path.split(agreement_raster)[0], 'read_me.txt') - + from datetime import datetime - + now = datetime.now() current_time = now.strftime("%m/%d/%Y %H:%M:%S") - + with open(legend_txt, 'w') as f: f.write("%s\n" % '0: True Negative') f.write("%s\n" % '1: False Negative') @@ -591,7 +594,7 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r f.write("%s\n" % '3: True Positive') f.write("%s\n" % '4: Masked area (excluded from contingency table analysis). Mask layers: {mask_dict}'.format(mask_dict=mask_dict)) f.write("%s\n" % 'Results produced at: {current_time}'.format(current_time=current_time)) - + # Store summed pixel counts in dictionary. contingency_table_dictionary.update({'total_area':{'true_negatives': int((agreement_array == 0).sum()), 'false_negatives': int((agreement_array == 1).sum()), @@ -599,34 +602,34 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r 'true_positives': int((agreement_array == 3).sum()), 'masked_count': int((agreement_array == 4).sum()), 'file_handle': 'total_area' - - }}) - + + }}) + # After agreement_array is masked with default mask layers, check for inclusion masks in mask_dict. 
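A sketch of the mask_dict structure this function consumes; the operation, path, and buffer keys are the ones read above and below, while the layer names and paths are hypothetical:

    mask_dict = {
        'levee_protected_areas': {'path': '/data/inputs/nld/levee_protected_areas.gpkg',  # hypothetical path
                                  'buffer': None,
                                  'operation': 'exclude'},   # masked out of the total-area contingency table
        'waterbodies': {'path': '/data/inputs/nwm/nwm_lakes.gpkg',                        # hypothetical path
                        'buffer': 0,
                        'operation': 'include'}              # evaluated separately under its own poly_handle
    }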
if mask_dict != {}: for poly_layer in mask_dict: - + operation = mask_dict[poly_layer]['operation'] - + if operation == 'include': poly_path = mask_dict[poly_layer]['path'] buffer_val = mask_dict[poly_layer]['buffer'] reference = predicted_src - + bounding_box = gpd.GeoDataFrame({'geometry': box(*reference.bounds)}, index=[0], crs=reference.crs) - #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). + #Read layer using the bbox option. CRS mismatches are handled if bbox is passed a geodataframe (which it is). poly_all = gpd.read_file(poly_path, bbox = bounding_box) - + # Make sure features are present in bounding box area before projecting. Continue to next layer if features are absent. if poly_all.empty: continue - + print("-----> Evaluating performance at " + poly_layer + "...") #Project layer to reference crs. poly_all_proj = poly_all.to_crs(reference.crs) # check if there are any lakes within our reference raster extent. - if poly_all_proj.empty: + if poly_all_proj.empty: #If no features within reference raster extent, create a zero array of same shape as reference raster. poly_mask = np.zeros(reference.shape) else: @@ -637,7 +640,7 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r else: #If features are present and a buffer is passed, assign buffered geometry to variable. geometry = poly_all_proj.buffer(buffer_val) - + #Perform mask operation on the reference raster and using the previously declared geometry geoseries. Invert set to true as we want areas outside of poly areas to be False and areas inside poly areas to be True. in_poly,transform,c = rasterio.mask.raster_geometry_mask(reference, geometry, invert = True) #Write mask array, areas inside polys are set to 1 and areas outside poly are set to 0. @@ -645,15 +648,15 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r # Perform mask. masked_agreement_array = np.where(poly_mask == 0, 4, agreement_array) # Changed to poly_mask == 0 - + # Get rid of masked values outside of the modeled domain. temp_agreement_array = np.where(agreement_array == 10, 10, masked_agreement_array) - + if buffer_val == None: # The buffer used is added to filename, and 0 is easier to read than None. buffer_val = 0 - + poly_handle = poly_layer + '_b' + str(buffer_val) + 'm' - + # Write the layer_agreement_raster. layer_agreement_raster = os.path.join(os.path.split(agreement_raster)[0], poly_handle + '_agreement.tif') with rasterio.Env(): @@ -661,7 +664,7 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r profile.update(nodata=10) with rasterio.open(layer_agreement_raster, 'w', **profile) as dst: dst.write(temp_agreement_array, 1) - + # Store summed pixel counts in dictionary. contingency_table_dictionary.update({poly_handle:{'true_negatives': int((temp_agreement_array == 0).sum()), @@ -673,4 +676,366 @@ def get_contingency_table_from_binary_rasters(benchmark_raster_path, predicted_r }}) return contingency_table_dictionary - +######################################################################## +######################################################################## +#Functions related to categorical fim and ahps evaluation +######################################################################## +def get_metadata(metadata_url, select_by, selector, must_include = None, upstream_trace_distance = None, downstream_trace_distance = None ): + ''' + Retrieve metadata for a site or list of sites. 
+ + Parameters + ---------- + metadata_url : STR + metadata base URL. + select_by : STR + Location search option. + selector : LIST + Value to match location data against. Supplied as a LIST. + must_include : STR, optional + What attributes are required to be valid response. The default is None. + upstream_trace_distance : INT, optional + Distance in miles upstream of site to trace NWM network. The default is None. + downstream_trace_distance : INT, optional + Distance in miles downstream of site to trace NWM network. The default is None. + + Returns + ------- + metadata_list : LIST + Dictionary or list of dictionaries containing metadata at each site. + metadata_dataframe : Pandas DataFrame + Dataframe of metadata for each site. + + ''' + + #Format selector variable in case multiple selectors supplied + format_selector = '%2C'.join(selector) + #Define the url + url = f'{metadata_url}/{select_by}/{format_selector}/' + #Assign optional parameters to a dictionary + params = {} + params['must_include'] = must_include + params['upstream_trace_distance'] = upstream_trace_distance + params['downstream_trace_distance'] = downstream_trace_distance + #Request data from url + response = requests.get(url, params = params) + if response.ok: + #Convert data response to a json + metadata_json = response.json() + #Get the count of returned records + location_count = metadata_json['_metrics']['location_count'] + #Get metadata + metadata_list = metadata_json['locations'] + #Add timestamp of WRDS retrieval + timestamp = response.headers['Date'] + #get crosswalk info (always last dictionary in list) + *metadata_list, crosswalk_info = metadata_list + #Update each dictionary with timestamp and crosswalk info + for metadata in metadata_list: + metadata.update({"wrds_timestamp": timestamp}) + metadata.update(crosswalk_info) + #If count is 1 + if location_count == 1: + metadata_list = metadata_json['locations'][0] + metadata_dataframe = pd.json_normalize(metadata_list) + #Replace all periods with underscores in column names + metadata_dataframe.columns = metadata_dataframe.columns.str.replace('.','_') + else: + #if request was not succesful, print error message. + print(f'Code: {response.status_code}\nMessage: {response.reason}\nURL: {response.url}') + #Return empty outputs + metadata_list = [] + metadata_dataframe = pd.DataFrame() + return metadata_list, metadata_dataframe + +######################################################################## +#Function to assign HUC code using the WBD spatial layer using a spatial join +######################################################################## +def aggregate_wbd_hucs(metadata_list, wbd_huc8_path, retain_attributes = False): + ''' + Assigns the proper FIM HUC 08 code to each site in the input DataFrame. + Converts input DataFrame to a GeoDataFrame using the lat/lon attributes + with sites containing null lat/lon removed. Reprojects GeoDataFrame + to same CRS as the HUC 08 layer. Performs a spatial join to assign the + HUC 08 layer to the GeoDataFrame. Sites that are not assigned a HUC + code removed as well as sites in Alaska and Canada. + + Parameters + ---------- + metadata_list: List of Dictionaries + Output list from get_metadata + wbd_huc8_path : pathlib Path + Path to HUC8 wbd layer (assumed to be geopackage format) + retain_attributes ; Bool OR List + Flag to define attributes of output GeoDataBase. If True, retain + all attributes. If False, the site metadata will be trimmed to a + default list. 
If a list of desired attributes is supplied these + will serve as the retained attributes. + Returns + ------- + dictionary : DICT + Dictionary with HUC (key) and corresponding AHPS codes (values). + all_gdf: GeoDataFrame + GeoDataFrame of all NWS_LID sites. + + ''' + #Import huc8 layer as geodataframe and retain necessary columns + huc8 = gpd.read_file(wbd_huc8_path, layer = 'WBDHU8') + huc8 = huc8[['HUC8','name','states', 'geometry']] + #Define EPSG codes for possible usgs latlon datum names (NAD83WGS84 assigned NAD83) + crs_lookup ={'NAD27':'EPSG:4267', 'NAD83':'EPSG:4269', 'NAD83WGS84': 'EPSG:4269'} + #Create empty geodataframe and define CRS for potential horizontal datums + metadata_gdf = gpd.GeoDataFrame() + #Iterate through each site + for metadata in metadata_list: + #Convert metadata to json + df = pd.json_normalize(metadata) + #Columns have periods due to nested dictionaries + df.columns = df.columns.str.replace('.', '_') + #Drop any metadata sites that don't have lat/lon populated + df.dropna(subset = ['identifiers_nws_lid','usgs_data_latitude','usgs_data_longitude'], inplace = True) + #If dataframe still has data + if not df.empty: + #Get horizontal datum (use usgs) and assign appropriate EPSG code + h_datum = df.usgs_data_latlon_datum_name.item() + src_crs = crs_lookup[h_datum] + #Convert dataframe to geodataframe using lat/lon (USGS) + site_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.usgs_data_longitude, df.usgs_data_latitude), crs = src_crs) + #Reproject to huc 8 crs + site_gdf = site_gdf.to_crs(huc8.crs) + #Append site geodataframe to metadata geodataframe + metadata_gdf = metadata_gdf.append(site_gdf, ignore_index = True) + + #Trim metadata to only have certain fields. + if not retain_attributes: + metadata_gdf = metadata_gdf[['identifiers_nwm_feature_id', 'identifiers_nws_lid', 'geometry']] + #If a list of attributes is supplied then use that list. + elif isinstance(retain_attributes,list): + metadata_gdf = metadata_gdf[retain_attributes] + + #Perform a spatial join to get the WBD HUC 8 assigned to each AHPS + joined_gdf = gpd.sjoin(metadata_gdf, huc8, how = 'inner', op = 'intersects', lsuffix = 'ahps', rsuffix = 'wbd') + joined_gdf = joined_gdf.drop(columns = 'index_wbd') + + #Remove all Alaska HUCS (Not in NWM v2.0 domain) + joined_gdf = joined_gdf[~joined_gdf.states.str.contains('AK')] + + #Create a dictionary of huc [key] and nws_lid[value] + dictionary = joined_gdf.groupby('HUC8')['identifiers_nws_lid'].apply(list).to_dict() + + return dictionary, joined_gdf + +######################################################################## +def mainstem_nwm_segs(metadata_url, list_of_sites): + ''' + Define the mainstems network. Currently a 4 pass approach that probably needs refined. + Once a final method is decided the code can be shortened. Passes are: + 1) Search downstream of gages designated as upstream. This is done to hopefully reduce the issue of mapping starting at the nws_lid. 91038 segments + 2) Search downstream of all LID that are rfc_forecast_point = True. Additional 48,402 segments + 3) Search downstream of all evaluated sites (sites with detailed FIM maps) Additional 222 segments + 4) Search downstream of all sites in HI/PR (locations have no rfc_forecast_point = True) Additional 408 segments + + Parameters + ---------- + metadata_url : STR + URL of API. + list_of_sites : LIST + List of evaluated sites. + + Returns + ------- + ms_nwm_segs_set : SET + Mainstems network segments as a set. 
+ + ''' + + #Define the downstream trace distance + downstream_trace_distance = 'all' + + #Trace downstream from all 'headwater' usgs gages + select_by = 'tag' + selector = ['usgs_gages_ii_ref_headwater'] + must_include = None + gages_list, gages_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Trace downstream from all rfc_forecast_point. + select_by = 'nws_lid' + selector = ['all'] + must_include = 'nws_data.rfc_forecast_point' + fcst_list, fcst_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Trace downstream from all evaluated ahps sites. + select_by = 'nws_lid' + selector = list_of_sites + must_include = None + eval_list, eval_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Trace downstream from all sites in HI/PR. + select_by = 'state' + selector = ['HI','PR'] + must_include = None + islands_list, islands_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Combine all lists of metadata dictionaries into a single list. + combined_lists = gages_list + fcst_list + eval_list + islands_list + #Define list that will contain all segments listed in metadata. + all_nwm_segments = [] + #For each lid metadata dictionary in list + for lid in combined_lists: + #get all downstream segments + downstream_nwm_segs = lid.get('downstream_nwm_features') + #Append downstream segments + if downstream_nwm_segs: + all_nwm_segments.extend(downstream_nwm_segs) + #Get the nwm feature id associated with the location + location_nwm_seg = lid.get('identifiers').get('nwm_feature_id') + if location_nwm_seg: + #Append nwm segment (conver to list) + all_nwm_segments.extend([location_nwm_seg]) + #Remove duplicates by assigning to a set. + ms_nwm_segs_set = set(all_nwm_segments) + + return ms_nwm_segs_set + +############################################################################## +#Function to create list of NWM segments +############################################################################### +def get_nwm_segs(metadata): + ''' + Using the metadata output from "get_metadata", output the NWM segments. + + Parameters + ---------- + metadata : DICT + Dictionary output from "get_metadata" function. + + Returns + ------- + all_segments : LIST + List of all NWM segments. 
+ + ''' + + nwm_feature_id = metadata.get('identifiers').get('nwm_feature_id') + upstream_nwm_features = metadata.get('upstream_nwm_features') + downstream_nwm_features = metadata.get('downstream_nwm_features') + + all_segments = [] + #Convert NWM feature id segment to a list (this is always a string or empty) + if nwm_feature_id: + nwm_feature_id = [nwm_feature_id] + all_segments.extend(nwm_feature_id) + #Add all upstream segments (always a list or empty) + if upstream_nwm_features: + all_segments.extend(upstream_nwm_features) + #Add all downstream segments (always a list or empty) + if downstream_nwm_features: + all_segments.extend(downstream_nwm_features) + + return all_segments + +####################################################################### +#Thresholds +####################################################################### +def get_thresholds(threshold_url, location_ids, physical_element = 'all', threshold = 'all', bypass_source_flag = False): + ''' + Get nws_lid threshold stages and flows (i.e. bankfull, action, minor, + moderate, major). Returns a dictionary for stages and one for flows. + + Parameters + ---------- + threshold_url : STR + WRDS threshold API. + location_ids : STR + nws_lid code (only a single code). + physical_element : STR, optional + Physical element option. The default is 'all'. + threshold : STR, optional + Threshold option. The default is 'all'. + bypass_source_flag : BOOL, optional + Special case if calculated values are not available (e.g. no rating + curve is available) then this allows for just a stage to be returned. + Used in case a flow is already known from another source, such as + a model. The default is False. + + Returns + ------- + stages : DICT + Dictionary of stages at each threshold. + flows : DICT + Dictionary of flows at each threshold. + + ''' + + url = f'{threshold_url}/{physical_element}/{threshold}/{location_ids}' + response = requests.get(url) + if response.ok: + thresholds_json = response.json() + #Get metadata + thresholds_info = thresholds_json['stream_thresholds'] + #Initialize stages/flows dictionaries + stages = {} + flows = {} + #Check if thresholds information is populated. If site is non-existent thresholds info is blank + if thresholds_info: + #Get all rating sources and corresponding indexes in a dictionary + rating_sources = {i.get('calc_flow_values').get('rating_curve').get('source'): index for index, i in enumerate(thresholds_info)} + #Get threshold data use USGS Rating Depot (priority) otherwise NRLDB. + if 'USGS Rating Depot' in rating_sources: + threshold_data = thresholds_info[rating_sources['USGS Rating Depot']] + elif 'NRLDB' in rating_sources: + threshold_data = thresholds_info[rating_sources['NRLDB']] + #If neither USGS or NRLDB is available + else: + #A flag option for cases where only a stage is needed for USGS scenario where a rating curve source is not available yet stages are available for the site. If flag is enabled, then stages are retrieved from the first record in thresholds_info. Typically the flows will not be populated as no rating curve is available. Flag should only be enabled when flows are already supplied by source (e.g. USGS) and threshold stages are needed. + if bypass_source_flag: + threshold_data = thresholds_info[0] + else: + threshold_data = [] + #Get stages and flows for each threshold + if threshold_data: + stages = threshold_data['stage_values'] + flows = threshold_data['calc_flow_values'] + #Add source information to stages and flows. Flows source inside a nested dictionary. 
Remove key once source assigned to flows. + stages['source'] = threshold_data['metadata']['threshold_source'] + flows['source'] = flows['rating_curve']['source'] + flows.pop('rating_curve', None) + #Add timestamp WRDS data was retrieved. + stages['wrds_timestamp'] = response.headers['Date'] + flows['wrds_timestamp'] = response.headers['Date'] + return stages, flows + +######################################################################## +# Function to write flow file +######################################################################## +def flow_data(segments, flows, convert_to_cms = True): + ''' + Given a list of NWM segments and a flow value in cfs, convert flow to + cms and return a DataFrame that is set up for export to a flow file. + + Parameters + ---------- + segments : LIST + List of NWM segments. + flows : FLOAT + Flow in CFS. + convert_to_cms : BOOL + Flag to indicate if supplied flows should be converted to metric. + Default value is True (assume input flows are CFS). + + Returns + ------- + flow_data : DataFrame + Dataframe ready for export to a flow file. + + ''' + if convert_to_cms: + #Convert cfs to cms + cfs_to_cms = 0.3048**3 + flows_cms = round(flows * cfs_to_cms,2) + else: + flows_cms = round(flows,2) + + flow_data = pd.DataFrame({'feature_id':segments, 'discharge':flows_cms}) + flow_data = flow_data.astype({'feature_id' : int , 'discharge' : float}) + return flow_data From d43bd8b6ab433f68729fb9337bcb7cb6132461e2 Mon Sep 17 00:00:00 2001 From: Brad Date: Wed, 24 Mar 2021 10:21:04 -0500 Subject: [PATCH 054/359] Patch to synthesize_test_cases.py Bug fix to synthesize_test_cases.py to allow comparison between testing version and official versions. --- CHANGELOG.md | 9 ++ tools/synthesize_test_cases.py | 202 ++++++++++++++++++--------------- 2 files changed, 122 insertions(+), 89 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5601ba729..07be1fdb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
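Tying the WRDS helpers added above together, a hedged usage sketch of get_thresholds() and flow_data(); the endpoint, lid, and NWM segment are hypothetical:

    # Hypothetical endpoint, lid, and NWM segment; real values come from tools/.env and WRDS.
    threshold_url = 'https://wrds.example/api/threshold/v1.0'
    stages, flows = get_thresholds(threshold_url=threshold_url, location_ids='dalt2',
                                   physical_element='all', threshold='all')
    if flows and flows.get('action'):
        flow_df = flow_data(segments=[5791828], flows=flows['action'])  # cfs in, cms out
        # flow_df columns: feature_id, discharge; e.g. 1000 cfs -> 28.32 cms since 0.3048**3 ~ 0.0283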

+ +## v3.0.10.1 - 2021-03-24 - [PR #320](https://github.com/NOAA-OWP/cahaba/pull/320) + + Patch to synthesize_test_cases.py. + + ### Changes + - Bug fix to `synthesize_test_cases.py` to allow comparison between `testing` version and `official` versions. +
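A short sketch of what this fix enables (version names are hypothetical): when run with the DEV configuration, the version string passed as dev_comparison is also crawled under each test case's testing_versions directory, as the diff below shows.

    # Hypothetical values, mirroring the branch added to process_alpha_test below
    config = 'DEV'
    fim_version = 'fim_3_0_10_1'    # hypothetical run version
    special_string = 'calib_test'   # hypothetical user-supplied suffix
    dev_comparison = fim_version + '_' + special_string if config == 'DEV' else None
    # create_master_metrics_csv(..., dev_comparison=dev_comparison) then also reads metrics from
    # each test case's testing_versions/fim_3_0_10_1_calib_test directory, in addition to
    # the archived versions under official_versions.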

+ ## v3.0.10.0 - 2021-03-12 - [PR #298](https://github.com/NOAA-OWP/cahaba/pull/298) Preprocessing of flow files for Categorical FIM. diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py index f3d02192c..06f55b4a0 100755 --- a/tools/synthesize_test_cases.py +++ b/tools/synthesize_test_cases.py @@ -10,7 +10,7 @@ from tools_shared_variables import TEST_CASES_DIR, PREVIOUS_FIM_DIR, OUTPUTS_DIR, AHPS_BENCHMARK_CATEGORIES -def create_master_metrics_csv(master_metrics_csv_output): +def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): # Construct header metrics_to_write = ['true_negatives_count', @@ -61,10 +61,13 @@ def create_master_metrics_csv(master_metrics_csv_output): versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) - for benchmark_source in ['ble', 'nws', 'usgs']: + if dev_comparison != None: + iteration_list = ['official', 'comparison'] + if dev_comparison == None: + iteration_list = ['official'] + for benchmark_source in ['ble', 'nws', 'usgs']: benchmark_test_case_dir = os.path.join(TEST_CASES_DIR, benchmark_source + '_test_cases') - if benchmark_source == 'ble': test_cases_list = os.listdir(benchmark_test_case_dir) @@ -73,43 +76,51 @@ def create_master_metrics_csv(master_metrics_csv_output): int(test_case.split('_')[0]) huc = test_case.split('_')[0] - official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') - - for magnitude in ['100yr', '500yr']: - for version in versions_to_aggregate: - if '_fr' in version: - extent_config = 'FR' - elif '_ms' in version: - extent_config = 'MS' - else: - extent_config = 'FR' - if "_c" in version and version.split('_c')[1] == "": - calibrated = "yes" - else: - calibrated = "no" - version_dir = os.path.join(official_versions, version) - magnitude_dir = os.path.join(version_dir, magnitude) - - if os.path.exists(magnitude_dir): - magnitude_dir_list = os.listdir(magnitude_dir) - for f in magnitude_dir_list: - if '.json' in f: - flow = 'NA' - nws_lid = "NA" - benchmark_source = 'ble' - sub_list_to_append = [version, nws_lid, magnitude, huc] - full_json_path = os.path.join(magnitude_dir, f) - if os.path.exists(full_json_path): - stats_dict = json.load(open(full_json_path)) - for metric in metrics_to_write: - sub_list_to_append.append(stats_dict[metric]) - sub_list_to_append.append(full_json_path) - sub_list_to_append.append(flow) - sub_list_to_append.append(benchmark_source) - sub_list_to_append.append(extent_config) - sub_list_to_append.append(calibrated) - - list_to_write.append(sub_list_to_append) + + for iteration in iteration_list: + + if iteration == "official": + versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') + versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) + if iteration == "comparison": + versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions') + versions_to_aggregate = [dev_comparison] + + for magnitude in ['100yr', '500yr']: + for version in versions_to_aggregate: + if '_fr' in version: + extent_config = 'FR' + elif '_ms' in version: + extent_config = 'MS' + else: + extent_config = 'FR' + if "_c" in version and version.split('_c')[1] == "": + calibrated = "yes" + else: + calibrated = "no" + version_dir = os.path.join(versions_to_crawl, version) + magnitude_dir = os.path.join(version_dir, magnitude) + + if os.path.exists(magnitude_dir): + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f: + flow = 'NA' + nws_lid = "NA" + benchmark_source = 'ble' 
+ sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + if os.path.exists(full_json_path): + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + sub_list_to_append.append(extent_config) + sub_list_to_append.append(calibrated) + + list_to_write.append(sub_list_to_append) except ValueError: pass @@ -121,54 +132,62 @@ def create_master_metrics_csv(master_metrics_csv_output): int(test_case.split('_')[0]) huc = test_case.split('_')[0] - official_versions = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') - - for magnitude in ['action', 'minor', 'moderate', 'major']: - for version in versions_to_aggregate: - if '_fr' in version: - extent_config = 'FR' - elif '_ms' in version: - extent_config = 'MS' - else: - extent_config = 'FR' - if "_c" in version and version.split('_c')[1] == "": - calibrated = "yes" - else: - calibrated = "no" - - version_dir = os.path.join(official_versions, version) - magnitude_dir = os.path.join(version_dir, magnitude) - if os.path.exists(magnitude_dir): - magnitude_dir_list = os.listdir(magnitude_dir) - for f in magnitude_dir_list: - if '.json' in f and 'total_area' not in f: - nws_lid = f[:5] - sub_list_to_append = [version, nws_lid, magnitude, huc] - full_json_path = os.path.join(magnitude_dir, f) - flow = '' - if os.path.exists(full_json_path): - - # Get flow used to map. - flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') - if os.path.exists(flow_file): - with open(flow_file, newline='') as csv_file: - reader = csv.reader(csv_file) - next(reader) - for row in reader: - flow = row[1] - if nws_lid == 'mcc01': - print(flow) - - stats_dict = json.load(open(full_json_path)) - for metric in metrics_to_write: - sub_list_to_append.append(stats_dict[metric]) - sub_list_to_append.append(full_json_path) - sub_list_to_append.append(flow) - sub_list_to_append.append(benchmark_source) - sub_list_to_append.append(extent_config) - sub_list_to_append.append(calibrated) - - list_to_write.append(sub_list_to_append) + + for iteration in iteration_list: + + if iteration == "official": + versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') + versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) + if iteration == "comparison": + versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions') + versions_to_aggregate = [dev_comparison] + + for magnitude in ['action', 'minor', 'moderate', 'major']: + for version in versions_to_aggregate: + if '_fr' in version: + extent_config = 'FR' + elif '_ms' in version: + extent_config = 'MS' + else: + extent_config = 'FR' + if "_c" in version and version.split('_c')[1] == "": + calibrated = "yes" + else: + calibrated = "no" + + version_dir = os.path.join(versions_to_crawl, version) + magnitude_dir = os.path.join(version_dir, magnitude) + if os.path.exists(magnitude_dir): + magnitude_dir_list = os.listdir(magnitude_dir) + for f in magnitude_dir_list: + if '.json' in f and 'total_area' not in f: + nws_lid = f[:5] + sub_list_to_append = [version, nws_lid, magnitude, huc] + full_json_path = os.path.join(magnitude_dir, f) + flow = '' + if os.path.exists(full_json_path): + + # Get flow used to map. 
+ flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') + if os.path.exists(flow_file): + with open(flow_file, newline='') as csv_file: + reader = csv.reader(csv_file) + next(reader) + for row in reader: + flow = row[1] + if nws_lid == 'mcc01': + print(flow) + + stats_dict = json.load(open(full_json_path)) + for metric in metrics_to_write: + sub_list_to_append.append(stats_dict[metric]) + sub_list_to_append.append(full_json_path) + sub_list_to_append.append(flow) + sub_list_to_append.append(benchmark_source) + sub_list_to_append.append(extent_config) + sub_list_to_append.append(calibrated) + + list_to_write.append(sub_list_to_append) except ValueError: pass @@ -307,4 +326,9 @@ def process_alpha_test(args): # Do aggregate_metrics. print("Creating master metrics CSV...") - create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv) + + if config == 'DEV': + dev_comparison = fim_version + "_" + special_string + else: + dev_comparison = None + create_master_metrics_csv(master_metrics_csv_output=master_metrics_csv, dev_comparison=dev_comparison) From 34123ddb37d6b6c44caf3572d8683d6b433ebca1 Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Thu, 25 Mar 2021 08:51:04 -0500 Subject: [PATCH 055/359] Improvements to CatFIM service source data generation Improvements to CatFIM service source data generation. - Renamed generate_categorical_fim.py to generate_categorical_fim_mapping.py. - Updated the status outputs of the nws_lid_sites layer and saved it in the same directory as the merged catfim_library layer. - Additional stability fixes (such as improved compatability with WRDS updates). - Added generate_categorical_fim.py to wrap generate_categorical_fim_flows.py and generate_categorical_fim_mapping.py. - Create new nws_lid_sites shapefile located in same directory as the catfim_library shapefile. This resolves #305, resolves #321, and resolves #322. --- CHANGELOG.md | 13 + tools/generate_categorical_fim.py | 417 ++++++---------------- tools/generate_categorical_fim_flows.py | 183 +++++----- tools/generate_categorical_fim_mapping.py | 329 +++++++++++++++++ tools/inundation.py | 13 +- tools/tools_shared_functions.py | 82 +++-- 6 files changed, 592 insertions(+), 445 deletions(-) create mode 100755 tools/generate_categorical_fim_mapping.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 07be1fdb7..164ce1a44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. + +## v3.0.11.0 - 2021-03-22 - [PR #319](https://github.com/NOAA-OWP/cahaba/pull/298) + + Improvements to CatFIM service source data generation. + + ### Changes + - Renamed `generate_categorical_fim.py` to `generate_categorical_fim_mapping.py`. + - Updated the status outputs of the `nws_lid_sites layer` and saved it in the same directory as the `merged catfim_library layer`. + - Additional stability fixes (such as improved compatability with WRDS updates). +### Additions + - Added `generate_categorical_fim.py` to wrap `generate_categorical_fim_flows.py` and `generate_categorical_fim_mapping.py`. + - Create new `nws_lid_sites` shapefile located in same directory as the `catfim_library` shapefile. +
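A rough sketch of the wrapper pattern this entry describes, assuming the subprocess-based orchestration shown at the top of the new generate_categorical_fim.py; the workspace path and search distances are hypothetical, and the mapping script's arguments are abbreviated:

    import subprocess

    # Stage 1: generate flow files for every nws_lid
    # (flags match the argparse in generate_categorical_fim_flows.py; path and distances are hypothetical).
    subprocess.call(['python3', '/foss_fim/tools/generate_categorical_fim_flows.py',
                     '-w', '/data/catfim/flows', '-u', '5', '-d', '5'])

    # Stage 2: map those flows with inundate(); its flag list is not shown in this hunk,
    # so the call is left as a placeholder.
    # subprocess.call(['python3', '/foss_fim/tools/generate_categorical_fim_mapping.py', ...])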

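As a usage illustration of the new wrapper described above, the sketch below condenses the sequence that `generate_categorical_fim.py` now drives: flow generation, mapping, then a status update for sites that failed to map. The FIM version, job count, and data paths are hypothetical placeholders patterned on the defaults visible in the diff further down; this is not the shipped script.

```python
# Minimal sketch (not the shipped wrapper) of the CatFIM sequence added in this
# patch. fim_version, number_of_jobs, and the /data/... paths are illustrative.
import subprocess
from datetime import date
from pathlib import Path

fim_version = 'fim_3_0_11_0'      # hypothetical fim_run.sh output name
number_of_jobs = 4                # hypothetical process count
today = date.today().strftime('%m%d%Y')

fim_run_dir = Path(f'/data/previous_fim/{fim_version}')
output_flows_dir = Path(f'/data/catfim/{fim_version}/{today}/flows')
output_mapping_dir = Path(f'/data/catfim/{fim_version}/{today}/mapping')

# 1) Create static flow files for every nws_lid (10 mi upstream/downstream search).
subprocess.call(['python3', 'generate_categorical_fim_flows.py',
                 '-w', str(output_flows_dir), '-u', '10', '-d', '10'])

# 2) Inundate each HUC/site/magnitude and aggregate the CatFIM library.
subprocess.call(['python3', 'generate_categorical_fim_mapping.py',
                 '-r', str(fim_run_dir), '-s', str(output_flows_dir),
                 '-o', str(output_mapping_dir), '-j', str(number_of_jobs)])
```

The wrapper then calls `update_mapping_status()` so that sites whose inundation maps were never produced are flagged `mapped = 'no'` in the new `nws_lid_sites` layer.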
## v3.0.10.1 - 2021-03-24 - [PR #320](https://github.com/NOAA-OWP/cahaba/pull/320) diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py index bec059960..f51bf5aa8 100755 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -1,317 +1,112 @@ #!/usr/bin/env python3 - -import sys -import os -from multiprocessing import Pool +import subprocess import argparse -import traceback -import rasterio +import time +from pathlib import Path import geopandas as gpd import pandas as pd -import shutil -from rasterio.features import shapes -from shapely.geometry.polygon import Polygon -from shapely.geometry.multipolygon import MultiPolygon -from inundation import inundate -sys.path.append('/foss_fim/src') -from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION -from utils.shared_functions import getDriver - -INPUTS_DIR = r'/data/inputs' -magnitude_list = ['action', 'minor', 'moderate','major', 'record'] - -# Define necessary variables for inundation() -hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' -mask_type, catchment_poly = 'huc', '' - - -def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif, log_file): - - no_data_list = [] - procs_list = [] - - source_flow_dir_list = os.listdir(source_flow_dir) - output_flow_dir_list = os.listdir(fim_run_dir) - - # Log missing hucs - missing_hucs = list(set(source_flow_dir_list) - set(output_flow_dir_list)) - missing_hucs = [huc for huc in missing_hucs if "." not in huc] - if len(missing_hucs) > 0: - f = open(log_file, 'a+') - f.write(f"Missing hucs from output directory: {', '.join(missing_hucs)}\n") - f.close() - - # Loop through matching huc directories in the source_flow directory - matching_hucs = list(set(output_flow_dir_list) & set(source_flow_dir_list)) - for huc in matching_hucs: - - if "." not in huc: - - # Get list of AHPS site directories - ahps_site_dir = os.path.join(source_flow_dir, huc) - ahps_site_dir_list = os.listdir(ahps_site_dir) - - # Map paths to HAND files needed for inundation() - fim_run_huc_dir = os.path.join(fim_run_dir, huc) - rem = os.path.join(fim_run_huc_dir, 'rem_zeroed_masked.tif') - catchments = os.path.join(fim_run_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') - hydroTable = os.path.join(fim_run_huc_dir, 'hydroTable.csv') - - exit_flag = False # Default to False. - - # Check if necessary data exist; set exit_flag to True if they don't exist - for f in [rem, catchments, hydroTable]: - if not os.path.exists(f): - no_data_list.append(f) - exit_flag = True - - # Log missing data - if exit_flag == True: - f = open(log_file, 'a+') - f.write(f"Missing data for: {fim_run_huc_dir}\n") - f.close() - - # Map path to huc directory inside out output_cat_fim_dir - cat_fim_huc_dir = os.path.join(output_cat_fim_dir, huc) - if not os.path.exists(cat_fim_huc_dir): - os.mkdir(cat_fim_huc_dir) - - # Loop through AHPS sites - for ahps_site in ahps_site_dir_list: - # map parent directory for AHPS source data dir and list AHPS thresholds (act, min, mod, maj) - ahps_site_parent = os.path.join(ahps_site_dir, ahps_site) - thresholds_dir_list = os.listdir(ahps_site_parent) - - # Map parent directory for all inundation output filesoutput files. 
- cat_fim_huc_ahps_dir = os.path.join(cat_fim_huc_dir, ahps_site) - if not os.path.exists(cat_fim_huc_ahps_dir): - os.mkdir(cat_fim_huc_ahps_dir) - - # Loop through thresholds/magnitudes and define inundation output files paths - for magnitude in thresholds_dir_list: - - if "." not in magnitude: - - magnitude_flows_csv = os.path.join(ahps_site_parent, magnitude, 'ahps_' + ahps_site + '_huc_' + huc + '_flows_' + magnitude + '.csv') - - if os.path.exists(magnitude_flows_csv): - - output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') - - if depthtif: - output_depth_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_depth.tif') - else: - output_depth_grid = None - - # Append necessary variables to list for multiprocessing. - procs_list.append([rem, catchments, catchment_poly, magnitude_flows_csv, huc, hydroTable, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_file]) - - # Initiate multiprocessing - print(f"Running inundation for {len(procs_list)} sites using {number_of_jobs} jobs") - pool = Pool(number_of_jobs) - pool.map(run_inundation, procs_list) - - -def run_inundation(args): - - rem = args[0] - catchments = args[1] - catchment_poly = args[2] - magnitude_flows_csv = args[3] - huc = args[4] - hydroTable = args[5] - output_extent_grid = args[6] - output_depth_grid = args[7] - ahps_site = args[8] - magnitude = args[9] - log_file = args[10] - - try: - inundate(rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, - subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=None, - depths=output_depth_grid,out_raster_profile=None,out_vector_profile=None,quiet=True - ) - - except Exception: - # Log errors and their tracebacks - f = open(log_file, 'a+') - f.write(f"{output_extent_grid} - inundation error: {traceback.format_exc()}\n") - f.close() - - -def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, nws_lid_attributes_filename, log_file): - - # Create workspace - gpkg_dir = os.path.join(output_cat_fim_dir, 'gpkg') - if not os.path.exists(gpkg_dir): - os.mkdir(gpkg_dir) - - fim_version = os.path.basename(output_cat_fim_dir) - merged_layer = os.path.join(output_cat_fim_dir, 'catfim_library.gpkg') - - if not os.path.exists(merged_layer): # prevents appending to existing output - - huc_ahps_dir_list = os.listdir(output_cat_fim_dir) - skip_list=['errors','logs','gpkg',merged_layer] - - for magnitude in magnitude_list: - - procs_list = [] - - # Loop through all categories - for huc in huc_ahps_dir_list: - - if huc not in skip_list: - - huc_dir = os.path.join(output_cat_fim_dir, huc) - ahps_dir_list = os.listdir(huc_dir) - - # Loop through ahps sites - for ahps_lid in ahps_dir_list: - ahps_lid_dir = os.path.join(huc_dir, ahps_lid) - - extent_grid = os.path.join(ahps_lid_dir, ahps_lid + '_' + magnitude + '_extent_' + huc + '.tif') - - if os.path.exists(extent_grid): - procs_list.append([ahps_lid, extent_grid, gpkg_dir, fim_version, huc, magnitude, nws_lid_attributes_filename]) - - else: - try: - f = open(log_file, 'a+') - f.write(f"Missing layers: {extent_gpkg}\n") - f.close() - except: - pass - - # Multiprocess with instructions - pool = Pool(number_of_jobs) - pool.map(reformat_inundation_maps, procs_list) - - # Merge all layers - print(f"Merging {len(os.listdir(gpkg_dir))} layers...") - - for layer in os.listdir(gpkg_dir): - - diss_extent_filename = os.path.join(gpkg_dir, layer) - - # Open 
diss_extent - diss_extent = gpd.read_file(diss_extent_filename) - - # Write/append aggregate diss_extent - if os.path.isfile(merged_layer): - diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False, mode='a') - else: - diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False) - - del diss_extent - - shutil.rmtree(gpkg_dir) - - else: - print(f"{merged_layer} already exists.") - - -def reformat_inundation_maps(args): - - try: - lid = args[0] - grid_path = args[1] - gpkg_dir = args[2] - fim_version = args[3] - huc = args[4] - magnitude = args[5] - nws_lid_attributes_filename = args[6] - - # Convert raster to to shapes - with rasterio.open(grid_path) as src: - image = src.read(1) - mask = image > 0 - - # Aggregate shapes - results = ({'properties': {'extent': 1}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=mask,transform=src.transform))) - - # convert list of shapes to polygon - extent_poly = gpd.GeoDataFrame.from_features(list(results), crs=PREP_PROJECTION) - - # Dissolve polygons - extent_poly_diss = extent_poly.dissolve(by='extent') - - # Update attributes - extent_poly_diss = extent_poly_diss.reset_index(drop=True) - extent_poly_diss['ahps_lid'] = lid - extent_poly_diss['magnitude'] = magnitude - extent_poly_diss['version'] = fim_version - extent_poly_diss['huc'] = huc - - # Project to Web Mercator - extent_poly = extent_poly.to_crs(VIZ_PROJECTION) - - # Join attributes - nws_lid_attributes_table = pd.read_csv(nws_lid_attributes_filename, dtype={'huc':str}) - nws_lid_attributes_table = nws_lid_attributes_table.loc[(nws_lid_attributes_table.magnitude==magnitude) & (nws_lid_attributes_table.nws_lid==lid)] - - - extent_poly_diss = extent_poly_diss.merge(nws_lid_attributes_table, left_on=['ahps_lid','magnitude','huc'], right_on=['nws_lid','magnitude','huc']) - - extent_poly_diss = extent_poly_diss.drop(columns='nws_lid') - - # Save dissolved multipolygon - handle = os.path.split(grid_path)[1].replace('.tif', '') - - diss_extent_filename = os.path.join(gpkg_dir, handle + "_dissolved.gpkg") - - extent_poly_diss["geometry"] = [MultiPolygon([feature]) if type(feature) == Polygon else feature for feature in extent_poly_diss["geometry"]] - - if not extent_poly_diss.empty: - - extent_poly_diss.to_file(diss_extent_filename,driver=getDriver(diss_extent_filename),index=False) - - except Exception as e: - # Log and clean out the gdb so it's not merged in later - try: - f = open(log_file, 'a+') - f.write(str(diss_extent_filename) + " - dissolve error: " + str(e)) - f.close() - except: - pass - - +from datetime import date + +def update_mapping_status(output_mapping_dir, output_flows_dir): + ''' + Updates the status for nws_lids from the flows subdirectory. Status + is updated for sites where the inundation.py routine was not able to + produce inundation for the supplied flow files. It is assumed that if + an error occured in inundation.py that all flow files for a given site + experienced the error as they all would have the same nwm segments. + + Parameters + ---------- + output_mapping_dir : STR + Path to the output directory of all inundation maps. + output_flows_dir : STR + Path to the directory containing all flows. + + Returns + ------- + None. 
+ + ''' + #Find all LIDs with empty mapping output folders + subdirs = [str(i) for i in Path(output_mapping_dir).rglob('**/*') if i.is_dir()] + empty_nws_lids = [Path(directory).name for directory in subdirs if not list(Path(directory).iterdir())] + + #Write list of empty nws_lids to DataFrame, these are sites that failed in inundation.py + mapping_df = pd.DataFrame({'nws_lid':empty_nws_lids}) + mapping_df['did_it_map'] = 'no' + mapping_df['map_status'] = ' and all categories failed to map' + + #Import shapefile output from flows creation + shapefile = Path(output_flows_dir)/'nws_lid_flows_sites.shp' + flows_df = gpd.read_file(shapefile) + + #Join failed sites to flows df + flows_df = flows_df.merge(mapping_df, how = 'left', on = 'nws_lid') + + #Switch mapped column to no for failed sites and update status + flows_df.loc[flows_df['did_it_map'] == 'no', 'mapped'] = 'no' + flows_df.loc[flows_df['did_it_map']=='no','status'] = flows_df['status'] + flows_df['map_status'] + + #Perform pass for HUCs where mapping was skipped due to missing data. + flows_hucs = [i.stem for i in Path(output_flows_dir).iterdir() if i.is_dir()] + mapping_hucs = [i.stem for i in Path(output_mapping_dir).iterdir() if i.is_dir()] + missing_mapping_hucs = list(set(flows_hucs) - set(mapping_hucs)) + #Update status for nws_lid in missing hucs and change mapped attribute to 'no' + flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'status'] = flows_df['status'] + ' and all categories failed to map because missing HUC information' + flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'mapped'] = 'no' + + #Clean up GeoDataFrame and rename columns for consistency. + flows_df = flows_df.drop(columns = ['did_it_map','map_status']) + flows_df = flows_df.rename(columns = {'nws_lid':'ahps_lid'}) + + #Write out to file + nws_lid_path = Path(output_mapping_dir) / 'nws_lid_sites.shp' + flows_df.to_file(nws_lid_path) + if __name__ == '__main__': - - # Parse arguments - parser = argparse.ArgumentParser(description='Categorical inundation mapping for FOSS FIM.') - parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) - parser.add_argument('-s', '--source-flow-dir',help='Path to directory containing flow CSVs to use to generate categorical FIM.',required=True, default="") - parser.add_argument('-o', '--output-cat-fim-dir',help='Path to directory where categorical FIM outputs will be written.',required=True, default="") - parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) - parser.add_argument('-depthtif','--write-depth-tiff',help='Using this option will write depth TIFFs.',required=False, action='store_true') - + + #Parse arguments + parser = argparse.ArgumentParser(description = 'Run Categorical FIM') + parser.add_argument('-f','--fim_version',help='Name of directory containing outputs of fim_run.sh',required=True) + parser.add_argument('-j','--number_of_jobs',help='Number of processes to use. 
Default is 1.',required=False, default="1",type=int) args = vars(parser.parse_args()) - - fim_run_dir = args['fim_run_dir'] - source_flow_dir = args['source_flow_dir'] - output_cat_fim_dir = args['output_cat_fim_dir'] - number_of_jobs = int(args['number_of_jobs']) - depthtif = args['write_depth_tiff'] - - - # Create output directory - if not os.path.exists(output_cat_fim_dir): - os.mkdir(output_cat_fim_dir) - - # Create log directory - log_dir = os.path.join(output_cat_fim_dir, 'logs') - if not os.path.exists(log_dir): - os.mkdir(log_dir) - - # Create error log path - log_file = os.path.join(log_dir, 'errors.log') - - # Map path to points with attributes - nws_lid_attributes_filename = os.path.join(source_flow_dir, 'nws_lid_attributes.csv') - - print("Generating Categorical FIM") - generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif,log_file) - - print("Aggregating Categorical FIM") - post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir,nws_lid_attributes_filename,log_file) + + #Get arguments + fim_version = args['fim_version'] + number_of_jobs = args['number_of_jobs'] + + #################################################################### + #Define default arguments. Modify these if necessary. + today = date.today().strftime('%m%d%Y') + fim_run_dir = Path(f'/data/previous_fim/{fim_version}') + output_flows_dir = Path(f'/data/catfim/{fim_version}/{today}/flows') + output_mapping_dir = Path(f'/data/catfim/{fim_version}/{today}/mapping') + nwm_us_search = '10' + nwm_ds_search = '10' + write_depth_tiff = False + #################################################################### + + #################################################################### + #Run CatFIM scripts in sequence + #################################################################### + #Generate CatFIM flow files. + print('Creating flow files') + start = time.time() + subprocess.call(['python3','generate_categorical_fim_flows.py', '-w' , str(output_flows_dir), '-u', nwm_us_search, '-d', nwm_ds_search]) + end = time.time() + elapsed_time = (end-start)/60 + print(f'Finished creating flow files in {elapsed_time} minutes') + + #Generate CatFIM mapping. 
+ print('Begin mapping') + start = time.time() + subprocess.call(['python3','generate_categorical_fim_mapping.py', '-r' , str(fim_run_dir), '-s', str(output_flows_dir), '-o', str(output_mapping_dir), '-j', str(number_of_jobs)]) + end = time.time() + elapsed_time = (end-start)/60 + print(f'Finished mapping in {elapsed_time} minutes') + + #Updating Mapping Status + print('Updating mapping status') + update_mapping_status(str(output_mapping_dir), str(output_flows_dir)) + + \ No newline at end of file diff --git a/tools/generate_categorical_fim_flows.py b/tools/generate_categorical_fim_flows.py index 562b3e8e5..ca57e089c 100755 --- a/tools/generate_categorical_fim_flows.py +++ b/tools/generate_categorical_fim_flows.py @@ -11,13 +11,15 @@ sys.path.append('/foss_fim/src') from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION -load_dotenv() -#import variables from .env file -API_BASE_URL = os.getenv("API_BASE_URL") -EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") -WBD_LAYER = os.getenv("WBD_LAYER") +def get_env_paths(): + load_dotenv() + #import variables from .env file + API_BASE_URL = os.getenv("API_BASE_URL") + EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") + WBD_LAYER = os.getenv("WBD_LAYER") + return API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER -def static_flow_lids(workspace, nwm_us_search, nwm_ds_search): +def generate_catfim_flows(workspace, nwm_us_search, nwm_ds_search): ''' This will create static flow files for all nws_lids and save to the workspace directory with the following format: @@ -45,19 +47,20 @@ def static_flow_lids(workspace, nwm_us_search, nwm_ds_search): ------- None. - ''' + ''' + all_start = time.time() #Define workspace and wbd_path as a pathlib Path. Convert search distances to integer. workspace = Path(workspace) nwm_us_search = int(nwm_us_search) nwm_ds_search = int(nwm_ds_search) metadata_url = f'{API_BASE_URL}/metadata' - threshold_url = f'{API_BASE_URL}/threshold' + threshold_url = f'{API_BASE_URL}/nws_threshold' ################################################################### + #Create workspace - workspace.mkdir(exist_ok = True) + workspace.mkdir(parents=True,exist_ok = True) - #Return dictionary of huc (key) and sublist of ahps(value) as well as geodataframe of sites. print('Retrieving metadata...') #Get metadata for 'CONUS' conus_list, conus_dataframe = get_metadata(metadata_url, select_by = 'nws_lid', selector = ['all'], must_include = 'nws_data.rfc_forecast_point', upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search ) @@ -67,42 +70,41 @@ def static_flow_lids(workspace, nwm_us_search, nwm_ds_search): #Append the dataframes and lists all_lists = conus_list + islands_list - all_dataframe = conus_dataframe.append(islands_dataframe) print('Determining HUC using WBD layer...') - #Assign FIM HUC to GeoDataFrame and export to shapefile all candidate sites. - agg_start = time.time() + #Assign HUCs to all sites using a spatial join of the FIM 3 HUC layer. + #Get a dictionary of hucs (key) and sites (values) as well as a GeoDataFrame + #of all sites used later in script. 
huc_dictionary, out_gdf = aggregate_wbd_hucs(metadata_list = all_lists, wbd_huc8_path = WBD_LAYER) - viz_out_gdf = out_gdf.to_crs(VIZ_PROJECTION) - viz_out_gdf.to_file(workspace / f'candidate_sites.shp') - agg_end = time.time() - print(f'agg time is {(agg_end - agg_start)/60} minutes') + #Get all possible mainstem segments print('Getting list of mainstem segments') #Import list of evaluated sites list_of_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].to_list() - #The entire routine to get mainstems is harcoded in this function. + #The entire routine to get mainstems is hardcoded in this function. ms_segs = mainstem_nwm_segs(metadata_url, list_of_sites) - #Loop through each huc unit + #Loop through each huc unit, first define message variable and flood categories. all_messages = [] + flood_categories = ['action', 'minor', 'moderate', 'major', 'record'] for huc in huc_dictionary: print(f'Iterating through {huc}') #Get list of nws_lids nws_lids = huc_dictionary[huc] #Loop through each lid in list to create flow file for lid in nws_lids: - #In some instances the lid is not assigned a name, skip over these. - if not isinstance(lid,str): - print(f'{lid} is {type(lid)}') - continue #Convert lid to lower case lid = lid.lower() #Get stages and flows for each threshold from the WRDS API. Priority given to USGS calculated flows. - stages, flows = get_thresholds(threshold_url = threshold_url, location_ids = lid, physical_element = 'all', threshold = 'all', bypass_source_flag = False) - #If stages/flows don't exist write message and exit out. - if not (stages and flows): - message = f'{lid} no thresholds' + stages, flows = get_thresholds(threshold_url = threshold_url, select_by = 'nws_lid', selector = lid, threshold = 'all') + #Check if stages are supplied, if not write message and exit. + if all(stages.get(category, None)==None for category in flood_categories): + message = f'{lid}:missing threshold stages' + all_messages.append(message) + continue + #Check if calculated flows are supplied, if not write message and exit. + if all(flows.get(category, None) == None for category in flood_categories): + message = f'{lid}:missing calculated flows' all_messages.append(message) continue @@ -116,11 +118,11 @@ def static_flow_lids(workspace, nwm_us_search, nwm_ds_search): #if no segments, write message and exit out if not segments: print(f'{lid} no segments') - message = f'{lid} no segments' + message = f'{lid}:missing nwm segments' all_messages.append(message) continue #For each flood category - for category in ['action', 'minor', 'moderate', 'major', 'record']: + for category in flood_categories: #Get the flow flow = flows[category] #If there is a valid flow value, write a flow file. @@ -135,102 +137,99 @@ def static_flow_lids(workspace, nwm_us_search, nwm_ds_search): #Write flow file to file flow_info.to_csv(output_file, index = False) else: - message = f'{lid}_{category}_no flow' + message = f'{lid}:{category} is missing calculated flow' all_messages.append(message) - #This section will produce a point file of the LID location + #Get various attributes of the site. 
- lat = float(metadata['usgs_data']['latitude']) - lon = float(metadata['usgs_data']['longitude']) + lat = float(metadata['usgs_preferred']['latitude']) + lon = float(metadata['usgs_preferred']['longitude']) wfo = metadata['nws_data']['wfo'] rfc = metadata['nws_data']['rfc'] state = metadata['nws_data']['state'] county = metadata['nws_data']['county'] name = metadata['nws_data']['name'] - q_act = flows['action'] - q_min = flows['minor'] - q_mod = flows['moderate'] - q_maj = flows['major'] - q_rec = flows['record'] flow_units = flows['units'] flow_source = flows['source'] - s_act = stages['action'] - s_min = stages['minor'] - s_mod = stages['moderate'] - s_maj = stages['major'] - s_rec = stages['record'] stage_units = stages['units'] stage_source = stages['source'] wrds_timestamp = stages['wrds_timestamp'] - #Create a DataFrame using the collected attributes - df = pd.DataFrame({'nws_lid': [lid], 'name':name, 'WFO': wfo, 'rfc':rfc, 'huc':[huc], 'state':state, 'county':county, 'q_act':q_act, 'q_min':q_min, 'q_mod':q_mod, 'q_maj':q_maj, 'q_rec':q_rec, 'q_uni':flow_units, 'q_src':flow_source, 'stage_act':s_act, 'stage_min':s_min, 'stage_mod':s_mod, 'stage_maj':s_maj, 'stage_rec':s_rec, 'stage_uni':stage_units, 's_src':stage_source, 'wrds_time':wrds_timestamp, 'lat':[lat], 'lon':[lon]}) - #Round stages and flows to nearest hundredth - df = df.round({'q_act':2,'q_min':2,'q_mod':2,'q_maj':2,'q_rec':2,'stage_act':2,'stage_min':2,'stage_mod':2,'stage_maj':2,'stage_rec':2}) - - #Create a geodataframe using usgs lat/lon property from WRDS then reproject to WGS84. - #Define EPSG codes for possible usgs latlon datum names (NAD83WGS84 assigned NAD83) - crs_lookup ={'NAD27':'EPSG:4267', 'NAD83':'EPSG:4269', 'NAD83WGS84': 'EPSG:4269'} - #Get horizontal datum (from dataframe) and assign appropriate EPSG code, assume NAD83 if not assigned. - h_datum = metadata['usgs_data']['latlon_datum_name'] - src_crs = crs_lookup.get(h_datum, 'EPSG:4269') - gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lon'], df['lat']), crs = src_crs) - #Reproject to VIZ_PROJECTION - viz_gdf = gdf.to_csv(VIZ_PROJECTION) + nrldb_timestamp = metadata['nrldb_timestamp'] + nwis_timestamp = metadata['nwis_timestamp'] - #Create a csv with same info as shapefile + #Create a csv with same information as shapefile but with each threshold as new record. csv_df = pd.DataFrame() - for threshold in ['action', 'minor', 'moderate', 'major', 'record']: - line_df = pd.DataFrame({'nws_lid': [lid], 'name':name, 'WFO': wfo, 'rfc':rfc, 'huc':[huc], 'state':state, 'county':county, 'magnitude': threshold, 'q':flows[threshold], 'q_uni':flows['units'], 'q_src':flow_source, 'stage':stages[threshold], 'stage_uni':stages['units'], 's_src':stage_source, 'wrds_time':wrds_timestamp, 'lat':[lat], 'lon':[lon]}) + for threshold in flood_categories: + line_df = pd.DataFrame({'nws_lid': [lid], 'name':name, 'WFO': wfo, 'rfc':rfc, 'huc':[huc], 'state':state, 'county':county, 'magnitude': threshold, 'q':flows[threshold], 'q_uni':flows['units'], 'q_src':flow_source, 'stage':stages[threshold], 'stage_uni':stages['units'], 's_src':stage_source, 'wrds_time':wrds_timestamp, 'nrldb_time':nrldb_timestamp,'nwis_time':nwis_timestamp, 'lat':[lat], 'lon':[lon]}) csv_df = csv_df.append(line_df) #Round flow and stage columns to 2 decimal places. - csv = csv_df.round({'q':2,'stage':2}) + csv_df = csv_df.round({'q':2,'stage':2}) #If a site folder exists (ie a flow file was written) save files containing site attributes. 
- try: - #Save GeoDataFrame to shapefile format and export csv containing attributes - output_dir = workspace / huc / lid - viz_gdf.to_file(output_dir / f'{lid}_location.shp' ) + output_dir = workspace / huc / lid + if output_dir.exists(): + #Export DataFrame to csv containing attributes csv_df.to_csv(output_dir / f'{lid}_attributes.csv', index = False) - except: - print(f'{lid} missing all flows') - message = f'{lid} missing all flows' + else: + message = f'{lid}:missing all calculated flows' all_messages.append(message) - #Write out messages to file - messages_df = pd.DataFrame(all_messages, columns = ['message']) - messages_df.to_csv(workspace / f'all_messages.csv', index = False) - - #Recursively find all location shapefiles - locations_files = list(workspace.rglob('*_location.shp')) - spatial_layers = gpd.GeoDataFrame() - #Append all shapefile info to a geodataframe - for location in locations_files: - location_gdf = gpd.read_file(location) - spatial_layers = spatial_layers.append(location_gdf) - #Write appended spatial data to disk. - output_file = workspace /'all_mapped_ahps.shp' - spatial_layers.to_file(output_file) - - #Recursively find all *_info csv files and append + + print('wrapping up...') + #Recursively find all *_attributes csv files and append csv_files = list(workspace.rglob('*_attributes.csv')) all_csv_df = pd.DataFrame() for csv in csv_files: + #Huc has to be read in as string to preserve leading zeros. temp_df = pd.read_csv(csv, dtype={'huc':str}) all_csv_df = all_csv_df.append(temp_df, ignore_index = True) - #Write appended _info.csvs to file - all_info_csv = workspace / 'nws_lid_attributes.csv' - all_csv_df.to_csv(all_info_csv, index = False) + #Write to file + all_csv_df.to_csv(workspace / 'nws_lid_attributes.csv', index = False) + + #This section populates a shapefile of all potential sites and details + #whether it was mapped or not (mapped field) and if not, why (status field). + + #Preprocess the out_gdf GeoDataFrame. Reproject and reformat fields. + viz_out_gdf = out_gdf.to_crs(VIZ_PROJECTION) + viz_out_gdf.rename(columns = {'identifiers_nwm_feature_id': 'nwm_seg', 'identifiers_nws_lid':'nws_lid', 'identifiers_usgs_site_code':'usgs_gage'}, inplace = True) + viz_out_gdf['nws_lid'] = viz_out_gdf['nws_lid'].str.lower() + + #Using list of csv_files, populate DataFrame of all nws_lids that had + #a flow file produced and denote with "mapped" column. + nws_lids = [file.stem.split('_attributes')[0] for file in csv_files] + lids_df = pd.DataFrame(nws_lids, columns = ['nws_lid']) + lids_df['mapped'] = 'yes' + + #Identify what lids were mapped by merging with lids_df. Populate + #'mapped' column with 'No' if sites did not map. + viz_out_gdf = viz_out_gdf.merge(lids_df, how = 'left', on = 'nws_lid') + viz_out_gdf['mapped'] = viz_out_gdf['mapped'].fillna('no') + + #Write messages to DataFrame, split into columns, aggregate messages. + messages_df = pd.DataFrame(all_messages, columns = ['message']) + messages_df = messages_df['message'].str.split(':', n = 1, expand = True).rename(columns={0:'nws_lid', 1:'status'}) + status_df = messages_df.groupby(['nws_lid'])['status'].apply(', '.join).reset_index() + + #Join messages to populate status field to candidate sites. Assign + #status for null fields. 
+ viz_out_gdf = viz_out_gdf.merge(status_df, how = 'left', on = 'nws_lid') + viz_out_gdf['status'] = viz_out_gdf['status'].fillna('all calculated flows available') + + #Filter out columns and write out to file + viz_out_gdf = viz_out_gdf.filter(['nws_lid','usgs_gage','nwm_seg','HUC8','mapped','status','geometry']) + viz_out_gdf.to_file(workspace /'nws_lid_flows_sites.shp') + + #time operation all_end = time.time() - print(f'total time is {(all_end - all_start)/60} minutes') + print(f'total time is {round((all_end - all_start)/60),1} minutes') - if __name__ == '__main__': #Parse arguments parser = argparse.ArgumentParser(description = 'Create forecast files for all nws_lid sites') parser.add_argument('-w', '--workspace', help = 'Workspace where all data will be stored.', required = True) parser.add_argument('-u', '--nwm_us_search', help = 'Walk upstream on NWM network this many miles', required = True) parser.add_argument('-d', '--nwm_ds_search', help = 'Walk downstream on NWM network this many miles', required = True) - #Extract to dictionary and assign to variables. args = vars(parser.parse_args()) - #Run create_flow_forecast_file - static_flow_lids(**args) + #Run get_env_paths and static_flow_lids + API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER = get_env_paths() + generate_catfim_flows(**args) diff --git a/tools/generate_categorical_fim_mapping.py b/tools/generate_categorical_fim_mapping.py new file mode 100755 index 000000000..3d591b989 --- /dev/null +++ b/tools/generate_categorical_fim_mapping.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python3 + +import sys +import os +from multiprocessing import Pool +import argparse +import traceback +import rasterio +import geopandas as gpd +import pandas as pd +import shutil +from rasterio.features import shapes +from shapely.geometry.polygon import Polygon +from shapely.geometry.multipolygon import MultiPolygon +from inundation import inundate +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION +from utils.shared_functions import getDriver + +INPUTS_DIR = r'/data/inputs' +magnitude_list = ['action', 'minor', 'moderate','major', 'record'] + +# Define necessary variables for inundation() +hucs, hucs_layerName = os.path.join(INPUTS_DIR, 'wbd', 'WBD_National.gpkg'), 'WBDHU8' +mask_type, catchment_poly = 'huc', '' + + +def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif, log_file): + + no_data_list = [] + procs_list = [] + + source_flow_dir_list = os.listdir(source_flow_dir) + output_flow_dir_list = os.listdir(fim_run_dir) + + # Log missing hucs + missing_hucs = list(set(source_flow_dir_list) - set(output_flow_dir_list)) + missing_hucs = [huc for huc in missing_hucs if "." not in huc] + if len(missing_hucs) > 0: + f = open(log_file, 'a+') + f.write(f"Missing hucs from output directory: {', '.join(missing_hucs)}\n") + f.close() + + # Loop through matching huc directories in the source_flow directory + matching_hucs = list(set(output_flow_dir_list) & set(source_flow_dir_list)) + for huc in matching_hucs: + + if "." 
not in huc: + + # Get list of AHPS site directories + ahps_site_dir = os.path.join(source_flow_dir, huc) + ahps_site_dir_list = os.listdir(ahps_site_dir) + + # Map paths to HAND files needed for inundation() + fim_run_huc_dir = os.path.join(fim_run_dir, huc) + rem = os.path.join(fim_run_huc_dir, 'rem_zeroed_masked.tif') + catchments = os.path.join(fim_run_huc_dir, 'gw_catchments_reaches_filtered_addedAttributes.tif') + hydroTable = os.path.join(fim_run_huc_dir, 'hydroTable.csv') + + exit_flag = False # Default to False. + + # Check if necessary data exist; set exit_flag to True if they don't exist + for f in [rem, catchments, hydroTable]: + if not os.path.exists(f): + no_data_list.append(f) + exit_flag = True + + # Log missing data + if exit_flag == True: + f = open(log_file, 'a+') + f.write(f"Missing data for: {fim_run_huc_dir}\n") + f.close() + + # Map path to huc directory inside out output_cat_fim_dir + cat_fim_huc_dir = os.path.join(output_cat_fim_dir, huc) + if not os.path.exists(cat_fim_huc_dir): + os.mkdir(cat_fim_huc_dir) + + # Loop through AHPS sites + for ahps_site in ahps_site_dir_list: + # map parent directory for AHPS source data dir and list AHPS thresholds (act, min, mod, maj) + ahps_site_parent = os.path.join(ahps_site_dir, ahps_site) + thresholds_dir_list = os.listdir(ahps_site_parent) + + # Map parent directory for all inundation output filesoutput files. + cat_fim_huc_ahps_dir = os.path.join(cat_fim_huc_dir, ahps_site) + if not os.path.exists(cat_fim_huc_ahps_dir): + os.mkdir(cat_fim_huc_ahps_dir) + + # Loop through thresholds/magnitudes and define inundation output files paths + for magnitude in thresholds_dir_list: + + if "." not in magnitude: + + magnitude_flows_csv = os.path.join(ahps_site_parent, magnitude, 'ahps_' + ahps_site + '_huc_' + huc + '_flows_' + magnitude + '.csv') + + if os.path.exists(magnitude_flows_csv): + + output_extent_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_extent.tif') + + if depthtif: + output_depth_grid = os.path.join(cat_fim_huc_ahps_dir, ahps_site + '_' + magnitude + '_depth.tif') + else: + output_depth_grid = None + + # Append necessary variables to list for multiprocessing. + procs_list.append([rem, catchments, magnitude_flows_csv, huc, hydroTable, output_extent_grid, output_depth_grid, ahps_site, magnitude, log_file]) + + # Initiate multiprocessing + print(f"Running inundation for {len(procs_list)} sites using {number_of_jobs} jobs") + pool = Pool(number_of_jobs) + pool.map(run_inundation, procs_list) + + +def run_inundation(args): + + rem = args[0] + catchments = args[1] + magnitude_flows_csv = args[2] + huc = args[3] + hydroTable = args[4] + output_extent_grid = args[5] + output_depth_grid = args[6] + ahps_site = args[7] + magnitude = args[8] + log_file = args[9] + + try: + inundate(rem,catchments,catchment_poly,hydroTable,magnitude_flows_csv,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, + subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=None, + depths=output_depth_grid,out_raster_profile=None,out_vector_profile=None,quiet=True + ) + + except: + # Log errors and their tracebacks + f = open(log_file, 'a+') + f.write(f"{output_extent_grid} - inundation error: {traceback.format_exc()}\n") + f.close() + + #Inundation.py appends the huc code to the supplied output_extent_grid. + #Modify output_extent_grid to match inundation.py saved filename. + #Search for this file, if it didn't create, send message to log file. 
+ base_file_path,extension = os.path.splitext(output_extent_grid) + saved_extent_grid_filename = "{}_{}{}".format(base_file_path,huc,extension) + if not os.path.exists(saved_extent_grid_filename): + with open(log_file, 'a+') as f: + f.write('FAILURE_huc_{}:{}:{} map failed to create\n'.format(huc,ahps_site,magnitude)) + +def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, nws_lid_attributes_filename, log_file): + + # Create workspace + gpkg_dir = os.path.join(output_cat_fim_dir, 'gpkg') + if not os.path.exists(gpkg_dir): + os.mkdir(gpkg_dir) + + + #Find the FIM version + norm_path = os.path.normpath(output_cat_fim_dir) + cat_fim_dir_parts = norm_path.split(os.sep) + [fim_version] = [part for part in cat_fim_dir_parts if part.startswith('fim_3')] + merged_layer = os.path.join(output_cat_fim_dir, 'catfim_library.shp') + + if not os.path.exists(merged_layer): # prevents appending to existing output + + huc_ahps_dir_list = os.listdir(output_cat_fim_dir) + skip_list=['errors','logs','gpkg',merged_layer] + + for magnitude in magnitude_list: + + procs_list = [] + + # Loop through all categories + for huc in huc_ahps_dir_list: + + if huc not in skip_list: + + huc_dir = os.path.join(output_cat_fim_dir, huc) + ahps_dir_list = os.listdir(huc_dir) + + # Loop through ahps sites + for ahps_lid in ahps_dir_list: + ahps_lid_dir = os.path.join(huc_dir, ahps_lid) + + extent_grid = os.path.join(ahps_lid_dir, ahps_lid + '_' + magnitude + '_extent_' + huc + '.tif') + + if os.path.exists(extent_grid): + procs_list.append([ahps_lid, extent_grid, gpkg_dir, fim_version, huc, magnitude, nws_lid_attributes_filename]) + + else: + try: + f = open(log_file, 'a+') + f.write(f"Missing layers: {extent_gpkg}\n") + f.close() + except: + pass + + # Multiprocess with instructions + pool = Pool(number_of_jobs) + pool.map(reformat_inundation_maps, procs_list) + + # Merge all layers + print(f"Merging {len(os.listdir(gpkg_dir))} layers...") + + for layer in os.listdir(gpkg_dir): + + diss_extent_filename = os.path.join(gpkg_dir, layer) + + # Open diss_extent + diss_extent = gpd.read_file(diss_extent_filename) + diss_extent['viz'] = 'yes' + + # Write/append aggregate diss_extent + if os.path.isfile(merged_layer): + diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False, mode='a') + else: + diss_extent.to_file(merged_layer,driver=getDriver(merged_layer),index=False) + + del diss_extent + + shutil.rmtree(gpkg_dir) + + else: + print(f"{merged_layer} already exists.") + + +def reformat_inundation_maps(args): + + try: + lid = args[0] + grid_path = args[1] + gpkg_dir = args[2] + fim_version = args[3] + huc = args[4] + magnitude = args[5] + nws_lid_attributes_filename = args[6] + + # Convert raster to to shapes + with rasterio.open(grid_path) as src: + image = src.read(1) + mask = image > 0 + + # Aggregate shapes + results = ({'properties': {'extent': 1}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=mask,transform=src.transform))) + + # convert list of shapes to polygon + extent_poly = gpd.GeoDataFrame.from_features(list(results), crs=PREP_PROJECTION) + + # Dissolve polygons + extent_poly_diss = extent_poly.dissolve(by='extent') + + # Update attributes + extent_poly_diss = extent_poly_diss.reset_index(drop=True) + extent_poly_diss['ahps_lid'] = lid + extent_poly_diss['magnitude'] = magnitude + extent_poly_diss['version'] = fim_version + extent_poly_diss['huc'] = huc + + # Project to Web Mercator + extent_poly_diss = extent_poly_diss.to_crs(VIZ_PROJECTION) + + # Join attributes + 
nws_lid_attributes_table = pd.read_csv(nws_lid_attributes_filename, dtype={'huc':str}) + nws_lid_attributes_table = nws_lid_attributes_table.loc[(nws_lid_attributes_table.magnitude==magnitude) & (nws_lid_attributes_table.nws_lid==lid)] + + + extent_poly_diss = extent_poly_diss.merge(nws_lid_attributes_table, left_on=['ahps_lid','magnitude','huc'], right_on=['nws_lid','magnitude','huc']) + + extent_poly_diss = extent_poly_diss.drop(columns='nws_lid') + + # Save dissolved multipolygon + handle = os.path.split(grid_path)[1].replace('.tif', '') + + diss_extent_filename = os.path.join(gpkg_dir, handle + "_dissolved.gpkg") + + extent_poly_diss["geometry"] = [MultiPolygon([feature]) if type(feature) == Polygon else feature for feature in extent_poly_diss["geometry"]] + + if not extent_poly_diss.empty: + + extent_poly_diss.to_file(diss_extent_filename,driver=getDriver(diss_extent_filename),index=False) + + except Exception as e: + # Log and clean out the gdb so it's not merged in later + try: + f = open(log_file, 'a+') + f.write(str(diss_extent_filename) + " - dissolve error: " + str(e)) + f.close() + except: + pass + + +if __name__ == '__main__': + + # Parse arguments + parser = argparse.ArgumentParser(description='Categorical inundation mapping for FOSS FIM.') + parser.add_argument('-r','--fim-run-dir',help='Name of directory containing outputs of fim_run.sh',required=True) + parser.add_argument('-s', '--source-flow-dir',help='Path to directory containing flow CSVs to use to generate categorical FIM.',required=True, default="") + parser.add_argument('-o', '--output-cat-fim-dir',help='Path to directory where categorical FIM outputs will be written.',required=True, default="") + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) + parser.add_argument('-depthtif','--write-depth-tiff',help='Using this option will write depth TIFFs.',required=False, action='store_true') + + args = vars(parser.parse_args()) + + fim_run_dir = args['fim_run_dir'] + source_flow_dir = args['source_flow_dir'] + output_cat_fim_dir = args['output_cat_fim_dir'] + number_of_jobs = int(args['number_of_jobs']) + depthtif = args['write_depth_tiff'] + + + # Create output directory + if not os.path.exists(output_cat_fim_dir): + os.mkdir(output_cat_fim_dir) + + # Create log directory + log_dir = os.path.join(output_cat_fim_dir, 'logs') + if not os.path.exists(log_dir): + os.mkdir(log_dir) + + # Create error log path + log_file = os.path.join(log_dir, 'errors.log') + + # Map path to points with attributes + nws_lid_attributes_filename = os.path.join(source_flow_dir, 'nws_lid_attributes.csv') + + print("Generating Categorical FIM") + generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, number_of_jobs, depthtif,log_file) + + print("Aggregating Categorical FIM") + post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir,nws_lid_attributes_filename,log_file) diff --git a/tools/inundation.py b/tools/inundation.py index e7c600510..d093385b8 100755 --- a/tools/inundation.py +++ b/tools/inundation.py @@ -17,6 +17,7 @@ from warnings import warn from gdal import BuildVRT import geopandas as gpd +import sys def inundate( @@ -454,14 +455,14 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): 'HydroID':str,'stage':float, 'discharge_cms':float,'LakeID' : int} ) - + huc_error = hydroTable.HUC.unique() hydroTable.set_index(['HUC','feature_id','HydroID'],inplace=True) hydroTable = hydroTable[hydroTable["LakeID"] == 
-999] # Subset hydroTable to include only non-lake catchments. if hydroTable.empty: - print ("All stream segments in HUC are within lake boundaries.") - return + print(f"All stream segments in HUC(s): {huc_error} are within lake boundaries.") + sys.exit(0) elif isinstance(hydroTable,pd.DataFrame): pass #consider checking for correct dtypes, indices, and columns @@ -504,7 +505,11 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] # join tables - hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') + try: + hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') + except AttributeError: + print (f"No matching feature IDs between forecast and hydrotable for HUC(s): {subset_hucs}") + sys.exit(0) # initialize dictionary catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) diff --git a/tools/tools_shared_functions.py b/tools/tools_shared_functions.py index aed515f5c..13534f87b 100755 --- a/tools/tools_shared_functions.py +++ b/tools/tools_shared_functions.py @@ -728,15 +728,16 @@ def get_metadata(metadata_url, select_by, selector, must_include = None, upstrea metadata_list = metadata_json['locations'] #Add timestamp of WRDS retrieval timestamp = response.headers['Date'] + #Add timestamp of sources retrieval + nrldb_timestamp, nwis_timestamp = metadata_json['data_sources']['metadata_sources'] #get crosswalk info (always last dictionary in list) - *metadata_list, crosswalk_info = metadata_list - #Update each dictionary with timestamp and crosswalk info + crosswalk_info = metadata_json['data_sources'] + #Update each dictionary with timestamp and crosswalk info also save to DataFrame. for metadata in metadata_list: metadata.update({"wrds_timestamp": timestamp}) + metadata.update({"nrldb_timestamp":nrldb_timestamp}) + metadata.update({"nwis_timestamp":nwis_timestamp}) metadata.update(crosswalk_info) - #If count is 1 - if location_count == 1: - metadata_list = metadata_json['locations'][0] metadata_dataframe = pd.json_normalize(metadata_list) #Replace all periods with underscores in column names metadata_dataframe.columns = metadata_dataframe.columns.str.replace('.','_') @@ -754,8 +755,8 @@ def get_metadata(metadata_url, select_by, selector, must_include = None, upstrea def aggregate_wbd_hucs(metadata_list, wbd_huc8_path, retain_attributes = False): ''' Assigns the proper FIM HUC 08 code to each site in the input DataFrame. - Converts input DataFrame to a GeoDataFrame using the lat/lon attributes - with sites containing null lat/lon removed. Reprojects GeoDataFrame + Converts input DataFrame to a GeoDataFrame using lat/lon attributes + with sites containing null nws_lid/lat/lon removed. Reprojects GeoDataFrame to same CRS as the HUC 08 layer. Performs a spatial join to assign the HUC 08 layer to the GeoDataFrame. Sites that are not assigned a HUC code removed as well as sites in Alaska and Canada. 
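Between the hunks above and below, it may help to see how the revised `get_metadata()` is consumed elsewhere in this patch. This is a sketch only, assuming it is run from the `tools/` directory, with a placeholder URL standing in for the `API_BASE_URL` value normally loaded from `.env`.

```python
# Sketch only: mirrors the get_metadata() call pattern used in
# generate_categorical_fim_flows.py. The URL below is a placeholder.
from tools_shared_functions import get_metadata

metadata_url = 'https://example.gov/api/metadata'   # stand-in for API_BASE_URL
metadata_list, metadata_df = get_metadata(
    metadata_url,
    select_by='nws_lid',
    selector=['all'],
    must_include='nws_data.rfc_forecast_point',
    upstream_trace_distance=10,
    downstream_trace_distance=10)

# Each site dictionary now also carries the retrieval timestamps added here.
site = metadata_list[0]
print(site['wrds_timestamp'], site['nrldb_timestamp'], site['nwis_timestamp'])
```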
@@ -782,8 +783,8 @@ def aggregate_wbd_hucs(metadata_list, wbd_huc8_path, retain_attributes = False): #Import huc8 layer as geodataframe and retain necessary columns huc8 = gpd.read_file(wbd_huc8_path, layer = 'WBDHU8') huc8 = huc8[['HUC8','name','states', 'geometry']] - #Define EPSG codes for possible usgs latlon datum names (NAD83WGS84 assigned NAD83) - crs_lookup ={'NAD27':'EPSG:4267', 'NAD83':'EPSG:4269', 'NAD83WGS84': 'EPSG:4269'} + #Define EPSG codes for possible latlon datum names (default of NAD83 if unassigned) + crs_lookup ={'NAD27':'EPSG:4267', 'NAD83':'EPSG:4269', 'WGS84': 'EPSG:4326'} #Create empty geodataframe and define CRS for potential horizontal datums metadata_gdf = gpd.GeoDataFrame() #Iterate through each site @@ -793,14 +794,20 @@ def aggregate_wbd_hucs(metadata_list, wbd_huc8_path, retain_attributes = False): #Columns have periods due to nested dictionaries df.columns = df.columns.str.replace('.', '_') #Drop any metadata sites that don't have lat/lon populated - df.dropna(subset = ['identifiers_nws_lid','usgs_data_latitude','usgs_data_longitude'], inplace = True) + df.dropna(subset = ['identifiers_nws_lid','usgs_preferred_latitude', 'usgs_preferred_longitude'], inplace = True) #If dataframe still has data if not df.empty: - #Get horizontal datum (use usgs) and assign appropriate EPSG code - h_datum = df.usgs_data_latlon_datum_name.item() - src_crs = crs_lookup[h_datum] - #Convert dataframe to geodataframe using lat/lon (USGS) - site_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.usgs_data_longitude, df.usgs_data_latitude), crs = src_crs) + #Get horizontal datum + h_datum = df['usgs_preferred_latlon_datum_name'].item() + #Look up EPSG code, if not returned Assume NAD83 as default. + dict_crs = crs_lookup.get(h_datum,'EPSG:4269_ Assumed') + #We want to know what sites were assumed, hence the split. + src_crs, *message = dict_crs.split('_') + #Convert dataframe to geodataframe using lat/lon (USGS). Add attribute of assigned crs (label ones that are assumed) + site_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['usgs_preferred_longitude'], df['usgs_preferred_latitude']), crs = src_crs) + #Field to indicate if a latlon datum was assumed + site_gdf['assigned_crs'] = src_crs + ''.join(message) + #Reproject to huc 8 crs site_gdf = site_gdf.to_crs(huc8.crs) #Append site geodataframe to metadata geodataframe @@ -808,7 +815,7 @@ def aggregate_wbd_hucs(metadata_list, wbd_huc8_path, retain_attributes = False): #Trim metadata to only have certain fields. if not retain_attributes: - metadata_gdf = metadata_gdf[['identifiers_nwm_feature_id', 'identifiers_nws_lid', 'geometry']] + metadata_gdf = metadata_gdf[['identifiers_nwm_feature_id', 'identifiers_nws_lid', 'identifiers_usgs_site_code', 'geometry']] #If a list of attributes is supplied then use that list. elif isinstance(retain_attributes,list): metadata_gdf = metadata_gdf[retain_attributes] @@ -937,7 +944,7 @@ def get_nwm_segs(metadata): ####################################################################### #Thresholds ####################################################################### -def get_thresholds(threshold_url, location_ids, physical_element = 'all', threshold = 'all', bypass_source_flag = False): +def get_thresholds(threshold_url, select_by, selector, threshold = 'all'): ''' Get nws_lid threshold stages and flows (i.e. bankfull, action, minor, moderate, major). Returns a dictionary for stages and one for flows. 
@@ -946,17 +953,12 @@ def get_thresholds(threshold_url, location_ids, physical_element = 'all', thresh ---------- threshold_url : STR WRDS threshold API. - location_ids : STR - nws_lid code (only a single code). - physical_element : STR, optional - Physical element option. The default is 'all'. + select_by : STR + Type of site (nws_lid, usgs_site_code etc). + selector : STR + Site for selection. Must be a single site. threshold : STR, optional Threshold option. The default is 'all'. - bypass_source_flag : BOOL, optional - Special case if calculated values are not available (e.g. no rating - curve is available) then this allows for just a stage to be returned. - Used in case a flow is already known from another source, such as - a model. The default is False. Returns ------- @@ -966,13 +968,14 @@ def get_thresholds(threshold_url, location_ids, physical_element = 'all', thresh Dictionary of flows at each threshold. ''' - - url = f'{threshold_url}/{physical_element}/{threshold}/{location_ids}' - response = requests.get(url) + params = {} + params['threshold'] = threshold + url = f'{threshold_url}/{select_by}/{selector}' + response = requests.get(url, params = params) if response.ok: thresholds_json = response.json() #Get metadata - thresholds_info = thresholds_json['stream_thresholds'] + thresholds_info = thresholds_json['value_set'] #Initialize stages/flows dictionaries stages = {} flows = {} @@ -985,24 +988,27 @@ def get_thresholds(threshold_url, location_ids, physical_element = 'all', thresh threshold_data = thresholds_info[rating_sources['USGS Rating Depot']] elif 'NRLDB' in rating_sources: threshold_data = thresholds_info[rating_sources['NRLDB']] - #If neither USGS or NRLDB is available + #If neither USGS or NRLDB is available use first dictionary to get stage values. else: - #A flag option for cases where only a stage is needed for USGS scenario where a rating curve source is not available yet stages are available for the site. If flag is enabled, then stages are retrieved from the first record in thresholds_info. Typically the flows will not be populated as no rating curve is available. Flag should only be enabled when flows are already supplied by source (e.g. USGS) and threshold stages are needed. - if bypass_source_flag: - threshold_data = thresholds_info[0] - else: - threshold_data = [] + threshold_data = thresholds_info[0] #Get stages and flows for each threshold if threshold_data: stages = threshold_data['stage_values'] flows = threshold_data['calc_flow_values'] #Add source information to stages and flows. Flows source inside a nested dictionary. Remove key once source assigned to flows. - stages['source'] = threshold_data['metadata']['threshold_source'] - flows['source'] = flows['rating_curve']['source'] + stages['source'] = threshold_data.get('metadata').get('threshold_source') + flows['source'] = flows.get('rating_curve', {}).get('source') flows.pop('rating_curve', None) #Add timestamp WRDS data was retrieved. 
stages['wrds_timestamp'] = response.headers['Date'] flows['wrds_timestamp'] = response.headers['Date'] + #Add Site information + stages['nws_lid'] = threshold_data.get('metadata').get('nws_lid') + flows['nws_lid'] = threshold_data.get('metadata').get('nws_lid') + stages['usgs_site_code'] = threshold_data.get('metadata').get('usgs_site_code') + flows['usgs_site_code'] = threshold_data.get('metadata').get('usgs_site_code') + stages['units'] = threshold_data.get('metadata').get('stage_units') + flows['units'] = threshold_data.get('metadata').get('calc_flow_units') return stages, flows ######################################################################## From 2b260655cd018d6ffb44f2868f55efdd1d98585d Mon Sep 17 00:00:00 2001 From: Brad Date: Thu, 25 Mar 2021 09:08:06 -0500 Subject: [PATCH 056/359] Update CHANGELOG.md --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 164ce1a44..c32f8a555 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,9 +18,10 @@ We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. ## v3.0.10.1 - 2021-03-24 - [PR #320](https://github.com/NOAA-OWP/cahaba/pull/320) Patch to synthesize_test_cases.py. - + ### Changes - Bug fix to `synthesize_test_cases.py` to allow comparison between `testing` version and `official` versions. +

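Circling back to the threshold changes in the preceding patch: `get_thresholds()` now selects sites with a `select_by`/`selector` pair against the `nws_threshold` endpoint instead of `location_ids`/`physical_element`. A minimal usage sketch follows; the URL is a placeholder and `mcc01` is simply an example lid that appears elsewhere in this series.

```python
# Sketch of the new get_thresholds() interface; threshold_url is a placeholder
# for the WRDS endpoint normally built from API_BASE_URL in .env.
from tools_shared_functions import get_thresholds

threshold_url = 'https://example.gov/api/nws_threshold'
stages, flows = get_thresholds(threshold_url,
                               select_by='nws_lid',
                               selector='mcc01',
                               threshold='all')

# Both dictionaries now also include site codes and units added by this patch.
print(stages.get('nws_lid'), stages.get('units'), flows.get('units'))
for category in ['action', 'minor', 'moderate', 'major', 'record']:
    print(category, stages.get(category), flows.get(category))
```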
## v3.0.10.0 - 2021-03-12 - [PR #298](https://github.com/NOAA-OWP/cahaba/pull/298) @@ -35,6 +36,7 @@ We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. - Add flood of record category. ### Changes - Stability fixes to `generate_categorical_fim.py`. +

## v3.0.9.0 - 2021-03-12 - [PR #297](https://github.com/NOAA-OWP/cahaba/pull/297) From a60e6eee97391416a3c8f44242f181cdf02abac1 Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Tue, 30 Mar 2021 16:17:02 -0500 Subject: [PATCH 057/359] Adds detail and more information to plotting capabilities Add detail/information to plotting capabilities. - Merge plot_functions.py into eval_plots.py and move eval_plots.py into the tools directory. - Remove plots subdirectory. - Optional argument to create barplots of CSI for each individual site. - Create a csv containing the data used to create the scatterplots. This resolves #325. --- CHANGELOG.md | 11 + tools/{plots => }/eval_plots.py | 354 +++++++++++++++++++++++++++++++- tools/plots/__init__.py | 0 tools/plots/plot_functions.py | 321 ----------------------------- 4 files changed, 359 insertions(+), 327 deletions(-) rename tools/{plots => }/eval_plots.py (54%) mode change 100755 => 100644 delete mode 100755 tools/plots/__init__.py delete mode 100755 tools/plots/plot_functions.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c32f8a555..5f11e2d3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/237) + + Add more detail/information to plotting capabilities. + ### Changes + - Merge `plot_functions.py` into `eval_plots.py` and move `eval_plots.py` into the tools directory. + - Remove `plots` subdirectory. +### Additions + - Optional argument to create barplots of CSI for each individual site. + - Create a csv containing the data used to create the scatterplots. +

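To show how the merged plotting helpers are meant to be called, here is a minimal sketch against the `boxplot()` function defined later in this patch, assuming it is run from the `tools/` directory. The metrics DataFrame, version labels, and output filename are invented purely for illustration.

```python
# Sketch only: exercises boxplot() from the merged eval_plots.py with made-up
# CSI values and hypothetical FIM version names.
import pandas as pd
from eval_plots import boxplot

metrics = pd.DataFrame({
    'magnitude': ['action', 'action', 'minor', 'minor'],
    'CSI':       [0.61, 0.67, 0.58, 0.63],
    'version':   ['fim_2_3_3', 'fim_3_0_12_0_fr'] * 2,
})

boxplot(dataframe=metrics,
        x_field='magnitude',
        x_order=['action', 'minor', 'moderate', 'major'],
        y_field='CSI',
        hue_field='version',
        ordered_hue=['fim_2_3_3', 'fim_3_0_12_0_fr'],
        title_text='AHPS Sites',
        fim_configuration='FR',
        simplify_legend=True,
        dest_file='csi_boxplot.png')
```

With `simplify_legend=True`, the version labels are collapsed to the shortened "FIM 2 fr" / "FIM 3.0.12.0 fr" style handled inside the function.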
+ ## v3.0.11.0 - 2021-03-22 - [PR #319](https://github.com/NOAA-OWP/cahaba/pull/298) Improvements to CatFIM service source data generation. diff --git a/tools/plots/eval_plots.py b/tools/eval_plots.py old mode 100755 new mode 100644 similarity index 54% rename from tools/plots/eval_plots.py rename to tools/eval_plots.py index f18390f5e..b22af66ec --- a/tools/plots/eval_plots.py +++ b/tools/eval_plots.py @@ -1,14 +1,332 @@ #!/usr/bin/env python3 - import pandas as pd from pathlib import Path import argparse from natsort import natsorted import geopandas as gpd -from plot_functions import filter_dataframe, boxplot, scatterplot, barplot +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import re +######################################################################### +#Create boxplot +######################################################################### +def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False): + ''' + Create boxplots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis + x_order : List + Order to arrange the x-axis. + y_field : STR + Field to use for the y-axis + hue_field : STR + Field to use for hue (typically FIM version) + title_text : STR + Text for plot title. + fim_configuration: STR + Configuration of FIM (FR or MS or Composite). + simplify_legend : BOOL, optional + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + The default is False. + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. The default is False. + + Returns + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + #Use seaborn to plot the boxplot + axes=sns.boxplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') + #set title of plot + axes.set_title(f'{title_text} ({y_field})',fontsize=20, weight = 'bold') + #Set yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + for index,ytick in enumerate(axes.get_yticks()): + plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) + #Define y axis label and x axis label. + axes.set_ylabel(f'{y_field}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel('',fontsize=0,weight = 'bold') + #Set sizes of ticks and legend. + axes.tick_params(labelsize = 'xx-large') + axes.legend(markerscale = 2, fontsize =20, loc = 'lower left') + + #If simple legend desired + if simplify_legend: + #trim labels to FIM 1, FIM 2, and the FIM 3 version + handles, org_labels = axes.get_legend_handles_labels() + label_dict = {} + for label in org_labels: + if 'fim_1' in label: + label_dict[label] = 'FIM 1' + elif 'fim_2' in label: + label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() + elif 'fim_3' in label: + label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + if label.endswith('_c'): + label_dict[label] = label_dict[label] + ' c' + else: + label_dict[label] = label + ' ' + fim_configuration.lower() + #Define simplified labels as a list. + new_labels = [label_dict[label] for label in org_labels] + #Define legend location. FAR needs to be in different location than CSI/POD. 
+ if y_field == 'FAR': + legend_location = 'upper right' + else: + legend_location = 'lower left' + #rename legend labels to the simplified labels. + axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = legend_location, ncol = int(np.ceil(len(new_labels)/7))) + #Print textbox if supplied + if textbox_str: + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=14, verticalalignment='top', bbox=box_props) + + #If figure to be saved to disk, then do so, otherwise return figure + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig +######################################################################### +#Create scatter plot +######################################################################### +def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annotate = False, dest_file = False): + ''' + Create boxplots. -def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False): + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis (Assumes FIM 2) + y_field : STR + Field to use for the y-axis (Assumes FIM 3) + title_text : STR + Text for plot title. + stats_text : STR or BOOL + Text for stats to place on chart. Default is false (no stats printed) + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. The default is False. + + Returnsy + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + + #Use seaborn to plot the boxplot + axes=sns.scatterplot(data=dataframe, x=x_field, y=y_field, color = 'black', s = 150) + + #Set xticks and yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + axes.set(xlim=(0.0,1.0),xticks = np.arange(0,1.1,0.1)) + axes.grid(b=True, which='major', axis='both') + + #Set sizes of ticks and legend. + axes.tick_params(labelsize = 'xx-large') + + #Define y axis label and x axis label. 
+ axes.set_ylabel(f'{y_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel(f'{x_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') + + #Plot diagonal line + diag_range = [0,1] + axes.plot(diag_range, diag_range, color='gray', transform=axes.transAxes) + + + #set title of plot + axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') + + if annotate: + #Set text for labels + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + textbox_str = 'Target Better' + axes.text(0.3, 0.6, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') + textbox_str = 'Baseline Better' + axes.text(0.5, 0.2, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') + + if stats_text: + #Add statistics textbox + axes.text(0.01, 0.80, stats_text, transform=axes.transAxes, fontsize=24, verticalalignment='top', bbox=box_props) + + #If figure to be saved to disk, then do so, otherwise return fig + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig +######################################################################### +#Create barplot +######################################################################### +def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False): + ''' + Create barplots. + + Parameters + ---------- + dataframe : DataFrame + Pandas dataframe data to be plotted. + x_field : STR + Field to use for x-axis + x_order : List + Order to arrange the x-axis. + y_field : STR + Field to use for the y-axis + hue_field : STR + Field to use for hue (typically FIM version) + title_text : STR + Text for plot title. + fim_configuration: STR + Configuration of FIM (FR or MS or Composite). + simplify_legend : BOOL, optional + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + Default is False. + display_values : BOOL, optional + If True, Y values will be displayed above bars. + Default is False. + dest_file : STR or BOOL, optional + If STR provide the full path to the figure to be saved. If False + no plot is saved to disk. Default is False. + + Returns + ------- + fig : MATPLOTLIB + Plot. + + ''' + + #initialize plot + fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) + #Use seaborn to plot the boxplot + axes=sns.barplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') + #set title of plot + axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') + #Set yticks and background horizontal line. + axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) + for index,ytick in enumerate(axes.get_yticks()): + plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) + #Define y axis label and x axis label. + axes.set_ylabel(f'{y_field.upper()}',fontsize='xx-large',weight = 'bold') + axes.set_xlabel('',fontsize=0,weight = 'bold') + #Set sizes of ticks and legend. 
+ axes.tick_params(labelsize = 'xx-large') + axes.legend(markerscale = 2, fontsize =20, loc = 'upper right') + #If simple legend desired + if simplify_legend: + #trim labels to FIM 1, FIM 2, FIM 3 + handles, org_labels = axes.get_legend_handles_labels() + label_dict = {} + for label in org_labels: + if 'fim_1' in label: + label_dict[label] = 'FIM 1' + elif 'fim_2' in label: + label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() + elif 'fim_3' in label: + label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() + if label.endswith('_c'): + label_dict[label] = label_dict[label] + ' c' + else: + label_dict[label] = label + ' ' + fim_configuration.lower() + #Define simplified labels as a list. + new_labels = [label_dict[label] for label in org_labels] + #rename legend labels to the simplified labels. + axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = 'upper right', ncol = int(np.ceil(len(new_labels)/7))) + #Add Textbox + if textbox_str: + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) + axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=18, verticalalignment='top', bbox=box_props) + + #Display Y values above bars + if display_values: + #Add values of bars directly above bar. + for patch in axes.patches: + value = round(patch.get_height(),3) + axes.text(patch.get_x()+patch.get_width()/2., + patch.get_height(), + '{:1.3f}'.format(value), + ha="center", fontsize=18) + + #If figure to be saved to disk, then do so, otherwise return fig + if dest_file: + fig.savefig(dest_file) + plt.close(fig) + else: + return fig +####################################################################### +#Filter dataframe generated from csv file from run_test_case aggregation +######################################################################## +def filter_dataframe(dataframe, unique_field): + ''' + + This script will filter out the sites (or hucs) which are not consistently + found for all versions for a given magnitude. For example, an AHPS + lid site must have output for all 3 versions (fim1, fim2, fim3) for + a given magnitude (eg action) otherwise that lid is filtered out. + Likewise for a BLE a huc must have output for all 3 versions + (fim1, fim2, fim3) for a given magnitude (eg 100yr) otherwise it is + filtered out. + + Parameters + ---------- + dataframe : Pandas DataFrame + Containing the input metrics originating from synthesize_test_cases + unique_field : STR + base resolution for each benchmark source: 'nws'/'usgs' (nws_lid) + ble (huc). + + Returns + ------- + final_filtered_dataframe : Pandas Dataframe + Filtered dataframe that contains only common sites (lids or hucs) between versions for each magnitude. For example, for AHPS all sites which were run for each version for a given magnitude will be kept or for ble, all hucs which ran for all versions for a given magnitude. + unique_sites: DICT + The sites that were included in the dataframe for each magnitude. + + ''' + + #Get lists of sites for each magnitude/version + unique_sites = dataframe.groupby(['magnitude','version'])[unique_field].agg('unique') + #Get unique magnitudes + magnitudes = dataframe.magnitude.unique() + #Create new dataframe to hold metrics for the common sites as well as the actual lists of common sites. 
+ final_filtered_dataframe = pd.DataFrame() + all_unique_sites = {} + #Cycle through each magnitude + for magnitude in magnitudes: + #Compile a list of sets containing unique lids pertaining to each threshold. List contains 3 unique sets [{fim1:unique lids},{fim2: unique lids},{fim3: unique lids}] + sites_per_magnitude=[set(a) for a in unique_sites[magnitude]] + #Intersect the sets to get the common lids per threshold then convert to list. + common_sites_per_magnitude = list(set.intersection(*sites_per_magnitude)) + #Write common sites to dataframe + all_unique_sites[magnitude] = common_sites_per_magnitude + #Query filtered dataframe and only include data associated with the common sites for that magnitude + filtered_common_sites = dataframe.query(f'magnitude == "{magnitude}" & {unique_field} in @common_sites_per_magnitude') + #Append the data for each magnitude to a final dataframe that will contain data for all common sites for all magnitudes. + final_filtered_dataframe = final_filtered_dataframe.append(filtered_common_sites, ignore_index = True) + + return final_filtered_dataframe, all_unique_sites +############################################################################## +############################################################################## +#Main function to analyze metric csv. +############################################################################## +def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False, site_barplots = False): ''' Creates plots and summary statistics using metrics compiled from @@ -45,6 +363,13 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' csi_scatter__*.png: scatter plot comparing two versions for a given magnitude. This is only generated if there are exactly two versions analyzed. + csi_scatter___data.csv: data used to create the + csi_scatter_plot + Optional: 'individual' directory with subfolders for each site in analysis. In these + site subdirectories are the following files: + csi___.png: A barplot + of CSI for each version for all magnitudes for the site. + Parameters ---------- @@ -86,6 +411,10 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' Default is false. If True then fim_1 rows are duplicated with extent_config set to MS. This allows for FIM 1 to be included in MS plots/stats (helpful for nws/usgs ahps comparisons). + site_barplots: BOOL + Default is false. If True then barplots for each individual site are + created. An 'individual' directory with subdirectories of each site + are created and the plot is located in each site subdirectory. Returns ------- @@ -222,6 +551,15 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' aggregate_file = output_workspace / (f'csi_aggr_{dataset_name}_{configuration.lower()}.png') barplot(dataframe = dataset_sums, x_field = 'magnitude', x_order = magnitude_order, y_field = 'csi', hue_field = 'version', ordered_hue = version_order, title_text = f'Aggregate {dataset_name.upper()} FIM Scores', fim_configuration = configuration, textbox_str = textbox, simplify_legend = True, dest_file = aggregate_file) + #If enabled, write out barplots of CSI for individual sites. 
+ if site_barplots: + subset = dataset.groupby(base_resolution) + for site_name, site_data in subset: + individual_dirs = output_workspace / 'individual' / str(site_name) + individual_dirs.mkdir(parents = True, exist_ok = True) + site_file = individual_dirs / f'csi_{str(site_name)}_{dataset_name}_{configuration.lower()}.png' + barplot(dataframe = site_data, x_field = 'magnitude', x_order = magnitude_order, y_field = 'CSI', hue_field = 'version', ordered_hue = version_order, title_text = f'{str(site_name).upper()} FIM Scores', fim_configuration = configuration, textbox_str = False, simplify_legend = True, dest_file = site_file) + # Create box plots for each metric in supplied stats for stat in stats: output_file = output_workspace / (f'{stat.lower()}_{dataset_name}_{configuration.lower()}.png') @@ -239,7 +577,9 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' title_text = f'CSI {magnitude}' dest_file = output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}.png' scatterplot(dataframe = plotdf, x_field = f'CSI_{x_version}', y_field = f'CSI_{y_version}', title_text = title_text, annotate = False, dest_file = dest_file) - + #Write out dataframe used to create scatter plots + plotdf['Diff (C-B)'] = plotdf[f'CSI_{y_version}'] - plotdf[f'CSI_{x_version}'] + plotdf.to_csv(output_workspace / f'csi_scatter_{magnitude}_{configuration.lower()}_data.csv', index = False) ####################################################################### #Create spatial layers with threshold and mapping information @@ -305,7 +645,8 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' parser.add_argument('-q', '--alternate_ahps_query',help = 'Alternate filter query for AHPS. Default is: "not nws_lid.isnull() & not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" where bad_sites are (grfi2,ksdm7,hohn4,rwdn4)', default = False, required = False) parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False) parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) - + parser.add_argument('-i', '--site_plots', help = 'If enabled individual barplots for each site are created.', action = 'store_true', required = False) + # Extract to dictionary and assign to variables args = vars(parser.parse_args()) @@ -337,7 +678,8 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' q = args['alternate_ahps_query'] sp= args['spatial_ahps'] f = args['fim_1_ms'] + i = args['site_plots'] # Run eval_plots function if not error: - eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f) + eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f, site_barplots = i) diff --git a/tools/plots/__init__.py b/tools/plots/__init__.py deleted file mode 100755 index e69de29bb..000000000 diff --git a/tools/plots/plot_functions.py b/tools/plots/plot_functions.py deleted file mode 100755 index 60342059e..000000000 --- a/tools/plots/plot_functions.py +++ /dev/null @@ -1,321 +0,0 @@ -#!/usr/bin/env python3 -import pandas as 
pd -import numpy as np -import matplotlib.pyplot as plt -import seaborn as sns -import re -######################################################################### -#Create boxplot -######################################################################### -def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False): - ''' - Create boxplots. - - Parameters - ---------- - dataframe : DataFrame - Pandas dataframe data to be plotted. - x_field : STR - Field to use for x-axis - x_order : List - Order to arrange the x-axis. - y_field : STR - Field to use for the y-axis - hue_field : STR - Field to use for hue (typically FIM version) - title_text : STR - Text for plot title. - fim_configuration: STR - Configuration of FIM (FR or MS or Composite). - simplify_legend : BOOL, optional - If True, it will simplify legend to FIM 1, FIM 2, FIM 3. - The default is False. - dest_file : STR or BOOL, optional - If STR provide the full path to the figure to be saved. If False - no plot is saved to disk. The default is False. - - Returns - ------- - fig : MATPLOTLIB - Plot. - - ''' - - #initialize plot - fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) - #Use seaborn to plot the boxplot - axes=sns.boxplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') - #set title of plot - axes.set_title(f'{title_text} ({y_field})',fontsize=20, weight = 'bold') - #Set yticks and background horizontal line. - axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) - for index,ytick in enumerate(axes.get_yticks()): - plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) - #Define y axis label and x axis label. - axes.set_ylabel(f'{y_field}',fontsize='xx-large',weight = 'bold') - axes.set_xlabel('',fontsize=0,weight = 'bold') - #Set sizes of ticks and legend. - axes.tick_params(labelsize = 'xx-large') - axes.legend(markerscale = 2, fontsize =20, loc = 'lower left') - - #If simple legend desired - if simplify_legend: - #trim labels to FIM 1, FIM 2, and the FIM 3 version - handles, org_labels = axes.get_legend_handles_labels() - label_dict = {} - for label in org_labels: - if 'fim_1' in label: - label_dict[label] = 'FIM 1' - elif 'fim_2' in label: - label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() - elif 'fim_3' in label: - label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() - if label.endswith('_c'): - label_dict[label] = label_dict[label] + ' c' - else: - label_dict[label] = label + ' ' + fim_configuration.lower() - #Define simplified labels as a list. - new_labels = [label_dict[label] for label in org_labels] - #Define legend location. FAR needs to be in different location than CSI/POD. - if y_field == 'FAR': - legend_location = 'upper right' - else: - legend_location = 'lower left' - #rename legend labels to the simplified labels. 
- axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = legend_location, ncol = int(np.ceil(len(new_labels)/7))) - #Print textbox if supplied - if textbox_str: - box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) - axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=14, verticalalignment='top', bbox=box_props) - - #If figure to be saved to disk, then do so, otherwise return figure - if dest_file: - fig.savefig(dest_file) - plt.close(fig) - else: - return fig - -######################################################################### -#Create scatter plot -######################################################################### -def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annotate = False, dest_file = False): - ''' - Create boxplots. - - Parameters - ---------- - dataframe : DataFrame - Pandas dataframe data to be plotted. - x_field : STR - Field to use for x-axis (Assumes FIM 2) - y_field : STR - Field to use for the y-axis (Assumes FIM 3) - title_text : STR - Text for plot title. - stats_text : STR or BOOL - Text for stats to place on chart. Default is false (no stats printed) - dest_file : STR or BOOL, optional - If STR provide the full path to the figure to be saved. If False - no plot is saved to disk. The default is False. - - Returnsy - ------- - fig : MATPLOTLIB - Plot. - - ''' - - #initialize plot - fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) - - #Use seaborn to plot the boxplot - axes=sns.scatterplot(data=dataframe, x=x_field, y=y_field, color = 'black', s = 150) - - #Set xticks and yticks and background horizontal line. - axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) - axes.set(xlim=(0.0,1.0),xticks = np.arange(0,1.1,0.1)) - axes.grid(b=True, which='major', axis='both') - - #Set sizes of ticks and legend. - axes.tick_params(labelsize = 'xx-large') - - #Define y axis label and x axis label. 
- axes.set_ylabel(f'{y_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') - axes.set_xlabel(f'{x_field.replace("_"," ")}',fontsize='xx-large',weight = 'bold') - - #Plot diagonal line - diag_range = [0,1] - axes.plot(diag_range, diag_range, color='gray', transform=axes.transAxes) - - - #set title of plot - axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') - - if annotate: - #Set text for labels - box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) - textbox_str = 'Target Better' - axes.text(0.3, 0.6, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') - textbox_str = 'Baseline Better' - axes.text(0.5, 0.2, textbox_str, transform=axes.transAxes, fontsize=32, color = 'gray', fontweight = 'bold', verticalalignment='top', bbox=box_props, rotation = 35, rotation_mode = 'anchor') - - if stats_text: - #Add statistics textbox - axes.text(0.01, 0.80, stats_text, transform=axes.transAxes, fontsize=24, verticalalignment='top', bbox=box_props) - - #If figure to be saved to disk, then do so, otherwise return fig - if dest_file: - fig.savefig(dest_file) - plt.close(fig) - else: - return fig -######################################################################### -#Create barplot -######################################################################### -def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False): - ''' - Create barplots. - - Parameters - ---------- - dataframe : DataFrame - Pandas dataframe data to be plotted. - x_field : STR - Field to use for x-axis - x_order : List - Order to arrange the x-axis. - y_field : STR - Field to use for the y-axis - hue_field : STR - Field to use for hue (typically FIM version) - title_text : STR - Text for plot title. - fim_configuration: STR - Configuration of FIM (FR or MS or Composite). - simplify_legend : BOOL, optional - If True, it will simplify legend to FIM 1, FIM 2, FIM 3. - Default is False. - display_values : BOOL, optional - If True, Y values will be displayed above bars. - Default is False. - dest_file : STR or BOOL, optional - If STR provide the full path to the figure to be saved. If False - no plot is saved to disk. Default is False. - - Returns - ------- - fig : MATPLOTLIB - Plot. - - ''' - - #initialize plot - fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) - #Use seaborn to plot the boxplot - axes=sns.barplot(x=x_field, y=y_field, order=x_order, hue=hue_field, hue_order = ordered_hue, data=dataframe, palette='bright') - #set title of plot - axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') - #Set yticks and background horizontal line. - axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) - for index,ytick in enumerate(axes.get_yticks()): - plt.axhline(y=ytick,color='black',linestyle = '--',linewidth = 1,alpha = 0.1) - #Define y axis label and x axis label. - axes.set_ylabel(f'{y_field.upper()}',fontsize='xx-large',weight = 'bold') - axes.set_xlabel('',fontsize=0,weight = 'bold') - #Set sizes of ticks and legend. 
- axes.tick_params(labelsize = 'xx-large') - axes.legend(markerscale = 2, fontsize =20, loc = 'upper right') - #If simple legend desired - if simplify_legend: - #trim labels to FIM 1, FIM 2, FIM 3 - handles, org_labels = axes.get_legend_handles_labels() - label_dict = {} - for label in org_labels: - if 'fim_1' in label: - label_dict[label] = 'FIM 1' - elif 'fim_2' in label: - label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() - elif 'fim_3' in label: - label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() - if label.endswith('_c'): - label_dict[label] = label_dict[label] + ' c' - else: - label_dict[label] = label + ' ' + fim_configuration.lower() - #Define simplified labels as a list. - new_labels = [label_dict[label] for label in org_labels] - #rename legend labels to the simplified labels. - axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = 'upper right', ncol = int(np.ceil(len(new_labels)/7))) - #Add Textbox - if textbox_str: - box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) - axes.text(0.01, 0.99, textbox_str, transform=axes.transAxes, fontsize=18, verticalalignment='top', bbox=box_props) - - #Display Y values above bars - if display_values: - #Add values of bars directly above bar. - for patch in axes.patches: - value = round(patch.get_height(),3) - axes.text(patch.get_x()+patch.get_width()/2., - patch.get_height(), - '{:1.3f}'.format(value), - ha="center", fontsize=18) - - #If figure to be saved to disk, then do so, otherwise return fig - if dest_file: - fig.savefig(dest_file) - plt.close(fig) - else: - return fig -####################################################################### -#Filter dataframe generated from csv file from run_test_case aggregation -######################################################################## -def filter_dataframe(dataframe, unique_field): - ''' - - This script will filter out the sites (or hucs) which are not consistently - found for all versions for a given magnitude. For example, an AHPS - lid site must have output for all 3 versions (fim1, fim2, fim3) for - a given magnitude (eg action) otherwise that lid is filtered out. - Likewise for a BLE a huc must have output for all 3 versions - (fim1, fim2, fim3) for a given magnitude (eg 100yr) otherwise it is - filtered out. - - Parameters - ---------- - dataframe : Pandas DataFrame - Containing the input metrics originating from synthesize_test_cases - unique_field : STR - base resolution for each benchmark source: 'nws'/'usgs' (nws_lid) - ble (huc). - - Returns - ------- - final_filtered_dataframe : Pandas Dataframe - Filtered dataframe that contains only common sites (lids or hucs) between versions for each magnitude. For example, for AHPS all sites which were run for each version for a given magnitude will be kept or for ble, all hucs which ran for all versions for a given magnitude. - unique_sites: DICT - The sites that were included in the dataframe for each magnitude. - - ''' - - #Get lists of sites for each magnitude/version - unique_sites = dataframe.groupby(['magnitude','version'])[unique_field].agg('unique') - #Get unique magnitudes - magnitudes = dataframe.magnitude.unique() - #Create new dataframe to hold metrics for the common sites as well as the actual lists of common sites. 
- final_filtered_dataframe = pd.DataFrame() - all_unique_sites = {} - #Cycle through each magnitude - for magnitude in magnitudes: - #Compile a list of sets containing unique lids pertaining to each threshold. List contains 3 unique sets [{fim1:unique lids},{fim2: unique lids},{fim3: unique lids}] - sites_per_magnitude=[set(a) for a in unique_sites[magnitude]] - #Intersect the sets to get the common lids per threshold then convert to list. - common_sites_per_magnitude = list(set.intersection(*sites_per_magnitude)) - #Write common sites to dataframe - all_unique_sites[magnitude] = common_sites_per_magnitude - #Query filtered dataframe and only include data associated with the common sites for that magnitude - filtered_common_sites = dataframe.query(f'magnitude == "{magnitude}" & {unique_field} in @common_sites_per_magnitude') - #Append the data for each magnitude to a final dataframe that will contain data for all common sites for all magnitudes. - final_filtered_dataframe = final_filtered_dataframe.append(filtered_common_sites, ignore_index = True) - - return final_filtered_dataframe, all_unique_sites - From 391b547ab9bec87eb26671a73190474c9c6c0a5d Mon Sep 17 00:00:00 2001 From: Brad Date: Wed, 31 Mar 2021 10:00:34 -0500 Subject: [PATCH 058/359] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9f44d94ad..c067ea29d 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ The following input data sources should be downloaded and preprocessed prior to - `acquire_and_preprocess_inputs.py` - `aggregate_nhd_hr_streams.py` -**Please note:** For the following two datasets, please contact Brad Bates (bradford.bates@noaa.gov). We are currently working on a long-term data sharing solution for the in-house NOAA data. +**Please note:** For the following two datasets, please contact Mark Glaudemans (mark.glaudemans@noaa.gov). We are currently working on a long-term data sharing solution for the in-house NOAA data. ### NWM Hydrofabric - `nwm_flows.gpkg` @@ -84,7 +84,7 @@ fim_run.sh -u -c /foss_fim/config/ -n Date: Thu, 1 Apr 2021 12:24:05 -0500 Subject: [PATCH 059/359] Update spatial option when performing eval plots Removes file dependencies from spatial option. Does require the WBD layer which should be specified in .env file. - Produces outputs in a format consistent with requirements needed for publishing. - Preserves leading zeros in huc information for all outputs from eval_plots.py. - Creates fim_performance_points.shp: this layer consists of all evaluated ahps points (with metrics). Spatial data retrieved from WRDS on the fly. - Creates fim_performance_polys.shp: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer. This resolves #325. --- CHANGELOG.md | 15 ++++ tools/eval_plots.py | 188 ++++++++++++++++++++++++-------------------- 2 files changed, 118 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f11e2d3d..843e37282 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,21 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. + +## v3.0.12.1 - 2021-03-26 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336) + + Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. + + ### Changes + - Removes file dependencies from spatial option. Does require the WBD layer which should be specified in `.env` file. 
+ - Produces outputs in a format consistent with requirements needed for publishing. + - Preserves leading zeros in huc information for all outputs from `eval_plots.py`. + +### Additions +- Creates `fim_performance_points.shp`: this layer consists of all evaluated ahps points (with metrics). Spatial data retrieved from WRDS on the fly. +- Creates `fim_performance_polys.shp`: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer. +

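The "preserves leading zeros" bullet above comes down to reading the `huc` column as text instead of letting pandas infer an integer (see the `pd.read_csv(metrics_csv, dtype = {'huc':str})` change later in this patch). A small illustration with made-up HUC values:

```python
# Why the huc column is read as a string (example values are made up).
import io
import pandas as pd

csv_text = "huc,CSI\n02020005,0.61\n12090301,0.55\n"

inferred = pd.read_csv(io.StringIO(csv_text))
print(inferred.huc.tolist())    # [2020005, 12090301] -- leading zero lost

as_text = pd.read_csv(io.StringIO(csv_text), dtype={'huc': str})
print(as_text.huc.tolist())     # ['02020005', '12090301'] -- 8-digit HUC preserved
```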
+ ## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/237) Add more detail/information to plotting capabilities. diff --git a/tools/eval_plots.py b/tools/eval_plots.py index b22af66ec..0af2ae9ad 100644 --- a/tools/eval_plots.py +++ b/tools/eval_plots.py @@ -8,6 +8,18 @@ import matplotlib.pyplot as plt import seaborn as sns import re +import os +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import VIZ_PROJECTION +from dotenv import load_dotenv +from tools_shared_functions import aggregate_wbd_hucs, get_metadata + +#Get variables from .env file. +load_dotenv() +WBD_LAYER = os.getenv("WBD_LAYER") +API_BASE_URL = os.getenv("API_BASE_URL") + ######################################################################### #Create boxplot ######################################################################### @@ -326,7 +338,7 @@ def filter_dataframe(dataframe, unique_field): ############################################################################## #Main function to analyze metric csv. ############################################################################## -def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial_ahps = False, fim_1_ms = False, site_barplots = False): +def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR'] , alternate_ahps_query = False, spatial = False, fim_1_ms = False, site_barplots = False): ''' Creates plots and summary statistics using metrics compiled from @@ -369,6 +381,12 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' site subdirectories are the following files: csi___.png: A barplot of CSI for each version for all magnitudes for the site. + Optional (if spatial argument supplied): + fim_performance_points.shp -- A shapefile of ahps points with + metrics contained in attribute table. + fim_performance_polys.shp -- A shapefile of huc8 polygons with + metrics contained in attribute table. + Parameters @@ -397,16 +415,15 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' The default is false. Currently the default ahps query is same as done for apg goals. If a different query is desired it can be supplied and it will supercede the default query. - spatial_ahps : DICTIONARY, optional - The default is false. A dictionary with keys as follows: - 'static': Path to AHPS point file created during creation of - FIM 3 static libraries. - 'evaluated': Path to extent file created during the creation - of the NWS/USGS AHPS preprocessing. - 'metadata': Path to previously created file that contains - metadata about each site (feature_id, wfo, rfc and etc). - No spatial layers will be created if set to False, if a dictionary - is supplied then a spatial layer is produced. + spatial : BOOL, optional + Creates spatial datasets of the base unit (ble: huc polygon, ahps: point) + with metrics contained in attribute tables. The geospatial data is + either supplied in the .env file (WBD Huc layer) or from WRDS (ahps). + The outputs are consistent with requirements set forth by the vizualization team. + Additionally, there is a commented out section where if the user + passes the extent files generated during creation of nws/usgs ahps + preprocessing, the actual maps and flows used for evaluation are + appended to the ahps shapefile output. fim_1_ms: BOOL Default is false. If True then fim_1 rows are duplicated with extent_config set to MS. 
This allows for FIM 1 to be included @@ -426,7 +443,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' ''' # Import metrics csv as DataFrame and initialize all_datasets dictionary - csv_df = pd.read_csv(metrics_csv) + csv_df = pd.read_csv(metrics_csv, dtype = {'huc':str}) # fim_1_ms flag enables FIM 1 to be shown on MS plots/stats if fim_1_ms: @@ -584,55 +601,77 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' ####################################################################### #Create spatial layers with threshold and mapping information ######################################################################## - if spatial_ahps: - - # Read in supplied shapefile layers - # Layer containing metadata for each site (feature_id, wfo, etc) - # Convert nws_lid to lower case - ahps_metadata = gpd.read_file(spatial_ahps['metadata']) - ahps_metadata['nws_lid'] = ahps_metadata['nws_lid'].str.lower() - metadata_crs = ahps_metadata.crs - - # Extent layer generated from preprocessing NWS/USGS datasets - evaluated_ahps_extent = gpd.read_file(spatial_ahps['evaluated']) - - # Extent layer generated from static ahps library preprocessing - static_library = gpd.read_file(spatial_ahps['static']) - - # Fields to keep - # Get list of fields to keep in merge - preserved_static_library_fields = ['nws_lid'] + [i for i in static_library.columns if i.startswith(('Q','S'))] - # Get list of fields to keep in merge - preserved_evaluated_ahps_fields = ['nws_lid', 'source', 'geometry'] + [i for i in evaluated_ahps_extent.columns if i.startswith(('action','minor','moderate','major'))] - - # Join tables to evaluated_ahps_extent - evaluated_ahps_extent = evaluated_ahps_extent[preserved_evaluated_ahps_fields] - evaluated_ahps_extent = evaluated_ahps_extent.merge(ahps_metadata, on = 'nws_lid') - evaluated_ahps_extent['geometry'] = evaluated_ahps_extent['geometry_y'] - evaluated_ahps_extent.drop(columns = ['geometry_y','geometry_x'], inplace = True) - evaluated_ahps_extent = evaluated_ahps_extent.merge(static_library[preserved_static_library_fields], on = 'nws_lid') - - # Join dataset metrics to evaluated_ahps_extent data - final_join = pd.DataFrame() - for (dataset_name, configuration), (dataset, sites) in all_datasets.items(): - # Only select ahps from dataset if config is MS - if dataset_name in ['usgs','nws'] and configuration == 'MS': - # Select records from evaluated_ahps_extent that match the dataset name - subset = evaluated_ahps_extent.query(f'source == "{dataset_name}"') - # Join to dataset - dataset_with_subset = dataset.merge(subset, on = 'nws_lid') - # Append rows to final_join dataframe - final_join = final_join.append(dataset_with_subset) - - # Modify version field - final_join['version'] = final_join.version.str.split('_nws|_usgs').str[0] - - # Write geodataframe to file - gdf = gpd.GeoDataFrame(final_join, geometry = final_join['geometry'], crs = metadata_crs) - output_shapefile = Path(workspace) / 'nws_usgs_site_info.shp' - gdf.to_file(output_shapefile) - - + if spatial: + ############################################################### + #This section will join ahps metrics to a spatial point layer + ############################################################### + + #Get point data for ahps sites + #Get metrics for usgs and nws benchmark sources + usgs_dataset,sites = all_datasets.get(('usgs','MS')) + nws_dataset, sites = all_datasets.get(('nws','MS')) + #Append usgs/nws dataframes and filter unnecessary columns and rename remaining. 
+ all_ahps_datasets = usgs_dataset.append(nws_dataset) + all_ahps_datasets = all_ahps_datasets.filter(['huc','nws_lid','version','magnitude','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source']) + all_ahps_datasets.rename(columns = {'benchmark_source':'source'}, inplace = True) + + #Get spatial data from WRDS + #Get metadata from WRDS API + select_by = 'nws_lid' + selector = list(all_ahps_datasets.nws_lid.unique()) + metadata_url = f'{API_BASE_URL}/metadata' + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector) + #Create geospatial data from WRDS output + dictionary, gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = True) + #Trim out unecessary columns and rename remaining columns + gdf = gdf.filter(['identifiers_nws_lid', 'nws_data_name', 'identifiers_nwm_feature_id','nws_data_wfo','nws_data_state','nws_data_county','geometry']) + gdf.rename(columns = {'identifiers_nws_lid':'nws_lid', 'nws_data_name':'lid_name','identifiers_nwm_feature_id':'feature_id','nws_data_wfo':'wfo','nws_data_state':'state','nws_data_county':'county','HUC8':'huc8'}, inplace = True) + + #Join spatial data to metric data + gdf['nws_lid'] = gdf['nws_lid'].str.lower() + joined = gdf.merge(all_ahps_datasets, on = 'nws_lid') + #Project to VIZ projection and write to file + joined = joined.to_crs(VIZ_PROJECTION) + joined.to_file(Path(workspace) / 'fim_performance_points.shp') + + ''' + ############################################################### + #If user wants to append information such as what maps or flows were used for evaluation. This is already tested. + #User must supply the extent layer generated from preprocessing NWS/USGS datasets. + ############################################################### + #Read extent layer to GeoDataFrame and drop the geometry column + evaluated_ahps_extent = gpd.read_file(/Path/to/extent/layer/generated/during/preprocessing) + evaluated_ahps_extent.drop(columns = ['geometry'], inplace = True) + #Re-arrange dataset to get flows used for evaluation + flows = pd.melt(evaluated_ahps_extent, id_vars = ['nws_lid','source'], value_vars = ['action_Q','minor_Q','moderate_Q','major_Q'], var_name = 'magnitude', value_name = 'eval_Q') + flows['magnitude'] = flows['magnitude'].str.split('_', 1, expand = True) + #Re-arrange dataset to get maps used for evaluation + maps = pd.melt(evaluated_ahps_extent, id_vars = ['nws_lid','source'], value_vars = ['action','minor','moderate','major'], var_name = 'magnitude', value_name = 'eval_maps') + maps['eval_maps'] = maps['eval_maps'].str.split('\\').str[-1] + #Merge flows and maps into single DataFrame + flows_maps = pd.merge(flows,maps, how = 'left', left_on = ['nws_lid','source','magnitude'], right_on = ['nws_lid','source','magnitude']) + # combine flows_maps to spatial layer (gdf) + joined = joined.merge(flows_maps, left_on = ['nws_lid','magnitude','source'], right_on = ['nws_lid','magnitude','source']) + #Write to file + joined.to_file(Path(workspace)/'fim_performance_points.shp') + ''' + ################################################################ + #This section joins ble (FR) metrics to a spatial layer of HUCs. + ################################################################ + #Read in HUC spatial layer + wbd_gdf = gpd.read_file(Path(WBD_LAYER), layer = 'WBDHU8') + #Select BLE, FR dataset. 
+ ble_dataset, sites = all_datasets.get(('ble','FR')) + #Join metrics to HUC spatial layer + wbd_with_metrics = wbd_gdf.merge(ble_dataset, how = 'inner', left_on = 'HUC8', right_on = 'huc') + #Filter out unnecessary columns + wbd_with_metrics = wbd_with_metrics.filter(['version','magnitude','huc','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source','geometry']) + wbd_with_metrics.rename(columns = {'benchmark_source':'source'}, inplace = True ) + #Project to VIZ projection + wbd_with_metrics = wbd_with_metrics.to_crs(VIZ_PROJECTION) + #Write out to file + wbd_with_metrics.to_file(Path(workspace) / 'fim_performance_polys.shp') + ####################################################################### if __name__ == '__main__': @@ -643,43 +682,22 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' parser.add_argument('-v', '--versions', help = 'List of versions to be plotted/aggregated. Versions are filtered using the "startswith" approach. For example, ["fim_","fb1"] would retain all versions that began with "fim_" (e.g. fim_1..., fim_2..., fim_3...) as well as any feature branch that began with "fb". An other example ["fim_3","fb"] would result in all fim_3 versions being plotted along with the fb.', nargs = '+', default = []) parser.add_argument('-s', '--stats', help = 'List of statistics (abbrev to 3 letters) to be plotted/aggregated', nargs = '+', default = ['CSI','TPR','FAR'], required = False) parser.add_argument('-q', '--alternate_ahps_query',help = 'Alternate filter query for AHPS. Default is: "not nws_lid.isnull() & not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" where bad_sites are (grfi2,ksdm7,hohn4,rwdn4)', default = False, required = False) - parser.add_argument('-sp', '--spatial_ahps', help = 'If spatial point layer is desired, supply a csv with 3 lines of the following format: metadata, path/to/metadata/shapefile\nevaluated, path/to/evaluated/shapefile\nstatic, path/to/static/shapefile.', default = False, required = False) + parser.add_argument('-sp', '--spatial', help = 'If enabled, creates spatial layers with metrics populated in attribute table.', action = 'store_true', required = False) parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) parser.add_argument('-i', '--site_plots', help = 'If enabled individual barplots for each site are created.', action = 'store_true', required = False) # Extract to dictionary and assign to variables args = vars(parser.parse_args()) - # If errors occur reassign error to True - error = False - # Create dictionary if file specified for spatial_ahps - if args['spatial_ahps']: - # Create dictionary - spatial_dict = {} - with open(args['spatial_ahps']) as file: - for line in file: - key, value = line.strip('\n').split(',') - spatial_dict[key] = Path(value) - args['spatial_ahps'] = spatial_dict - # Check that all required keys are present and overwrite args with spatial_dict - required_keys = set(['metadata', 'evaluated', 'static']) - if required_keys - spatial_dict.keys(): - print('\n Required keys are: metadata, evaluated, static') - error = True - else: - args['spatial_ahps'] = spatial_dict - - # Finalize Variables m = args['metrics_csv'] w = args['workspace'] v = args['versions'] s = args['stats'] q = args['alternate_ahps_query'] - sp= args['spatial_ahps'] + sp= args['spatial'] f = 
args['fim_1_ms'] i = args['site_plots'] # Run eval_plots function - if not error: - eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial_ahps = sp, fim_1_ms = f, site_barplots = i) + eval_plots(metrics_csv = m, workspace = w, versions = v, stats = s, alternate_ahps_query = q, spatial = sp, fim_1_ms = f, site_barplots = i) From 4df6d1d7a5935310a873415f046da6c84ddc4f0c Mon Sep 17 00:00:00 2001 From: Brad Date: Thu, 1 Apr 2021 12:27:10 -0500 Subject: [PATCH 060/359] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 843e37282..af8181826 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. -## v3.0.12.1 - 2021-03-26 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336) +## v3.0.12.1 - 2021-03-31 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336) Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. From 44b633381d37eb7f0a636ba56914850a33acdd42 Mon Sep 17 00:00:00 2001 From: "brian.avant" Date: Thu, 1 Apr 2021 13:40:49 -0500 Subject: [PATCH 061/359] [1pt] Tool to compare synthetic rating curve with benchmark rating curve (sierra test) (#332) * `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in run_by_unit.py with elevation at USGS gages * `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between FIM and USGS rating curve data * updates `aggregate_fim_outputs.py` call argument in `fim_run.sh` from 4 jobs to 6 jobs (optimizing API performance) * reroutes median elevation data from `add_crosswalk.py` and `rem.py` to new file (depreciating `hand_ref_elev_table.csv`) *- adds new files to `viz_whitelist` in `output_cleanup.py` --- CHANGELOG.md | 42 ++-- fim_run.sh | 2 +- src/add_crosswalk.py | 3 +- src/output_cleanup.py | 9 +- src/rem.py | 23 +- src/run_by_unit.sh | 10 +- src/usgs_gage_crosswalk.py | 124 ++++++++++ tools/rating_curve_comparison.py | 408 +++++++++++++++++++++++++++++++ 8 files changed, 590 insertions(+), 31 deletions(-) create mode 100755 src/usgs_gage_crosswalk.py create mode 100755 tools/rating_curve_comparison.py diff --git a/CHANGELOG.md b/CHANGELOG.md index af8181826..8845241ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,27 +1,42 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.12.2 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) + +Created tool to compare synthetic rating curve with benchmark rating curve (sierra test). resolves issue #293; resolves issue #308 + +### Changes + - update `aggregate_fim_outputs.py` call argument in `fim_run.sh` from 4 jobs to 6 jobs (optimizing API performance) + - reroutes median elevation data from `add_crosswalk.py` and `rem.py` to new file (depreciating `hand_ref_elev_table.csv`) + - adding new files to `viz_whitelist` in `output_cleanup.py` + +### Additions + - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in run_by_unit.py with elevation and additional attributes at USGS gages. + - `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between synthetic rating curves and USGS rating curve data. +

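To make the `rating_curve_comparison.py` entry above more concrete, the sketch below shows one way a synthetic rating curve could be scored against a USGS rating curve; the column names (`discharge_cfs`, `stage_ft`), the example values, and the single metric are illustrative assumptions, not the script's actual implementation.

```python
# Sketch only: compare a synthetic rating curve to a USGS rating curve at one gage.
# Column names, values, and the metric are assumptions for illustration.
import numpy as np
import pandas as pd

def mean_stage_difference(synthetic: pd.DataFrame, usgs: pd.DataFrame) -> float:
    """Mean absolute stage difference, evaluated at the synthetic curve's discharges."""
    usgs_sorted = usgs.sort_values('discharge_cfs')
    usgs_stage = np.interp(synthetic['discharge_cfs'],
                           usgs_sorted['discharge_cfs'],
                           usgs_sorted['stage_ft'])
    return float(np.mean(np.abs(synthetic['stage_ft'].to_numpy() - usgs_stage)))

synthetic_src = pd.DataFrame({'discharge_cfs': [100, 500, 1000], 'stage_ft': [2.1, 4.8, 6.9]})
usgs_rc = pd.DataFrame({'discharge_cfs': [50, 400, 1200], 'stage_ft': [1.8, 4.5, 7.4]})
print(mean_stage_difference(synthetic_src, usgs_rc))
```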
## v3.0.12.1 - 2021-03-31 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336) - Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. +Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. - ### Changes +### Changes - Removes file dependencies from spatial option. Does require the WBD layer which should be specified in `.env` file. - Produces outputs in a format consistent with requirements needed for publishing. - Preserves leading zeros in huc information for all outputs from `eval_plots.py`. ### Additions -- Creates `fim_performance_points.shp`: this layer consists of all evaluated ahps points (with metrics). Spatial data retrieved from WRDS on the fly. -- Creates `fim_performance_polys.shp`: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer. + - Creates `fim_performance_points.shp`: this layer consists of all evaluated ahps points (with metrics). Spatial data retrieved from WRDS on the fly. + - Creates `fim_performance_polys.shp`: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer.

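As a usage note for the spatial outputs listed above, both shapefiles are ordinary vector layers; below is a hedged sketch of inspecting `fim_performance_polys.shp` with geopandas. The path and the version string are placeholders, while the `version`, `magnitude`, and `CSI` fields come from the attribute table described in this entry.

```python
# Sketch: read the HUC8 performance polygons written by eval_plots.py with the spatial option.
# Path and version string are placeholders.
import geopandas as gpd

polys = gpd.read_file('/data/eval_plots_output/fim_performance_polys.shp')

# One row per huc/version/magnitude; keep a single slice for mapping
subset = polys[(polys['magnitude'] == '100yr') & (polys['version'] == 'fim_3_0_12_1_fr')]

# Quick choropleth of CSI by HUC8 (requires matplotlib)
ax = subset.plot(column='CSI', legend=True, figsize=(10, 6))
ax.set_title('BLE 100yr CSI by HUC8')
```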
## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/237) - Add more detail/information to plotting capabilities. - ### Changes +Add more detail/information to plotting capabilities. + +### Changes - Merge `plot_functions.py` into `eval_plots.py` and move `eval_plots.py` into the tools directory. - Remove `plots` subdirectory. + - ### Additions - Optional argument to create barplots of CSI for each individual site. - Create a csv containing the data used to create the scatterplots. @@ -29,30 +44,29 @@ We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. ## v3.0.11.0 - 2021-03-22 - [PR #319](https://github.com/NOAA-OWP/cahaba/pull/298) - Improvements to CatFIM service source data generation. +Improvements to CatFIM service source data generation. - ### Changes +### Changes - Renamed `generate_categorical_fim.py` to `generate_categorical_fim_mapping.py`. - Updated the status outputs of the `nws_lid_sites layer` and saved it in the same directory as the `merged catfim_library layer`. - Additional stability fixes (such as improved compatability with WRDS updates). + ### Additions - Added `generate_categorical_fim.py` to wrap `generate_categorical_fim_flows.py` and `generate_categorical_fim_mapping.py`. - Create new `nws_lid_sites` shapefile located in same directory as the `catfim_library` shapefile. -

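The "wrap" relationship in the v3.0.11.0 entry above is essentially a driver that runs the flows step and then the mapping step; the sketch below only illustrates that sequencing, and every command-line flag shown is a hypothetical placeholder rather than the real interface of either script.

```python
# Illustrative driver pattern only; the flags are hypothetical placeholders,
# not the actual interfaces of the CatFIM scripts.
import subprocess
import sys

def run_catfim(fim_run_dir: str, output_dir: str) -> None:
    # Step 1: generate category flow files (hypothetical flags)
    subprocess.run([sys.executable, 'generate_categorical_fim_flows.py',
                    '-w', output_dir], check=True)
    # Step 2: map those flows against the HAND outputs (hypothetical flags)
    subprocess.run([sys.executable, 'generate_categorical_fim_mapping.py',
                    '-r', fim_run_dir, '-w', output_dir], check=True)

if __name__ == '__main__':
    run_catfim('/data/outputs/fim_run', '/data/catfim')   # placeholder paths
```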
## v3.0.10.1 - 2021-03-24 - [PR #320](https://github.com/NOAA-OWP/cahaba/pull/320) - Patch to synthesize_test_cases.py. +Patch to synthesize_test_cases.py. - ### Changes +### Changes - Bug fix to `synthesize_test_cases.py` to allow comparison between `testing` version and `official` versions. -

## v3.0.10.0 - 2021-03-12 - [PR #298](https://github.com/NOAA-OWP/cahaba/pull/298) - Preprocessing of flow files for Categorical FIM. +Preprocessing of flow files for Categorical FIM. ### Additions - Generate Categorical FIM flow files for each category (action, minor, moderate, major). @@ -60,9 +74,9 @@ We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. - Generate csv of attribute data in shapefile. - Aggregate all shapefiles and csv files into one file in parent directory. - Add flood of record category. + ### Changes - Stability fixes to `generate_categorical_fim.py`. -

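For readers unfamiliar with the flow files referenced in the v3.0.10.0 entry above, the sketch below shows the general shape of writing one CSV of flows per category; the `feature_id`/`discharge` column layout, the directory structure, and the example values are assumptions for illustration only.

```python
# Sketch: write one flow file per CatFIM category (columns, paths, and values are illustrative).
from pathlib import Path
import pandas as pd

# Hypothetical flows (cms) per NWM feature_id for each category
category_flows = {
    'action':   {948010101: 35.0, 948010102: 12.5},
    'minor':    {948010101: 52.0, 948010102: 20.1},
    'moderate': {948010101: 80.3, 948010102: 33.7},
    'major':    {948010101: 121.0, 948010102: 54.2},
}

workspace = Path('/data/catfim_flows')  # placeholder output directory
for category, flows in category_flows.items():
    df = pd.DataFrame({'feature_id': list(flows), 'discharge': list(flows.values())})
    out_dir = workspace / category
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / f'{category}_flows.csv', index=False)
```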
## v3.0.9.0 - 2021-03-12 - [PR #297](https://github.com/NOAA-OWP/cahaba/pull/297) diff --git a/fim_run.sh b/fim_run.sh index 42a5d022e..8d1875e5f 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -152,5 +152,5 @@ fi echo "$viz" if [[ "$viz" -eq 1 ]]; then # aggregate outputs - python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 4 + time python3 /foss_fim/src/aggregate_fim_outputs.py -d $outputRunDataDir -j 6 fi diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index 2958c2882..96f2805c0 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -220,9 +220,8 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_hydro_table = output_hydro_table.merge(input_huc.loc[:,[FIM_ID,'HUC8']],how='left',on=FIM_ID) if output_flows.HydroID.dtype != 'str': output_flows.HydroID = output_flows.HydroID.astype(str) - output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID','Median_Thal_Elev_m']],how='left',on='HydroID') + output_hydro_table = output_hydro_table.merge(output_flows.loc[:,['HydroID','LakeID']],how='left',on='HydroID') output_hydro_table['LakeID'] = output_hydro_table['LakeID'].astype(int) - output_hydro_table['Median_Thal_Elev_m'] = output_hydro_table['Median_Thal_Elev_m'].astype(float).round(2) output_hydro_table = output_hydro_table.rename(columns={'HUC8':'HUC'}) if output_hydro_table.HUC.dtype != 'str': output_hydro_table.HUC = output_hydro_table.HUC.astype(str) diff --git a/src/output_cleanup.py b/src/output_cleanup.py index 7e211bdc5..e74e26b17 100755 --- a/src/output_cleanup.py +++ b/src/output_cleanup.py @@ -31,15 +31,20 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod 'gw_catchments_reaches_filtered_addedAttributes.tif', 'hydroTable.csv', 'src.json', - 'small_segments.csv' + 'small_segments.csv', + 'usgs_elev_table.csv', + 'hand_ref_elev_table.csv' ] # List of files that will be saved during a viz run viz_whitelist = [ 'rem_zeroed_masked.tif', + 'gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg', + 'demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg', 'gw_catchments_reaches_filtered_addedAttributes.tif', 'hydroTable.csv', - 'src.json' + 'src.json', + 'small_segments.csv' ] # If "production" run, only keep whitelisted files diff --git a/src/rem.py b/src/rem.py index 403edf9db..f0cd8fad3 100755 --- a/src/rem.py +++ b/src/rem.py @@ -11,7 +11,7 @@ from utils.shared_functions import getDriver -def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, hand_ref_elev_fileName, dem_reaches_filename): +def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, dem_reaches_filename): """ Calculates REM/HAND/Detrended DEM @@ -25,8 +25,6 @@ def rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raste File name of output relative elevation raster. hydroid_fileName : str File name of the hydroid raster (i.e. 
gw_catchments_reaches.tif) - hand_ref_elev_fileName - File name of the output csv containing list of hydroid values and HAND zero/reference elev dem_reaches_filename File name of the reaches layer to populate HAND elevation attribute values and overwrite as output @@ -108,7 +106,6 @@ def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalw gw_catchments_pixels_masked_object.close() thalweg_raster_object.close() -############################################### # Merge and export dictionary to to_csv catchment_min_dict_df = pd.DataFrame.from_dict(catchment_min_dict, orient='index') # convert dict to dataframe catchment_min_dict_df.columns = ['Median_Thal_Elev_m'] @@ -116,12 +113,18 @@ def make_catchment_min_dict(flat_dem, catchment_min_dict, flat_catchments, thalw catchment_hydroid_dict_df.columns = ['HydroID'] merge_df = catchment_hydroid_dict_df.merge(catchment_min_dict_df, left_index=True, right_index=True) merge_df.index.name = 'pixelcatch_id' - merge_df.to_csv(hand_ref_elev_fileName,index=True) # export dataframe to csv file - # Merge the HAND reference elvation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) - merge_df = merge_df.groupby(['HydroID']).median() # median value of all Median_Thal_Elev_m for pixel catchments in each HydroID reach + # Merge the HAND reference elevation by HydroID dataframe with the demDerived_reaches layer (add new layer attribute) + min_by_hydroid = merge_df.groupby(['HydroID']).min() # min value of all med_thal_elev for pixel catchments in each HydroID reach + min_by_hydroid.columns = ['min_thal_elev'] + med_by_hydroid = merge_df.groupby(['HydroID']).median() # median value of all med_thal_elev for pixel catchments in each HydroID reach + med_by_hydroid.columns = ['med_thal_elev'] + max_by_hydroid = merge_df.groupby(['HydroID']).max() # max value of all med_thal_elev for pixel catchments in each HydroID reach + max_by_hydroid.columns = ['max_thal_elev'] input_reaches = gpd.read_file(dem_reaches_filename) - input_reaches = input_reaches.merge(merge_df, on='HydroID') # merge dataframes by HydroID variable + input_reaches = input_reaches.merge(min_by_hydroid, on='HydroID') # merge dataframes by HydroID variable + input_reaches = input_reaches.merge(med_by_hydroid, on='HydroID') # merge dataframes by HydroID variable + input_reaches = input_reaches.merge(max_by_hydroid, on='HydroID') # merge dataframes by HydroID variable input_reaches.to_file(dem_reaches_filename,driver=getDriver(dem_reaches_filename),index=False) # ------------------------------------------------------------------------------------------------------------------------ # @@ -171,7 +174,6 @@ def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): parser.add_argument('-t','--thalweg-raster',help='A binary raster representing the thalweg. 
1 for thalweg, 0 for non-thalweg.',required=True) parser.add_argument('-o','--rem',help='Output REM raster',required=True) parser.add_argument('-i','--hydroid', help='HydroID raster to use within project path', required=True) - parser.add_argument('-r','--hand_ref_elev_table',help='Output table of HAND reference elev by catchment',required=True) parser.add_argument('-s','--dem_reaches_in_out',help='DEM derived reach layer to join HAND reference elevation attribute',required=True) @@ -184,7 +186,6 @@ def calculate_rem(flat_dem,catchmentMinDict,flat_catchments,ndv): rem_fileName = args['rem'] thalweg_raster = args['thalweg_raster'] hydroid_fileName = args['hydroid'] - hand_ref_elev_fileName = args['hand_ref_elev_table'] dem_reaches_filename = args['dem_reaches_in_out'] - rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, hand_ref_elev_fileName, dem_reaches_filename) + rel_dem(dem_fileName, pixel_watersheds_fileName, rem_fileName, thalweg_raster, hydroid_fileName, dem_reaches_filename) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 68866729d..1242768dc 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -321,7 +321,7 @@ echo -e $startDiv"D8 REM $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/rem.tif ] && \ -$srcDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels -i $outputHucDataDir/gw_catchments_reaches.tif -r $outputHucDataDir/hand_ref_elev_table.csv -s $outputHucDataDir/demDerived_reaches_split.gpkg +$srcDir/rem.py -d $dem_thalwegCond -w $outputHucDataDir/gw_catchments_pixels.tif -o $outputHucDataDir/rem.tif -t $demDerived_streamPixels -i $outputHucDataDir/gw_catchments_reaches.tif -s $outputHucDataDir/demDerived_reaches_split.gpkg Tcount ## DINF DISTANCE DOWN ## @@ -432,6 +432,14 @@ Tstart $srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent -k $outputHucDataDir/small_segments.csv Tcount + +## USGS CROSSWALK ## +echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv +date -u +Tstart +$srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv +Tcount + ## CLEANUP OUTPUTS ## echo -e $startDiv"Cleaning up outputs $hucNumber"$stopDiv args=() diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py new file mode 100755 index 000000000..29ef7b592 --- /dev/null +++ b/src/usgs_gage_crosswalk.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + +import os +import geopandas as gpd +import pandas as pd +import numpy as np +import rasterio +import argparse 
+import pygeos +from shapely.wkb import dumps, loads +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) + +''' Get elevation at adjusted USGS gages locations + + Parameters + ---------- + usgs_gages_filename : str + File name of USGS stations layer. + dem_filename : str + File name of original DEM. + input_flows_filename : str + File name of FIM streams layer. + input_catchment_filename : str + File name of FIM catchment layer. + wbd_buffer_filename : str + File name of buffered wbd. + dem_adj_filename : str + File name of thalweg adjusted DEM. + output_table_filename : str + File name of output table. +''' + + +def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,output_table_filename): + + wbd_buffer = gpd.read_file(wbd_buffer_filename) + usgs_gages = gpd.read_file(usgs_gages_filename, mask=wbd_buffer) + dem_m = rasterio.open(dem_filename,'r') + input_flows = gpd.read_file(input_flows_filename) + input_catchment = gpd.read_file(input_catchment_filename) + dem_adj = rasterio.open(dem_adj_filename,'r') + + if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) + + # Identify closest HydroID + closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) + closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) + closest_hydro_id = closest_hydro_id.dropna() + + # Get USGS gages that are within catchment boundaries + usgs_gages = usgs_gages.loc[usgs_gages.site_no.isin(list(closest_hydro_id.site_no))] + + columns = ['location_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] + gage_data = [] + + # Move USGS gage to stream + for index, gage in usgs_gages.iterrows(): + + # Get stream attributes + hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() + str_order = str(int(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].order_.item())) + min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].min_thal_elev.item(),2) + med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].med_thal_elev.item(),2) + max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].max_thal_elev.item(),2) + + # Convert headwater point geometries to WKB representation + wkb_gages = dumps(gage.geometry) + + # Create pygeos headwater point geometries from WKB representation + gage_bin_geom = pygeos.io.from_wkb(wkb_gages) + + # Closest segment to headwater + closest_stream = input_flows.loc[input_flows.HydroID==hydro_id] + wkb_closest_stream = dumps(closest_stream.geometry.item()) + stream_bin_geom = pygeos.io.from_wkb(wkb_closest_stream) + + # Linear reference headwater to closest stream segment + gage_distance_to_line = pygeos.linear.line_locate_point(stream_bin_geom, gage_bin_geom) + referenced_gage = pygeos.linear.line_interpolate_point(stream_bin_geom, gage_distance_to_line) + + # Convert geometries to wkb representation + bin_referenced_gage = pygeos.io.to_wkb(referenced_gage) + + # Convert to shapely geometries + shply_referenced_gage = loads(bin_referenced_gage) + + # Sample rasters at adjusted gage + dem_m_elev = round(list(rasterio.sample.sample_gen(dem_m,shply_referenced_gage.coords))[0].item(),2) + dem_adj_elev = 
round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) + + # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table + site_elevations = [str(gage.site_no), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] + gage_data.append(site_elevations) + + + elev_table = pd.DataFrame(gage_data, columns=columns) + + if not elev_table.empty: + elev_table.to_csv(output_table_filename,index=False) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Crosswalk USGS sites to HydroID and get elevations') + parser.add_argument('-gages','--usgs-gages-filename', help='USGS gages', required=True) + parser.add_argument('-dem','--dem-filename',help='DEM',required=True) + parser.add_argument('-flows','--input-flows-filename', help='DEM derived streams', required=True) + parser.add_argument('-cat','--input-catchment-filename', help='DEM derived catchments', required=True) + parser.add_argument('-wbd','--wbd-buffer-filename', help='WBD buffer', required=True) + parser.add_argument('-dem_adj','--dem-adj-filename', help='Thalweg adjusted DEM', required=True) + parser.add_argument('-outtable','--output-table-filename', help='Table to append data', required=True) + + args = vars(parser.parse_args()) + + usgs_gages_filename = args['usgs_gages_filename'] + dem_filename = args['dem_filename'] + input_flows_filename = args['input_flows_filename'] + input_catchment_filename = args['input_catchment_filename'] + wbd_buffer_filename = args['wbd_buffer_filename'] + dem_adj_filename = args['dem_adj_filename'] + output_table_filename = args['output_table_filename'] + + crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,output_table_filename) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py new file mode 100755 index 000000000..d8df892fb --- /dev/null +++ b/tools/rating_curve_comparison.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python3 + +import os +import sys +import geopandas as gpd +import pandas as pd +import numpy as np +import argparse +import matplotlib.pyplot as plt +import seaborn as sns +from functools import reduce +from multiprocessing import Pool +from os.path import isfile, join, dirname +import shutil +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) + +""" + Plot Rating Curves and Compare to USGS Gages + + Parameters + ---------- + fim_dir : str + Directory containing FIM output folders. + output_dir : str + Directory containing rating curve plots and tables. + usgs_gages_filename : str + File name of USGS rating curves. + nwm_flow_dir : str + Directory containing NWM recurrence flows files. + number_of_jobs : str + Number of jobs. + stat_groups : str + string of columns to group eval metrics. 
+""" + +# recurr_intervals = ['recurr_1_5_cms.csv','recurr_5_0_cms.csv','recurr_10_0_cms.csv'] + +def generate_rating_curve_metrics(args): + + elev_table_filename = args[0] + hydrotable_filename = args[1] + usgs_gages_filename = args[2] + usgs_recurr_stats_filename = args[3] + nwm_recurr_data_filename = args[4] + rc_comparison_plot_filename = args[5] + nwm_flow_dir = args[6] + huc = args[7] + + elev_table = pd.read_csv(elev_table_filename,dtype={'location_id': str}) + hydrotable = pd.read_csv(hydrotable_filename,dtype={'HUC': str,'feature_id': str}) + usgs_gages = pd.read_csv(usgs_gages_filename,dtype={'location_id': str}) + + # Join rating curves with elevation data + hydrotable = hydrotable.merge(elev_table, on="HydroID") + relevant_gages = list(hydrotable.location_id.unique()) + usgs_gages = usgs_gages[usgs_gages['location_id'].isin(relevant_gages)] + usgs_gages = usgs_gages.reset_index(drop=True) + + if len(usgs_gages) > 0: + + # Adjust rating curve to elevation + hydrotable['elevation_ft'] = (hydrotable.stage + hydrotable.dem_adj_elevation) * 3.28084 # convert from m to ft + # hydrotable['raw_elevation_ft'] = (hydrotable.stage + hydrotable.dem_elevation) * 3.28084 # convert from m to ft + hydrotable['discharge_cfs'] = hydrotable.discharge_cms * 35.3147 + usgs_gages = usgs_gages.rename(columns={"flow": "discharge_cfs", "elevation_navd88": "elevation_ft"}) + + hydrotable['source'] = "FIM" + usgs_gages['source'] = "USGS" + limited_hydrotable = hydrotable.filter(items=['location_id','elevation_ft','discharge_cfs','source']) + select_usgs_gages = usgs_gages.filter(items=['location_id', 'elevation_ft', 'discharge_cfs','source']) + + rating_curves = limited_hydrotable.append(select_usgs_gages) + + # Add stream order + stream_orders = hydrotable.filter(items=['location_id','str_order']).drop_duplicates() + rating_curves = rating_curves.merge(stream_orders, on='location_id') + rating_curves['str_order'] = rating_curves['str_order'].astype('int') + + # plot rating curves + generate_facet_plot(rating_curves, rc_comparison_plot_filename) + + # NWM recurr intervals + recurr_1_5_yr_filename = join(nwm_flow_dir,'recurr_1_5_cms.csv') + recurr_5_yr_filename = join(nwm_flow_dir,'recurr_5_0_cms.csv') + recurr_10_yr_filename = join(nwm_flow_dir,'recurr_10_0_cms.csv') + + # Update column names + recurr_1_5_yr = pd.read_csv(recurr_1_5_yr_filename,dtype={'feature_id': str}) + recurr_1_5_yr = recurr_1_5_yr.rename(columns={"discharge": "1.5"}) + recurr_5_yr = pd.read_csv(recurr_5_yr_filename,dtype={'feature_id': str}) + recurr_5_yr = recurr_5_yr.rename(columns={"discharge": "5.0"}) + recurr_10_yr = pd.read_csv(recurr_10_yr_filename,dtype={'feature_id': str}) + recurr_10_yr = recurr_10_yr.rename(columns={"discharge": "10.0"}) + + # Merge NWM recurr intervals into a single layer + nwm_recurr_intervals_all = reduce(lambda x,y: pd.merge(x,y, on='feature_id', how='outer'), [recurr_1_5_yr, recurr_5_yr, recurr_10_yr]) + nwm_recurr_intervals_all = pd.melt(nwm_recurr_intervals_all, id_vars=['feature_id'], value_vars=['1.5','5.0','10.0'], var_name='recurr_interval', value_name='discharge_cms') + nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 + nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() + + + # Identify unique gages + usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() + + nwm_recurr_data_table = pd.DataFrame() + usgs_recurr_data = pd.DataFrame() + 
+ # Interpolate USGS/FIM elevation at each gage + for index, gage in usgs_crosswalk.iterrows(): + + # Interpolate USGS elevation at NWM recurrence intervals + usgs_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="USGS")] + + if len(usgs_rc) <1: + print(f"missing USGS rating curve data for usgs station {gage.location_id} in huc {huc}") + continue + + str_order = np.unique(usgs_rc.str_order).item() + feature_id = str(gage.feature_id) + + usgs_pred_elev = get_reccur_intervals(usgs_rc, usgs_crosswalk,nwm_recurr_intervals_all) + + # Handle sites missing data + if len(usgs_pred_elev) <1: + print(f"missing USGS elevation data for usgs station {gage.location_id} in huc {huc}") + continue + + # Clean up data + usgs_pred_elev['location_id'] = gage.location_id + usgs_pred_elev = usgs_pred_elev.filter(items=['location_id','recurr_interval', 'discharge_cfs','pred_elev']) + usgs_pred_elev = usgs_pred_elev.rename(columns={"pred_elev": "USGS"}) + + # Interpolate FIM elevation at NWM recurrence intervals + fim_rc = rating_curves.loc[(rating_curves.location_id==gage.location_id) & (rating_curves.source=="FIM")] + + if len(fim_rc) <1: + print(f"missing FIM rating curve data for usgs station {gage.location_id} in huc {huc}") + continue + + fim_pred_elev = get_reccur_intervals(fim_rc, usgs_crosswalk,nwm_recurr_intervals_all) + + # Handle sites missing data + if len(fim_pred_elev) <1: + print(f"missing FIM elevation data for usgs station {gage.location_id} in huc {huc}") + continue + + # Clean up data + fim_pred_elev = fim_pred_elev.rename(columns={"pred_elev": "FIM"}) + fim_pred_elev = fim_pred_elev.filter(items=['recurr_interval', 'discharge_cfs','FIM']) + usgs_pred_elev = usgs_pred_elev.merge(fim_pred_elev, on=['recurr_interval','discharge_cfs']) + + # Add attributes + usgs_pred_elev['HUC'] = huc + usgs_pred_elev['HUC4'] = huc[0:4] + usgs_pred_elev['str_order'] = str_order + usgs_pred_elev['feature_id'] = feature_id + + # Melt dataframe + usgs_pred_elev = pd.melt(usgs_pred_elev, id_vars=['location_id','feature_id','recurr_interval','discharge_cfs','HUC','HUC4','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') + nwm_recurr_data_table = nwm_recurr_data_table.append(usgs_pred_elev) + + # Interpolate FIM elevation at USGS observations + # fim_rc = fim_rc.merge(usgs_crosswalk, on="location_id") + # usgs_rc = usgs_rc.rename(columns={"elevation_ft": "USGS"}) + # + # # Sort stage in ascending order + # usgs_rc = usgs_rc.sort_values('USGS',ascending=True) + # + # # Interpolate FIM elevation at USGS observations + # usgs_rc['FIM'] = np.interp(usgs_rc.discharge_cfs.values, fim_rc['discharge_cfs'], fim_rc['elevation_ft'], left = np.nan, right = np.nan) + # usgs_rc = usgs_rc[usgs_rc['FIM'].notna()] + # usgs_rc = usgs_rc.drop(columns=["source"]) + # + # # Melt dataframe + # usgs_rc = pd.melt(usgs_rc, id_vars=['location_id','discharge_cfs','str_order'], value_vars=['USGS','FIM'], var_name="source", value_name='elevation_ft') + # + # if not usgs_rc.empty: + # usgs_recurr_data = usgs_recurr_data.append(usgs_rc) + + # Generate stats for all sites in huc + # if not usgs_recurr_data.empty: + # usgs_recurr_stats_table = calculate_rc_stats_elev(usgs_recurr_data) + # usgs_recurr_stats_table.to_csv(usgs_recurr_stats_filename,index=False) + + # # Generate plots (not currently being used) + # fim_elev_at_USGS_rc_plot_filename = join(dirname(rc_comparison_plot_filename),'FIM_elevations_at_USGS_rc_' + str(huc) +'.png') + # 
generate_facet_plot(usgs_recurr_data, fim_elev_at_USGS_rc_plot_filename) + + if not nwm_recurr_data_table.empty: + nwm_recurr_data_table.discharge_cfs = np.round(nwm_recurr_data_table.discharge_cfs,2) + nwm_recurr_data_table.elevation_ft = np.round(nwm_recurr_data_table.elevation_ft,2) + nwm_recurr_data_table.to_csv(nwm_recurr_data_filename,index=False) + + else: + print(f"no USGS data for gage(s): {relevant_gages} in huc {huc}") + +def aggregate_metrics(output_dir,procs_list,stat_groups): + + # agg_usgs_interp_elev_stats = join(output_dir,'agg_usgs_interp_elev_stats.csv') + agg_nwm_recurr_flow_elev = join(output_dir,'agg_nwm_recurr_flow_elevations.csv') + agg_nwm_recurr_flow_elev_stats = join(output_dir,f"agg_nwm_recurr_flow_elev_stats_{'_'.join(stat_groups)}.csv") + + # if os.path.isfile(agg_usgs_interp_elev_stats): + # os.remove(agg_usgs_interp_elev_stats) + if os.path.isfile(agg_nwm_recurr_flow_elev): + os.remove(agg_nwm_recurr_flow_elev) + if os.path.isfile(agg_nwm_recurr_flow_elev_stats): + os.remove(agg_nwm_recurr_flow_elev_stats) + + for huc in procs_list: + # if os.path.isfile(huc[3]): + # usgs_recurr_stats = pd.read_csv(huc[3]) + # + # # Write/append usgs_recurr_stats + # if os.path.isfile(agg_usgs_interp_elev_stats): + # usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False, mode='a',header=False) + # else: + # usgs_recurr_stats.to_csv(agg_usgs_interp_elev_stats,index=False) + + if os.path.isfile(huc[4]): + nwm_recurr_data = pd.read_csv(huc[4],dtype={'location_id': str, + 'feature_id': str}) + + # Write/append nwm_recurr_data + if os.path.isfile(agg_nwm_recurr_flow_elev): + nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False, mode='a',header=False) + else: + nwm_recurr_data.to_csv(agg_nwm_recurr_flow_elev,index=False) + + agg_stats = pd.read_csv(agg_nwm_recurr_flow_elev,dtype={'location_id': str, + 'feature_id': str}) + + agg_recurr_stats_table = calculate_rc_stats_elev(agg_stats,stat_groups) + + agg_recurr_stats_table.to_csv(agg_nwm_recurr_flow_elev_stats,index=False) + + +def generate_facet_plot(rc, plot_filename): + + # Filter FIM elevation based on USGS data + for gage in rc.location_id.unique(): + + min_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.min() + max_elev = rc.loc[(rc.location_id==gage) & (rc.source=='USGS')].elevation_ft.max() + + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft > (max_elev + 2))].index) + rc = rc.drop(rc[(rc.location_id==gage) & (rc.source=='FIM') & (rc.elevation_ft < min_elev - 2)].index) + + rc = rc.rename(columns={"location_id": "USGS Gage"}) + + ## Generate rating curve plots + num_plots = len(rc["USGS Gage"].unique()) + if num_plots > 3: + columns = num_plots // 3 + else: + columns = 1 + + sns.set(style="ticks") + g = sns.FacetGrid(rc, col="USGS Gage", hue="source",sharex=False, sharey=False,col_wrap=columns) + g.map(sns.scatterplot, "discharge_cfs", "elevation_ft", palette="tab20c", marker="o") + g.set_axis_labels(x_var="Discharge (cfs)", y_var="Elevation (ft)") + + # Adjust the arrangement of the plots + g.fig.tight_layout(w_pad=1) + g.add_legend() + + plt.savefig(plot_filename) + plt.close() + + +def get_reccur_intervals(site_rc, usgs_crosswalk,nwm_recurr_intervals): + + usgs_site = site_rc.merge(usgs_crosswalk, on="location_id") + nwm_ids = len(usgs_site.feature_id.drop_duplicates()) + + if nwm_ids > 0: + + nwm_recurr_intervals = nwm_recurr_intervals.copy().loc[nwm_recurr_intervals.feature_id==usgs_site.feature_id.drop_duplicates().item()] + 
nwm_recurr_intervals['pred_elev'] = np.interp(nwm_recurr_intervals.discharge_cfs.values, usgs_site['discharge_cfs'], usgs_site['elevation_ft'], left = np.nan, right = np.nan) + + return nwm_recurr_intervals + + else: + return [] + + +def calculate_rc_stats_elev(rc,stat_groups=None): + + usgs_elev = "USGS" + src_elev = "FIM" + + # Collect any extra columns not associated with melt + col_index = list(rc.columns) + pivot_vars = ['source','elevation_ft'] + col_index = [col for col in col_index if col not in pivot_vars] + + # Unmelt elevation/source + rc_unmelt = (rc.set_index(col_index) + .pivot(columns="source")['elevation_ft'] + .reset_index() + .rename_axis(None, axis=1) + ) + + if stat_groups is None: + stat_groups = ['location_id'] + + # Calculate variables for NRMSE + rc_unmelt["yhat_minus_y"] = rc_unmelt[src_elev] - rc_unmelt[usgs_elev] + rc_unmelt["yhat_minus_y_squared"] = rc_unmelt["yhat_minus_y"] ** 2 + + # Calculate metrics by group + station_rc = rc_unmelt.groupby(stat_groups) + + # Calculate variables for NRMSE + sum_y_diff = station_rc.apply(lambda x: x["yhat_minus_y_squared"].sum())\ + .reset_index(stat_groups, drop = False).rename({0: "sum_y_diff"}, axis=1) + + # Determine number of events that are modeled + n = station_rc.apply(lambda x: x[usgs_elev].count())\ + .reset_index(stat_groups, drop = False).rename({0: "n"}, axis=1) + + # Determine the maximum/minimum USGS elevation + y_max = station_rc.apply(lambda x: x[usgs_elev].max())\ + .reset_index(stat_groups, drop = False).rename({0: "y_max"}, axis=1) + y_min = station_rc.apply(lambda x: x[usgs_elev].min())\ + .reset_index(stat_groups, drop = False).rename({0: "y_min"}, axis=1) + + # Collect variables for NRMSE + nrmse_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [sum_y_diff, n, y_max, y_min]) + nrmse_table_group = nrmse_table.groupby(stat_groups) + + # Calculate nrmse + nrmse = nrmse_table_group.apply(lambda x: ((x['sum_y_diff'] / x['n']) ** 0.5) / (x['y_max'] - x['y_min']))\ + .reset_index(stat_groups, drop = False).rename({0: "nrmse"}, axis=1) + + # Calculate Mean Absolute Depth Difference + mean_abs_y_diff = station_rc.apply(lambda x: (abs(x["yhat_minus_y"]).mean()))\ + .reset_index(stat_groups, drop = False).rename({0: "mean_abs_y_diff_ft"}, axis=1) + + # Calculate Percent Bias + percent_bias = station_rc.apply(lambda x: 100 * (x["yhat_minus_y"].sum() / x[usgs_elev].sum()))\ + .reset_index(stat_groups, drop = False).rename({0: "percent_bias"}, axis=1) + + rc_stat_table = reduce(lambda x,y: pd.merge(x,y, on=stat_groups, how='outer'), [nrmse, mean_abs_y_diff, percent_bias]) + + return rc_stat_table + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True) + parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True) + parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True) + parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True) + parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) + parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False) + + args = vars(parser.parse_args()) + + fim_dir = args['fim_dir'] + output_dir = args['output_dir'] + usgs_gages_filename = args['usgs_gages_filename'] + nwm_flow_dir = args['nwm_flow_dir'] 
+ number_of_jobs = args['number_of_jobs'] + stat_groups = args['stat_groups'] + + stat_groups = stat_groups.split() + procs_list = [] + + plots_dir = join(output_dir,'plots') + os.makedirs(plots_dir, exist_ok=True) + tables_dir = join(output_dir,'tables') + os.makedirs(tables_dir, exist_ok=True) + + # Open log file + sys.__stdout__ = sys.stdout + log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") + sys.stdout = log_file + + huc_list = os.listdir(fim_dir) + for huc in huc_list: + + if huc != 'logs': + elev_table_filename = join(fim_dir,huc,'usgs_elev_table.csv') + hydrotable_filename = join(fim_dir,huc,'hydroTable.csv') + usgs_recurr_stats_filename = join(tables_dir,f"usgs_interpolated_elevation_stats_{huc}.csv") + nwm_recurr_data_filename = join(tables_dir,f"nwm_recurrence_flow_elevations_{huc}.csv") + rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png") + + if isfile(elev_table_filename): + procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir,huc]) + + # Initiate multiprocessing + print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") + pool = Pool(number_of_jobs) + pool.map(generate_rating_curve_metrics, procs_list) + + print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") + aggregate_metrics(output_dir,procs_list,stat_groups) + + print('Delete intermediate tables') + shutil.rmtree(tables_dir, ignore_errors=True) + + # Close log file + sys.stdout = sys.__stdout__ + log_file.close() From 40631f42e5f9ac97204c9a54d4bd9cb7204efffa Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Mon, 5 Apr 2021 13:45:09 -0500 Subject: [PATCH 062/359] Create tool to retrieve rating curves from USGS sites and convert to elevation (NAVD88). Intended to be used as part of the Sierra Test. - Modify usgs_gage_crosswalk.py to: - Look for location_id instead of site_no attribute field in usgs_gages.gpkg file. - Filter out gages that do not have rating curves included in the usgs_rating_curves.csv - Modify rating_curve_comparison.py to perform a check on the age of the user specified usgs_rating_curves.csv and alert user to the age of the file and recommend updating if file is older the 30 days. - Add rating_curve_get_usgs_curves.py. This script will generate the following files: - usgs_rating_curves.csv: A csv file that contains rating curves (including converted to NAVD88 elevation) for USGS gages in a format that is compatible with rating_curve_comparisons.py. As it is is currently configured, only gages within CONUS will have rating curve data. - log.csv: A log file that records status for each gage and includes error messages. - usgs_gages.gpkg: A geospatial layer (in FIM projection) of all active USGS gages that meet a predefined criteria. Additionally, the curve attribute indicates whether a rating curve is found in the usgs_rating_curves.csv. This spatial file is only generated if the all option is passed with the -l argument. This resolves #289. 
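For illustration, a minimal sketch of the stage-to-elevation step described above (the function name and the sample datum/offset values here are hypothetical; the script itself obtains the NGVD29-to-NAVD88 offset from the NOAA Tidal API):

```python
import pandas as pd

def stage_to_navd88(curve, datum, datum_adj_ft=0.0):
    """Convert rating-curve stage (ft above gage datum) to elevation (ft, NAVD88)."""
    navd88_datum = round(datum + datum_adj_ft, 2)          # site datum expressed in NAVD88
    curve = curve.copy()
    curve['navd88_datum'] = navd88_datum
    curve['elevation_navd88'] = curve['stage'] + navd88_datum
    return curve

# Hypothetical two-point rating curve and datum values, for demonstration only
rc = pd.DataFrame({'stage': [1.0, 2.5, 4.0], 'flow': [150.0, 900.0, 2400.0]})
print(stage_to_navd88(rc, datum=612.4, datum_adj_ft=-0.8))
```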
--- CHANGELOG.md | 29 ++- src/usgs_gage_crosswalk.py | 19 +- tools/rating_curve_comparison.py | 25 +++ tools/rating_curve_get_usgs_curves.py | 288 ++++++++++++++++++++++++++ tools/tools_shared_functions.py | 210 +++++++++++++++++++ 5 files changed, 556 insertions(+), 15 deletions(-) create mode 100644 tools/rating_curve_get_usgs_curves.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 8845241ae..ba1d25901 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,32 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.14.0 - 2021-04-05 - [PR #338](https://github.com/NOAA-OWP/cahaba/pull/338) -## v3.0.12.2 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) +Create tool to retrieve rating curves from USGS sites and convert to elevation (NAVD88). Intended to be used as part of the Sierra Test. + +### Changes + - Modify `usgs_gage_crosswalk.py` to: + 1) Look for `location_id` instead of `site_no` attribute field in `usgs_gages.gpkg` file. + 2) Filter out gages that do not have rating curves included in the `usgs_rating_curves.csv`. + - Modify `rating_curve_comparison.py` to perform a check on the age of the user-specified `usgs_rating_curves.csv` and alert the user to the age of the file and recommend updating if the file is older than 30 days. + +### Additions + - Add `rating_curve_get_usgs_curves.py`. This script will generate the following files: + 1) `usgs_rating_curves.csv`: A csv file that contains rating curves (including converted to NAVD88 elevation) for USGS gages in a format that is compatible with `rating_curve_comparisons.py`. As it is currently configured, only gages within CONUS will have rating curve data. + 2) `log.csv`: A log file that records status for each gage and includes error messages. + 3) `usgs_gages.gpkg`: A geospatial layer (in FIM projection) of all active USGS gages that meet predefined criteria. Additionally, the `curve` attribute indicates whether a rating curve is found in the `usgs_rating_curves.csv`. This spatial file is only generated if the `all` option is passed with the `-l` argument. +
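A minimal sketch of how these two outputs are meant to be used together downstream (file paths here are placeholders, not paths set by the patch):

```python
import geopandas as gpd
import pandas as pd

# Placeholder paths to the layer and table generated by rating_curve_get_usgs_curves.py
gages = gpd.read_file('usgs_gages.gpkg')       # all acceptable sites, with a 'curve' attribute
gages = gages.query('curve == "yes"')          # keep only sites that have a rating curve
curves = pd.read_csv('usgs_rating_curves.csv', dtype={'location_id': str})

# Join rating curves to gage locations on the shared location_id key
gage_curves = gages.merge(curves, on='location_id')
print(f'{gage_curves.location_id.nunique()} sites with rating curves')
```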

+## v3.0.13.0 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) -Created tool to compare synthetic rating curve with benchmark rating curve (sierra test). resolves issue #293; resolves issue #308 +Created tool to compare synthetic rating curve with benchmark rating curve (Sierra Test). ### Changes - - update `aggregate_fim_outputs.py` call argument in `fim_run.sh` from 4 jobs to 6 jobs (optimizing API performance) - - reroutes median elevation data from `add_crosswalk.py` and `rem.py` to new file (depreciating `hand_ref_elev_table.csv`) - - adding new files to `viz_whitelist` in `output_cleanup.py` + - Update `aggregate_fim_outputs.py` call argument in `fim_run.sh` from 4 jobs to 6 jobs, to optimize API performance. + - Reroutes median elevation data from `add_crosswalk.py` and `rem.py` to a new file (deprecating `hand_ref_elev_table.csv`). + - Adds new files to `viz_whitelist` in `output_cleanup.py`. ### Additions - - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in run_by_unit.py with elevation and additional attributes at USGS gages. + - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in `run_by_unit.py` with elevation and additional attributes at USGS gages. + - `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between synthetic rating curves and USGS rating curve data.
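For reference, the per-site comparison metrics reduce to the following; this is an illustrative sketch with made-up elevation values, not code from the patch:

```python
import numpy as np

# Paired elevations (ft) at the NWM recurrence flows for one hypothetical gage
usgs = np.array([812.1, 813.4, 815.0])   # USGS rating curve elevations
fim  = np.array([812.6, 813.1, 815.9])   # FIM synthetic rating curve elevations

diff = fim - usgs
nrmse = np.sqrt((diff ** 2).sum() / len(usgs)) / (usgs.max() - usgs.min())  # normalized RMSE
mean_abs_y_diff_ft = np.abs(diff).mean()                                    # mean absolute depth difference
percent_bias = 100 * diff.sum() / usgs.sum()                                # percent bias

print(round(nrmse, 3), round(mean_abs_y_diff_ft, 2), round(percent_bias, 3))
```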

@@ -36,7 +51,7 @@ Add more detail/information to plotting capabilities. ### Changes - Merge `plot_functions.py` into `eval_plots.py` and move `eval_plots.py` into the tools directory. - Remove `plots` subdirectory. - - + ### Additions - Optional argument to create barplots of CSI for each individual site. - Create a csv containing the data used to create the scatterplots. diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index 29ef7b592..fb4b9533d 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -41,15 +41,18 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in input_catchment = gpd.read_file(input_catchment_filename) dem_adj = rasterio.open(dem_adj_filename,'r') + #Query out usgs_gages that don't have rating curve data + usgs_gages = usgs_gages.query('curve == "yes"') + if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) # Identify closest HydroID closest_catchment = gpd.sjoin(usgs_gages, input_catchment, how='left', op='within').reset_index(drop=True) - closest_hydro_id = closest_catchment.filter(items=['site_no','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) + closest_hydro_id = closest_catchment.filter(items=['location_id','HydroID','min_thal_elev','med_thal_elev','max_thal_elev', 'order_']) closest_hydro_id = closest_hydro_id.dropna() # Get USGS gages that are within catchment boundaries - usgs_gages = usgs_gages.loc[usgs_gages.site_no.isin(list(closest_hydro_id.site_no))] + usgs_gages = usgs_gages.loc[usgs_gages.location_id.isin(list(closest_hydro_id.location_id))] columns = ['location_id','HydroID','dem_elevation','dem_adj_elevation','min_thal_elev', 'med_thal_elev','max_thal_elev','str_order'] gage_data = [] @@ -58,11 +61,11 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in for index, gage in usgs_gages.iterrows(): # Get stream attributes - hydro_id = closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].HydroID.item() - str_order = str(int(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].order_.item())) - min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].min_thal_elev.item(),2) - med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].med_thal_elev.item(),2) - max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.site_no==gage.site_no].max_thal_elev.item(),2) + hydro_id = closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].HydroID.item() + str_order = str(int(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].order_.item())) + min_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].min_thal_elev.item(),2) + med_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].med_thal_elev.item(),2) + max_thal_elev = round(closest_hydro_id.loc[closest_hydro_id.location_id==gage.location_id].max_thal_elev.item(),2) # Convert headwater point geometries to WKB representation wkb_gages = dumps(gage.geometry) @@ -90,7 +93,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in dem_adj_elev = round(list(rasterio.sample.sample_gen(dem_adj,shply_referenced_gage.coords))[0].item(),2) # Append dem_m_elev, dem_adj_elev, hydro_id, and gage number to table - site_elevations = [str(gage.site_no), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] + site_elevations = 
[str(gage.location_id), str(hydro_id), dem_m_elev, dem_adj_elev, min_thal_elev, med_thal_elev, max_thal_elev,str(str_order)] gage_data.append(site_elevations) diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index d8df892fb..77e6b91ba 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -13,6 +13,8 @@ from os.path import isfile, join, dirname import shutil import warnings +from pathlib import Path +import time warnings.simplefilter(action='ignore', category=FutureWarning) """ @@ -33,6 +35,26 @@ stat_groups : str string of columns to group eval metrics. """ +def check_file_age(file): + ''' + Checks if file exists, determines the file age, and recommends + updating if older than 1 month. + + Returns + ------- + None. + + ''' + file = Path(file) + if file.is_file(): + modification_time = file.stat().st_mtime + current_time = time.time() + file_age_days = (current_time - modification_time)/86400 + if file_age_days > 30: + check = f'{file.name} is {int(file_age_days)} days old, consider updating.\nUpdate with rating_curve_get_usgs_curves.py' + else: + check = f'{file.name} is {int(file_age_days)} days old.' + return check # recurr_intervals = ['recurr_1_5_cms.csv','recurr_5_0_cms.csv','recurr_10_0_cms.csv'] @@ -374,6 +396,9 @@ def calculate_rc_stats_elev(rc,stat_groups=None): tables_dir = join(output_dir,'tables') os.makedirs(tables_dir, exist_ok=True) + #Check age of gages csv and recommend updating if older than 30 days. + print(check_file_age(usgs_gages_filename)) + # Open log file sys.__stdout__ = sys.stdout log_file = open(join(output_dir,'rating_curve_comparison.log'),"w") diff --git a/tools/rating_curve_get_usgs_curves.py b/tools/rating_curve_get_usgs_curves.py new file mode 100644 index 000000000..eb43bab3e --- /dev/null +++ b/tools/rating_curve_get_usgs_curves.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +import time +import pandas as pd +import geopandas as gpd +from pathlib import Path +from tools_shared_functions import get_metadata, get_datum, ngvd_to_navd_ft, get_rating_curve, aggregate_wbd_hucs +from dotenv import load_dotenv +import os +import argparse +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION + +''' +This script calls the NOAA Tidal API for datum conversions. Experience shows that +running script outside of business hours seems to be most consistent way +to avoid API errors. Currently configured to get rating curve data within +CONUS. Tidal API call may need to be modified to get datum conversions for +AK, HI, PR/VI. +''' + +#import variables from .env file +load_dotenv() +API_BASE_URL = os.getenv("API_BASE_URL") +WBD_LAYER = os.getenv("WBD_LAYER") + +def get_all_active_usgs_sites(): + ''' + Compile a list of all active usgs gage sites that meet certain criteria. + Return a GeoDataFrame of all sites. + + Returns + ------- + None. + + ''' + #Get metadata for all usgs_site_codes that are active in the U.S. + metadata_url = f'{API_BASE_URL}/metadata' + #Define arguments to retrieve metadata and then get metadata from WRDS + select_by = 'usgs_site_code' + selector = ['all'] + must_include = 'usgs_data.active' + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = None ) + + #Filter out sites based quality of site. These acceptable codes were initially + #decided upon and may need fine tuning. 
A link where more information + #regarding the USGS attributes is provided. + + #https://help.waterdata.usgs.gov/code/coord_acy_cd_query?fmt=html + acceptable_coord_acc_code = ['H','1','5','S','R','B','C','D','E'] + #https://help.waterdata.usgs.gov/code/coord_meth_cd_query?fmt=html + acceptable_coord_method_code = ['C','D','W','X','Y','Z','N','M','L','G','R','F','S'] + #https://help.waterdata.usgs.gov/codes-and-parameters/codes#SI + acceptable_alt_acc_thresh = 1 + #https://help.waterdata.usgs.gov/code/alt_meth_cd_query?fmt=html + acceptable_alt_meth_code = ['A','D','F','I','J','L','N','R','W','X','Y','Z'] + #https://help.waterdata.usgs.gov/code/site_tp_query?fmt=html + acceptable_site_type = ['ST'] + + #Cycle through each site and filter out if site doesn't meet criteria. + acceptable_sites_metadata = [] + for metadata in metadata_list: + #Get the usgs info from each site + usgs_data = metadata['usgs_data'] + + #Get site quality attributes + coord_accuracy_code = usgs_data.get('coord_accuracy_code') + coord_method_code = usgs_data.get('coord_method_code') + alt_accuracy_code = usgs_data.get('alt_accuracy_code') + alt_method_code = usgs_data.get('alt_method_code') + site_type = usgs_data.get('site_type') + + #Check to make sure that none of the codes were null, if null values are found, skip to next. + if not all([coord_accuracy_code, coord_method_code, alt_accuracy_code, alt_method_code, site_type]): + continue + + #Test if site meets criteria. + if (coord_accuracy_code in acceptable_coord_acc_code and + coord_method_code in acceptable_coord_method_code and + alt_accuracy_code <= acceptable_alt_acc_thresh and + alt_method_code in acceptable_alt_meth_code and + site_type in acceptable_site_type): + + #If nws_lid is not populated then add a dummy ID so that 'aggregate_wbd_hucs' works correctly. + if not metadata.get('identifiers').get('nws_lid'): + metadata['identifiers']['nws_lid'] = 'Bogus_ID' + + #Append metadata of acceptable site to acceptable_sites list. + acceptable_sites_metadata.append(metadata) + + #Get a geospatial layer (gdf) for all acceptable sites + dictionary, gdf = aggregate_wbd_hucs(acceptable_sites_metadata, Path(WBD_LAYER), retain_attributes = False) + #Get a list of all sites in gdf + list_of_sites = gdf['identifiers_usgs_site_code'].to_list() + #Rename gdf fields + gdf.columns = gdf.columns.str.replace('identifiers_','') + + return gdf, list_of_sites, acceptable_sites_metadata + + +def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): + ''' + + Returns rating curves, for a set of sites, adjusted to elevation NAVD. + Currently configured to get rating curve data within CONUS. Tidal API + call may need to be modified to get datum conversions for AK, HI, PR/VI. + Workflow as follows: + 1a. If 'all' option passed, get metadata for all acceptable USGS sites in CONUS. + 1b. If a list of sites passed, get metadata for all sites supplied by user. + 2. Extract datum information for each site. + 3. If site is not in contiguous US skip (due to issue with datum conversions) + 4. Convert datum if NGVD + 5. Get rating curve for each site individually + 6. Convert rating curve to absolute elevation (NAVD) and store in DataFrame + 7. Append all rating curves to a master DataFrame. + + + Outputs, if a workspace is specified, are: + usgs_rating_curves.csv -- A csv containing USGS rating curve as well + as datum adjustment and rating curve expressed as an elevation (NAVD88). + ONLY SITES IN CONUS ARE CURRENTLY LISTED IN THIS CSV. 
To get + additional sites, the Tidal API will need to be reconfigured and tested. + + log.csv -- A csv containing runtime messages. + + (if all option passed) usgs_gages.gpkg -- a point layer containing ALL USGS gage sites that meet + certain criteria. In the attribute table is a 'curve' column that will indicate if a rating + curve is provided in "usgs_rating_curves.csv" + + Parameters + ---------- + list_of_gage_sites : LIST + List of all gage site IDs. If all acceptable sites in CONUS are desired + list_of_gage_sites can be passed 'all' and it will use the get_all_active_usgs_sites + function to filter out sites that meet certain requirements across CONUS. + + workspace : STR + Directory, if specified, where output csv is saved. OPTIONAL, Default is False. + + sleep_time: FLOAT + Amount of time to rest between API calls. The Tidal API appears to + error out more during business hours. Increasing sleep_time may help. + + + Returns + ------- + all_rating_curves : Pandas DataFrame + DataFrame containing USGS rating curves adjusted to elevation for + all input sites. Additional metadata also contained in DataFrame + + ''' + #Define URLs for metadata and rating curve + metadata_url = f'{API_BASE_URL}/metadata' + rating_curve_url = f'{API_BASE_URL}/rating_curve' + + #If 'all' option passed to list of gages sites, it retrieves all acceptable sites within CONUS. + print('getting metadata for all sites') + if list_of_gage_sites == ['all']: + acceptable_sites_gdf, acceptable_sites_list, metadata_list = get_all_active_usgs_sites() + #Otherwise, if a list of sites is passed, retrieve sites from WRDS. + else: + #Define arguments to retrieve metadata and then get metadata from WRDS + select_by = 'usgs_site_code' + selector = list_of_gage_sites + #Since there is a limit to number characters in url, split up selector if too many sites. + max_sites = 150 + if len(selector)>max_sites: + chunks = [selector[i:i+max_sites] for i in range(0,len(selector),max_sites)] + #Get metadata for each chunk + metadata_list = [] + metadata_df = pd.DataFrame() + for chunk in chunks: + chunk_list, chunk_df = get_metadata(metadata_url, select_by, chunk, must_include = None, upstream_trace_distance = None, downstream_trace_distance = None ) + #Append chunk data to metadata_list/df + metadata_list.extend(chunk_list) + metadata_df = metadata_df.append(chunk_df) + else: + #If selector has less than max sites, then get metadata. + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = None, upstream_trace_distance = None, downstream_trace_distance = None ) + + #Create DataFrame to store all appended rating curves + print('processing metadata') + all_rating_curves = pd.DataFrame() + regular_messages = [] + api_failure_messages=[] + #For each site in metadata_list + for metadata in metadata_list: + + #Get datum information for site (only need usgs_data) + nws, usgs = get_datum(metadata) + + #Filter out sites that are not in contiguous US. If this section is removed be sure to test with datum adjustment section (region will need changed) + if usgs['state'] in ['Alaska', 'Puerto Rico', 'Virgin Islands', 'Hawaii']: + continue + + #Get rating curve for site + location_ids = usgs['usgs_site_code'] + curve = get_rating_curve(rating_curve_url, location_ids = [location_ids]) + #If no rating curve was returned, skip site. + if curve.empty: + message = f'{location_ids}: has no rating curve' + regular_messages.append(message) + continue + + #Adjust datum to NAVD88 if needed. If datum unknown, skip site. 
+ if usgs['vcs'] == 'NGVD29': + #To prevent time-out errors + time.sleep(sleep_time) + #Get the datum adjustment to convert NGVD to NAVD. Region needs changed if not in CONUS. + datum_adj_ft = ngvd_to_navd_ft(datum_info = usgs, region = 'contiguous') + + #If datum API failed, print message and skip site. + if datum_adj_ft is None: + api_message = f"{location_ids}: datum adjustment failed!!" + api_failure_messages.append(api_message) + print(api_message) + continue + + #If datum adjustment succeeded, calculate datum in NAVD88 + navd88_datum = round(usgs['datum'] + datum_adj_ft, 2) + message = f'{location_ids}:succesfully converted NGVD29 to NAVD88' + regular_messages.append(message) + + elif usgs['vcs'] == 'NAVD88': + navd88_datum = usgs['datum'] + message = f'{location_ids}: already NAVD88' + regular_messages.append(message) + + else: + message = f"{location_ids}: datum unknown" + regular_messages.append(message) + continue + + #Populate rating curve with metadata and use navd88 datum to convert stage to elevation. + curve['active'] = usgs['active'] + curve['datum'] = usgs['datum'] + curve['datum_vcs'] = usgs['vcs'] + curve['navd88_datum'] = navd88_datum + curve['elevation_navd88'] = curve['stage'] + navd88_datum + #Append all rating curves to a dataframe + all_rating_curves = all_rating_curves.append(curve) + + #Rename columns and add attribute indicating if rating curve exists + acceptable_sites_gdf.rename(columns = {'nwm_feature_id':'feature_id','usgs_site_code':'location_id'}, inplace = True) + sites_with_data = pd.DataFrame({'location_id':all_rating_curves['location_id'].unique(),'curve':'yes'}) + acceptable_sites_gdf = acceptable_sites_gdf.merge(sites_with_data, on = 'location_id', how = 'left') + acceptable_sites_gdf.fillna({'curve':'no'},inplace = True) + + #If workspace is specified, write data to file. + if workspace: + #Write rating curve dataframe to file + Path(workspace).mkdir(parents = True, exist_ok = True) + all_rating_curves.to_csv(Path(workspace) / 'usgs_rating_curves.csv', index = False) + #Save out messages to file. + first_line = [f'THERE WERE {len(api_failure_messages)} SITES THAT EXPERIENCED DATUM CONVERSION ISSUES'] + api_failure_messages = first_line + api_failure_messages + regular_messages = api_failure_messages + regular_messages + all_messages = pd.DataFrame({'Messages':regular_messages}) + all_messages.to_csv(Path(workspace) / 'log.csv', index = False) + #If 'all' option specified, reproject then write out shapefile of acceptable sites. 
+ if list_of_gage_sites == ['all']: + acceptable_sites_gdf = acceptable_sites_gdf.to_crs(PREP_PROJECTION) + acceptable_sites_gdf.to_file(Path(workspace) / 'usgs_gages.gpkg', layer = 'usgs_gages', driver = 'GPKG') + + return all_rating_curves + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Retrieve USGS rating curves adjusted to elevation (NAVD88).\nCurrently configured to get rating curves within CONUS.\nRecommend running outside of business hours to reduce API related errors.\nIf error occurs try increasing sleep time (from default of 1).') + parser.add_argument('-l', '--list_of_gage_sites', help = '"all" for all active usgs sites, specify individual sites separated by space, or provide a csv of sites (one per line).', nargs = '+', required = True) + parser.add_argument('-w', '--workspace', help = 'Directory where all outputs will be stored.', default = False, required = False) + parser.add_argument('-t', '--sleep_timer', help = 'How long to rest between datum API calls', default = 1.0, required = False) + + #Extract to dictionary and assign to variables. + args = vars(parser.parse_args()) + + #Check if csv is supplied + if args['list_of_gage_sites'][0].endswith('.csv'): + #Convert csv list to python list + with open(args['list_of_gage_sites']) as f: + sites = f.read().splitlines() + args['list_of_gage_sites'] = sites + + l = args['list_of_gage_sites'] + w = args['workspace'] + t = float(args['sleep_timer']) + #Run create_flow_forecast_file + usgs_rating_to_elev(list_of_gage_sites = l, workspace=w, sleep_time = t) \ No newline at end of file diff --git a/tools/tools_shared_functions.py b/tools/tools_shared_functions.py index 13534f87b..8ea3d5d59 100755 --- a/tools/tools_shared_functions.py +++ b/tools/tools_shared_functions.py @@ -7,6 +7,8 @@ import pandas as pd import geopandas as gpd import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry from tools_shared_variables import (TEST_CASES_DIR, PRINTWORTHY_STATS, GO_UP_STATS, GO_DOWN_STATS, ENDC, TGREEN_BOLD, TGREEN, TRED_BOLD, TWHITE, WHITE_BOLD, CYAN_BOLD) @@ -1045,3 +1047,211 @@ def flow_data(segments, flows, convert_to_cms = True): flow_data = pd.DataFrame({'feature_id':segments, 'discharge':flows_cms}) flow_data = flow_data.astype({'feature_id' : int , 'discharge' : float}) return flow_data +####################################################################### +#Function to get datum information +####################################################################### +def get_datum(metadata): + ''' + Given a record from the metadata endpoint, retrieve important information + related to the datum and site from both NWS and USGS sources. This information + is saved to a dictionary with common keys. USGS has more data available so + it has more keys. + + Parameters + ---------- + metadata : DICT + Single record from the get_metadata function. Must iterate through + the get_metadata output list. + + Returns + ------- + nws_datums : DICT + Dictionary of NWS data. + usgs_datums : DICT + Dictionary of USGS Data. + + ''' + #Get site and datum information from nws sub-dictionary. Use consistent naming between USGS and NWS sources. 
+ nws_datums = {} + nws_datums['nws_lid'] = metadata['identifiers']['nws_lid'] + nws_datums['usgs_site_code'] = metadata['identifiers']['usgs_site_code'] + nws_datums['state'] = metadata['nws_data']['state'] + nws_datums['datum'] = metadata['nws_data']['zero_datum'] + nws_datums['vcs'] = metadata['nws_data']['vertical_datum_name'] + nws_datums['lat'] = metadata['nws_data']['latitude'] + nws_datums['lon'] = metadata['nws_data']['longitude'] + nws_datums['crs'] = metadata['nws_data']['horizontal_datum_name'] + nws_datums['source'] = 'nws_data' + + #Get site and datum information from usgs_data sub-dictionary. Use consistent naming between USGS and NWS sources. + usgs_datums = {} + usgs_datums['nws_lid'] = metadata['identifiers']['nws_lid'] + usgs_datums['usgs_site_code'] = metadata['identifiers']['usgs_site_code'] + usgs_datums['active'] = metadata['usgs_data']['active'] + usgs_datums['state'] = metadata['usgs_data']['state'] + usgs_datums['datum'] = metadata['usgs_data']['altitude'] + usgs_datums['vcs'] = metadata['usgs_data']['alt_datum_code'] + usgs_datums['datum_acy'] = metadata['usgs_data']['alt_accuracy_code'] + usgs_datums['datum_meth'] = metadata['usgs_data']['alt_method_code'] + usgs_datums['lat'] = metadata['usgs_data']['latitude'] + usgs_datums['lon'] = metadata['usgs_data']['longitude'] + usgs_datums['crs'] = metadata['usgs_data']['latlon_datum_name'] + usgs_datums['source'] = 'usgs_data' + + return nws_datums, usgs_datums +######################################################################## +#Function to convert horizontal datums +######################################################################## +def convert_latlon_datum(lat,lon,src_crs,dest_crs): + ''' + Converts latitude and longitude datum from a source CRS to a dest CRS + using geopandas and returns the projected latitude and longitude coordinates. + + Parameters + ---------- + lat : FLOAT + Input Latitude. + lon : FLOAT + Input Longitude. + src_crs : STR + CRS associated with input lat/lon. Geopandas must recognize code. + dest_crs : STR + Target CRS that lat/lon will be projected to. Geopandas must recognize code. + + Returns + ------- + new_lat : FLOAT + Reprojected latitude coordinate in dest_crs. + new_lon : FLOAT + Reprojected longitude coordinate in dest_crs. + + ''' + + #Create a temporary DataFrame containing the input lat/lon. + temp_df = pd.DataFrame({'lat':[lat],'lon':[lon]}) + #Convert dataframe to a GeoDataFrame using the lat/lon coords. Input CRS is assigned. + temp_gdf = gpd.GeoDataFrame(temp_df, geometry=gpd.points_from_xy(temp_df.lon, temp_df.lat), crs = src_crs) + #Reproject GeoDataFrame to destination CRS. + reproject = temp_gdf.to_crs(dest_crs) + #Get new Lat/Lon coordinates from the geometry data. + new_lat,new_lon = [reproject.geometry.y.item(), reproject.geometry.x.item()] + return new_lat, new_lon +####################################################################### +#Function to get conversion adjustment NGVD to NAVD in FEET +####################################################################### +def ngvd_to_navd_ft(datum_info, region = 'contiguous'): + ''' + Given the lat/lon, retrieve the adjustment from NGVD29 to NAVD88 in feet. + Uses NOAA tidal API to get conversion factor. Requires that lat/lon is + in NAD27 crs. If input lat/lon are not NAD27 then these coords are + reprojected to NAD27 and the reproject coords are used to get adjustment. + There appears to be an issue when region is not in contiguous US. + + Parameters + ---------- + lat : FLOAT + Latitude. 
+ lon : FLOAT + Longitude. + + Returns + ------- + datum_adj_ft : FLOAT + Vertical adjustment in feet, from NGVD29 to NAVD88, and rounded to nearest hundredth. + + ''' + #If crs is not NAD 27, convert crs to NAD27 and get adjusted lat lon + if datum_info['crs'] != 'NAD27': + lat, lon = convert_latlon_datum(datum_info['lat'],datum_info['lon'],datum_info['crs'],'NAD27') + else: + #Otherwise assume lat/lon is in NAD27. + lat = datum_info['lat'] + lon = datum_info['lon'] + + #Define url for datum API + datum_url = 'https://vdatum.noaa.gov/vdatumweb/api/tidal' + + #Define parameters. Hard code most parameters to convert NGVD to NAVD. + params = {} + params['lat'] = lat + params['lon'] = lon + params['region'] = region + params['s_h_frame'] = 'NAD27' #Source CRS + params['s_v_frame'] = 'NGVD29' #Source vertical coord datum + params['s_vertical_unit'] = 'm' #Source vertical units + params['src_height'] = 0.0 #Source vertical height + params['t_v_frame'] = 'NAVD88' #Target vertical datum + params['tar_vertical_unit'] = 'm' #Target vertical height + + #Call the API + response = requests.get(datum_url, params = params) + #If succesful get the navd adjustment + if response: + results = response.json() + #Get adjustment in meters (NGVD29 to NAVD88) + adjustment = results['tar_height'] + #convert meters to feet + adjustment_ft = round(float(adjustment) * 3.28084,2) + else: + adjustment_ft = None + return adjustment_ft +####################################################################### +#Function to download rating curve from API +####################################################################### +def get_rating_curve(rating_curve_url, location_ids): + ''' + Given list of location_ids (nws_lids, usgs_site_codes, etc) get the + rating curve from WRDS API and export as a DataFrame. + + Parameters + ---------- + rating_curve_url : STR + URL to retrieve rating curve + location_ids : LIST + List of location ids. Can be nws_lid or usgs_site_codes. + + Returns + ------- + all_curves : pandas DataFrame + Rating curves from input list as well as other site information. + + ''' + #Define DataFrame to contain all returned curves. + all_curves = pd.DataFrame() + + #Define call to retrieve all rating curve information from WRDS. + joined_location_ids = '%2C'.join(location_ids) + url = f'{rating_curve_url}/{joined_location_ids}' + + #Call the API + response = requests.get(url) + + #If successful + if response.ok: + + #Write return to json and extract the rating curves + site_json = response.json() + rating_curves_list = site_json['rating_curves'] + + #For each curve returned + for curve in rating_curves_list: + #Check if a curve was populated (e.g wasn't blank) + if curve: + + #Write rating curve to pandas dataframe as well as site attributes + curve_df = pd.DataFrame(curve['rating_curve'],dtype=float) + + #Add other information such as site, site type, source, units, and timestamp. 
+ curve_df['location_id'] = curve['metadata']['location_id'] + curve_df['location_type'] = curve['metadata']['id_type'] + curve_df['source'] = curve['metadata']['source'] + curve_df['flow_units'] = curve['metadata']['flow_unit'] + curve_df['stage_units'] = curve['metadata']['stage_unit'] + curve_df['wrds_timestamp'] = response.headers['Date'] + + #Append rating curve to DataFrame containing all curves + all_curves = all_curves.append(curve_df) + else: + continue + + return all_curves \ No newline at end of file From c9161b81bf124a16f714486cb36a0e8d44067cf0 Mon Sep 17 00:00:00 2001 From: RyanSpies-NOAA Date: Thu, 8 Apr 2021 11:11:56 -0500 Subject: [PATCH 063/359] Implementing a prototype technique to estimate the missing bathymetric component in the HAND-derived synthetic rating curves. Implementing a prototype technique to estimate the missing bathymetric component in the HAND-derived synthetic rating curves. The new Bathymetric Adjusted Rating Curve (BARC) function is built within the fim_run.sh workflow and will ingest bankfull geometry estimates provided by the user to modify the cross section area used in the synthetic rating curve generation. - add_crosswalk.py outputs the stream order variables to src_full_crosswalked.csv and calls the new bathy_rc_adjust.py if bathy env variable set to True and extent=MS. - run_by_unit.sh includes a new csv outputs for reviewing BARC calculations. - params_template.env & params_calibrated.env contain new BARC function input variables and on/off toggle variable. eval_plots.py now includes additional AHPS eval sites in the list of "bad_sites" (flagged issues with MS flowlines). - Adds bathy_rc_adjust.py - Imports the existing synthetic rating curve table and the bankfull geometry input data (topwidth and cross section area per COMID). - Performs new synthetic rating curve calculations with bathymetry estimation modifications. - Flags issues with the thalweg-notch artifact. This resolves #306, resolves #328, resolves #309, resolves #311, and resolves #330. --- CHANGELOG.md | 58 ++++++++----- config/params_calibrated.env | 8 ++ config/params_template.env | 8 ++ src/add_crosswalk.py | 28 +++++-- src/bathy_rc_adjust.py | 156 +++++++++++++++++++++++++++++++++++ src/output_cleanup.py | 5 ++ src/run_by_unit.sh | 5 +- tools/eval_plots.py | 82 +++++++++--------- 8 files changed, 280 insertions(+), 70 deletions(-) create mode 100755 src/bathy_rc_adjust.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ba1d25901..b5e9662f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,25 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.15.0 - 2021-04-08 - [PR #340](https://github.com/NOAA-OWP/cahaba/pull/340) + +Implementing a prototype technique to estimate the missing bathymetric component in the HAND-derived synthetic rating curves. The new Bathymetric Adjusted Rating Curve (BARC) function is built within the `fim_run.sh` workflow and will ingest bankfull geometry estimates provided by the user to modify the cross section area used in the synthetic rating curve generation. + +### Changes + - `add_crosswalk.py` outputs the stream order variables to `src_full_crosswalked.csv` and calls the new `bathy_rc_adjust.py` if bathy env variable set to True and `extent=MS`. + - `run_by_unit.sh` includes a new csv outputs for reviewing BARC calculations. + - `params_template.env` & `params_calibrated.env` contain new BARC function input variables and on/off toggle variable. 
+ - `eval_plots.py` now includes additional AHPS eval sites in the list of "bad_sites" (flagged issues with MS flowlines). + +### Additions + - `bathy_rc_adjust.py`: + - Imports the existing synthetic rating curve table and the bankfull geometry input data (topwidth and cross section area per COMID). + - Performs new synthetic rating curve calculations with bathymetry estimation modifications. + - Flags issues with the thalweg-notch artifact. +
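The crosswalk at the core of BARC can be sketched in a few lines. This is a hedged illustration rather than the module itself: `src_df` and `bankfull_df` are hypothetical stand-ins for the synthetic rating curve table and the bankfull geometry file, and the column names follow the tables referenced above. For each HydroID it keeps the rating-curve stage whose top width best matches the regression bankfull width; that row anchors the cross-section-area adjustment.

```python
import pandas as pd

def pick_bankfull_rows(src_df, bankfull_df):
    # Join bankfull geometry (one row per feature_id/COMID) onto the synthetic rating curve.
    merged = src_df.merge(
        bankfull_df[['feature_id', 'BANKFULL_WIDTH (m)', 'BANKFULL_XSEC_AREA (m2)']],
        on='feature_id', how='left')
    # Drop stage 0 and reaches without a bankfull match (e.g. areas outside CONUS).
    merged = merged[(merged['Stage'] > 0) & merged['BANKFULL_XSEC_AREA (m2)'].notnull()]
    # Absolute difference between the SRC top width and the bankfull regression width.
    merged['Top Width Diff (m)'] = (merged['TopWidth (m)'] - merged['BANKFULL_WIDTH (m)']).abs()
    # Keep, per HydroID, the stage closest to bankfull.
    return merged.loc[merged.groupby('HydroID')['Top Width Diff (m)'].idxmin()].reset_index(drop=True)
```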

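Once the estimated bathymetric area is added back into the wetted area and hydraulic radius, the adjusted discharge follows from Manning's equation, Q = A R^(2/3) S^(1/2) / n. A minimal, self-contained sketch with illustrative numbers (not values from a real SRC):

```python
def mannings_discharge(wet_area_m2, hydraulic_radius_m, slope, mannings_n):
    # Manning's equation: Q = A * R^(2/3) * sqrt(S) / n
    return wet_area_m2 * hydraulic_radius_m ** (2.0 / 3.0) * slope ** 0.5 / mannings_n

# Illustrative only: adjusted wetted area 15 m2, hydraulic radius 1.2 m, slope 0.001, n 0.06
q_cms = mannings_discharge(15.0, 1.2, 0.001, 0.06)  # about 8.9 m3/s
```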
## v3.0.14.0 - 2021-04-05 - [PR #338](https://github.com/NOAA-OWP/cahaba/pull/338) Create tool to retrieve rating curves from USGS sites and convert to elevation (NAVD88). Intended to be used as part of the Sierra Test. - + ### Changes - Modify `usgs_gage_crosswalk.py` to: 1) Look for `location_id` instead of `site_no` attribute field in `usgs_gages.gpkg` file. @@ -19,23 +35,23 @@ Create tool to retrieve rating curves from USGS sites and convert to elevation ( ## v3.0.13.0 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) Created tool to compare synthetic rating curve with benchmark rating curve (Sierra Test). - + ### Changes - Update `aggregate_fim_outputs.py` call argument in `fim_run.sh` from 4 jobs to 6 jobs, to optimize API performance. - Reroutes median elevation data from `add_crosswalk.py` and `rem.py` to new file (depreciating `hand_ref_elev_table.csv`). - Adds new files to `viz_whitelist` in `output_cleanup.py`. ### Additions - - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in `run_by_unit.py` with elevation and additional attributes at USGS gages. - - `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between synthetic rating curves and USGS rating curve data. + - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in `run_by_unit.py` with elevation and additional attributes at USGS gages. + - `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between synthetic rating curves and USGS rating curve data.
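The datum and rating-curve helpers added to `tools_shared_functions.py` earlier in this patch series are the building blocks for the stage-to-NAVD88 conversion described in the v3.0.14.0 entry above. A hedged sketch of how they might be strung together; the site values, `rating_curve_url`, and the `'stage'` column name are illustrative assumptions, not values from the repository:

```python
# Sketch only: express a WRDS rating curve as NAVD88 elevation using the helpers above.
datum_info = {'usgs_site_code': '12345678', 'datum': 100.0, 'vcs': 'NGVD29',
              'lat': 35.0, 'lon': -90.0, 'crs': 'NAD83'}
rating_curve_url = 'https://example/rating_curve'   # placeholder WRDS endpoint

adjustment_ft = 0.0
if datum_info['vcs'] == 'NGVD29':                   # exact datum strings vary by source
    # ngvd_to_navd_ft() reprojects to NAD27 if needed and queries the NOAA vdatum API.
    adjustment_ft = ngvd_to_navd_ft(datum_info) or 0.0

curves = get_rating_curve(rating_curve_url, [datum_info['usgs_site_code']])
# Elevation (ft, NAVD88) = gage stage + gage datum + vertical datum adjustment.
curves['elevation_navd88_ft'] = curves['stage'] + datum_info['datum'] + adjustment_ft
```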

## v3.0.12.1 - 2021-03-31 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336) Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. - + ### Changes - - Removes file dependencies from spatial option. Does require the WBD layer which should be specified in `.env` file. + - Removes file dependencies from spatial option. Does require the WBD layer which should be specified in `.env` file. - Produces outputs in a format consistent with requirements needed for publishing. - Preserves leading zeros in huc information for all outputs from `eval_plots.py`. @@ -54,18 +70,18 @@ Add more detail/information to plotting capabilities. ### Additions - Optional argument to create barplots of CSI for each individual site. - - Create a csv containing the data used to create the scatterplots. + - Create a csv containing the data used to create the scatterplots.
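`eval_plots.py`, patched later in this series, keeps only the sites that are present for every FIM version at a given magnitude before computing statistics. The idea behind that filtering can be sketched as an intersection over per-version site lists (the DataFrame and column names are assumed to match the metrics CSV):

```python
from functools import reduce

import numpy as np
import pandas as pd

def keep_common_sites(df, unique_field='nws_lid'):
    # For each magnitude, keep only sites present for every version.
    kept = []
    for magnitude, group in df.groupby('magnitude'):
        per_version_sites = group.groupby('version')[unique_field].unique()
        shared = reduce(np.intersect1d, per_version_sites)
        kept.append(group[group[unique_field].isin(shared)])
    return pd.concat(kept, ignore_index=True)
```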

## v3.0.11.0 - 2021-03-22 - [PR #319](https://github.com/NOAA-OWP/cahaba/pull/298) Improvements to CatFIM service source data generation. - + ### Changes - Renamed `generate_categorical_fim.py` to `generate_categorical_fim_mapping.py`. - Updated the status outputs of the `nws_lid_sites layer` and saved it in the same directory as the `merged catfim_library layer`. - Additional stability fixes (such as improved compatability with WRDS updates). - + ### Additions - Added `generate_categorical_fim.py` to wrap `generate_categorical_fim_flows.py` and `generate_categorical_fim_mapping.py`. - Create new `nws_lid_sites` shapefile located in same directory as the `catfim_library` shapefile. @@ -74,7 +90,7 @@ Improvements to CatFIM service source data generation. ## v3.0.10.1 - 2021-03-24 - [PR #320](https://github.com/NOAA-OWP/cahaba/pull/320) Patch to synthesize_test_cases.py. - + ### Changes - Bug fix to `synthesize_test_cases.py` to allow comparison between `testing` version and `official` versions.

@@ -82,14 +98,14 @@ Patch to synthesize_test_cases.py. ## v3.0.10.0 - 2021-03-12 - [PR #298](https://github.com/NOAA-OWP/cahaba/pull/298) Preprocessing of flow files for Categorical FIM. - + ### Additions - Generate Categorical FIM flow files for each category (action, minor, moderate, major). - Generate point shapefile of Categorical FIM sites. - Generate csv of attribute data in shapefile. - Aggregate all shapefiles and csv files into one file in parent directory. - Add flood of record category. - + ### Changes - Stability fixes to `generate_categorical_fim.py`.

@@ -107,7 +123,7 @@ Enhancements to FIM API. - Overall better restart and retry handlers for networking problems. - Jobs can now be canceled in API interface. - Both FR and MS configs can be selected for a single job. - +

## v3.0.8.2 - 2021-03-11 - [PR #296](https://github.com/NOAA-OWP/cahaba/pull/296) @@ -117,7 +133,7 @@ Enhancements to post-processing for Viz-related use-cases. - Aggregate grids are projected to Web Mercator during `-v` runs in `fim_run.sh`. - HUC6 aggregation is parallelized. - Aggregate grid blocksize is changed from 256 to 1024 for faster postprocessing. - +
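For context on the Web Mercator and blocksize bullets above, projecting an aggregate grid to Web Mercator (EPSG:3857) and writing it with 1024-pixel internal tiles looks roughly like the rasterio sketch below; the file names are placeholders and this is not the repository's post-processing code:

```python
import rasterio
from rasterio.warp import Resampling, calculate_default_transform, reproject

with rasterio.open('huc6_aggregate.tif') as src:
    transform, width, height = calculate_default_transform(
        src.crs, 'EPSG:3857', src.width, src.height, *src.bounds)
    profile = src.profile.copy()
    profile.update(crs='EPSG:3857', transform=transform, width=width, height=height,
                   tiled=True, blockxsize=1024, blockysize=1024)
    with rasterio.open('huc6_aggregate_3857.tif', 'w', **profile) as dst:
        reproject(source=rasterio.band(src, 1), destination=rasterio.band(dst, 1),
                  src_transform=src.transform, src_crs=src.crs,
                  dst_transform=transform, dst_crs='EPSG:3857',
                  resampling=Resampling.nearest)
```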

## v3.0.8.1 - 2021-03-10 - [PR #302](https://github.com/NOAA-OWP/cahaba/pull/302) @@ -125,7 +141,7 @@ Patched import issue in `tools_shared_functions.py`. ### Changes - Changed `utils.` to `tools_` in `tools_shared_functions.py` after recent structural change to `tools` directory. - +

## v3.0.8.0 - 2021-03-09 - [PR #279](https://github.com/NOAA-OWP/cahaba/pull/279) @@ -145,7 +161,7 @@ Renamed benchmark layers in `test_cases` and updated variable names in evaluatio ### Changes - Updated `run_test_case.py` with new benchmark layer names. - Updated `run_test_case_calibration.py` with new benchmark layer names. - +

## v3.0.7.0 - 2021-03-01 - [PR #288](https://github.com/NOAA-OWP/cahaba/pull/288) @@ -159,7 +175,7 @@ Restructured the repository. This has no impact on hydrological work done in the

## v3.0.6.0 - 2021-02-25 - [PR #276](https://github.com/NOAA-OWP/cahaba/pull/276) -Enhancement that creates metric plots and summary statistics using metrics compiled by `synthesize_test_cases.py`. +Enhancement that creates metric plots and summary statistics using metrics compiled by `synthesize_test_cases.py`. ### Additions - Added `eval_plots.py`, which produces: @@ -281,7 +297,7 @@ New python script "wrappers" for using `inundation.py`. - Created `inundation_wrapper_nwm_flows.py` to produce inundation outputs using NWM recurrence flows: 1.5 year, 5 year, 10 year. - Created `inundation_wrapper_custom_flow.py` to produce inundation outputs with user-created flow file. - Created new `tools` parent directory to store `inundation_wrapper_nwm_flows.py` and `inundation_wrapper_custom_flow.py`. - +

## v3.0.3.1 - 2021-02-04 - [PR #253](https://github.com/NOAA-OWP/cahaba/pull/253) @@ -300,7 +316,7 @@ Post-process to aggregate FIM outputs to HUC6 scale. ### Additions - Viz outputs aggregated to HUC6 scale; saves outputs to `aggregate_fim_outputs` folder. - + ### Changes - `split_flows.py` now splits streams at HUC8 boundaries to ensure consistent catchment boundaries along edges. @@ -354,7 +370,7 @@ Hotfix for handling nodata value in rasterized levee lines. - Resolves bug for HUCs where `$ndv > 0` (Great Lakes region). - Initialize the `nld_rasterized_elev.tif` using a value of `-9999` instead of `$ndv`. - +
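The `-9999` initialization matters because the levee burn-in keeps the cellwise maximum of the DEM and the rasterized levee elevations (see `burn_in_levees.py` later in this series), so a large positive nodata value would overwrite valid terrain. A tiny illustration with made-up elevations:

```python
import numpy as np

dem = np.array([210.0, 212.5, 215.0])
levee_pos_ndv = np.array([99999.0, 230.0, 99999.0])   # nodata stored as a large positive value
levee_neg_ndv = np.array([-9999.0, 230.0, -9999.0])   # nodata stored as -9999

print(np.maximum(dem, levee_pos_ndv))   # [99999. 230. 99999.] -> nodata clobbers the DEM
print(np.maximum(dem, levee_neg_ndv))   # [210. 230. 215.]     -> only the levee cell is raised
```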

## v3.0.0.2 - 2021-01-06 - [PR #200](https://github.com/NOAA-OWP/cahaba/pull/200) @@ -379,7 +395,7 @@ Modifications to build and run Docker image more reliably. Cleanup on some pre-p ### Notes - `aggregate_vector_inputs.py` doesn't work yet. Need to externally download required data to run fim_run.sh - +

## v3.0.0.0 - 2020-12-22 - [PR #181](https://github.com/NOAA-OWP/cahaba/pull/181) diff --git a/config/params_calibrated.env b/config/params_calibrated.env index c5b040215..899c8a4cb 100644 --- a/config/params_calibrated.env +++ b/config/params_calibrated.env @@ -19,6 +19,14 @@ export slope_min=0.001 export min_catchment_area=0.25 export min_stream_length=0.5 +#### bathy SRC estimation parameters #### +export bathy_src_modification=True +export surf_area_thalweg_ratio_flag=10 +export thalweg_stg_search_max_limit=3 +export bathy_xs_area_chg_flag=5 +export bankful_xs_area_ratio_flag=10 +export thalweg_hyd_radius_flag=10 + #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed export ncores_fd=1 # mpi number of cores for flow directions diff --git a/config/params_template.env b/config/params_template.env index d6c9e2865..21d31d1de 100644 --- a/config/params_template.env +++ b/config/params_template.env @@ -19,6 +19,14 @@ export slope_min=0.001 export min_catchment_area=0.25 export min_stream_length=0.5 +#### bathy SRC estimation parameters #### +export bathy_src_modification=True +export surf_area_thalweg_ratio_flag=10 +export thalweg_stg_search_max_limit=3 +export bathy_xs_area_chg_flag=5 +export bankful_xs_area_ratio_flag=10 +export thalweg_hyd_radius_flag=10 + #### computational parameters #### export ncores_gw=1 # mpi number of cores for gagewatershed export ncores_fd=1 # mpi number of cores for flow directions diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index 96f2805c0..2e7fbccbd 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -8,17 +8,19 @@ import json import argparse import sys +from bathy_rc_adjust import bathy_rc_lookup from utils.shared_functions import getDriver from utils.shared_variables import FIM_ID -def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode=False): +def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode=False): input_catchments = gpd.read_file(input_catchments_fileName) input_flows = gpd.read_file(input_flows_fileName) input_huc = gpd.read_file(input_huc_fileName) input_nwmflows = gpd.read_file(input_nwmflows_fileName) - min_catchment_area = float(os.environ['min_catchment_area']) - min_stream_length = float(os.environ['min_stream_length']) + min_catchment_area = float(os.environ['min_catchment_area']) #0.25# + min_stream_length = float(os.environ['min_stream_length']) #0.5# + bathy_src_calc = os.environ['bathy_src_modification'] == "True" # env variable to toggle on/off the bathy calc and src modifications if extent == 'FR': ## crosswalk using majority catchment method @@ -167,7 +169,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f input_src_base = 
pd.read_csv(input_srcbase_fileName, dtype= object) if input_src_base.CatchId.dtype != 'int': input_src_base.CatchId = input_src_base.CatchId.astype(int) - input_src_base = input_src_base.merge(output_flows[['ManningN','HydroID']],left_on='CatchId',right_on='HydroID') + input_src_base = input_src_base.merge(output_flows[['ManningN','HydroID','order_']],left_on='CatchId',right_on='HydroID') input_src_base = input_src_base.rename(columns=lambda x: x.strip(" ")) input_src_base = input_src_base.apply(pd.to_numeric,**{'errors' : 'coerce'}) @@ -209,6 +211,12 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f output_crosswalk = output_src[['HydroID','feature_id']] output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True) + ## bathy estimation integration in synthetic rating curve calculations + if (bathy_src_calc == True and extent == 'MS'): + output_src = bathy_rc_lookup(output_src,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName) + else: + print('Note: NOT using bathy estimation approach to modify the SRC...') + # make hydroTable output_hydro_table = output_src.loc[:,['HydroID','feature_id','Stage','Discharge (m3s-1)']] output_hydro_table.rename(columns={'Stage' : 'stage','Discharge (m3s-1)':'discharge_cms'},inplace=True) @@ -263,6 +271,11 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f parser.add_argument('-d','--input-catchments-fileName', help='DEM derived catchments', required=True) parser.add_argument('-a','--input-flows-fileName', help='DEM derived streams', required=True) parser.add_argument('-s','--input-srcbase-fileName', help='Base synthetic rating curve table', required=True) + parser.add_argument('-u','--input-bathy-fileName', help='Text file with bankfull bathy variables', required=True) + parser.add_argument('-v','--output-bathy-fileName', help='Output bathy/bankfull crosswalk and calculated bathy variables', required=True) + parser.add_argument('-e','--output-bathy-order-fileName', help='Output bathy/bankfull stream order avg calculated bathy variables', required=True) + parser.add_argument('-g','--output-bathy-thalweg-fileName', help='Output bathy thalweg calculation', required=True) + parser.add_argument('-i','--output-bathy-xs-lookup-fileName', help='Output bathy XS Area lookup calculation', required=True) parser.add_argument('-l','--output-catchments-fileName', help='Subset crosswalked catchments', required=True) parser.add_argument('-f','--output-flows-fileName', help='Subset crosswalked streams', required=True) parser.add_argument('-r','--output-src-fileName', help='Output crosswalked synthetic rating curve table', required=True) @@ -283,6 +296,11 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f input_catchments_fileName = args['input_catchments_fileName'] input_flows_fileName = args['input_flows_fileName'] input_srcbase_fileName = args['input_srcbase_fileName'] + input_bathy_fileName = args['input_bathy_fileName'] + output_bathy_fileName = args['output_bathy_fileName'] + output_bathy_streamorder_fileName = args['output_bathy_order_fileName'] + output_bathy_thalweg_fileName = args['output_bathy_thalweg_fileName'] + output_bathy_xs_lookup_fileName = args['output_bathy_xs_lookup_fileName'] output_catchments_fileName = args['output_catchments_fileName'] output_flows_fileName = args['output_flows_fileName'] output_src_fileName = args['output_src_fileName'] @@ -298,4 +316,4 @@ 
def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f small_segments_filename = args['small_segments_filename'] calibration_mode = args['calibration_mode'] - add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode) + add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_fileName,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName,output_catchments_fileName,output_flows_fileName,output_src_fileName,output_src_json_fileName,output_crosswalk_fileName,output_hydro_table_fileName,input_huc_fileName,input_nwmflows_fileName,input_nwmcatras_fileName,mannings_n,input_nwmcat_fileName,extent,small_segments_filename,calibration_mode) diff --git a/src/bathy_rc_adjust.py b/src/bathy_rc_adjust.py new file mode 100755 index 000000000..15a0055b5 --- /dev/null +++ b/src/bathy_rc_adjust.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 + +from os import environ +import geopandas as gpd +import pandas as pd +import numpy as np + +sa_ratio_flag = float(environ['surf_area_thalweg_ratio_flag']) #10x +thal_stg_limit = float(environ['thalweg_stg_search_max_limit']) #3m +bankful_xs_ratio_flag = float(environ['bankful_xs_area_ratio_flag']) #10x +bathy_xsarea_flag = float(environ['bathy_xs_area_chg_flag']) #5x +thal_hyd_radius_flag = float(environ['thalweg_hyd_radius_flag']) #10x + +def bathy_rc_lookup(input_src_base,input_bathy_fileName,output_bathy_fileName,output_bathy_streamorder_fileName,output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName,): + ## Convert input_src_base featureid to integer + if input_src_base.feature_id.dtype != 'int': input_src_base.feature_id = input_src_base.feature_id.astype(int) + + ## Read in the bankfull channel geometry text file + input_bathy = pd.read_csv(input_bathy_fileName, dtype= {'COMID': int}) + + ## Merge input_bathy and modified_src_base df using feature_id/COMID attributes + input_bathy = input_bathy.rename(columns={'COMID':'feature_id','BANKFULL_WIDTH':'BANKFULL_WIDTH (m)','BANKFULL_XSEC_AREA':'BANKFULL_XSEC_AREA (m2)'}) + modified_src_base = input_src_base.merge(input_bathy.loc[:,['feature_id','BANKFULL_WIDTH (m)','BANKFULL_XSEC_AREA (m2)']],how='left',on='feature_id') + + ## Check that the merge process returned matching feature_id entries + if modified_src_base['BANKFULL_WIDTH (m)'].count() == 0: + print('No matching feature_id found between input bathy data and src_base --> No bathy calculations added to SRC!') + return(input_src_base) + else: + ## Use SurfaceArea variable to identify thalweg-restricted stage values for each hydroid + ## Calculate the interrow SurfaceArea ratio n/(n-1) + modified_src_base['SA_div'] = modified_src_base['SurfaceArea (m2)'].div(modified_src_base['SurfaceArea (m2)'].shift(1)) + ## Mask SA_div when Stage = 0 or when the SA_div value (n / n-1) is > threshold value (i.e. 
10x) + modified_src_base['SA_div'].mask((modified_src_base['Stage']==0) | (modified_src_base['SA_div'] this is used to mask the discharge after Manning's equation + modified_src_base = modified_src_base.merge(find_thalweg_notch.loc[:,['HydroID','Thalweg_burn_elev']],how='left',on='HydroID') + + ## Calculate bankfull vs top width difference for each feature_id + modified_src_base['Top Width Diff (m)'] = (modified_src_base['TopWidth (m)'] - modified_src_base['BANKFULL_WIDTH (m)']).abs() + ## Calculate XS Area field (Channel Volume / Stream Length) + modified_src_base['XS Area (m2)'] = modified_src_base['Volume (m3)'] / (modified_src_base['LENGTHKM'] * 1000) + + ## Groupby HydroID and find min of Top Width Diff (m) + output_bathy = modified_src_base[['feature_id','HydroID','order_','Stage','SurfaceArea (m2)','Thalweg_burn_elev','BANKFULL_WIDTH (m)','TopWidth (m)','XS Area (m2)','BANKFULL_XSEC_AREA (m2)','Top Width Diff (m)']] + ## filter out stage = 0 rows in SRC (assuming geom at stage 0 is not a valid channel geom) + output_bathy = output_bathy[output_bathy['Stage'] > 0] + ## filter SRC rows identified as Thalweg burned + output_bathy['Top Width Diff (m)'].mask(output_bathy['Stage'] <= output_bathy['Thalweg_burn_elev'],inplace=True) + ## ignore hydroid/featureid that did not have a valid Bankfull lookup (areas outside CONUS - i.e. Canada) + output_bathy = output_bathy[output_bathy['BANKFULL_XSEC_AREA (m2)'].notnull()] + ## ignore SRC entries with 0 surface area --> handles input SRC artifacts/errors in Great Lakes region + output_bathy = output_bathy[output_bathy['SurfaceArea (m2)'] > 0] + ## find index of minimum top width difference --> this will be used as the SRC "bankfull" row for future calcs + output_bathy = output_bathy.loc[output_bathy.groupby('HydroID')['Top Width Diff (m)'].idxmin()].reset_index(drop=True) + print('Average: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].mean())) + print('Minimum: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].min())) + print('Maximum: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].max())) + print('STD: bankfull width crosswalk difference (m): ' + str(output_bathy['Top Width Diff (m)'].std()) +'\n' + '#################') + + ## Calculate XS Area difference between SRC and Bankfull database + output_bathy['XS Area Diff (m2)'] = (output_bathy['BANKFULL_XSEC_AREA (m2)'] - output_bathy['XS Area (m2)']) + output_bathy['XS Bankfull Area Ratio'] = (output_bathy['BANKFULL_XSEC_AREA (m2)'] / output_bathy['XS Area (m2)']).round(2) + ## masking negative XS Area Diff and XS Area = 0 + output_bathy['XS Bankfull Area Ratio'].mask((output_bathy['XS Area Diff (m2)']<0) | (output_bathy['XS Area (m2)'] == 0),inplace=True) + ## masking negative XS Area Diff and XS Area = 0 + output_bathy['XS Area Diff (m2)'].mask((output_bathy['XS Area Diff (m2)']<0) | (output_bathy['XS Area (m2)'] == 0),inplace=True) + ## remove bogus values where bankfull area ratio > threshold --> 10x (topwidth crosswalk issues or bad bankfull regression data points??) + output_bathy['XS Area Diff (m2)'].mask(output_bathy['XS Bankfull Area Ratio']>bankful_xs_ratio_flag,inplace=True) + ## remove bogus values where bankfull area ratio > threshold --> 10x (topwidth crosswalk issues or bad bankfull regression data points??) 
+ output_bathy['XS Bankfull Area Ratio'].mask(output_bathy['XS Bankfull Area Ratio']>bankful_xs_ratio_flag,inplace=True) + ## Print XS Area Diff statistics + print('Average: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].mean())) + print('Minimum: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].min())) + print('Maximum: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].max())) + print('STD: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].std())) + + ## Bin XS Bankfull Area Ratio by stream order + stream_order_bathy_ratio = output_bathy[['order_','Stage','XS Bankfull Area Ratio']] + ## mask stage values when XS Bankfull Area Ratio is null (need to filter to calculate the median for valid values below) + stream_order_bathy_ratio['Stage'].mask(stream_order_bathy_ratio['XS Bankfull Area Ratio'].isnull(),inplace=True) + stream_order_bathy_ratio = stream_order_bathy_ratio.groupby('order_').agg(count=('XS Bankfull Area Ratio','count'),mean_xs_area_ratio=('XS Bankfull Area Ratio','mean'),median_stage_bankfull=('Stage','median')) + ## fill XS Bankfull Area Ratio and Stage values if no values were found in the grouby calcs + stream_order_bathy_ratio = (stream_order_bathy_ratio.ffill()+stream_order_bathy_ratio.bfill())/2 + ## fill first and last stream order values if needed + stream_order_bathy_ratio = stream_order_bathy_ratio.bfill().ffill() + ## Get count_total tally of the total number of stream order hydroids in the HUC (not filtering anything out) + stream_order_bathy_ratio_count = output_bathy[['order_','Stage']] + stream_order_bathy_ratio_count = output_bathy.groupby('order_').agg(count_total=('Stage','count')) + stream_order_bathy_ratio = stream_order_bathy_ratio.merge(stream_order_bathy_ratio_count,how='left',on='order_') + ## Fill any remaining null values: mean_xs_area_ratio --> 1 median_stage_bankfull --> 0 + stream_order_bathy_ratio['mean_xs_area_ratio'].mask(stream_order_bathy_ratio['mean_xs_area_ratio'].isnull(),1,inplace=True) + stream_order_bathy_ratio['median_stage_bankfull'].mask(stream_order_bathy_ratio['median_stage_bankfull'].isnull(),0,inplace=True) + print(stream_order_bathy_ratio.head) + + ## Combine SRC df and df of XS Area for each hydroid and matching stage and order from bins above + output_bathy = output_bathy.merge(stream_order_bathy_ratio,how='left',on='order_') + modified_src_base = modified_src_base.merge(stream_order_bathy_ratio,how='left',on='order_') + + ## Calculate stage vs median_stage_bankfull difference for bankfull lookup + modified_src_base['lookup_stage_diff'] = (modified_src_base[['median_stage_bankfull','Thalweg_burn_elev']].max(axis=1) - modified_src_base['Stage']).abs() + + ## If median_stage_bankfull is null then set lookup_stage_diff to 999 at stage 0 (handles errors for channels outside CONUS) + modified_src_base['lookup_stage_diff'].mask((modified_src_base['Stage'] == 0) & (modified_src_base['median_stage_bankfull'].isnull()),999,inplace=True) + + ## Groupby HydroID again and find min of lookup_stage_diff + xs_area_hydroid_lookup = modified_src_base[['HydroID','BANKFULL_XSEC_AREA (m2)','XS Area (m2)','Stage','Thalweg_burn_elev','median_stage_bankfull','lookup_stage_diff','mean_xs_area_ratio']] + xs_area_hydroid_lookup = xs_area_hydroid_lookup.loc[xs_area_hydroid_lookup.groupby('HydroID')['lookup_stage_diff'].idxmin()].reset_index(drop=True) + + ## Calculate bathy adjusted XS Area ('XS Area (m2)' 
mutliplied by mean_xs_area_ratio) + xs_area_hydroid_lookup['bathy_calc_xs_area'] = (xs_area_hydroid_lookup['XS Area (m2)'] * xs_area_hydroid_lookup['mean_xs_area_ratio']) - xs_area_hydroid_lookup['XS Area (m2)'] + + ## Calculate the ratio btw the lookup SRC XS_Area and the Bankfull_XSEC_AREA --> use this as a flag for potentially bad XS data + xs_area_hydroid_lookup['bankfull_XS_ratio_flag'] = (xs_area_hydroid_lookup['bathy_calc_xs_area'] / xs_area_hydroid_lookup['BANKFULL_XSEC_AREA (m2)']) + ## Set bath_cal_xs_area to 0 if the bankfull_XS_ratio_flag is > threshold --> 5x (assuming too large of difference to be a reliable bankfull calculation) + xs_area_hydroid_lookup['bathy_calc_xs_area'].mask((xs_area_hydroid_lookup['bankfull_XS_ratio_flag']>bathy_xsarea_flag) | (xs_area_hydroid_lookup['bankfull_XS_ratio_flag'].isnull()),0,inplace=True) + + ## Merge bathy_calc_xs_area to the modified_src_base + modified_src_base = modified_src_base.merge(xs_area_hydroid_lookup.loc[:,['HydroID','bathy_calc_xs_area']],how='left',on='HydroID') + + ## Calculate new bathy adjusted channel geometry variables + modified_src_base = modified_src_base.rename(columns={'Discharge (m3s-1)':'Discharge (m3s-1)_nobathy'}) + modified_src_base['XS Area (m2)_bathy_adj'] = modified_src_base['XS Area (m2)'] + modified_src_base['bathy_calc_xs_area'] + modified_src_base['Volume (m3)_bathy_adj'] = modified_src_base['XS Area (m2)_bathy_adj'] * modified_src_base['LENGTHKM'] * 1000 + modified_src_base['WetArea (m2)_bathy_adj'] = modified_src_base['Volume (m3)_bathy_adj']/modified_src_base['LENGTHKM']/1000 + modified_src_base['HydraulicRadius (m)_bathy_adj'] = modified_src_base['WetArea (m2)_bathy_adj']/modified_src_base['WettedPerimeter (m)'] + modified_src_base['HydraulicRadius (m)_bathy_adj'].fillna(0, inplace=True) + ## mask out negative top width differences (avoid thalweg burn notch) + modified_src_base['HydraulicRadius (m)_bathy_adj'].mask((modified_src_base['HydraulicRadius (m)_bathy_adj']>thal_hyd_radius_flag) & (modified_src_base['Stage'] do we need SRC to start at 0?? 
+ modified_src_base['Discharge (m3s-1)'].mask(modified_src_base['Stage'] == 0,0,inplace=True) + modified_src_base['Discharge (m3s-1)'].mask(modified_src_base['Stage'] == modified_src_base['Thalweg_burn_elev'],0,inplace=True) + modified_src_base['Discharge (m3s-1)'].mask(modified_src_base['Stage'] < modified_src_base['Thalweg_burn_elev'],-999,inplace=True) + + ## Organize bathy calc output variables for csv + output_bathy = output_bathy[['HydroID','order_','Stage','SurfaceArea (m2)','TopWidth (m)','BANKFULL_WIDTH (m)','Top Width Diff (m)','XS Area (m2)','BANKFULL_XSEC_AREA (m2)','XS Area Diff (m2)','XS Bankfull Area Ratio','count','median_stage_bankfull','mean_xs_area_ratio']] + + ## Export bathy/bankful calculation tables for easy viewing + output_bathy.to_csv(output_bathy_fileName,index=False) + stream_order_bathy_ratio.to_csv(output_bathy_streamorder_fileName,index=True) + find_thalweg_notch.to_csv(output_bathy_thalweg_fileName,index=True) + xs_area_hydroid_lookup.to_csv(output_bathy_xs_lookup_fileName,index=True) + + print('Completed Bathy Calculations...') + return(modified_src_base) diff --git a/src/output_cleanup.py b/src/output_cleanup.py index e74e26b17..def5a286c 100755 --- a/src/output_cleanup.py +++ b/src/output_cleanup.py @@ -32,6 +32,11 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod 'hydroTable.csv', 'src.json', 'small_segments.csv', + 'bathy_crosswalk_calcs.csv', + 'bathy_stream_order_calcs.csv', + 'bathy_thalweg_flag.csv', + 'bathy_xs_area_hydroid_lookup.csv' + 'src_full_crosswalked.csv' 'usgs_elev_table.csv', 'hand_ref_elev_table.csv' ] diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 1242768dc..de460fb1f 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -425,14 +425,13 @@ $taudemDir/catchhydrogeo -hand $outputHucDataDir/rem_zeroed_masked.tif -catch $o Tcount ## FINALIZE CATCHMENTS AND MODEL STREAMS ## -echo -e $startDiv"Finalize catchments and model streams $hucNumber"$stopDiv +echo -e $startDiv"Finalize catchments and model streams $hucNumber"$stopDiv output_bathy_thalweg_fileName,output_bathy_xs_lookup_fileName, date -u Tstart [ ! 
-f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg ] && \ -$srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent -k $outputHucDataDir/small_segments.csv +$srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -u $inputDataDir/bathymetry/BANKFULL_CONUS.txt -v $outputHucDataDir/bathy_crosswalk_calcs.csv -e $outputHucDataDir/bathy_stream_order_calcs.csv -g $outputHucDataDir/bathy_thalweg_flag.csv -i $outputHucDataDir/bathy_xs_area_hydroid_lookup.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent -k $outputHucDataDir/small_segments.csv Tcount - ## USGS CROSSWALK ## echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv date -u diff --git a/tools/eval_plots.py b/tools/eval_plots.py index 0af2ae9ad..0327319cb 100644 --- a/tools/eval_plots.py +++ b/tools/eval_plots.py @@ -23,9 +23,9 @@ ######################################################################### #Create boxplot ######################################################################### -def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False): +def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, dest_file = False): ''' - Create boxplots. + Create boxplots. Parameters ---------- @@ -44,10 +44,10 @@ def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ fim_configuration: STR Configuration of FIM (FR or MS or Composite). simplify_legend : BOOL, optional - If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. The default is False. dest_file : STR or BOOL, optional - If STR provide the full path to the figure to be saved. If False + If STR provide the full path to the figure to be saved. If False no plot is saved to disk. The default is False. Returns @@ -73,17 +73,17 @@ def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ #Set sizes of ticks and legend. 
axes.tick_params(labelsize = 'xx-large') axes.legend(markerscale = 2, fontsize =20, loc = 'lower left') - + #If simple legend desired if simplify_legend: - #trim labels to FIM 1, FIM 2, and the FIM 3 version + #trim labels to FIM 1, FIM 2, and the FIM 3 version handles, org_labels = axes.get_legend_handles_labels() label_dict = {} for label in org_labels: if 'fim_1' in label: label_dict[label] = 'FIM 1' elif 'fim_2' in label: - label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() + label_dict[label] = 'FIM 2' + ' ' + fim_configuration.lower() elif 'fim_3' in label: label_dict[label] = re.split('_fr|_ms', label)[0].replace('_','.').replace('fim.','FIM ') + ' ' + fim_configuration.lower() if label.endswith('_c'): @@ -96,7 +96,7 @@ def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ if y_field == 'FAR': legend_location = 'upper right' else: - legend_location = 'lower left' + legend_location = 'lower left' #rename legend labels to the simplified labels. axes.legend(handles, new_labels, markerscale = 2, fontsize = 20, loc = legend_location, ncol = int(np.ceil(len(new_labels)/7))) #Print textbox if supplied @@ -114,9 +114,9 @@ def boxplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ ######################################################################### #Create scatter plot ######################################################################### -def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annotate = False, dest_file = False): +def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annotate = False, dest_file = False): ''' - Create boxplots. + Create boxplots. Parameters ---------- @@ -127,11 +127,11 @@ def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annot y_field : STR Field to use for the y-axis (Assumes FIM 3) title_text : STR - Text for plot title. + Text for plot title. stats_text : STR or BOOL Text for stats to place on chart. Default is false (no stats printed) dest_file : STR or BOOL, optional - If STR provide the full path to the figure to be saved. If False + If STR provide the full path to the figure to be saved. If False no plot is saved to disk. The default is False. Returnsy @@ -143,15 +143,15 @@ def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annot #initialize plot fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(15, 10)) - + #Use seaborn to plot the boxplot axes=sns.scatterplot(data=dataframe, x=x_field, y=y_field, color = 'black', s = 150) - + #Set xticks and yticks and background horizontal line. axes.set(ylim=(0.0,1.0),yticks = np.arange(0,1.1,0.1)) axes.set(xlim=(0.0,1.0),xticks = np.arange(0,1.1,0.1)) - axes.grid(b=True, which='major', axis='both') - + axes.grid(b=True, which='major', axis='both') + #Set sizes of ticks and legend. 
axes.tick_params(labelsize = 'xx-large') @@ -166,7 +166,7 @@ def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annot #set title of plot axes.set_title(f'{title_text}',fontsize=20, weight = 'bold') - + if annotate: #Set text for labels box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) @@ -188,9 +188,9 @@ def scatterplot(dataframe, x_field, y_field, title_text, stats_text=False, annot ######################################################################### #Create barplot ######################################################################### -def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False): +def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_text, fim_configuration, textbox_str = False, simplify_legend = False, display_values = False, dest_file = False): ''' - Create barplots. + Create barplots. Parameters ---------- @@ -209,13 +209,13 @@ def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ fim_configuration: STR Configuration of FIM (FR or MS or Composite). simplify_legend : BOOL, optional - If True, it will simplify legend to FIM 1, FIM 2, FIM 3. + If True, it will simplify legend to FIM 1, FIM 2, FIM 3. Default is False. display_values : BOOL, optional - If True, Y values will be displayed above bars. + If True, Y values will be displayed above bars. Default is False. dest_file : STR or BOOL, optional - If STR provide the full path to the figure to be saved. If False + If STR provide the full path to the figure to be saved. If False no plot is saved to disk. Default is False. Returns @@ -243,7 +243,7 @@ def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ axes.legend(markerscale = 2, fontsize =20, loc = 'upper right') #If simple legend desired if simplify_legend: - #trim labels to FIM 1, FIM 2, FIM 3 + #trim labels to FIM 1, FIM 2, FIM 3 handles, org_labels = axes.get_legend_handles_labels() label_dict = {} for label in org_labels: @@ -274,8 +274,8 @@ def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ axes.text(patch.get_x()+patch.get_width()/2., patch.get_height(), '{:1.3f}'.format(value), - ha="center", fontsize=18) - + ha="center", fontsize=18) + #If figure to be saved to disk, then do so, otherwise return fig if dest_file: fig.savefig(dest_file) @@ -288,12 +288,12 @@ def barplot(dataframe, x_field, x_order, y_field, hue_field, ordered_hue, title_ def filter_dataframe(dataframe, unique_field): ''' - This script will filter out the sites (or hucs) which are not consistently - found for all versions for a given magnitude. For example, an AHPS - lid site must have output for all 3 versions (fim1, fim2, fim3) for - a given magnitude (eg action) otherwise that lid is filtered out. - Likewise for a BLE a huc must have output for all 3 versions - (fim1, fim2, fim3) for a given magnitude (eg 100yr) otherwise it is + This script will filter out the sites (or hucs) which are not consistently + found for all versions for a given magnitude. For example, an AHPS + lid site must have output for all 3 versions (fim1, fim2, fim3) for + a given magnitude (eg action) otherwise that lid is filtered out. + Likewise for a BLE a huc must have output for all 3 versions + (fim1, fim2, fim3) for a given magnitude (eg 100yr) otherwise it is filtered out. 
Parameters @@ -307,12 +307,12 @@ def filter_dataframe(dataframe, unique_field): Returns ------- final_filtered_dataframe : Pandas Dataframe - Filtered dataframe that contains only common sites (lids or hucs) between versions for each magnitude. For example, for AHPS all sites which were run for each version for a given magnitude will be kept or for ble, all hucs which ran for all versions for a given magnitude. + Filtered dataframe that contains only common sites (lids or hucs) between versions for each magnitude. For example, for AHPS all sites which were run for each version for a given magnitude will be kept or for ble, all hucs which ran for all versions for a given magnitude. unique_sites: DICT The sites that were included in the dataframe for each magnitude. ''' - + #Get lists of sites for each magnitude/version unique_sites = dataframe.groupby(['magnitude','version'])[unique_field].agg('unique') #Get unique magnitudes @@ -331,8 +331,8 @@ def filter_dataframe(dataframe, unique_field): #Query filtered dataframe and only include data associated with the common sites for that magnitude filtered_common_sites = dataframe.query(f'magnitude == "{magnitude}" & {unique_field} in @common_sites_per_magnitude') #Append the data for each magnitude to a final dataframe that will contain data for all common sites for all magnitudes. - final_filtered_dataframe = final_filtered_dataframe.append(filtered_common_sites, ignore_index = True) - + final_filtered_dataframe = final_filtered_dataframe.append(filtered_common_sites, ignore_index = True) + return final_filtered_dataframe, all_unique_sites ############################################################################## ############################################################################## @@ -429,7 +429,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' extent_config set to MS. This allows for FIM 1 to be included in MS plots/stats (helpful for nws/usgs ahps comparisons). site_barplots: BOOL - Default is false. If True then barplots for each individual site are + Default is false. If True then barplots for each individual site are created. An 'individual' directory with subdirectories of each site are created and the plot is located in each site subdirectory. @@ -470,21 +470,21 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' versions for a given magnitude. The final filtered dataset is written to a dictionary with the key (benchmark source, extent config) and values (filtered dataframe, common sites). ''' - + all_datasets = {} for (benchmark_source, extent_configuration), benchmark_metrics in benchmark_by_source: '''If source is usgs/nws define the base resolution and query (use alternate query if passed). Append filtered datasets to all_datasets dictionary.''' - + if benchmark_source in ['usgs','nws']: # Set the base processing unit for the ahps runs. base_resolution = 'nws_lid' #Default query (used for APG) it could be that bad_sites should be modified. If so pass an alternate query using the "alternate_ahps_query" - bad_sites = ['grfi2','ksdm7','hohn4','rwdn4'] + bad_sites = ['grfi2','ksdm7','hohn4','rwdn4','efdn7','kilo1','chin7','segt2','eagi1','levk1','trbf1'] query = "not flow.isnull() & masked_perc<97 & not nws_lid in @bad_sites" # If alternate ahps evaluation query argument is passed, use that. 
@@ -540,7 +540,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' # Get all versions in dataset all_versions = list(dataset.version.unique()) version_order = [] - + # If versions are not specified then use all available versions and assign to versions_list if not versions: versions_list = all_versions @@ -685,7 +685,7 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' parser.add_argument('-sp', '--spatial', help = 'If enabled, creates spatial layers with metrics populated in attribute table.', action = 'store_true', required = False) parser.add_argument('-f', '--fim_1_ms', help = 'If enabled fim_1 rows will be duplicated and extent config assigned "ms" so that fim_1 can be shown on mainstems plots/stats', action = 'store_true', required = False) parser.add_argument('-i', '--site_plots', help = 'If enabled individual barplots for each site are created.', action = 'store_true', required = False) - + # Extract to dictionary and assign to variables args = vars(parser.parse_args()) From 5a8ebcd1e8c054c0a6e82a724142a78a469bffe2 Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Wed, 14 Apr 2021 08:47:58 -0500 Subject: [PATCH 064/359] Add mainstems attribute for sierra test Sierra test considered all USGS gage locations to be mainstems even though many actually occurred with tributaries. This resulted in unrealistic comparisons as incorrect gages were assigned to mainstems segments. This feature branch identifies gages that are on mainstems via attribute field. - Modifies usgs_gage_crosswalk.py to filter out gages from the usgs_gages.gpkg layer such that for a "MS" run, only consider gages that contain rating curve information (via curve attribute) and are also mainstems gages (via mainstems attribute). - Modifies usgs_gage_crosswalk.py to filter out gages from the usgs_gages.gpkg layer such that for a "FR" run, only consider gages that contain rating curve information (via curve attribute) and are not mainstems gages (via mainstems attribute). - Modifies how mainstems segments are determined by using the nwm_flows_ms.gpkg as a lookup to determine if the NWM segment specified by WRDS for a gage site is a mainstems gage. - Adds a mainstem attribute field to usgs_gages.gpkg that indicates whether a gage is located on a mainstems river. - Adds NWM_FLOWS_MS variable to the .env and .env.template files. - Adds the extent argument specified by user when running fim_run.sh to usgs_gage_crosswalk.py. --- CHANGELOG.md | 18 ++++++++++++++++++ src/run_by_unit.sh | 2 +- src/usgs_gage_crosswalk.py | 15 ++++++++++----- tools/.env.template | 1 + tools/rating_curve_get_usgs_curves.py | 13 ++++++++++++- 5 files changed, 42 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5e9662f1..baa3ba19f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. + +## v3.0.15.1 - 2021-04-13 - [PR #355](https://github.com/NOAA-OWP/cahaba/pull/355) + +Sierra test considered all USGS gage locations to be mainstems even though many actually occurred with tributaries. This resulted in unrealistic comparisons as incorrect gages were assigned to mainstems segments. This feature branch identifies gages that are on mainstems via attribute field. 
+### Changes
+
+- Modifies `usgs_gage_crosswalk.py` to filter gages from the `usgs_gages.gpkg` layer such that a "MS" run only considers gages that contain rating curve information (via the `curve` attribute) and are also mainstems gages (via the `mainstem` attribute).
+- Modifies `usgs_gage_crosswalk.py` to filter gages from the `usgs_gages.gpkg` layer such that a "FR" run only considers gages that contain rating curve information (via the `curve` attribute) and are not mainstems gages (via the `mainstem` attribute).
+- Modifies how mainstems segments are determined by using `nwm_flows_ms.gpkg` as a lookup to determine whether the NWM segment specified by WRDS for a gage site is a mainstems segment.
+
+### Additions
+
+- Adds a `mainstem` attribute field to `usgs_gages.gpkg` that indicates whether a gage is located on a mainstems river.
+- Adds the `NWM_FLOWS_MS` variable to the `.env` and `.env.template` files.
+- Adds the `extent` argument specified by the user when running `fim_run.sh` to `usgs_gage_crosswalk.py`.
+
+
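A condensed sketch of the two pieces described above, tagging mainstems gages and then filtering by the `extent` passed from `fim_run.sh` (layer paths are placeholders; the real logic lives in `rating_curve_get_usgs_curves.py` and `usgs_gage_crosswalk.py` below):

```python
import geopandas as gpd

# Tag gages whose NWM feature_id falls on a mainstems segment.
ms_segs = gpd.read_file('nwm_flows_ms.gpkg').ID.astype(str).to_list()
gages = gpd.read_file('usgs_gages.gpkg')
gages['mainstem'] = 'no'
gages.loc[gages.feature_id.astype(str).isin(ms_segs), 'mainstem'] = 'yes'

# During fim_run.sh, the crosswalk keeps only the gages that match the run's extent.
extent = 'MS'
if extent == 'MS':
    gages = gages.query('curve == "yes" & mainstem == "yes"')
elif extent == 'FR':
    gages = gages.query('curve == "yes" & mainstem == "no"')
```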

+ ## v3.0.15.0 - 2021-04-08 - [PR #340](https://github.com/NOAA-OWP/cahaba/pull/340) Implementing a prototype technique to estimate the missing bathymetric component in the HAND-derived synthetic rating curves. The new Bathymetric Adjusted Rating Curve (BARC) function is built within the `fim_run.sh` workflow and will ingest bankfull geometry estimates provided by the user to modify the cross section area used in the synthetic rating curve generation. diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index de460fb1f..841f07522 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -436,7 +436,7 @@ Tcount echo -e $startDiv"USGS Crosswalk $hucNumber"$stopDiv date -u Tstart -$srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv +$srcDir/usgs_gage_crosswalk.py -gages $inputDataDir/usgs_gages/usgs_gages.gpkg -dem $outputHucDataDir/dem_meters.tif -flows $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -cat $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -wbd $outputHucDataDir/wbd_buffered.gpkg -dem_adj $dem_thalwegCond -outtable $outputHucDataDir/usgs_elev_table.csv -e $extent Tcount ## CLEANUP OUTPUTS ## diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index fb4b9533d..ee61c3d11 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -32,7 +32,7 @@ ''' -def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,output_table_filename): +def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename,dem_adj_filename,output_table_filename,extent): wbd_buffer = gpd.read_file(wbd_buffer_filename) usgs_gages = gpd.read_file(usgs_gages_filename, mask=wbd_buffer) @@ -41,8 +41,12 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in input_catchment = gpd.read_file(input_catchment_filename) dem_adj = rasterio.open(dem_adj_filename,'r') - #Query out usgs_gages that don't have rating curve data - usgs_gages = usgs_gages.query('curve == "yes"') + #MS extent use gages that are mainstem + if extent == "MS": + usgs_gages = usgs_gages.query('curve == "yes" & mainstem == "yes"') + #FR extent use gages that are not mainstem + if extent == "FR": + usgs_gages = usgs_gages.query('curve == "yes" & mainstem == "no"') if input_flows.HydroID.dtype != 'int': input_flows.HydroID = input_flows.HydroID.astype(int) @@ -113,7 +117,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in parser.add_argument('-wbd','--wbd-buffer-filename', help='WBD buffer', required=True) parser.add_argument('-dem_adj','--dem-adj-filename', help='Thalweg adjusted DEM', required=True) parser.add_argument('-outtable','--output-table-filename', help='Table to append data', required=True) - + parser.add_argument('-e', '--extent', help="extent configuration entered by user when running fim_run.sh", required = True) args = vars(parser.parse_args()) usgs_gages_filename = args['usgs_gages_filename'] @@ -123,5 +127,6 @@ def 
crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in wbd_buffer_filename = args['wbd_buffer_filename'] dem_adj_filename = args['dem_adj_filename'] output_table_filename = args['output_table_filename'] + extent = args['extent'] - crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,output_table_filename) + crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,input_catchment_filename,wbd_buffer_filename, dem_adj_filename,output_table_filename, extent) diff --git a/tools/.env.template b/tools/.env.template index 462d9c556..ed83c8113 100644 --- a/tools/.env.template +++ b/tools/.env.template @@ -1,3 +1,4 @@ API_BASE_URL= EVALUATED_SITES_CSV= WBD_LAYER= +NWM_FLOWS_MS= diff --git a/tools/rating_curve_get_usgs_curves.py b/tools/rating_curve_get_usgs_curves.py index eb43bab3e..6676c27b3 100644 --- a/tools/rating_curve_get_usgs_curves.py +++ b/tools/rating_curve_get_usgs_curves.py @@ -23,6 +23,8 @@ load_dotenv() API_BASE_URL = os.getenv("API_BASE_URL") WBD_LAYER = os.getenv("WBD_LAYER") +EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") +NWM_FLOWS_MS = os.getenv("NWM_FLOWS_MS") def get_all_active_usgs_sites(): ''' @@ -245,7 +247,16 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): sites_with_data = pd.DataFrame({'location_id':all_rating_curves['location_id'].unique(),'curve':'yes'}) acceptable_sites_gdf = acceptable_sites_gdf.merge(sites_with_data, on = 'location_id', how = 'left') acceptable_sites_gdf.fillna({'curve':'no'},inplace = True) - + #Add mainstems attribute to acceptable sites + print('Attributing mainstems sites') + #Import mainstems segments used in run_by_unit.sh + ms_df = gpd.read_file(NWM_FLOWS_MS) + ms_segs = ms_df.ID.astype(str).to_list() + #Populate mainstems attribute field + acceptable_sites_gdf['mainstem'] = 'no' + acceptable_sites_gdf.loc[acceptable_sites_gdf.eval('feature_id in @ms_segs'),'mainstem'] = 'yes' + + #If workspace is specified, write data to file. if workspace: #Write rating curve dataframe to file From 1a15cd822d249744c74f8a2c43ac7ffb588dc346 Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Fri, 16 Apr 2021 14:09:21 -0500 Subject: [PATCH 065/359] Fixes production list in output_cleanup.py. Fixes production list in output_cleanup.py. --- CHANGELOG.md | 7 +++++++ src/output_cleanup.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index baa3ba19f..c883cc7ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.15.2 - 2021-04-16 - [PR #359](https://github.com/NOAA-OWP/cahaba/pull/359) +Hotfix to preserve desired files when production flag used in `fim_run.sh`. +## Changes + +- Fixed production whitelisted files. + +
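The underlying bug is easy to reproduce: Python silently concatenates adjacent string literals, so a missing comma in the whitelist fuses two filenames into a single entry that never matches an output file. For example:

```python
# Missing commas fuse adjacent string literals into one (never-matching) entry.
broken = ['bathy_xs_area_hydroid_lookup.csv' 'src_full_crosswalked.csv']
fixed = ['bathy_xs_area_hydroid_lookup.csv', 'src_full_crosswalked.csv']

print(broken)       # ['bathy_xs_area_hydroid_lookup.csvsrc_full_crosswalked.csv']
print(len(broken))  # 1
print(len(fixed))   # 2
```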

## v3.0.15.1 - 2021-04-13 - [PR #355](https://github.com/NOAA-OWP/cahaba/pull/355) Sierra test considered all USGS gage locations to be mainstems even though many actually occurred with tributaries. This resulted in unrealistic comparisons as incorrect gages were assigned to mainstems segments. This feature branch identifies gages that are on mainstems via attribute field. diff --git a/src/output_cleanup.py b/src/output_cleanup.py index def5a286c..879103ad6 100755 --- a/src/output_cleanup.py +++ b/src/output_cleanup.py @@ -35,8 +35,8 @@ def output_cleanup(huc_number, output_folder_path, additional_whitelist, is_prod 'bathy_crosswalk_calcs.csv', 'bathy_stream_order_calcs.csv', 'bathy_thalweg_flag.csv', - 'bathy_xs_area_hydroid_lookup.csv' - 'src_full_crosswalked.csv' + 'bathy_xs_area_hydroid_lookup.csv', + 'src_full_crosswalked.csv', 'usgs_elev_table.csv', 'hand_ref_elev_table.csv' ] From a304cf386b6c196d58d6bf7d794672ccea2a500d Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Mon, 19 Apr 2021 12:14:47 -0500 Subject: [PATCH 066/359] Preprocess NHDPlus HR rasters for consistent projections, nodata values, and convert from cm to meters. Preprocess NHDPlus HR rasters for consistent projections, nodata values, and convert from cm to meters. - preprocess_rasters.py reprojects raster, converts to meters, and updates nodata value to -9999. - Cleaned up log messages from bathy_rc_adjust.py and usgs_gage_crosswalk.py. - Outputs paths updated in generate_categorical_fim_mapping.py and generate_categorical_fim.py. - update_raster_profile cleans up raster crs, blocksize, nodata values, and converts elevation grids from cm to meters. - reproject_dem.py imports gdal to reproject elevation rasters because an error was occurring when using rasterio. - burn_in_levees.py replaces the gdal_calc.py command to resolve inconsistent outputs with burned in levee values. This resolves #300. --- CHANGELOG.md | 15 + README.md | 5 +- src/acquire_and_preprocess_inputs.py | 9 +- src/bathy_rc_adjust.py | 4 +- src/burn_in_levees.py | 42 +++ src/preprocess_rasters.py | 56 +++ src/run_by_unit.sh | 20 +- src/usgs_gage_crosswalk.py | 2 +- src/utils/reproject_dem.py | 50 +++ src/utils/shared_functions.py | 124 +++++++ src/utils/shared_variables.py | 1 + tools/comparing_src.py | 393 ---------------------- tools/generate_categorical_fim.py | 84 +++-- tools/generate_categorical_fim_mapping.py | 20 +- 14 files changed, 353 insertions(+), 472 deletions(-) create mode 100755 src/burn_in_levees.py create mode 100755 src/preprocess_rasters.py create mode 100755 src/utils/reproject_dem.py delete mode 100755 tools/comparing_src.py diff --git a/CHANGELOG.md b/CHANGELOG.md index c883cc7ba..2d9969a11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.15.3 - 2021-04-19 - [PR #358](https://github.com/NOAA-OWP/cahaba/pull/358) + +Preprocess NHDPlus HR rasters for consistent projections, nodata values, and convert from cm to meters. + +## Additions +- `preprocess_rasters.py` reprojects raster, converts to meters, and updates nodata value to -9999. +- Cleaned up log messages from `bathy_rc_adjust.py` and `usgs_gage_crosswalk.py`. +- Outputs paths updated in `generate_categorical_fim_mapping.py` and `generate_categorical_fim.py`. +- `update_raster_profile` cleans up raster crs, blocksize, nodata values, and converts elevation grids from cm to meters. 
+- `reproject_dem.py` imports gdal to reproject elevation rasters because an error was occurring when using rasterio. + +## Changes +- `burn_in_levees.py` replaces the `gdal_calc.py` command to resolve inconsistent outputs with burned-in levee values. + +

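The cm-to-meters step called out above has to leave nodata cells alone rather than scaling them along with valid elevations. A numpy-only sketch of that piece of the conversion, run on a small synthetic array instead of an NHDPlus `elev_cm.tif` (the -9999 sentinel matches the script's default nodata argument):

```python
import numpy as np

# Synthetic stand-in for a block of elev_cm.tif: integer centimeters with a
# nodata sentinel (the real nodata value comes from the raster's profile).
src_nodata = -9999
dst_nodata = -9999.0
elev_cm = np.array([[12345, 12400, src_nodata],
                    [12210, src_nodata, 11980]], dtype=np.int32)

# Convert valid cells from centimeters to meters as float32 while rewriting
# nodata cells with the target sentinel instead of dividing them by 100.
elev_m = np.where(elev_cm == src_nodata, dst_nodata,
                  (elev_cm / 100).astype(np.float32))
print(elev_m)
```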
## v3.0.15.2 - 2021-04-16 - [PR #359](https://github.com/NOAA-OWP/cahaba/pull/359) Hotfix to preserve desired files when production flag used in `fim_run.sh`. diff --git a/README.md b/README.md index c067ea29d..8bbeaeb96 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ The following input data sources should be downloaded and preprocessed prior to ### NHDPlus HR datasets - `acquire_and_preprocess_inputs.py` +- `preprocess_rasters.py` - `aggregate_nhd_hr_streams.py` **Please note:** For the following two datasets, please contact Mark Glaudemans (mark.glaudemans@noaa.gov). We are currently working on a long-term data sharing solution for the in-house NOAA data. @@ -68,11 +69,11 @@ docker run --rm -it -v :/data -v :/foss_fim -c /foss_fim/config/ -n ``` diff --git a/src/acquire_and_preprocess_inputs.py b/src/acquire_and_preprocess_inputs.py index 30ce7974a..852942ff3 100755 --- a/src/acquire_and_preprocess_inputs.py +++ b/src/acquire_and_preprocess_inputs.py @@ -4,6 +4,7 @@ import argparse import csv import sys +sys.path.append('/foss_fim/src') import shutil from multiprocessing import Pool import geopandas as gpd @@ -23,7 +24,9 @@ OVERWRITE_NHD, OVERWRITE_ALL) -from utils.shared_functions import pull_file, run_system_command, subset_wbd_gpkg, delete_file, getDriver +from utils.shared_functions import (pull_file, run_system_command, + subset_wbd_gpkg, delete_file, + getDriver) NHDPLUS_VECTORS_DIRNAME = 'nhdplus_vectors' NHDPLUS_RASTERS_DIRNAME = 'nhdplus_rasters' @@ -180,9 +183,7 @@ def pull_and_prepare_nhd_data(args): if not os.path.exists(elev_cm_tif) or overwrite_nhd: pull_file(nhd_raster_download_url, nhd_raster_extraction_path) os.system("7za e {nhd_raster_extraction_path} -o{nhd_raster_parent_dir} elev_cm.tif -r ".format(nhd_raster_extraction_path=nhd_raster_extraction_path, nhd_raster_parent_dir=nhd_raster_parent_dir)) - # Change projection for elev_cm.tif. 
- #print("Projecting elev_cm...") - #run_system_command(['gdal_edit.py -a_srs "{projection}" {elev_cm_tif}'.format(projection=PREP_PROJECTION, elev_cm_tif=elev_cm_tif)]) + file_list = os.listdir(nhd_raster_parent_dir) for f in file_list: full_path = os.path.join(nhd_raster_parent_dir, f) diff --git a/src/bathy_rc_adjust.py b/src/bathy_rc_adjust.py index 15a0055b5..41683c905 100755 --- a/src/bathy_rc_adjust.py +++ b/src/bathy_rc_adjust.py @@ -82,7 +82,7 @@ def bathy_rc_lookup(input_src_base,input_bathy_fileName,output_bathy_fileName,ou print('STD: bankfull XS Area crosswalk difference (m2): ' + str(output_bathy['XS Area Diff (m2)'].std())) ## Bin XS Bankfull Area Ratio by stream order - stream_order_bathy_ratio = output_bathy[['order_','Stage','XS Bankfull Area Ratio']] + stream_order_bathy_ratio = output_bathy[['order_','Stage','XS Bankfull Area Ratio']].copy() ## mask stage values when XS Bankfull Area Ratio is null (need to filter to calculate the median for valid values below) stream_order_bathy_ratio['Stage'].mask(stream_order_bathy_ratio['XS Bankfull Area Ratio'].isnull(),inplace=True) stream_order_bathy_ratio = stream_order_bathy_ratio.groupby('order_').agg(count=('XS Bankfull Area Ratio','count'),mean_xs_area_ratio=('XS Bankfull Area Ratio','mean'),median_stage_bankfull=('Stage','median')) @@ -91,13 +91,11 @@ def bathy_rc_lookup(input_src_base,input_bathy_fileName,output_bathy_fileName,ou ## fill first and last stream order values if needed stream_order_bathy_ratio = stream_order_bathy_ratio.bfill().ffill() ## Get count_total tally of the total number of stream order hydroids in the HUC (not filtering anything out) - stream_order_bathy_ratio_count = output_bathy[['order_','Stage']] stream_order_bathy_ratio_count = output_bathy.groupby('order_').agg(count_total=('Stage','count')) stream_order_bathy_ratio = stream_order_bathy_ratio.merge(stream_order_bathy_ratio_count,how='left',on='order_') ## Fill any remaining null values: mean_xs_area_ratio --> 1 median_stage_bankfull --> 0 stream_order_bathy_ratio['mean_xs_area_ratio'].mask(stream_order_bathy_ratio['mean_xs_area_ratio'].isnull(),1,inplace=True) stream_order_bathy_ratio['median_stage_bankfull'].mask(stream_order_bathy_ratio['median_stage_bankfull'].isnull(),0,inplace=True) - print(stream_order_bathy_ratio.head) ## Combine SRC df and df of XS Area for each hydroid and matching stage and order from bins above output_bathy = output_bathy.merge(stream_order_bathy_ratio,how='left',on='order_') diff --git a/src/burn_in_levees.py b/src/burn_in_levees.py new file mode 100755 index 000000000..89d10a0b0 --- /dev/null +++ b/src/burn_in_levees.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 + +import rasterio +import numpy as np +import argparse + + +def burn_in_levees(dem_filename,nld_filename,out_dem_filename): + + dem = rasterio.open(dem_filename) + nld = rasterio.open(nld_filename) + + dem_data = dem.read(1) + nld_data = nld.read(1) + + no_data = nld.nodata + + nld_m = np.where(nld_data == int(no_data), -9999.0, (nld_data*0.3048).astype(rasterio.float32)) + + dem_profile = dem.profile.copy() + + dem_nld_burn = np.maximum(dem_data, nld_m) + + + with rasterio.open(out_dem_filename, "w", **dem_profile, BIGTIFF='YES') as dest: + dest.write(dem_nld_burn, indexes = 1) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Burn in NLD elevations') + parser.add_argument('-dem','--dem-filename', help='DEM filename', required=True,type=str) + parser.add_argument('-nld','--nld-filename', help='NLD filename', 
required=True,type=str) + parser.add_argument('-out','--out-dem-filename', help='out DEM filename', required=True,type=str) + + args = vars(parser.parse_args()) + + dem_filename = args['dem_filename'] + nld_filename = args['nld_filename'] + out_dem_filename = args['out_dem_filename'] + + burn_in_levees(dem_filename,nld_filename,out_dem_filename) diff --git a/src/preprocess_rasters.py b/src/preprocess_rasters.py new file mode 100755 index 000000000..7026f4cb2 --- /dev/null +++ b/src/preprocess_rasters.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 + +import os +from osgeo import gdal +import sys +sys.path.append('/foss_fim/src') +from multiprocessing import Pool +import argparse +from utils.reproject_dem import reproject_dem +from utils.shared_functions import update_raster_profile +from utils.shared_variables import PREP_PROJECTION, PREP_PROJECTION_CM + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Reproject Elevation rasters and update profile') + parser.add_argument('-dem_dir','--dem-dir', help='DEM filename', required=True,type=str) + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) + parser.add_argument('-nodata','--nodata-val', help='DEM nodata value', required=False,type=float,default=-9999.0) + parser.add_argument('-block','--blocksize', help='DEM blocksize', required=False,type=int,default=512) + parser.add_argument('-keep','--keep-intermediate', help='keep intermediate files', required=False,type=bool,default=True) + + args = vars(parser.parse_args()) + + dem_dir = args['dem_dir'] + number_of_jobs = args['number_of_jobs'] + nodata_val = args['nodata_val'] + blocksize = args['blocksize'] + keep_intermediate = args['keep_intermediate'] + + reproject_procs_list = [] + + for huc in os.listdir(dem_dir): + raster_dir = os.path.join(dem_dir,huc) + elev_cm = os.path.join(raster_dir, 'elev_cm.tif') + elev_cm_proj = os.path.join(raster_dir, 'elev_cm_proj.tif') + reproject_procs_list.append([raster_dir, elev_cm, elev_cm_proj, PREP_PROJECTION_CM]) + + # Multiprocess reprojection + with Pool(processes=number_of_jobs) as pool: + pool.map(reproject_dem, reproject_procs_list) + + profile_procs_list = [] + + for huc in os.listdir(dem_dir): + elev_m_tif = os.path.join(dem_dir,huc, 'elev_m.tif') + if not os.path.exists(elev_m_tif): + raster_dir = os.path.join(dem_dir,huc) + elev_cm_proj = os.path.join(raster_dir, 'elev_cm_proj.tif') + elev_m = os.path.join(raster_dir, 'elev_m.tif') + profile_procs_list.append([elev_cm_proj, elev_m,PREP_PROJECTION,nodata_val,blocksize,keep_intermediate]) + + # Multiprocess update profile + with Pool(processes=2) as pool: + # TODO read in windows becasue gdal rasters are massive + pool.map(update_raster_profile, profile_procs_list) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 841f07522..c8f490696 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -32,7 +32,7 @@ hucUnitLength=${#hucNumber} huc4Identifier=${hucNumber:0:4} huc2Identifier=${hucNumber:0:2} input_NHD_WBHD_layer=WBDHU$hucUnitLength -input_DEM=$inputDataDir/nhdplus_rasters/HRNHDPlusRasters"$huc4Identifier"/elev_cm.tif +input_DEM=$inputDataDir/nhdplus_rasters/HRNHDPlusRasters"$huc4Identifier"/elev_m.tif input_NLD=$inputDataDir/nld_vectors/huc2_levee_lines/nld_preprocessed_"$huc2Identifier".gpkg # Define the landsea water body mask using either Great Lakes or Ocean polygon input # @@ -99,30 +99,22 @@ Tcount echo -e $startDiv"Clip DEM $hucNumber"$stopDiv date -u Tstart -[ ! 
-f $outputHucDataDir/dem.tif ] && \ -gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Int32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" $input_DEM $outputHucDataDir/dem.tif +[ ! -f $outputHucDataDir/dem_meters.tif ] && \ +gdalwarp -cutline $outputHucDataDir/wbd_buffered.gpkg -crop_to_cutline -ot Float32 -r bilinear -of "GTiff" -overwrite -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "TILED=YES" -co "COMPRESS=LZW" -co "BIGTIFF=YES" $input_DEM $outputHucDataDir/dem_meters.tif Tcount ## GET RASTER METADATA echo -e $startDiv"Get DEM Metadata $hucNumber"$stopDiv date -u Tstart -read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/dem.tif) +read fsize ncols nrows ndv xmin ymin xmax ymax cellsize_resx cellsize_resy<<<$($srcDir/getRasterInfoNative.py $outputHucDataDir/dem_meters.tif) ## RASTERIZE NLD MULTILINES ## echo -e $startDiv"Rasterize all NLD multilines using zelev vertices"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/nld_rasterized_elev.tif ] && [ -f $outputHucDataDir/nld_subset_levees.gpkg ] && \ -gdal_rasterize -l nld_subset_levees -3d -at -init -9999 -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif -Tcount - -## CONVERT TO METERS ## -echo -e $startDiv"Convert DEM to Meters $hucNumber"$stopDiv -date -u -Tstart -[ ! -f $outputHucDataDir/dem_meters.tif ] && \ -gdal_calc.py --quiet --type=Float32 --co "BLOCKXSIZE=512" --co "BLOCKYSIZE=512" --co "TILED=YES" --co "COMPRESS=LZW" --co "BIGTIFF=YES" -A $outputHucDataDir/dem.tif --outfile="$outputHucDataDir/dem_meters.tif" --calc="A/100" --NoDataValue=$ndv +gdal_rasterize -l nld_subset_levees -3d -at -a_nodata $ndv -te $xmin $ymin $xmax $ymax -ts $ncols $nrows -ot Float32 -of GTiff -co "BLOCKXSIZE=512" -co "BLOCKYSIZE=512" -co "COMPRESS=LZW" -co "BIGTIFF=YES" -co "TILED=YES" $outputHucDataDir/nld_subset_levees.gpkg $outputHucDataDir/nld_rasterized_elev.tif Tcount ## RASTERIZE REACH BOOLEAN (1 & 0) ## @@ -156,7 +148,7 @@ echo -e $startDiv"Burn nld levees into dem & convert nld elev to meters (*Overwr date -u Tstart [ -f $outputHucDataDir/nld_rasterized_elev.tif ] && \ -gdal_calc.py --quiet --type=Float32 --overwrite --NoDataValue $ndv --co "BLOCKXSIZE=512" --co "BLOCKYSIZE=512" --co "TILED=YES" --co "COMPRESS=LZW" --co "BIGTIFF=YES" -A $outputHucDataDir/dem_meters.tif -B $outputHucDataDir/nld_rasterized_elev.tif --outfile="$outputHucDataDir/dem_meters.tif" --calc="maximum(A,((B>-9999)*0.3048))" --NoDataValue=$ndv +$srcDir/burn_in_levees.py -dem $outputHucDataDir/dem_meters.tif -nld $outputHucDataDir/nld_rasterized_elev.tif -out $outputHucDataDir/dem_meters.tif Tcount ## DEM Reconditioning ## diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index ee61c3d11..c29a92acf 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -9,7 +9,7 @@ import pygeos from shapely.wkb import dumps, loads import warnings -warnings.simplefilter(action='ignore', category=FutureWarning) +warnings.simplefilter("ignore") ''' Get elevation at adjusted USGS gages locations diff --git a/src/utils/reproject_dem.py b/src/utils/reproject_dem.py new file mode 100755 index 000000000..dba8f65de --- /dev/null +++ b/src/utils/reproject_dem.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + 
+import os +from osgeo import gdal +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION_CM +import shutil +from multiprocessing import Pool +import argparse + + +def reproject_dem(args): + + raster_dir = args[0] + elev_cm = args[1] + elev_cm_proj = args[2] + reprojection = args[3] + + if os.path.exists(elev_cm_proj): + os.remove(elev_cm_proj) + + shutil.copy(elev_cm, elev_cm_proj) + + print(f"Reprojecting {elev_cm_proj}") + gdal.Warp(elev_cm_proj,elev_cm_proj,dstSRS=reprojection) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Burn in NLD elevations') + parser.add_argument('-dem_dir','--dem-dir', help='DEM filename', required=True,type=str) + parser.add_argument('-j','--number-of-jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) + + args = vars(parser.parse_args()) + + dem_dir = args['dem_dir'] + number_of_jobs = args['number_of_jobs'] + + reproject_procs_list = [] + + for huc in os.listdir(dem_dir): + raster_dir = os.path.join(dem_dir,huc) + elev_cm = os.path.join(raster_dir, 'elev_cm.tif') + elev_cm_proj = os.path.join(raster_dir, 'elev_cm_proj.tif') + reproject_procs_list.append([raster_dir, elev_cm, elev_cm_proj, PREP_PROJECTION_CM]) + + # Multiprocess reprojection + with Pool(processes=number_of_jobs) as pool: + pool.map(reproject_dem, reproject_procs_list) diff --git a/src/utils/shared_functions.py b/src/utils/shared_functions.py index 6ea7b0a74..876c1769e 100644 --- a/src/utils/shared_functions.py +++ b/src/utils/shared_functions.py @@ -2,6 +2,10 @@ import os from os.path import splitext +import rasterio +import numpy as np +from rasterio.warp import calculate_default_transform, reproject, Resampling +from pyproj.crs import CRS def getDriver(fileName): @@ -83,3 +87,123 @@ def subset_wbd_gpkg(wbd_gpkg, multilayer_wbd_geopackage): layer_name = os.path.split(wbd_gpkg)[1].strip('.gpkg') gdf.crs = PREP_PROJECTION gdf.to_file(multilayer_wbd_geopackage, layer=layer_name,driver='GPKG',index=False) + + +def update_raster_profile(args): + + elev_cm_filename = args[0] + elev_m_filename = args[1] + projection = args[2] + nodata_val = args[3] + blocksize = args[4] + keep_intermediate = args[5] + + if isinstance(blocksize, int): + pass + elif isinstance(blocksize,str): + blocksize = int(blocksize) + elif isinstance(blocksize,float): + blocksize = int(blocksize) + else: + raise TypeError("Pass integer for blocksize") + + assert elev_cm_filename.endswith('.tif'), "input raster needs to be a tif" + + # Update nodata value and convert from cm to meters + dem_cm = rasterio.open(elev_cm_filename) + + no_data = dem_cm.nodata + data = dem_cm.read(1) + + dem_m = np.where(data == int(no_data), nodata_val, (data/100).astype(rasterio.float32)) + + del data + + dem_m_profile = dem_cm.profile.copy() + + dem_m_profile.update(driver='GTiff',tiled=True,nodata=nodata_val, + blockxsize=blocksize, blockysize=blocksize, + dtype='float32',crs=projection,compress='lzw',interleave='band') + + with rasterio.open(elev_m_filename, "w", **dem_m_profile, BIGTIFF='YES') as dest: + dest.write(dem_m, indexes = 1) + + if keep_intermediate == False: + os.remove(elev_cm_filename) + + del dem_m + dem_cm.close() + + +''' +This function isn't currently used but is the preferred method for +reprojecting elevation grids. + +Several USGS elev_cm.tifs have the crs value in their profile stored as the string "CRS.from_epsg(26904)" +instead of the actual output of that command. 
+ +Rasterio fails to properly read the crs but using gdal retrieves the correct projection. +Until this issue is resolved use the reproject_dem function in reproject_dem.py instead. +reproject_dem is not stored in the shared_functions.py because rasterio and +gdal bindings are not entirely compatible: https://rasterio.readthedocs.io/en/latest/topics/switch.html + +''' +def reproject_raster(input_raster_name,reprojection,blocksize=None,reprojected_raster_name=None): + + if blocksize is not None: + if isinstance(blocksize, int): + pass + elif isinstance(blocksize,str): + blocksize = int(blocksize) + elif isinstance(blocksize,float): + blocksize = int(blocksize) + else: + raise TypeError("Pass integer for blocksize") + else: + blocksize = 256 + + assert input_raster_name.endswith('.tif'), "input raster needs to be a tif" + + reprojection = rasterio.crs.CRS.from_string(reprojection) + + with rasterio.open(input_raster_name) as src: + + # Check projection + if src.crs.to_string() != reprojection: + if src.crs.to_string().startswith('EPSG'): + epsg = src.crs.to_epsg() + proj_crs = CRS.from_epsg(epsg) + rio_crs = rasterio.crs.CRS.from_user_input(proj_crs).to_string() + else: + rio_crs = src.crs.to_string() + + print(f"{input_raster_name} not projected") + print(f"Reprojecting from {rio_crs} to {reprojection}") + + transform, width, height = calculate_default_transform( + src.crs, reprojection, src.width, src.height, *src.bounds) + kwargs = src.meta.copy() + kwargs.update({ + 'crs': reprojection, + 'transform': transform, + 'width': width, + 'height': height, + 'compress': 'lzw' + }) + + if reprojected_raster_name is None: + reprojected_raster_name = input_raster_name + + assert reprojected_raster_name.endswith('.tif'), "output raster needs to be a tif" + + with rasterio.open(reprojected_raster_name, 'w', **kwargs, tiled=True, blockxsize=blocksize, blockysize=blocksize, BIGTIFF='YES') as dst: + reproject( + source=rasterio.band(src, 1), + destination=rasterio.band(dst, 1), + src_transform=src.transform, + src_crs=rio_crs, + dst_transform=transform, + dst_crs=reprojection.to_string(), + resampling=Resampling.nearest) + del dst + del src diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index 244a12d2b..37a98b424 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -2,6 +2,7 @@ # Projections. 
#PREP_PROJECTION = "+proj=aea +datum=NAD83 +x_0=0.0 +y_0=0.0 +lon_0=96dW +lat_0=23dN +lat_1=29d30'N +lat_2=45d30'N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs +units=m" +PREP_PROJECTION_CM = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Albers"],PARAMETER["false_easting",0.0],PARAMETER["false_northing",0.0],PARAMETER["central_meridian",-96.0],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_origin",23.0],UNIT["Meter",1.0],VERTCS["NAVD_1988",VDATUM["North_American_Vertical_Datum_1988"],PARAMETER["Vertical_Shift",0.0],PARAMETER["Direction",1.0],UNIT["Centimeter",0.01]]]' PREP_PROJECTION = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.2572221010042,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4269"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_center",23],PARAMETER["longitude_of_center",-96],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]]]' VIZ_PROJECTION ='PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]]' # -- Data URLs-- # diff --git a/tools/comparing_src.py b/tools/comparing_src.py deleted file mode 100755 index 977b05794..000000000 --- a/tools/comparing_src.py +++ /dev/null @@ -1,393 +0,0 @@ -#!/usr/bin/env python3 - -import matplotlib.pyplot as plt -import numpy as np -import json -import geopandas as gpd -import pandas as pd -from raster import Raster -import os -from shapely.geometry import Point - -projectDirectory = os.path.join(os.path.expanduser('~'),'projects','foss_fim') -dataDirectory = os.path.join(projectDirectory,'data') - -# nwm_catchments_fileName = os.path.join(dataDirectory,'nwm','NWMCatchment.shp') -# nwm_flows_fileName = os.path.join(dataDirectory,'test2','inputs','nwm_flows_proj_120903_v2.gpkg') -# -# esri_catchments_fileName = os.path.join(projectDirectory,'tests','CatchmentH.shp') -esri_flows_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','FPRiver.gpkg') - -# foss_catchments_fileName = os.path.join(dataDirectory,'test2','outputs','gw_catchments_reaches_clipped_addedAttributes_crosswalked.gpkg') -# foss_raster_catchments_fileName = os.path.join(dataDirectory,'test2','outputs','gw_catchments_reaches_clipped_addedAttributes.tif') -foss_flows_fileName = os.path.join(dataDirectory,'test2','outputs','demDerived_reaches_split_clipped_addedAttributes_crosswalked.gpkg') -foss_flows_fileName = os.path.join(dataDirectory,'test2','outputs','NHDPlusBurnLineEvent_subset_split_clipped_addedAttributes_crosswalked.gpkg') -# foss_flows_fileName = 
os.path.join(dataDirectory,'test2','outputs_v32','demDerived_reaches_split_clipped_addedAttributes_crosswalked.gpkg') - -esri_src_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','120903_channel_properties.json') -foss_src_fileName = os.path.join(dataDirectory,'test2','outputs','src.json') -# foss_src_fileName = os.path.join(dataDirectory,'test2','outputs_v32','src.json') - -esri_src_table_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','pf_ModelStream.csv') -foss_src_table_fileName = os.path.join(dataDirectory,'test2','outputs','src_full_crosswalked.csv') -# foss_src_table_fileName = os.path.join(dataDirectory,'test2','outputs_v32','src_full_crosswalked.csv') - -esri_cw_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','cross_walk_table_esri_120903.csv') -foss_cw_fileName = os.path.join(dataDirectory,'test2','outputs','crosswalk_table.csv') -# foss_cw_fileName = os.path.join(dataDirectory,'test2','outputs_v32','crosswalk_table.csv') - -esri_rem_fileName = os.path.join(projectDirectory,'tests','eval_1','final_esri_hand_outputs','hand_120903.tif') -foss_rem_fileName = os.path.join(dataDirectory,'test2','outputs','rem_clipped_zeroed_masked.tif') - -forecast_100_fileName = os.path.join(projectDirectory,'tests','eval_1','validation_data','forecast_120903_100yr.csv') - -# catchments -# esri_catchments = gpd.read_file(esri_catchments_fileName) -# foss_catchments = gpd.read_file(foss_catchments_fileName) -# # foss_raster_catchments = Raster(foss_raster_catchments_fileName) -# nwm_catchments = gpd.read_file(nwm_catchments_fileName) -# -# # flows -esri_flows = gpd.read_file(esri_flows_fileName) -foss_flows = gpd.read_file(foss_flows_fileName) -# nwm_flows = gpd.read_file(nwm_flows_fileName) - -# slopes -# esri_slope=Raster('eval_1/final_esri_hand_outputs/unitrun/hdr.adf') -# foss_slope=Raster('../data/test2/outputs/slopes_d8_thalwegCond_filled_clipped_masked.tif') -# -# foss_cell_area = abs(foss_rem.gt[1]*foss_rem.gt[5]) -# esri_cell_area = abs(esri_rem.gt[1] * esri_rem.gt[5]) -# -# foss_dv_bool = foss_rem.array!=foss_rem.ndv -# esri_dv_bool = esri_rem.array!=esri_rem.ndv -# -# esri_slope.array = esri_slope.array[esri_dv_bool] - 1 -# -# foss_slopes_trans = np.sqrt(1+(foss_slope.array[foss_dv_bool])**2) -# esri_slopes_trans = np.sqrt(1+(esri_slope.array[esri_dv_bool])**2) -# for d in np.array(range(0,30))*.3048: -# foss_area = np.sum(np.logical_and(foss_dv_bool,foss_rem.array<=d) * foss_cell_area * foss_slopes_trans) -# esri_area = np.sum(np.logical_and(esri_dv_bool,esri_rem.array<=d) * esri_cell_area * esri_slopes_trans) - - - - - -# sinuosity - -def sinuosity(flows_geometry): - - numberOfGeoms = len(flows_geometry) - arc_lengths = [-1] * numberOfGeoms ; straight_lengths = [-1] * numberOfGeoms - - for i,geom in enumerate(flows_geometry): - arc_lengths[i] = geom.length - - point_1 = Point(*geom.bounds[0:2]) - point_2 = Point(*geom.bounds[2:4]) - - straight_lengths[i] = point_1.distance(point_2) - - sinuosity_table = pd.DataFrame({'arc_lengths' : arc_lengths , 'straight_lengths' : straight_lengths}) - - return(sinuosity_table) - -esri_sinuosity = sinuosity(esri_flows.geometry) -foss_sinuosity = sinuosity(foss_flows.geometry) - -avg_esri_si = (esri_sinuosity['arc_lengths']/esri_sinuosity['straight_lengths']).mean() -avg_foss_si = (foss_sinuosity['arc_lengths']/foss_sinuosity['straight_lengths']).mean() - -print(avg_esri_si,avg_foss_si,avg_esri_si/avg_foss_si) - - -# SRCS's -with 
open(esri_src_fileName,'r') as f: - esri_src = json.load(f) - -with open(foss_src_fileName,'r') as f: - foss_src = json.load(f) - -esri_cw = pd.read_csv(esri_cw_fileName,dtype=int) -foss_cw = pd.read_csv(foss_cw_fileName,dtype=int) - -esri_rem = Raster(esri_rem_fileName) -foss_rem = Raster(foss_rem_fileName) - -esri_src_table = pd.read_csv(esri_src_table_fileName,dtype={'A':float, 'B':float, 'H':float, 'Length_m':float, 'P':float, 'R':float, 'HydroID':int, 'Q':float}) -foss_src_table = pd.read_csv(foss_src_table_fileName,dtype={'HydroID':int, 'Stage':float, 'Number of Cells':int, 'SurfaceArea (m2)':float, - 'BedArea (m2)':float, 'Volume (m3)':float, 'SLOPE':float, 'LENGTHKM':float, 'AREASQKM':float, - 'Roughness':float, 'TopWidth (m)':float, 'WettedPerimeter (m)':float, 'WetArea (m2)':float, - 'HydraulicRadius (m)':float, 'Discharge (m3s-1)':float, 'feature_id':int}) - -forecast_100 = pd.read_csv(forecast_100_fileName,dtype={'feature_id' : int , 'discharge' : float}) - -intersection_of_feature_id = list(set(esri_cw['feature_id'].unique()) & set(foss_cw['feature_id'].unique()) & set(forecast_100['feature_id'].unique()) ) - - -max_q = np.max(forecast_100['discharge']) -# print(max_q) - -esri_src_table['BA'] = esri_src_table['P'] * esri_src_table['Length_m'] -esri_src_table['V'] = esri_src_table['A'] * esri_src_table['Length_m'] - -esri_src_table = esri_src_table[:][esri_src_table['H']<=10] -foss_src_table = foss_src_table[:][foss_src_table['Stage']<=10] -# print(esri_src_table.sort_values(by=['HydroID','H'])) - -esri_cw = esri_cw[:][esri_cw['feature_id'].isin(intersection_of_feature_id)] -foss_cw = foss_cw[:][foss_cw['feature_id'].isin(intersection_of_feature_id)] - -esri_src_table = esri_src_table.merge(esri_cw,on='HydroID',how='inner') -foss_src_table = foss_src_table.merge(foss_cw,on='HydroID',how='inner') - -foss_src_table.drop(columns='feature_id_y',inplace=True) -foss_src_table.rename(columns={'feature_id_x':'feature_id'},inplace=True) -# esri_hids = esri_cw['HydroID'][esri_cw['feature_id'].isin(intersection_of_feature_id)] -# foss_hids = foss_cw['HydroID'][foss_cw['feature_id'].isin(intersection_of_feature_id)] - -# esri_src_table = esri_src_table[:][esri_src_table['HydroID'].isin(esri_hids)] -# foss_src_table = foss_src_table[:][foss_src_table['HydroID'].isin(foss_hids)] - -# esri_src_table = esri_src_table[:][esri_src_table['HydroID'].isin(esri_hids)] -# foss_src_table = foss_src_table[:][foss_src_table['HydroID'].isin(foss_hids)] - -foss_src_table['Length_m'] = foss_src_table['LENGTHKM'] *1000 -esri_src_table = esri_src_table.merge(esri_flows[['HydroID','S0']],on='HydroID',how='left') - -foss_src_table.rename(columns={'Stage' : 'H' , 'BedArea (m2)' : 'BA','Volume (m3)' : 'V' , - 'SLOPE' : 'S0' , 'WettedPerimeter (m)': 'P', 'WetArea (m2)' : 'A', - 'HydraulicRadius (m)':'R', 'Discharge (m3s-1)': 'Q'},inplace=True) - -foss_src_table = foss_src_table[['H' , 'BA','V' ,'S0' ,'P','A','R','Q','feature_id','HydroID','Length_m']] - -foss_src_table['n'] = 0.06 -esri_src_table['n'] = 0.06 - -# esri_src_table.sort_values(by=['HydroID','H'],inplace=True) -# foss_src_table.sort_values(by=['HydroID','H'],inplace=True) - -esri_src_table.drop(columns='HydroID',inplace=True) -foss_src_table.drop(columns='HydroID',inplace=True) - -esri_src_table = esri_src_table.astype({'H' : str}) -foss_src_table = foss_src_table.astype({'H' : str}) -# esri_src_table = esri_src_table.groupby(['feature_id','H']).mean() -# foss_src_table = foss_src_table.groupby(['feature_id','H']).mean() -# esri_src_table = 
esri_src_table.astype({'H' :float}) -# foss_src_table = foss_src_table.astype({'H' :float}) - -# esri_src_table.reset_index(drop=True) -# foss_src_table.reset_index(drop=True) - -src_table = foss_src_table.merge(esri_src_table,suffixes=('_foss','_esri'),on=['feature_id','H']) -# esri_src_table.sort_values(by=['HydroID','H'],inplace=True) - -# src_table.sort_values(by=['feature_id','H'],inplace=True) -# src_table.reset_index(drop=False,inplace=True) - -src_table = src_table.groupby('H').mean() -src_table.reset_index(drop=False,inplace=True) -src_table = src_table.astype({'H' :float}) -src_table.sort_values(by=['H'],inplace=True) -# print(src_table.index) - -pd.set_option('display.max_rows', 2000) -# print(src_table[['feature_id','H','V_esri','V_foss']].iloc[0:200,:]) -# print(src_table) -percent_error_V = 100 * (src_table['V_foss'].iloc[1:]-src_table['V_esri'].iloc[1:])/src_table['V_esri'].iloc[1:] -percent_error_BA = 100 * (src_table['BA_foss'].iloc[1:]-src_table['BA_esri'].iloc[1:])/src_table['BA_esri'].iloc[1:] -percent_error_L = 100 * (src_table['Length_m_foss']-src_table['Length_m_esri'])/src_table['Length_m_esri'] -percent_error_S = 100 * (src_table['S0_foss']-src_table['S0_esri'])/src_table['S0_esri'] -percent_error_Q = 100 * (src_table['Q_foss'].iloc[1:]-src_table['Q_esri'].iloc[1:])/src_table['Q_esri'].iloc[1:] - -multiplied_error_V = (src_table['V_foss'].iloc[1:]/src_table['V_esri'].iloc[1:])**(5/3) -multiplied_error_BA = (src_table['BA_foss'].iloc[1:]/src_table['BA_esri'].iloc[1:])**(2/3) -multiplied_error_L = (src_table['Length_m_foss']/src_table['Length_m_esri']) -multiplied_error_S = (src_table['S0_foss']/src_table['S0_esri'])**(1/2) -multiplied_error_Q = (src_table['Q_foss'].iloc[1:]/src_table['Q_esri'].iloc[1:]) - -print(percent_error_V.mean(),percent_error_BA.mean(),percent_error_L.mean(),percent_error_S.mean(),percent_error_Q.mean()) -print(multiplied_error_V.mean(),multiplied_error_BA.mean(),multiplied_error_L.mean(),multiplied_error_S.mean(),multiplied_error_Q.mean()) -print((multiplied_error_V.mean()*multiplied_error_S.mean())/(multiplied_error_BA.mean()*multiplied_error_L.mean())) -# print(percent_error_V,percent_error_BA,percent_error_L,percent_error_S,percent_error_Q) -# exit() -# -# tot_V_esri = [] ; tot_V_foss = [] -# foss_dv_bool = foss_rem.array!=foss_rem.ndv -# esri_dv_bool = esri_rem.array!=esri_rem.ndv -# for d in np.array(range(0,30))*.3048: -# foss_cell_area = abs(foss_rem.gt[1]*foss_rem.gt[5]) -# esri_cell_area = abs(esri_rem.gt[1] * esri_rem.gt[5]) -# foss_volume = np.sum(d-foss_rem.array[np.logical_and(foss_dv_bool,foss_rem.array<=d)]) * foss_cell_area -# esri_volume = np.sum(d-esri_rem.array[np.logical_and(esri_dv_bool,esri_rem.array<=d)]) * esri_cell_area -# tot_V_esri = tot_V_esri + [esri_volume] ; tot_V_foss = tot_V_foss + [foss_volume] -# -# print(np.array(tot_V_foss).mean()/np.array(tot_V_esri).mean()) -# print((foss_dv_bool.sum() * foss_cell_area) / (esri_dv_bool.sum() * esri_cell_area)) - - - - - -# print(esri_src_table[['feature_id','H','V']].iloc[0:20,:]) -# print(foss_src_table) -# print(esri_src_table) - - - - -# foss_src_table['HydroID'] - -stage_list = foss_src[str(500)]['stage_list'] -maxLength = len(stage_list) - -overall_esri = None -for fid in intersection_of_feature_id: - esri_hid = esri_cw['HydroID'][esri_cw['feature_id'] == fid].to_numpy() - foss_hid = foss_cw['HydroID'][foss_cw['feature_id'] == fid].to_numpy() - - # all_esri_q = np.zeros(len(esri_src[str(esri_hid[0])]['stage_list']),dtype=np.float32) - all_esri_q = None - for hid in 
esri_hid: - current_esri_q = np.array(esri_src[str(hid)]['q_list']) - - if len(current_esri_q) < maxLength: - nan_array = np.repeat(np.nan, maxLength - len(current_esri_q)) - # print(nan_array) - current_esri_q = np.hstack((current_esri_q,nan_array)) - - if len(current_esri_q) > maxLength: - current_esri_q = current_esri_q[0:maxLength] - - if all_esri_q is None: - all_esri_q = current_esri_q - else: - all_esri_q = np.vstack((all_esri_q,current_esri_q)) - - all_foss_q = None - for hid in foss_hid: - - current_foss_q = np.array(foss_src[str(hid)]['q_list']) - - if all_foss_q is None: - all_foss_q = current_foss_q - else: - all_foss_q = np.vstack((all_foss_q,current_foss_q)) - - # print(all_esri_q.shape,all_foss_q.shape) - # print(all_esri_q) - - if len(all_esri_q.shape) == 2: - mean_esri_q = np.nanmean(all_esri_q,axis=0) - - if len(all_foss_q.shape) == 2: - mean_foss_q = np.nanmean(all_foss_q,axis=0) - - # mean_error = mean_foss_q-mean_esri_q - - # print(mean_esri_q.shape,mean_foss_q.shape,mean_error.shape) - - # mean_abs_error = np.absolute(mean_error) - - if overall_esri is None: - # overall_error = mean_error - overall_esri = mean_esri_q - overall_foss = mean_foss_q - # overall_abs_error = mean_abs_error - else: - # print(mean_error,overall_error.shape) - # overall_error = np.vstack((overall_error,mean_error)) - overall_esri = np.vstack((overall_esri,mean_esri_q)) - overall_foss = np.vstack((overall_foss,mean_foss_q)) - # overall_abs_error = np.vstack((overall_abs_error,mean_abs_error)) - -# print(overall_error) -# print(list(overall_error)) -# overall_error_q_list = list(np.nanmean(overall_error,axis=0)) -overall_esri_q_list = list(np.nanmean(overall_esri,axis=0)) -overall_foss_q_list = list(np.nanmean(overall_foss,axis=0)) - -plt.plot(overall_esri_q_list,stage_list,'r') -plt.plot(overall_foss_q_list,stage_list,'b') -# plt.axis([0,max_q*1.1,0,10]) -plt.show() - -exit() - - - - - - - - -# print(np.mean(overall_abs_error,axis=0)) - -# foss_src = pd.read_csv(foss_src_fileName,skip_blank_lines=True,dtype=object) - -# print('\nFeature IDs') -# print("ESRI # of unique catchments: {}".format(len(np.unique(esri_catchments['feature_id'])))) -# print("FOSS # of unique catchments: {}".format(len(np.unique(foss_catchments['feature_id'])))) -# print("NWM # of unique catchments: {}".format(len(np.unique(nwm_catchments['feature_id'])))) -# print("ESRI # of unique flows: {}".format(len(np.unique(esri_flows['feature_id'])))) -# print("FOSS # of unique flows: {}".format(len(np.unique(foss_flows['feature_id'])))) -# print("NWM # of unique flows: {}".format(len(np.unique(nwm_flows['ID'])))) -# print("FOSS # of unique SRC Feature ID: {}".format(len(np.unique(foss_src['feature_id'])))) -# -# print('\nHydroID') -# print("ESRI # of unique catchments: {}".format(len(np.unique(esri_catchments['HydroID'])))) -# print("FOSS # of unique catchments: {}".format(len(np.unique(foss_catchments['HydroID'])))) -# # print("FOSS # of unique catchments in raster: {}".format(len(np.unique(foss_raster_catchments.array[foss_raster_catchments.array!=foss_raster_catchments.ndv])))) -# print("ESRI # of unique flows: {}".format(len(np.unique(esri_flows['HydroID'])))) -# print("FOSS # of unique flows: {}".format(len(np.unique(foss_flows['HydroID'])))) -# print("ESRI # of unique SRC HydroID: {}".format(len(np.unique(list(esri_src.keys()))))) -# print("FOSS # of unique HydroID's: {}".format(len(np.unique(foss_src['HydroID'])))) -# -# print(foss_flows['LengthKm'].max()) -# print(foss_flows['LengthKm'].mean()) - -# 
print(list(esri_src.keys())) - -# print(len(foss_src)) -# plots src's -# unique_feature_ids_in_foss_src = np.unique(foss_src['feature_id']) - -# featID = 5791828 - -# indices_of_feature = np.where(foss_src['feature_id'] == featID) - -# unique_hydro_ids = np.unique(foss_src['HydroID'][indices_of_feature]) - -# hydroID = '822' -# esri_hydroID = '9975' - -# hydroID = '1279' -# esri_hydroID = '10349' - -hydroID = '1268' -esri_hydroID = '10743' - -hydroID = '1269' -esri_hydroID = '10742' - -# indices_of_hydroid = np.where(foss_src['HydroID'] == hydroID)[0] - -foss_stages = foss_src[hydroID]['stage_list'] -foss_discharge = foss_src[hydroID]['q_list'] - -# feature_id = foss_src['feature_id'][indices_of_hydroid[0]] -esri_stages = esri_src[esri_hydroID]['stage_list'] -esri_flows = esri_src[esri_hydroID]['q_list'] - - -plt.plot(foss_discharge,foss_stages,'b') -plt.plot(esri_flows,esri_stages,'r') -plt.show() - -# for hid in unique_hydro_ids: - - - -# for featID in unique_feature_ids_in_foss_src: diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py index f51bf5aa8..cea992df1 100755 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 + +import os import subprocess import argparse import time @@ -11,7 +13,7 @@ def update_mapping_status(output_mapping_dir, output_flows_dir): ''' Updates the status for nws_lids from the flows subdirectory. Status is updated for sites where the inundation.py routine was not able to - produce inundation for the supplied flow files. It is assumed that if + produce inundation for the supplied flow files. It is assumed that if an error occured in inundation.py that all flow files for a given site experienced the error as they all would have the same nwm segments. @@ -27,86 +29,80 @@ def update_mapping_status(output_mapping_dir, output_flows_dir): None. ''' - #Find all LIDs with empty mapping output folders + # Find all LIDs with empty mapping output folders subdirs = [str(i) for i in Path(output_mapping_dir).rglob('**/*') if i.is_dir()] empty_nws_lids = [Path(directory).name for directory in subdirs if not list(Path(directory).iterdir())] - - #Write list of empty nws_lids to DataFrame, these are sites that failed in inundation.py + + # Write list of empty nws_lids to DataFrame, these are sites that failed in inundation.py mapping_df = pd.DataFrame({'nws_lid':empty_nws_lids}) mapping_df['did_it_map'] = 'no' mapping_df['map_status'] = ' and all categories failed to map' - - #Import shapefile output from flows creation + + # Import shapefile output from flows creation shapefile = Path(output_flows_dir)/'nws_lid_flows_sites.shp' flows_df = gpd.read_file(shapefile) - - #Join failed sites to flows df + + # Join failed sites to flows df flows_df = flows_df.merge(mapping_df, how = 'left', on = 'nws_lid') - - #Switch mapped column to no for failed sites and update status + + # Switch mapped column to no for failed sites and update status flows_df.loc[flows_df['did_it_map'] == 'no', 'mapped'] = 'no' flows_df.loc[flows_df['did_it_map']=='no','status'] = flows_df['status'] + flows_df['map_status'] - - #Perform pass for HUCs where mapping was skipped due to missing data. 
+ + # Perform pass for HUCs where mapping was skipped due to missing data flows_hucs = [i.stem for i in Path(output_flows_dir).iterdir() if i.is_dir()] mapping_hucs = [i.stem for i in Path(output_mapping_dir).iterdir() if i.is_dir()] missing_mapping_hucs = list(set(flows_hucs) - set(mapping_hucs)) - #Update status for nws_lid in missing hucs and change mapped attribute to 'no' + # Update status for nws_lid in missing hucs and change mapped attribute to 'no' flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'status'] = flows_df['status'] + ' and all categories failed to map because missing HUC information' flows_df.loc[flows_df.eval('HUC8 in @missing_mapping_hucs & mapped == "yes"'), 'mapped'] = 'no' - - #Clean up GeoDataFrame and rename columns for consistency. + + # Clean up GeoDataFrame and rename columns for consistency flows_df = flows_df.drop(columns = ['did_it_map','map_status']) flows_df = flows_df.rename(columns = {'nws_lid':'ahps_lid'}) - - #Write out to file + + # Write out to file nws_lid_path = Path(output_mapping_dir) / 'nws_lid_sites.shp' flows_df.to_file(nws_lid_path) - + if __name__ == '__main__': - - #Parse arguments + + # Parse arguments parser = argparse.ArgumentParser(description = 'Run Categorical FIM') parser.add_argument('-f','--fim_version',help='Name of directory containing outputs of fim_run.sh',required=True) parser.add_argument('-j','--number_of_jobs',help='Number of processes to use. Default is 1.',required=False, default="1",type=int) args = vars(parser.parse_args()) - - #Get arguments + + # Get arguments fim_version = args['fim_version'] number_of_jobs = args['number_of_jobs'] - - #################################################################### - #Define default arguments. Modify these if necessary. - today = date.today().strftime('%m%d%Y') - fim_run_dir = Path(f'/data/previous_fim/{fim_version}') - output_flows_dir = Path(f'/data/catfim/{fim_version}/{today}/flows') - output_mapping_dir = Path(f'/data/catfim/{fim_version}/{today}/mapping') + + # Define default arguments. Modify these if necessary + fim_run_dir = Path(f'{fim_version}') + fim_version_folder = os.path.basename(fim_version) + output_flows_dir = Path(f'/data/catfim/{fim_version_folder}/flows') + output_mapping_dir = Path(f'/data/catfim/{fim_version_folder}/mapping') nwm_us_search = '10' - nwm_ds_search = '10' + nwm_ds_search = '10' write_depth_tiff = False - #################################################################### - - #################################################################### - #Run CatFIM scripts in sequence - #################################################################### - #Generate CatFIM flow files. + + ## Run CatFIM scripts in sequence + # Generate CatFIM flow files print('Creating flow files') start = time.time() - subprocess.call(['python3','generate_categorical_fim_flows.py', '-w' , str(output_flows_dir), '-u', nwm_us_search, '-d', nwm_ds_search]) + subprocess.call(['python3','foss_fim/tools/generate_categorical_fim_flows.py', '-w' , str(output_flows_dir), '-u', nwm_us_search, '-d', nwm_ds_search]) end = time.time() elapsed_time = (end-start)/60 print(f'Finished creating flow files in {elapsed_time} minutes') - - #Generate CatFIM mapping. 
+ + # Generate CatFIM mapping print('Begin mapping') start = time.time() - subprocess.call(['python3','generate_categorical_fim_mapping.py', '-r' , str(fim_run_dir), '-s', str(output_flows_dir), '-o', str(output_mapping_dir), '-j', str(number_of_jobs)]) + subprocess.call(['python3','foss_fim/tools/generate_categorical_fim_mapping.py', '-r' , str(fim_run_dir), '-s', str(output_flows_dir), '-o', str(output_mapping_dir), '-j', str(number_of_jobs)]) end = time.time() elapsed_time = (end-start)/60 print(f'Finished mapping in {elapsed_time} minutes') - - #Updating Mapping Status + + # Updating mapping status print('Updating mapping status') update_mapping_status(str(output_mapping_dir), str(output_flows_dir)) - - \ No newline at end of file diff --git a/tools/generate_categorical_fim_mapping.py b/tools/generate_categorical_fim_mapping.py index 3d591b989..9924d9305 100755 --- a/tools/generate_categorical_fim_mapping.py +++ b/tools/generate_categorical_fim_mapping.py @@ -130,7 +130,7 @@ def run_inundation(args): subset_hucs=huc,num_workers=1,aggregate=False,inundation_raster=output_extent_grid,inundation_polygon=None, depths=output_depth_grid,out_raster_profile=None,out_vector_profile=None,quiet=True ) - + except: # Log errors and their tracebacks f = open(log_file, 'a+') @@ -138,14 +138,14 @@ def run_inundation(args): f.close() #Inundation.py appends the huc code to the supplied output_extent_grid. - #Modify output_extent_grid to match inundation.py saved filename. + #Modify output_extent_grid to match inundation.py saved filename. #Search for this file, if it didn't create, send message to log file. base_file_path,extension = os.path.splitext(output_extent_grid) saved_extent_grid_filename = "{}_{}{}".format(base_file_path,huc,extension) if not os.path.exists(saved_extent_grid_filename): with open(log_file, 'a+') as f: f.write('FAILURE_huc_{}:{}:{} map failed to create\n'.format(huc,ahps_site,magnitude)) - + def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, nws_lid_attributes_filename, log_file): # Create workspace @@ -153,11 +153,9 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, nws_lid_att if not os.path.exists(gpkg_dir): os.mkdir(gpkg_dir) - - #Find the FIM version - norm_path = os.path.normpath(output_cat_fim_dir) - cat_fim_dir_parts = norm_path.split(os.sep) - [fim_version] = [part for part in cat_fim_dir_parts if part.startswith('fim_3')] + + # Find the FIM version + fim_version = os.path.basename(output_cat_fim_dir) merged_layer = os.path.join(output_cat_fim_dir, 'catfim_library.shp') if not os.path.exists(merged_layer): # prevents appending to existing output @@ -241,8 +239,8 @@ def reformat_inundation_maps(args): # Aggregate shapes results = ({'properties': {'extent': 1}, 'geometry': s} for i, (s, v) in enumerate(shapes(image, mask=mask,transform=src.transform))) - - # convert list of shapes to polygon + + # Convert list of shapes to polygon extent_poly = gpd.GeoDataFrame.from_features(list(results), crs=PREP_PROJECTION) # Dissolve polygons @@ -254,7 +252,7 @@ def reformat_inundation_maps(args): extent_poly_diss['magnitude'] = magnitude extent_poly_diss['version'] = fim_version extent_poly_diss['huc'] = huc - + # Project to Web Mercator extent_poly_diss = extent_poly_diss.to_crs(VIZ_PROJECTION) From 6387384b62523b601790a04e1f5dd2ca09578c29 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Tue, 20 Apr 2021 09:54:37 -0500 Subject: [PATCH 067/359] Closing multiprocessing pool objects Updating repo to close all multiprocessing Pool objects to fix 
memory issues --- CHANGELOG.md | 4 ++ src/acquire_and_preprocess_inputs.py | 14 +++---- src/aggregate_fim_outputs.py | 4 +- tools/cache_metrics.py | 46 +++++++++++------------ tools/generate_categorical_fim_mapping.py | 9 ++--- tools/rating_curve_comparison.py | 8 ++-- tools/synthesize_test_cases.py | 28 +++++++------- 7 files changed, 58 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d9969a11..f6143d388 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.15.4 - 2021-04-20 - [PR #356](https://github.com/NOAA-OWP/cahaba/pull/356) + +Closing all multiprocessing Pool objects in repo. +

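The memory problem behind this fix comes from `Pool` objects that are created but never closed, so worker processes and their memory linger after the map call finishes. A self-contained sketch of the context-manager pattern the diffs below switch to; the `work` function and HUC list are placeholders, not code from the repo:

```python
from multiprocessing import Pool

def work(huc):
    # Placeholder for per-HUC processing such as run_inundation or
    # generate_rating_curve_metrics in the patched scripts.
    return huc.upper()

if __name__ == '__main__':
    hucs = ['12090301', '12090302', '12090303']

    # Entering the 'with' block creates the workers; leaving it releases them,
    # instead of leaving idle processes behind as a bare Pool(...) would.
    with Pool(processes=2) as pool:
        results = pool.map(work, hucs)

    print(results)
```

Exiting the `with` block calls `terminate()` on the pool, which the bare `pool = Pool(...)` form in the old code never did.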
## v3.0.15.3 - 2021-04-19 - [PR #358](https://github.com/NOAA-OWP/cahaba/pull/358) Preprocess NHDPlus HR rasters for consistent projections, nodata values, and convert from cm to meters. diff --git a/src/acquire_and_preprocess_inputs.py b/src/acquire_and_preprocess_inputs.py index 852942ff3..89e432966 100755 --- a/src/acquire_and_preprocess_inputs.py +++ b/src/acquire_and_preprocess_inputs.py @@ -107,8 +107,8 @@ def pull_and_prepare_wbd(path_to_saved_data_parent_dir,nwm_dir_name,nwm_file_to_ #wbd_gpkg_list.append(output_gpkg) #procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {wbd_gdb_path} {wbd_layer}'.format(output_gpkg=output_gpkg, wbd_gdb_path=wbd_gdb_path, wbd_layer=wbd_layer, projection=PREP_PROJECTION)]) - #pool = Pool(num_workers) - #pool.map(run_system_command, procs_list) + # with Pool(processes=num_workers) as pool: + # pool.map(run_system_command, procs_list) # Subset WBD layers to CONUS and add to single geopackage. #print("Subsetting WBD layers to CONUS...") @@ -150,9 +150,8 @@ def pull_and_prepare_nwm_hydrofabric(path_to_saved_data_parent_dir, path_to_prei output_gpkg = os.path.join(nwm_hydrofabric_directory, nwm_layer + '_proj.gpkg') procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {nwm_hydrofabric_gdb} {nwm_layer}'.format(projection=PREP_PROJECTION, output_gpkg=output_gpkg, nwm_hydrofabric_gdb=nwm_hydrofabric_gdb, nwm_layer=nwm_layer)]) - pool = Pool(num_workers) - pool.map(run_system_command, procs_list) - pool.close() + with Pool(processes=num_workers) as pool: + pool.map(run_system_command, procs_list) def pull_and_prepare_nhd_data(args): @@ -349,8 +348,9 @@ def manage_preprocessing(hucs_of_interest, num_workers=1,overwrite_nhd=False, ov nhd_procs_list.append([nhd_raster_download_url, nhd_raster_extraction_path, nhd_vector_download_url, nhd_vector_extraction_path, overwrite_nhd]) # Pull and prepare NHD data. - #pool = Pool(num_workers) - #pool.map(pull_and_prepare_nhd_data, nhd_procs_list) + # with Pool(processes=num_workers) as pool: + # pool.map(pull_and_prepare_nhd_data, nhd_procs_list) + for huc in nhd_procs_list: try: pull_and_prepare_nhd_data(huc) diff --git a/src/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py index 9d8676364..d6fdcf698 100644 --- a/src/aggregate_fim_outputs.py +++ b/src/aggregate_fim_outputs.py @@ -218,5 +218,5 @@ def reproject_raster(raster_name): procs_list.append([fim_outputs_directory,huc6,limited_huc_list]) print(f"aggregating {len(huc_list)} hucs to HUC6 scale using {number_of_jobs} jobs") - pool = Pool(number_of_jobs) - pool.map(aggregate_fim_outputs, procs_list) + with Pool(processes=number_of_jobs) as pool: + pool.map(aggregate_fim_outputs, procs_list) diff --git a/tools/cache_metrics.py b/tools/cache_metrics.py index 3f601cc5c..0d02fb217 100755 --- a/tools/cache_metrics.py +++ b/tools/cache_metrics.py @@ -13,15 +13,15 @@ def process_alpha_test(args): - + fim_run_dir = args[0] version = args[1] test_id = args[2] magnitude = args[3] archive_results = args[4] - - mask_type = 'huc' - + + mask_type = 'huc' + if archive_results == False: compare_to_previous = True else: @@ -42,9 +42,9 @@ def process_alpha_test(args): parser.add_argument('-j','--job-number',help='Number of processes to use. Default is 1.',required=False, default="1") parser.add_argument('-s','--special-string',help='Add a special name to the end of the branch.',required=False, default="") parser.add_argument('-b','--benchmark-category',help='Options include ble or ahps. 
Defaults to process both.',required=False, default=None) - + test_cases_dir_list = os.listdir(TEST_CASES_DIR) - + args = vars(parser.parse_args()) config = args['config'] @@ -52,21 +52,21 @@ def process_alpha_test(args): job_number = int(args['job_number']) special_string = args['special_string'] benchmark_category = args['benchmark_category'] - + if fim_version != "all": previous_fim_list = [fim_version] else: - previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) - + previous_fim_list = os.listdir(PREVIOUS_FIM_DIR) + if config == 'PREV': archive_results = True elif config == 'DEV': archive_results = False else: print('Config (-c) option incorrectly set. Use "DEV" or "PREV"') - + benchmark_category_list = [] - + if benchmark_category == None: for d in test_cases_dir_list: if 'test_cases' in d: @@ -77,42 +77,42 @@ def process_alpha_test(args): procs_list = [] for bench_cat in benchmark_category_list: bench_cat_test_case_dir = os.path.join(TEST_CASES_DIR, bench_cat + '_test_cases') - + bench_cat_test_case_list = os.listdir(bench_cat_test_case_dir) - + for test_id in bench_cat_test_case_list: if 'validation' and 'other' not in test_id: - + current_huc = test_id.split('_')[0] if test_id.split('_')[1] in bench_cat: - + for version in previous_fim_list: - + if config == 'DEV': fim_run_dir = os.path.join(OUTPUTS_DIR, version, current_huc) elif config == 'PREV': fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc) - + if not os.path.exists(fim_run_dir): fim_run_dir = os.path.join(PREVIOUS_FIM_DIR, version, current_huc[:6]) # For previous versions of HAND computed at HUC6 scale - + if os.path.exists(fim_run_dir): if special_string != "": version = version + '_' + special_string - + if 'ble' in test_id: magnitude = ['100yr', '500yr'] elif 'usgs' or 'nws' in test_id: magnitude = ['action', 'minor', 'moderate', 'major'] else: continue - + print("Adding " + test_id + " to list of test_ids to process...") if job_number > 1: procs_list.append([fim_run_dir, version, test_id, magnitude, archive_results]) - else: + else: process_alpha_test([fim_run_dir, version, test_id, magnitude, archive_results]) if job_number > 1: - pool = Pool(job_number) - pool.map(process_alpha_test, procs_list) \ No newline at end of file + with Pool(processes=job_number) as pool: + pool.map(process_alpha_test, procs_list) diff --git a/tools/generate_categorical_fim_mapping.py b/tools/generate_categorical_fim_mapping.py index 9924d9305..ebe76cd61 100755 --- a/tools/generate_categorical_fim_mapping.py +++ b/tools/generate_categorical_fim_mapping.py @@ -108,8 +108,8 @@ def generate_categorical_fim(fim_run_dir, source_flow_dir, output_cat_fim_dir, n # Initiate multiprocessing print(f"Running inundation for {len(procs_list)} sites using {number_of_jobs} jobs") - pool = Pool(number_of_jobs) - pool.map(run_inundation, procs_list) + with Pool(processes=number_of_jobs) as pool: + pool.map(run_inundation, procs_list) def run_inundation(args): @@ -153,7 +153,6 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, nws_lid_att if not os.path.exists(gpkg_dir): os.mkdir(gpkg_dir) - # Find the FIM version fim_version = os.path.basename(output_cat_fim_dir) merged_layer = os.path.join(output_cat_fim_dir, 'catfim_library.shp') @@ -193,8 +192,8 @@ def post_process_cat_fim_for_viz(number_of_jobs, output_cat_fim_dir, nws_lid_att pass # Multiprocess with instructions - pool = Pool(number_of_jobs) - pool.map(reformat_inundation_maps, procs_list) + with Pool(processes=number_of_jobs) as pool: + 
pool.map(reformat_inundation_maps, procs_list) # Merge all layers print(f"Merging {len(os.listdir(gpkg_dir))} layers...") diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index 77e6b91ba..714f2c535 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -38,13 +38,13 @@ def check_file_age(file): ''' Checks if file exists, determines the file age, and recommends - updating if older than 1 month. + updating if older than 1 month. Returns ------- None. - ''' + ''' file = Path(file) if file.is_file(): modification_time = file.stat().st_mtime @@ -419,8 +419,8 @@ def calculate_rc_stats_elev(rc,stat_groups=None): # Initiate multiprocessing print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") - pool = Pool(number_of_jobs) - pool.map(generate_rating_curve_metrics, procs_list) + with Pool(processes=number_of_jobs) as pool: + pool.map(generate_rating_curve_metrics, procs_list) print(f"Aggregating rating curve metrics for {len(procs_list)} hucs") aggregate_metrics(output_dir,procs_list,stat_groups) diff --git a/tools/synthesize_test_cases.py b/tools/synthesize_test_cases.py index 06f55b4a0..142f0d387 100755 --- a/tools/synthesize_test_cases.py +++ b/tools/synthesize_test_cases.py @@ -76,16 +76,16 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): int(test_case.split('_')[0]) huc = test_case.split('_')[0] - + for iteration in iteration_list: - + if iteration == "official": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) if iteration == "comparison": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'testing_versions') versions_to_aggregate = [dev_comparison] - + for magnitude in ['100yr', '500yr']: for version in versions_to_aggregate: if '_fr' in version: @@ -100,7 +100,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): calibrated = "no" version_dir = os.path.join(versions_to_crawl, version) magnitude_dir = os.path.join(version_dir, magnitude) - + if os.path.exists(magnitude_dir): magnitude_dir_list = os.listdir(magnitude_dir) for f in magnitude_dir_list: @@ -119,7 +119,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) sub_list_to_append.append(calibrated) - + list_to_write.append(sub_list_to_append) except ValueError: pass @@ -132,9 +132,9 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): int(test_case.split('_')[0]) huc = test_case.split('_')[0] - + for iteration in iteration_list: - + if iteration == "official": versions_to_crawl = os.path.join(benchmark_test_case_dir, test_case, 'official_versions') versions_to_aggregate = os.listdir(PREVIOUS_FIM_DIR) @@ -154,7 +154,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): calibrated = "yes" else: calibrated = "no" - + version_dir = os.path.join(versions_to_crawl, version) magnitude_dir = os.path.join(version_dir, magnitude) if os.path.exists(magnitude_dir): @@ -166,7 +166,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): full_json_path = os.path.join(magnitude_dir, f) flow = '' if os.path.exists(full_json_path): - + # Get flow used to map. 
flow_file = os.path.join(benchmark_test_case_dir, 'validation_data_' + benchmark_source, huc, nws_lid, magnitude, 'ahps_' + nws_lid + '_huc_' + huc + '_flows_' + magnitude + '.csv') if os.path.exists(flow_file): @@ -177,7 +177,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): flow = row[1] if nws_lid == 'mcc01': print(flow) - + stats_dict = json.load(open(full_json_path)) for metric in metrics_to_write: sub_list_to_append.append(stats_dict[metric]) @@ -186,7 +186,7 @@ def create_master_metrics_csv(master_metrics_csv_output, dev_comparison): sub_list_to_append.append(benchmark_source) sub_list_to_append.append(extent_config) sub_list_to_append.append(calibrated) - + list_to_write.append(sub_list_to_append) except ValueError: pass @@ -321,12 +321,12 @@ def process_alpha_test(args): # Multiprocess alpha test runs. if job_number > 1: - pool = Pool(job_number) - pool.map(process_alpha_test, procs_list) + with Pool(processes=job_number) as pool: + pool.map(process_alpha_test, procs_list) # Do aggregate_metrics. print("Creating master metrics CSV...") - + if config == 'DEV': dev_comparison = fim_version + "_" + special_string else: From c31131716224eccfd937343deb67e226e24dcef3 Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Thu, 22 Apr 2021 15:32:10 -0500 Subject: [PATCH 068/359] Update plotting script to not error out if FR or MS are not analyzed when using spatial argument update eval_plots.py to correctly produce spatial datasets if version is not analyzed. For example, if no FR versions are analyzed, instead of throwing an error, it will check to make sure there exists a FR analyzed datasets, if one exists then create spatial data for FR datasets (ble). --- CHANGELOG.md | 7 ++ tools/eval_plots.py | 111 +++++++++++++----------------- tools/generate_categorical_fim.py | 4 +- 3 files changed, 56 insertions(+), 66 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6143d388..1aa4e5883 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.15.5 - 2021-04-20 - [PR #363](https://github.com/NOAA-OWP/cahaba/pull/363) + +Prevent eval_plots.py from erroring out when spatial argument enabled if certain datasets not analyzed. + +## Changes +- Add check to make sure analyzed dataset is available prior to creating spatial dataset. +
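The diffs above (and the v3.0.15.4 entry below, PR #356) replace bare `Pool(...)` calls in `synthesize_test_cases.py`, `generate_categorical_fim_mapping.py`, and `rating_curve_comparison.py` with `with Pool(...)` context managers so worker processes are always cleaned up. A minimal sketch of the pattern, with a hypothetical worker function and job list standing in for the repo's `procs_list`:

```python
from multiprocessing import Pool

def run_alpha_test(args):
    # hypothetical worker standing in for process_alpha_test / run_inundation;
    # the repo passes argument lists such as [fim_run_dir, version, test_id, magnitude, archive_results]
    fim_run_dir, version = args
    return f"{version}: {fim_run_dir}"

if __name__ == '__main__':
    # made-up job list for illustration
    procs_list = [('/outputs/v1/12090301', 'v1'), ('/outputs/v1/12090302', 'v1')]
    # the with-block terminates the pool on exit, so workers are not left running
    # even if pool.map raises
    with Pool(processes=2) as pool:
        results = pool.map(run_alpha_test, procs_list)
    print(results)
```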

## v3.0.15.4 - 2021-04-20 - [PR #356](https://github.com/NOAA-OWP/cahaba/pull/356) Closing all multiprocessing Pool objects in repo. diff --git a/tools/eval_plots.py b/tools/eval_plots.py index 0327319cb..5ea839d8e 100644 --- a/tools/eval_plots.py +++ b/tools/eval_plots.py @@ -605,73 +605,56 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' ############################################################### #This section will join ahps metrics to a spatial point layer ############################################################### - - #Get point data for ahps sites - #Get metrics for usgs and nws benchmark sources - usgs_dataset,sites = all_datasets.get(('usgs','MS')) - nws_dataset, sites = all_datasets.get(('nws','MS')) - #Append usgs/nws dataframes and filter unnecessary columns and rename remaining. - all_ahps_datasets = usgs_dataset.append(nws_dataset) - all_ahps_datasets = all_ahps_datasets.filter(['huc','nws_lid','version','magnitude','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source']) - all_ahps_datasets.rename(columns = {'benchmark_source':'source'}, inplace = True) - - #Get spatial data from WRDS - #Get metadata from WRDS API - select_by = 'nws_lid' - selector = list(all_ahps_datasets.nws_lid.unique()) - metadata_url = f'{API_BASE_URL}/metadata' - metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector) - #Create geospatial data from WRDS output - dictionary, gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = True) - #Trim out unecessary columns and rename remaining columns - gdf = gdf.filter(['identifiers_nws_lid', 'nws_data_name', 'identifiers_nwm_feature_id','nws_data_wfo','nws_data_state','nws_data_county','geometry']) - gdf.rename(columns = {'identifiers_nws_lid':'nws_lid', 'nws_data_name':'lid_name','identifiers_nwm_feature_id':'feature_id','nws_data_wfo':'wfo','nws_data_state':'state','nws_data_county':'county','HUC8':'huc8'}, inplace = True) - - #Join spatial data to metric data - gdf['nws_lid'] = gdf['nws_lid'].str.lower() - joined = gdf.merge(all_ahps_datasets, on = 'nws_lid') - #Project to VIZ projection and write to file - joined = joined.to_crs(VIZ_PROJECTION) - joined.to_file(Path(workspace) / 'fim_performance_points.shp') - - ''' - ############################################################### - #If user wants to append information such as what maps or flows were used for evaluation. This is already tested. - #User must supply the extent layer generated from preprocessing NWS/USGS datasets. 
- ############################################################### - #Read extent layer to GeoDataFrame and drop the geometry column - evaluated_ahps_extent = gpd.read_file(/Path/to/extent/layer/generated/during/preprocessing) - evaluated_ahps_extent.drop(columns = ['geometry'], inplace = True) - #Re-arrange dataset to get flows used for evaluation - flows = pd.melt(evaluated_ahps_extent, id_vars = ['nws_lid','source'], value_vars = ['action_Q','minor_Q','moderate_Q','major_Q'], var_name = 'magnitude', value_name = 'eval_Q') - flows['magnitude'] = flows['magnitude'].str.split('_', 1, expand = True) - #Re-arrange dataset to get maps used for evaluation - maps = pd.melt(evaluated_ahps_extent, id_vars = ['nws_lid','source'], value_vars = ['action','minor','moderate','major'], var_name = 'magnitude', value_name = 'eval_maps') - maps['eval_maps'] = maps['eval_maps'].str.split('\\').str[-1] - #Merge flows and maps into single DataFrame - flows_maps = pd.merge(flows,maps, how = 'left', left_on = ['nws_lid','source','magnitude'], right_on = ['nws_lid','source','magnitude']) - # combine flows_maps to spatial layer (gdf) - joined = joined.merge(flows_maps, left_on = ['nws_lid','magnitude','source'], right_on = ['nws_lid','magnitude','source']) - #Write to file - joined.to_file(Path(workspace)/'fim_performance_points.shp') - ''' + if all_datasets.get(('nws','MS')) and all_datasets.get(('usgs','MS')): + #Get point data for ahps sites + #Get metrics for usgs and nws benchmark sources + usgs_dataset,sites = all_datasets.get(('usgs','MS')) + nws_dataset, sites = all_datasets.get(('nws','MS')) + #Append usgs/nws dataframes and filter unnecessary columns and rename remaining. + all_ahps_datasets = usgs_dataset.append(nws_dataset) + all_ahps_datasets = all_ahps_datasets.filter(['huc','nws_lid','version','magnitude','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source']) + all_ahps_datasets.rename(columns = {'benchmark_source':'source'}, inplace = True) + + #Get spatial data from WRDS + #Get metadata from WRDS API + select_by = 'nws_lid' + selector = list(all_ahps_datasets.nws_lid.unique()) + metadata_url = f'{API_BASE_URL}/metadata' + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector) + #Create geospatial data from WRDS output + dictionary, gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = True) + #Trim out unecessary columns and rename remaining columns + gdf = gdf.filter(['identifiers_nws_lid', 'nws_data_name', 'identifiers_nwm_feature_id','nws_data_wfo','nws_data_state','nws_data_county','geometry']) + gdf.rename(columns = {'identifiers_nws_lid':'nws_lid', 'nws_data_name':'lid_name','identifiers_nwm_feature_id':'feature_id','nws_data_wfo':'wfo','nws_data_state':'state','nws_data_county':'county','HUC8':'huc8'}, inplace = True) + + #Join spatial data to metric data + gdf['nws_lid'] = gdf['nws_lid'].str.lower() + joined = gdf.merge(all_ahps_datasets, on = 'nws_lid') + #Project to VIZ projection and write to file + joined = joined.to_crs(VIZ_PROJECTION) + joined.to_file(Path(workspace) / 'fim_performance_points.shp') + else: + print('NWS/USGS MS datasets not analyzed, no spatial data created.\nTo produce spatial data analyze a MS version.') + ################################################################ #This section joins ble (FR) metrics to a spatial layer of HUCs. 
################################################################ - #Read in HUC spatial layer - wbd_gdf = gpd.read_file(Path(WBD_LAYER), layer = 'WBDHU8') - #Select BLE, FR dataset. - ble_dataset, sites = all_datasets.get(('ble','FR')) - #Join metrics to HUC spatial layer - wbd_with_metrics = wbd_gdf.merge(ble_dataset, how = 'inner', left_on = 'HUC8', right_on = 'huc') - #Filter out unnecessary columns - wbd_with_metrics = wbd_with_metrics.filter(['version','magnitude','huc','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source','geometry']) - wbd_with_metrics.rename(columns = {'benchmark_source':'source'}, inplace = True ) - #Project to VIZ projection - wbd_with_metrics = wbd_with_metrics.to_crs(VIZ_PROJECTION) - #Write out to file - wbd_with_metrics.to_file(Path(workspace) / 'fim_performance_polys.shp') - + if all_datasets.get(('ble','FR')): + #Select BLE, FR dataset. + ble_dataset, sites = all_datasets.get(('ble','FR')) + #Read in HUC spatial layer + wbd_gdf = gpd.read_file(Path(WBD_LAYER), layer = 'WBDHU8') + #Join metrics to HUC spatial layer + wbd_with_metrics = wbd_gdf.merge(ble_dataset, how = 'inner', left_on = 'HUC8', right_on = 'huc') + #Filter out unnecessary columns + wbd_with_metrics = wbd_with_metrics.filter(['version','magnitude','huc','TP_area_km2','FP_area_km2','TN_area_km2','FN_area_km2','CSI','FAR','TPR','benchmark_source','geometry']) + wbd_with_metrics.rename(columns = {'benchmark_source':'source'}, inplace = True ) + #Project to VIZ projection + wbd_with_metrics = wbd_with_metrics.to_crs(VIZ_PROJECTION) + #Write out to file + wbd_with_metrics.to_file(Path(workspace) / 'fim_performance_polys.shp') + else: + print('BLE FR datasets not analyzed, no spatial data created.\nTo produce spatial data analyze a FR version') ####################################################################### if __name__ == '__main__': diff --git a/tools/generate_categorical_fim.py b/tools/generate_categorical_fim.py index cea992df1..c572ee6ee 100755 --- a/tools/generate_categorical_fim.py +++ b/tools/generate_categorical_fim.py @@ -90,7 +90,7 @@ def update_mapping_status(output_mapping_dir, output_flows_dir): # Generate CatFIM flow files print('Creating flow files') start = time.time() - subprocess.call(['python3','foss_fim/tools/generate_categorical_fim_flows.py', '-w' , str(output_flows_dir), '-u', nwm_us_search, '-d', nwm_ds_search]) + subprocess.call(['python3','/foss_fim/tools/generate_categorical_fim_flows.py', '-w' , str(output_flows_dir), '-u', nwm_us_search, '-d', nwm_ds_search]) end = time.time() elapsed_time = (end-start)/60 print(f'Finished creating flow files in {elapsed_time} minutes') @@ -98,7 +98,7 @@ def update_mapping_status(output_mapping_dir, output_flows_dir): # Generate CatFIM mapping print('Begin mapping') start = time.time() - subprocess.call(['python3','foss_fim/tools/generate_categorical_fim_mapping.py', '-r' , str(fim_run_dir), '-s', str(output_flows_dir), '-o', str(output_mapping_dir), '-j', str(number_of_jobs)]) + subprocess.call(['python3','/foss_fim/tools/generate_categorical_fim_mapping.py', '-r' , str(fim_run_dir), '-s', str(output_flows_dir), '-o', str(output_mapping_dir), '-j', str(number_of_jobs)]) end = time.time() elapsed_time = (end-start)/60 print(f'Finished mapping in {elapsed_time} minutes') From 99c197b5ec6cbddb69d58795b0103980e8075c18 Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Tue, 27 Apr 2021 12:42:59 -0500 Subject: [PATCH 069/359] Implement 
CatFIM threshold flows to Sierra test and add AHPS benchmark preprocessing scripts. Implement CatFIM threshold flows to Sierra test and add AHPS benchmark preprocessing scripts. - Produce CatFIM flows file when running rating_curve_get_usgs_gages.py. - Several scripts to preprocess AHPS benchmark data. Requires numerous file dependencies not available through Cahaba. - Modify rating_curve_comparison.py to ingest CatFIM threshold flows in calculations. - Modify eval_plots.py to save all site specific bar plots in same parent directory instead of in subdirectories. - Add variables to env.template for AHPS benchmark preprocessing. This resolves #341, resolves #361, and resolves #357. --- CHANGELOG.md | 15 + tools/.env.template | 2 + tools/eval_plots.py | 4 +- tools/preprocess_ahps_nws.py | 355 ++++++++++++++++++ tools/preprocess_ahps_usgs.py | 471 ++++++++++++++++++++++++ tools/preprocess_download_usgs_grids.py | 162 ++++++++ tools/rating_curve_comparison.py | 27 +- tools/rating_curve_get_usgs_curves.py | 79 +++- tools/tools_shared_functions.py | 315 +++++++++++++++- 9 files changed, 1411 insertions(+), 19 deletions(-) create mode 100644 tools/preprocess_ahps_nws.py create mode 100644 tools/preprocess_ahps_usgs.py create mode 100644 tools/preprocess_download_usgs_grids.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1aa4e5883..1cc4cc499 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v3.0.15.6 - 2021-04-23 - [PR #365](https://github.com/NOAA-OWP/cahaba/pull/365) + +Implement CatFIM threshold flows to Sierra test and add AHPS benchmark preprocessing scripts. + +## Additions +- Produce CatFIM flows file when running `rating_curve_get_usgs_gages.py`. +- Several scripts to preprocess AHPS benchmark data. Requires numerous file dependencies not available through Cahaba. + +## Changes +- Modify `rating_curve_comparison.py` to ingest CatFIM threshold flows in calculations. +- Modify `eval_plots.py` to save all site specific bar plots in same parent directory instead of in subdirectories. +- Add variables to `env.template` for AHPS benchmark preprocessing. + +
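The preprocessing scripts added below interpolate a flow for each benchmark grid from the site's rating curve, passing `left=np.nan`/`right=np.nan` so stages outside the curve are dropped. A minimal sketch of that step, using made-up elevation and flow values (the real scripts work in NAVD88 elevations):

```python
import numpy as np
import pandas as pd

# hypothetical rating curve (elevation_navd88 vs. flow) and benchmark grid elevations
rating_curve = pd.DataFrame({'elevation_navd88': [10.0, 12.0, 14.0, 16.0],
                             'flow': [500.0, 1500.0, 4000.0, 9000.0]})
grids = pd.DataFrame({'elevation': [11.0, 13.5, 18.0]})

# interpolate a flow for each grid; values outside the curve become NaN,
# mirroring the left/right arguments used in the preprocess scripts
grids['flow'] = np.interp(grids['elevation'],
                          rating_curve['elevation_navd88'],
                          rating_curve['flow'],
                          left=np.nan, right=np.nan)
print(grids)
```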

## v3.0.15.5 - 2021-04-20 - [PR #363](https://github.com/NOAA-OWP/cahaba/pull/363) Prevent eval_plots.py from erroring out when spatial argument enabled if certain datasets not analyzed. @@ -7,6 +21,7 @@ Prevent eval_plots.py from erroring out when spatial argument enabled if certain ## Changes - Add check to make sure analyzed dataset is available prior to creating spatial dataset.
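The AHPS preprocessing scripts read their service endpoints from a local `.env` file; the diff below adds `USGS_METADATA_URL` and `USGS_DOWNLOAD_URL` to `tools/.env.template`, and they are loaded with python-dotenv just like the existing keys. A minimal sketch (the values are placeholders supplied locally):

```python
import os
from dotenv import load_dotenv

# load_dotenv() picks up a local .env file (copied from tools/.env.template and filled in)
load_dotenv()
USGS_METADATA_URL = os.getenv("USGS_METADATA_URL")
USGS_DOWNLOAD_URL = os.getenv("USGS_DOWNLOAD_URL")

if not USGS_METADATA_URL or not USGS_DOWNLOAD_URL:
    raise SystemExit("Set USGS_METADATA_URL and USGS_DOWNLOAD_URL in the .env file")
print(USGS_METADATA_URL, USGS_DOWNLOAD_URL)
```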

+ ## v3.0.15.4 - 2021-04-20 - [PR #356](https://github.com/NOAA-OWP/cahaba/pull/356) Closing all multiprocessing Pool objects in repo. diff --git a/tools/.env.template b/tools/.env.template index ed83c8113..048c2283c 100644 --- a/tools/.env.template +++ b/tools/.env.template @@ -2,3 +2,5 @@ API_BASE_URL= EVALUATED_SITES_CSV= WBD_LAYER= NWM_FLOWS_MS= +USGS_METADATA_URL= +USGS_DOWNLOAD_URL= diff --git a/tools/eval_plots.py b/tools/eval_plots.py index 5ea839d8e..860c38382 100644 --- a/tools/eval_plots.py +++ b/tools/eval_plots.py @@ -570,10 +570,10 @@ def eval_plots(metrics_csv, workspace, versions = [], stats = ['CSI','FAR','TPR' #If enabled, write out barplots of CSI for individual sites. if site_barplots: + individual_dirs = output_workspace / 'individual' + individual_dirs.mkdir(parents = True, exist_ok = True) subset = dataset.groupby(base_resolution) for site_name, site_data in subset: - individual_dirs = output_workspace / 'individual' / str(site_name) - individual_dirs.mkdir(parents = True, exist_ok = True) site_file = individual_dirs / f'csi_{str(site_name)}_{dataset_name}_{configuration.lower()}.png' barplot(dataframe = site_data, x_field = 'magnitude', x_order = magnitude_order, y_field = 'CSI', hue_field = 'version', ordered_hue = version_order, title_text = f'{str(site_name).upper()} FIM Scores', fim_configuration = configuration, textbox_str = False, simplify_legend = True, dest_file = site_file) diff --git a/tools/preprocess_ahps_nws.py b/tools/preprocess_ahps_nws.py new file mode 100644 index 000000000..e2db2b47d --- /dev/null +++ b/tools/preprocess_ahps_nws.py @@ -0,0 +1,355 @@ +#!/usr/bin/env python3 +import numpy as np +import pathlib +from pathlib import Path +import pandas as pd +import geopandas as gpd +import rasterio +from collections import defaultdict +from tools_shared_functions import mainstem_nwm_segs, get_metadata, aggregate_wbd_hucs, get_thresholds, get_datum, ngvd_to_navd_ft, get_rating_curve, select_grids, get_nwm_segs, flow_data, process_extent, process_grid, raster_to_feature +import argparse +from dotenv import load_dotenv +import os +import traceback +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION + +def get_env_paths(): + load_dotenv() + #import variables from .env file + API_BASE_URL = os.getenv("API_BASE_URL") + EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") + WBD_LAYER = os.getenv("WBD_LAYER") + return API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER + +######################################################## +#Preprocess AHPS NWS +#This script will work on NWS AHPS fim data (some assumptions made about the data structure). +#Provide a source directory path (source_dir) where all NWS AHPS FIM data is located. NWS source data was previously downloaded and extracted. Some data is buried through several layers of subfolders in the source data. In general, the downloaded datasets were unzipped and starting from where the folder name was the AHPS code, this was copied and pasted into a new directory which is the source_dir. +#Provide a destination directory path (destination) which is where all outputs are located. +#Provide a reference raster path. 
+######################################################## +# source_dir = Path(r'path/to/nws/downloads') +# destination = Path(r'path/to/preprocessed/nws/data') +# reference_raster= Path(r'path/to/reference raster') + +def preprocess_nws(source_dir, destination, reference_raster): + source_dir = Path(source_dir) + destination = Path(destination) + reference_raster = Path(reference_raster) + metadata_url = f'{API_BASE_URL}/metadata' + threshold_url = f'{API_BASE_URL}/nws_threshold' + rating_curve_url = f'{API_BASE_URL}/rating_curve' + log_file = destination / 'log.txt' + + #Write a run-time log file + destination.mkdir(parents = True, exist_ok = True) + log_file = destination / 'log.txt' + f = open(log_file, 'a+') + + #Define distance (in miles) to search for nwm segments + nwm_ds_search = 10 + nwm_us_search = 10 + #The NWS data was downloaded and unzipped. The ahps folder (with 5 digit code as folder name) was cut and pasted into a separate directory. So the ahps_codes iterates through that parent directory to get all of the AHPS codes that have data. + ahps_codes = [i.name for i in source_dir.glob('*') if i.is_dir() and len(i.name) == 5] + #Get mainstems NWM segments + #Workaround for sites in 02030103 and 02030104, many are not rfc_forecast_point = True + list_of_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].to_list() + ms_segs = mainstem_nwm_segs(metadata_url, list_of_sites) + + #Find depth grid subfolder + for code in ahps_codes: + f.write(f'{code} : Processing\n') + print(f'processing {code}') + #'mnda2' is in Alaska outside of NWM domain. + if code in ['mnda2']: + f.write(f'{code} : skipping because outside of NWM domain\n') + continue + + #Get metadata of site and search for NWM segments x miles upstream/x miles downstream + select_by = 'nws_lid' + selector = [code] + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = None, upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + metadata = metadata_list[0] + + #Assign huc to site using FIM huc layer. + dictionary, out_gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = False) + [huc] = list(dictionary.keys()) + + #Get thresholds for action, minor, moderate, major. If no threshold data present, exit. + #The threshold flows source will dictate what rating curve (and datum) to use as it uses a decision tree (USGS priority then NRLDB) + #In multiple instances a USGS ID is given but then no USGS rating curve or in some cases no USGS datum is supplied. + select_by = 'nws_lid' + selector = code + stages, flows =get_thresholds(threshold_url, select_by, selector, threshold = 'all') + + #Make sure at least one valid threshold is supplied from WRDS. + threshold_categories = ['action','minor','moderate','major'] + if not any([stages[threshold] for threshold in threshold_categories]): + f.write(f'{code} : skipping because no threshold stages available\n') + continue + + #determine source of interpolated threshold flows, this will be the rating curve that will be used. + rating_curve_source = flows.get('source') + if rating_curve_source is None: + f.write(f'{code} : skipping because no rating curve source\n') + continue + + #Get the datum and adjust to NAVD if necessary. 
+ nws, usgs = get_datum(metadata) + datum_data = {} + if rating_curve_source == 'USGS Rating Depot': + datum_data = usgs + elif rating_curve_source == 'NRLDB': + datum_data = nws + + #If datum not supplied, skip to new site + datum = datum_data.get('datum', None) + if datum is None: + f.write(f'{code} : skipping because site is missing datum\n') + continue + + #Custom workaround these sites have faulty crs from WRDS. CRS needed for NGVD29 conversion to NAVD88 + # USGS info indicates NAD83 for site: bgwn7, fatw3, mnvn4, nhpp1, pinn4, rgln4, rssk1, sign4, smfn7, stkn4, wlln7 + # Assumed to be NAD83 (no info from USGS or NWS data): dlrt2, eagi1, eppt2, jffw3, ldot2, rgdt2 + if code in ['bgwn7', 'dlrt2','eagi1','eppt2','fatw3','jffw3','ldot2','mnvn4','nhpp1','pinn4','rgdt2','rgln4','rssk1','sign4','smfn7','stkn4','wlln7' ]: + datum_data.update(crs = 'NAD83') + + #Custom workaround these sites have poorly defined vcs from WRDS. VCS needed to ensure datum reported in NAVD88. If NGVD29 it is converted to NAVD88. + #bgwn7, eagi1 vertical datum unknown, assume navd88 + #fatw3 USGS data indicates vcs is NAVD88 (USGS and NWS info agree on datum value). + #wlln7 USGS data indicates vcs is NGVD29 (USGS and NWS info agree on datum value). + if code in ['bgwn7','eagi1','fatw3']: + datum_data.update(vcs = 'NAVD88') + elif code == 'wlln7': + datum_data.update(vcs = 'NGVD29') + + #Adjust datum to NAVD88 if needed + if datum_data.get('vcs') in ['NGVD29', 'NGVD 1929', 'NGVD,1929']: + #Get the datum adjustment to convert NGVD to NAVD. Sites not in contiguous US are previously removed otherwise the region needs changed. + datum_adj_ft = ngvd_to_navd_ft(datum_info = datum_data, region = 'contiguous') + datum88 = round(datum + datum_adj_ft, 2) + else: + datum88 = datum + + #get entire rating curve, same source as interpolated threshold flows (USGS Rating Depot first then NRLDB rating curve). + if rating_curve_source == 'NRLDB': + site = [code] + elif rating_curve_source == 'USGS Rating Depot': + site = [metadata.get('identifiers').get('usgs_site_code')] + + rating_curve = get_rating_curve(rating_curve_url, site) + + #Add elevation fields to rating curve + #Add field with vertical coordinate system + vcs = datum_data['vcs'] + if not vcs: + vcs = 'Unspecified, Assumed NAVD88' + rating_curve['vcs'] = vcs + + #Add field with original datum + rating_curve['datum'] = datum + + #If VCS is NGVD29 add rating curve elevation (in NGVD) as well as the NAVD88 datum + if vcs in ['NGVD29', 'NGVD 1929']: + #Add field with raw elevation conversion (datum + stage) + rating_curve['elevation_ngvd29'] = rating_curve['stage'] + datum + #Add field with adjusted NAVD88 datum + rating_curve['datum_navd88'] = datum88 + + #Add field with NAVD88 elevation + rating_curve['elevation_navd88'] = rating_curve['stage'] + datum88 + + + #Search through ahps directory find depth grid folder + parent_path = source_dir / code + + #Work around for bgwn7 and smit2 where grids were custom created from polygons (bgwn7-no grids, smit2 - no projection and applying projection from polygons had errors) + if code in ['bgwn7', 'smit2']: + [grids_dir] = [directory for directory in parent_path.glob('*custom*') if directory.is_dir()] + else: + #Find the directory containing depth grids. Assumes only one directory will be returned. 
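+            #(note: the single-element unpacking below raises ValueError if zero or multiple matching directories are found)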
+ [grids_dir] = [directory for directory in parent_path.glob('*depth_grid*') if directory.is_dir()] + + #Get grids (all NWS ESRI grids were converted to Geotiff) + grid_paths = [grids for grids in grids_dir.glob('*.tif*') if grids.suffix in ['.tif', '.tiff']] + grid_names = [name.stem for name in grid_paths] + #If grids are present, interpolate a flow for the grid. + if grid_paths: + #Construct Dataframe containing grid paths, names, datum, code + df = pd.DataFrame({'code': code, 'path':grid_paths, 'name': grid_names, 'datum88': datum88}) + #Determine elevation from the grid name. All elevations are assumed to be in NAVD88 based on random inspection of AHPS inundation website layers. + df['elevation'] = df['name'].str.replace('elev_', '', case = False).str.replace('_','.').astype(float) + # Add a stage column using the datum (in NAVD88). Stage is rounded to the nearest 0.1 ft. + df['stage'] = round(df['elevation'] - df['datum88'],1) + #Sort stage in ascending order + df.sort_values(by = 'elevation', ascending = True, inplace = True) + #Interpolate flow from the rating curve using the elevation_navd88 values, if value is above or below the rating curve assign nan. + df['flow'] = np.interp(df['elevation'], rating_curve['elevation_navd88'], rating_curve['flow'], left = np.nan, right = np.nan) + #Assign flow source to reflect interpolation from rc + df['flow_source'] = f'interpolated from {rating_curve_source} rating curve' + + else: + f.write(f'{code} : Site has no benchmark grids\n') + + #Select the appropriate threshold grid for evaluation. Using the supplied threshold stages and the calculated map stages. + grids,grid_flows = select_grids(df, stages, datum88, 1.1) + + #workaroud for bigi1 and eag1 which have gridnames based on flows (not elevations) + if code in ['eagi1', 'bigi1']: + #Elevation is really flows (due to file names), assign this to stage + df['flow'] = df['elevation'] + df['stage'] = df['elevation'] + #Select grids using flows + grids, grid_flows = select_grids(df, flows, datum88, 500) + f.write(f'{code} : Site workaround grids names based on flows not elevation\n') + + #Obtain NWM segments that are on ms to apply flows + segments = get_nwm_segs(metadata) + site_ms_segs = set(segments).intersection(ms_segs) + segments = list(site_ms_segs) + + #Write out boolean benchmark raster and flow file + try: + #for each threshold + for i in ['action', 'minor', 'moderate', 'major']: + #Obtain the flow and grid associated with threshold. + flow = grid_flows[i] + grid = grids[i] + extent = grids['extent'] + #Make sure that flow and flow grid are valid + if not grid in ['No Map', 'No Threshold', 'No Flow']: + #define output directory (to be created later) + outputdir = destination / huc / code / i + + #Create Binary Grids, first create domain of analysis, then create binary grid + + #Domain extent is largest floodmap in the static library WITH holes filled + filled_domain_raster = outputdir.parent / f'{code}_extent.tif' + + #Open benchmark data as a rasterio object. + benchmark = rasterio.open(grid) + benchmark_profile = benchmark.profile + + #Open extent data as rasterio object + domain = rasterio.open(extent) + domain_profile = domain.profile + + #if grid doesn't have CRS, then assign CRS using a polygon from the ahps inundation library + if not benchmark.crs: + #Obtain crs of the first polygon inundation layer associated with ahps code. Assumes only one polygon* subdirectory and assumes the polygon directory has at least 1 inundation shapefile. 
+ [ahps_polygons_directory] = [directory for directory in parent_path.glob('*polygon*') if directory.is_dir()] + shapefile_path = list(ahps_polygons_directory.glob('*.shp'))[0] + shapefile = gpd.read_file(shapefile_path) + #Update benchmark and domain profiles with crs from shapefile. Assumed that benchmark/extent have same crs. + benchmark_profile.update(crs = shapefile.crs) + domain_profile.update(crs = shapefile.crs) + + #Create a domain raster if it does not exist. + if not filled_domain_raster.exists(): + #Domain should have donut holes removed + process_extent(domain, domain_profile, output_raster = filled_domain_raster) + + + #Open domain raster as rasterio object + filled_domain = rasterio.open(filled_domain_raster) + filled_domain_profile = filled_domain.profile + + #Create the binary benchmark raster + boolean_benchmark, boolean_profile = process_grid(benchmark, benchmark_profile, filled_domain, filled_domain_profile, reference_raster) + + #Output binary benchmark grid and flow file to destination + outputdir.mkdir(parents = True, exist_ok = True) + output_raster = outputdir / (f'ahps_{code}_huc_{huc}_depth_{i}.tif') + + with rasterio.Env(): + with rasterio.open(output_raster, 'w', **boolean_profile) as dst: + dst.write(boolean_benchmark,1) + + #Close datasets + domain.close() + filled_domain.close() + benchmark.close() + + #Create the guts of the flow file. + flow_info = flow_data(segments,flow) + #Write out the flow file to csv + output_flow_file = outputdir / (f'ahps_{code}_huc_{huc}_flows_{i}.csv') + flow_info.to_csv(output_flow_file, index = False) + + except Exception as e: + f.write(f'{code} : Error preprocessing benchmark\n{repr(e)}\n') + f.write(traceback.format_exc()) + f.write('\n') + print(traceback.format_exc()) + #Process extents, only create extent if ahps code subfolder is present in destination directory. 
+ ahps_directory = destination / huc / code + if ahps_directory.exists(): + #Delete extent raster + filled_extent = ahps_directory / f'{code}_extent.tif' + if filled_extent.exists: + filled_extent.unlink() + + #Populate attribute information for site + grids_attributes = pd.DataFrame(data=grids.items(), columns = ['magnitude','path']) + flows_attributes = pd.DataFrame(data=grid_flows.items(), columns=['magnitude','grid_flow_cfs']) + threshold_attributes = pd.DataFrame(data=stages.items(), columns = ['magnitude','magnitude_stage']) + #merge dataframes + attributes = grids_attributes.merge(flows_attributes, on = 'magnitude') + attributes = attributes.merge(threshold_attributes, on = 'magnitude') + attributes = attributes.merge(df[['path','stage','elevation', 'flow_source']], on = 'path') + #Strip out sensitive paths and convert magnitude stage to elevation + attributes['path'] = attributes['path'].apply(lambda x :Path(x).name) + attributes['magnitude_elev_navd88']=(datum88 + attributes['magnitude_stage']).astype(float).round(1) + #Add general site information + attributes['nws_lid'] = code + attributes['wfo'] = metadata['nws_data']['wfo'] + attributes['rfc'] = metadata['nws_data']['rfc'] + attributes['state'] = metadata['nws_data']['state'] + attributes['huc'] = huc + #Rename and Reorder columns + attributes.rename(columns = {'path':'grid_name', 'flow_source':'grid_flow_source','stage':'grid_stage','elevation':'grid_elev_navd88'}, inplace = True) + attributes = attributes[['nws_lid','wfo','rfc','state','huc','magnitude','magnitude_stage','magnitude_elev_navd88','grid_name','grid_stage','grid_elev_navd88', 'grid_flow_cfs','grid_flow_source']] + #Save attributes to csv + attributes.to_csv(ahps_directory / f'{code}_attributes.csv', index = False) + + #Write the rating curve to a file + rating_curve_output = ahps_directory / (f'{code}_rating_curve.csv') + rating_curve['lat'] = datum_data['lat'] + rating_curve['lon'] = datum_data['lon'] + rating_curve.to_csv(rating_curve_output, index = False) + + #Write the interpolated flows to file + df_output = ahps_directory / (f'{code}_interpolated_flows.csv') + df.to_csv(df_output, index = False) + + else: + f.write(f'{code} : Unable to evaluate site, missing all flows\n') + + #Close log file. 
+ f.close() + + #Combine all attribute files + attribute_files = list(destination.rglob('*_attributes.csv')) + all_attributes = pd.DataFrame() + for i in attribute_files: + attribute_df = pd.read_csv(i, dtype={'huc':str}) + all_attributes = all_attributes.append(attribute_df) + + if not all_attributes.empty: + all_attributes.to_csv(destination / 'attributes.csv', index = False) + return + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Create preprocessed USGS benchmark datasets at AHPS locations.') + parser.add_argument('-s', '--source_dir', help = 'Workspace where all source data is located.', required = True) + parser.add_argument('-d', '--destination', help = 'Directory where outputs are to be stored', required = True) + parser.add_argument('-r', '--reference_raster', help = 'reference raster used for benchmark raster creation', required = True) + args = vars(parser.parse_args()) + + + #Run get_env_paths and static_flow_lids + API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER = get_env_paths() + preprocess_nws(**args) \ No newline at end of file diff --git a/tools/preprocess_ahps_usgs.py b/tools/preprocess_ahps_usgs.py new file mode 100644 index 000000000..f465ba682 --- /dev/null +++ b/tools/preprocess_ahps_usgs.py @@ -0,0 +1,471 @@ +#!/usr/bin/env python3 + +import numpy as np +import pathlib +from pathlib import Path +import pandas as pd +import geopandas as gpd +import rasterio +import requests +from collections import defaultdict +from tools_shared_functions import mainstem_nwm_segs, get_metadata, aggregate_wbd_hucs, get_thresholds, get_datum, ngvd_to_navd_ft, get_rating_curve, select_grids, get_nwm_segs, flow_data, process_extent, process_grid, raster_to_feature +import argparse +from dotenv import load_dotenv +import os +import sys +sys.path.append('/foss_fim/src') +import traceback +from utils.shared_variables import PREP_PROJECTION,VIZ_PROJECTION + + +def get_env_paths(): + load_dotenv() + #import variables from .env file + API_BASE_URL = os.getenv("API_BASE_URL") + EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") + WBD_LAYER = os.getenv("WBD_LAYER") + USGS_METADATA_URL = os.getenv("USGS_METADATA_URL") + return API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER, USGS_METADATA_URL +############################################################################### +#Get USGS Site metadata +############################################################################### +def usgs_site_metadata(code): + ''' + Retrieves site metadata from USGS API and saves output as dictionary. Information used includes shortname and site number. + + Parameters + ---------- + code : STR + AHPS code. + USGS_METADATA_URL : STR + URL for USGS datasets. + + Returns + ------- + site_metadata : DICT + Output metadata for an AHPS site. 
+ ''' + # Make sure code is lower case + code = code.lower() + # Get site metadata from USGS API using ahps code + site_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/sites/MapServer/0/query?where=AHPS_ID+%3D+%27{code}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + #Get data from API + response = requests.get(site_url) + #If response is valid, then get metadata and save to dictionary + if response.ok: + response_json = response.json() + site_metadata = response_json['features'][0]['attributes'] + return site_metadata + +############################################################################### +#Get USGS grid metadata +############################################################################### +def usgs_grid_metadata(code, has_grid_override = False): + ''' + Given an ahps code, retrieve the site metadata (using usgs_site_metadata) and then use that information to obtain metadata about available grids. Information includes elevation, stage, and flow for each grid. + + Parameters + ---------- + code : STR + AHPS code. + + Returns + ------- + appended_dictionary : DICT + Dictionary of metadata for each available inundation grid including grid id, flows, elevations, grid name for each inundation grid. + ''' + #Make sure code is in lower case + code = code.lower() + # Get site_metadata + site_metadata = usgs_site_metadata(code) + #From site metadata get the SHORT_NAME, SITE_NO, and 'MULTI_SITE', 'HAS_GRIDS' key values + short_name = site_metadata['SHORT_NAME'] + site_no = site_metadata['SITE_NO'] + has_grids = site_metadata['HAS_GRIDS'] + #There is at least one site (kilo1) that doesn't have grids but polygons are available which have been converted grids. 
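+    #(has_grid_override forces has_grids to 1 so such converted-polygon grids are still processed)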
+ if has_grid_override: + has_grids = 1 + multi_site = site_metadata['MULTI_SITE'] + #Grid metadata located at one of three URLs + if multi_site == 0 and has_grids == 1: + grids_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/floodExtents/MapServer/0/query?where=USGSID+%3D+%27{site_no}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + elif multi_site > 0 and multi_site < 3 and has_grids == 1: + grids_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/floodExtentsMulti/MapServer/0/query?where=USGSID_1+%3D+%27{site_no}%27+OR+USGSID_2+%3D+%27{site_no}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + elif multi_site == 3 and has_grids == 1: + grids_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/floodExtentsThreeSites/MapServer/0/query?where=USGSID_1+%3D+%27{site_no}%27+OR+USGSID_2+%3D+%27{site_no}%27+OR+USGSID_3+%3D+%27{site_no}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + #Only get metadata on grids if site has grids available + if has_grids == 1: + #Get data from API + response = requests.get(grids_url) + #If response is valid then combine metadata on all grids into a single dictionary and write out to DataFrame. + if response.ok: + response_json =response.json() + metadata = response_json['features'] + appended_dictionary = {} + for i in metadata: + dictionary = i['attributes'] + gridname = short_name + '_' + str(dictionary['GRIDID']).zfill(4) + appended_dictionary[gridname] = dictionary + else: + appended_dictionary = {} + return appended_dictionary + + +######################################################## +#Preprocess USGS FIM +#This script will work on USGS FIM datasets. +#Provide source directory path (source_dir) where all USGS FIM data is located. This data was previously downloaded from USGS urls. +#Provide a destination directory path (destination) where all outputs are located. +#Provide a reference raster path. 
+######################################################## +#source_dir = Path(r'path/to/usgs/downloads') +#destination = Path(r'path/to/preprocessed/usgs/data') +#reference_raster= Path(r'path/to/reference raster') +def preprocess_usgs(source_dir, destination, reference_raster): + ''' + Preprocess USGS AHPS datasets. + + Parameters + ---------- + source_dir : str + Path to USGS Benchmark Datasets (AHPS) + destination : str + Path to output directory of preprocessed datasets. + reference_raster : str + Path to reference raster for benchmark binary raster creation. + + Returns + ------- + None. + + ''' + + source_dir = Path(source_dir) + destination = Path(destination) + reference_raster = Path(reference_raster) + metadata_url = f'{API_BASE_URL}/metadata' + threshold_url = f'{API_BASE_URL}/nws_threshold' + rating_curve_url = f'{API_BASE_URL}/rating_curve' + + #Write log file + destination.mkdir(parents=True, exist_ok = True) + log_file = destination / 'log.txt' + f = open(log_file, 'a+') + + #Define distance (in miles) to search for nwm segments + nwm_ds_search = 10 + nwm_us_search = 10 + #Need a list of AHPS codes. See "ahps_dictionaries" for method of getting this list. + ahps_codes = [folder.name for folder in source_dir.glob('*') if len(folder.name) == 5] + + #Get mainstems NWM segments + #Workaround for sites in 02030103 and 02030104, many are not rfc_forecast_point = True + #Import list of evaluated sites + list_of_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].to_list() + ms_segs = mainstem_nwm_segs(metadata_url, list_of_sites) + + for code in ahps_codes: + f.write(f'{code} : Processing\n') + print(f'processing {code}') + #For a given code, find all inundation grids under that code. + code = code.lower() + + #Get metadata of site and search for NWM segments x miles upstream/x miles downstream + select_by = 'nws_lid' + selector = [code] + metadata_list, metadata_df = get_metadata(metadata_url, select_by, selector, must_include = None, upstream_trace_distance = nwm_us_search, downstream_trace_distance = nwm_ds_search) + metadata = metadata_list[0] + + #Assign huc to site using FIM huc layer. + dictionary, out_gdf = aggregate_wbd_hucs(metadata_list, Path(WBD_LAYER), retain_attributes = False) + [huc] = list(dictionary.keys()) + + #There are 12 sites with special issues such as these don't have any crs coordinates and grid/polygon data don't align or missing grid data but polygons are available. + #Sites with no grid data but polygon data --> cfmm8, kilo1 + #Sites with no projection assigned to grid and polygon/grid don't align --> stak1, nmso1, nori3, sasi3 + #Sites with reprojection issues using rasterio (manually reprojected with ESRI) --> kcdm7, knym7, mcri2, ptvn6, tmai4 + #Sites with incomplete grids (used polys to convert to grids) --> 'roun6' + ahps_dir = source_dir / code / 'depth_grids' + if code in ['cfmm8','kilo1','stak1', 'sasi3', 'nori3', 'nmso1', 'kcdm7', 'knym7', 'mcri2','ptvn6','tmai4', 'roun6']: + f.write(f'{code} : Custom workaround related to benchmark data (mismatch crs, no grid data, etc)\n') + ahps_dir = source_dir / code / 'custom' + + #Get thresholds (action/minor/moderate/major flows and stages), if not available exit. + #For USGS many sites may not have rating curves but the threshold stages are available. + + select_by = 'nws_lid' + selector = code + stages, flows =get_thresholds(threshold_url, select_by, selector, threshold = 'all') + + #Make sure at least one valid threshold is supplied from WRDS. 
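+        #(any() below is False only when the action, minor, moderate, and major stages are all missing, so only sites with no thresholds at all are skipped)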
+ threshold_categories = ['action','minor','moderate','major'] + if not any([stages[threshold] for threshold in threshold_categories]): + f.write(f'{code} : Skipping because no threshold stages available\n') + continue + + #We need to adjust stages to elevations using the datum adjustment. This next section finds the datum adjustment. + #determine primary source for interpolated threshold flows (USGS first then NRLDB). This will dictate what rating curve to pull. + rating_curve_source = flows['source'] + #Workaround for sites that don't have rating curve but do have flows specified (USGS only). Assign rating_curve_source to 'USGS Rating Depot' manually inspected all of these sites and USGS datum is available and will be used. + if code in ['bdxt1','ccti3', 'fnnm7', 'mtao1', 'nfsi3', 'omot1' , 'sbrn1', 'vron4', 'watv1']: + rating_curve_source = 'USGS Rating Depot' + + #Get the datum and adjust to NAVD if necessary. + nws, usgs = get_datum(metadata) + datum_data = {} + if rating_curve_source == 'USGS Rating Depot': + datum_data = usgs + elif rating_curve_source == 'NRLDB': + datum_data = nws + + #If datum not supplied, skip to new site + datum = datum_data.get('datum', None) + if datum is None: + f.write(f'{code}: Skipping because of missing datum\n') + continue + + #Custom workaround, some sites have poorly defined crs. CRS requuired for ngvd to navd conversions + # Assumed NAVD88 (no info from USGS or NWS metadata): kynm7, ksvm7, yesg1 + # Assigned NAVD88 because USGS metadata indicates NAD83: arnm7, grfi2, kctm7, nast1, nhri3, roun6, vllm7 + # Assigned NAVD88 (reported NAVD 1988): cmtl1 + if code in ['arnm7', 'cmtl1','grfi2','kctm7','knym7','ksvm7','nast1','nhri3','roun6','vllm7','yesg1']: + #Update crs to NAD83 (some are assumed, others have USGS info indicating NAD83 crs) + datum_data.update(crs = 'NAD83') + + #Adjust datum to NAVD88 if needed (Assumes that if vcs not NGVD29 or NGVD 1929 it is in NAVD88) + if datum_data.get('vcs') in ['NGVD29', 'NGVD 1929']: + #Get the datum adjustment to convert NGVD to NAVD. Sites not in contiguous US are previously removed otherwise the region needs changed. + datum_adj_ft = ngvd_to_navd_ft(datum_info = datum_data, region = 'contiguous') + datum88 = round(datum + datum_adj_ft, 2) + else: + datum88 = datum + + + #Set Grid override flag, if set to True then the 'has_grids' property is ignored. Allows for custom workaround. + #Special exception for kilo1, where it has attribute (has_grids == 0) yet there is grid metadata and polygons were converted to grids. + if code == 'kilo1': + grid_override = True + f.write(f'{code} : Custom workaround related to "has_grids" attribute') + else: + grid_override = False + #get grid metadata (metadata includes, elevation/stage/flow and etc for each site). If empty exit. + grid_metadata = usgs_grid_metadata(code, has_grid_override=grid_override) + if not grid_metadata: + f.write(f'{code} : Skipping because no grid metadata available\n') + continue + + #Get paths of all grids that have been downloaded, if no grids available for site then exit. + grid_paths = [grids for grids in ahps_dir.glob('*.tif*') if grids.suffix in ['.tif', '.tiff']] + if not grid_paths: + f.write(f'{code} : Skipping because no benchmark grids available\n') + continue + + # Iterate through grid_metadata and add the path to the dictionary as well as an indicator of whether the path exists. + for key in grid_metadata: + #When USGS grid data was downloaded, grid was saved with the 'key' name. Update the grid_metadata to include the path. 
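+            #(grid_metadata keys look like '<shortname>_0001', so the expected file is <ahps_dir>/<shortname>_0001.tif)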
+ path = ahps_dir / (key + '.tif') + grid_metadata[key]['path'] = path + #Verify that the path exists (in some instances the grid should be available but it isn't) and add as subkey + if path.is_file(): + grid_metadata[key]['path_exist'] = True + else: + grid_metadata[key]['path_exist'] = False + + #Convert grid metadata information to a DataFrame + df = pd.DataFrame.from_dict(grid_metadata, orient = 'index') + #Filter out rows where grids do not exist + df = df.query('path_exist == True') + #Prior to renaming columns do a check to make sure single site (will add functionality for multi-sites later) + if not 'QCFS' in df.columns: + f.write(f'{code} : Skipping because multisite\n') + continue + #Rename columns to match NWS AHPS data structure, this only applies to single USGS sites, if a multisite the columns are different from QCFS. + df.rename(columns = {'QCFS':'flow', 'STAGE':'stage', 'ELEV':'elevation'}, inplace=True) + #Many USGS maps have elevations to numerous decimal places. Round to nearest tenth. + #NWS has maps to nearest tenth, for example HARP1 is both USGS and NWS, the USGS maps are to the hundredth of foot and NWS are to tenth. + df['elevation'] = round(df['elevation'],1) + #Assume flow source is supplied, if it is interpolated, this column will be changed later on. + df['flow_source'] = 'supplied by USGS' + #Accomodate for vdsg1 (upon inspection WRDS API reports thresholds in elevation instead of stage for this site) + if code == 'vdsg1': + df['stage'] = df['elevation'] + f.write(f'{code} : Custom workaround because thresholds are reported as elevations\n') + + #Define rating curve as empty dataframe, populate if needed. + rating_curve = pd.DataFrame() + #If flows are missing from the grid metadata, then interpolate flows using NWS or USGS rating curve + if df['flow'].isnull().all(): + #get entire rating curve, same source as interpolated threshold flows (USGS Rating Depot first then NRLDB rating curve). + if rating_curve_source == 'NRLDB': + site = [code] + elif rating_curve_source == 'USGS Rating Depot': + site = [metadata.get('identifiers').get('usgs_site_code')] + + rating_curve = get_rating_curve(rating_curve_url, site) + + #If rating curve is not present, skip site + if rating_curve.empty: + f.write(f'{code} : Skipping because no rating curve\n') + continue + #Add elevation fields to rating curve + #Add field with vertical coordinate system + vcs = datum_data['vcs'] + if not vcs: + vcs = 'Unspecified, Assumed NAVD88' + rating_curve['vcs'] = vcs + + #Add field with original datum + rating_curve['datum'] = datum + + #If VCS is NGVD29 add rating curve elevation (in NGVD) as well as the NAVD88 datum + if vcs in ['NGVD29', 'NGVD 1929']: + #Add field with raw elevation conversion (datum + stage) + rating_curve['elevation_ngvd29'] = rating_curve['stage'] + datum + #Add field with adjusted NAVD88 datum + rating_curve['datum_navd88'] = datum88 + #Add field with NAVD88 elevation + rating_curve['elevation_navd88'] = rating_curve['stage'] + datum88 + # sort inundation grids in ascending order based on stage + df.sort_values(by = 'elevation', ascending = True, inplace = True) + #interpolate based on stage (don't need elevation because we have stage of floodgrid) + df['flow'] = np.interp(df['elevation'], rating_curve['elevation_navd88'], rating_curve['flow'], left = np.nan, right = np.nan) + #Overwrite flow source to reflect interpolation from rc + df['flow_source'] = f'interpolated from {rating_curve_source} rating curve' + + #Select the appropriate threshold grid for evaluation. 
Using the supplied threshold stages and the calculated map stages. + grids,grid_flows = select_grids(df, stages, datum88, 1.1) + + #Obtain NWM segments that are on ms to apply flows + segments = get_nwm_segs(metadata) + site_ms_segs = set(segments).intersection(ms_segs) + segments = list(site_ms_segs) + #Preprocess grids and export to file and create flow file. + try: + #for each threshold + for i in threshold_categories: + #Obtain the flow and grid associated with threshold as well as extent grid which serves as the domain. + flow = grid_flows[i] + grid = grids[i] + extent = grids['extent'] + #Make sure that flow and flow grid are valid + if not grid in ['No Map', 'No Threshold', 'No Flow']: + #Define output directory (to be created later) + outputdir = destination / huc / code / i + + #Create Binary Grids, first create domain of analysis, then create binary grid + + #Domain extent is largest floodmap in the static library WITH holes filled + filled_domain_raster = outputdir.parent / f'{code}_extent.tif' + #Create a domain raster if it does not exist. + if not filled_domain_raster.exists(): + #Open extent data as rasterio object + domain = rasterio.open(extent) + domain_profile = domain.profile + #Domain should have donut holes removed + process_extent(domain, domain_profile, output_raster = filled_domain_raster) + + #Open domain raster as rasterio object + filled_domain = rasterio.open(filled_domain_raster) + filled_domain_profile = filled_domain.profile + + #Open benchmark data as a rasterio object. + benchmark = rasterio.open(grid) + benchmark_profile = benchmark.profile + + #Create the binary benchmark raster + boolean_benchmark, boolean_profile = process_grid(benchmark, benchmark_profile, filled_domain, filled_domain_profile, reference_raster) + + #Output binary benchmark grid and flow file to destination + outputdir.mkdir(parents = True, exist_ok = True) + output_raster = outputdir / (f'ahps_{code}_huc_{huc}_depth_{i}.tif') + with rasterio.Env(): + with rasterio.open(output_raster, 'w', **boolean_profile) as dst: + dst.write(boolean_benchmark,1) + + #Close datasets + domain.close() + filled_domain.close() + benchmark.close() + + #Create the guts of the flow file. + flow_info = flow_data(segments,flow) + #Write out the flow file to csv + output_flow_file = outputdir / (f'ahps_{code}_huc_{huc}_flows_{i}.csv') + flow_info.to_csv(output_flow_file, index = False) + + except Exception as e: + f.write(f'{code} : Error preprocessing benchmark\n{repr(e)}\n') + f.write(traceback.format_exc()) + f.write('\n') + print(traceback.format_exc()) + #Create extent if ahps code subfolder is present in destination directory. 
+ ahps_directory = destination / huc / code + if ahps_directory.exists(): + #Delete extent raster + filled_extent = ahps_directory / f'{code}_extent.tif' + if filled_extent.exists: + filled_extent.unlink() + + #Populate attribute information for site + grids_attributes = pd.DataFrame(data=grids.items(), columns = ['magnitude','path']) + flows_attributes = pd.DataFrame(data=grid_flows.items(), columns=['magnitude','grid_flow_cfs']) + threshold_attributes = pd.DataFrame(data=stages.items(), columns = ['magnitude','magnitude_stage']) + #merge dataframes + attributes = grids_attributes.merge(flows_attributes, on = 'magnitude') + attributes = attributes.merge(threshold_attributes, on = 'magnitude') + attributes = attributes.merge(df[['path','stage','elevation', 'flow_source']], on = 'path') + #Strip out sensitive paths and convert magnitude stage to elevation + attributes['path'] = attributes['path'].apply(lambda x :Path(x).name) + attributes['magnitude_elev_navd88']=(datum88 + attributes['magnitude_stage']).astype(float).round(1) + #Add general site information + attributes['nws_lid'] = code + attributes['wfo'] = metadata['nws_data']['wfo'] + attributes['rfc'] = metadata['nws_data']['rfc'] + attributes['state'] = metadata['nws_data']['state'] + attributes['huc'] = huc + #Rename and Reorder columns + attributes.rename(columns = {'path':'grid_name', 'flow_source':'grid_flow_source','stage':'grid_stage','elevation':'grid_elev_navd88'}, inplace = True) + attributes = attributes[['nws_lid','wfo','rfc','state','huc','magnitude','magnitude_stage','magnitude_elev_navd88','grid_name','grid_stage','grid_elev_navd88', 'grid_flow_cfs','grid_flow_source']] + #Save attributes to csv + attributes.to_csv(ahps_directory / f'{code}_attributes.csv', index = False) + + #if rating_curve generated, write the rating curve to a file + if not rating_curve.empty: + rating_curve_output = ahps_directory / (f'{code}_rating_curve.csv') + rating_curve['lat'] = datum_data['lat'] + rating_curve['lon'] = datum_data['lon'] + rating_curve.to_csv(rating_curve_output, index = False) + f.write(f'{code} : Rating curve needed to interpolate flow\n') + + #Write the interpolated flows to file + df_output = ahps_directory / (f'{code}_flows.csv') + df.to_csv(df_output, index = False) + + else: + f.write(f'{code} : Unable to evaluate site, missing all flows\n') + + f.close() + + #Combine all attribute files + attribute_files = list(destination.rglob('*_attributes.csv')) + all_attributes = pd.DataFrame() + for i in attribute_files: + attribute_df = pd.read_csv(i, dtype={'huc':str}) + all_attributes = all_attributes.append(attribute_df) + if not all_attributes.empty: + all_attributes.to_csv(destination / 'attributes.csv', index = False) + + return + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Create preprocessed USGS benchmark datasets at AHPS locations.') + parser.add_argument('-s', '--source_dir', help = 'Workspace where all source data is located.', required = True) + parser.add_argument('-d', '--destination', help = 'Directory where outputs are to be stored', required = True) + parser.add_argument('-r', '--reference_raster', help = 'reference raster used for benchmark raster creation', required = True) + args = vars(parser.parse_args()) + + + #Run get_env_paths and static_flow_lids + API_BASE_URL, EVALUATED_SITES_CSV, WBD_LAYER, USGS_METADATA_URL = get_env_paths() + preprocess_usgs(**args) \ No newline at end of file diff --git a/tools/preprocess_download_usgs_grids.py 
b/tools/preprocess_download_usgs_grids.py new file mode 100644 index 000000000..332a30ed1 --- /dev/null +++ b/tools/preprocess_download_usgs_grids.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +import urllib.request +from pathlib import Path +from dotenv import load_dotenv +import os +import argparse +import requests +from collections import defaultdict +import urllib +import pandas as pd + +load_dotenv() +USGS_DOWNLOAD_URL = os.getenv("USGS_DOWNLOAD_URL") +USGS_METADATA_URL = os.getenv("USGS_METADATA_URL") +EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") +############################################################################### +#Get all usgs grids available for download. This step is required because the grid metadata API returns gridID as an integer and truncates leading zeros found in grid names. +############################################################################### +def get_all_usgs_gridnames(): + ''' + Retrieve all the available grids for download from USGS. This is necessary as the grid metadata available from USGS API doesn't preserve leading zeros. + + Returns + ------- + grid_lookup : collections.defaultdict + Dictionary with shortname as the key and a list of gridnames associated with a given shortname as values. + ''' + + #Grid names are split between 4 websites + sites = ['grids_1', 'grids_2', 'grids_3', 'grids_4'] + #Append all grid names to this variable + grid_names = [] + #loop through each site and append the grid name to a list. + for i in sites: + #Get gridnames + url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/{i}/MapServer?f=pjson' + response = requests.get(url) + site_json = response.json() + info = site_json['layers'] + #Loop through all grid info and extract the grid name. + for i in info: + grid_name = i['name'] + grid_names.append(grid_name) + #Create dictionary with key of shortname and values being list of grids available. + grid_lookup = defaultdict(list) + for i in grid_names: + #define key (shortname) and value (gridname) + key = i.split('_')[0] + value = i + grid_lookup[key].append(value) + return grid_lookup +############################################################################### +#Get USGS Site metadata +############################################################################### +def usgs_site_metadata(code): + ''' + Retrieves site metadata from USGS API and saves output as dictionary. Information used includes shortname and site number. + + Parameters + ---------- + code : STR + AHPS code. + USGS_METADATA_URL : STR + URL for USGS datasets. + + Returns + ------- + site_metadata : DICT + Output metadata for an AHPS site. 
+ ''' + # Make sure code is lower case + code = code.lower() + # Get site metadata from USGS API using ahps code + site_url = f'{USGS_METADATA_URL}/server/rest/services/FIMMapper/sites/MapServer/0/query?where=AHPS_ID+%3D+%27{code}%27&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=¶meterValues=&rangeValues=&quantizationParameters=&f=pjson' + #Get data from API + response = requests.get(site_url) + #If response is valid, then get metadata and save to dictionary + if response.ok: + response_json = response.json() + site_metadata = response_json['features'][0]['attributes'] + return site_metadata +######################################################################## +#Get USGS Benchmark Grids +######################################################################## +def obtain_usgs_data(workspace): + ''' + Download GRIDS from USGS FIM studies + + Parameters + ---------- + workspace : STR + Output directory where grids are placed. + + Returns + ------- + None. + + ''' + + + #Define workspace where output data is downloaded to + workspace = Path(workspace) + #Get all names of grids available for download from USGS website. + grid_lookup = get_all_usgs_gridnames() + #List of target ahps codes. In "ahps_dictionary.py" we defined a dictionary (ahps_lookup) that contains all ahps codes and their sources. + target_ahps_codes = pd.read_csv(EVALUATED_SITES_CSV) + target_ahps_codes = target_ahps_codes.query('Source in ["Both","USGS"]')['Total_List'].to_list() + #Loop through all codes in the target_ahps_codes list. + all_messages = [] + for code in target_ahps_codes: + #Get metadata information related to ahps code from USGS API. + code_metadata = usgs_site_metadata(code) + #From code_metadata get the shortname and site_no associated with the code. + shortname = code_metadata['SHORT_NAME'] + site_no = code_metadata['SITE_NO'] + #Define the output location for all grids and create if it doesn't exist. + dest_dir = workspace / code.lower() / 'depth_grids' + dest_dir.mkdir(parents = True, exist_ok = True) + #Get list of all available grids for download using the grid_lookup dictionary + gridnames = grid_lookup[shortname] + #Loop through all available grids for download, download them, and save to defined location. + for gridname in gridnames: + print(f'working on {gridname}') + gridid = gridname.split('_')[1] + #Define a filled gridID that has leading zeros out to 4 digits. + filled_gridid = gridid.zfill(4) + #Download gridded data from the USGS s3 website. The files will be copied specified directory and the GRIDID will have 4 digits with leading zeros. + base_url = f'{USGS_DOWNLOAD_URL}/FIM/tmp1/fimgrids2iwrss/' + #Each grid dataset has these file extensions. Download each file + extensions = ['.tif', '.tfw', '.tif.aux.xml', '.tif.ovr', '.tif.xml'] + #Loop through each extension type and download. + for gridext in extensions: + #Define the url associated with each grid + url = base_url + gridname + gridext + #Define the output file path of the grid. The grid filename uses the filled gridID. 
This resolves issues down the road of USGS grid metadata information storing the gridid as a number and truncating leading zeros from the gridname. + saved_grid_path = dest_dir / (f'{shortname}_{filled_gridid}{gridext}') + #Check to see if file has already been downloaded + if not saved_grid_path.is_file(): + #If file hasn't been downloaded, download it. If there was an error downloading, make note. + try: + urllib.request.urlretrieve(url, saved_grid_path) + message = f'{gridname} downloaded' + all_messages.append(message) + except: + message = f'{gridname} error downloading' + all_messages.append(message) + #If file exists make note of it. + else: + message = f'skipping {gridname}, exists on file' + all_messages.append(message) + return + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Download Grid data associated with USGS FIM studies.') + parser.add_argument('-w', '--workspace', help = 'Workspace where all outputs will be saved.', required = True) + args = vars(parser.parse_args()) + + #Download datasets + obtain_usgs_data(**args) + + diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index 714f2c535..c5bf9884d 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -54,7 +54,8 @@ def check_file_age(file): check = f'{file.name} is {int(file_age_days)} days old, consider updating.\nUpdate with rating_curve_get_usgs_curves.py' else: check = f'{file.name} is {int(file_age_days)} days old.' - return check + + return check # recurr_intervals = ['recurr_1_5_cms.csv','recurr_5_0_cms.csv','recurr_10_0_cms.csv'] @@ -67,7 +68,8 @@ def generate_rating_curve_metrics(args): nwm_recurr_data_filename = args[4] rc_comparison_plot_filename = args[5] nwm_flow_dir = args[6] - huc = args[7] + catfim_flows_filename = args[7] + huc = args[8] elev_table = pd.read_csv(elev_table_filename,dtype={'location_id': str}) hydrotable = pd.read_csv(hydrotable_filename,dtype={'HUC': str,'feature_id': str}) @@ -118,10 +120,15 @@ def generate_rating_curve_metrics(args): # Merge NWM recurr intervals into a single layer nwm_recurr_intervals_all = reduce(lambda x,y: pd.merge(x,y, on='feature_id', how='outer'), [recurr_1_5_yr, recurr_5_yr, recurr_10_yr]) nwm_recurr_intervals_all = pd.melt(nwm_recurr_intervals_all, id_vars=['feature_id'], value_vars=['1.5','5.0','10.0'], var_name='recurr_interval', value_name='discharge_cms') + + # Append catfim data (already set up in format similar to nwm_recurr_intervals_all) + cat_fim = pd.read_csv(catfim_flows_filename, dtype={'feature_id':str}) + nwm_recurr_intervals_all = nwm_recurr_intervals_all.append(cat_fim) + + # Convert discharge to cfs and filter nwm_recurr_intervals_all['discharge_cfs'] = nwm_recurr_intervals_all.discharge_cms * 35.3147 nwm_recurr_intervals_all = nwm_recurr_intervals_all.filter(items=['discharge_cfs', 'recurr_interval','feature_id']).drop_duplicates() - # Identify unique gages usgs_crosswalk = hydrotable.filter(items=['location_id', 'feature_id']).drop_duplicates() @@ -372,12 +379,13 @@ def calculate_rc_stats_elev(rc,stat_groups=None): if __name__ == '__main__': parser = argparse.ArgumentParser(description='generate rating curve plots and tables for FIM and USGS gages') - parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True) - parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True) - parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True) - 
parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True) + parser.add_argument('-fim_dir','--fim-dir', help='FIM output dir', required=True,type=str) + parser.add_argument('-output_dir','--output-dir', help='rating curves output folder', required=True,type=str) + parser.add_argument('-gages','--usgs-gages-filename',help='USGS rating curves',required=True,type=str) + parser.add_argument('-flows','--nwm-flow-dir',help='NWM recurrence flows dir',required=True,type=str) + parser.add_argument('-catfim', '--catfim-flows-filename', help='Categorical FIM flows file',required = True,type=str) parser.add_argument('-j','--number-of-jobs',help='number of workers',required=False,default=1,type=int) - parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False) + parser.add_argument('-group','--stat-groups',help='column(s) to group stats',required=False,type=str) args = vars(parser.parse_args()) @@ -385,6 +393,7 @@ def calculate_rc_stats_elev(rc,stat_groups=None): output_dir = args['output_dir'] usgs_gages_filename = args['usgs_gages_filename'] nwm_flow_dir = args['nwm_flow_dir'] + catfim_flows_filename = args['catfim_flows_filename'] number_of_jobs = args['number_of_jobs'] stat_groups = args['stat_groups'] @@ -415,7 +424,7 @@ def calculate_rc_stats_elev(rc,stat_groups=None): rc_comparison_plot_filename = join(plots_dir,f"FIM-USGS_rating_curve_comparison_{huc}.png") if isfile(elev_table_filename): - procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir,huc]) + procs_list.append([elev_table_filename, hydrotable_filename, usgs_gages_filename, usgs_recurr_stats_filename, nwm_recurr_data_filename, rc_comparison_plot_filename,nwm_flow_dir, catfim_flows_filename, huc]) # Initiate multiprocessing print(f"Generating rating curve metrics for {len(procs_list)} hucs using {number_of_jobs} jobs") diff --git a/tools/rating_curve_get_usgs_curves.py b/tools/rating_curve_get_usgs_curves.py index 6676c27b3..cea2a0dbf 100644 --- a/tools/rating_curve_get_usgs_curves.py +++ b/tools/rating_curve_get_usgs_curves.py @@ -3,7 +3,7 @@ import pandas as pd import geopandas as gpd from pathlib import Path -from tools_shared_functions import get_metadata, get_datum, ngvd_to_navd_ft, get_rating_curve, aggregate_wbd_hucs +from tools_shared_functions import get_metadata, get_datum, ngvd_to_navd_ft, get_rating_curve, aggregate_wbd_hucs, get_thresholds, flow_data from dotenv import load_dotenv import os import argparse @@ -99,7 +99,73 @@ def get_all_active_usgs_sites(): return gdf, list_of_sites, acceptable_sites_metadata - +############################################################################## +#Generate categorical flows for each category across all sites. +############################################################################## +def write_categorical_flow_files(metadata, workspace): + ''' + Writes flow files of each category for every feature_id in the input metadata. + Written to supply input flow files of all gage sites for each flood category. + + Parameters + ---------- + metadata : DICT + Dictionary of metadata from WRDS (e.g. output from get_all_active_usgs_sites). + workspace : STR + Path to workspace where flow files will be saved. + + Returns + ------- + None. 
+ + ''' + + threshold_url = f'{API_BASE_URL}/nws_threshold' + workspace = Path(workspace) + workspace.mkdir(parents = True, exist_ok = True) + #For each site in metadata + all_data = pd.DataFrame() + + for site in metadata: + #Get the feature_id and usgs_site_code + feature_id = site.get('identifiers').get('nwm_feature_id') + usgs_code = site.get('identifiers').get('usgs_site_code') + nws_lid = site.get('identifiers').get('nws_lid') + + #thresholds only provided for valid nws_lid. + if nws_lid == 'Bogus_ID': + continue + + #if invalid feature_id skip to next site + if feature_id is None: + continue + + #Get the stages and flows + stages, flows = get_thresholds(threshold_url, select_by = 'nws_lid', selector = nws_lid, threshold = 'all') + + #For each flood category + for category in ['action','minor','moderate','major']: + #Get flow + flow = flows.get(category, None) + #If flow or feature id are not valid, skip to next site + if flow is None: + continue + #Otherwise, write 'guts' of a flow file and append to a master DataFrame. + else: + data = flow_data([feature_id], flow, convert_to_cms = True) + data['recurr_interval'] = category + data['nws_lid'] = nws_lid + data['location_id'] = usgs_code + data = data.rename(columns = {'discharge':'discharge_cms'}) + #Append site data to master DataFrame + all_data = all_data.append(data, ignore_index = True) + + #Write CatFIM flows to file + final_data = all_data[['feature_id','discharge_cms', 'recurr_interval']] + final_data.to_csv(workspace / f'catfim_flows_cms.csv', index = False) + return all_data +############################################################################### + def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): ''' @@ -272,6 +338,9 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): if list_of_gage_sites == ['all']: acceptable_sites_gdf = acceptable_sites_gdf.to_crs(PREP_PROJECTION) acceptable_sites_gdf.to_file(Path(workspace) / 'usgs_gages.gpkg', layer = 'usgs_gages', driver = 'GPKG') + + #Write out flow files for each threshold across all sites + all_data = write_categorical_flow_files(metadata_list, workspace) return all_rating_curves @@ -295,5 +364,7 @@ def usgs_rating_to_elev(list_of_gage_sites, workspace=False, sleep_time = 1.0): l = args['list_of_gage_sites'] w = args['workspace'] t = float(args['sleep_timer']) - #Run create_flow_forecast_file - usgs_rating_to_elev(list_of_gage_sites = l, workspace=w, sleep_time = t) \ No newline at end of file + + #Generate USGS rating curves + usgs_rating_to_elev(list_of_gage_sites = l, workspace=w, sleep_time = t) + \ No newline at end of file diff --git a/tools/tools_shared_functions.py b/tools/tools_shared_functions.py index 8ea3d5d59..fb6204dd5 100755 --- a/tools/tools_shared_functions.py +++ b/tools/tools_shared_functions.py @@ -7,9 +7,18 @@ import pandas as pd import geopandas as gpd import requests -from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry - +import numpy as np +import pathlib +from pathlib import Path +import rasterio.shutil +from rasterio.warp import calculate_default_transform, reproject, Resampling +import rasterio.crs +from rasterio.merge import merge +from rasterio import features +from shapely.geometry import shape +from shapely.geometry import Polygon +from shapely.geometry import MultiPolygon +import re from tools_shared_variables import (TEST_CASES_DIR, PRINTWORTHY_STATS, GO_UP_STATS, GO_DOWN_STATS, ENDC, TGREEN_BOLD, TGREEN, TRED_BOLD, TWHITE, 
WHITE_BOLD, CYAN_BOLD) @@ -1254,4 +1263,302 @@ def get_rating_curve(rating_curve_url, location_ids): else: continue - return all_curves \ No newline at end of file + return all_curves +####################################################################### +#Following Functions used for preprocesing of AHPS sites (NWS and USGS) +######################################################################## + +####################################################################### +#Function to return a correct maps. +######################################################################## +def select_grids(dataframe, stages, datum88, buffer): + ''' + Given a DataFrame (in a specific format), and a dictionary of stages, and the datum (in navd88). + loop through the available inundation datasets and find the datasets that are equal to or immediately above the thresholds and only return 1 dataset per threshold (if any). + + Parameters + ---------- + dataframe : DataFrame + DataFrame that has to be in a specific format and contains the stages and paths to the inundation datasets. + stages : DICT + Dictionary of thresholds (key) and stages (values) + datum88: FLOAT + The datum associated with the LID that is pre-converted to NAVD88 (if needed) + buffer: Float + Interval which the uppder bound can be assigned. For example, Threshold + buffer = upper bound. Recommended to make buffer 0.1 greater than desired interval as code selects maps < and not <= + + Returns + ------- + maps : DICT + Dictionary of threshold (key) and inundation dataset path (value) + map_flows: DICT + Dictionary of threshold (key) and flows in CFS rounded to the nearest whole number associated with the selected maps (value) + + ''' + #Define threshold categories + thresholds = ['action', 'minor', 'moderate', 'major'] + maps = {} + map_flows={} + #For each threshold, pick the appropriate map for analysis. + for i,threshold in enumerate(thresholds): + #Check if stage is None + if not stages[threshold] is None: + #Define the threshold floor elevation (navd88). + lower_bound = round((stages[threshold] + datum88),1) + #Define the threshold ceiling (navd88) + upper_bound = round((stages[threshold] + datum88 + buffer),1) + #For thresholds that are action, minor, moderate + if threshold in ['action', 'minor', 'moderate']: + #Make sure the next threshold has a valid stage + if stages[thresholds[i+1]] is None: + next_threshold = upper_bound + else: + #Determine what the next threshold elevation is. + next_threshold = round((stages[thresholds[i+1]] + datum88),1) + #Make sure the upper_bound is not greater than the next threshold, if it is then reassign upper_bound. + if upper_bound > next_threshold: + upper_bound = next_threshold + #Get the single map which meets the criteria. + value = dataframe.query(f'({lower_bound}<=elevation) & (elevation<{upper_bound})')['elevation'].min() + #For major threshold + else: + #Get the single map which meets criteria. 
+ value = dataframe.query(f'({lower_bound}<=elevation) & (elevation<{upper_bound})')['elevation'].min() + + #If the selected value is a number + if np.isfinite(value): + #Get the map path and the flow associated with the map (rounded to nearest whole number) + map_path = dataframe.query(f'elevation == {value}')['path'].item() + map_flow = round(dataframe.query(f'elevation == {value}')['flow'].item(),0) + #Check to see if map_flow is valid (if beyond rating_curve it is nan) + if not np.isfinite(map_flow): + map_path = 'No Flow' + map_flow = 'No Flow' + + #If the selected value is not a number (or interpolated flow is nan caused by elevation of map which is beyond rating curve range), then map_path and map_flows are both set to 'No Map'. + else: + map_path = 'No Map' + map_flow = 'No Map' + else: + map_path = 'No Threshold' + map_flow = 'No Threshold' + + #Write map paths and flows to dictionary + maps[threshold] = map_path + map_flows[threshold] = map_flow + + #Get the maximum inundation map (using elevation) and this will be the domain extent + max_value = dataframe['elevation'].max() + map_path = dataframe.query(f'elevation == {max_value}')['path'].item() + map_flow = 'Not Used' + maps['extent'] = map_path + map_flows['extent'] = map_flow + + return maps,map_flows + +####################################################################### +#Process AHPS Extent Grid (Fill Holes) +####################################################################### +def process_extent(extent, profile, output_raster = False): + ''' + Convert raster to feature (using raster_to_feature), the footprint is used so all raster values are set to 1 where there is data. + fill all donut holes in resulting feature. + Filled geometry is then converted back to raster using same raster properties as input profile. + Output raster will have be encoded as follows: + filled footprint (wet) = 1 + remaining area in raster domain (dry) = 0 + NoData = 3 + + Parameters + ---------- + extent : Rasterio Dataset Reader + Path to extent raster + extent_profile: Rasterio Profile + profile related to the extent argument + output_raster: STR + Path to output raster. If no path supplied, then no raster is written to disk. default = False + + Returns (If no output raster specified) + ------- + extent_filled_raster : rasterio dataset + Extent raster with filled donut holes + profile : rasterio profile + Profile associated with extent_filled_raster + + ''' + + #Convert extent to feature and explode geometry + poly_extent = raster_to_feature(extent, profile, footprint_only = True) + poly_extent = poly_extent.explode() + + #Fill holes in extent + poly_extent_fill_holes=MultiPolygon(Polygon(p.exterior) for p in poly_extent['geometry']) + # loop through the filled polygons and insert the new geometry + for i in range(len(poly_extent_fill_holes)): + poly_extent.loc[i,'geometry'] = poly_extent_fill_holes[i] + + #Dissolve filled holes with main map and explode + poly_extent['dissolve_field'] = 1 + poly_extent = poly_extent.dissolve(by = 'dissolve_field') + poly_extent = poly_extent.explode() + poly_extent = poly_extent.reset_index() + + #Convert filled polygon back to raster + extent_filled_raster = features.rasterize(((geometry, 1) for geometry in poly_extent['geometry']), fill = 0, dtype = 'int32',transform = profile['transform'], out_shape = (profile['height'], profile['width'])) + + #Update profile properties (dtype and no data) + profile.update(dtype = rasterio.int32) + profile.update(nodata=0) + + #Check if output raster is specified. 
If so, the write extent filled raster to disk. + if output_raster: + #Create directory + Path(output_raster).parent.mkdir(parents = True, exist_ok = True) + with rasterio.Env(): + with rasterio.open(output_raster, 'w', **profile) as dst: + dst.write(extent_filled_raster, 1) + #If no output raster is supplied the return the rasterio array and profile. + else: + return extent_filled_raster, profile +######################################################################## +#Convert raster to polygon +######################################################################## +def raster_to_feature(grid, profile_override = False, footprint_only = False): + ''' + Given a grid path, convert to vector, dissolved by grid value, in GeoDataFrame format. + + Parameters + ---------- + grid_path : pathlib path OR rasterio Dataset Reader + Path to grid or a rasterio Dataset Reader + profile_override: rasterio Profile + Default is False, If a rasterio Profile is supplied, it will dictate the transform and crs. + footprint_only: BOOL + If true, dataset will be divided by itself to remove all unique values. If False, all values in grid will be carried through on raster to feature conversion. default = False + + Returns + ------- + dissolve_geodatabase : GeoDataFrame + Dissolved (by gridvalue) vector data in GeoDataFrame. + + ''' + #Determine what format input grid is: + #If a pathlib path, open with rasterio + if isinstance(grid, pathlib.PurePath): + dataset = rasterio.open(grid) + #If a rasterio dataset object, assign to dataset + elif isinstance(grid, rasterio.DatasetReader): + dataset = grid + + #Get data/mask and profile properties from dataset + data = dataset.read(1) + msk = dataset.read_masks(1) + data_transform = dataset.transform + coord_sys = dataset.crs + + #If a profile override was supplied, use it to get the transform and coordinate system. + if profile_override: + data_transform = profile_override['transform'] + coord_sys = profile_override['crs'] + + #If a footprint of the raster is desired, convert all data values to 1 + if footprint_only: + data[msk == 255] = 1 + + #Convert grid to feature + spatial = [] + values = [] + for geom, val in rasterio.features.shapes(data, mask = msk, transform = data_transform): + spatial.append(shape(geom)) + values.append(val) + spatial_geodataframe = gpd.GeoDataFrame({'values': values,'geometry':spatial }, crs = coord_sys) + dissolve_geodataframe = spatial_geodataframe.dissolve(by = 'values') + return dissolve_geodataframe +######################################################################## +#Create AHPS Benchmark Grid +######################################################################## +def process_grid(benchmark, benchmark_profile, domain, domain_profile, reference_raster): + ''' + Given a benchmark grid and profile, a domain rasterio dataset and profile, and a reference raster, + Match the benchmark dataset to the domain extent and create a classified grid convert to: + 0 (no data footprint of domain) + 1 (data footprint of domain) + 2 (data footprint of benchmark) + Then reproject classified benchmark grid to match reference grid resolution and crs. + Output is an array of values and a profile. + + Parameters + ---------- + benchmark : rasterio dataset + Rasterio dataset of the benchmark dataset for a given threshold + benchmark_profile : rasterio profile + A potentially modified profile to the benchmark dataset. 
+ domain: rasterio dataset + Rasterio dataset of the domain grid (the maximum available grid for a given site) + domain_profile: rasterio profile + A potentially modified profile of the domain dataset. + reference_raster : pathlib Path + Path to reference dataset. + + Returns + ------- + boolean_benchmark : numpy Array + Array of values for the benchmark_boolean grid. + profile : rasterio profile + Updated, final profile of the boolean_benchmark grid. + + ''' + + #Make benchmark have same dimensions as domain (Assume domain has same CRS as benchmark) + #Get source CRS (benchmark and domain assumed to be same CRS) + source_crs = benchmark_profile['crs'].to_wkt() + #Get domain data + domain_arr = domain.read(1) + #Get benchmark data + benchmark_arr = benchmark.read(1) + #Create empty array with same dimensions as domain + benchmark_fit_to_domain = np.empty(domain_arr.shape) + #Make benchmark have same footprint as domain (Assume domain has same CRS as benchmark) + reproject(benchmark_arr, + destination = benchmark_fit_to_domain, + src_transform = benchmark.transform, + src_crs = source_crs, + src_nodata = benchmark.nodata, + dst_transform = domain.transform, + dst_crs = source_crs, + dst_nodata = benchmark.nodata, + dst_resolution = source_crs, + resampling = Resampling.bilinear) + #Convert fitted benchmark dataset to boolean. 0 = NODATA Regions and 1 = Data Regions + benchmark_fit_to_domain_bool = np.where(benchmark_fit_to_domain == benchmark.nodata,0,1) + #Merge domain datamask and benchmark data mask. New_nodata_value (2) = Domain NO DATA footprint, 0 = NO DATA for benchmark (within data region of domain), 1 = DATA region of benchmark. + new_nodata_value = 2 + classified_benchmark = np.where(domain_arr == domain.nodata, new_nodata_value, benchmark_fit_to_domain_bool) + + ##Reproject classified benchmark to reference raster crs and resolution. + #Read in reference raster + reference = rasterio.open(reference_raster) + #Determine the new transform and dimensions of reprojected/resampled classified benchmark dataset whos width, height, and bounds are same as domain dataset. + new_benchmark_transform, new_benchmark_width, new_benchmark_height = calculate_default_transform(source_crs, reference.crs, domain.width, domain.height, *domain.bounds, resolution = reference.res) + #Define an empty array that is same dimensions as output by the "calculate_default_transform" command. + classified_benchmark_projected = np.empty((new_benchmark_height,new_benchmark_width), dtype=np.uint8) + #Reproject and resample the classified benchmark dataset. Nearest Neighbor resampling due to integer values of classified benchmark. + reproject(classified_benchmark, + destination = classified_benchmark_projected, + src_transform = domain.transform, + src_crs = source_crs, + src_nodata = new_nodata_value, + dst_transform = new_benchmark_transform, + dst_crs = reference.crs, + dst_nodata = new_nodata_value, + dst_resolution = reference.res, + resampling = Resampling.nearest) + + #Update profile using reference profile as base (data type, NODATA, transform, width/height). 
+ profile = reference.profile + profile.update(transform = new_benchmark_transform) + profile.update(dtype = rasterio.uint8) + profile.update(nodata = new_nodata_value) + profile.update (width = new_benchmark_width) + profile.update(height = new_benchmark_height) + return classified_benchmark_projected, profile \ No newline at end of file From 199ab6e787b1742f0558a69247e08d411add2505 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Wed, 28 Apr 2021 11:32:11 -0500 Subject: [PATCH 070/359] Refactor synthesize_test_case.py to handle exceptions during multiprocessing Refactor synthesize_test_case.py to handle exceptions during multiprocessing. Resolves issue #351 Changes - refactored inundation.py and run_test_case.py to handle exceptions without using sys.exit(). This isn't the ideal way to handle exceptions when using multiprocessing because the child process is expecting a return. --- CHANGELOG.md | 29 ++++-- tools/inundation.py | 217 +++++++++++++++++++++-------------------- tools/run_test_case.py | 59 +++++------ 3 files changed, 164 insertions(+), 141 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cc4cc499..82b0f0cd3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +

+## v3.0.15.7 - 2021-04-28 - [PR #367](https://github.com/NOAA-OWP/cahaba/pull/367) + +Refactor `synthesize_test_cases.py` to handle exceptions during multiprocessing. Resolves issue #351. + +### Changes +- Refactored `inundation.py` and `run_test_case.py` to handle exceptions without using `sys.exit()`. + +
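A minimal sketch of the pattern described above, using a hypothetical `process_huc` worker and invented HUC codes rather than the project's actual functions: the worker reports failure through its return value, so a pooled parent process can keep collecting results instead of losing a child to `sys.exit()`.

```python
from multiprocessing import Pool

def process_huc(huc):
    """Worker that signals failure via its return value instead of calling sys.exit()."""
    try:
        if huc == "02020005":  # pretend this HUC has no matching feature IDs
            raise ValueError("no matching feature IDs between forecast and hydrotable")
        return (huc, 0, None)       # success code, mirroring inundate() returning 0
    except Exception as exc:
        return (huc, 1, repr(exc))  # failure code; the parent decides how to report it

if __name__ == "__main__":
    hucs = ["12090301", "02020005", "12040101"]
    with Pool(processes=2) as pool:
        for huc, status, error in pool.map(process_huc, hucs):
            print(f"{huc} complete" if status == 0 else f"{huc} skipped: {error}")
```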

## v3.0.15.6 - 2021-04-23 - [PR #365](https://github.com/NOAA-OWP/cahaba/pull/365) Implement CatFIM threshold flows to Sierra test and add AHPS benchmark preprocessing scripts. @@ -20,11 +29,12 @@ Prevent eval_plots.py from erroring out when spatial argument enabled if certain ## Changes - Add check to make sure analyzed dataset is available prior to creating spatial dataset. -

+

## v3.0.15.4 - 2021-04-20 - [PR #356](https://github.com/NOAA-OWP/cahaba/pull/356) Closing all multiprocessing Pool objects in repo. +
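The fix is mechanical; this generic sketch (a toy `square` worker, not repo code) shows the close/join pattern that releases worker processes and their OS resources even if the mapped call raises.

```python
from multiprocessing import Pool

def square(x):
    return x * x

if __name__ == "__main__":
    pool = Pool(processes=4)
    try:
        results = pool.map(square, range(10))
    finally:
        pool.close()  # stop accepting new tasks
        pool.join()   # wait for the workers to exit cleanly
    print(results)
```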

## v3.0.15.3 - 2021-04-19 - [PR #358](https://github.com/NOAA-OWP/cahaba/pull/358) @@ -44,6 +54,7 @@ Preprocess NHDPlus HR rasters for consistent projections, nodata values, and con ## v3.0.15.2 - 2021-04-16 - [PR #359](https://github.com/NOAA-OWP/cahaba/pull/359) Hotfix to preserve desired files when production flag used in `fim_run.sh`. + ## Changes - Fixed production whitelisted files. @@ -52,6 +63,7 @@ Hotfix to preserve desired files when production flag used in `fim_run.sh`. ## v3.0.15.1 - 2021-04-13 - [PR #355](https://github.com/NOAA-OWP/cahaba/pull/355) Sierra test considered all USGS gage locations to be mainstems even though many actually occurred with tributaries. This resulted in unrealistic comparisons as incorrect gages were assigned to mainstems segments. This feature branch identifies gages that are on mainstems via attribute field. + ## Changes - Modifies `usgs_gage_crosswalk.py` to filter out gages from the `usgs_gages.gpkg` layer such that for a "MS" run, only consider gages that contain rating curve information (via `curve` attribute) and are also mainstems gages (via `mainstems` attribute). @@ -65,7 +77,6 @@ Sierra test considered all USGS gage locations to be mainstems even though many - Adds the `extent` argument specified by user when running `fim_run.sh` to `usgs_gage_crosswalk.py`.
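A rough sketch of that gage filtering, assuming a `usgs_gages.gpkg` layer whose `curve` and `mainstems` flags are stored as strings; the real column encodings live in `usgs_gage_crosswalk.py` and may differ.

```python
import geopandas as gpd

usgs_gages = gpd.read_file("usgs_gages.gpkg", layer="usgs_gages")

extent = "MS"  # passed through from fim_run.sh in the actual workflow
if extent == "MS":
    # Keep only gages that have rating-curve data and also sit on mainstem segments.
    usgs_gages = usgs_gages.query("curve == 'True' and mainstems == 'True'")
```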

- ## v3.0.15.0 - 2021-04-08 - [PR #340](https://github.com/NOAA-OWP/cahaba/pull/340) Implementing a prototype technique to estimate the missing bathymetric component in the HAND-derived synthetic rating curves. The new Bathymetric Adjusted Rating Curve (BARC) function is built within the `fim_run.sh` workflow and will ingest bankfull geometry estimates provided by the user to modify the cross section area used in the synthetic rating curve generation. @@ -81,6 +92,7 @@ Implementing a prototype technique to estimate the missing bathymetric component - Imports the existing synthetic rating curve table and the bankfull geometry input data (topwidth and cross section area per COMID). - Performs new synthetic rating curve calculations with bathymetry estimation modifications. - Flags issues with the thalweg-notch artifact. +
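The intent of BARC can be sketched with Manning's equation, which the HAND synthetic rating curves are built around. All numbers below are invented, and the real adjustment also updates wetted perimeter and applies calibrated coefficients.

```python
def manning_discharge(area_m2, wetted_perimeter_m, mannings_n, slope):
    """Discharge (cms) from Manning's equation for one stage of a synthetic rating curve."""
    hydraulic_radius = area_m2 / wetted_perimeter_m
    return (1.0 / mannings_n) * area_m2 * hydraulic_radius ** (2.0 / 3.0) * slope ** 0.5

# Invented channel properties for a single HydroID at one stage.
n, slope = 0.06, 0.0015
area, perimeter = 42.0, 30.0  # HAND-derived cross-section area (m^2) and wetted perimeter (m)
bathy_area = 7.5              # estimated missing bathymetric area from bankfull geometry (m^2)

q_original = manning_discharge(area, perimeter, n, slope)
q_adjusted = manning_discharge(area + bathy_area, perimeter, n, slope)  # perimeter held fixed for simplicity
print(round(q_original, 1), round(q_adjusted, 1))
```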

## v3.0.14.0 - 2021-04-05 - [PR #338](https://github.com/NOAA-OWP/cahaba/pull/338) @@ -97,6 +109,7 @@ Create tool to retrieve rating curves from USGS sites and convert to elevation ( 1) `usgs_rating_curves.csv`: A csv file that contains rating curves (including converted to NAVD88 elevation) for USGS gages in a format that is compatible with `rating_curve_comparisons.py`. As it is is currently configured, only gages within CONUS will have rating curve data. 2) `log.csv`: A log file that records status for each gage and includes error messages. 3) `usgs_gages.gpkg`: A geospatial layer (in FIM projection) of all active USGS gages that meet a predefined criteria. Additionally, the `curve` attribute indicates whether a rating curve is found in the `usgs_rating_curves.csv`. This spatial file is only generated if the `all` option is passed with the `-l` argument. +
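The elevation conversion itself is a datum shift; a sketch with invented numbers (the real tool pulls the rating and datum from USGS/WRDS services and handles NGVD29-to-NAVD88 conversion separately).

```python
import pandas as pd

rating = pd.DataFrame({"stage": [1.0, 2.0, 3.0], "discharge_cfs": [150.0, 480.0, 1100.0]})
datum_navd88 = 612.4  # gage datum already expressed in NAVD88 feet

# Stage is measured above the gage datum, so elevation is simply stage + datum.
rating["elevation_navd88"] = rating["stage"] + datum_navd88
print(rating)
```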

## v3.0.13.0 - 2021-04-01 - [PR #332](https://github.com/NOAA-OWP/cahaba/pull/332) @@ -110,8 +123,8 @@ Created tool to compare synthetic rating curve with benchmark rating curve (Sier ### Additions - `usgs_gage_crosswalk.py`: generates `usgs_elev_table.csv` in `run_by_unit.py` with elevation and additional attributes at USGS gages. - `rating_curve_comparison.py`: post-processing script to plot and calculate metrics between synthetic rating curves and USGS rating curve data. -
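Conceptually the Sierra test evaluates both curves at the same flows and differences the resulting elevations; a toy sketch with invented curves (the real script works from the hydrotable and `usgs_rating_curves.csv`).

```python
import numpy as np

# Invented rating curves: discharge (cms) vs. water-surface elevation (ft, NAVD88).
usgs_q, usgs_elev = np.array([5.0, 50.0, 500.0]), np.array([610.2, 612.8, 617.5])
synth_q, synth_elev = np.array([5.0, 50.0, 500.0]), np.array([610.9, 613.6, 618.9])

# Evaluate both curves at the same recurrence-interval flows, then difference them.
recurr_flows = np.array([12.0, 120.0])  # e.g. 1.5-year and 10-year flows for one feature_id
elev_diff = np.interp(recurr_flows, synth_q, synth_elev) - np.interp(recurr_flows, usgs_q, usgs_elev)
print(elev_diff)  # positive values mean the synthetic curve sits higher than the USGS curve
```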

+

## v3.0.12.1 - 2021-03-31 - [PR #336](https://github.com/NOAA-OWP/cahaba/pull/336) Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. @@ -124,8 +137,8 @@ Fix spatial option in `eval_plots.py` when creating plots and spatial outputs. ### Additions - Creates `fim_performance_points.shp`: this layer consists of all evaluated ahps points (with metrics). Spatial data retrieved from WRDS on the fly. - Creates `fim_performance_polys.shp`: this layer consists of all evaluated huc8s (with metrics). Spatial data retrieved from WBD layer. -
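A rough sketch of how the HUC8 polygon layer could be assembled, assuming a `WBDHU8` layer with a `HUC8` field in `WBD_National.gpkg`; the layer and field names used by `eval_plots.py` may differ.

```python
import geopandas as gpd
import pandas as pd

metrics = pd.DataFrame({"huc8": ["12090301", "12040101"], "CSI": [0.61, 0.55]})  # invented metrics
wbd = gpd.read_file("WBD_National.gpkg", layer="WBDHU8")

# Attach the metrics to the HUC8 polygons and write them out as the performance layer.
perf_polys = wbd.merge(metrics, left_on="HUC8", right_on="huc8", how="inner")
perf_polys.to_file("fim_performance_polys.shp")
```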

+

## v3.0.12.0 - 2021-03-26 - [PR #327](https://github.com/NOAA-OWP/cahaba/pull/237) Add more detail/information to plotting capabilities. @@ -137,8 +150,8 @@ Add more detail/information to plotting capabilities. ### Additions - Optional argument to create barplots of CSI for each individual site. - Create a csv containing the data used to create the scatterplots. -
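The per-site barplot is plain matplotlib; an illustrative sketch with invented sites and CSI values.

```python
import matplotlib.pyplot as plt

sites = ["bltn7", "grfi2", "mnda2"]  # invented AHPS lids
csi = [0.62, 0.48, 0.71]             # invented CSI values for one FIM version

fig, ax = plt.subplots()
ax.bar(sites, csi)
ax.set_ylabel("CSI")
ax.set_title("CSI by site (illustrative values)")
fig.savefig("csi_by_site.png", dpi=150)
```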

+

## v3.0.11.0 - 2021-03-22 - [PR #319](https://github.com/NOAA-OWP/cahaba/pull/298) Improvements to CatFIM service source data generation. @@ -151,16 +164,16 @@ Improvements to CatFIM service source data generation. ### Additions - Added `generate_categorical_fim.py` to wrap `generate_categorical_fim_flows.py` and `generate_categorical_fim_mapping.py`. - Create new `nws_lid_sites` shapefile located in same directory as the `catfim_library` shapefile. -

+

## v3.0.10.1 - 2021-03-24 - [PR #320](https://github.com/NOAA-OWP/cahaba/pull/320) Patch to synthesize_test_cases.py. ### Changes - Bug fix to `synthesize_test_cases.py` to allow comparison between `testing` version and `official` versions. -

+

## v3.0.10.0 - 2021-03-12 - [PR #298](https://github.com/NOAA-OWP/cahaba/pull/298) Preprocessing of flow files for Categorical FIM. @@ -174,8 +187,8 @@ Preprocessing of flow files for Categorical FIM. ### Changes - Stability fixes to `generate_categorical_fim.py`. -
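The "guts" of a categorical flow file reduce to one discharge per NWM feature_id, converted to cms; a sketch with invented values (the real flows come from the WRDS threshold service).

```python
import pandas as pd

feature_ids = [5791828, 5791830]  # invented NWM feature_ids on the segments of interest
minor_flow_cfs = 3200.0           # invented 'minor' threshold flow for the site

# One row per feature_id, with the threshold discharge converted from cfs to cms.
flows = pd.DataFrame({"feature_id": feature_ids,
                      "discharge": round(minor_flow_cfs * 0.0283168, 2)})
flows.to_csv("ahps_minor_flows.csv", index=False)
```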

+

## v3.0.9.0 - 2021-03-12 - [PR #297](https://github.com/NOAA-OWP/cahaba/pull/297) Enhancements to FIM API. diff --git a/tools/inundation.py b/tools/inundation.py index d093385b8..d105ea52c 100755 --- a/tools/inundation.py +++ b/tools/inundation.py @@ -156,59 +156,63 @@ def inundate( else: raise TypeError("Pass hydro table csv") - # make windows generator - window_gen = __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, - depths,out_raster_profile,out_vector_profile,quiet,hucs=hucs,hucSet=hucSet) + if catchmentStagesDict is not None: - # start up thread pool - executor = ThreadPoolExecutor(max_workers=num_workers) + # make windows generator + window_gen = __make_windows_generator(rem,catchments,catchment_poly,mask_type,catchmentStagesDict,inundation_raster,inundation_polygon, + depths,out_raster_profile,out_vector_profile,quiet,hucs=hucs,hucSet=hucSet) - # submit jobs - results = {executor.submit(__inundate_in_huc,*wg) : wg[6] for wg in window_gen} + # start up thread pool + executor = ThreadPoolExecutor(max_workers=num_workers) - inundation_rasters = [] ; depth_rasters = [] ; inundation_polys = [] - for future in as_completed(results): - try: - future.result() - except Exception as exc: - __vprint("Exception {} for {}".format(exc,results[future]),not quiet) - else: + # submit jobs + results = {executor.submit(__inundate_in_huc,*wg) : wg[6] for wg in window_gen} - if results[future] is not None: - __vprint("... {} complete".format(results[future]),not quiet) + inundation_rasters = [] ; depth_rasters = [] ; inundation_polys = [] + for future in as_completed(results): + try: + future.result() + except Exception as exc: + __vprint("Exception {} for {}".format(exc,results[future]),not quiet) else: - __vprint("... complete",not quiet) - - inundation_rasters += [future.result()[0]] - depth_rasters += [future.result()[1]] - inundation_polys += [future.result()[2]] - - # power down pool - executor.shutdown(wait=True) - - # optional aggregation - if (aggregate) & (hucs is not None): - # inun grid vrt - if inundation_raster is not None: - inun_vrt = BuildVRT(splitext(inundation_raster)[0]+'.vrt',inundation_rasters) - inun_vrt = None - #_ = run('gdalbuildvrt -q -overwrite {} {}'.format(splitext(inundation_raster)[0]+'.vrt'," ".join(inundation_rasters)),shell=True) - # depths vrt - if depths is not None: - depths_vrt = BuildVRT(splitext(depths)[0]+'.vrt',depth_rasters,resampleAlg='bilinear') - depths_vrt = None - #_ = run('gdalbuildvrt -q -overwrite -r bilinear {} {}'.format(splitext(depths)[0]+'.vrt'," ".join(depth_rasters)),shell=True) - # concat inun poly - if inundation_polygon is not None: - _ = run('ogrmerge.py -o {} {} -f GPKG -single -overwrite_ds'.format(inundation_polygon," ".join(inundation_polys)),shell=True) - - # close datasets - rem.close() - catchments.close() - - return(0) + if results[future] is not None: + __vprint("... {} complete".format(results[future]),not quiet) + else: + __vprint("... 
complete",not quiet) + + inundation_rasters += [future.result()[0]] + depth_rasters += [future.result()[1]] + inundation_polys += [future.result()[2]] + + # power down pool + executor.shutdown(wait=True) + + # optional aggregation + if (aggregate) & (hucs is not None): + # inun grid vrt + if inundation_raster is not None: + inun_vrt = BuildVRT(splitext(inundation_raster)[0]+'.vrt',inundation_rasters) + inun_vrt = None + #_ = run('gdalbuildvrt -q -overwrite {} {}'.format(splitext(inundation_raster)[0]+'.vrt'," ".join(inundation_rasters)),shell=True) + # depths vrt + if depths is not None: + depths_vrt = BuildVRT(splitext(depths)[0]+'.vrt',depth_rasters,resampleAlg='bilinear') + depths_vrt = None + #_ = run('gdalbuildvrt -q -overwrite -r bilinear {} {}'.format(splitext(depths)[0]+'.vrt'," ".join(depth_rasters)),shell=True) + + # concat inun poly + if inundation_polygon is not None: + _ = run('ogrmerge.py -o {} {} -f GPKG -single -overwrite_ds'.format(inundation_polygon," ".join(inundation_polys)),shell=True) + + # close datasets + rem.close() + catchments.close() + + return(0) + else: + return(1) def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profile,catchments_profile,hucCode, catchmentStagesDict,depths,inundation_raster,inundation_polygon, @@ -328,6 +332,7 @@ def __inundate_in_huc(rem_array,catchments_array,crs,window_transform,rem_profil if isinstance(depths,DatasetWriter): depths.close() if isinstance(inundation_raster,DatasetWriter): inundation_raster.close() if isinstance(inundation_polygon,fiona.Collection): inundation_polygon.close() + if isinstance(hucs,fiona.Collection): inundation_polygon.close() # return file names of outputs for aggregation. Handle Nones try: @@ -414,6 +419,7 @@ def __return_huc_in_hucSet(hucCode,hucSet): rem_array,window_transform = mask(rem,catchment_poly['geometry'],crop=True,indexes=1) catchments_array,_ = mask(catchments,catchment_poly['geometry'],crop=True,indexes=1) + del catchment_poly else: print ("invalid mask type. Options are 'huc' or 'filter'") except ValueError: # shape doesn't overlap raster @@ -458,78 +464,79 @@ def __subset_hydroTable_to_forecast(hydroTable,forecast,subset_hucs=None): huc_error = hydroTable.HUC.unique() hydroTable.set_index(['HUC','feature_id','HydroID'],inplace=True) - hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. 
- - if hydroTable.empty: - print(f"All stream segments in HUC(s): {huc_error} are within lake boundaries.") - sys.exit(0) - elif isinstance(hydroTable,pd.DataFrame): pass #consider checking for correct dtypes, indices, and columns else: raise TypeError("Pass path to hydro-table csv or Pandas DataFrame") - if isinstance(forecast,str): - forecast = pd.read_csv( - forecast, - dtype={'feature_id' : str , 'discharge' : float} - ) - forecast.set_index('feature_id',inplace=True) - elif isinstance(forecast,pd.DataFrame): - pass # consider checking for dtypes, indices, and columns - else: - raise TypeError("Pass path to forecast file csv or Pandas DataFrame") - - - # susbset hucs if passed - if subset_hucs is not None: - if isinstance(subset_hucs,list): - if len(subset_hucs) == 1: - try: - subset_hucs = open(subset_hucs[0]).read().split('\n') - except FileNotFoundError: - pass - elif isinstance(subset_hucs,str): - try: - subset_hucs = open(subset_hucs).read().split('\n') - except FileNotFoundError: - subset_hucs = [subset_hucs] - - # subsets HUCS - subset_hucs_orig = subset_hucs.copy() ; subset_hucs = [] - for huc in np.unique(hydroTable.index.get_level_values('HUC')): - for sh in subset_hucs_orig: - if huc.startswith(sh): - subset_hucs += [huc] - - hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] - - # join tables - try: - hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') - except AttributeError: - print (f"No matching feature IDs between forecast and hydrotable for HUC(s): {subset_hucs}") - sys.exit(0) + hydroTable = hydroTable[hydroTable["LakeID"] == -999] # Subset hydroTable to include only non-lake catchments. - # initialize dictionary - catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) + if not hydroTable.empty: - # interpolate stages - for hid,sub_table in hydroTable.groupby(level='HydroID'): + if isinstance(forecast,str): + forecast = pd.read_csv( + forecast, + dtype={'feature_id' : str , 'discharge' : float} + ) + forecast.set_index('feature_id',inplace=True) + elif isinstance(forecast,pd.DataFrame): + pass # consider checking for dtypes, indices, and columns + else: + raise TypeError("Pass path to forecast file csv or Pandas DataFrame") + + # susbset hucs if passed + if subset_hucs is not None: + if isinstance(subset_hucs,list): + if len(subset_hucs) == 1: + try: + subset_hucs = open(subset_hucs[0]).read().split('\n') + except FileNotFoundError: + pass + elif isinstance(subset_hucs,str): + try: + subset_hucs = open(subset_hucs).read().split('\n') + except FileNotFoundError: + subset_hucs = [subset_hucs] + + # subsets HUCS + subset_hucs_orig = subset_hucs.copy() ; subset_hucs = [] + for huc in np.unique(hydroTable.index.get_level_values('HUC')): + for sh in subset_hucs_orig: + if huc.startswith(sh): + subset_hucs += [huc] + + hydroTable = hydroTable[np.in1d(hydroTable.index.get_level_values('HUC'), subset_hucs)] + + # join tables + try: + hydroTable = hydroTable.join(forecast,on=['feature_id'],how='inner') + + + # initialize dictionary + catchmentStagesDict = typed.Dict.empty(types.int32,types.float64) - interpolated_stage = np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) + # interpolate stages + for hid,sub_table in hydroTable.groupby(level='HydroID'): - # add this interpolated stage to catchment stages dict - h = round(interpolated_stage[0],4) + interpolated_stage = 
np.interp(sub_table.loc[:,'discharge'].unique(),sub_table.loc[:,'discharge_cms'],sub_table.loc[:,'stage']) - hid = types.int32(hid) ; h = types.float32(h) - catchmentStagesDict[hid] = h + # add this interpolated stage to catchment stages dict + h = round(interpolated_stage[0],4) - # huc set - hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] + hid = types.int32(hid) ; h = types.float32(h) + catchmentStagesDict[hid] = h - return(catchmentStagesDict,hucSet) + # huc set + hucSet = [str(i) for i in hydroTable.index.get_level_values('HUC').unique().to_list()] + return(catchmentStagesDict,hucSet) + + except AttributeError: + print (f"No matching feature IDs between forecast and hydrotable for HUC(s): {subset_hucs}") + return(None,None) + else: + print(f"All stream segments in HUC(s): {huc_error} are within lake boundaries.") + return(None,None) def __vprint(message,verbose): if verbose: diff --git a/tools/run_test_case.py b/tools/run_test_case.py index e3168a422..3b0f2ff1f 100755 --- a/tools/run_test_case.py +++ b/tools/run_test_case.py @@ -132,38 +132,41 @@ def run_alpha_test(fim_run_dir, version, test_id, magnitude, compare_to_previous # Run inundate. print("-----> Running inundate() to produce modeled inundation extent for the " + magnitude + " magnitude...") try: - inundate( + inundate_test = inundate( rem,catchments,catchment_poly,hydro_table,forecast,mask_type,hucs=hucs,hucs_layerName=hucs_layerName, subset_hucs=current_huc,num_workers=1,aggregate=False,inundation_raster=inundation_raster,inundation_polygon=None, depths=None,out_raster_profile=None,out_vector_profile=None,quiet=True ) - - print("-----> Inundation mapping complete.") - predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. - - # Define outputs for agreement_raster, stats_json, and stats_csv. - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - else: - agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') - - compute_contingency_stats_from_rasters(predicted_raster_path, - benchmark_raster_path, - agreement_raster, - stats_csv=stats_csv, - stats_json=stats_json, - mask_values=[], - stats_modes_list=stats_modes_list, - test_id=test_id, - mask_dict=mask_dict, - ) - - if benchmark_category in AHPS_BENCHMARK_CATEGORIES: - del mask_dict[ahps_lid] - - print(" ") - print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) - print(" ") + if inundate_test == 0: + print("-----> Inundation mapping complete.") + predicted_raster_path = os.path.join(os.path.split(inundation_raster)[0], os.path.split(inundation_raster)[1].replace('.tif', '_' + current_huc + '.tif')) # The inundate adds the huc to the name so I account for that here. + + # Define outputs for agreement_raster, stats_json, and stats_csv. 
+ if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, lid + 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + else: + agreement_raster, stats_json, stats_csv = os.path.join(version_test_case_dir, 'total_area_agreement.tif'), os.path.join(version_test_case_dir, 'stats.json'), os.path.join(version_test_case_dir, 'stats.csv') + + compute_contingency_stats_from_rasters(predicted_raster_path, + benchmark_raster_path, + agreement_raster, + stats_csv=stats_csv, + stats_json=stats_json, + mask_values=[], + stats_modes_list=stats_modes_list, + test_id=test_id, + mask_dict=mask_dict, + ) + + if benchmark_category in AHPS_BENCHMARK_CATEGORIES: + del mask_dict[ahps_lid] + + print(" ") + print("Evaluation complete. All metrics for " + test_id + ", " + version + ", " + magnitude + " are available at " + CYAN_BOLD + version_test_case_dir + ENDC) + print(" ") + elif inundate_test == 1: + print (f"No matching feature IDs between forecast and hydrotable for magnitude: {magnitude}") + return except Exception as e: print(e) From 9518cbe20383ebbff4d2dbfbf7e658ed112835e3 Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Fri, 30 Apr 2021 15:41:29 -0500 Subject: [PATCH 071/359] Refactor NHDPlus HR preprocessing workflow Refactor NHDPlus HR preprocessing workflow. - Consolidate NHD streams, NWM catchments, and headwaters MS and FR layers with mainstem column. - HUC8 intersections are included in the input headwaters layer. - clip_vectors_to_wbd.py removes incoming stream segment from the selected layers. This resolves #238. --- CHANGELOG.md | 10 + fim_run.sh | 15 +- src/add_crosswalk.py | 1 + src/adjust_headwater_streams.py | 237 ++++++++++------- src/aggregate_fim_outputs.py | 18 +- src/aggregate_vector_inputs.py | 435 +++++++++++++++---------------- src/agreedem.py | 2 - src/clip_vectors_to_wbd.py | 81 +++--- src/reduce_nhd_stream_density.py | 129 +++++---- src/run_by_unit.sh | 17 +- src/usgs_gage_crosswalk.py | 2 + src/utils/shared_functions.py | 1 + src/utils/shared_variables.py | 27 ++ tools/rating_curve_comparison.py | 1 + 14 files changed, 539 insertions(+), 437 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 82b0f0cd3..4c6e3ade4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +

+## v3.0.15.8 - 2021-04-29 - [PR #371](https://github.com/NOAA-OWP/cahaba/pull/371) + +Refactor the NHDPlus HR preprocessing workflow. Resolves issue #238. + +### Changes +- Consolidate the NHD streams, NWM catchments, and headwaters MS and FR layers into single layers with a `mainstem` column. +- HUC8 intersections are included in the input headwaters layer. +- `clip_vectors_to_wbd.py` removes incoming stream segments from the selected layers. +
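With the consolidation, extent-specific subsetting becomes a column filter instead of separate `_fr`/`_ms` files; a sketch assuming the aggregated streams layer named in the new `fim_run.sh` inputs.

```python
import geopandas as gpd

nhd_streams = gpd.read_file("agg_nhd_streams_adj.gpkg")

# MS runs filter on the mainstem flag (stored as 1/0 or True/False); FR runs use every segment.
ms_streams = nhd_streams.loc[nhd_streams.mainstem == 1]
fr_streams = nhd_streams
```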

## v3.0.15.7 - 2021-04-28 - [PR #367](https://github.com/NOAA-OWP/cahaba/pull/367) diff --git a/fim_run.sh b/fim_run.sh index 8d1875e5f..2cfc744e2 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -111,16 +111,11 @@ logFile=$outputRunDataDir/logs/summary.log ## Define inputs export input_WBD_gdb=$inputDataDir/wbd/WBD_National.gpkg -export input_NWM_Lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg -export input_NWM_Catchments_fr=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg -export input_NWM_Catchments_ms=$inputDataDir/nwm_hydrofabric/nwm_catchments_ms.gpkg -export input_NWM_Flows_fr=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg -export input_NWM_Flows_ms=$inputDataDir/nwm_hydrofabric/nwm_flows_ms.gpkg -export input_NWM_Headwaters=$inputDataDir/nwm_hydrofabric/nwm_headwaters.gpkg -export input_nhd_flowlines_fr=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_fr_adjusted.gpkg -export input_nhd_flowlines_ms=$inputDataDir/nhdplus_vectors_aggregate/NHDPlusBurnLineEvent_ms_adjusted.gpkg -export input_nhd_headwaters_fr=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_fr.gpkg -export input_nhd_headwaters_ms=$inputDataDir/nhdplus_vectors_aggregate/nhd_headwaters_adjusted_ms.gpkg +export input_nwm_lakes=$inputDataDir/nwm_hydrofabric/nwm_lakes.gpkg +export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg +export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg +export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj.gpkg +export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg ## Input handling ## $srcDir/check_huc_inputs.py -u "$hucList" diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index 2e7fbccbd..ef21ea23a 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -52,6 +52,7 @@ def add_crosswalk(input_catchments_fileName,input_flows_fileName,input_srcbase_f elif extent == 'MS': ## crosswalk using stream segment midpoint method input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc) + input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem==1] input_nwmcat = input_nwmcat.rename(columns={'ID':'feature_id'}) if input_nwmcat.feature_id.dtype != 'int': input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) input_nwmcat=input_nwmcat.set_index('feature_id') diff --git a/src/adjust_headwater_streams.py b/src/adjust_headwater_streams.py index e08bf3352..71f73186e 100644 --- a/src/adjust_headwater_streams.py +++ b/src/adjust_headwater_streams.py @@ -3,8 +3,6 @@ import geopandas as gpd import pandas as pd import numpy as np -from os.path import splitext -from tqdm import tqdm import argparse import pygeos from shapely.geometry import Point,LineString @@ -12,131 +10,176 @@ from shapely.wkb import dumps, loads from utils.shared_variables import PREP_PROJECTION from utils.shared_functions import getDriver +import warnings +warnings.simplefilter("ignore") -def adjust_headwaters(huc,nhd_streams,headwaters,headwater_id): - # identify true headwater segments - if nhd_streams['headwaters_id'].dtype=='int': - nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id > 0) & (nhd_streams.downstream_of_headwater == False),:].copy() - if headwaters[headwater_id].dtype != 'int': headwaters[headwater_id] = headwaters[headwater_id].astype(int) - else: - nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id.notna()) & (nhd_streams.downstream_of_headwater == False),:].copy() +def adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id): + 
# Identify true headwater segments + nhd_streams_adj = nhd_streams.loc[(nhd_streams.headwaters_id > 0) & (nhd_streams.downstream_of_headwater == False),:].copy() nhd_streams_adj = nhd_streams_adj.explode() nhd_streams_adj = nhd_streams_adj.reset_index(drop=True) - headwater_limited = headwaters.merge(nhd_streams_adj["headwaters_id"],left_on=headwater_id, right_on="headwaters_id",how='right') + if nwm_headwaters["site_id"].dtype != 'int': nwm_headwaters["site_id"] = nwm_headwaters["site_id"].astype(int) + headwater_limited = nwm_headwaters.merge(nhd_streams_adj[["headwaters_id","mainstem"]],left_on="site_id", right_on="headwaters_id",how='right') + headwater_limited = headwater_limited.drop(columns=['headwaters_id']) + + nws_lid_limited = nws_lids.merge(nhd_streams[["nws_lid"]],left_on="site_id", right_on="nws_lid",how='right') + nws_lid_limited = nws_lid_limited.loc[nws_lid_limited.nws_lid!=''] + nws_lid_limited = nws_lid_limited.drop(columns=['nws_lid']) + + # Check for issues in nws_lid layer + if len(nws_lid_limited) < len(nws_lids): + missing_nws_lids = list(set(nws_lids.site_id) - set(nws_lid_limited.site_id)) + print (f"nws lid(s) {missing_nws_lids} missing from aggregate dataset in huc {huc}") + + # Combine NWM headwaters and AHPS sites to be snapped to NHDPlus HR segments + headwater_pts = headwater_limited.append(nws_lid_limited) + headwater_pts = headwater_pts.reset_index(drop=True) + + if headwater_pts is not None: + + headwaterstreams = [] + referencedpoints = [] + snapped_ahps = [] + nws_lid = [] + for index, point in headwater_pts.iterrows(): + + # Convert headwaterpoint geometries to WKB representation + wkb_points = dumps(point.geometry) + + # Create pygeos headwaterpoint geometries from WKB representation + pointbin_geom = pygeos.io.from_wkb(wkb_points) + + if point.pt_type == 'nwm_headwater': + # Closest segment to headwater + closest_stream = nhd_streams_adj.loc[nhd_streams_adj["headwaters_id"]==point[headwater_id]] + else: + # Closest segment to ahps site + closest_stream = nhd_streams.loc[nhd_streams["nws_lid"]==point[headwater_id]] + + try: # Seeing inconsistent geometry objects even after exploding nhd_streams_adj; not sure why this is + closest_stream =closest_stream.explode() + except: + pass + + try: + wkb_closest_stream = dumps(closest_stream.geometry[0]) + except: + wkb_closest_stream = dumps(closest_stream.geometry[0][0]) + + streambin_geom = pygeos.io.from_wkb(wkb_closest_stream) + + # Linear reference headwater to closest stream segment + pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom) + referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) + + # Convert geometries to wkb representation + bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) + + # Convert to shapely geometries + shply_referencedpoint = loads(bin_referencedpoint) + shply_linestring = loads(wkb_closest_stream) + headpoint = Point(shply_referencedpoint.coords) + + if point.pt_type == 'nwm_headwater': + cumulative_line = [] + relativedistlst = [] + + # Collect all nhd stream segment linestring verticies + for point in zip(*shply_linestring.coords.xy): + cumulative_line = cumulative_line + [point] + relativedist = shply_linestring.project(Point(point)) + relativedistlst = relativedistlst + [relativedist] + + # Add linear referenced headwater point to closest nhd stream segment + if not headpoint in cumulative_line: + cumulative_line = cumulative_line + [headpoint] + relativedist = shply_linestring.project(headpoint) + 
relativedistlst = relativedistlst + [relativedist] + + # Sort by relative line distance to place headwater point in linestring + sortline = pd.DataFrame({'geom' : cumulative_line, 'dist' : relativedistlst}).sort_values('dist') + shply_linestring = LineString(sortline.geom.tolist()) + referencedpoints = referencedpoints + [headpoint] + + # Split the new linestring at the new headwater point + try: + line1,line2 = split(shply_linestring, headpoint) + headwaterstreams = headwaterstreams + [LineString(line1)] + nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1) + except: + line1 = split(shply_linestring, headpoint) + headwaterstreams = headwaterstreams + [LineString(line1[0])] + nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0]) + + try: + del cumulative_line, relativedistlst + except: + print (f"issue deleting adjusted stream variables for huc {huc}") + + else: + snapped_ahps = snapped_ahps + [headpoint] + nws_lid = nws_lid + [point[headwater_id]] + + nhd_streams = nhd_streams.drop(columns=['is_relevant_stream', 'headwaters_id', 'downstream_of_headwater']) - headwaterstreams = [] - referencedpoints = [] - - for index, point in headwater_limited.iterrows(): - - # convert headwaterpoint geometries to WKB representation - wkb_points = dumps(point.geometry) - - # create pygeos headwaterpoint geometries from WKB representation - pointbin_geom = pygeos.io.from_wkb(wkb_points) - - # Closest segment to headwater - closest_stream = nhd_streams_adj.loc[nhd_streams_adj["headwaters_id"]==point[headwater_id]] - - try: # seeing inconsistent geometry objects even after exploding nhd_streams_adj; not sure why this is - closest_stream =closest_stream.explode() - except: - pass try: - wkb_closest_stream = dumps(closest_stream.geometry[0]) + del nhd_streams_adj, headwater_limited, referencedpoints, headwaterstreams except: - wkb_closest_stream = dumps(closest_stream.geometry[0][0]) - - streambin_geom = pygeos.io.from_wkb(wkb_closest_stream) - - # Linear reference headwater to closest stream segment - pointdistancetoline = pygeos.linear.line_locate_point(streambin_geom, pointbin_geom) - referencedpoint = pygeos.linear.line_interpolate_point(streambin_geom, pointdistancetoline) - - # convert geometries to wkb representation - bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) - - # convert to shapely geometries - shply_referencedpoint = loads(bin_referencedpoint) - shply_linestring = loads(wkb_closest_stream) - headpoint = Point(shply_referencedpoint.coords) - cumulative_line = [] - relativedistlst = [] - - # collect all nhd stream segment linestring verticies - for point in zip(*shply_linestring.coords.xy): - cumulative_line = cumulative_line + [point] - relativedist = shply_linestring.project(Point(point)) - relativedistlst = relativedistlst + [relativedist] - - # add linear referenced headwater point to closest nhd stream segment - if not headpoint in cumulative_line: - cumulative_line = cumulative_line + [headpoint] - relativedist = shply_linestring.project(headpoint) - relativedistlst = relativedistlst + [relativedist] - - # sort by relative line distance to place headwater point in linestring - sortline = pd.DataFrame({'geom' : cumulative_line, 'dist' : relativedistlst}).sort_values('dist') - shply_linestring = LineString(sortline.geom.tolist()) - referencedpoints = referencedpoints + [headpoint] - - # split the new linestring at the new headwater point - try: - line1,line2 = 
split(shply_linestring, headpoint) - headwaterstreams = headwaterstreams + [LineString(line1)] - nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1) - except: - line1 = split(shply_linestring, headpoint) - headwaterstreams = headwaterstreams + [LineString(line1[0])] - nhd_streams.loc[nhd_streams.NHDPlusID==closest_stream.NHDPlusID.values[0],'geometry'] = LineString(line1[0]) + print (f"issue deleting adjusted stream variables for huc {huc}") + + # Create snapped ahps sites + if len(snapped_ahps) > 0: + snapped_ahps_points = gpd.GeoDataFrame({'pt_type': 'nws_lid', headwater_id: nws_lid, 'mainstem': True, + 'geometry': snapped_ahps},geometry='geometry',crs=PREP_PROJECTION) - nhd_streams = nhd_streams.drop(columns=['is_relevant_stream', 'headwaters_id', 'downstream_of_headwater']) + # Identify ajusted nhd headwaters + nhd_headwater_streams_adj = nhd_streams.loc[nhd_streams['is_headwater'],:] + nhd_headwater_streams_adj = nhd_headwater_streams_adj.explode() - try: - del nhd_streams_adj, headwaters, headwater_limited, headwaterstreams, referencedpoints, cumulative_line, relativedistlst - except: - print ('issue deleting adjusted stream variables for huc ' + str(huc)) + hw_points = np.zeros(len(nhd_headwater_streams_adj),dtype=object) + for index,lineString in enumerate(nhd_headwater_streams_adj.geometry): + hw_point = [point for point in zip(*lineString.coords.xy)][-1] + hw_points[index] = Point(*hw_point) - ## identify ajusted nhd headwaters - # print('Identify NHD headwater points',flush=True) - nhd_headwater_streams_adj = nhd_streams.loc[nhd_streams['is_headwater'],:] - nhd_headwater_streams_adj = nhd_headwater_streams_adj.explode() - hw_points = np.zeros(len(nhd_headwater_streams_adj),dtype=object) - for index,lineString in enumerate(nhd_headwater_streams_adj.geometry): - hw_point = [point for point in zip(*lineString.coords.xy)][-1] - hw_points[index] = Point(*hw_point) + nhd_headwater_points_adj = gpd.GeoDataFrame({'pt_type': 'NHDPlusID', headwater_id: nhd_headwater_streams_adj['NHDPlusID'], + 'mainstem': False, 'geometry': hw_points},geometry='geometry',crs=PREP_PROJECTION) - nhd_headwater_points_adj = gpd.GeoDataFrame({'NHDPlusID' : nhd_headwater_streams_adj['NHDPlusID'], - 'geometry' : hw_points},geometry='geometry',crs=PREP_PROJECTION) + nhd_headwater_points_adj = nhd_headwater_points_adj.reset_index(drop=True) - del nhd_headwater_streams_adj + del nhd_headwater_streams_adj + + try: + combined_pts = snapped_ahps_points.append(nhd_headwater_points_adj) + except: + combined_pts = nhd_headwater_points_adj.copy() - return(nhd_streams, nhd_headwater_points_adj) + return nhd_streams, combined_pts if __name__ == '__main__': parser = argparse.ArgumentParser(description='adjust headwater stream geometery based on headwater start points') parser.add_argument('-f','--huc',help='huc number',required=True) parser.add_argument('-l','--nhd-streams',help='NHDPlus HR geodataframe',required=True) - parser.add_argument('-p','--headwaters',help='Headwater points layer',required=True,type=str) + parser.add_argument('-p','--nwm-headwaters',help='Headwater points layer',required=True,type=str) parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) - parser.add_argument('-s','--adj-headwater-points-fileName',help='Output adj headwater points layer name',required=False,type=str,default=None) + parser.add_argument('-a','--adj-headwater-points-fileName',help='Output adj headwater points 
layer name',required=False,type=str,default=None) parser.add_argument('-g','--headwater-points-fileName',help='Output headwater points layer name',required=False,type=str,default=None) - parser.add_argument('-i','--headwater-id',help='Output headwaters points',required=True) + parser.add_argument('-b','--nws-lids',help='NWS lid points',required=True) + parser.add_argument('-i','--headwater-id',help='Headwater id column name',required=True) args = vars(parser.parse_args()) - adj_streams_gdf,adj_headwaters_gdf = adjust_headwaters(huc,nhd_streams,headwaters,headwater_id) + adj_streams_gdf, adj_headwaters_gdf = adjust_headwaters(huc,nhd_streams,nwm_headwaters,nws_lids,headwater_id) if subset_nhd_streams_fileName is not None: - adj_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName']),index=False) + adj_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName'])) if headwater_points_fileName is not None: - headwater_points_fileName.to_file(args['headwater_points_fileName'],driver=getDriver(args['headwater_points_fileName']),index=False) + headwater_points_fileName.to_file(args['headwater_points_fileName'],driver=getDriver(args['headwater_points_fileName'])) if adj_headwater_points_fileName is not None: - adj_headwaters_gdf.to_file(args['adj_headwater_points_fileName'],driver=getDriver(args['adj_headwater_points_fileName']),index=False) + adj_headwaters_gdf.to_file(args['adj_headwater_points_fileName'],driver=getDriver(args['adj_headwater_points_fileName'])) diff --git a/src/aggregate_fim_outputs.py b/src/aggregate_fim_outputs.py index d6fdcf698..bf1a8d09c 100644 --- a/src/aggregate_fim_outputs.py +++ b/src/aggregate_fim_outputs.py @@ -88,8 +88,8 @@ def aggregate_fim_outputs(args): ## aggregate rasters # aggregate file paths - rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}_unprj.tif') - catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}_unprj.tif') + rem_mosaic = os.path.join(huc6_dir,f'hand_grid_{huc6}_prepprj.tif') + catchment_mosaic = os.path.join(huc6_dir,f'catchments_{huc6}_prepprj.tif') if huc6 not in huc_list: @@ -155,28 +155,28 @@ def aggregate_fim_outputs(args): shutil.copy(catchment_filename, catchment_mosaic) ## reproject rasters - reproject_raster(rem_mosaic) + reproject_raster(rem_mosaic,VIZ_PROJECTION) os.remove(rem_mosaic) - reproject_raster(catchment_mosaic) + reproject_raster(catchment_mosaic,VIZ_PROJECTION) os.remove(catchment_mosaic) -def reproject_raster(raster_name): +def reproject_raster(raster_name,reprojection): with rasterio.open(raster_name) as src: transform, width, height = calculate_default_transform( - src.crs, VIZ_PROJECTION, src.width, src.height, *src.bounds) + src.crs, reprojection, src.width, src.height, *src.bounds) kwargs = src.meta.copy() kwargs.update({ - 'crs': VIZ_PROJECTION, + 'crs': reprojection, 'transform': transform, 'width': width, 'height': height, 'compress': 'lzw' }) - raster_proj_rename = os.path.split(raster_name)[1].replace('_unprj.tif', '.tif') + raster_proj_rename = os.path.split(raster_name)[1].replace('_prepprj.tif', '.tif') raster_proj_dir = os.path.join(os.path.dirname(raster_name), raster_proj_rename) with rasterio.open(raster_proj_dir, 'w', **kwargs, tiled=True, blockxsize=1024, blockysize=1024, BIGTIFF='YES') as dst: @@ -187,7 +187,7 @@ def reproject_raster(raster_name): src_transform=src.transform, src_crs=src.crs, dst_transform=transform, - dst_crs=VIZ_PROJECTION, + dst_crs=reprojection, 
resampling=Resampling.nearest) del src, dst diff --git a/src/aggregate_vector_inputs.py b/src/aggregate_vector_inputs.py index eb4d3e4f5..2c1081989 100755 --- a/src/aggregate_vector_inputs.py +++ b/src/aggregate_vector_inputs.py @@ -1,57 +1,55 @@ #!/usr/bin/env python3 import os +import sys +# sys.path.append('/foss_fim/src') import geopandas as gpd from utils.shared_variables import PREP_PROJECTION from utils.shared_functions import getDriver from derive_headwaters import findHeadWaterPoints from reduce_nhd_stream_density import subset_nhd_network from adjust_headwater_streams import adjust_headwaters -from tqdm import tqdm -from os.path import splitext from shapely.geometry import Point -from concurrent.futures import ProcessPoolExecutor,as_completed +from concurrent.futures import ProcessPoolExecutor from collections import deque import numpy as np from shapely.wkb import dumps, loads import pygeos -in_dir ='data/inputs/nhdplus_vectors' -nwm_dir = 'data/inputs/nwm_hydrofabric' -wbd_dir = 'data/inputs/wbd' -ahps_dir = 'data/inputs/ahp_sites' -agg_dir = 'data/inputs/nhdplus_vectors_aggregate' - -wbd_filename = os.path.join(wbd_dir, 'WBD_National.gpkg') -nwm_streams_fr_filename = os.path.join(nwm_dir,'nwm_flows.gpkg') -nwm_streams_ms_filename = os.path.join(nwm_dir,'nwm_flows_ms.gpkg') -nwm_headwaters_filename = os.path.join(nwm_dir,'nwm_headwaters.gpkg') -nwm_huc4_intersections_ms_filename = os.path.join(nwm_dir,'nwm_ms_huc4_intersections.gpkg') -nwm_huc4_intersections_fr_filename = os.path.join(nwm_dir,'nwm_fr_huc4_intersections.gpkg') - -def subset_nwm_ms_streams(args): - nwm_streams_filename = args[0] - in_dir = args[1] - ahps_dir = args[2] - output_filename = args[3] - - # subset nwm network to ms - ahps_headwaters_filename = os.path.join(ahps_dir,'bed_lids.gpkg') - ahps_headwaters = gpd.read_file(ahps_headwaters_filename) +nhdplus_vectors_dir = os.environ.get('nhdplus_vectors_dir') +wbd_filename = os.environ.get('wbd_filename') +nwm_streams_orig_filename = os.environ.get('nwm_streams_orig_filename') +nwm_streams_all_filename = os.environ.get('nwm_streams_all_filename') +nwm_headwaters_filename = os.environ.get('nwm_headwaters_filename') +nwm_catchments_orig_filename = os.environ.get('nwm_catchments_orig_filename') +nwm_catchments_all_filename = os.environ.get('nwm_catchments_all_filename') +ahps_filename = os.environ.get('ahps_filename') +nwm_huc4_intersections_filename = os.environ.get('nwm_huc4_intersections_filename') +nhd_huc8_intersections_filename = os.environ.get('nhd_huc8_intersections_filename') +agg_nhd_headwaters_adj_fileName = os.environ['agg_nhd_headwaters_adj_fileName'] +agg_nhd_streams_adj_fileName = os.environ['agg_nhd_streams_adj_fileName'] + + +def identify_nwm_ms_streams(nwm_streams_filename,ahps_filename,nwm_streams_all_filename): + + # Subset nwm network to ms + ahps_headwaters = gpd.read_file(ahps_filename) nwm_streams = gpd.read_file(nwm_streams_filename) + # Remove mainstem column if it already exists + nwm_streams = nwm_streams.drop(['mainstem'], axis=1, errors='ignore') + nwm_streams['is_headwater'] = False - nwm_streams['downstream_of_headwater'] = False nwm_streams.loc[nwm_streams.ID.isin(list(ahps_headwaters.nwm_featur)),'is_headwater'] = True - ## subset NHDPlus HR + # Subset NHDPlus HR nwm_streams['is_relevant_stream'] = nwm_streams['is_headwater'].copy() nwm_streams = nwm_streams.explode() - # trace down from headwaters + # Trace down from headwaters nwm_streams.set_index('ID',inplace=True,drop=False) Q = 
deque(nwm_streams.loc[nwm_streams['is_headwater'],'ID'].tolist()) @@ -61,69 +59,100 @@ def subset_nwm_ms_streams(args): q = Q.popleft() if q in visited: continue - # + visited.add(q) toNode = nwm_streams.loc[q,'to'] - # + if not toNode == 0: - # + nwm_streams.loc[nwm_streams.ID==toNode,'is_relevant_stream'] = True - # + if toNode not in visited: Q.append(toNode) - nwm_streams = nwm_streams.loc[nwm_streams['is_relevant_stream'],:] + nwm_streams_ms = nwm_streams.loc[nwm_streams['is_relevant_stream'],:] + ms_segments = nwm_streams_ms.ID.to_list() nwm_streams.reset_index(drop=True,inplace=True) - nwm_streams.to_file(output_filename,driver=getDriver(output_filename),index=False) + # Add column to FR nwm layer to indicate MS segments + nwm_streams['mainstem'] = np.where(nwm_streams.ID.isin(ms_segments), 1, 0) + + nwm_streams = nwm_streams.drop(['is_relevant_stream','is_headwater'], axis=1, errors='ignore') -def find_nwm_incoming_streams(args): + nwm_streams.to_file(nwm_streams_all_filename,driver=getDriver(nwm_streams_all_filename),index=False,layer='nwm_streams') - nwm_streams_filename = args[0] - wbd_filename = args[1] - in_dir = args[2] - output_filename = args[3] + return ms_segments - wbd = gpd.read_file(wbd_filename, layer='WBDHU4') + +def find_nwm_incoming_streams(nwm_streams_,wbd,huc_unit): + + # Input wbd + if isinstance(wbd,str): + layer = f"WBDHU{huc_unit}" + wbd = gpd.read_file(wbd, layer=layer) + elif isinstance(wbd,gpd.GeoDataFrame): + pass + else: + raise TypeError("Pass dataframe or filepath for wbd") intersecting_points = [] - for index, row in tqdm(wbd.iterrows(),total=len(wbd)): - col_name = 'HUC4' - huc = row[col_name] + nhdplus_ids = [] + mainstem_flag = [] + print (f"iterating through {len(wbd)} hucs") + for index, row in wbd.iterrows(): + col_name = f"HUC{huc_unit}" + huc = row[col_name] huc_mask = wbd.loc[wbd[col_name]==str(huc)] huc_mask = huc_mask.explode() huc_mask = huc_mask.reset_index(drop=True) - nwm_streams = gpd.read_file(nwm_streams_filename, mask=huc_mask) + # Input nwm streams + if isinstance(nwm_streams_,str): + nwm_streams = gpd.read_file(nwm_streams_, mask=huc_mask) + elif isinstance(nwm_streams_,gpd.GeoDataFrame): + nwm_streams = nwm_streams_.copy() + else: + raise TypeError("Pass dataframe or filepath for nwm streams") + nwm_streams = nwm_streams.explode() nwm_streams = nwm_streams.reset_index(drop=True) for index, polygon in enumerate(huc_mask.geometry): + crosses=nwm_streams.crosses(polygon.exterior) nwm_streams_subset =nwm_streams[crosses] nwm_streams_subset = nwm_streams_subset.reset_index(drop=True) - for index, linestring in enumerate(nwm_streams_subset.geometry): + for index, segment in nwm_streams_subset.iterrows(): distances = [] - # distance to each stream segment + + try: + nhdplus_id = segment.ID + except: + nhdplus_id = segment.NHDPlusID + + linestring = segment.geometry + mainstem = segment.mainstem + + # Distance to each stream segment for point in zip(*linestring.coords.xy): distance = Point(point).distance(polygon.exterior) distances = distances + [distance] - # find minimum distance + # Find minimum distance min_index = np.argmin(distances) # Closest segment to headwater closest_point = list(linestring.coords)[min_index] last_node = Point(closest_point) - # convert geometries to WKB representation + # Convert geometries to WKB representation wkb_point = dumps(last_node) wkb_poly = dumps(polygon.exterior) - # create pygeos geometries from WKB representation + # Create pygeos geometries from WKB representation stream_point_geom = 
pygeos.io.from_wkb(wkb_point) polybin_geom = pygeos.io.from_wkb(wkb_poly) @@ -131,32 +160,35 @@ def find_nwm_incoming_streams(args): pointdistancetoline = pygeos.linear.line_locate_point(polybin_geom,stream_point_geom) referencedpoint = pygeos.linear.line_interpolate_point(polybin_geom, pointdistancetoline) - # convert geometries to wkb representation + # Convert geometries to wkb representation bin_referencedpoint = pygeos.io.to_wkb(referencedpoint) - # convert to shapely geometries + # Convert to shapely geometries shply_referencedpoint = loads(bin_referencedpoint) - # collect all nhd stream segment linestring verticies + # Collect all nhd stream segment linestring verticies intersecting_points = intersecting_points + [shply_referencedpoint] + nhdplus_ids = nhdplus_ids + [nhdplus_id] + mainstem_flag = mainstem_flag + [mainstem] + + del huc_mask - huc_intersection = gpd.GeoDataFrame({'geometry' : intersecting_points},crs=nwm_streams.crs,geometry='geometry') + huc_intersection = gpd.GeoDataFrame({'geometry': intersecting_points, 'NHDPlusID': nhdplus_ids,'mainstem': mainstem_flag},crs=nwm_streams.crs,geometry='geometry') huc_intersection = huc_intersection.drop_duplicates() - huc_intersection.to_file(output_filename,driver=getDriver(output_filename)) + del nwm_streams,wbd + return huc_intersection -def collect_stream_attributes(args, huc): - print ('Starting huc: ' + str(huc)) - in_dir = args[0] - nwm_dir = args[1] - ahps_dir = args[2] - print ('Collecting NHDPlus HR attributes') - burnline_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') - vaa_filename = os.path.join(in_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') - flowline_filename = os.path.join(in_dir,huc,'NHDFlowline' + str(huc) + '.gpkg') +def collect_stream_attributes(nhdplus_vectors_dir, huc): + + print ('Starting huc: ' + str(huc)) + # Collecting NHDPlus HR attributes + burnline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg') + vaa_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusFlowLineVAA' + str(huc) + '.gpkg') + flowline_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDFlowline' + str(huc) + '.gpkg') - if os.path.exists(os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')): + if os.path.exists(os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '.gpkg')): burnline = gpd.read_file(burnline_filename) burnline = burnline[['NHDPlusID','ReachCode','geometry']] @@ -177,8 +209,8 @@ def collect_stream_attributes(args, huc): nhd_streams = nhd_streams.loc[nhd_streams.geometry!=None,:] # special case: remove segments without geometries nhd_streams['HUC4'] = str(huc) - # write out NHDPlus HR aggregated - nhd_streams_agg_fileName = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + # Write out NHDPlus HR aggregated + nhd_streams_agg_fileName = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') nhd_streams.to_file(nhd_streams_agg_fileName,driver=getDriver(nhd_streams_agg_fileName),index=False) del nhd_streams @@ -187,226 +219,185 @@ def collect_stream_attributes(args, huc): else: print ('missing data for huc ' + str(huc)) + def subset_stream_networks(args, huc): - nwm_dir = args[0] - ahps_dir = args[1] + nwm_headwaters_filename = args[0] + ahps_filename = args[1] wbd4 = args[2] wbd8 = args[3] - in_dir = args[4] - nwm_huc4_intersect_fr_filename = args[5] - nwm_huc4_intersect_ms_filename = args[6] + nhdplus_vectors_dir = args[4] + 
nwm_huc4_intersections_filename = args[5] print("starting HUC " + str(huc),flush=True) nwm_headwater_id = 'ID' - nwm_headwaters_filename = os.path.join(nwm_dir,'nwm_headwaters.gpkg') ahps_headwater_id = 'nws_lid' - ahps_headwaters_filename = os.path.join(ahps_dir,'nws_lid.gpkg') - nhd_streams_filename = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + headwater_pts_id = 'site_id' + column_order = ['pt_type', headwater_pts_id, 'mainstem', 'geometry'] + nhd_streams_filename = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') - # subset to reduce footprint - selected_wbd4 = wbd4.loc[wbd4.HUC4.str.startswith(str(huc))] + # Subset to reduce footprint + selected_wbd4 = wbd4.loc[wbd4.HUC4.str.startswith(huc)] del wbd4 selected_wbd8 = wbd8.loc[wbd8.HUC8.str.startswith(huc)] del wbd8 - huc_mask = selected_wbd4.loc[selected_wbd4.HUC4.str.startswith(str(huc))] + huc_mask = selected_wbd4.loc[selected_wbd4.HUC4.str.startswith(huc)] huc_mask = huc_mask.explode() huc_mask = huc_mask.reset_index(drop=True) if len(selected_wbd8.HUC8) > 0: selected_wbd8 = selected_wbd8.reset_index(drop=True) - # identify FR/NWM headwaters - nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersect_fr_filename) - - ## adjust FR/NWM headwater segments - nwm_headwaters = gpd.read_file(nwm_headwaters_filename, mask=huc_mask) - - if len(nwm_headwaters) > 0: - - adj_nhd_streams_fr, adj_nhd_headwater_points_fr = adjust_headwaters(str(huc),nhd_streams_fr,nwm_headwaters,nwm_headwater_id) + # Identify FR/NWM headwaters and subset HR network + nhd_streams_fr = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,nwm_headwaters_filename,nwm_headwater_id,nwm_huc4_intersections_filename) - nhd_streams_fr_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') - adj_nhd_headwaters_fr_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') + # Identify nhd mainstem streams + nhd_streams_all = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_fr,ahps_filename,ahps_headwater_id,nwm_huc4_intersections_filename,True) - # write out FR adjusted - adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) - adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) - - del adj_nhd_streams_fr, adj_nhd_headwater_points_fr - else: - print ('skipping FR headwater adjustments for HUC: ' + str(huc)) + # Identify HUC8 intersection points + nhd_huc8_intersections = find_nwm_incoming_streams(nhd_streams_all,selected_wbd8,8) - del nhd_streams_fr - - ## identify MS/AHPs headwaters - nhd_streams_ms = subset_nhd_network(huc,huc_mask,selected_wbd8,nhd_streams_filename,ahps_headwaters_filename,ahps_headwater_id,nwm_huc4_intersect_ms_filename) + # Load nwm headwaters + nwm_headwaters = gpd.read_file(nwm_headwaters_filename, mask=huc_mask) + nwm_headwaters['pt_type'] = 'nwm_headwater' + nwm_headwaters = nwm_headwaters.rename(columns={"ID": headwater_pts_id}) - ## adjust MS/AHPs headwater segments - ahps_headwaters = gpd.read_file(ahps_headwaters_filename, mask=huc_mask) + # Load nws lids + nws_lids = gpd.read_file(ahps_filename, mask=huc_mask) + nws_lids = nws_lids.drop(columns=['name','nwm_featur']) + nws_lids = nws_lids.rename(columns={"nws_lid": headwater_pts_id}) + nws_lids['pt_type'] = 
'nws_lid' + nws_lids['mainstem'] = True - if len(ahps_headwaters) > 0: + if (len(nwm_headwaters) > 0) or (len(nws_lids) > 0): + # Adjust FR/NWM headwater segments + adj_nhd_streams_all, adj_nhd_headwater_points = adjust_headwaters(huc,nhd_streams_all,nwm_headwaters,nws_lids,headwater_pts_id) - adj_nhd_streams_ms, adj_nhd_headwater_points_ms = adjust_headwaters(str(huc),nhd_streams_ms,ahps_headwaters,ahps_headwater_id) + adj_nhd_headwater_points = adj_nhd_headwater_points[column_order] - nhd_streams_ms_adjusted_fileName=os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') - adj_nhd_headwaters_ms_fileName=os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') + nhd_huc8_intersections['pt_type'] = 'nhd_huc8_intersections' + nhd_huc8_intersections = nhd_huc8_intersections.rename(columns={"NHDPlusID": headwater_pts_id}) + nhd_huc8_intersections = nhd_huc8_intersections[column_order] + adj_nhd_headwater_points_all = adj_nhd_headwater_points.append(nhd_huc8_intersections) + adj_nhd_headwater_points_all = adj_nhd_headwater_points_all.reset_index(drop=True) - # write out MS adjusted - adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) - adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) + adj_nhd_streams_all_fileName = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + adj_nhd_headwaters_all_fileName = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') - del adj_nhd_streams_ms, adj_nhd_headwater_points_ms + # Write out FR adjusted + adj_nhd_streams_all.to_file(adj_nhd_streams_all_fileName,driver=getDriver(adj_nhd_streams_all_fileName),index=False) + adj_nhd_headwater_points_all.to_file(adj_nhd_headwaters_all_fileName,driver=getDriver(adj_nhd_headwaters_all_fileName),index=False) + del adj_nhd_streams_all, adj_nhd_headwater_points_all else: - print ('skipping MS headwater adjustments for HUC: ' + str(huc)) - del nhd_streams_ms + print (f"skipping headwater adjustments for HUC: {huc}") -def aggregate_stream_networks(in_dir,agg_dir, huc_list): - - for huc in huc_list: + del nhd_streams_fr - ## FR adjusted - adj_nhd_headwaters_fr_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_fr.gpkg') - nhd_fr_adj_huc_subset = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') - nhd_streams_fr_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_fr_adjusted.gpkg') - nhd_fr_adj_headwaters_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') - if os.path.isfile(nhd_fr_adj_huc_subset): - adj_nhd_streams_fr = gpd.read_file(nhd_fr_adj_huc_subset) +def aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list): - # write out FR adjusted - if os.path.isfile(nhd_streams_fr_adjusted_fileName): - adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False, mode='a') - else: - adj_nhd_streams_fr.to_file(nhd_streams_fr_adjusted_fileName,driver=getDriver(nhd_streams_fr_adjusted_fileName),index=False) + for huc in huc_list: - del adj_nhd_streams_fr + # aggregated final filenames + nhd_agg_adj_huc_subset = os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + nhd_agg_adj_headwaters_subset = os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + 
'_headwaters_adj.gpkg') - if os.path.isfile(nhd_fr_adj_headwaters_subset): - adj_nhd_headwater_points_fr = gpd.read_file(nhd_fr_adj_headwaters_subset) + if os.path.isfile(nhd_agg_adj_huc_subset): + adj_nhd_streams_all = gpd.read_file(nhd_agg_adj_huc_subset) - # write out FR adjusted - if os.path.isfile(adj_nhd_headwaters_fr_fileName): - adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False, mode='a') + # Write out FR adjusted + if os.path.isfile(agg_nhd_streams_adj_fileName): + adj_nhd_streams_all.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False, mode='a') else: - adj_nhd_headwater_points_fr.to_file(adj_nhd_headwaters_fr_fileName,driver=getDriver(adj_nhd_headwaters_fr_fileName),index=False) - - del adj_nhd_headwater_points_fr + adj_nhd_streams_all.to_file(agg_nhd_streams_adj_fileName,driver=getDriver(agg_nhd_streams_adj_fileName),index=False) - ## MS adjusted - adj_nhd_headwaters_ms_fileName=os.path.join(agg_dir,'nhd_headwaters_adjusted_ms.gpkg') - nhd_ms_adj_huc_subset = os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') - nhd_streams_ms_adjusted_fileName=os.path.join(agg_dir,'NHDPlusBurnLineEvent_ms_adjusted.gpkg') - nhd_ms_adj_headwater_subset = os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') + del adj_nhd_streams_all - if os.path.isfile(nhd_ms_adj_huc_subset): - adj_nhd_streams_ms = gpd.read_file(nhd_ms_adj_huc_subset) + if os.path.isfile(nhd_agg_adj_headwaters_subset): + adj_nhd_headwater_points_all = gpd.read_file(nhd_agg_adj_headwaters_subset) - # write out ms adjusted - if os.path.isfile(nhd_streams_ms_adjusted_fileName): - adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False, mode='a') + # Write out FR adjusted + if os.path.isfile(agg_nhd_headwaters_adj_fileName): + adj_nhd_headwater_points_all.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False, mode='a') else: - adj_nhd_streams_ms.to_file(nhd_streams_ms_adjusted_fileName,driver=getDriver(nhd_streams_ms_adjusted_fileName),index=False) + adj_nhd_headwater_points_all.to_file(agg_nhd_headwaters_adj_fileName,driver=getDriver(agg_nhd_headwaters_adj_fileName),index=False) - del adj_nhd_streams_ms + del adj_nhd_headwater_points_all - if os.path.isfile(nhd_ms_adj_headwater_subset): - adj_nhd_headwater_points_ms = gpd.read_file(nhd_ms_adj_headwater_subset) - # write out ms adjusted - if os.path.isfile(adj_nhd_headwaters_ms_fileName): - adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False, mode='a') - else: - adj_nhd_headwater_points_ms.to_file(adj_nhd_headwaters_ms_fileName,driver=getDriver(adj_nhd_headwaters_ms_fileName),index=False) +def clean_up_intermediate_files(nhdplus_vectors_dir): - del adj_nhd_headwater_points_ms - -def clean_up_intermediate_files(in_dir): - - for huc in os.listdir(in_dir): - agg_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') - fr_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr.gpkg') - fr_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_fr_adjusted.gpkg') - ms_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms.gpkg') - ms_adj_path= os.path.join(in_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_ms_adjusted.gpkg') - ms_headwater_adj_path= 
os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_ms.gpkg') - fr_headwater_adj_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_adjusted_fr.gpkg') - ms_headwater_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_ms.gpkg') - fr_headwater_path= os.path.join(in_dir,huc,'nhd' + str(huc) + '_headwaters_fr.gpkg') + for huc in os.listdir(nhdplus_vectors_dir): + agg_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_agg.gpkg') + streams_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + headwater_adj_path= os.path.join(nhdplus_vectors_dir,huc,'nhd' + str(huc) + '_headwaters_adj.gpkg') if os.path.exists(agg_path): os.remove(agg_path) - if os.path.exists(fr_path): - os.remove(fr_path) - - if os.path.exists(fr_adj_path): - os.remove(fr_adj_path) - - if os.path.exists(ms_path): - os.remove(ms_path) - - if os.path.exists(ms_adj_path): - os.remove(ms_adj_path) - - if os.path.exists(ms_headwater_adj_path): - os.remove(ms_headwater_adj_path) - - if os.path.exists(fr_headwater_adj_path): - os.remove(fr_headwater_adj_path) - - if os.path.exists(ms_headwater_path): - os.remove(ms_headwater_path) - - if os.path.exists(fr_headwater_path): - os.remove(fr_headwater_path) + if os.path.exists(streams_adj_path): + os.remove(streams_adj_path) + if os.path.exists(headwater_adj_path): + os.remove(headwater_adj_path) if(__name__=='__main__'): - ## generate NWM Headwaters + # # Generate NWM Headwaters # print ('deriving nwm headwater points') - # nwm_headwaters = findHeadWaterPoints(nwm_streams_fr_filename) + # nwm_headwaters = findHeadWaterPoints(nwm_streams_orig_filename) # nwm_headwaters['ID'] = nwm_headwaters.index + 1 - # nwm_headwaters.to_file(nwm_headwaters_filename,driver=getDriver(nwm_headwaters_filename),index=False) - + # nwm_headwaters.to_file(nwm_headwaters_filename,driver=getDriver(nwm_headwaters_filename),index=False,layer='nwm_headwaters') # del nwm_headwaters, nwm_streams - - ## subset NWM MS Streams - # nwm_subset_ms_args = (nwm_streams_fr_filename,in_dir,ahps_dir,nwm_streams_ms_filename) - # print ('deriving nwm ms streams') - # subset_nwm_ms_streams(nwm_subset_ms_args) - - ## generate NWM intersection points with WBD4 boundaries - # ms_nwm_intersect_args = (nwm_streams_ms_filename,wbd_filename,in_dir,nwm_huc4_intersections_ms_filename) - # fr_nwm_intersect_args = (nwm_streams_fr_filename,wbd_filename,in_dir,nwm_huc4_intersections_fr_filename) - # print ('deriving nwm ms intersection points') - # find_nwm_incoming_streams(ms_nwm_intersect_args) - # print ('deriving nwm fr intersection points') - # find_nwm_incoming_streams(fr_nwm_intersect_args) - - print ('loading wb4') + # + # # Identify NWM MS Streams + # print ('identifing nwm ms streams') + # ms_segments = identify_nwm_ms_streams(nwm_streams_orig_filename,ahps_filename,nwm_streams_all_filename) + # + # # Identify NWM MS Catchments + # print ('identifing nwm ms catchments') + # nwm_catchments = gpd.read_file(nwm_catchments_orig_filename) + # # Add column to FR nwm layer to indicate MS segments + # nwm_catchments['mainstem'] = np.where(nwm_catchments.ID.isin(ms_segments), 1, 0) + # nwm_catchments.to_file(nwm_catchments_all_filename,driver=getDriver(nwm_catchments_all_filename),index=False,layer='nwm_catchments') + # del nwm_catchments, ms_segments + + # # Generate NWM intersection points with WBD4 boundaries + # print ('deriving NWM fr/ms intersection points') + # huc4_intersection = 
find_nwm_incoming_streams(nwm_streams_all_filename,wbd_filename,4) + # huc4_intersection.to_file(nwm_huc4_intersections_filename,driver=getDriver(nwm_huc4_intersections_filename),layer='huc4_intersection') + # del huc4_intersection + + print ('loading HUC4s') wbd4 = gpd.read_file(wbd_filename, layer='WBDHU4') - print ('loading wb8') + print ('loading HUC8s') wbd8 = gpd.read_file(wbd_filename, layer='WBDHU8') - subset_arg_list = (nwm_dir,ahps_dir,wbd4,wbd8,in_dir,nwm_huc4_intersections_fr_filename,nwm_huc4_intersections_ms_filename) - collect_arg_list = (in_dir,nwm_dir,ahps_dir) + subset_arg_list = (nwm_headwaters_filename,ahps_filename,wbd4,wbd8,nhdplus_vectors_dir,nwm_huc4_intersections_filename) + huc_list = os.listdir(nhdplus_vectors_dir) + + missing_subsets = [] + for huc in os.listdir(nhdplus_vectors_dir): + streams_adj_path= os.path.join(nhdplus_vectors_dir,huc,'NHDPlusBurnLineEvent' + str(huc) + '_adj.gpkg') + if not os.path.isfile(streams_adj_path): + missing_subsets = missing_subsets + [huc] - num_workers=9 + print (f"running subset_results on {len(missing_subsets)} HUC4s") + num_workers=11 with ProcessPoolExecutor(max_workers=num_workers) as executor: - ## preprocess nhd hr and add attributes - collect_attributes = [executor.submit(collect_stream_attributes, collect_arg_list, str(huc)) for huc in os.listdir(in_dir)] - ## subset nhd hr network - subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in os.listdir(in_dir)] + # Preprocess nhd hr and add attributes + # collect_attributes = [executor.submit(collect_stream_attributes, nhdplus_vectors_dir, str(huc)) for huc in huc_list] + # Subset nhd hr network + subset_results = [executor.submit(subset_stream_networks, subset_arg_list, str(huc)) for huc in missing_subsets] + # del wbd4,wbd8 - ## aggregate fr and ms nhd netowrks for entire nwm domain - aggregate_stream_networks(in_dir,agg_dir, os.listdir(in_dir)) + # Aggregate fr and ms nhd netowrks for entire nwm domain + aggregate_stream_networks(nhdplus_vectors_dir,agg_nhd_headwaters_adj_fileName,agg_nhd_streams_adj_fileName,huc_list) - ## remove intermediate files - # clean_up_intermediate_files(in_dir) + # Remove intermediate files + # clean_up_intermediate_files(nhdplus_vectors_dir) diff --git a/src/agreedem.py b/src/agreedem.py index 15ae40c4c..dbff2d2d4 100755 --- a/src/agreedem.py +++ b/src/agreedem.py @@ -45,8 +45,6 @@ def agreedem(rivers_raster, dem, output_raster, workspace, grass_workspace, buff # Import dem layer and river layer and get dem profile. 
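For context on the two lines removed from `agreedem.py` just below: the function copies the DEM's rasterio profile and previously remapped a nodata value of 0.0 to -999 before writing derived rasters. A minimal, hypothetical illustration of that profile-copy pattern (the file path and values are placeholders):

```python
import rasterio

with rasterio.open("dem.tif") as elev:      # placeholder path
    dem_profile = elev.profile.copy()
    # The guard dropped by this patch was conceptually:
    if elev.nodata == 0.0:
        dem_profile.update(nodata=-999)
    # dem_profile can then be passed as **kwargs when writing output rasters
```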
elev = rasterio.open(dem) dem_profile = elev.profile - if elev.nodata == 0.0: - dem_profile.update(nodata = -999) rivers = rasterio.open(rivers_raster) diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index 654fe6e4f..f29217e76 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -7,7 +7,7 @@ from shapely.geometry import MultiPolygon,Polygon,Point from utils.shared_functions import getDriver -def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks=False): +def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,dissolveLinks=False): hucUnitLength = len(str(hucCode)) @@ -22,76 +22,80 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l landsea.to_file(subset_landsea_filename,driver=getDriver(subset_landsea_filename),index=False) del landsea - # find intersecting lakes and writeout + # Find intersecting lakes and writeout print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nwm_lakes = gpd.read_file(nwm_lakes_filename, mask = wbd_buffer) if not nwm_lakes.empty: - # perform fill process to remove holes/islands in the NWM lake polygons + # Perform fill process to remove holes/islands in the NWM lake polygons nwm_lakes = nwm_lakes.explode() nwm_lakes_fill_holes=MultiPolygon(Polygon(p.exterior) for p in nwm_lakes['geometry']) # remove donut hole geometries - # loop through the filled polygons and insert the new geometry + # Loop through the filled polygons and insert the new geometry for i in range(len(nwm_lakes_fill_holes)): nwm_lakes.loc[i,'geometry'] = nwm_lakes_fill_holes[i] nwm_lakes.to_file(subset_nwm_lakes_filename,driver=getDriver(subset_nwm_lakes_filename),index=False) del nwm_lakes - # find intersecting levee lines + # Find intersecting levee lines print("Subsetting NLD levee lines for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nld_lines = gpd.read_file(nld_lines_filename, mask = wbd_buffer) if not nld_lines.empty: nld_lines.to_file(subset_nld_lines_filename,driver=getDriver(subset_nld_lines_filename),index=False) del nld_lines - # find intersecting nwm_catchments - print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) - nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) - nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) - del nwm_catchments - - # subset nhd headwaters + # Subset nhd headwaters print("Subsetting NHD Headwater Points for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nhd_headwaters = gpd.read_file(nhd_headwaters_filename, mask = wbd_buffer) + if extent == 'MS': + nhd_headwaters = nhd_headwaters.loc[nhd_headwaters.mainstem==1] - # subset nhd streams + if len(nhd_headwaters) > 0: + 
nhd_headwaters.to_file(subset_nhd_headwaters_filename,driver=getDriver(subset_nhd_headwaters_filename),index=False) + else: + print ("No headwater point(s) within HUC " + str(hucCode) + " boundaries.") + sys.exit(0) + del nhd_headwaters + + # Subset nhd streams print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd_buffer) + if extent == 'MS': + nhd_streams = nhd_streams.loc[nhd_streams.mainstem==1] - ## identify local headwater stream segments - nhd_streams_subset = gpd.read_file(nhd_streams_filename, mask = wbd) - nhd_streams_subset = nhd_streams_subset.loc[~nhd_streams_subset.FromNode.isin(list(set(nhd_streams_subset.ToNode) & set(nhd_streams_subset.FromNode)))] - nhd_streams_subset = nhd_streams_subset[~nhd_streams_subset['is_headwater']] - - if not nhd_streams_subset.empty: - nhd_streams_subset = nhd_streams_subset.reset_index(drop=True) - start_coords = [] - NHDPlusIDs = [] - for index, linestring in enumerate(nhd_streams_subset.geometry): - start_coords = start_coords + [linestring.coords[-1]] - NHDPlusIDs = NHDPlusIDs + [nhd_streams_subset.iloc[index].NHDPlusID] - - start_geoms = [Point(point) for point in start_coords] - local_headwaters = gpd.GeoDataFrame({'NHDPlusID': NHDPlusIDs,'geometry': start_geoms}, crs=projection, geometry='geometry') - nhd_headwaters = nhd_headwaters.append(local_headwaters) + if len(nhd_streams) > 0: + # Find incoming stream segments (to WBD buffer) and identify which are upstream + threshold_segments = gpd.overlay(nhd_streams, wbd_buffer, how='symmetric_difference') + from_list = threshold_segments.FromNode.to_list() + to_list = nhd_streams.ToNode.to_list() + missing_segments = list(set(from_list) - set(to_list)) - # nhd_streams = nhd_streams.loc[~nhd_streams.NHDPlusID.isin(NHDPlusIDs)] + # Remove incoming stream segment so it won't be routed as outflow during hydroconditioning + nhd_streams = nhd_streams.loc[~nhd_streams.FromNode.isin(missing_segments)] - if len(nhd_streams) > 0: nhd_streams.to_file(subset_nhd_streams_filename,driver=getDriver(subset_nhd_streams_filename),index=False) else: print ("No NHD streams within HUC " + str(hucCode) + " boundaries.") sys.exit(0) + del nhd_streams - if len(nhd_headwaters) > 0: - nhd_headwaters.to_file(subset_nhd_headwaters_filename,driver=getDriver(subset_nhd_headwaters_filename),index=False) - del nhd_headwaters, nhd_streams + # Find intersecting nwm_catchments + print("Subsetting NWM Catchments for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + nwm_catchments = gpd.read_file(nwm_catchments_filename, mask = wbd_buffer) + if extent == 'MS': + nwm_catchments = nwm_catchments.loc[nwm_catchments.mainstem==1] + + if len(nwm_catchments) > 0: + nwm_catchments.to_file(subset_nwm_catchments_filename,driver=getDriver(subset_nwm_catchments_filename),index=False) else: - print ("No headwater point(s) within HUC " + str(hucCode) + " boundaries.") + print ("No NHD catchments within HUC " + str(hucCode) + " boundaries.") sys.exit(0) + del nwm_catchments - # subset nwm streams + # Subset nwm streams print("Subsetting NWM Streams and deriving headwaters for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nwm_streams = gpd.read_file(nwm_streams_filename, mask = wbd_buffer) + if extent == 'MS': + nwm_streams = nwm_streams.loc[nwm_streams.mainstem==1] nwm_streams.to_file(subset_nwm_streams_filename,driver=getDriver(subset_nwm_streams_filename),index=False) del nwm_streams @@ -115,8 +119,10 @@ def 
subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l parser.add_argument('-e','--subset-nhd-headwaters',help='NHD headwaters subset',required=True,default=None) parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) + parser.add_argument('-extent','--extent',help='FIM extent',required=True) parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False) + args = vars(parser.parse_args()) hucCode = args['hucCode'] @@ -136,6 +142,7 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l subset_nhd_headwaters_filename = args['subset_nhd_headwaters'] subset_nwm_streams_filename = args['subset_nwm_streams'] subset_landsea_filename = args['subset_landsea'] + extent = args['extent'] dissolveLinks = args['dissolve_links'] - subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,dissolveLinks) + subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,dissolveLinks) diff --git a/src/reduce_nhd_stream_density.py b/src/reduce_nhd_stream_density.py index cce2fa7ca..9614bfe32 100644 --- a/src/reduce_nhd_stream_density.py +++ b/src/reduce_nhd_stream_density.py @@ -3,114 +3,139 @@ import geopandas as gpd import pandas as pd import numpy as np -from os.path import splitext -from tqdm import tqdm from collections import deque import argparse import pygeos from shapely.wkb import dumps from utils.shared_functions import getDriver -def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_filename,headwaters_filename,headwater_id,nwm_intersections_filename): - headwater_streams = pd.DataFrame() +def subset_nhd_network(huc4,huc4_mask,selected_wbd8,nhd_streams_,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False): - nhd_streams = gpd.read_file(nhd_streams_filename) + headwater_streams = pd.DataFrame() + if mainstem_flag == False: + nhd_streams = gpd.read_file(nhd_streams_) + headwater_col = 'is_headwater' + id_col = 'headwaters_id' + n = -1 + else: + nhd_streams = nhd_streams_.copy() + headwater_col = 'mainstem' + id_col = 'nws_lid' + n = '' + + # Locate the closest NHDPlus HR stream segment to NWM headwater points. 
Done by HUC8 to reduce processing time and to contain NWM headwater in the same HUC for index, row in selected_wbd8.iterrows(): huc = row["HUC8"] + # Double check that this is a nested HUC (probably overkill) if huc.startswith(str(huc4)): - huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8.str.startswith(huc)] + + huc8_mask = selected_wbd8.loc[selected_wbd8.HUC8==huc] huc8_mask = huc8_mask.reset_index(drop=True) + # Masking headwaters by HUC8 headwaters_mask = gpd.read_file(headwaters_filename, mask = huc8_mask) headwaters_mask = headwaters_mask.reset_index(drop=True) - streams_subset = gpd.read_file(nhd_streams_filename, mask = huc8_mask) + # Masking subset streams by HUC8 + if mainstem_flag == False: + streams_subset = gpd.read_file(nhd_streams_, mask = huc8_mask) + else: + streams_subset = nhd_streams.loc[nhd_streams.HUC8==huc].copy() if not streams_subset.empty: - streams_subset.loc[:,'is_headwater'] = False + streams_subset[headwater_col] = False streams_subset = streams_subset.reset_index(drop=True) + # Create WKB geometry column streams_subset['b_geom'] = None for index, linestring in enumerate(streams_subset.geometry): streams_subset.at[index, 'b_geom'] = dumps(linestring) - # create pygeos nhd stream geometries from WKB representation + # Create pygeos nhd stream geometries from WKB representation streambin_geom = pygeos.io.from_wkb(streams_subset['b_geom']) - streams_subset.loc[:,'HUC8'] = str(huc) - - if headwaters_mask[headwater_id].dtype=='int': - n = -1 - else: - n = '' + # Add HUC8 column + streams_subset['HUC8'] = str(huc) - streams_subset.loc[:,'headwaters_id'] = n + # Add headwaters_id column + streams_subset[id_col] = n - # find stream segment closest to headwater point + # Find stream segment closest to headwater point for index, point in headwaters_mask.iterrows(): - # convert headwaterpoint geometries to WKB representation + # Convert headwaterpoint geometries to WKB representation wkb_points = dumps(point.geometry) - # create pygeos headwaterpoint geometries from WKB representation + # Create pygeos headwaterpoint geometries from WKB representation pointbin_geom = pygeos.io.from_wkb(wkb_points) - # distance to each stream segment + # Distance to each stream segment distances = pygeos.measurement.distance(streambin_geom, pointbin_geom) - # find minimum distance + # Find minimum distance min_index = np.argmin(distances) # Closest segment to headwater - streams_subset.loc[min_index,'is_headwater'] = True - streams_subset.loc[min_index,'headwaters_id'] = point[headwater_id] + streams_subset.loc[min_index,headwater_col] = True + streams_subset.loc[min_index,id_col] = point[headwater_id] - headwater_streams = headwater_streams.append(streams_subset[['NHDPlusID','is_headwater','headwaters_id','HUC8']]) + headwater_streams = headwater_streams.append(streams_subset[['NHDPlusID',headwater_col,id_col,'HUC8']]) - headwater_streams = headwater_streams.sort_values('is_headwater', ascending=False).drop_duplicates('NHDPlusID') # keeps headwater=True for conflicting duplicates - nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='inner') + headwater_streams = headwater_streams.sort_values(headwater_col, ascending=False).drop_duplicates('NHDPlusID') # keeps headwater=True for conflicting duplicates + + if mainstem_flag == False: + nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='inner') + else: + headwater_streams = headwater_streams.drop(columns=['HUC8']) + nhd_streams = nhd_streams.merge(headwater_streams,on='NHDPlusID',how='outer') + 
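Both `identify_nwm_ms_streams()` above and `get_downstream_segments()` below walk the drainage network downstream from flagged headwater segments with a breadth-first traversal over a `deque`. A stripped-down sketch of that traversal, assuming a toy table with an `ID`, a downstream `to` pointer (0 for outlets), and a boolean start flag (column names are illustrative):

```python
from collections import deque
import pandas as pd

streams = pd.DataFrame({
    "ID":   [1, 2, 3, 4],
    "to":   [2, 3, 0, 3],                # downstream neighbor; 0 = outlet
    "flag": [True, False, False, False]  # True = headwater / start segment
}).set_index("ID", drop=False)

streams["relevant"] = streams["flag"].copy()

Q = deque(streams.loc[streams["flag"], "ID"].tolist())
visited = set()
while Q:
    q = Q.popleft()
    if q in visited:
        continue
    visited.add(q)
    to_node = streams.loc[q, "to"]
    if to_node != 0:
        streams.loc[streams.ID == to_node, "relevant"] = True
        if to_node not in visited:
            Q.append(to_node)

# Segments 1 -> 2 -> 3 are kept; segment 4 is not downstream of a flagged
# headwater, so it is dropped
subset = streams.loc[streams["relevant"]]
```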
nhd_streams[id_col] = nhd_streams[id_col].fillna(n) + nhd_streams[headwater_col] = nhd_streams[headwater_col].fillna(0) del selected_wbd8, streams_subset, headwater_streams huc4_mask_buffer = huc4_mask.buffer(10) - # identify inflowing streams + # Identify inflowing streams nwm_intersections = gpd.read_file(nwm_intersections_filename, mask=huc4_mask_buffer) - nhd_streams['downstream_of_headwater'] = False + if mainstem_flag == False: + nhd_streams['downstream_of_headwater'] = False + else: + nwm_intersections = nwm_intersections.loc[nwm_intersections.mainstem==1] + nhd_streams = nhd_streams.explode() nhd_streams = nhd_streams.reset_index(drop=True) - # find stream segment closest to nwm intersection point + # Find stream segment closest to nwm intersection point for index, point in nwm_intersections.iterrows(): - # distance to each stream segment + # Distance to each stream segment distances = nhd_streams.distance(point.geometry) - # find minimum distance + # Find minimum distance min_index = np.argmin(distances) - # update attributes for incoming stream - nhd_streams.loc[min_index,'is_headwater'] = True - nhd_streams.loc[min_index,'downstream_of_headwater'] = True + # Update attributes for incoming stream + nhd_streams.loc[min_index,headwater_col] = True - ## subset NHDPlus HR - nhd_streams['is_relevant_stream'] = nhd_streams['is_headwater'].copy() + if mainstem_flag == False: + nhd_streams.loc[min_index,'downstream_of_headwater'] = True + nhd_streams['is_relevant_stream'] = nhd_streams[headwater_col].copy() - # trace down from headwaters + # Trace down from headwaters nhd_streams.set_index('NHDPlusID',inplace=True,drop=False) - nhd_streams = get_downstream_segments(nhd_streams, 'is_headwater') + nhd_streams = get_downstream_segments(nhd_streams,headwater_col,mainstem_flag) nhd_streams = nhd_streams.loc[nhd_streams['is_relevant_stream'],:] nhd_streams.reset_index(drop=True,inplace=True) - return(nhd_streams) + return nhd_streams -def get_downstream_segments(streams, attribute): +def get_downstream_segments(streams, attribute,mainstem_flag): Q = deque(streams.loc[streams[attribute],'NHDPlusID'].tolist()) visited = set() @@ -136,8 +161,11 @@ def get_downstream_segments(streams, attribute): else: relevant_ids = downstream_ids - streams.loc[relevant_ids,'is_relevant_stream'] = True - streams.loc[relevant_ids,'downstream_of_headwater'] = True + if mainstem_flag == False: + streams.loc[relevant_ids,'is_relevant_stream'] = True + streams.loc[relevant_ids,'downstream_of_headwater'] = True + else: + streams.loc[relevant_ids,'mainstem'] = True for i in relevant_ids: if i not in visited: @@ -155,11 +183,22 @@ def get_downstream_segments(streams, attribute): parser.add_argument('-a','--headwaters-filename',help='Headwaters points layer name',required=True,type=str) parser.add_argument('-s','--subset-nhd-streams-fileName',help='Output streams layer name',required=False,type=str,default=None) parser.add_argument('-i','--headwater-id',help='Headwater points ID column',required=True) - parser.add_argument('-i','--nwm-intersections-filename',help='NWM HUC4 intersection points',required=True) + parser.add_argument('-c','--nwm-intersections-filename',help='NWM HUC4 intersection points',required=True) + parser.add_argument('-d','--mainstem-flag',help='flag for mainstems network',required=False,default=False) args = vars(parser.parse_args()) - subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id) + huc_number = args['huc_number'] + huc4_mask = 
args['huc4_mask'] + selected_wbd8 = args['selected_wbd8'] + nhd_streams = args['nhd_streams'] + headwaters_filename = args['headwaters_filename'] + subset_nhd_streams_fileName = args['subset_nhd_streams_fileName'] + headwater_id = args['headwater_id'] + nwm_intersections_filename = args['nwm_intersections_filename'] + mainstem_flag = args['mainstem_flag'] + + subset_streams_gdf = subset_nhd_network(huc_number,huc4_mask,selected_wbd8,nhd_streams,headwaters_filename,headwater_id,nwm_intersections_filename,mainstem_flag=False) if subset_nhd_streams_fileName is not None: - subset_streams_gdf.to_file(args['subset_nhd_streams_fileName'],driver=getDriver(args['subset_nhd_streams_fileName']),index=False) + subset_streams_gdf.to_file(subset_nhd_streams_fileName,driver=getDriver(subset_nhd_streams_fileName),index=False) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index c8f490696..0c5e65cf5 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -43,19 +43,6 @@ else input_LANDSEA=$inputDataDir/landsea/water_polygons_us.gpkg fi -# Define streams and headwaters based on extent # -if [ "$extent" = "MS" ]; then - input_nhd_flowlines=$input_nhd_flowlines_ms - input_nhd_headwaters=$input_nhd_headwaters_ms - input_NWM_Flows=$input_NWM_Flows_ms - input_NWM_Catchments=$input_NWM_Catchments_ms -else - input_nhd_flowlines=$input_nhd_flowlines_fr - input_nhd_headwaters=$input_nhd_headwaters_fr - input_NWM_Flows=$input_NWM_Flows_fr - input_NWM_Catchments=$input_NWM_Catchments_fr -fi - ## GET WBD ## echo -e $startDiv"Get WBD $hucNumber"$stopDiv date -u @@ -77,7 +64,7 @@ echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ] && \ -$srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_NWM_Flows -s $input_nhd_flowlines -l $input_NWM_Lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_NWM_Catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg +$srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_nwm_flows -s $input_nhd_flowlines -l $input_nwm_lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_nwm_catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -extent $extent Tcount if [ "$extent" = "MS" ]; then @@ -421,7 +408,7 @@ echo -e $startDiv"Finalize catchments and model streams $hucNumber"$stopDiv outp date -u Tstart [ ! 
-f $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg ] && \ -$srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -u $inputDataDir/bathymetry/BANKFULL_CONUS.txt -v $outputHucDataDir/bathy_crosswalk_calcs.csv -e $outputHucDataDir/bathy_stream_order_calcs.csv -g $outputHucDataDir/bathy_thalweg_flag.csv -i $outputHucDataDir/bathy_xs_area_hydroid_lookup.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_NWM_Catchments -p $extent -k $outputHucDataDir/small_segments.csv +$srcDir/add_crosswalk.py -d $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes.gpkg -a $outputHucDataDir/demDerived_reaches_split_filtered.gpkg -s $outputHucDataDir/src_base.csv -u $inputDataDir/bathymetry/BANKFULL_CONUS.txt -v $outputHucDataDir/bathy_crosswalk_calcs.csv -e $outputHucDataDir/bathy_stream_order_calcs.csv -g $outputHucDataDir/bathy_thalweg_flag.csv -i $outputHucDataDir/bathy_xs_area_hydroid_lookup.csv -l $outputHucDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked.gpkg -f $outputHucDataDir/demDerived_reaches_split_filtered_addedAttributes_crosswalked.gpkg -r $outputHucDataDir/src_full_crosswalked.csv -j $outputHucDataDir/src.json -x $outputHucDataDir/crosswalk_table.csv -t $outputHucDataDir/hydroTable.csv -w $outputHucDataDir/wbd8_clp.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -y $outputHucDataDir/nwm_catchments_proj_subset.tif -m $manning_n -z $input_nwm_catchments -p $extent -k $outputHucDataDir/small_segments.csv Tcount ## USGS CROSSWALK ## diff --git a/src/usgs_gage_crosswalk.py b/src/usgs_gage_crosswalk.py index c29a92acf..c85cdbe32 100755 --- a/src/usgs_gage_crosswalk.py +++ b/src/usgs_gage_crosswalk.py @@ -41,6 +41,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in input_catchment = gpd.read_file(input_catchment_filename) dem_adj = rasterio.open(dem_adj_filename,'r') + #MS extent use gages that are mainstem if extent == "MS": usgs_gages = usgs_gages.query('curve == "yes" & mainstem == "yes"') @@ -118,6 +119,7 @@ def crosswalk_usgs_gage(usgs_gages_filename,dem_filename,input_flows_filename,in parser.add_argument('-dem_adj','--dem-adj-filename', help='Thalweg adjusted DEM', required=True) parser.add_argument('-outtable','--output-table-filename', help='Table to append data', required=True) parser.add_argument('-e', '--extent', help="extent configuration entered by user when running fim_run.sh", required = True) + args = vars(parser.parse_args()) usgs_gages_filename = args['usgs_gages_filename'] diff --git a/src/utils/shared_functions.py b/src/utils/shared_functions.py index 876c1769e..78d884eea 100644 --- a/src/utils/shared_functions.py +++ b/src/utils/shared_functions.py @@ -148,6 +148,7 @@ def update_raster_profile(args): gdal bindings are not entirely compatible: https://rasterio.readthedocs.io/en/latest/topics/switch.html ''' + def reproject_raster(input_raster_name,reprojection,blocksize=None,reprojected_raster_name=None): if blocksize is 
not None: diff --git a/src/utils/shared_variables.py b/src/utils/shared_variables.py index 37a98b424..fefad3cfa 100644 --- a/src/utils/shared_variables.py +++ b/src/utils/shared_variables.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +import os + # Projections. #PREP_PROJECTION = "+proj=aea +datum=NAD83 +x_0=0.0 +y_0=0.0 +lon_0=96dW +lat_0=23dN +lat_1=29d30'N +lat_2=45d30'N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs +units=m" PREP_PROJECTION_CM = 'PROJCS["USA_Contiguous_Albers_Equal_Area_Conic_USGS_version",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Albers"],PARAMETER["false_easting",0.0],PARAMETER["false_northing",0.0],PARAMETER["central_meridian",-96.0],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["latitude_of_origin",23.0],UNIT["Meter",1.0],VERTCS["NAVD_1988",VDATUM["North_American_Vertical_Datum_1988"],PARAMETER["Vertical_Shift",0.0],PARAMETER["Direction",1.0],UNIT["Centimeter",0.01]]]' @@ -34,3 +36,28 @@ OVERWRITE_WBD = 'OVERWRITE_WBD' OVERWRITE_NHD = 'OVERWRITE_NHD' OVERWRITE_ALL = 'OVERWRITE_ALL' + +## Input Paths and Directories +# Directories +os.environ['src_dir'] = '/foss_fim/src' +os.environ['input_dir'] = 'data/inputs' + +os.environ['nhdplus_rasters_dir'] = os.path.join(os.environ.get('input_dir'),'nhdplus_rasters') +os.environ['nhdplus_vectors_dir'] = os.path.join(os.environ.get('input_dir'),'nhdplus_vectors') +os.environ['nwm_dir'] = os.path.join(os.environ.get('input_dir'),'nwm_hydrofabric') +os.environ['wbd_dir'] = os.path.join(os.environ.get('input_dir'),'wbd') +os.environ['ahps_dir'] = os.path.join(os.environ.get('input_dir'),'ahp_sites') +os.environ['nhdplus_aggregate_dir'] = os.path.join(os.environ.get('input_dir'),'nhdplus_vectors_aggregate') + +# File Paths +os.environ['wbd_filename'] = os.path.join(os.environ.get('wbd_dir'),'WBD_National.gpkg') +os.environ['nwm_streams_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_flows_original.gpkg') +os.environ['nwm_streams_all_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_flows.gpkg') +os.environ['nwm_headwaters_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_headwaters.gpkg') +os.environ['nwm_huc4_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_huc4_intersections.gpkg') +os.environ['nhd_huc8_intersections_filename'] = os.path.join(os.environ.get('nwm_dir'),'nhd_huc8_intersections.gpkg') +os.environ['ahps_filename'] = os.path.join(os.environ.get('ahps_dir'),'nws_lid.gpkg') +os.environ['agg_nhd_headwaters_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_headwaters_adj.gpkg') +os.environ['agg_nhd_streams_adj_fileName'] = os.path.join(os.environ.get('nhdplus_aggregate_dir'),'agg_nhd_streams_adj.gpkg') +os.environ['nwm_catchments_orig_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments_original.gpkg') +os.environ['nwm_catchments_all_filename'] = os.path.join(os.environ.get('nwm_dir'),'nwm_catchments.gpkg') diff --git a/tools/rating_curve_comparison.py b/tools/rating_curve_comparison.py index c5bf9884d..1f3b138d6 100755 --- a/tools/rating_curve_comparison.py +++ b/tools/rating_curve_comparison.py @@ -35,6 +35,7 @@ stat_groups : str string of columns to group eval metrics. 
""" + def check_file_age(file): ''' Checks if file exists, determines the file age, and recommends From 25c3ec155cb4439f527bcd6dcdfb1415056f0975 Mon Sep 17 00:00:00 2001 From: TrevorGrout-NOAA <69653333+TrevorGrout-NOAA@users.noreply.github.com> Date: Mon, 3 May 2021 10:07:19 -0500 Subject: [PATCH 072/359] Generate nws_lid.gpkg Generate nws_lid.gpkg. - Generate nws_lid.gpkg with attributes indicating if site is a headwater nws_lid as well as if it is co-located with another nws_lid which is referenced to the same nwm_feature_id segment. This resolves #368. --- CHANGELOG.md | 8 ++ tools/generate_nws_lid.py | 180 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 tools/generate_nws_lid.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c6e3ade4..e10fd0822 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +

+## v3.0.15.9 - 2021-05-03 - [PR #372](https://github.com/NOAA-OWP/cahaba/pull/372) + +Generate `nws_lid.gpkg`. + +### Additions +- Generate `nws_lid.gpkg` with attributes indicating if site is a headwater `nws_lid` as well as if it is co-located with another `nws_lid` which is referenced to the same `nwm_feature_id` segment. +
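The headwater/co-location logic is easiest to see stripped of the API calls. Below is a rough, self-contained sketch of the traversal that the new `tools/generate_nws_lid.py` script (in the diff further down) performs; the segment IDs, lid names, and single-downstream `network` dictionary are made up for illustration, but the `is_headwater`/`is_colocated` labelling mirrors the script's `target` and `downstream` bookkeeping.

```python
from collections import defaultdict

# Hypothetical single-downstream network: {nwm segment: next segment}, 0 = outlet.
network = {100: 200, 200: 300, 300: 0, 400: 300, 500: 0}

# Hypothetical nws_lid sites keyed by lid, valued by their nwm_feature_id.
sites = {'abcd1': 100, 'efgh2': 300, 'ijkl3': 300, 'mnop4': 400}

# Group lids by segment; two lids on one segment are co-located.
target = defaultdict(list)
for lid, seg in sites.items():
    target[seg].append(lid)

# Every target segment starts as a headwater; walking downstream from each
# target demotes any other target segment it reaches.
status = {seg: 'is_headwater' for seg in target}
for seg in target:
    to_node = network.get(seg, 0)
    while to_node > 0:
        if to_node in status:
            status[to_node] = 'not_headwater'
        to_node = network.get(to_node, 0)

for seg, lids in target.items():
    colocated = 'is_colocated' if len(lids) > 1 else 'not_colocated'
    for lid in lids:
        print(lid, status[seg], colocated)
```

Grouping lids by segment first reduces the co-location test to a simple length check, which is essentially what the script does before writing the attributes out to `nws_lid.gpkg`.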

## v3.0.15.8 - 2021-04-29 - [PR #371](https://github.com/NOAA-OWP/cahaba/pull/371) diff --git a/tools/generate_nws_lid.py b/tools/generate_nws_lid.py new file mode 100644 index 000000000..a2110c57d --- /dev/null +++ b/tools/generate_nws_lid.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 + +from pathlib import Path +import pandas as pd +import geopandas as gpd +from collections import defaultdict +from tools_shared_functions import aggregate_wbd_hucs, get_metadata +import argparse +from dotenv import load_dotenv +import os +import sys +sys.path.append('/foss_fim/src') +from utils.shared_variables import PREP_PROJECTION + + +load_dotenv() +#import variables from .env file +API_BASE_URL = os.getenv("API_BASE_URL") +EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV") +WBD_LAYER = os.getenv("WBD_LAYER") +#Define path to NWM stream layer +NWM_FILE='/data/inputs/nwm_hydrofabric/nwm_flows.gpkg' + + +def generate_nws_lid(workspace): + ''' + Generate the nws_lid layer containing all nws_lid points attributed whether site is mainstems and co-located + + Parameters + ---------- + workspace : STR + Directory where outputs will be saved. + + Returns + ------- + None. + + ''' + + + ############################################################################## + #Get all nws_lid points + print('Retrieving metadata ..') + + metadata_url = f'{API_BASE_URL}/metadata/' + #Trace downstream from all rfc_forecast_point. + select_by = 'nws_lid' + selector = ['all'] + must_include = 'nws_data.rfc_forecast_point' + downstream_trace_distance = 'all' + fcst_list, fcst_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Get list of all evaluated sites not in fcst_list + fcst_list_sites = [record.get('identifiers').get('nws_lid').lower() for record in fcst_list] + evaluated_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].str.lower().to_list() + evaluated_sites= list(set(evaluated_sites) - set(fcst_list_sites)) + + #Trace downstream from all evaluated sites not in fcst_list + select_by = 'nws_lid' + selector = evaluated_sites + must_include = None + downstream_trace_distance = 'all' + eval_list, eval_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Trace downstream from all sites in HI/PR. 
+ select_by = 'state' + selector = ['HI','PR'] + must_include = None + downstream_trace_distance = 'all' + islands_list, islands_dataframe = get_metadata(metadata_url = metadata_url, select_by = select_by, selector = selector, must_include = must_include, upstream_trace_distance = None, downstream_trace_distance = downstream_trace_distance ) + + #Append all lists + all_lists = fcst_list + eval_list + islands_list + + ############################################################################### + #Compile NWM segments from all_lists + + #Get dictionary of downstream segment (key) and target segments (values) + #Get dictionary of target segment (key) and site code (value) + downstream = defaultdict(list) + target = defaultdict(list) + #For each lid metadata dictionary in list + for lid in all_lists: + site = lid.get('identifiers').get('nws_lid') + #Get the nwm feature id associated with the location + location_nwm_seg = lid.get('identifiers').get('nwm_feature_id') + #get all downstream segments + downstream_nwm_segs = lid.get('downstream_nwm_features') + #If valid location_nwm_segs construct two dictionaries. + if location_nwm_seg: + #Dictionary with target segment and site + target[int(location_nwm_seg)].append(site) + #Dictionary of key (2nd to last element) and value (target segment) + #2nd to last element used because last element is always 0 (ocean) and the 2nd to last allows for us to get the river 'tree' (Mississippi, Colorado, etc) + value = location_nwm_seg + if not downstream_nwm_segs: + #Special case, no downstream nwm segments are returned (PR/VI/HI). + key = location_nwm_seg + elif len(downstream_nwm_segs) == 1: + #Special case, the nws_lid is within 1 segment of the ocean (0) + key = location_nwm_seg + elif len(downstream_nwm_segs)>1: + #Otherwise, 2nd to last element used to identify proper river system. + key = downstream_nwm_segs[-2] + #Dictionary with key of 2nd to last downstream segment and value of site nwm segment + downstream[int(key)].append(int(value)) + ############################################################################### + #Walk downstream the network and identify headwater points + print('Traversing network..') + + #Import NWM file and create dictionary of network and create the NWM network dictionary. + nwm_gdf = gpd.read_file(NWM_FILE) + network = nwm_gdf.groupby('ID')['to'].apply(list).to_dict() + + #Walk through network and find headwater points + all_dicts = {} + for tree, targets in downstream.items(): + #All targets are assigned headwaters + sub_dict = {i:'is_headwater' for i in targets} + #Walk downstream of each target + for i in targets: + #Check to see element is not a headwater + if sub_dict[i] == 'not_headwater': + continue + #Get from_node and to_node. 
+ from_node = i + [to_node] = network[from_node] + #Walk downstream from target + while to_node>0: + [to_node] = network[to_node] + #Check if to_node is in targets list + if to_node in targets: + sub_dict[to_node] = 'not_headwater' + #Append status to master dictionary + all_dicts.update(sub_dict) + + #Create dictionaries of nws_lid (key) and headwater status (value) and nws_lid (key) and co-located with same feature_id(value) + final_dict = {} + duplicate_dict = {} + for key,status in all_dicts.items(): + site_list = target[key] + for site in site_list: + final_dict[site] = status + if len(site_list) > 1: + duplicate_dict[site] = 'is_colocated' + else: + duplicate_dict[site] = 'not_colocated' + + ############################################################################## + #Get Spatial data and populate headwater/duplicate attributes + print('Attributing nws_lid layer..') + + #Geodataframe from all_lists, reproject, and reset index. + trash, nws_lid_gdf = aggregate_wbd_hucs(all_lists, WBD_LAYER, retain_attributes = False) + nws_lid_gdf.columns = [name.replace('identifiers_','') for name in nws_lid_gdf.columns] + nws_lid_gdf.to_crs(PREP_PROJECTION, inplace = True) + nws_lid_gdf.reset_index(drop = True) + + #Create DataFrames of headwater and duplicates and join. + final_dict_pd = pd.DataFrame(list(final_dict.items()), columns = ['nws_lid','is_headwater']) + duplicate_dict_pd = pd.DataFrame(list(duplicate_dict.items()),columns = ['nws_lid','is_colocated']) + attributes = final_dict_pd.merge(duplicate_dict_pd, on = 'nws_lid') + attributes.replace({'is_headwater': True,'is_colocated': True,'not_headwater': False,'not_colocated':False}, inplace = True) + + #Join attributes, remove sites with no assigned nwm_feature_id and write to file + joined = nws_lid_gdf.merge(attributes, on='nws_lid', how = 'left') + joined.dropna(subset =['nwm_feature_id'], inplace = True) + Path(workspace).mkdir(parents = True, exist_ok = True) + joined.to_file(Path(workspace) / 'nws_lid.gpkg', driver = 'GPKG') + + + +if __name__ == '__main__': + #Parse arguments + parser = argparse.ArgumentParser(description = 'Create spatial data of nws_lid points attributed with mainstems and colocated.') + parser.add_argument('-w', '--workspace', help = 'Workspace where all data will be stored.', required = True) + args = vars(parser.parse_args()) + + #Run get_env_paths and static_flow_lids + generate_nws_lid(**args) From 22ba0df00e403681e12d8f247e35da124536a74a Mon Sep 17 00:00:00 2001 From: Brian Avant Date: Fri, 7 May 2021 12:51:19 -0500 Subject: [PATCH 073/359] Remove Great Lakes from wbd buffer - The gl_water_polygons.gpkg layer is used to mask out Great Lakes boundaries and remove NHDPlus HR coastline segments. These segments are causing issues later in run_by_unit.sh and unnecessarily increasing total processing time. Resolves issue #374 --- CHANGELOG.md | 8 ++++++++ fim_run.sh | 2 +- src/clip_vectors_to_wbd.py | 37 ++++++++++++++++++++++++++++++++++--- src/run_by_unit.sh | 12 ++---------- 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e10fd0822..e24396a88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +

+## v3.0.15.10 - 2021-05-06 - [PR #375](https://github.com/NOAA-OWP/cahaba/pull/375) + +Remove Great Lakes coastlines from WBD buffer. + +### Changes +- `gl_water_polygons.gpkg` layer is used to mask out Great Lakes boundaries and remove NHDPlus HR coastline segments. +
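Condensed to its essentials, the masking sequence added to `src/clip_vectors_to_wbd.py` (diffed below) is clip, buffer, difference. The sketch here assumes hypothetical file names and buffer distances; in the project the distances arrive through the new `-wb`/`-lb` arguments rather than being hard-coded.

```python
import geopandas as gpd

# Illustrative inputs; real paths and distances are passed in by run_by_unit.sh.
wbd = gpd.read_file('wbd.gpkg')                          # HUC boundary
great_lakes = gpd.read_file('gl_water_polygons.gpkg')    # Great Lakes polygons

# Buffer the HUC boundary in memory (replacing the removed ogr2ogr ST_buffer step).
wbd_buffer = wbd.copy()
wbd_buffer.geometry = wbd.geometry.buffer(5000, resolution=32)

# Keep only lake area inside the buffer, pad it slightly, then subtract it so
# NHDPlus HR coastline segments along the lakes fall outside the mask.
great_lakes = gpd.clip(great_lakes, wbd_buffer)
if not great_lakes.empty:
    great_lakes.geometry = great_lakes.buffer(1000)
    wbd_buffer = gpd.overlay(wbd_buffer, great_lakes, how='difference')

wbd_buffer[['geometry']].to_file('wbd_buffered.gpkg', driver='GPKG')
```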

## v3.0.15.9 - 2021-05-03 - [PR #372](https://github.com/NOAA-OWP/cahaba/pull/372) diff --git a/fim_run.sh b/fim_run.sh index 2cfc744e2..cf5de36da 100755 --- a/fim_run.sh +++ b/fim_run.sh @@ -116,7 +116,7 @@ export input_nwm_catchments=$inputDataDir/nwm_hydrofabric/nwm_catchments.gpkg export input_nwm_flows=$inputDataDir/nwm_hydrofabric/nwm_flows.gpkg export input_nhd_flowlines=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_streams_adj.gpkg export input_nhd_headwaters=$inputDataDir/nhdplus_vectors_aggregate/agg_nhd_headwaters_adj.gpkg - +export input_GL_boundaries=$inputDataDir/landsea/gl_water_polygons.gpkg ## Input handling ## $srcDir/check_huc_inputs.py -u "$hucList" diff --git a/src/clip_vectors_to_wbd.py b/src/clip_vectors_to_wbd.py index f29217e76..9c3d73470 100755 --- a/src/clip_vectors_to_wbd.py +++ b/src/clip_vectors_to_wbd.py @@ -7,15 +7,38 @@ from shapely.geometry import MultiPolygon,Polygon,Point from utils.shared_functions import getDriver -def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,dissolveLinks=False): +def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance,dissolveLinks=False): hucUnitLength = len(str(hucCode)) # Get wbd buffer wbd = gpd.read_file(wbd_filename) - wbd_buffer = gpd.read_file(wbd_buffer_filename) + wbd_buffer = wbd.copy() + wbd_buffer.geometry = wbd.geometry.buffer(wbd_buffer_distance,resolution=32) projection = wbd_buffer.crs + great_lakes = gpd.read_file(great_lakes_filename, mask = wbd_buffer).reset_index(drop=True) + + if not great_lakes.empty: + print("Masking Great Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) + + # Clip excess lake area + great_lakes = gpd.clip(great_lakes, wbd_buffer) + + # Buffer remaining lake area + great_lakes.geometry = great_lakes.buffer(lake_buffer_distance) + + # Removed buffered GL from WBD buffer + wbd_buffer = gpd.overlay(wbd_buffer, great_lakes, how='difference') + wbd_buffer = wbd_buffer[['geometry']] + wbd_buffer.to_file(wbd_buffer_filename,driver=getDriver(wbd_buffer_filename),index=False) + + else: + wbd_buffer = wbd_buffer[['geometry']] + wbd_buffer.to_file(wbd_buffer_filename,driver=getDriver(wbd_buffer_filename),index=False) + + del great_lakes + # Clip ocean water polygon for future masking ocean areas (where applicable) landsea = gpd.read_file(landsea_filename, mask = wbd_buffer) if not landsea.empty: @@ -25,6 +48,7 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l # Find intersecting lakes and writeout print("Subsetting NWM Lakes for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nwm_lakes = gpd.read_file(nwm_lakes_filename, mask = wbd_buffer) + nwm_lakes = nwm_lakes.loc[nwm_lakes.Shape_Area < 18990454000.0] if not nwm_lakes.empty: # Perform fill process to remove holes/islands in the 
NWM lake polygons @@ -59,6 +83,7 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l # Subset nhd streams print("Querying NHD Streams for HUC{} {}".format(hucUnitLength,hucCode),flush=True) nhd_streams = gpd.read_file(nhd_streams_filename, mask = wbd_buffer) + if extent == 'MS': nhd_streams = nhd_streams.loc[nhd_streams.mainstem==1] @@ -120,6 +145,9 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l parser.add_argument('-b','--subset-nwm-streams',help='NWM streams subset',required=True) parser.add_argument('-x','--subset-landsea',help='LandSea subset',required=True) parser.add_argument('-extent','--extent',help='FIM extent',required=True) + parser.add_argument('-gl','--great-lakes-filename',help='Great Lakes layer',required=True) + parser.add_argument('-wb','--wbd-buffer-distance',help='WBD Mask buffer distance',required=True,type=int) + parser.add_argument('-lb','--lake-buffer-distance',help='Great Lakes Mask buffer distance',required=True,type=int) parser.add_argument('-o','--dissolve-links',help='remove multi-line strings',action="store_true",default=False) @@ -143,6 +171,9 @@ def subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_l subset_nwm_streams_filename = args['subset_nwm_streams'] subset_landsea_filename = args['subset_landsea'] extent = args['extent'] + great_lakes_filename = args['great_lakes_filename'] + wbd_buffer_distance = args['wbd_buffer_distance'] + lake_buffer_distance = args['lake_buffer_distance'] dissolveLinks = args['dissolve_links'] - subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,dissolveLinks) + subset_vector_layers(hucCode,nwm_streams_filename,nhd_streams_filename,nwm_lakes_filename,nld_lines_filename,nwm_catchments_filename,nhd_headwaters_filename,landsea_filename,wbd_filename,wbd_buffer_filename,subset_nhd_streams_filename,subset_nld_lines_filename,subset_nwm_lakes_filename,subset_nwm_catchments_filename,subset_nhd_headwaters_filename,subset_nwm_streams_filename,subset_landsea_filename,extent,great_lakes_filename,wbd_buffer_distance,lake_buffer_distance,dissolveLinks) diff --git a/src/run_by_unit.sh b/src/run_by_unit.sh index 0c5e65cf5..01cc2c98a 100755 --- a/src/run_by_unit.sh +++ b/src/run_by_unit.sh @@ -37,7 +37,7 @@ input_NLD=$inputDataDir/nld_vectors/huc2_levee_lines/nld_preprocessed_"$huc2Iden # Define the landsea water body mask using either Great Lakes or Ocean polygon input # if [[ $huc2Identifier == "04" ]] ; then - input_LANDSEA=$inputDataDir/landsea/gl_water_polygons.gpkg + input_LANDSEA=$input_GL_boundaries echo -e "Using $input_LANDSEA for water body mask (Great Lakes)" else input_LANDSEA=$inputDataDir/landsea/water_polygons_us.gpkg @@ -51,20 +51,12 @@ Tstart ogr2ogr -f GPKG $outputHucDataDir/wbd.gpkg $input_WBD_gdb $input_NHD_WBHD_layer -where "HUC$hucUnitLength='$hucNumber'" Tcount -## BUFFER WBD ## -echo -e $startDiv"Buffer WBD $hucNumber"$stopDiv -date -u -Tstart -[ ! 
-f $outputHucDataDir/wbd_buffered.gpkg ] && \ -ogr2ogr -f GPKG -dialect sqlite -sql "select ST_buffer(geom, $wbd_buffer) from 'WBDHU$hucUnitLength'" $outputHucDataDir/wbd_buffered.gpkg $outputHucDataDir/wbd.gpkg -Tcount - ## Subset Vector Layers ## echo -e $startDiv"Get Vector Layers and Subset $hucNumber"$stopDiv date -u Tstart [ ! -f $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg ] && \ -$srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_nwm_flows -s $input_nhd_flowlines -l $input_nwm_lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_nwm_catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -extent $extent +$srcDir/clip_vectors_to_wbd.py -d $hucNumber -w $input_nwm_flows -s $input_nhd_flowlines -l $input_nwm_lakes -r $input_NLD -g $outputHucDataDir/wbd.gpkg -f $outputHucDataDir/wbd_buffered.gpkg -m $input_nwm_catchments -y $input_nhd_headwaters -v $input_LANDSEA -c $outputHucDataDir/NHDPlusBurnLineEvent_subset.gpkg -z $outputHucDataDir/nld_subset_levees.gpkg -a $outputHucDataDir/nwm_lakes_proj_subset.gpkg -n $outputHucDataDir/nwm_catchments_proj_subset.gpkg -e $outputHucDataDir/nhd_headwater_points_subset.gpkg -b $outputHucDataDir/nwm_subset_streams.gpkg -x $outputHucDataDir/LandSea_subset.gpkg -extent $extent -gl $input_GL_boundaries -lb $lakes_buffer_dist_meters -wb $wbd_buffer Tcount if [ "$extent" = "MS" ]; then From b5da4b4869ccff2d87b2326c29d378990daf1c3e Mon Sep 17 00:00:00 2001 From: NickChadwick-NOAA Date: Fri, 7 May 2021 14:45:39 -0500 Subject: [PATCH 074/359] Added functionality for new "Release" feature in FIM API New "Release" feature added to the FIM API. This feature will allow for automated FIM, CatFIM, and relevant metrics to be generated when a new FIM Version is released. See #373 for more detailed steps that take place in this feature. Additions - Added new window to the UI in api/frontend/gui/templates/index.html. - Added new job type to api/node/connector/connector.py to allow these release jobs to run. - Added additional logic in api/node/updater/updater.py to run the new eval and CatFIM scripts used in the release feature. Changes - Updated api/frontend/output_handler/output_handler.py to allow for copying broader ranges of file paths instead of only the /data/outputs directory. Resolves Issues #264, #278, #307, and #373 --- CHANGELOG.md | 13 + api/frontend/gui/templates/index.html | 158 +++++++-- api/frontend/output_handler/output_handler.py | 6 +- api/node/connector/connector.py | 157 +++++---- api/node/updater/updater.py | 306 ++++++++++++++---- 5 files changed, 486 insertions(+), 154 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e24396a88..fd4928cf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +

+## v3.0.16.0 - 2021-05-07 - [PR #378](https://github.com/NOAA-OWP/cahaba/pull/378) + +New "Release" feature added to the FIM API. This feature will allow for automated FIM, CatFIM, and relevant metrics to be generated when a new FIM Version is released. See [#373](https://github.com/NOAA-OWP/cahaba/issues/373) for more detailed steps that take place in this feature. + +### Additions +- Added new window to the UI in `api/frontend/gui/templates/index.html`. +- Added new job type to `api/node/connector/connector.py` to allow these release jobs to run. +- Added additional logic in `api/node/updater/updater.py` to run the new eval and CatFIM scripts used in the release feature. + +### Changes +- Updated `api/frontend/output_handler/output_handler.py` to allow for copying broader ranges of file paths instead of only the `/data/outputs` directory. +
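The output-handler change is only summarized above, so the following is a hypothetical sketch of one way a broader-than-`/data/outputs` copy could be guarded with a path allowlist. The roots, the function name, and the copy logic are assumptions for illustration, not the actual `api/frontend/output_handler/output_handler.py` implementation.

```python
import os
import shutil

# Assumed allowlist of copyable roots -- purely illustrative values.
ALLOWED_ROOTS = ('/data/outputs', '/data/test_cases')

def copy_if_allowed(src_path, dest_dir):
    """Copy a file or directory into dest_dir only if it resolves under an allowed root."""
    real = os.path.realpath(src_path)
    if not any(real == root or real.startswith(root + os.sep) for root in ALLOWED_ROOTS):
        raise ValueError(f'{src_path} is outside the allowed output locations')
    os.makedirs(dest_dir, exist_ok=True)
    if os.path.isdir(real):
        # Copy whole directories (e.g. a FIM version's outputs) under dest_dir.
        return shutil.copytree(real, os.path.join(dest_dir, os.path.basename(real)), dirs_exist_ok=True)
    return shutil.copy2(real, dest_dir)
```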

## v3.0.15.10 - 2021-05-06 - [PR #375](https://github.com/NOAA-OWP/cahaba/pull/375) diff --git a/api/frontend/gui/templates/index.html b/api/frontend/gui/templates/index.html index 4dd2c2fc3..421050dbd 100644 --- a/api/frontend/gui/templates/index.html +++ b/api/frontend/gui/templates/index.html @@ -59,6 +59,7 @@ color: black; background-color: white; + font-size: 0.9rem; transition: all 0.1s linear; @@ -137,6 +138,27 @@ background-color: #093568; } + #release-button { + display: flex; + width: 100%; + height: 100%; + + align-items: center; + justify-content: center; + + font-size: 1.2rem; + color: white; + background-color: #28be99; + cursor: pointer; + + transition: all 0.5s linear; + pointer-events: inherit; + } + + #release-button:hover, #release-button.active { + background-color: #09685b; + } + .form-field { display: flex; width: 0; @@ -224,15 +246,40 @@
+
Release
FIM Run
Calibration
Pre-processing
+

Basic

- + @@ -271,7 +318,7 @@

Extent

-
Not Connected
+
Not Connected