Commit

merge with dev
CarsonPruitt-NOAA committed May 17, 2024
2 parents 75edd96 + 39504f3 commit 6fc6d6b
Showing 12 changed files with 538 additions and 109 deletions.
4 changes: 4 additions & 0 deletions config/params_template.env
@@ -31,6 +31,10 @@ export branch_zero_id="0"
export mask_leveed_area_toggle=True # Toggle to mask levee-protected areas from DEM
export levee_id_attribute=SYSTEM_ID

#### Healed HAND ####
# Removes Hydro-conditioning Artifacts (true=on; false=off)
export healed_hand_hydrocondition=true

#### apply bathymetry adjustment to rating curve ####
export bathymetry_adjust=True

200 changes: 200 additions & 0 deletions data/update_benchmark_flows.py
@@ -0,0 +1,200 @@
#!/usr/bin/env python3

import argparse
import os
import shutil

import geopandas as gpd
import pandas as pd


gpd.options.io_engine = "pyogrio"


def update_benchmark_flows(fim_dir: str, output_dir_base: str, verbose: bool = False):
    """
    Update benchmark flows of the levelpath in the domain for stream segments missing from the flow file

    Parameters
    ----------
    fim_dir : str
        Location of FIM files (e.g., "/outputs/dev-4.4.11.0")
    output_dir_base : str
        Output directory (e.g., "/outputs/temp"). If None, the output files are saved in the
        input directory, and the original files are backed up with a ".bak" extension appended
        to the original filename.
    verbose : bool
        If True, print progress messages
    """

def iterate_over_sites(
levelpaths: gpd.GeoDataFrame, levelpaths_domain: gpd.GeoDataFrame, flows: pd.DataFrame
) -> pd.DataFrame:
"""
Update benchmark flows of stream segments missing from the flow file
Parameters
----------
levelpaths : gpd.GeoDataFrame
Level paths of the HUC
levelpaths_domain : gpd.GeoDataFrame
Levelpaths in the domain
flows : pd.DataFrame
Benchmark flows
Returns
-------
pd.DataFrame
Flows with updated benchmark flows
"""

if levelpaths_domain.empty:
return flows

# Find the levelpaths with the highest order (to exclude tributaries)
levelpaths_domain = levelpaths_domain[
levelpaths_domain['order_'] == levelpaths_domain['order_'].max()
]

# Find the levelpath that has flows in the flow file
        if levelpaths_domain['levpa_id'].nunique() > 1:
            # If there are multiple levelpaths with the highest order, take the longest one:
            # intersect the levelpaths with the domain (read from the enclosing scope) to get
            # the length of each levelpath inside the domain.
            levelpaths_domain_intersect = gpd.overlay(levelpaths, domain, how='intersection')
levelpaths_domain_intersect['length'] = levelpaths_domain_intersect['geometry'].length

# Get the total length of all the segments in the levelpath
levelpaths_domain_intersect_length = (
levelpaths_domain_intersect.groupby('levpa_id').agg({'length': 'sum'}).reset_index()
)

# Get the longest levelpath
levelpath = levelpaths_domain_intersect_length.loc[
levelpaths_domain_intersect_length['length'].idxmax(), 'levpa_id'
]

else:
levelpath = levelpaths_domain['levpa_id'].iloc[0]

# Get IDs of all features in the levelpath
IDs_to_keep = levelpaths[levelpaths['levpa_id'] == levelpath]['ID']

# Get IDs of all features in the levelpath in the domain
IDs = levelpaths_domain.loc[levelpaths_domain['levpa_id'] == levelpath, 'ID']

        # Keep only the flows that are on the levelpath (removes tributaries)
        flows = flows[flows['feature_id'].isin(IDs_to_keep)]

if flows.empty:
return flows

        # Find IDs on the levelpath in the domain that are missing from the flow file
        add_IDs = ~IDs.isin(flows['feature_id'])

        # Exit if there are no IDs to add
        if not add_IDs.any():
            return flows

        # add_IDs is a boolean mask aligned with IDs, so select only the missing IDs
        IDs_to_add = IDs[add_IDs]

# Add the missing IDs with flows to the flow file
if not IDs_to_add.empty:
flows_out = pd.concat(
[flows, pd.DataFrame({'feature_id': IDs_to_add, 'discharge': flows['discharge'].iloc[0]})],
ignore_index=True,
)
else:
flows_out = flows

return flows_out

for org in ['nws', 'usgs']:
if verbose:
print('Processing', org)

count_total = 0
count_updated = 0

base_dir = f'/data/test_cases/{org}_test_cases/validation_data_{org}'

huc8s = next(os.walk(base_dir))[1]

for huc8 in huc8s:
if verbose:
print(f'\t{huc8}')

lids = next(os.walk(f'{base_dir}/{huc8}'))[1]

for lid in lids:
if verbose:
print(f'\t\t{lid}')

# Read the input files
levelpath_file = f'{fim_dir}/{huc8}/nwm_subset_streams_levelPaths.gpkg'
if not os.path.exists(levelpath_file):
continue

levelpaths = gpd.read_file(levelpath_file)

validation_path = f'{base_dir}/{huc8}/{lid}'

domain_file = f'{validation_path}/{lid}_domain.shp'
if not os.path.exists(domain_file):
continue

domain = gpd.read_file(domain_file)

# Intersect levelpaths with the domain to get feature_ids
levelpaths_domain = gpd.sjoin(levelpaths, domain, how="inner", predicate="intersects")

magnitudes = next(os.walk(validation_path))[1]

for magnitude in magnitudes:
input_dir = f'{validation_path}/{magnitude}'

if output_dir_base is None:
output_dir = input_dir
else:
output_dir = os.path.join(output_dir_base, org)

if not os.path.exists(output_dir):
os.makedirs(output_dir)

flow_file_in = f'{input_dir}/ahps_{lid}_huc_{huc8}_flows_{magnitude}.csv'
flow_file_out = f'{output_dir}/ahps_{lid}_huc_{huc8}_flows_{magnitude}.csv'

# Skip if flow file doesn't exist
if not os.path.exists(flow_file_in):
continue

# Backup original flow file
backup_flow_file = flow_file_out + '.bak'
if not os.path.exists(backup_flow_file):
shutil.copy2(flow_file_in, backup_flow_file)

flows = pd.read_csv(backup_flow_file)

flows_new = iterate_over_sites(levelpaths, levelpaths_domain, flows)

                    count_total += 1
                    if not flows_new.equals(flows):
                        count_updated += 1

                    flows_new.to_csv(flow_file_out, index=False)

print(f'\tUpdated {count_updated} out of {count_total} flow files for {org}')


if __name__ == "__main__":
example_text = '''example:
%(prog)s -f /outputs/dev-4.4.11.0 -o /outputs/temp
\n'''

parser = argparse.ArgumentParser(
epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument('-f', '--fim_dir', help='Location of FIM files', type=str, required=True)
    parser.add_argument('-o', '--output_dir_base', help='Output directory', type=str, default=None)
parser.add_argument('-v', '--verbose', help='Verbose output', action='store_true')

args = vars(parser.parse_args())

update_benchmark_flows(**args)
74 changes: 74 additions & 0 deletions docs/CHANGELOG.md
@@ -1,6 +1,80 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.1.3 - 2024-05-17 - [PR#1170](https://github.com/NOAA-OWP/inundation-mapping/pull/1170)

This hotfix addresses issue #1162 by explicitly using the `fiona` engine for reading GeoPackage files with a Boolean dtype. It applies only to the `usgs_gages.gpkg` and `usgs_subset_gages.gpkg` files.
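
A minimal sketch of the pattern (the file path here is illustrative; the actual call sites are in the files listed under Changes):

```python
import geopandas as gpd

# Per issue #1162, explicitly request the fiona engine so Boolean dtype
# columns in these GeoPackage files are read correctly.
gages = gpd.read_file('/data/inputs/usgs_gages.gpkg', engine='fiona')
```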


### Changes
- `src/usgs_gage_unit_setup.py`: changed two lines to read with the `fiona` engine
- `src/usgs_gage_crosswalk.py`: changed one line to read with the `fiona` engine, plus two small changes to use `self.branch_id` for correct log reporting
- `tools/rating_curve_comparison.py`: changed one line to read with the `fiona` engine

<br/><br/>


## v4.5.1.2 - 2024-05-17 - [PR#1135](https://github.com/NOAA-OWP/inundation-mapping/pull/1135)

Updates USGS gage processing to use the correct projection (determined by whether the HUC is in Alaska or not).
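
A sketch of the kind of CRS selection involved; the actual `huc_CRS` value is passed in by the run scripts, and the Alaska CRS shown here (EPSG:3338, Alaska Albers) is an assumption for illustration only:

```python
def get_huc_crs(huc8: str) -> str:
    # Alaska HUC8 codes begin with '19'; CONUS data uses EPSG:5070.
    # EPSG:3338 (Alaska Albers) is assumed here for illustration.
    return 'EPSG:3338' if huc8.startswith('19') else 'EPSG:5070'
```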

### Changes
- `src/run_by_branch.sh`: Added `huc_CRS` as an input argument for `usgs_gage_crosswalk.py`
- `src/run_unit_wb.sh`: Added `huc_CRS` as an input argument for `usgs_gage_unit_setup.py` and `usgs_gage_crosswalk.py`
- `src/usgs_gage_crosswalk.py`: Added `huc_CRS` as an input argument for the `run_crosswalk()` function and added re-projection steps wherever new data is being read in so that the files are able to be properly merged.
- `src/usgs_gage_unit_setup.py`: Added `huc_CRS` as an input argument for the `Gage2Branch()` crosswalking class.

<br/><br/>

## v4.5.1.1 - 2024-05-17 - [PR#1094](https://github.com/NOAA-OWP/inundation-mapping/pull/1094)

Extends flows (i.e., discharge) to stream segments missing from NWS and USGS validation flow files. The levelpath associated with the existing flows in the AHPS domain is identified, and any of its stream segments in the domain that are missing from the flow file are added with the existing flow value (a constant, regardless of tributaries or other levelpaths in the domain). Stream segments not on the levelpath, including tributary flows, are dropped from the flow file. The original flow file is saved alongside the output with a `.bak` extension appended.
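
A simplified sketch of the core logic (the script added in this PR also handles levelpath selection, empty inputs, and backups; column names follow that code):

```python
import pandas as pd

def extend_flows(flows: pd.DataFrame, levelpath_ids: pd.Series) -> pd.DataFrame:
    # Keep only flows on the levelpath (drops tributaries)
    flows = flows[flows['feature_id'].isin(levelpath_ids)]

    # Assign the existing constant discharge to levelpath segments missing from
    # the flow file (assumes flows is non-empty, as the full script checks)
    missing = levelpath_ids[~levelpath_ids.isin(flows['feature_id'])]
    return pd.concat(
        [flows, pd.DataFrame({'feature_id': missing, 'discharge': flows['discharge'].iloc[0]})],
        ignore_index=True,
    )
```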

### Additions

- `data/update_benchmark_flows.py`: Adds missing flows to NWS or USGS benchmark flow files and removes flows from tributaries. The original flow file is saved with an appended `.bak`.

### Changes

- `tools/tools_shared_variables.py`: Removed corrected flow files from `BAD_SITES` list.

<br/><br/>

## v4.5.1.0 - 2024-05-17 - [PR#1150](https://github.com/NOAA-OWP/inundation-mapping/pull/1150)

This focuses on removing hydro-conditioning artifacts by subtracting the thalweg-conditioned DEM from the HAND REM and adding back the original DEM. A new tool was also created to test this feature over multiple HUCs.
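
The healing step matches the `gdal_calc.py` call added to `src/delineate_hydros_and_produce_HAND.sh` (`--calc="R+(D-T)"`); a minimal numpy sketch of the same raster algebra, with hypothetical array names:

```python
import numpy as np

def heal_hand(rem: np.ndarray, dem: np.ndarray, dem_thalweg: np.ndarray) -> np.ndarray:
    # rem: HAND REM, dem: original DEM, dem_thalweg: thalweg-conditioned DEM (aligned grids).
    # Adding back (dem - dem_thalweg) removes hydro-conditioning artifacts from the REM.
    return rem + (dem - dem_thalweg)
```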

### Additions
- `tools/analyze_for_missing_FIM_cells.py`: A new script to test and analyze healed HAND FIM for hydro-conditioning artifacts.

### Changes
- `src/delineate_hydros_and_produce_HAND.sh`: Removes hydro-conditioning artifacts from the HAND REM.
- `config/params_template.env`: Adds an option to include or exclude healed HAND in the FIM pipeline.


<br/><br/>

## v4.5.0.2 - 2024-05-17 - [PR#1159](https://github.com/NOAA-OWP/inundation-mapping/pull/1159)

This PR addresses issue #1132 and includes the following changes to `tools/generate_nws_lid.py` for updating the `nws_lid.gpkg` dataset.

In this revised version, only stations from these two groups are retrieved:
- lid stations with `rfc_forecast_point = True`
- lid stations in `/data/inputs/ahp_sites/evaluated_ahps_sites.csv`

Lid stations in AK (Alaska), HI, and PR that meet the above two criteria are also selected. In the previous version of the code, **all** lid stations in PR and HI were retrieved regardless of whether they met these criteria; this version excludes such stations.

This revised version also eliminates the code sections that previously generated the `is_headwater` and `is_colocated` columns, which are not needed in FIM4, so these columns are no longer present.

Similar to the `usgs_gages.gpkg` dataset, all lid stations, including those in Alaska, are stored in a single GeoPackage file (`nws_lid.gpkg`) with EPSG:5070. The Alaska stations can be identified by their HUC8 numbers (beginning with `19`).
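
For example, a sketch of isolating the Alaska stations (the HUC8 column name is assumed for illustration):

```python
import geopandas as gpd

nws_lid = gpd.read_file('nws_lid.gpkg')  # path assumed

# Alaska stations are identified by HUC8 codes beginning with '19'
alaska_lids = nws_lid[nws_lid['HUC8'].astype(str).str.startswith('19')]
```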


### Changes
- `tools/generate_nws_lid.py`

<br/><br/>


## v4.5.0.1 - 2024-05-09 - [PR#1150](https://github.com/NOAA-OWP/inundation-mapping/pull/1150)

Fixes two bugs discovered in v4.5.0.0:
12 changes: 12 additions & 0 deletions src/delineate_hydros_and_produce_HAND.sh
@@ -227,6 +227,7 @@ $taudemDir/catchhydrogeo -hand $tempCurrentBranchDataDir/rem_zeroed_masked_$curr
-h $tempCurrentBranchDataDir/stage_$current_branch_id.txt \
-table $tempCurrentBranchDataDir/src_base_$current_branch_id.csv


## FINALIZE CATCHMENTS AND MODEL STREAMS ##
echo -e $startDiv"Finalize catchments and model streams $hucNumber $current_branch_id"
python3 $srcDir/add_crosswalk.py \
@@ -248,6 +249,17 @@ python3 $srcDir/add_crosswalk.py \
-e $min_catchment_area \
-g $min_stream_length

## HEAL HAND -- REMOVES HYDROCONDITIONING ARTIFACTS ##
if [ "$healed_hand_hydrocondition" = true ]; then
echo -e $startDiv"Healed HAND to Remove Hydro-conditioning Artifacts $hucNumber $current_branch_id"
gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" \
-R $tempCurrentBranchDataDir/rem_zeroed_masked_$current_branch_id.tif \
-D $tempCurrentBranchDataDir/dem_meters_$current_branch_id.tif \
-T $tempCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif \
--calc="R+(D-T)" --NoDataValue=$ndv \
--outfile=$tempCurrentBranchDataDir/"rem_zeroed_masked_$current_branch_id.tif"
fi

## HEAL HAND BRIDGES ##
if [ -f $tempHucDataDir/osm_bridges_subset.gpkg ]; then
echo -e $startDiv"Burn in bridges $hucNumber $current_branch_id"
3 changes: 2 additions & 1 deletion src/run_by_branch.sh
@@ -116,7 +116,8 @@ if [ -f $tempHucDataDir/usgs_subset_gages.gpkg ]; then
-dem $tempCurrentBranchDataDir/dem_meters_$current_branch_id.tif \
-dem_adj $tempCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif \
-out $tempCurrentBranchDataDir \
-b $current_branch_id
-b $current_branch_id \
-huc_CRS $huc_CRS
fi

## REMOVE FILES FROM DENY LIST ##
7 changes: 5 additions & 2 deletions src/run_unit_wb.sh
@@ -250,9 +250,11 @@ if [ -f $tempHucDataDir/nwm_subset_streams_levelPaths.gpkg ]; then
-o $tempHucDataDir/usgs_subset_gages.gpkg \
-huc $hucNumber \
-ahps $tempHucDataDir/nws_lid.gpkg \
-bzero_id $branch_zero_id
-bzero_id $branch_zero_id \
-huc_CRS $huc_CRS
fi


## USGS CROSSWALK ##
if [ -f $tempHucDataDir/usgs_subset_gages_$branch_zero_id.gpkg ]; then
echo -e $startDiv"USGS Crosswalk $hucNumber $branch_zero_id"
@@ -262,7 +264,8 @@ if [ -f $tempHucDataDir/usgs_subset_gages_$branch_zero_id.gpkg ]; then
-cat $tempCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$branch_zero_id.gpkg \
-dem $tempCurrentBranchDataDir/dem_meters_$branch_zero_id.tif \
-dem_adj $tempCurrentBranchDataDir/dem_thalwegCond_$branch_zero_id.tif \
-out $tempCurrentBranchDataDir -b $branch_zero_id
-out $tempCurrentBranchDataDir -b $branch_zero_id \
-huc_CRS $huc_CRS
fi

## CLEANUP BRANCH ZERO OUTPUTS ##