Commit

merge with dev
CarsonPruitt-NOAA committed May 17, 2024
2 parents 75edd96 + 39504f3 commit 6fc6d6b
Showing 12 changed files with 538 additions and 109 deletions.
4 changes: 4 additions & 0 deletions config/params_template.env
@@ -31,6 +31,10 @@ export branch_zero_id="0"
export mask_leveed_area_toggle=True # Toggle to mask levee-protected areas from DEM
export levee_id_attribute=SYSTEM_ID

#### Healed HAND ####
# Removes Hydro-conditioning Artifacts (true=on; false=off)
export healed_hand_hydrocondition=true

#### apply bathymetry adjustment to rating curve ####
export bathymetry_adjust=True

200 changes: 200 additions & 0 deletions data/update_benchmark_flows.py
@@ -0,0 +1,200 @@
#!/usr/bin/env python3

import argparse
import os
import shutil

import geopandas as gpd
import pandas as pd


gpd.options.io_engine = "pyogrio"


def update_benchmark_flows(fim_dir: str, output_dir_base: str, verbose: bool = False):
    """
    Update benchmark flows of the levelpath in the domain for stream segments missing from the flow file

    Parameters
    ----------
    fim_dir : str
        Location of FIM files (e.g., "/outputs/dev-4.4.11.0")
    output_dir_base : str
        Output directory (e.g., "/outputs/temp"). If None, the output files are saved in the
        input directory, and the original files are backed up with a ".bak" extension appended
        to the original filename.
    verbose : bool
        If True, print progress messages
    """

def iterate_over_sites(
levelpaths: gpd.GeoDataFrame, levelpaths_domain: gpd.GeoDataFrame, flows: pd.DataFrame
) -> pd.DataFrame:
"""
Update benchmark flows of stream segments missing from the flow file
Parameters
----------
levelpaths : gpd.GeoDataFrame
Level paths of the HUC
levelpaths_domain : gpd.GeoDataFrame
Levelpaths in the domain
flows : pd.DataFrame
Benchmark flows
Returns
-------
pd.DataFrame
Flows with updated benchmark flows
"""

if levelpaths_domain.empty:
return flows

# Find the levelpaths with the highest order (to exclude tributaries)
levelpaths_domain = levelpaths_domain[
levelpaths_domain['order_'] == levelpaths_domain['order_'].max()
]

# Find the levelpath that has flows in the flow file
        if levelpaths_domain['levpa_id'].nunique() > 1:
            # If there are multiple levelpaths with the highest order, take the longest one:
            # intersect the levelpaths with the domain (read from the enclosing scope) to get
            # the length of each levelpath inside the domain.
            levelpaths_domain_intersect = gpd.overlay(levelpaths, domain, how='intersection')
levelpaths_domain_intersect['length'] = levelpaths_domain_intersect['geometry'].length

# Get the total length of all the segments in the levelpath
levelpaths_domain_intersect_length = (
levelpaths_domain_intersect.groupby('levpa_id').agg({'length': 'sum'}).reset_index()
)

# Get the longest levelpath
levelpath = levelpaths_domain_intersect_length.loc[
levelpaths_domain_intersect_length['length'].idxmax(), 'levpa_id'
]

else:
levelpath = levelpaths_domain['levpa_id'].iloc[0]

# Get IDs of all features in the levelpath
IDs_to_keep = levelpaths[levelpaths['levpa_id'] == levelpath]['ID']

# Get IDs of all features in the levelpath in the domain
IDs = levelpaths_domain.loc[levelpaths_domain['levpa_id'] == levelpath, 'ID']

        # Keep only the flows that are on the levelpath (removes tributaries)
        flows = flows[flows['feature_id'].isin(IDs_to_keep)]

if flows.empty:
return flows

        # Find IDs on the levelpath in the domain that are missing from the flow file
        add_IDs = ~IDs.isin(flows['feature_id'])

        # Exit if there are no IDs to add
        if not add_IDs.any():
            return flows

        # add_IDs is a boolean mask aligned with IDs, so select only the missing IDs
        IDs_to_add = IDs[add_IDs]

# Add the missing IDs with flows to the flow file
if not IDs_to_add.empty:
flows_out = pd.concat(
[flows, pd.DataFrame({'feature_id': IDs_to_add, 'discharge': flows['discharge'].iloc[0]})],
ignore_index=True,
)
else:
flows_out = flows

return flows_out

for org in ['nws', 'usgs']:
if verbose:
print('Processing', org)

count_total = 0
count_updated = 0

base_dir = f'/data/test_cases/{org}_test_cases/validation_data_{org}'

huc8s = next(os.walk(base_dir))[1]

for huc8 in huc8s:
if verbose:
print(f'\t{huc8}')

lids = next(os.walk(f'{base_dir}/{huc8}'))[1]

for lid in lids:
if verbose:
print(f'\t\t{lid}')

# Read the input files
levelpath_file = f'{fim_dir}/{huc8}/nwm_subset_streams_levelPaths.gpkg'
if not os.path.exists(levelpath_file):
continue

levelpaths = gpd.read_file(levelpath_file)

validation_path = f'{base_dir}/{huc8}/{lid}'

domain_file = f'{validation_path}/{lid}_domain.shp'
if not os.path.exists(domain_file):
continue

domain = gpd.read_file(domain_file)

# Intersect levelpaths with the domain to get feature_ids
levelpaths_domain = gpd.sjoin(levelpaths, domain, how="inner", predicate="intersects")

magnitudes = next(os.walk(validation_path))[1]

for magnitude in magnitudes:
input_dir = f'{validation_path}/{magnitude}'

if output_dir_base is None:
output_dir = input_dir
else:
output_dir = os.path.join(output_dir_base, org)

if not os.path.exists(output_dir):
os.makedirs(output_dir)

flow_file_in = f'{input_dir}/ahps_{lid}_huc_{huc8}_flows_{magnitude}.csv'
flow_file_out = f'{output_dir}/ahps_{lid}_huc_{huc8}_flows_{magnitude}.csv'

# Skip if flow file doesn't exist
if not os.path.exists(flow_file_in):
continue

# Backup original flow file
backup_flow_file = flow_file_out + '.bak'
if not os.path.exists(backup_flow_file):
shutil.copy2(flow_file_in, backup_flow_file)

flows = pd.read_csv(backup_flow_file)

flows_new = iterate_over_sites(levelpaths, levelpaths_domain, flows)

                    count_total += 1
                    if not flows_new.equals(flows):
                        count_updated += 1

                    flows_new.to_csv(flow_file_out, index=False)

print(f'\tUpdated {count_updated} out of {count_total} flow files for {org}')


if __name__ == "__main__":
example_text = '''example:
%(prog)s -f /outputs/dev-4.4.11.0 -o /outputs/temp
\n'''

parser = argparse.ArgumentParser(
epilog=example_text, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument('-f', '--fim_dir', help='Location of FIM files', type=str, required=True)
    parser.add_argument('-o', '--output_dir_base', help='Output directory', type=str, default=None)
parser.add_argument('-v', '--verbose', help='Verbose output', action='store_true')

args = vars(parser.parse_args())

update_benchmark_flows(**args)
74 changes: 74 additions & 0 deletions docs/CHANGELOG.md
@@ -1,6 +1,80 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.1.3 - 2024-05-17 - [PR#1170](https://github.com/NOAA-OWP/inundation-mapping/pull/1170)

This hotfix addresses issue #1162 by explicitly using the `fiona` engine for reading GeoPackage files with a Boolean dtype. It applies only to the `usgs_gages.gpkg` and `usgs_subset_gages.gpkg` files.
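
A minimal sketch of the pattern (the file path here is illustrative; the actual call sites are in the files listed under Changes):

```python
import geopandas as gpd

# Per issue #1162, explicitly request the fiona engine so Boolean dtype
# columns in these GeoPackage files are read correctly.
gages = gpd.read_file('/data/inputs/usgs_gages.gpkg', engine='fiona')
```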


### Changes
- `src/usgs_gage_unit_setup.py`: changed two lines to read with the `fiona` engine
- `src/usgs_gage_crosswalk.py`: changed one line to read with the `fiona` engine, plus two small changes to use `self.branch_id` for correct log reporting
- `tools/rating_curve_comparison.py`: changed one line to read with the `fiona` engine

<br/><br/>


## v4.5.1.2 - 2024-05-17 - [PR#1135](https://github.com/NOAA-OWP/inundation-mapping/pull/1135)

Updates USGS gage processing to use the correct projection (determined by whether the HUC is in Alaska or not).
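
A sketch of the kind of CRS selection involved; the actual `huc_CRS` value is passed in by the run scripts, and the Alaska CRS shown here (EPSG:3338, Alaska Albers) is an assumption for illustration only:

```python
def get_huc_crs(huc8: str) -> str:
    # Alaska HUC8 codes begin with '19'; CONUS data uses EPSG:5070.
    # EPSG:3338 (Alaska Albers) is assumed here for illustration.
    return 'EPSG:3338' if huc8.startswith('19') else 'EPSG:5070'
```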

### Changes
- `src/run_by_branch.sh`: Added `huc_CRS` as an input argument for `usgs_gage_crosswalk.py`
- `src/run_unit_wb.sh`: Added `huc_CRS` as an input argument for `usgs_gage_unit_setup.py` and `usgs_gage_crosswalk.py`
- `src/usgs_gage_crosswalk.py`: Added `huc_CRS` as an input argument for the `run_crosswalk()` function and added re-projection steps wherever new data is being read in so that the files are able to be properly merged.
- `src/usgs_gage_unit_setup.py`: Added `huc_CRS` as an input argument for the `Gage2Branch()` crosswalking class.

<br/><br/>

## v4.5.1.1 - 2024-05-17 - [PR#1094](https://github.com/NOAA-OWP/inundation-mapping/pull/1094)

Extends flows (i.e., discharge) to stream segments missing from NWS and USGS validation flow files. The levelpath associated with the existing flows in the AHPS domain is identified, and any of its stream segments in the domain that are missing from the flow file are added with the existing flow value (a constant, regardless of tributaries or other levelpaths in the domain). Stream segments not on the levelpath, including tributary flows, are dropped from the flow file. The original flow file is saved alongside the output with a `.bak` extension appended.
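
A simplified sketch of the core logic (the script added in this PR also handles levelpath selection, empty inputs, and backups; column names follow that code):

```python
import pandas as pd

def extend_flows(flows: pd.DataFrame, levelpath_ids: pd.Series) -> pd.DataFrame:
    # Keep only flows on the levelpath (drops tributaries)
    flows = flows[flows['feature_id'].isin(levelpath_ids)]

    # Assign the existing constant discharge to levelpath segments missing from
    # the flow file (assumes flows is non-empty, as the full script checks)
    missing = levelpath_ids[~levelpath_ids.isin(flows['feature_id'])]
    return pd.concat(
        [flows, pd.DataFrame({'feature_id': missing, 'discharge': flows['discharge'].iloc[0]})],
        ignore_index=True,
    )
```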

### Additions

- `data/update_benchmark_flows.py`: Adds missing flows to NWS or USGS benchmark flow files and removes flows from tributaries. The original flow file is saved with an appended `.bak`.

### Changes

- `tools/tools_shared_variables.py`: Removed corrected flow files from `BAD_SITES` list.

<br/><br/>

## v4.5.1.0 - 2024-05-17 - [PR#1150](https://github.com/NOAA-OWP/inundation-mapping/pull/1150)

This focuses on removing hydro-conditioning artifacts by subtracting the thalweg-conditioned DEM from the HAND REM and adding back the original DEM. A new tool was also created to test this feature over multiple HUCs.
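
The healing step matches the `gdal_calc.py` call added to `src/delineate_hydros_and_produce_HAND.sh` (`--calc="R+(D-T)"`); a minimal numpy sketch of the same raster algebra, with hypothetical array names:

```python
import numpy as np

def heal_hand(rem: np.ndarray, dem: np.ndarray, dem_thalweg: np.ndarray) -> np.ndarray:
    # rem: HAND REM, dem: original DEM, dem_thalweg: thalweg-conditioned DEM (aligned grids).
    # Adding back (dem - dem_thalweg) removes hydro-conditioning artifacts from the REM.
    return rem + (dem - dem_thalweg)
```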

### Additions
- `tools/analyze_for_missing_FIM_cells.py`: A new script to test and analyze healed HAND FIM for hydro-conditioning artifacts.

### Changes
- `src/delineate_hydros_and_produce_HAND.sh`: Removes hydro-conditioning artifacts from the HAND REM.
- `config/params_template.env`: Adds an option to include or exclude healed HAND in the FIM pipeline.


<br/><br/>

## v4.5.0.2 - 2024-05-17 - [PR#1159](https://github.com/NOAA-OWP/inundation-mapping/pull/1159)

This PR addresses issue #1132 and includes the following changes to `tools/generate_nws_lid.py` for updating the `nws_lid.gpkg` dataset.

In this revised version, only stations from these two groups are retrieved:
- lid stations with `rfc_forecast_point = True`
- lid stations in `/data/inputs/ahp_sites/evaluated_ahps_sites.csv`

Lid stations in AK (Alaska), HI, and PR that meet the above two criteria are also selected. In the previous version of the code, **all** lid stations in PR and HI were retrieved regardless of whether they met these criteria; this version excludes such stations.

This revised version also eliminates the code sections that previously generated the `is_headwater` and `is_colocated` columns, which are not needed in FIM4, so these columns are no longer present.

Similar to the `usgs_gages.gpkg` dataset, all lid stations, including those in Alaska, are stored in a single GeoPackage file (`nws_lid.gpkg`) with EPSG:5070. The Alaska stations can be identified by their HUC8 numbers (beginning with `19`).
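
For example, a sketch of isolating the Alaska stations (the HUC8 column name is assumed for illustration):

```python
import geopandas as gpd

nws_lid = gpd.read_file('nws_lid.gpkg')  # path assumed

# Alaska stations are identified by HUC8 codes beginning with '19'
alaska_lids = nws_lid[nws_lid['HUC8'].astype(str).str.startswith('19')]
```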


### Changes
- `tools/generate_nws_lid.py`

<br/><br/>


## v4.5.0.1 - 2024-05-09 - [PR#1150](https://github.com/NOAA-OWP/inundation-mapping/pull/1150)

Fixes two bugs discovered in v4.5.0.0:
12 changes: 12 additions & 0 deletions src/delineate_hydros_and_produce_HAND.sh
@@ -227,6 +227,7 @@ $taudemDir/catchhydrogeo -hand $tempCurrentBranchDataDir/rem_zeroed_masked_$curr
-h $tempCurrentBranchDataDir/stage_$current_branch_id.txt \
-table $tempCurrentBranchDataDir/src_base_$current_branch_id.csv


## FINALIZE CATCHMENTS AND MODEL STREAMS ##
echo -e $startDiv"Finalize catchments and model streams $hucNumber $current_branch_id"
python3 $srcDir/add_crosswalk.py \
@@ -248,6 +249,17 @@ python3 $srcDir/add_crosswalk.py \
-e $min_catchment_area \
-g $min_stream_length

## HEAL HAND -- REMOVES HYDROCONDITIONING ARTIFACTS ##
if [ "$healed_hand_hydrocondition" = true ]; then
echo -e $startDiv"Healed HAND to Remove Hydro-conditioning Artifacts $hucNumber $current_branch_id"
gdal_calc.py --quiet --type=Float32 --overwrite --co "COMPRESS=LZW" --co "BIGTIFF=YES" --co "TILED=YES" \
-R $tempCurrentBranchDataDir/rem_zeroed_masked_$current_branch_id.tif \
-D $tempCurrentBranchDataDir/dem_meters_$current_branch_id.tif \
-T $tempCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif \
--calc="R+(D-T)" --NoDataValue=$ndv \
--outfile=$tempCurrentBranchDataDir/"rem_zeroed_masked_$current_branch_id.tif"
fi

## HEAL HAND BRIDGES ##
if [ -f $tempHucDataDir/osm_bridges_subset.gpkg ]; then
echo -e $startDiv"Burn in bridges $hucNumber $current_branch_id"
3 changes: 2 additions & 1 deletion src/run_by_branch.sh
@@ -116,7 +116,8 @@ if [ -f $tempHucDataDir/usgs_subset_gages.gpkg ]; then
-dem $tempCurrentBranchDataDir/dem_meters_$current_branch_id.tif \
-dem_adj $tempCurrentBranchDataDir/dem_thalwegCond_$current_branch_id.tif \
-out $tempCurrentBranchDataDir \
-b $current_branch_id
-b $current_branch_id \
-huc_CRS $huc_CRS
fi

## REMOVE FILES FROM DENY LIST ##
7 changes: 5 additions & 2 deletions src/run_unit_wb.sh
@@ -250,9 +250,11 @@ if [ -f $tempHucDataDir/nwm_subset_streams_levelPaths.gpkg ]; then
-o $tempHucDataDir/usgs_subset_gages.gpkg \
-huc $hucNumber \
-ahps $tempHucDataDir/nws_lid.gpkg \
-bzero_id $branch_zero_id
-bzero_id $branch_zero_id \
-huc_CRS $huc_CRS
fi


## USGS CROSSWALK ##
if [ -f $tempHucDataDir/usgs_subset_gages_$branch_zero_id.gpkg ]; then
echo -e $startDiv"USGS Crosswalk $hucNumber $branch_zero_id"
@@ -262,7 +264,8 @@ if [ -f $tempHucDataDir/usgs_subset_gages_$branch_zero_id.gpkg ]; then
-cat $tempCurrentBranchDataDir/gw_catchments_reaches_filtered_addedAttributes_crosswalked_$branch_zero_id.gpkg \
-dem $tempCurrentBranchDataDir/dem_meters_$branch_zero_id.tif \
-dem_adj $tempCurrentBranchDataDir/dem_thalwegCond_$branch_zero_id.tif \
-out $tempCurrentBranchDataDir -b $branch_zero_id
-out $tempCurrentBranchDataDir -b $branch_zero_id \
-huc_CRS $huc_CRS
fi

## CLEANUP BRANCH ZERO OUTPUTS ##