diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index f818d53d..a07eac6a 100755
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,6 +1,19 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
+## v4.5.12.0 - 2024-11-01 - [PR#1327](https://github.com/NOAA-OWP/inundation-mapping/pull/1327)
+
+The purpose of this PR is to cut down the runtime for four Alaska HUCs (19020104, 19020503, 19020402, and 19020602). It significantly reduces runtime by replacing the nested for loop used to update rating curves for small segments with a vectorized process. These changes were applied only to the Alaska HUCs.
+As part of this PR, a small modification was also made to `bridge_inundation.py`.
+
+### Changes
+
+- `src/add_crosswalk.py`
+- `src/delineate_hydros_and_produce_HAND.sh`
+- `tools/bridge_inundation.py`
+
+
+
 ## v4.5.11.3 - 2024-10-25 - [PR#1320](https://github.com/NOAA-OWP/inundation-mapping/pull/1320)
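The changelog entry above describes replacing a per-segment nested loop with a merge-based (vectorized) rating-curve update. The snippet below is a minimal, self-contained sketch of that idea on toy data; the frame and column names mirror `add_crosswalk.py`, but the sample values and the simplified column handling are assumptions made for illustration. The authoritative change is the `src/add_crosswalk.py` diff that follows.

```python
import pandas as pd

# Toy synthetic rating curve table: one row per (HydroID, Stage) pair.
output_src = pd.DataFrame(
    {
        'HydroID': [101, 101, 202, 202, 303, 303],
        'Stage': [0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
        'Discharge (m3s-1)': [0.0, 5.0, 0.0, 40.0, 0.0, 80.0],
    }
)

# Short segments whose rating curves should be overwritten by their 'update_id' donor.
sml_segs = pd.DataFrame({'short_id': [101], 'update_id': [202]})

# Old approach (per-segment, per-stage Python loops): slow when there are many rows.
looped = output_src.copy()
for _, seg in sml_segs.iterrows():
    donor = looped.loc[looped['HydroID'] == seg['update_id'], ['Stage', 'Discharge (m3s-1)']]
    for _, row in donor.iterrows():
        looped.loc[
            (looped['HydroID'] == seg['short_id']) & (looped['Stage'] == row['Stage']),
            'Discharge (m3s-1)',
        ] = row['Discharge (m3s-1)']

# Vectorized approach: pull donor curves once, attach them to each short_id,
# left-merge back onto the full table, and fill in the replaced discharges.
donor_curves = output_src[output_src['HydroID'].isin(sml_segs['update_id'])][
    ['HydroID', 'Stage', 'Discharge (m3s-1)']
]
replacements = sml_segs.merge(donor_curves, left_on='update_id', right_on='HydroID')[
    ['short_id', 'Stage', 'Discharge (m3s-1)']
]
vectorized = output_src.merge(
    replacements,
    left_on=['HydroID', 'Stage'],
    right_on=['short_id', 'Stage'],
    how='left',
    suffixes=('', '_new'),
)
vectorized['Discharge (m3s-1)'] = vectorized['Discharge (m3s-1)_new'].fillna(
    vectorized['Discharge (m3s-1)']
)
vectorized = vectorized.drop(columns=['short_id', 'Discharge (m3s-1)_new'])

# Both paths give HydroID 101 the discharges of its donor, HydroID 202.
assert looped['Discharge (m3s-1)'].tolist() == vectorized['Discharge (m3s-1)'].tolist()
print(vectorized)
```

Pushing the per-stage lookups into two joins lets pandas do the matching in vectorized code instead of a Python-level double loop, which is where the runtime savings for the four Alaska HUCs come from.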
diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py
index 61622b2e..ba1f31ab 100755
--- a/src/add_crosswalk.py
+++ b/src/add_crosswalk.py
@@ -36,6 +36,7 @@ def add_crosswalk(
     small_segments_filename,
     min_catchment_area,
     min_stream_length,
+    huc_id,
     calibration_mode=False,
 ):
     input_catchments = gpd.read_file(input_catchments_fileName, engine="pyogrio", use_arrow=True)
@@ -110,6 +111,8 @@ def add_crosswalk(
     output_flows = output_flows.merge(output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID')

+    output_flows = output_flows.drop_duplicates(subset='HydroID')
+
     output_flows['ManningN'] = mannings_n

     if output_flows.NextDownID.dtype != 'int':
@@ -281,16 +284,43 @@ def add_crosswalk(
         sml_segs.to_csv(small_segments_filename, index=False)
         print("Update rating curves for short reaches.")

-        for index, segment in sml_segs.iterrows():
-            short_id = segment[0]
-            update_id = segment[1]
-            new_values = output_src.loc[output_src['HydroID'] == update_id][['Stage', 'Discharge (m3s-1)']]
+        if huc_id.startswith('19'):
+            print("Update rating curves for short reaches in Alaska.")
+            # Create a DataFrame with new values for discharge based on 'update_id'
+            new_values = output_src[output_src['HydroID'].isin(sml_segs['update_id'])][
+                ['HydroID', 'Stage', 'Discharge (m3s-1)']
+            ]
-            for src_index, src_stage in new_values.iterrows():
-                output_src.loc[
-                    (output_src['HydroID'] == short_id) & (output_src['Stage'] == src_stage[0]),
-                    ['Discharge (m3s-1)'],
-                ] = src_stage[1]
+            # Merge this new values DataFrame with sml_segs on 'update_id' and 'HydroID'
+            sml_segs_with_values = sml_segs.merge(
+                new_values, left_on='update_id', right_on='HydroID', suffixes=('', '_new')
+            )
+            sml_segs_with_values = sml_segs_with_values[['short_id', 'Stage', 'Discharge (m3s-1)']]
+            merged_output_src = output_src.merge(
+                sml_segs_with_values[['short_id', 'Stage', 'Discharge (m3s-1)']],
+                left_on=['HydroID', 'Stage'],
+                right_on=['short_id', 'Stage'],
+                suffixes=('', '_df2'),
+            )
+            merged_output_src = merged_output_src[['HydroID', 'Stage', 'Discharge (m3s-1)_df2']]
+            output_src = pd.merge(output_src, merged_output_src, on=['HydroID', 'Stage'], how='left')
+            output_src['Discharge (m3s-1)'] = output_src['Discharge (m3s-1)_df2'].fillna(
+                output_src['Discharge (m3s-1)']
+            )
+            output_src = output_src.drop(columns=['Discharge (m3s-1)_df2'])
+        else:
+            for index, segment in sml_segs.iterrows():
+                short_id = segment[0]
+                update_id = segment[1]
+                new_values = output_src.loc[output_src['HydroID'] == update_id][
+                    ['Stage', 'Discharge (m3s-1)']
+                ]
+
+                for src_index, src_stage in new_values.iterrows():
+                    output_src.loc[
+                        (output_src['HydroID'] == short_id) & (output_src['Stage'] == src_stage[0]),
+                        ['Discharge (m3s-1)'],
+                    ] = src_stage[1]

     output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')
@@ -429,6 +459,7 @@ def add_crosswalk(
         help="Mannings n. Accepts single parameter set or list of parameter set in calibration mode. Currently input as csv.",
         required=True,
     )
+    parser.add_argument("-u", "--huc-id", help="HUC ID", required=False)
     parser.add_argument("-z", "--input-nwmcat-fileName", help="NWM catchment polygon", required=True)
     parser.add_argument("-p", "--extent", help="GMS only for now", default="GMS", required=False)
     parser.add_argument(
diff --git a/src/delineate_hydros_and_produce_HAND.sh b/src/delineate_hydros_and_produce_HAND.sh
index 2c827cc5..2ebab872 100755
--- a/src/delineate_hydros_and_produce_HAND.sh
+++ b/src/delineate_hydros_and_produce_HAND.sh
@@ -242,6 +242,7 @@ python3 $srcDir/add_crosswalk.py \
    -t $tempCurrentBranchDataDir/hydroTable_$current_branch_id.csv \
    -w $tempHucDataDir/wbd8_clp.gpkg \
    -b $b_arg \
+   -u $hucNumber \
    -y $tempCurrentBranchDataDir/nwm_catchments_proj_subset.tif \
    -m $manning_n \
    -z $z_arg \
diff --git a/tools/bridge_inundation.py b/tools/bridge_inundation.py
index 6f923819..720aedbf 100644
--- a/tools/bridge_inundation.py
+++ b/tools/bridge_inundation.py
@@ -87,6 +87,11 @@ def bridge_risk_status(
     # Concatenate all GeoDataFrame into a single GeoDataFrame
     bridge_points = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))

+    if bridge_points.feature_id.dtype != 'int':
+        bridge_points.feature_id = bridge_points.feature_id.astype(int)
+    if flow_file_data.feature_id.dtype != 'int':
+        flow_file_data.feature_id = flow_file_data.feature_id.astype(int)
+
     # Find the common feature_id between flow_file and bridge_points
     merged_bri = bridge_points.merge(flow_file_data, on='feature_id', how='inner')
@@ -111,7 +116,7 @@ def risk_class(row):
     bridge_out = merged_bri.loc[merged_data_max]
     bridge_out.reset_index(drop=True, inplace=True)
     bridge_out.drop('risk', axis=1, inplace=True)
-    bridge_out.to_file(output_dir, driver='GPKG', layer='bridge_risk_status')
+    bridge_out.to_file(output_dir, index=False, driver="GPKG", engine='fiona')

     return bridge_out
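In `tools/bridge_inundation.py`, the diff casts `feature_id` to `int` on both frames before the inner merge. One plausible reading (a hedged guess, since the production dtypes are not shown in this diff) is that the bridge points and the flow file can carry `feature_id` with mismatched dtypes, which pandas refuses to join. The hypothetical example below reproduces that failure with toy frames and shows how aligning the key dtype makes the merge succeed.

```python
import pandas as pd

# Hypothetical frames: feature_id arrives as strings on one side, ints on the other.
bridge_points = pd.DataFrame({'feature_id': ['101', '202'], 'name': ['bridge_a', 'bridge_b']})
flow_file_data = pd.DataFrame({'feature_id': [101, 202], 'discharge': [12.5, 40.0]})

# pandas raises ValueError when asked to merge an object (string) key against an int64 key.
try:
    bridge_points.merge(flow_file_data, on='feature_id', how='inner')
except ValueError as err:
    print(f"merge failed: {err}")

# Casting both sides to int first, as the diff above does, lets the inner join match rows.
bridge_points['feature_id'] = bridge_points['feature_id'].astype(int)
flow_file_data['feature_id'] = flow_file_data['feature_id'].astype(int)
matched = bridge_points.merge(flow_file_data, on='feature_id', how='inner')
print(matched)
```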