[1pt] PR: Cut down Alaska HUCs runtime #1327

Merged · 9 commits · Nov 1, 2024
13 changes: 13 additions & 0 deletions docs/CHANGELOG.md
@@ -1,6 +1,19 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.x.x - 2024-10-23 - [PR#1327](https://github.com/NOAA-OWP/inundation-mapping/pull/1327)

The purpose of this PR is to cut down the runtime for four Alaska HUCs (19020104, 19020503, 19020402, and 19020602). It significantly reduces runtime by replacing the nested for loop used to update rating curves for small segments with a vectorized process. These changes are applied only to the Alaska HUCs.
As part of this PR, a small modification was also made to `bridge_inundation.py`.
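
For readers skimming this changelog, here is a minimal, hypothetical sketch of the merge-based pattern described above (stand-in DataFrames with made-up values; the real implementation lives in `src/add_crosswalk.py` and is shown in the diff below). Instead of looping over every short segment and every stage row, the donor rating curves are joined onto the short segments and applied in one vectorized pass.

```python
import pandas as pd

# Stand-in rating-curve table; column names follow the diff below,
# but this is a simplified sketch, not the production code.
output_src = pd.DataFrame(
    {
        'HydroID': [101, 101, 202, 202],
        'Stage': [0.0, 0.5, 0.0, 0.5],
        'Discharge (m3s-1)': [0.0, 1.2, 0.0, 3.4],
    }
)
# Each short segment (short_id) should inherit the rating curve of update_id.
sml_segs = pd.DataFrame({'short_id': [101], 'update_id': [202]})

# 1. Pull the donor rating curves for every update_id in one shot.
donor = output_src.loc[
    output_src['HydroID'].isin(sml_segs['update_id']),
    ['HydroID', 'Stage', 'Discharge (m3s-1)'],
]

# 2. Attach donor curves to their short segments via a merge (no Python loop).
updates = sml_segs.merge(donor, left_on='update_id', right_on='HydroID')[
    ['short_id', 'Stage', 'Discharge (m3s-1)']
].rename(columns={'short_id': 'HydroID', 'Discharge (m3s-1)': 'Discharge_new'})

# 3. Overwrite matching (HydroID, Stage) rows in a single vectorized pass.
output_src = output_src.merge(updates, on=['HydroID', 'Stage'], how='left')
output_src['Discharge (m3s-1)'] = output_src['Discharge_new'].fillna(
    output_src['Discharge (m3s-1)']
)
output_src = output_src.drop(columns='Discharge_new')
print(output_src)
```

The lookup that previously required a `.loc` filter per short segment per stage row is done here with two merges and a `fillna`, which is what removes the per-row Python-loop overhead for the large Alaska networks.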

### Changes

- `src/add_crosswalk.py`: adds a `huc_id` parameter and a `-u`/`--huc-id` CLI argument; drops duplicate `HydroID` rows from `output_flows`; for HUCs starting with `19` (Alaska), replaces the nested loop that updates rating curves for short reaches with a merge-based vectorized update, keeping the original loop for all other HUCs.
- `src/delineate_hydros_and_produce_HAND.sh`: passes `$hucNumber` to `add_crosswalk.py` through the new `-u` argument.
- `tools/bridge_inundation.py`: casts `feature_id` to integer in both the bridge points and the flow file before merging, and adjusts the `to_file` call for the output GeoPackage.

<br/><br/>


## v4.5.11.3 - 2024-10-25 - [PR#1320](https://github.com/NOAA-OWP/inundation-mapping/pull/1320)

49 changes: 40 additions & 9 deletions src/add_crosswalk.py
@@ -36,6 +36,7 @@ def add_crosswalk(
small_segments_filename,
min_catchment_area,
min_stream_length,
huc_id,
calibration_mode=False,
):
input_catchments = gpd.read_file(input_catchments_fileName, engine="pyogrio", use_arrow=True)
@@ -110,6 +111,8 @@

output_flows = output_flows.merge(output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID')

output_flows = output_flows.drop_duplicates(subset='HydroID')

output_flows['ManningN'] = mannings_n

if output_flows.NextDownID.dtype != 'int':
@@ -281,16 +284,43 @@
sml_segs.to_csv(small_segments_filename, index=False)
print("Update rating curves for short reaches.")

for index, segment in sml_segs.iterrows():
short_id = segment[0]
update_id = segment[1]
new_values = output_src.loc[output_src['HydroID'] == update_id][['Stage', 'Discharge (m3s-1)']]
if huc_id.startswith('19'):
print("Update rating curves for short reaches in Alaska.")
# Create a DataFrame with new values for discharge based on 'update_id'
new_values = output_src[output_src['HydroID'].isin(sml_segs['update_id'])][
['HydroID', 'Stage', 'Discharge (m3s-1)']
]

for src_index, src_stage in new_values.iterrows():
output_src.loc[
(output_src['HydroID'] == short_id) & (output_src['Stage'] == src_stage[0]),
['Discharge (m3s-1)'],
] = src_stage[1]
# Merge this new values DataFrame with sml_segs on 'update_id' and 'HydroID'
sml_segs_with_values = sml_segs.merge(
new_values, left_on='update_id', right_on='HydroID', suffixes=('', '_new')
)
sml_segs_with_values = sml_segs_with_values[['short_id', 'Stage', 'Discharge (m3s-1)']]
merged_output_src = output_src.merge(
sml_segs_with_values[['short_id', 'Stage', 'Discharge (m3s-1)']],
left_on=['HydroID', 'Stage'],
right_on=['short_id', 'Stage'],
suffixes=('', '_df2'),
)
merged_output_src = merged_output_src[['HydroID', 'Stage', 'Discharge (m3s-1)_df2']]
output_src = pd.merge(output_src, merged_output_src, on=['HydroID', 'Stage'], how='left')
output_src['Discharge (m3s-1)'] = output_src['Discharge (m3s-1)_df2'].fillna(
output_src['Discharge (m3s-1)']
)
output_src = output_src.drop(columns=['Discharge (m3s-1)_df2'])
else:
for index, segment in sml_segs.iterrows():
short_id = segment[0]
update_id = segment[1]
new_values = output_src.loc[output_src['HydroID'] == update_id][
['Stage', 'Discharge (m3s-1)']
]

for src_index, src_stage in new_values.iterrows():
output_src.loc[
(output_src['HydroID'] == short_id) & (output_src['Stage'] == src_stage[0]),
['Discharge (m3s-1)'],
] = src_stage[1]

output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')

@@ -429,6 +459,7 @@ def add_crosswalk(
help="Mannings n. Accepts single parameter set or list of parameter set in calibration mode. Currently input as csv.",
required=True,
)
parser.add_argument("-u", "--huc-id", help="HUC ID", required=False)
parser.add_argument("-z", "--input-nwmcat-fileName", help="NWM catchment polygon", required=True)
parser.add_argument("-p", "--extent", help="GMS only for now", default="GMS", required=False)
parser.add_argument(
1 change: 1 addition & 0 deletions src/delineate_hydros_and_produce_HAND.sh
@@ -242,6 +242,7 @@ python3 $srcDir/add_crosswalk.py \
-t $tempCurrentBranchDataDir/hydroTable_$current_branch_id.csv \
-w $tempHucDataDir/wbd8_clp.gpkg \
-b $b_arg \
-u $hucNumber \
-y $tempCurrentBranchDataDir/nwm_catchments_proj_subset.tif \
-m $manning_n \
-z $z_arg \
7 changes: 6 additions & 1 deletion tools/bridge_inundation.py
@@ -87,6 +87,11 @@ def bridge_risk_status(
# Concatenate all GeoDataFrame into a single GeoDataFrame
bridge_points = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))

if bridge_points.feature_id.dtype != 'int':
bridge_points.feature_id = bridge_points.feature_id.astype(int)
if flow_file_data.feature_id.dtype != 'int':
flow_file_data.feature_id = flow_file_data.feature_id.astype(int)

# Find the common feature_id between flow_file and bridge_points
merged_bri = bridge_points.merge(flow_file_data, on='feature_id', how='inner')

@@ -111,7 +116,7 @@ def risk_class(row):
bridge_out = merged_bri.loc[merged_data_max]
bridge_out.reset_index(drop=True, inplace=True)
bridge_out.drop('risk', axis=1, inplace=True)
bridge_out.to_file(output_dir, driver='GPKG', layer='bridge_risk_status')
bridge_out.to_file(output_dir, index=False, driver="GPKG", engine='fiona')

return bridge_out
