From 9c1f3af41924a31fa1eddb65177f3e3a96a25064 Mon Sep 17 00:00:00 2001 From: MattLuck-NOAA Date: Mon, 8 Jul 2024 10:56:06 -0700 Subject: [PATCH] v4.5.2.5 Snap crosswalk to NWM streams (#1205) --- docs/CHANGELOG.md | 11 +++ src/add_crosswalk.py | 189 +++++++++++++++---------------------------- 2 files changed, 74 insertions(+), 126 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 07f1ed850..35de89aed 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,6 +1,17 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v4.5.2.5 - 2024-07-08 - [PR#1205](https://github.com/NOAA-OWP/inundation-mapping/pull/1205) + +Snaps crosswalk from the midpoint of DEM-derived reaches to the nearest point on NWM streams within a threshold of 100 meters. DEM-derived streams that do not locate any NWM streams within 100 meters of their midpoints are removed from the FIM hydrofabric and their catchments are not inundated. + +### Changes + +- `src/add_crosswalk.py`: Locates nearest NWM stream to midpoint of DEM-derived reaches if within 100 meters. Also fixes a couple of minor bugs. + +

+ + ## v4.5.2.4 - 2024-07-08 - [PR#1204](https://github.com/NOAA-OWP/inundation-mapping/pull/1204) Bug fix for extending outlets in order to ensure proper flow direction in depression filling algorithm. This PR adds a distance criteria that in order for the end of an outlet stream to be snapped to the wbd_buffered boundary, the end point must be less than 100 meters from the WBD boundary. diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py index ddc9444df..62789c9b3 100755 --- a/src/add_crosswalk.py +++ b/src/add_crosswalk.py @@ -40,130 +40,70 @@ def add_crosswalk( input_catchments = gpd.read_file(input_catchments_fileName, engine="pyogrio", use_arrow=True) input_flows = gpd.read_file(input_flows_fileName, engine="pyogrio", use_arrow=True) input_huc = gpd.read_file(input_huc_fileName, engine="pyogrio", use_arrow=True) + input_nwmcat = gpd.read_file(input_nwmcat_fileName, engine="pyogrio", use_arrow=True) input_nwmflows = gpd.read_file(input_nwmflows_fileName, engine="pyogrio", use_arrow=True) min_catchment_area = float(min_catchment_area) # 0.25# min_stream_length = float(min_stream_length) # 0.5# - if extent == 'FR': - ## crosswalk using majority catchment method - - # calculate majority catchments - majority_calc = zonal_stats( - input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True - ) - input_majorities = gpd.GeoDataFrame.from_features(majority_calc) - input_majorities = input_majorities.rename(columns={'majority': 'feature_id'}) - - input_majorities = input_majorities[:][input_majorities['feature_id'].notna()] - if input_majorities.feature_id.dtype != 'int': - input_majorities.feature_id = input_majorities.feature_id.astype(int) - if input_majorities.HydroID.dtype != 'int': - input_majorities.HydroID = input_majorities.HydroID.astype(int) - - input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'}) - if input_nwmflows.feature_id.dtype != 'int': - input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) - relevant_input_nwmflows = input_nwmflows[ - input_nwmflows['feature_id'].isin(input_majorities['feature_id']) - ] - relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id', 'order_']) - - if input_catchments.HydroID.dtype != 'int': - input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID') - output_catchments = output_catchments.merge( - relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id' - ) - - if input_flows.HydroID.dtype != 'int': - input_flows.HydroID = input_flows.HydroID.astype(int) - output_flows = input_flows.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID') - if output_flows.HydroID.dtype != 'int': - output_flows.HydroID = output_flows.HydroID.astype(int) - output_flows = output_flows.merge(relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id') - output_flows = output_flows.merge( - output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID' - ) - - elif (extent == 'MS') | (extent == 'GMS'): - ## crosswalk using stream segment midpoint method - input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona") - - # only reduce nwm catchments to mainstems if running mainstems - if extent == 'MS': - input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1] - - input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'}) - if input_nwmcat.feature_id.dtype != 'int': - input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) - input_nwmcat = input_nwmcat.set_index('feature_id') - - input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'}) - if input_nwmflows.feature_id.dtype != 'int': - input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) - - # Get stream midpoint - stream_midpoint = [] - hydroID = [] - for i, lineString in enumerate(input_flows.geometry): - hydroID = hydroID + [input_flows.loc[i, 'HydroID']] - stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)] - - input_flows_midpoint = gpd.GeoDataFrame( - {'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry' - ) - input_flows_midpoint = input_flows_midpoint.set_index('HydroID') - - # Create crosswalk - crosswalk = gpd.sjoin( - input_flows_midpoint, input_nwmcat, how='left', predicate='within' - ).reset_index() - crosswalk = crosswalk.rename(columns={"index_right": "feature_id"}) - - # fill in missing ms - crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()] - for index, stream in crosswalk_missing.iterrows(): - # find closest nwm catchment by distance - distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry] - min_dist = min(distances) - nwmcat_index = distances.index(min_dist) - - # update crosswalk - crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'feature_id'] = input_nwmcat.iloc[ - nwmcat_index - ].name - crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'AreaSqKM'] = input_nwmcat.iloc[ - nwmcat_index - ].AreaSqKM - crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Length'] = input_nwmcat.iloc[ - nwmcat_index - ].Shape_Length - crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Area'] = input_nwmcat.iloc[ - nwmcat_index - ].Shape_Area - - crosswalk = crosswalk.filter(items=['HydroID', 'feature_id']) - crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']], on='feature_id') - - if len(crosswalk) < 1: - print("No relevant streams within HUC boundaries.") - sys.exit(0) - - if input_catchments.HydroID.dtype != 'int': - input_catchments.HydroID = input_catchments.HydroID.astype(int) - output_catchments = input_catchments.merge(crosswalk, on='HydroID') - - if input_flows.HydroID.dtype != 'int': - input_flows.HydroID = input_flows.HydroID.astype(int) - output_flows = input_flows.merge(crosswalk, on='HydroID') - - # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh - if 'areasqkm' not in output_catchments.columns: - output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2) - - output_flows = output_flows.merge( - output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID' - ) + input_catchments = input_catchments.dissolve(by='HydroID').reset_index() + + ## crosswalk using stream segment midpoint method + input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona") + + # only reduce nwm catchments to mainstems if running mainstems + if extent == 'MS': + input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1] + + input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'}) + if input_nwmcat.feature_id.dtype != 'int': + input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int) + input_nwmcat = input_nwmcat.set_index('feature_id') + + input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'}) + if input_nwmflows.feature_id.dtype != 'int': + input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int) + input_nwmflows = input_nwmflows.set_index('feature_id') + + # Get stream midpoint + stream_midpoint = [] + hydroID = [] + for i, lineString in enumerate(input_flows.geometry): + hydroID = hydroID + [input_flows.loc[i, 'HydroID']] + stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)] + + input_flows_midpoint = gpd.GeoDataFrame( + {'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry' + ) + input_flows_midpoint = input_flows_midpoint.set_index('HydroID') + + # Create crosswalk + crosswalk = gpd.sjoin_nearest( + input_flows_midpoint, input_nwmflows, how='left', distance_col='distance' + ).reset_index() + crosswalk = crosswalk.rename(columns={"index_right": "feature_id"}) + + crosswalk.loc[crosswalk['distance'] > 100.0, 'feature_id'] = pd.NA + + crosswalk = crosswalk.filter(items=['HydroID', 'feature_id', 'distance']) + crosswalk = crosswalk.merge(input_nwmflows[['order_']], on='feature_id') + + if len(crosswalk) < 1: + print("No relevant streams within HUC boundaries.") + sys.exit(0) + + if input_catchments.HydroID.dtype != 'int': + input_catchments.HydroID = input_catchments.HydroID.astype(int) + output_catchments = input_catchments.merge(crosswalk, on='HydroID') + + if input_flows.HydroID.dtype != 'int': + input_flows.HydroID = input_flows.HydroID.astype(int) + output_flows = input_flows.merge(crosswalk, on='HydroID') + + # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh + if 'areasqkm' not in output_catchments.columns: + output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2) + + output_flows = output_flows.merge(output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID') output_flows['ManningN'] = mannings_n @@ -223,7 +163,7 @@ def add_crosswalk( elif len(output_flows.loc[output_flows.From_Node == to_node]['HydroID']) > 1: try: max_order = max( - output_flows.loc[output_flows.From_Node == to_node]['HydroID']['order_'] + output_flows.loc[output_flows.From_Node == to_node]['order_'] ) # drainage area would be better than stream order but we would need to calculate except Exception as e: print( @@ -276,7 +216,7 @@ def add_crosswalk( update_id = output_flows.loc[output_flows.From_Node == to_node]['HydroID'].item() else: - update_id = output_flows.loc[output_flows.HydroID == short_id]['HydroID'].item() + update_id = output_flows[output_flows.HydroID == short_id]['HydroID'].iloc[0] output_order = output_flows.loc[output_flows.HydroID == short_id]['order_'] if len(output_order) == 1: @@ -347,10 +287,7 @@ def add_crosswalk( ['Discharge (m3s-1)'], ] = src_stage[1] - if extent == 'FR': - output_src = output_src.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID') - elif (extent == 'MS') | (extent == 'GMS'): - output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID') + output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID') output_crosswalk = output_src[['HydroID', 'feature_id']] output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True)