diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 07f1ed85..35de89ae 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,6 +1,17 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
+## v4.5.2.5 - 2024-07-08 - [PR#1205](https://github.com/NOAA-OWP/inundation-mapping/pull/1205)
+
+Snaps crosswalk from the midpoint of DEM-derived reaches to the nearest point on NWM streams within a threshold of 100 meters. DEM-derived streams that do not locate any NWM streams within 100 meters of their midpoints are removed from the FIM hydrofabric and their catchments are not inundated.
+
+### Changes
+
+- `src/add_crosswalk.py`: Locates nearest NWM stream to midpoint of DEM-derived reaches if within 100 meters. Also fixes a couple of minor bugs.
+
+
+
+
## v4.5.2.4 - 2024-07-08 - [PR#1204](https://github.com/NOAA-OWP/inundation-mapping/pull/1204)
Bug fix for extending outlets in order to ensure proper flow direction in depression filling algorithm. This PR adds a distance criteria that in order for the end of an outlet stream to be snapped to the wbd_buffered boundary, the end point must be less than 100 meters from the WBD boundary.
diff --git a/src/add_crosswalk.py b/src/add_crosswalk.py
index ddc9444d..62789c9b 100755
--- a/src/add_crosswalk.py
+++ b/src/add_crosswalk.py
@@ -40,130 +40,70 @@ def add_crosswalk(
input_catchments = gpd.read_file(input_catchments_fileName, engine="pyogrio", use_arrow=True)
input_flows = gpd.read_file(input_flows_fileName, engine="pyogrio", use_arrow=True)
input_huc = gpd.read_file(input_huc_fileName, engine="pyogrio", use_arrow=True)
+ input_nwmcat = gpd.read_file(input_nwmcat_fileName, engine="pyogrio", use_arrow=True)
input_nwmflows = gpd.read_file(input_nwmflows_fileName, engine="pyogrio", use_arrow=True)
min_catchment_area = float(min_catchment_area) # 0.25#
min_stream_length = float(min_stream_length) # 0.5#
- if extent == 'FR':
- ## crosswalk using majority catchment method
-
- # calculate majority catchments
- majority_calc = zonal_stats(
- input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True
- )
- input_majorities = gpd.GeoDataFrame.from_features(majority_calc)
- input_majorities = input_majorities.rename(columns={'majority': 'feature_id'})
-
- input_majorities = input_majorities[:][input_majorities['feature_id'].notna()]
- if input_majorities.feature_id.dtype != 'int':
- input_majorities.feature_id = input_majorities.feature_id.astype(int)
- if input_majorities.HydroID.dtype != 'int':
- input_majorities.HydroID = input_majorities.HydroID.astype(int)
-
- input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
- if input_nwmflows.feature_id.dtype != 'int':
- input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
- relevant_input_nwmflows = input_nwmflows[
- input_nwmflows['feature_id'].isin(input_majorities['feature_id'])
- ]
- relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id', 'order_'])
-
- if input_catchments.HydroID.dtype != 'int':
- input_catchments.HydroID = input_catchments.HydroID.astype(int)
- output_catchments = input_catchments.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
- output_catchments = output_catchments.merge(
- relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id'
- )
-
- if input_flows.HydroID.dtype != 'int':
- input_flows.HydroID = input_flows.HydroID.astype(int)
- output_flows = input_flows.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
- if output_flows.HydroID.dtype != 'int':
- output_flows.HydroID = output_flows.HydroID.astype(int)
- output_flows = output_flows.merge(relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id')
- output_flows = output_flows.merge(
- output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
- )
-
- elif (extent == 'MS') | (extent == 'GMS'):
- ## crosswalk using stream segment midpoint method
- input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona")
-
- # only reduce nwm catchments to mainstems if running mainstems
- if extent == 'MS':
- input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]
-
- input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
- if input_nwmcat.feature_id.dtype != 'int':
- input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
- input_nwmcat = input_nwmcat.set_index('feature_id')
-
- input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
- if input_nwmflows.feature_id.dtype != 'int':
- input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
-
- # Get stream midpoint
- stream_midpoint = []
- hydroID = []
- for i, lineString in enumerate(input_flows.geometry):
- hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
- stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)]
-
- input_flows_midpoint = gpd.GeoDataFrame(
- {'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry'
- )
- input_flows_midpoint = input_flows_midpoint.set_index('HydroID')
-
- # Create crosswalk
- crosswalk = gpd.sjoin(
- input_flows_midpoint, input_nwmcat, how='left', predicate='within'
- ).reset_index()
- crosswalk = crosswalk.rename(columns={"index_right": "feature_id"})
-
- # fill in missing ms
- crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()]
- for index, stream in crosswalk_missing.iterrows():
- # find closest nwm catchment by distance
- distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry]
- min_dist = min(distances)
- nwmcat_index = distances.index(min_dist)
-
- # update crosswalk
- crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'feature_id'] = input_nwmcat.iloc[
- nwmcat_index
- ].name
- crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'AreaSqKM'] = input_nwmcat.iloc[
- nwmcat_index
- ].AreaSqKM
- crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Length'] = input_nwmcat.iloc[
- nwmcat_index
- ].Shape_Length
- crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Area'] = input_nwmcat.iloc[
- nwmcat_index
- ].Shape_Area
-
- crosswalk = crosswalk.filter(items=['HydroID', 'feature_id'])
- crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']], on='feature_id')
-
- if len(crosswalk) < 1:
- print("No relevant streams within HUC boundaries.")
- sys.exit(0)
-
- if input_catchments.HydroID.dtype != 'int':
- input_catchments.HydroID = input_catchments.HydroID.astype(int)
- output_catchments = input_catchments.merge(crosswalk, on='HydroID')
-
- if input_flows.HydroID.dtype != 'int':
- input_flows.HydroID = input_flows.HydroID.astype(int)
- output_flows = input_flows.merge(crosswalk, on='HydroID')
-
- # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
- if 'areasqkm' not in output_catchments.columns:
- output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)
-
- output_flows = output_flows.merge(
- output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
- )
+ input_catchments = input_catchments.dissolve(by='HydroID').reset_index()
+
+ ## crosswalk using stream segment midpoint method
+ input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona")
+
+ # only reduce nwm catchments to mainstems if running mainstems
+ if extent == 'MS':
+ input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]
+
+ input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
+ if input_nwmcat.feature_id.dtype != 'int':
+ input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
+ input_nwmcat = input_nwmcat.set_index('feature_id')
+
+ input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
+ if input_nwmflows.feature_id.dtype != 'int':
+ input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
+ input_nwmflows = input_nwmflows.set_index('feature_id')
+
+ # Get stream midpoint
+ stream_midpoint = []
+ hydroID = []
+ for i, lineString in enumerate(input_flows.geometry):
+ hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
+ stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)]
+
+ input_flows_midpoint = gpd.GeoDataFrame(
+ {'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry'
+ )
+ input_flows_midpoint = input_flows_midpoint.set_index('HydroID')
+
+ # Create crosswalk
+ crosswalk = gpd.sjoin_nearest(
+ input_flows_midpoint, input_nwmflows, how='left', distance_col='distance'
+ ).reset_index()
+ crosswalk = crosswalk.rename(columns={"index_right": "feature_id"})
+
+ crosswalk.loc[crosswalk['distance'] > 100.0, 'feature_id'] = pd.NA
+
+ crosswalk = crosswalk.filter(items=['HydroID', 'feature_id', 'distance'])
+ crosswalk = crosswalk.merge(input_nwmflows[['order_']], on='feature_id')
+
+ if len(crosswalk) < 1:
+ print("No relevant streams within HUC boundaries.")
+ sys.exit(0)
+
+ if input_catchments.HydroID.dtype != 'int':
+ input_catchments.HydroID = input_catchments.HydroID.astype(int)
+ output_catchments = input_catchments.merge(crosswalk, on='HydroID')
+
+ if input_flows.HydroID.dtype != 'int':
+ input_flows.HydroID = input_flows.HydroID.astype(int)
+ output_flows = input_flows.merge(crosswalk, on='HydroID')
+
+ # added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
+ if 'areasqkm' not in output_catchments.columns:
+ output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)
+
+ output_flows = output_flows.merge(output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID')
output_flows['ManningN'] = mannings_n
@@ -223,7 +163,7 @@ def add_crosswalk(
elif len(output_flows.loc[output_flows.From_Node == to_node]['HydroID']) > 1:
try:
max_order = max(
- output_flows.loc[output_flows.From_Node == to_node]['HydroID']['order_']
+ output_flows.loc[output_flows.From_Node == to_node]['order_']
) # drainage area would be better than stream order but we would need to calculate
except Exception as e:
print(
@@ -276,7 +216,7 @@ def add_crosswalk(
update_id = output_flows.loc[output_flows.From_Node == to_node]['HydroID'].item()
else:
- update_id = output_flows.loc[output_flows.HydroID == short_id]['HydroID'].item()
+ update_id = output_flows[output_flows.HydroID == short_id]['HydroID'].iloc[0]
output_order = output_flows.loc[output_flows.HydroID == short_id]['order_']
if len(output_order) == 1:
@@ -347,10 +287,7 @@ def add_crosswalk(
['Discharge (m3s-1)'],
] = src_stage[1]
- if extent == 'FR':
- output_src = output_src.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
- elif (extent == 'MS') | (extent == 'GMS'):
- output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')
+ output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')
output_crosswalk = output_src[['HydroID', 'feature_id']]
output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True)