Skip to content

Commit

Permalink
v4.5.2.5 Snap crosswalk to NWM streams (#1205)
Browse files Browse the repository at this point in the history
  • Loading branch information
mluck authored Jul 8, 2024
1 parent 87364e1 commit 9c1f3af
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 126 deletions.
11 changes: 11 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.2.5 - 2024-07-08 - [PR#1205](https://github.com/NOAA-OWP/inundation-mapping/pull/1205)

Snaps crosswalk from the midpoint of DEM-derived reaches to the nearest point on NWM streams within a threshold of 100 meters. DEM-derived streams that do not locate any NWM streams within 100 meters of their midpoints are removed from the FIM hydrofabric and their catchments are not inundated.

### Changes

- `src/add_crosswalk.py`: Locates nearest NWM stream to midpoint of DEM-derived reaches if within 100 meters. Also fixes a couple of minor bugs.

<br/><br/>


## v4.5.2.4 - 2024-07-08 - [PR#1204](https://github.com/NOAA-OWP/inundation-mapping/pull/1204)

Bug fix for extending outlets in order to ensure proper flow direction in depression filling algorithm. This PR adds a distance criteria that in order for the end of an outlet stream to be snapped to the wbd_buffered boundary, the end point must be less than 100 meters from the WBD boundary.
Expand Down
189 changes: 63 additions & 126 deletions src/add_crosswalk.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,130 +40,70 @@ def add_crosswalk(
input_catchments = gpd.read_file(input_catchments_fileName, engine="pyogrio", use_arrow=True)
input_flows = gpd.read_file(input_flows_fileName, engine="pyogrio", use_arrow=True)
input_huc = gpd.read_file(input_huc_fileName, engine="pyogrio", use_arrow=True)
input_nwmcat = gpd.read_file(input_nwmcat_fileName, engine="pyogrio", use_arrow=True)
input_nwmflows = gpd.read_file(input_nwmflows_fileName, engine="pyogrio", use_arrow=True)
min_catchment_area = float(min_catchment_area) # 0.25#
min_stream_length = float(min_stream_length) # 0.5#

if extent == 'FR':
## crosswalk using majority catchment method

# calculate majority catchments
majority_calc = zonal_stats(
input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True
)
input_majorities = gpd.GeoDataFrame.from_features(majority_calc)
input_majorities = input_majorities.rename(columns={'majority': 'feature_id'})

input_majorities = input_majorities[:][input_majorities['feature_id'].notna()]
if input_majorities.feature_id.dtype != 'int':
input_majorities.feature_id = input_majorities.feature_id.astype(int)
if input_majorities.HydroID.dtype != 'int':
input_majorities.HydroID = input_majorities.HydroID.astype(int)

input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
if input_nwmflows.feature_id.dtype != 'int':
input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
relevant_input_nwmflows = input_nwmflows[
input_nwmflows['feature_id'].isin(input_majorities['feature_id'])
]
relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id', 'order_'])

if input_catchments.HydroID.dtype != 'int':
input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
output_catchments = output_catchments.merge(
relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id'
)

if input_flows.HydroID.dtype != 'int':
input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
if output_flows.HydroID.dtype != 'int':
output_flows.HydroID = output_flows.HydroID.astype(int)
output_flows = output_flows.merge(relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id')
output_flows = output_flows.merge(
output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
)

elif (extent == 'MS') | (extent == 'GMS'):
## crosswalk using stream segment midpoint method
input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona")

# only reduce nwm catchments to mainstems if running mainstems
if extent == 'MS':
input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]

input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
if input_nwmcat.feature_id.dtype != 'int':
input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
input_nwmcat = input_nwmcat.set_index('feature_id')

input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
if input_nwmflows.feature_id.dtype != 'int':
input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)

# Get stream midpoint
stream_midpoint = []
hydroID = []
for i, lineString in enumerate(input_flows.geometry):
hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)]

input_flows_midpoint = gpd.GeoDataFrame(
{'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry'
)
input_flows_midpoint = input_flows_midpoint.set_index('HydroID')

# Create crosswalk
crosswalk = gpd.sjoin(
input_flows_midpoint, input_nwmcat, how='left', predicate='within'
).reset_index()
crosswalk = crosswalk.rename(columns={"index_right": "feature_id"})

# fill in missing ms
crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()]
for index, stream in crosswalk_missing.iterrows():
# find closest nwm catchment by distance
distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry]
min_dist = min(distances)
nwmcat_index = distances.index(min_dist)

# update crosswalk
crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'feature_id'] = input_nwmcat.iloc[
nwmcat_index
].name
crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'AreaSqKM'] = input_nwmcat.iloc[
nwmcat_index
].AreaSqKM
crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Length'] = input_nwmcat.iloc[
nwmcat_index
].Shape_Length
crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Area'] = input_nwmcat.iloc[
nwmcat_index
].Shape_Area

crosswalk = crosswalk.filter(items=['HydroID', 'feature_id'])
crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']], on='feature_id')

if len(crosswalk) < 1:
print("No relevant streams within HUC boundaries.")
sys.exit(0)

if input_catchments.HydroID.dtype != 'int':
input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(crosswalk, on='HydroID')

if input_flows.HydroID.dtype != 'int':
input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(crosswalk, on='HydroID')

# added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
if 'areasqkm' not in output_catchments.columns:
output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)

output_flows = output_flows.merge(
output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
)
input_catchments = input_catchments.dissolve(by='HydroID').reset_index()

## crosswalk using stream segment midpoint method
input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona")

# only reduce nwm catchments to mainstems if running mainstems
if extent == 'MS':
input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]

input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
if input_nwmcat.feature_id.dtype != 'int':
input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
input_nwmcat = input_nwmcat.set_index('feature_id')

input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
if input_nwmflows.feature_id.dtype != 'int':
input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
input_nwmflows = input_nwmflows.set_index('feature_id')

# Get stream midpoint
stream_midpoint = []
hydroID = []
for i, lineString in enumerate(input_flows.geometry):
hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)]

input_flows_midpoint = gpd.GeoDataFrame(
{'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry'
)
input_flows_midpoint = input_flows_midpoint.set_index('HydroID')

# Create crosswalk
crosswalk = gpd.sjoin_nearest(
input_flows_midpoint, input_nwmflows, how='left', distance_col='distance'
).reset_index()
crosswalk = crosswalk.rename(columns={"index_right": "feature_id"})

crosswalk.loc[crosswalk['distance'] > 100.0, 'feature_id'] = pd.NA

crosswalk = crosswalk.filter(items=['HydroID', 'feature_id', 'distance'])
crosswalk = crosswalk.merge(input_nwmflows[['order_']], on='feature_id')

if len(crosswalk) < 1:
print("No relevant streams within HUC boundaries.")
sys.exit(0)

if input_catchments.HydroID.dtype != 'int':
input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(crosswalk, on='HydroID')

if input_flows.HydroID.dtype != 'int':
input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(crosswalk, on='HydroID')

# added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
if 'areasqkm' not in output_catchments.columns:
output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)

output_flows = output_flows.merge(output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID')

output_flows['ManningN'] = mannings_n

Expand Down Expand Up @@ -223,7 +163,7 @@ def add_crosswalk(
elif len(output_flows.loc[output_flows.From_Node == to_node]['HydroID']) > 1:
try:
max_order = max(
output_flows.loc[output_flows.From_Node == to_node]['HydroID']['order_']
output_flows.loc[output_flows.From_Node == to_node]['order_']
) # drainage area would be better than stream order but we would need to calculate
except Exception as e:
print(
Expand Down Expand Up @@ -276,7 +216,7 @@ def add_crosswalk(
update_id = output_flows.loc[output_flows.From_Node == to_node]['HydroID'].item()

else:
update_id = output_flows.loc[output_flows.HydroID == short_id]['HydroID'].item()
update_id = output_flows[output_flows.HydroID == short_id]['HydroID'].iloc[0]

output_order = output_flows.loc[output_flows.HydroID == short_id]['order_']
if len(output_order) == 1:
Expand Down Expand Up @@ -347,10 +287,7 @@ def add_crosswalk(
['Discharge (m3s-1)'],
] = src_stage[1]

if extent == 'FR':
output_src = output_src.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
elif (extent == 'MS') | (extent == 'GMS'):
output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')
output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')

output_crosswalk = output_src[['HydroID', 'feature_id']]
output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True)
Expand Down

0 comments on commit 9c1f3af

Please sign in to comment.