Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1pt] PR: Snap crosswalk to NWM streams #1205

Merged
merged 9 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.2.5 - 2024-07-08 - [PR#1205](https://github.com/NOAA-OWP/inundation-mapping/pull/1205)

Snaps crosswalk from the midpoint of DEM-derived reaches to the nearest point on NWM streams within a threshold of 100 meters. DEM-derived streams that do not locate any NWM streams within 100 meters of their midpoints are removed from the FIM hydrofabric and their catchments are not inundated.

### Changes

- `src/add_crosswalk.py`: Locates nearest NWM stream to midpoint of DEM-derived reaches if within 100 meters. Also fixes a couple of minor bugs.

<br/><br/>


## v4.5.2.4 - 2024-07-08 - [PR#1204](https://github.com/NOAA-OWP/inundation-mapping/pull/1204)

Bug fix for extending outlets in order to ensure proper flow direction in depression filling algorithm. This PR adds a distance criteria that in order for the end of an outlet stream to be snapped to the wbd_buffered boundary, the end point must be less than 100 meters from the WBD boundary.
Expand Down
189 changes: 63 additions & 126 deletions src/add_crosswalk.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,130 +40,70 @@ def add_crosswalk(
input_catchments = gpd.read_file(input_catchments_fileName, engine="pyogrio", use_arrow=True)
input_flows = gpd.read_file(input_flows_fileName, engine="pyogrio", use_arrow=True)
input_huc = gpd.read_file(input_huc_fileName, engine="pyogrio", use_arrow=True)
input_nwmcat = gpd.read_file(input_nwmcat_fileName, engine="pyogrio", use_arrow=True)
input_nwmflows = gpd.read_file(input_nwmflows_fileName, engine="pyogrio", use_arrow=True)
min_catchment_area = float(min_catchment_area) # 0.25#
min_stream_length = float(min_stream_length) # 0.5#

if extent == 'FR':
## crosswalk using majority catchment method

# calculate majority catchments
majority_calc = zonal_stats(
input_catchments, input_nwmcatras_fileName, stats=['majority'], geojson_out=True
)
input_majorities = gpd.GeoDataFrame.from_features(majority_calc)
input_majorities = input_majorities.rename(columns={'majority': 'feature_id'})

input_majorities = input_majorities[:][input_majorities['feature_id'].notna()]
if input_majorities.feature_id.dtype != 'int':
input_majorities.feature_id = input_majorities.feature_id.astype(int)
if input_majorities.HydroID.dtype != 'int':
input_majorities.HydroID = input_majorities.HydroID.astype(int)

input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
if input_nwmflows.feature_id.dtype != 'int':
input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
relevant_input_nwmflows = input_nwmflows[
input_nwmflows['feature_id'].isin(input_majorities['feature_id'])
]
relevant_input_nwmflows = relevant_input_nwmflows.filter(items=['feature_id', 'order_'])

if input_catchments.HydroID.dtype != 'int':
input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
output_catchments = output_catchments.merge(
relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id'
)

if input_flows.HydroID.dtype != 'int':
input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
if output_flows.HydroID.dtype != 'int':
output_flows.HydroID = output_flows.HydroID.astype(int)
output_flows = output_flows.merge(relevant_input_nwmflows[['order_', 'feature_id']], on='feature_id')
output_flows = output_flows.merge(
output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
)

elif (extent == 'MS') | (extent == 'GMS'):
## crosswalk using stream segment midpoint method
input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona")

# only reduce nwm catchments to mainstems if running mainstems
if extent == 'MS':
input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]

input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
if input_nwmcat.feature_id.dtype != 'int':
input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
input_nwmcat = input_nwmcat.set_index('feature_id')

input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
if input_nwmflows.feature_id.dtype != 'int':
input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)

# Get stream midpoint
stream_midpoint = []
hydroID = []
for i, lineString in enumerate(input_flows.geometry):
hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)]

input_flows_midpoint = gpd.GeoDataFrame(
{'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry'
)
input_flows_midpoint = input_flows_midpoint.set_index('HydroID')

# Create crosswalk
crosswalk = gpd.sjoin(
input_flows_midpoint, input_nwmcat, how='left', predicate='within'
).reset_index()
crosswalk = crosswalk.rename(columns={"index_right": "feature_id"})

# fill in missing ms
crosswalk_missing = crosswalk.loc[crosswalk.feature_id.isna()]
for index, stream in crosswalk_missing.iterrows():
# find closest nwm catchment by distance
distances = [stream.geometry.distance(poly) for poly in input_nwmcat.geometry]
min_dist = min(distances)
nwmcat_index = distances.index(min_dist)

# update crosswalk
crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'feature_id'] = input_nwmcat.iloc[
nwmcat_index
].name
crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'AreaSqKM'] = input_nwmcat.iloc[
nwmcat_index
].AreaSqKM
crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Length'] = input_nwmcat.iloc[
nwmcat_index
].Shape_Length
crosswalk.loc[crosswalk.HydroID == stream.HydroID, 'Shape_Area'] = input_nwmcat.iloc[
nwmcat_index
].Shape_Area

crosswalk = crosswalk.filter(items=['HydroID', 'feature_id'])
crosswalk = crosswalk.merge(input_nwmflows[['feature_id', 'order_']], on='feature_id')

if len(crosswalk) < 1:
print("No relevant streams within HUC boundaries.")
sys.exit(0)

if input_catchments.HydroID.dtype != 'int':
input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(crosswalk, on='HydroID')

if input_flows.HydroID.dtype != 'int':
input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(crosswalk, on='HydroID')

# added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
if 'areasqkm' not in output_catchments.columns:
output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)

output_flows = output_flows.merge(
output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID'
)
input_catchments = input_catchments.dissolve(by='HydroID').reset_index()

## crosswalk using stream segment midpoint method
input_nwmcat = gpd.read_file(input_nwmcat_fileName, mask=input_huc, engine="fiona")

# only reduce nwm catchments to mainstems if running mainstems
if extent == 'MS':
input_nwmcat = input_nwmcat.loc[input_nwmcat.mainstem == 1]

input_nwmcat = input_nwmcat.rename(columns={'ID': 'feature_id'})
if input_nwmcat.feature_id.dtype != 'int':
input_nwmcat.feature_id = input_nwmcat.feature_id.astype(int)
input_nwmcat = input_nwmcat.set_index('feature_id')

input_nwmflows = input_nwmflows.rename(columns={'ID': 'feature_id'})
if input_nwmflows.feature_id.dtype != 'int':
input_nwmflows.feature_id = input_nwmflows.feature_id.astype(int)
input_nwmflows = input_nwmflows.set_index('feature_id')

# Get stream midpoint
stream_midpoint = []
hydroID = []
for i, lineString in enumerate(input_flows.geometry):
hydroID = hydroID + [input_flows.loc[i, 'HydroID']]
stream_midpoint = stream_midpoint + [lineString.interpolate(0.5, normalized=True)]

input_flows_midpoint = gpd.GeoDataFrame(
{'HydroID': hydroID, 'geometry': stream_midpoint}, crs=input_flows.crs, geometry='geometry'
)
input_flows_midpoint = input_flows_midpoint.set_index('HydroID')

# Create crosswalk
crosswalk = gpd.sjoin_nearest(
input_flows_midpoint, input_nwmflows, how='left', distance_col='distance'
).reset_index()
crosswalk = crosswalk.rename(columns={"index_right": "feature_id"})

crosswalk.loc[crosswalk['distance'] > 100.0, 'feature_id'] = pd.NA

crosswalk = crosswalk.filter(items=['HydroID', 'feature_id', 'distance'])
crosswalk = crosswalk.merge(input_nwmflows[['order_']], on='feature_id')

if len(crosswalk) < 1:
print("No relevant streams within HUC boundaries.")
sys.exit(0)

if input_catchments.HydroID.dtype != 'int':
input_catchments.HydroID = input_catchments.HydroID.astype(int)
output_catchments = input_catchments.merge(crosswalk, on='HydroID')

if input_flows.HydroID.dtype != 'int':
input_flows.HydroID = input_flows.HydroID.astype(int)
output_flows = input_flows.merge(crosswalk, on='HydroID')

# added for GMS. Consider adding filter_catchments_and_add_attributes.py to run_by_branch.sh
if 'areasqkm' not in output_catchments.columns:
output_catchments['areasqkm'] = output_catchments.geometry.area / (1000**2)

output_flows = output_flows.merge(output_catchments.filter(items=['HydroID', 'areasqkm']), on='HydroID')

output_flows['ManningN'] = mannings_n

Expand Down Expand Up @@ -223,7 +163,7 @@ def add_crosswalk(
elif len(output_flows.loc[output_flows.From_Node == to_node]['HydroID']) > 1:
try:
max_order = max(
output_flows.loc[output_flows.From_Node == to_node]['HydroID']['order_']
output_flows.loc[output_flows.From_Node == to_node]['order_']
) # drainage area would be better than stream order but we would need to calculate
except Exception as e:
print(
Expand Down Expand Up @@ -276,7 +216,7 @@ def add_crosswalk(
update_id = output_flows.loc[output_flows.From_Node == to_node]['HydroID'].item()

else:
update_id = output_flows.loc[output_flows.HydroID == short_id]['HydroID'].item()
update_id = output_flows[output_flows.HydroID == short_id]['HydroID'].iloc[0]

output_order = output_flows.loc[output_flows.HydroID == short_id]['order_']
if len(output_order) == 1:
Expand Down Expand Up @@ -347,10 +287,7 @@ def add_crosswalk(
['Discharge (m3s-1)'],
] = src_stage[1]

if extent == 'FR':
output_src = output_src.merge(input_majorities[['HydroID', 'feature_id']], on='HydroID')
elif (extent == 'MS') | (extent == 'GMS'):
output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')
output_src = output_src.merge(crosswalk[['HydroID', 'feature_id']], on='HydroID')

output_crosswalk = output_src[['HydroID', 'feature_id']]
output_crosswalk = output_crosswalk.drop_duplicates(ignore_index=True)
Expand Down
Loading