[5pt] PR: Update nws_lid dataset with Alaska sites #1159

Merged · 6 commits · May 17, 2024
22 changes: 22 additions & 0 deletions docs/CHANGELOG.md
@@ -1,6 +1,28 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.0.2 - 2024-05-17 - [PR#1159](https://github.com/NOAA-OWP/inundation-mapping/pull/1159)

This PR addresses issue #1132 and includes the following changes to `tools/generate_nws_lid.py` for updating the `nws_lid.gpkg` dataset.

In this revised version, only lid stations from the following two groups are retrieved (a sketch of the corresponding queries follows the list):
- lid stations with `rfc_forecast_point = True`
- lid stations listed in `/data/inputs/ahp_sites/evaluated_ahps_sites.csv`
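
As a rough illustration, the two queries might look like the sketch below. The keyword signature of `get_metadata` is taken from the call visible in the diff further down; the import path, the endpoint, the `select_by`/`selector`/`must_include` values, and the CSV column name are assumptions rather than verbatim PR code.

```python
import os
import pandas as pd

from tools_shared_functions import get_metadata  # import path assumed

API_BASE_URL = os.getenv('API_BASE_URL')
EVALUATED_SITES_CSV = os.getenv('EVALUATED_SITES_CSV')
metadata_url = f'{API_BASE_URL}/metadata'  # endpoint assumed

# Group 1: lid stations flagged as RFC forecast points.
fcst_list, fcst_dataframe = get_metadata(
    metadata_url=metadata_url,
    select_by='nws_lid',                         # assumed
    selector=['all'],                            # assumed
    must_include='nws_data.rfc_forecast_point',  # assumed attribute path
    upstream_trace_distance=None,
    downstream_trace_distance='all',
)

# Group 2: lid stations listed in the evaluated AHPS sites CSV.
evaluated_sites = pd.read_csv(EVALUATED_SITES_CSV)['Total_List'].to_list()  # column name assumed
eval_list, eval_dataframe = get_metadata(
    metadata_url=metadata_url,
    select_by='nws_lid',
    selector=evaluated_sites,
    must_include=None,
    upstream_trace_distance=None,
    downstream_trace_distance='all',
)

# Only these two groups feed the output layer (matches the diff below).
all_lists = fcst_list + eval_list
```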

Lid stations in AK (Alaska), HI, and PR that meet the above two criteria are also selected, as shown in the map below. In the previous version of the code, **all** lid stations in PR and HI were retrieved regardless of whether they met these criteria; this version excludes such stations.

This revised version also removes the code sections that previously generated the `is_headwater` and `is_colocated` columns, which are not needed in FIM4, so these columns are no longer present in the output.

As with the `usgs_gages.gpkg` dataset, all lid stations, including those in Alaska, are stored in a single GeoPackage (`nws_lid.gpkg`) in EPSG:5070. The Alaska stations can be identified by their HUC8 codes, which begin with '19'.
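
As a quick consumer-side example, the Alaska stations can be pulled out of the combined layer with a HUC8 prefix filter. A minimal sketch, assuming the layer carries a `HUC8` column (the column name and file path are illustrative):

```python
import geopandas as gpd

# Path is illustrative; point it at wherever nws_lid.gpkg was written.
nws_lid_gdf = gpd.read_file('/data/inputs/ahps_sites/nws_lid.gpkg')
assert nws_lid_gdf.crs.to_epsg() == 5070  # all stations share EPSG:5070

# Alaska HUC8 codes begin with '19'.
alaska_sites = nws_lid_gdf[nws_lid_gdf['HUC8'].astype(str).str.startswith('19')]
print(f'{len(alaska_sites)} Alaska lid stations')
```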


### Changes
- tools/generate_nws_lid.py

<br/><br/>



## v4.5.0.1 - 2024-05-09 - [PR#1150](https://github.com/NOAA-OWP/inundation-mapping/pull/1150)

Fixes two bugs discovered in v4.5.0.0:
83 changes: 5 additions & 78 deletions tools/generate_nws_lid.py
@@ -22,14 +22,11 @@
API_BASE_URL = os.getenv("API_BASE_URL")
EVALUATED_SITES_CSV = os.getenv("EVALUATED_SITES_CSV")
WBD_LAYER = os.getenv("WBD_LAYER")
# Define path to NWM stream layer
NWM_FILE = '/data/inputs/nwm_hydrofabric/nwm_flows.gpkg'


def generate_nws_lid(workspace):
'''
Generate the nws_lid layer containing all nws_lid points attributed
whether site is mainstems and co-located
Generate the nws_lid layer containing all nws_lid points

Parameters
----------
@@ -79,22 +76,8 @@ def generate_nws_lid(workspace):
downstream_trace_distance=downstream_trace_distance,
)

# Trace downstream from all sites in HI/PR.
select_by = 'state'
selector = ['HI', 'PR']
must_include = None
downstream_trace_distance = 'all'
islands_list, islands_dataframe = get_metadata(
metadata_url=metadata_url,
select_by=select_by,
selector=selector,
must_include=must_include,
upstream_trace_distance=None,
downstream_trace_distance=downstream_trace_distance,
)

# Append all lists
all_lists = fcst_list + eval_list + islands_list
all_lists = fcst_list + eval_list

# Compile NWM segments from all_lists

@@ -129,73 +112,17 @@ def generate_nws_lid(workspace):
# Dictionary with key of 2nd to last downstream segment and value of site nwm segment
downstream[int(key)].append(int(value))

# Walk downstream the network and identify headwater points
print('Traversing network..')

# Import NWM file and create dictionary of network and create the NWM network dictionary.
nwm_gdf = gpd.read_file(NWM_FILE)
network = nwm_gdf.groupby('ID')['to'].apply(list).to_dict()

# Walk through network and find headwater points
all_dicts = {}
for tree, targets in downstream.items():
# All targets are assigned headwaters
sub_dict = {i: 'is_headwater' for i in targets}
# Walk downstream of each target
for i in targets:
# Check to see element is not a headwater
if sub_dict[i] == 'not_headwater':
continue
# Get from_node and to_node.
from_node = i
[to_node] = network[from_node]
# Walk downstream from target
while to_node > 0:
# Check if to_node is in targets list
if to_node in targets:
sub_dict[to_node] = 'not_headwater'
# Assign downstream ID as to_node
[to_node] = network[to_node]

# Append status to master dictionary
all_dicts.update(sub_dict)

# Create dictionaries of nws_lid (key) and headwater status (value) and nws_lid (key) and
# co-located with same feature_id(value)
final_dict = {}
duplicate_dict = {}
for key, status in all_dicts.items():
site_list = target[key]
for site in site_list:
final_dict[site] = status
if len(site_list) > 1:
duplicate_dict[site] = 'is_colocated'
else:
duplicate_dict[site] = 'not_colocated'

# Get Spatial data and populate headwater/duplicate attributes
print('Attributing nws_lid layer..')

# Geodataframe from all_lists, reproject, and reset index.
trash, nws_lid_gdf = aggregate_wbd_hucs(all_lists, WBD_LAYER, retain_attributes=False)
_, nws_lid_gdf = aggregate_wbd_hucs(all_lists, WBD_LAYER, retain_attributes=False)
nws_lid_gdf.columns = [name.replace('identifiers_', '') for name in nws_lid_gdf.columns]
nws_lid_gdf.to_crs(PREP_PROJECTION, inplace=True)
nws_lid_gdf.reset_index(drop=True)

# Create DataFrames of headwater and duplicates and join.
final_dict_pd = pd.DataFrame(list(final_dict.items()), columns=['nws_lid', 'is_headwater'])
duplicate_dict_pd = pd.DataFrame(list(duplicate_dict.items()), columns=['nws_lid', 'is_colocated'])
attributes = final_dict_pd.merge(duplicate_dict_pd, on='nws_lid')
attributes.replace(
{'is_headwater': True, 'is_colocated': True, 'not_headwater': False, 'not_colocated': False},
inplace=True,
)

# Join attributes, remove sites with no assigned nwm_feature_id and write to file
joined = nws_lid_gdf.merge(attributes, on='nws_lid', how='left')
joined.dropna(subset=['nwm_feature_id'], inplace=True)
Path(workspace).mkdir(parents=True, exist_ok=True)
joined.to_file(Path(workspace) / 'nws_lid.gpkg', driver='GPKG')
nws_lid_gdf.dropna(subset=['nwm_feature_id'], inplace=True)
nws_lid_gdf.to_file(Path(workspace) / 'nws_lid.gpkg', driver='GPKG')


if __name__ == '__main__':