v4.5.10.0 PR: Reload 3dep DEMs and pre-clips (#1301)
RobHanna-NOAA authored Sep 25, 2024
1 parent be2f8b8 commit 7e20091
Showing 8 changed files with 275 additions and 149 deletions.
6 changes: 3 additions & 3 deletions CITATION.cff
@@ -1,8 +1,8 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
message: "If you use this software, please cite it as below. To ensure you have latest and correct version, please look at the changelog.md at 'https://github.com/NOAA-OWP/inundation-mapping/blob/dev/docs/CHANGELOG.md'"
authors:
- family-names: "NOAA Office of Water Prediction"
title: "Inundation Mapping"
url: "https://github.com/NOAA-OWP/inundation-mapping"
version: 4.4.2.1
date-released: 2023
version: 4.5.10.0
date-released: 2024
113 changes: 70 additions & 43 deletions data/usgs/acquire_and_preprocess_3dep_dems.py
@@ -4,12 +4,11 @@
import glob
import logging
import os
import shutil
import subprocess
import sys
import traceback
from concurrent.futures import ProcessPoolExecutor, as_completed, wait
from datetime import datetime
from datetime import datetime, timezone

import geopandas as gpd
import pandas as pd
@@ -27,6 +26,14 @@
- Add input args for resolution size, which means URL and block size also have to be parameterized.
'''

"""
*****************************
### IMPORTANT: Sep 13, 2024: FIM uses vrt's primariy for DEMs but this tool only downloads and preps the DEMs but does
not create the vrt. That is done using the create_vrt_file.py tool.
"""


# local constants (until changed to input param)
# This URL is part of a series of vrt data available from USGS via an S3 Bucket.
@@ -59,13 +66,20 @@ def acquire_and_preprocess_3dep_dems(
Notes:
- It really can be used for any huc size or any extent poly as long as it is 10m.
- As this is a very low use tool, most values such as the USGS vrt path, output
file names, etc. are all hardcoded
- Currently there are no tools to extract the WBD HUC8's that are applicable. You will want
a gpkg for each download extent (ie. HUC8, HUC6, whatever). Make a folder of each extent
file you want. ie) a folder of WBD HUC8's. One gpkg per HUC8 (or whatever size you like)
- When we originally used this tool for CONUS+, we loaded them in HUC6's, but now
with selected HUC8's for South Alaska, we will use input extent files as WBD HUC8.
- We run CONUS at HUC6 as there are often a number of download failures, requiring our tool to run in -r (repair)
mode to fix them. HUC8's mean more files and allow for more possible errors (usually just communication failures).
- We have a separate tool to create a VRT of any folder of rasters.
Parameters
Expand All @@ -76,8 +90,7 @@ def acquire_and_preprocess_3dep_dems(
ie) /data/inputs/wbd/HUC6
- target_output_folder_path (str):
The output location of the new 3dep dem files. When the param is not submitted,
it will be sent to /data/input/usgs/3dep_dems/10m/.
The output location of the new 3dep dem files.
- number_of_jobs (int):
This program supports multiple procs if multiple procs/cores are available.
@@ -95,8 +108,6 @@
- target_projection (String)
Projection of the output DEMS and polygons (if included)
'''
# -------------------
# Validation
Expand All @@ -109,12 +120,31 @@ def acquire_and_preprocess_3dep_dems(
' value accordingly.'
)

if number_of_jobs > 15:
print("")
print(f"You have asked for {number_of_jobs} jobs\n")
print("For each core, it opens up another extenal connection.")
print(" But if you try to download more files simultaneously, many files an be partially downloaded")
print(
" with no notification or warning. It is recommended to slow down the job numbers to ensure stability."
)
print("")
print(" Type 'CONTINUE' if you want to keep your original job numbers")
print(" 'MAX' if you want change your job count to 15")
print(" any other value to abort.")
resp = input(">> ").lower()

if resp == "max":
number_of_jobs = 15
elif resp != "continue":
print("Program aborted")
sys.exit(1)
print(f".. Continuing with {number_of_jobs} jobs")
print("")

if not os.path.exists(extent_file_path):
raise ValueError(f'extent_file_path value of {extent_file_path}' ' not set to a valid path')

if (target_output_folder_path is None) or (target_output_folder_path == ""):
target_output_folder_path = os.environ['usgs_3dep_dems_10m']

if not os.path.exists(target_output_folder_path):
# It is ok if the child directory does not exist, but the parent folder must
# parent directory
@@ -125,28 +155,7 @@
f"For the output path of {target_output_folder_path}, the child directory"
" need not exist but the parent folder must."
)
os.makedir(target_output_folder_path)

else: # path exists
if repair is False:
file_list = os.listdir(target_output_folder_path)
if len(file_list) > 0:
print()
msg = (
f"The target output folder of {target_output_folder_path} appears to not be empty.\n\n"
"Do you want to empty the folder first?\n"
" -- Type 'overwrite' if you want to empty the folder and continue.\n"
" -- Type any other value to abort and stop the program.\n"
)
print(msg)
resp = input(" ?=").lower()
if resp == "overwrite":
shutil.rmtree(target_output_folder_path)
os.mkdir(target_output_folder_path)
else:
print("Program stopped\n")
sys.exit(0)
# no else:
os.makedirs(target_output_folder_path, exist_ok=True)

# I don't need the crs_number for now
crs_is_valid, err_msg, crs_number = val.is_valid_crs(target_projection)
@@ -155,7 +164,7 @@

# -------------------
# setup logs
start_time = datetime.utcnow()
start_time = datetime.now(timezone.utc)
fh.print_start_header('Loading 3dep dems', start_time)

# print(f"Downloading to {target_output_folder_path}")
@@ -179,7 +188,7 @@
if skip_polygons is False:
polygonize(target_output_folder_path)

end_time = datetime.utcnow()
end_time = datetime.now(timezone.utc)
fh.print_end_header('Loading 3dep dems complete', start_time, end_time)

print()
@@ -426,7 +435,7 @@ def polygonize(target_output_folder_path):


def __setup_logger(output_folder_path):
start_time = datetime.utcnow()
start_time = datetime.now(timezone.utc)
file_dt_string = start_time.strftime("%Y_%m_%d-%H_%M_%S")
log_file_name = f"3Dep_downloaded-{file_dt_string}.log"

@@ -453,16 +462,34 @@ def __setup_logger(output_folder_path):
-t /data/inputs/3dep_dems/10m_South_Alaska/
-j 20
or
python3 /foss_fim/data/usgs/acquire_and_preprocess_3dep_dems.py
-e /data/inputs/wbd/HUC6_ESPG_5070/
-t /data/inputs/usgs/3dep_dems/10m_5070/20240916 -r -j 15
Notes:
- There is a lot to know, so read the notes in the functions above.
- It is very common for not all DEMs to download correctly on each pass.
Review the output files and the logs so you know which are missing. Delete the ones in the outputs
that are in error. Then run the tool again with the -r flag (repair), which will fill in the holes.
This is also why we run it at HUC6 as it is easier to trace for failed files. We get a lot of
communication errors during downloads.
- This is a very low use tool. So for now, this can only load 10m (1/3 arc second) and is using
hardcoded paths for the wbd gpkg to be used for clipping (no buffer for now).
Also hardcoded usgs 3dep urls, etc. Minor
upgrades can easily be made for different urls, output folder paths, huc units, etc.
as/if needed (command line params)
- The output path can be adjusted in case of a test reload of newer data for 3dep.
The default is /data/input/usgs/3dep_dems/10m/
- Each output file will be the name of the input poly plus "_dem.tif". ie) if the wbd gpkg
is named "HUC8_12090301", then the output file name will be "HUC8_12090301_dem.tif"
Or it depends on what file name you sent in for the boundary: ie) HUC6_120903
- While you can (and should) use more than one job number (if manageable by your server),
this tool is memory intensive and needs more RAM than it needs cores / cpus. Go ahead
anyways and increase the job number so you are getting the most out of your RAM. Or
@@ -491,6 +518,14 @@ def __setup_logger(output_folder_path):
required=True,
)

parser.add_argument(
'-t',
'--target_output_folder_path',
help='REQUIRED: location of where the 3dep files will be saved.',
required=True,
default='',
)

parser.add_argument(
'-j',
'--number_of_jobs',
@@ -504,20 +539,12 @@
'-rp',
'--repair',
help='OPTIONAL: If included, it processes only HUCs missing output DEMs or if the output DEM'
' is too small (under 10 MB), which does happen.',
' is too small (under 10 MB), which does happen. Read all inline notes about this feature.',
required=False,
action='store_true',
default=False,
)
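For illustration, the kind of scan the repair pass implies (find extent polys whose output DEM
is missing or suspiciously small) might look like the sketch below. Paths are hypothetical and
this is not the tool's actual repair logic; the 10 MB threshold is from the -rp help text:

# Sketch: list extent polys whose "<name>_dem.tif" output is missing or under
# 10 MB, i.e. candidates for a -rp (repair) rerun. Hypothetical paths only.
import glob
import os

extent_dir = "/data/inputs/wbd/HUC6_ESPG_5070"  # hypothetical, per the usage example above
output_dir = "/data/inputs/usgs/3dep_dems/10m_5070/20240916"  # hypothetical
MIN_BYTES = 10 * 1024 * 1024  # "too small" threshold per the -rp help

for poly in sorted(glob.glob(f"{extent_dir}/*.gpkg")):
    name = os.path.splitext(os.path.basename(poly))[0]
    dem = os.path.join(output_dir, f"{name}_dem.tif")  # output naming per the notes
    if not os.path.exists(dem) or os.path.getsize(dem) < MIN_BYTES:
        print(f"redo: {name}")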

parser.add_argument(
'-t',
'--target_output_folder_path',
help='OPTIONAL: location of where the 3dep files will be saved.',
required=False,
default='',
)

parser.add_argument(
'-sp',
'--skip_polygons',
20 changes: 16 additions & 4 deletions data/wbd/generate_pre_clip_fim_huc8.py
@@ -178,13 +178,12 @@ def pre_clip_hucs_from_wbd(outputs_dir, huc_list, number_of_jobs, overwrite):
'''

# Validation
total_cpus_available = os.cpu_count()
total_cpus_available = os.cpu_count() - 2
if number_of_jobs > total_cpus_available:
print(
f'Provided: -j {number_of_jobs}, which is greater than the number of available cpus -2: '
f'{total_cpus_available - 2} will be used instead.'
)
number_of_jobs = total_cpus_available - 2

# Read in huc_list file and turn into a list data structure
if os.path.exists(huc_list):
@@ -512,13 +511,25 @@ def huc_level_clip_vectors_to_wbd(huc, outputs_dir):


if __name__ == '__main__':

# NOTE: Super important: Make sure the bash_variables are correct before doing pre-clip.
# It pulls a wide number of values from there.
# Especially if you change a directory for a new data load.
# ie) DEMs at data/inputs/3dep_dems/10m_5070/20240916/
#
# You have to run this twice, once for Alaska and once for CONUS,
# but make sure to put both results in the same folder,
# and you will need to submit the two HUC lists
# (see the example invocations just below):
#     SouthernAlaska_HUC8.lst
#     included_huc8.lst
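# For example, the two runs might look like this (hypothetical invocations, built from the
# usage example below and the note above; not literal commands from this PR):
#   python3 /foss_fim/data/wbd/generate_pre_clip_fim_huc8.py
#       -n /data/inputs/pre_clip_huc8/20240927 -u /data/inputs/huc_lists/included_huc8.lst -j 6 -o
#   python3 /foss_fim/data/wbd/generate_pre_clip_fim_huc8.py
#       -n /data/inputs/pre_clip_huc8/20240927 -u /data/inputs/huc_lists/SouthernAlaska_HUC8.lst -j 6 -o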

parser = argparse.ArgumentParser(
description='This script gets WBD layer, calls the clip_vectors_to_wbd.py script, and clips the wbd. '
'A plethora of gpkg files per huc are generated (see args to subset_vector_layers), and placed within '
'the output directory specified as the <outputs_dir> argument.',
usage='''
./generate_pre_clip_fim_huc8.py
-n /data/inputs/pre_clip_huc8/24_3_20
-n /data/inputs/pre_clip_huc8/20240927
-u /data/inputs/huc_lists/included_huc8_withAlaska.lst
-j 6
-o
@@ -529,9 +540,10 @@ def huc_level_clip_vectors_to_wbd(huc, outputs_dir):
'-n',
'--outputs_dir',
help='Directory to output all of the HUC level .gpkg files. Use the format: '
'<year_month_day> (i.e. September 26, 2023 would be 23_09_26)',
'<year_month_day> (i.e. September 26, 2024 would be 20240926)',
)
parser.add_argument('-u', '--huc_list', help='List of HUCs to generate pre-clipped vectors for.')

parser.add_argument(
'-j',
'--number_of_jobs',
16 changes: 13 additions & 3 deletions data/wbd/preprocess_wbd.py
@@ -11,6 +11,9 @@

gpd.options.io_engine = "pyogrio"

# NOTE:
# Sep 2024. This file might be deprecated as no code calls it


def clip_wbd_to_dem_domain(dem: str, wbd_in: str, wbd_out: str, huc_level: int):
"""
@@ -43,6 +46,16 @@ def clip_wbd_to_dem_domain(dem: str, wbd_in: str, wbd_out: str, huc_level: int):


if __name__ == '__main__':

# Example:
# preprocess_wbd.py -d /data/inputs/3dep_dems/10m_5070/20240916/HUC6_dem_domain.gpkg
# -w /data/inputs/wbd/WBD_National_EPSG_5070.gpkg
# -o /data/inputs/wbd/WBD_National_EPSG_5070_WBDHU8_clip_dem_domain.gpkg
# -l 8

# WATCH FOR Alaska as well. During the 3dep download of 20240916, it did not include
# Alaska. That one is in data/inputs/3dep_dems/10m_South_Alaska/20240912/

parser = argparse.ArgumentParser(description='Clip WBD to DEM domain')
parser.add_argument('-d', '--dem', help='Path to DEM', type=str, required=True)
parser.add_argument('-w', '--wbd-in', help='Input WBD filename', type=str, required=True)
@@ -52,6 +65,3 @@ def clip_wbd_to_dem_domain(dem: str, wbd_in: str, wbd_out: str, huc_level: int):
args = vars(parser.parse_args())

clip_wbd_to_dem_domain(**args)

# Example:
# preprocess_wbd.py -d /data/inputs/3dep_dems/10m_5070/HUC6_dem_domain.gpkg -w /data/inputs/wbd/WBD_National_EPSG_5070.gpkg -o /data/inputs/wbd/WBD_National_EPSG_5070_WBDHU8_clip_dem_domain.gpkg -l 8
40 changes: 40 additions & 0 deletions docs/CHANGELOG.md
@@ -1,6 +1,46 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.10.0 - 2024-09-25 - [PR#1301](https://github.com/NOAA-OWP/inundation-mapping/pull/1301)

A reload of all 3Dep DEMs from USGS was performed to refresh our data.

`acquire_and_preprocess_3dep_dems.py` had to be run twice, once for Alaska and once for the rest, to two different folders. This is due to different CRS's. Eventually, we could merge these into one run. This also meant two separate vrt runs / files. The two runs looked roughly like the example below.
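(The CONUS invocation follows the tool's inline usage notes; the Alaska extent path is a
hypothetical placeholder, though the Alaska target folder appears in the tool's examples.)

    python3 /foss_fim/data/usgs/acquire_and_preprocess_3dep_dems.py
        -e /data/inputs/wbd/HUC6_ESPG_5070/ -t /data/inputs/usgs/3dep_dems/10m_5070/20240916 -j 15
    python3 /foss_fim/data/usgs/acquire_and_preprocess_3dep_dems.py
        -e /data/inputs/wbd/HUC8_South_Alaska/ -t /data/inputs/3dep_dems/10m_South_Alaska/ -j 15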

This also triggered a new set of pre-clips for both AK and CONUS+, but the outputs can be (and were) put into the same folder, so fim_pipeline looks in one common pre-clip folder.

Other minor adjustments include:
- A change to chmod (permissions) for files / folders in the logging folders. After careful re-analysis, it was discovered there was some duplication.
- Added a simple duration system to the sierra test system, `rating_curve_comparisons.py` (a minimal sketch of the idea follows this list). This was added as it is expected to be used soon for a full BED/Production. The fix focuses purely on duration, but a test did detect a possible pre-existing logic problem. A separate card will be created for that.
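The duration piece is just elapsed-time bookkeeping around the comparison run, illustrated below
(a sketch only, reusing the timezone-aware datetime pattern adopted in this PR; not the actual
`rating_curve_comparisons.py` code):

    # Sketch: time a comparison run and report its duration.
    from datetime import datetime, timezone

    start_time = datetime.now(timezone.utc)
    # ... run the sierra test / rating curve comparisons here ...
    end_time = datetime.now(timezone.utc)
    print(f"Duration: {(end_time - start_time).total_seconds() / 60:.2f} minutes")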

Note:
The root folder for DEMs is being changed from:
/inputs/3dep_dems/.... to
/inputs/dems/3dep_dems/....
This recognizes other DEMs that may be coming in the near future.
The same sub-folder patterns have not been changed.
No attempt will be made at this time to move older files, only new files incoming from this PR.

### Changes
- `CITATION.cff`: had not been updated for a very long time.
- `fim_post_processing.sh`: Update to file/folder permissions.
- `data`
  - `usgs/acquire_and_preprocess_3dep_dems.py`
    - Minor text updates and updated datetime.now() patterns, as the old ones are now deprecated.
    - An adjustment to how the number of jobs is handled. The system dislikes too many multi-procs due to open network connections to the source.
    - Changed the target output folder from optional to required.
  - `wbd`
    - `generate_pre_clip_fim_huc8.py`:
      - Minor text updates
    - `preprocess_wbd.py`
      - Minor text updates
- `src/bash_variables.env`: Changes to variables to reflect new dems and pre-clip paths.
- `tools/rating_curve_comparisons.py`
  - Added duration system as mentioned above.

<br/><br/>


## v4.5.9.0 - 2024-09-25 - [PR#1291](https://github.com/NOAA-OWP/inundation-mapping/pull/1291)

Changes Docker base image to `gdal:ubuntu-small` in order to avoid JDK from being carried over in the base image and triggering security vulnerabilities.
6 changes: 2 additions & 4 deletions fim_post_processing.sh
@@ -266,10 +266,8 @@ Tcount

l_echo $startDiv"Resetting Permissions"
Tstart
find $outputDestDir/logs/ -type d -exec chmod -R 777 {} +
find $outputDestDir/branch_errors/ -type d -exec chmod -R 777 {} +
find $outputDestDir/unit_errors/ -type d -exec chmod -R 777 {} +
find $outputDestDir -type f -exec chmod 777 {} + # just root level files
# super slow to change chmod on the log folder. Not really mandatory anyways
find $outputDestDir -maxdepth 1 -type f -exec chmod 666 {} + # just root level files
Tcount

