From b7f81386d54172b8fda224d47cde7a62c8dd00b2 Mon Sep 17 00:00:00 2001 From: Rob Hanna - NOAA <90854818+RobHanna-NOAA@users.noreply.github.com> Date: Fri, 16 Feb 2024 17:21:26 +0000 Subject: [PATCH] v4.4.10.1 Fix return error status from pre-processing and remove CPU test (#1075) --- docs/CHANGELOG.md | 17 +++++++++++++++++ fim_post_processing.sh | 2 ++ fim_pre_processing.sh | 20 ++++++++------------ 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 9760431b2..d8c074dbb 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,6 +1,23 @@ All notable changes to this project will be documented in this file. We follow the [Semantic Versioning 2.0.0](http://semver.org/) format. +## v4.4.10.1 - 2024-02-16 - [PR#1075](https://github.com/NOAA-OWP/inundation-mapping/pull/1075) + +We recently added code to fim_pre_processing.sh that checks the CPU count. Earlier this test was being done in post-processing and was killing a pipeline that had already been running for a while. + +Fix: +- Removed the CPU test from pre-processing. This puts us back to it possibly failing in post-processing but we have to leave it for now. +- Exit status codes (non 0) are now returned in pre-processing and post-processing when an error has occurred. + +Tested that a non-zero exit code returned from pre-processing shuts down the AWS step functions. + +### Changes +- `fim_pre_processing.sh`: added non zero exit codes when in error, plus removed CPU test +- `fim_post_processing.sh`: added non zero exit codes when in error + +

+ + ## v4.4.10.0 - 2024-02-02 - [PR#1054](https://github.com/NOAA-OWP/inundation-mapping/pull/1054) Recent testing exposed a bug with the `acquire_and_preprocess_3dep_dems.py` script. It lost the ability to be re-run and look for files that were unsuccessful earlier attempts and try them again. It may have been lost due to confusion of the word "retry". Now "retry" means restart the entire run. A new flag called "repair" has been added meaning fix what failed earlier. This is a key feature it is common for communication failures when calling USGS to download DEMs. And with some runs taking many hours, this feature becomes important. diff --git a/fim_post_processing.sh b/fim_post_processing.sh index a157db6f6..9a176e695 100755 --- a/fim_post_processing.sh +++ b/fim_post_processing.sh @@ -46,6 +46,7 @@ if [ "$runName" = "" ] then echo "ERROR: Missing -n run time name argument" usage + exit 22 fi outputDestDir=$outputsDir/$runName @@ -217,6 +218,7 @@ Tcount date -u find $outputDestDir -type d -exec chmod -R 777 {} + +find $outputDestDir -type f -exec chmod -R 777 {} + echo echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" diff --git a/fim_pre_processing.sh b/fim_pre_processing.sh index f8453c8e3..0846b522e 100755 --- a/fim_pre_processing.sh +++ b/fim_pre_processing.sh @@ -46,7 +46,6 @@ usage() -skipcal : If this param is included, the S.R.C. will be updated via the calibration points. will be skipped. 
" - exit } set -e @@ -104,16 +103,20 @@ in shift done +# exit 22 means bad argument + # print usage if arguments empty if [ "$hucList" = "" ] then echo "ERROR: Missing -u Huclist argument" usage + exit 22 fi if [ "$runName" = "" ] then echo "ERROR: Missing -n run time name argument" usage + exit 22 fi # outputsDir & workDir come from the Dockerfile @@ -137,6 +140,7 @@ then # NONE is not case sensitive echo "Error: The -ud does not exist and is not the word NONE" usage + exit 22 fi # validate and set defaults for the deny lists @@ -148,6 +152,7 @@ then # NONE is not case sensitive echo "Error: The -bd does not exist and is not the word NONE" usage + exit 22 fi # We do a 1st cleanup of branch zero using branchZeroDenylist (which might be none). @@ -164,6 +169,7 @@ then then echo "Error: The -zd does not exist and is not the word NONE" usage + exit 22 else # only if the deny branch zero has been overwritten and file exists has_deny_branch_zero_override=1 @@ -178,17 +184,7 @@ if [ -d $outputDestDir ] && [ $overwrite -eq 0 ]; then echo "ERROR: Output dir $outputDestDir exists. Use overwrite -o to run." echo usage -fi - -# Test to ensure we are not overuseing cores -num_available_cores=$(echo $(grep -c processor /proc/cpuinfo)) -let total_requested_jobs=$jobHucLimit*$jobBranchLimit -if [[ $total_requested_jobs -gt $num_available_cores ]]; then - echo - echo "ERROR: There are $num_available_cores available, but -jh (jobHucLimit) * -jb (jobBranchLimit)"\ - "exceed the number of available cores" - echo - usage + exit 22 fi ## SOURCE ENV FILE AND FUNCTIONS ##