Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…into dev-error-scan
  • Loading branch information
RobHanna-NOAA committed Oct 25, 2024
2 parents 2232418 + 789618d commit bed6165
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 7 deletions.
File renamed without changes.
129 changes: 129 additions & 0 deletions Dockerfile.prod
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
## Temporary image to build the libraries and only save the needed artifacts
FROM ghcr.io/osgeo/gdal:ubuntu-small-3.8.4 AS builder
WORKDIR /opt/builder
ARG dataDir=/data
ARG projectDir=/foss_fim
ARG depDir=/dependencies
# Pinned taudem commits so the build is reproducible
ARG taudemVersion=98137bb6541a0d0077a9c95becfed4e56d0aa0ac
ARG taudemVersion2=81f7a07cdd3721617a30ee4e087804fddbcffa88
ENV taudemDir=$depDir/taudem/bin
ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin

# remove reference to missing repo
# RUN rm /etc/apt/sources.list.d/apache-arrow.sources

# Install all build dependencies in a single layer so `apt-get update` and
# `apt-get install` always run together against a fresh index (avoids the
# stale-cache bug of a standalone update layer), and the index is removed
# in the same layer that created it.
RUN apt-get update --fix-missing && apt-get install -y \
        cmake \
        git \
        libboost-test-dev \
        libgtest-dev \
        libnetcdf-dev \
        mpich \
    && rm -rf /var/lib/apt/lists/*

RUN git clone https://github.com/dtarb/taudem.git
RUN git clone https://github.com/fernandoa123/cybergis-toolkit.git taudem_accelerated_flowDirections

## Compile Main taudem repo ##
RUN mkdir -p taudem/bin \
    && cd taudem \
    && git checkout $taudemVersion \
    && cd src \
    && make

## Compile taudem repo with accelerated flow directions ##
RUN cd taudem_accelerated_flowDirections/taudem \
    && git checkout $taudemVersion2 \
    && mkdir build \
    && cd build \
    && cmake .. \
    && make

# Create both destination dirs in one layer
RUN mkdir -p $taudemDir $taudemDir2

## Move only the needed binaries so the next stage copies nothing extra
RUN cd taudem/bin && mv -t $taudemDir flowdircond streamnet gagewatershed catchhydrogeo dinfdistdown
RUN cd taudem_accelerated_flowDirections/taudem/build/bin && mv -t $taudemDir2 d8flowdir dinfflowdir


###############################################################################################
# Base Image that has GDAL, PROJ, etc
FROM ghcr.io/osgeo/gdal:ubuntu-small-3.8.4
ARG dataDir=/data
ENV projectDir=/foss_fim
ARG depDir=/dependencies
ENV inputsDir=$dataDir/inputs
ENV outputsDir=/outputs
ENV srcDir=$projectDir/src
ENV toolsDir=$projectDir/tools
ENV workDir=/fim_temp
ENV taudemDir=$depDir/taudem/bin
ENV taudemDir2=$depDir/taudem_accelerated_flowDirections/taudem/build/bin

## ADDING FIM GROUP ##
ARG GroupID=1370800235
ARG GroupName=fim
RUN addgroup --gid $GroupID $GroupName
ENV GID=$GroupID
ENV GN=$GroupName

RUN mkdir -p $workDir $depDir
COPY --from=builder $depDir $depDir

# remove reference to missing repo
# RUN rm /etc/apt/sources.list.d/apache-arrow.sources

# One apt layer: update + install + autoremove + index cleanup run together so
# the package index is never stale and never bloats the image. Use `apt-get`
# (stable CLI) rather than `apt`, which warns against scripted use.
# Note: curl and unzip are listed explicitly because the AWS CLI and
# WhiteboxTools steps below depend on them.
RUN apt-get update --fix-missing && \
    DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y \
        curl \
        expect \
        libgeos-dev \
        mpich \
        p7zip-full \
        parallel \
        python3-pip \
        rsync \
        time \
        tmux \
        tzdata \
        unzip \
        wget \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

## adding AWS CLI (for bash) ##
# -f makes curl fail on HTTP errors instead of saving an error page as the zip;
# installer artifacts are removed in the same layer so they never reach a layer.
RUN curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip -q awscliv2.zip && \
    ./aws/install && \
    rm -rf awscliv2.zip aws

## adding environment variables for numba and python ##
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
ENV PYTHONUNBUFFERED=TRUE

## ADD TO PATHS ##
ENV PATH="$projectDir:${PATH}"
ENV PYTHONPATH="${PYTHONPATH}:${projectDir}:${srcDir}:${srcDir}/utils:${projectDir}/data:${toolsDir}"

## install python 3 modules ##
# Copy only the lockfiles first so this expensive layer is cached until
# the Pipfiles actually change.
COPY Pipfile .
COPY Pipfile.lock .
RUN pip3 install pipenv==2024.0.1 && PIP_NO_CACHE_DIR=off pipenv install --system --deploy --ignore-pipfile

# ----------------------------------
# Mar 2023 / Sep 2024
# There are some nuances in the whitebox python downloads in that the first time it loads
# it goes to the internet and downloads the latest/greatest WBT (whiteboxtools) engine which is
# required for the whitebox python library to work. We don't want to have FIM attempting a download
# each time a container is opened and the whitebox engine is called.
# Instead we will setup the WBT engine at time of docker build (same as Taudem and AWS).
# Whitebox code detects that the engine is there and makes no attempt to update it.
# We download and unzip it to the same file folder that pip deployed the whitebox library.
# Whitebox also attempts to always download a folder called testdata regardless of use.
# We added an empty folder to fake out whitebox_tools.py so it doesn't try to download the folder
ENV WBT_PATH=/usr/local/lib/python3.10/dist-packages/whitebox/WBT
RUN wget -P $WBT_PATH https://www.whiteboxgeo.com/WBT_Linux/WhiteboxTools_linux_musl.zip && \
    unzip -o $WBT_PATH/WhiteboxTools_linux_musl.zip -d $WBT_PATH && \
    cp $WBT_PATH/WhiteboxTools_linux_amd64/WBT/whitebox_tools $WBT_PATH && \
    rm -f $WBT_PATH/WhiteboxTools_linux_musl.zip
# ----------------------------------

# The container will automatically run as this account
ARG RuntimeUser=svc_user
RUN useradd -u 8877 -g $GroupName -s /bin/bash $RuntimeUser
# 777 is deliberate here: host bind mounts and the non-root runtime user must
# both be able to write these locations (see Dockerfile.prod permission notes).
RUN mkdir -p "/home/${RuntimeUser}" && \
    chmod 777 "/home/${RuntimeUser}" $workDir

## RUN UMASK TO CHANGE DEFAULT PERMISSIONS ##
# COPY (not ADD) — plain local file, no extraction/URL behavior needed
COPY ./src/entrypoint.sh /
ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]

## This results in the default user being the svc_user user
USER $RuntimeUser
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ This software uses the Height Above Nearest Drainage (HAND) method to generate R

# FIM Version 4

#### Note: While we use the phrase "FIM" regularly, the phrase "HAND" is also used and is generally interchangeable. Most output folders now follow the convention of "hand_4_x_x_x".

## Accessing Data through ESIP S3 Bucket
The latest national generated HAND data and a subset of the inputs can be found in an Amazon S3 Bucket hosted by [Earth Science Information Partners (ESIP)](https://www.esipfed.org/). These data can be accessed using the AWS CLI tools. Please contact Carson Pruitt ([email protected]) or Fernando Salas ([email protected]) if you experience issues with permissions.

Expand Down Expand Up @@ -49,12 +51,12 @@ aws s3 ls s3://noaa-nws-owp-fim/hand_fim/ --profile esip

Download a directory of sample outputs for a single HUC8:
```
aws s3 sync s3://noaa-nws-owp-fim/hand_fim/outputs/fim_4_4_0_0/12090301 \
aws s3 sync s3://noaa-nws-owp-fim/hand_fim/outputs/hand_4_5_2_11/12090301 \
/your_local_folder_name/12090301 --profile esip
```
By adjusting pathing, you can also download entire directories such as the `fim_4_4_0_0` folder. An entire output FIM set (e.g. `fim_4_4_0_0`) is approximately 1.1 TB.
By adjusting pathing, you can also download entire directories such as the `hand_4_5_2_11` folder. An entire output HAND set is approximately 1.7 TB.

**Note**: There may be newer editions than `fim_4_4_0_0`, and it is recommended to adjust the command above for the latest version.
**Note**: There may be newer editions than `hand_4_5_2_11`, and it is recommended to adjust the command above for the latest version.

## Setting up your Environment

Expand Down Expand Up @@ -85,7 +87,7 @@ Git will auto create a subfolder named `inundation-mapping` where the code will

### Installation
1. Install Docker : [Docker](https://docs.docker.com/get-docker/)
2. Build Docker Image : `docker build -f Dockerfile -t <image_name>:<tag> <path/to/repository>`
2. Build Docker Image : `docker build -f Dockerfile.dev -t <image_name>:<tag> <path/to/repository>`
3. Create FIM group on host machine:
- Linux: `groupadd -g 1370800178 fim`
4. Change group ownership of repo (needs to be redone when a new file occurs in the repo):
Expand Down Expand Up @@ -128,7 +130,7 @@ docker run --rm -it --name <your_container_name> \
```
For example:
```bash
docker run --rm -it --name robs_container \
docker run --rm -it --name Robs_container \
-v /home/projects/fim/code/inundation-mapping/:/foss_fim \
-v /home/projects/fim/data/outputs/:/outputs \
-v /home/projects/fim/data/outputs_temp/:/fim_temp \
Expand Down
20 changes: 20 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,26 @@ Note: it is finding a good handful more errors and warnings that were being miss
### Changes
`fim_post_processing.sh`: fix as described.

## v4.5.11.2 - 2024-10-25 - [PR#1322](https://github.com/NOAA-OWP/inundation-mapping/pull/1322)

For security reasons, we needed to create a docker image that does not use the root user in any way. The new `Dockerfile.prod` file is to be used when we want to use a non-root user. The original `Dockerfile` has been renamed to `Dockerfile.dev` and will continue to use its root user, which has no problems interacting with external mounts.

Note: Re: using pip or pipenv installs.
In the Dockerfile.prod, you cannot do installs or updates using either pipenv or pip. Those types of tests and adjustments need to be done in the `Dockerfile.dev`. `Dockerfile.dev` will also allow changes to the `Pipfile` and `Pipfile.lock`. Both docker files share the Pipfiles so it should be just fine.

### File Renames
- Was: `Dockerfile`, now `Dockerfile.dev`

### Additions

- Dockerfile.prod: as described

### Changes
- `README.md`: change notes from phrase `Dockerfile` to `Dockerfile.dev`. Also added some notes about the new convention of outputs no longer starting with `fim_` but now `hand_`
- `fim_pipeline.sh`: Change for the new `Dockerfile.prod` for permissions.
- `fim_post_processing.sh`: Change for the new `Dockerfile.prod` for permissions.
- `fim_pre_processing.sh`: Change for the new `Dockerfile.prod` for permissions.
- `fim_process_unit_wb.sh`: Change for the new `Dockerfile.prod` for permissions.

<br/><br/>

Expand Down
2 changes: 1 addition & 1 deletion fim_pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ echo "---------------------------------------------------"
## POST PROCESSING

# Remove run from the fim_temp directory
rm -d $workDir/$runName
rm -df $workDir/$runName

# Pipe into post processing
. $projectDir/fim_post_processing.sh -n $runName -j $jobMaxLimit
Expand Down
2 changes: 1 addition & 1 deletion fim_post_processing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ Tcount
l_echo $startDiv"Resetting Permissions"
Tstart
# super slow to change chmod on the log folder. Not really mandatory anyway
find $outputDestDir -maxdepth 1 -type f -exec chmod 666 {} + # just root level files
find $outputDestDir -maxdepth 1 -type f -exec chmod 777 {} + # just root level files
Tcount


Expand Down
4 changes: 4 additions & 0 deletions fim_pre_processing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ if [ ! -d $outputDestDir ]; then
mkdir -p $outputDestDir
chmod 777 $outputDestDir
mkdir -p $tempRunDir
chmod 777 $tempRunDir
else
# remove these directories and files on a new or overwrite run
rm -rdf $outputDestDir/logs
Expand Down Expand Up @@ -231,6 +232,9 @@ cp $envFile $outputDestDir/params.env

args_file=$outputDestDir/runtime_args.env

# reset it again (this time recursive for the new incoming folders)
chmod 777 -R $outputDestDir

# the jobHucLimit is not from the args files, only jobBranchLimit
echo "export runName=$runName" >> $args_file
echo "export jobHucLimit=$jobHucLimit" >> $args_file
Expand Down
2 changes: 2 additions & 0 deletions fim_process_unit_wb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ fi
# make outputs directory
mkdir -p $tempHucDataDir
mkdir -p $tempBranchDataDir
chmod 777 $tempHucDataDir
chmod 777 $tempBranchDataDir

# Clean out previous unit logs and branch logs starting with this huc
rm -f $outputDestDir/logs/unit/"$hucNumber"_unit.log
Expand Down

0 comments on commit bed6165

Please sign in to comment.