diff --git a/.gitignore b/.gitignore index da592b6..ec32d16 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ .venv/ .py*[eE]nv/ +*_dirs/ + # Created by https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks # Edit at https://www.toptal.com/developers/gitignore?templates=python,jupyternotebooks diff --git a/CITATION.cff b/CITATION.cff index 9f4bb00..6c275a0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -27,5 +27,5 @@ message: "If you use this software, please cite it using these metadata." repository-code: "https://github.com/inab/WfExS-backend" type: software title: "WfExS-backend" -version: 1.0.0b1 +version: 1.0.0rc0 date-released: "2024-08-07" diff --git a/INSTALL.md b/INSTALL.md index 07d2d9a..18e4afb 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -15,6 +15,10 @@ just using next bash pattern: ```bash # WFEXS_VER can be either a branch, a tag or a commit hash WFEXS_VER=8a0a980f1a5e69064d16f89f8ec31973b2eb0c8b + +# Alternatively, you can use local copy +WFEXS_VER=$(git rev-parse HEAD) + docker build -t inab/wfexs-backend:${WFEXS_VER} \ --build-arg wfexs_checkout="${WFEXS_VER}" \ https://raw.githubusercontent.com/inab/WfExS-backend/${WFEXS_VER}/container_recipes/Dockerfile @@ -26,6 +30,10 @@ a local copy of the recipe, and next command line from the project root will hel ```bash # WFEXS_VER can be either a branch, a tag or a commit hash WFEXS_VER=8a0a980f1a5e69064d16f89f8ec31973b2eb0c8b + +# Alternatively, you can use local copy +WFEXS_VER=$(git rev-parse HEAD) + mkdir WfExS_docker_build cd WfExS_docker_build curl -O https://raw.githubusercontent.com/inab/WfExS-backend/${WFEXS_VER}/container_recipes/Dockerfile @@ -47,8 +55,13 @@ just using next bash pattern: ```bash # WFEXS_VER can be either a branch, a tag or a commit hash WFEXS_VER=8a0a980f1a5e69064d16f89f8ec31973b2eb0c8b + +# Alternatively, you can use local copy +WFEXS_VER=$(git rev-parse HEAD) + podman build -t inab/wfexs-backend:${WFEXS_VER} \ --build-arg wfexs_checkout="${WFEXS_VER}" 
\ +--target podman_build \ https://raw.githubusercontent.com/inab/WfExS-backend/${WFEXS_VER}/container_recipes/Dockerfile ``` @@ -58,12 +71,17 @@ a local copy of the recipe, and next command line from the project root will hel ```bash # WFEXS_VER can be either a branch, a tag or a commit hash WFEXS_VER=8a0a980f1a5e69064d16f89f8ec31973b2eb0c8b + +# Alternatively, you can use local copy +WFEXS_VER=$(git rev-parse HEAD) + mkdir WfExS_podman_build cd WfExS_podman_build curl -O https://raw.githubusercontent.com/inab/WfExS-backend/${WFEXS_VER}/container_recipes/Dockerfile podman build -t inab/wfexs-backend:${WFEXS_VER} \ --build-arg wfexs_checkout="${WFEXS_VER}" \ +--target podman_build \ Dockerfile ``` @@ -89,6 +107,10 @@ The precondition is having either Apptainer or Singularity properly setup. There ```bash # WFEXS_VER can be either a branch, a tag or a commit hash WFEXS_VER=8a0a980f1a5e69064d16f89f8ec31973b2eb0c8b + + # Alternatively, you can use local copy + WFEXS_VER=$(git rev-parse HEAD) + singularity build \ --build-arg wfexs_checkout="${WFEXS_VER}" \ wfexs-backend-${WFEXS_VER}.sif container_recipes/Singularity.def diff --git a/WFEXS-in-CONTAINERS.md b/WFEXS-in-CONTAINERS.md new file mode 100644 index 0000000..538591c --- /dev/null +++ b/WFEXS-in-CONTAINERS.md @@ -0,0 +1,635 @@ +# Running WfExS from within a container (alpha)! + +## Singularity/Apptainer within Singularity/Apptainer (works also for encrypted workdirs) + +For this approach we have been using both `-e` and `-c` parameters from Singularity/Apptainer. It is also possible to use `-u`. + +### Steps + +1. Build the SIF image. Let's assume the file is `wfexs-backend-latest.sif`. + +2. First, create and populate a side caches directory: + + ```bash + mkdir -p SING_dirs/side_caches + singularity exec \ + -e -c \ + -B ./SING_dirs/side_caches:${HOME}/.cache \ + wfexs-backend-latest.sif \ + WfExS-backend populate-side-caches + ``` + +3. 
Create two directories, one for WfExS caches, and another one for the + working directories. Write down the absolute path of the latter. + + ```bash + mkdir -p SING_dirs/wfexs-backend-container-cache + mkdir -p SING_dirs/wfexs-backend-container-WORKDIR + readlink -f SING_dirs/wfexs-backend-container-WORKDIR + ``` + + (let's suppose it is `/home/user/SING_dirs/wfexs-backend-container-WORKDIR`). + +4. Create a configuration file which contains the relative or absolute paths + to both the cache and working directories. For instance, let's suppose it + is available at `/home/user/SING_dirs/local_container_wfexs.yaml` with next content: + + ```yaml + cacheDir: wfexs-backend-container-cache + tools: + dockerCommand: docker + encrypted_fs: + type: gocryptfs + engineMode: local + gitCommand: git + javaCommand: java + singularityCommand: singularity + staticBashCommand: bash-linux-x86_64 + workDir: wfexs-backend-container-WORKDIR + ``` + +5. Initialize the pair of keys: + + ```bash + singularity exec \ + -e -c \ + -B ./SING_dirs/side_caches:${HOME}/.cache \ + -B ./SING_dirs/:/home/${USER}/WfExS-instance-dirs/:rw \ + wfexs-backend-latest.sif \ + WfExS-backend -L /home/${USER}/WfExS-instance-dirs/local_container_wfexs.yaml init + ``` + +6. Use it! 
+ + ```bash + singularity exec \ + -e -c \ + --add-caps SYS_ADMIN \ + -B /dev/fuse \ + -B ./SING_dirs/side_caches/:${HOME}/.cache/:ro \ + -B ./SING_dirs/:/home/${USER}/WfExS-instance-dirs/:rw \ + -B ./workflow_examples/:/home/${USER}/workflow_examples/:ro \ + wfexs-backend-latest.sif \ + WfExS-backend -L /home/${USER}/WfExS-instance-dirs/local_container_wfexs.yaml \ + stage -W /home/${USER}/workflow_examples/hello/hellow_cwl_singularity.wfex.stage + ``` + + ```bash + singularity exec \ + -e -c \ + --add-caps SYS_ADMIN \ + -B /dev/fuse \ + -B ./SING_dirs/side_caches/:${HOME}/.cache/:ro \ + -B ./SING_dirs/:/home/${USER}/WfExS-instance-dirs/:rw \ + -B ./workflow_examples/:/home/${USER}/workflow_examples/:ro \ + wfexs-backend-latest.sif \ + WfExS-backend -L /home/${USER}/WfExS-instance-dirs/local_container_wfexs.yaml \ + staged-workdir offline-exec 'my funny jobname' + ``` + +## Singularity/Apptainer within Podman (works also for encrypted workdirs) + +1. Build the podman image following the instructions. Let's assume the tag is `inab/wfexs-backend:latest` (whose canonical representation is `localhost/inab/wfexs-backend:latest`). + +2. First, create and populate a side caches directory: + + ```bash + mkdir -p PODMAN_dirs/side_caches + podman run --rm -ti \ + -v ./PODMAN_dirs/side_caches:/root/.cache \ + localhost/inab/wfexs-backend:latest \ + WfExS-backend populate-side-caches + ``` + +3. Create two directories, one for WfExS caches, and another one for the + working directories. Write down the absolute path of the latter. + + ```bash + mkdir -p PODMAN_dirs/wfexs-backend-container-cache + mkdir -p PODMAN_dirs/wfexs-backend-container-WORKDIR + readlink -f PODMAN_dirs/wfexs-backend-container-WORKDIR + ``` + + (let's suppose it is `/home/user/PODMAN_dirs/wfexs-backend-WORKDIR`). + +4. Create a configuration file which contains the relative or absolute paths + to both the cache and working directories. 
For instance, let's suppose it + is available at `/home/user/PODMAN_dirs/local_container_wfexs.yaml` with next content: + + ```yaml + cacheDir: wfexs-backend-container-cache + tools: + dockerCommand: docker + encrypted_fs: + type: gocryptfs + engineMode: local + gitCommand: git + javaCommand: java + singularityCommand: singularity + staticBashCommand: bash-linux-x86_64 + workDir: wfexs-backend-container-WORKDIR + ``` + +5. Initialize the pair of keys: + + ```bash + podman run --rm -ti \ + -v ./PODMAN_dirs/side_caches:/root/.cache \ + -v ./PODMAN_dirs/:/root/WfExS-instance-dirs/:rw \ + localhost/inab/wfexs-backend:latest \ + WfExS-backend -L /root/WfExS-instance-dirs/local_container_wfexs.yaml init + ``` + +6. Use it! + + ```bash + podman run --rm -ti \ + --cap-add SYS_ADMIN \ + --device /dev/fuse \ + -v ./PODMAN_dirs/side_caches:/root/.cache:ro \ + -v ./PODMAN_dirs/:/root/WfExS-instance-dirs/:rw \ + -v ./workflow_examples/:/root/workflow_examples/:ro \ + localhost/inab/wfexs-backend:latest \ + WfExS-backend -L /root/WfExS-instance-dirs/local_container_wfexs.yaml \ + stage -W /root/workflow_examples/hello/hellow_cwl_singularity.wfex.stage + ``` + + ```bash + podman run --rm -ti \ + --cap-add SYS_ADMIN \ + --device /dev/fuse \ + -v ./PODMAN_dirs/side_caches:/root/.cache:ro \ + -v ./PODMAN_dirs/:/root/WfExS-instance-dirs/:rw \ + -v ./workflow_examples/:/root/workflow_examples/:ro \ + localhost/inab/wfexs-backend:latest \ + WfExS-backend -L /root/WfExS-instance-dirs/local_container_wfexs.yaml \ + staged-workdir offline-exec 'my funny jobname' + ``` + +## Singularity within Docker (works also for encrypted workdirs) + +1. Build the docker image following the instructions. Let's assume the tag is `inab/wfexs-backend:latest`. + +2. 
First, create and populate a side caches directory: + + ```bash + mkdir -p SING_in_DOCKER_dirs/side_caches + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + -v ./SING_in_DOCKER_dirs/side_caches:/.cache \ + inab/wfexs-backend:latest \ + WfExS-backend populate-side-caches + ``` + +3. Create two directories, one for WfExS caches, and another one for the + working directories. Write down the absolute path of the latter. + + ```bash + mkdir -p SING_in_DOCKER_dirs/wfexs-backend-container-cache + mkdir -p SING_in_DOCKER_dirs/wfexs-backend-container-WORKDIR + readlink -f SING_in_DOCKER_dirs/wfexs-backend-container-WORKDIR + ``` + + (let's suppose it is `/home/${USER}/SING_in_DOCKER_dirs/wfexs-backend-WORKDIR`). + +4. Create a configuration file which contains the relative or absolute paths + to both the cache and working directories. For instance, let's suppose it + is available at `/home/${USER}/SING_in_DOCKER_dirs/local_container_wfexs.yaml` with next content: + + ```yaml + cacheDir: wfexs-backend-container-cache + tools: + dockerCommand: docker + encrypted_fs: + type: gocryptfs + engineMode: local + gitCommand: git + javaCommand: java + singularityCommand: singularity + staticBashCommand: bash-linux-x86_64 + workDir: wfexs-backend-container-WORKDIR + ``` + +5. Initialize the pair of keys: + + ```bash + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + -v ./SING_in_DOCKER_dirs/side_caches:/.cache \ + -v ./SING_in_DOCKER_dirs/:/WfExS-instance-dirs/:rw \ + inab/wfexs-backend:latest \ + WfExS-backend -L /WfExS-instance-dirs/local_container_wfexs.yaml init + ``` + +6. Use it! 
+ + ```bash + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + --cap-add SYS_ADMIN \ + --device /dev/fuse \ + -v ./SING_in_DOCKER_dirs/side_caches:/.cache:ro \ + -v ./SING_in_DOCKER_dirs/:/WfExS-instance-dirs/:rw \ + -v ./workflow_examples/:/workflow_examples/:ro \ + inab/wfexs-backend:latest \ + WfExS-backend -L /WfExS-instance-dirs/local_container_wfexs.yaml \ + stage -W /workflow_examples/hello/hellow_cwl_singularity.wfex.stage + ``` + + ```bash + docker run --rm -ti \ + --cap-add SYS_ADMIN \ + --device /dev/fuse \ + -v ./SING_in_DOCKER_dirs/side_caches:/.cache:ro \ + -v ./SING_in_DOCKER_dirs/:/WfExS-instance-dirs/:rw \ + -v ./workflow_examples/:/workflow_examples/:ro \ + inab/wfexs-backend:latest \ + WfExS-backend -L /WfExS-instance-dirs/local_container_wfexs.yaml \ + staged-workdir offline-exec 'my funny jobname' + ``` + +## Podman within Singularity/Apptainer + +(Tested on 2024-08-31) It fails already at materialization, due to nesting limitations of user namespaces (used both by Podman and Singularity). + +## Podman within Podman + +(Tested on 2024-08-31) It fails already at materialization, due to nesting limitations of user namespaces (used by Podman). + +## Podman within Docker + +(Tested on 2024-09-09) It fails when running the workflow, due to issues with crun. The first issue raised the next crun error: + +``` +crun: create keyring `e94eae775d1a0e71b067f98cd569d309a2fcf36c6afd505d0868a32d47629661`: Operation not permitted: OCI permission denied +``` + +which was skipped thanks to commit 9d935b20ba5d75d8d62488941c9c4a3c2c0c101d . But the next issue cannot be skipped: + +``` +OCI runtime error: crun: open /proc/sys/net/ipv4/ping_group_range: Read-only file system +``` + +### Steps + +1. Build the docker image following the instructions. Let's assume the tag is `inab/wfexs-backend:latest`. + +2. 
First, create and populate a side caches directory: + + ```bash + mkdir -p PODMAN_in_DOCKER_dirs/side_caches + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + -v ./PODMAN_in_DOCKER_dirs/side_caches:/.cache \ + inab/wfexs-backend:latest \ + WfExS-backend populate-side-caches + ``` + +3. Create two directories, one for WfExS caches, and another one for the + working directories. Write down the absolute path of the latter. + + ```bash + mkdir -p PODMAN_in_DOCKER_dirs/wfexs-backend-container-cache + mkdir -p PODMAN_in_DOCKER_dirs/wfexs-backend-container-WORKDIR + readlink -f PODMAN_in_DOCKER_dirs/wfexs-backend-container-WORKDIR + ``` + + (let's suppose it is `/home/${USER}/PODMAN_in_DOCKER_dirs/wfexs-backend-WORKDIR`). + +4. Create a configuration file which contains the relative or absolute paths + to both the cache and working directories. For instance, let's suppose it + is available at `/home/${USER}/PODMAN_in_DOCKER_dirs/local_container_wfexs.yaml` with next content: + + ```yaml + cacheDir: wfexs-backend-container-cache + tools: + dockerCommand: docker + encrypted_fs: + type: gocryptfs + engineMode: local + gitCommand: git + javaCommand: java + singularityCommand: singularity + staticBashCommand: bash-linux-x86_64 + workDir: wfexs-backend-container-WORKDIR + ``` + +5. Initialize the pair of keys: + + ```bash + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + -v ./PODMAN_in_DOCKER_dirs/side_caches:/.cache \ + -v ./PODMAN_in_DOCKER_dirs/:/WfExS-instance-dirs/:rw \ + inab/wfexs-backend:latest \ + WfExS-backend -L /WfExS-instance-dirs/local_container_wfexs.yaml init + ``` + +6. Use it! 
+ + ```bash + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + --cap-add SYS_ADMIN \ + --device /dev/fuse \ + -v ./PODMAN_in_DOCKER_dirs/side_caches:/.cache:ro \ + -v ./PODMAN_in_DOCKER_dirs/:/WfExS-instance-dirs/:rw \ + -v ./workflow_examples/:/workflow_examples/:ro \ + inab/wfexs-backend:latest \ + WfExS-backend -L /WfExS-instance-dirs/local_container_wfexs.yaml \ + stage -W /workflow_examples/hello/hellow_cwl_podman.wfex.stage + ``` + + ```bash + docker run --rm -ti \ + --cap-add SYS_ADMIN \ + --device /dev/fuse \ + -v ./PODMAN_in_DOCKER_dirs/side_caches:/.cache:ro \ + -v ./PODMAN_in_DOCKER_dirs/:/WfExS-instance-dirs/:rw \ + -v ./workflow_examples/:/workflow_examples/:ro \ + inab/wfexs-backend:latest \ + WfExS-backend -L /WfExS-instance-dirs/local_container_wfexs.yaml \ + staged-workdir offline-exec 'my funny jobname' + ``` + +## Docker within Singularity/Apptainer + +For this approach there must be a 1:1 volume mapping for the parent working directory (wfexs-backend-container-WORKDIR). +Otherwise the executions fail. + +For this approach we have been using both `-e` and `-c` parameters from Singularity/Apptainer. + +### Steps + +1. Build the SIF image. Let's assume the file is `wfexs-backend-latest.sif`. + +2. First, create and populate a side caches directory: + + ```bash + mkdir -p DOCKER_in_SING_dirs/side_caches + singularity exec \ + -e -c \ + -B ./DOCKER_in_SING_dirs/side_caches:${HOME}/.cache \ + wfexs-backend-latest.sif \ + WfExS-backend populate-side-caches + ``` + +3. Create two directories, one for WfExS caches, and another one for the + working directories. Write down the absolute path of the latter. + + ```bash + mkdir -p DOCKER_in_SING_dirs/wfexs-backend-container-cache + mkdir -p DOCKER_in_SING_dirs/wfexs-backend-container-WORKDIR + readlink -f DOCKER_in_SING_dirs/wfexs-backend-container-WORKDIR + ``` + + (let's suppose it is `/home/${USER}/DOCKER_in_SING_dirs/wfexs-backend-container-WORKDIR`). + +4. 
Create a configuration file which contains the relative or absolute paths + to both the cache and working directories. For instance, let's suppose it + is available at `/home/${USER}/DOCKER_in_SING_dirs/local_container_wfexs.yaml` with next content: + + ```yaml + cacheDir: wfexs-backend-container-cache + tools: + dockerCommand: docker + encrypted_fs: + type: gocryptfs + engineMode: local + gitCommand: git + javaCommand: java + singularityCommand: singularity + staticBashCommand: bash-linux-x86_64 + workDir: wfexs-backend-container-WORKDIR + ``` + +5. Initialize the pair of keys: + + ```bash + singularity exec \ + -e -c \ + -B ./DOCKER_in_SING_dirs/side_caches:${HOME}/.cache \ + -B /home/${USER}/DOCKER_in_SING_dirs/ \ + wfexs-backend-latest.sif \ + WfExS-backend -L /home/${USER}/DOCKER_in_SING_dirs/local_container_wfexs.yaml init + ``` + +6. Use it! + + ```bash + singularity exec \ + -e -c \ + --add-caps SYS_ADMIN \ + -B /dev/fuse \ + -B /run/docker.sock \ + -B ./DOCKER_in_SING_dirs/side_caches/:${HOME}/.cache/:ro \ + -B /home/${USER}/DOCKER_in_SING_dirs/ \ + -B ./workflow_examples/:/home/${USER}/workflow_examples/:ro \ + wfexs-backend-latest.sif \ + WfExS-backend -L /home/${USER}/DOCKER_in_SING_dirs/local_container_wfexs.yaml \ + stage -W /home/${USER}/workflow_examples/hello/hellow_cwl_podman.wfex.stage + ``` + + ```bash + singularity exec \ + -e -c \ + --add-caps SYS_ADMIN \ + -B /dev/fuse \ + -B /run/docker.sock \ + -B ./DOCKER_in_SING_dirs/side_caches/:${HOME}/.cache/:ro \ + -B /home/${USER}/DOCKER_in_SING_dirs/:/home/${USER}/DOCKER_in_SING_dirs/:rw \ + -B ./workflow_examples/:/home/${USER}/workflow_examples/:ro \ + wfexs-backend-latest.sif \ + WfExS-backend -L /home/${USER}/DOCKER_in_SING_dirs/local_container_wfexs.yaml \ + staged-workdir offline-exec 'my funny jobname' + ``` + +## Docker within Podman (does not work with encrypted workdirs feature) + +For this approach there must be a 1:1 volume mapping for the parent working directory 
(wfexs-backend-container-WORKDIR). +Otherwise the executions fail. + +Also, either the next command + +```bash +sudo setfacl -m u:$(id -u):rw -- /run/docker.sock +``` + +or the next command is needed + +```bash +sudo setfacl -m g:$(id -g):rw -- /run/docker.sock +``` + +to avoid the next issue with almost any docker command within the podman instance: + +``` +permission denied while trying to connect to the Docker daemon socket at unix:///var/run/docker.sock: Head "http://%2Fvar%2Frun%2Fdocker.sock/_ping": dial unix /var/run/docker.sock: connect: permission denied +``` + +### Steps + +1. Build the docker image. Let's assume the tag is `inab/wfexs-backend:latest`. + +2. First, create and populate a side caches directory: + + ```bash + mkdir -p DOCKER_in_PODMAN_dirs/side_caches + podman run --rm -ti \ + -v ./DOCKER_in_PODMAN_dirs/side_caches:/root/.cache \ + localhost/inab/wfexs-backend:latest \ + WfExS-backend populate-side-caches + ``` + +3. Create two directories, one for WfExS caches, and another one for the + working directories. Write down the absolute path of the latter. + + ```bash + mkdir -p DOCKER_in_PODMAN_dirs/wfexs-backend-container-cache + mkdir -p DOCKER_in_PODMAN_dirs/wfexs-backend-container-WORKDIR + readlink -f DOCKER_in_PODMAN_dirs/wfexs-backend-container-WORKDIR + ``` + + (let's suppose it is `/home/${USER}/DOCKER_in_PODMAN_dirs/wfexs-backend-container-WORKDIR`). + +4. Create a configuration file which contains the relative or absolute paths + to both the cache and working directories. For instance, let's suppose it + is available at `/home/${USER}/DOCKER_in_PODMAN_dirs/local_container_wfexs.yaml` with next content: + + ```yaml + cacheDir: wfexs-backend-container-cache + tools: + dockerCommand: docker + encrypted_fs: + type: gocryptfs + engineMode: local + gitCommand: git + javaCommand: java + singularityCommand: singularity + staticBashCommand: bash-linux-x86_64 + workDir: wfexs-backend-container-WORKDIR + ``` + +5. 
Initialize the pair of keys: + + ```bash + podman run --rm -ti \ + -v ./DOCKER_in_PODMAN_dirs/side_caches:/root/.cache \ + -v /home/${USER}/DOCKER_in_PODMAN_dirs/:/home/${USER}/DOCKER_in_PODMAN_dirs/:rw \ + localhost/inab/wfexs-backend:latest \ + WfExS-backend -L /home/${USER}/DOCKER_in_PODMAN_dirs/local_container_wfexs.yaml init + ``` + +6. Use it! + + ```bash + podman run --rm -ti \ + --cap-add SYS_ADMIN \ + --device=/dev/fuse \ + -v /run/docker.sock:/run/docker.sock:rw,rprivate \ + -v ./DOCKER_in_PODMAN_dirs/side_caches/:/root/.cache/:ro \ + -v /home/${USER}/DOCKER_in_PODMAN_dirs/:/home/${USER}/DOCKER_in_PODMAN_dirs/:rw \ + -v ./workflow_examples/:/workflow_examples/:ro \ + localhost/inab/wfexs-backend:latest \ + WfExS-backend -L /home/${USER}/DOCKER_in_PODMAN_dirs/local_container_wfexs.yaml \ + stage -W /workflow_examples/hello/hellow_cwl_docker.wfex.stage + ``` + ```bash + podman run --rm -ti \ + --cap-add SYS_ADMIN \ + --device=/dev/fuse \ + -v /run/docker.sock:/run/docker.sock:rw,rprivate \ + -v ./DOCKER_in_PODMAN_dirs/side_caches/:/root/.cache/:ro \ + -v /home/${USER}/DOCKER_in_PODMAN_dirs/:/home/${USER}/DOCKER_in_PODMAN_dirs/:rw \ + localhost/inab/wfexs-backend:latest \ + WfExS-backend -L /home/${USER}/DOCKER_in_PODMAN_dirs/local_container_wfexs.yaml \ + staged-workdir offline-exec 'my funny jobname' + ``` + +## Docker besides Docker (does not work with encrypted workdirs feature) + +For this approach there must be a 1:1 volume mapping for the parent working directory (wfexs-backend-container-WORKDIR). +Otherwise the executions fail. + +### Steps + +1. Build the docker image. Let's assume the tag is `inab/wfexs-backend:latest`. + +2. First, create and populate a side caches directory: + + ```bash + mkdir -p DOCKER_in_DOCKER_dirs/side_caches + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + -v ./DOCKER_in_DOCKER_dirs/side_caches:/.cache \ + inab/wfexs-backend:latest \ + WfExS-backend populate-side-caches + ``` + +3. 
Create two directories, one for WfExS caches, and another one for the + working directories. Write down the absolute path of the latter. + + ```bash + mkdir -p DOCKER_in_DOCKER_dirs/wfexs-backend-container-cache + mkdir -p DOCKER_in_DOCKER_dirs/wfexs-backend-container-WORKDIR + readlink -f DOCKER_in_DOCKER_dirs/wfexs-backend-container-WORKDIR + ``` + + (let's suppose it is `/home/${USER}/DOCKER_in_DOCKER_dirs/wfexs-backend-container-WORKDIR`). + +4. Create a configuration file which contains the relative or absolute paths + to both the cache and working directories. For instance, let's suppose it + is available at `/home/${USER}/DOCKER_in_DOCKER_dirs/local_container_wfexs.yaml` with next content: + + ```yaml + cacheDir: wfexs-backend-container-cache + tools: + dockerCommand: docker + encrypted_fs: + type: gocryptfs + engineMode: local + gitCommand: git + javaCommand: java + singularityCommand: singularity + staticBashCommand: bash-linux-x86_64 + workDir: wfexs-backend-container-WORKDIR + ``` + +5. Initialize the pair of keys: + + ```bash + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + -v ./DOCKER_in_DOCKER_dirs/side_caches:/.cache \ + -v /home/${USER}/DOCKER_in_DOCKER_dirs/:/home/${USER}/DOCKER_in_DOCKER_dirs/:rw \ + inab/wfexs-backend:latest \ + WfExS-backend -L /home/${USER}/DOCKER_in_DOCKER_dirs/local_container_wfexs.yaml init + ``` + +6. Use it! 
+ + ```bash + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + --cap-add SYS_ADMIN \ + --device=/dev/fuse \ + -v /run/docker.sock:/run/docker.sock:rw,rprivate \ + -v ./DOCKER_in_DOCKER_dirs/side_caches/:/.cache/:ro \ + -v /home/${USER}/DOCKER_in_DOCKER_dirs/:/home/${USER}/DOCKER_in_DOCKER_dirs/:rw \ + -v ./workflow_examples/:/workflow_examples/:ro \ + inab/wfexs-backend:latest \ + WfExS-backend -L /home/${USER}/DOCKER_in_DOCKER_dirs/local_container_wfexs.yaml \ + stage -W /workflow_examples/hello/hellow_cwl_docker.wfex.stage + ``` + ```bash + docker run --rm -ti \ + -u $(id -u):$(id -g) \ + --cap-add SYS_ADMIN \ + --device=/dev/fuse \ + -v /run/docker.sock:/run/docker.sock:rw,rprivate \ + -v ./DOCKER_in_DOCKER_dirs/side_caches/:/.cache/:ro \ + -v /home/${USER}/DOCKER_in_DOCKER_dirs/:/home/${USER}/DOCKER_in_DOCKER_dirs/:rw \ + inab/wfexs-backend:latest \ + WfExS-backend -L /home/${USER}/DOCKER_in_DOCKER_dirs/local_container_wfexs.yaml \ + staged-workdir offline-exec 'my funny jobname' + ``` diff --git a/container_recipes/Dockerfile b/container_recipes/Dockerfile index 755d2d1..4109931 100644 --- a/container_recipes/Dockerfile +++ b/container_recipes/Dockerfile @@ -1,5 +1,5 @@ FROM gcc:13 AS entr_build -ARG suid_entrypoint_checkout=adf2da44acf4a5feec10b8c73660e8a9b6a4a03b +ARG suid_entrypoint_checkout=b47842ece910303b96bd708e73a53a95dfa3d8bc WORKDIR / # hadolint ignore=DL3003 RUN git clone --filter=blob:none --no-checkout https://github.com/jmfernandez/suid_entrypoint.git && \ @@ -8,9 +8,9 @@ RUN git clone --filter=blob:none --no-checkout https://github.com/jmfernandez/su gcc -static -o suid_entrypoint suid_entrypoint.c # The default images of python are based on debian -FROM python:3.12 +FROM python:3.12 AS podman_build # These arguments help customizing what it is included in the generated image -ARG wfexs_checkout=8a0a980f1a5e69064d16f89f8ec31973b2eb0c8b +ARG wfexs_checkout=0910fe6eec015c7a112f129f0adb4a998ef27a8c ARG apptainer_version=1.3.3 # JDK version 
parameters ARG JDK_MAJOR_VER=11 @@ -25,9 +25,7 @@ ARG GOCRYPTFS_VER=v2.4.0 ARG STATIC_BASH_VER=5.1.004-1.2.2 # static busybox version ARG BUSYBOX_VER=1.35.0 -COPY --from=entr_build /suid_entrypoint/suid_entrypoint /suid_entrypoint SHELL ["/bin/bash", "-o", "pipefail", "-c"] -RUN chmod u+s /suid_entrypoint # Install apptainer RUN DPKG_ARCH=$(dpkg --print-architecture) && \ wget -nv \ @@ -57,10 +55,15 @@ RUN git clone --filter=blob:none --no-checkout https://github.com/inab/WfExS-bac cd WfExS-backend && \ git sparse-checkout init --cone && \ PYVER=$(python -c 'import sys; print("{}.{}".format(sys.version_info.major, sys.version_info.minor))') && \ - git sparse-checkout set constraints-${PYVER}.txt container_recipes/basic-installer.bash container_recipes/full-installer.bash && \ + git sparse-checkout set constraints-${PYVER}.txt \ + podman_containers_nokeyring.conf \ + container_recipes/basic-installer.bash \ + container_recipes/full-installer.bash && \ git checkout "${wfexs_checkout}" && \ pip install --no-cache-dir "git+https://github.com/inab/WfExS-backend.git@${wfexs_checkout}" -c constraints-${PYVER}.txt && \ + python -m compileall "$(python -c 'import sys; print(sys.prefix)')" && \ mv container_recipes/* / && \ + mv /podman_containers_nokeyring.conf /etc/containers/containers.conf && \ cd .. 
&& \ rm -rf WfExS-backend # Transfer other third party installation script @@ -75,5 +78,9 @@ RUN JDK_MAJOR_VER="${JDK_MAJOR_VER}" \ STATIC_BASH_VER="${STATIC_BASH_VER}" \ BUSYBOX_VER="${BUSYBOX_VER}" \ bash /full-installer.bash +CMD ["/bin/bash"] + +FROM podman_build AS docker_build +COPY --from=entr_build /suid_entrypoint/suid_entrypoint /suid_entrypoint +RUN chmod u+s /suid_entrypoint ENTRYPOINT [ "/suid_entrypoint" ] -CMD ["bash"] \ No newline at end of file diff --git a/container_recipes/Singularity.def b/container_recipes/Singularity.def index dd16b2f..b48a06a 100644 --- a/container_recipes/Singularity.def +++ b/container_recipes/Singularity.def @@ -6,7 +6,7 @@ Stage: spython-base # The default images of python are based on debian # These arguments help customizing what it is included in the generated image wfexs_checkout=574fe343c0b59eecd95afbc67894456359ebe649 -apptainer_version=1.3.2 +apptainer_version=1.3.3 # JDK version parameters JDK_MAJOR_VER=11 # Nested arguments are not allowed @@ -58,6 +58,7 @@ PYVER=$(python -c 'import sys; print("{}.{}".format(sys.version_info.major, sys. git sparse-checkout set constraints-${PYVER}.txt container_recipes/basic-installer.bash container_recipes/full-installer.bash && \ git checkout "{{ wfexs_checkout }}" && \ pip install --no-cache-dir "git+https://github.com/inab/WfExS-backend.git@{{ wfexs_checkout }}" -c constraints-${PYVER}.txt && \ +python -m compileall "$(python -c 'import sys; print(sys.prefix)')" && \ mv container_recipes/* / && \ cd .. 
&& \ rm -rf WfExS-backend diff --git a/container_recipes/podman_containers_nokeyring.conf b/container_recipes/podman_containers_nokeyring.conf new file mode 100644 index 0000000..652dcd1 --- /dev/null +++ b/container_recipes/podman_containers_nokeyring.conf @@ -0,0 +1,2 @@ +[containers] +keyring=false diff --git a/wfexs_backend/__init__.py b/wfexs_backend/__init__.py index e26a8b3..1d47564 100644 --- a/wfexs_backend/__init__.py +++ b/wfexs_backend/__init__.py @@ -21,7 +21,7 @@ __license__ = "Apache 2.0" # https://www.python.org/dev/peps/pep-0396/ -__version__ = "1.0.0b1" +__version__ = "1.0.0rc0" __url__ = "https://github.com/inab/WfExS-backend" __official_name__ = "WfExS-backend" diff --git a/wfexs_backend/__main__.py b/wfexs_backend/__main__.py index e070f5b..a38fbef 100644 --- a/wfexs_backend/__main__.py +++ b/wfexs_backend/__main__.py @@ -42,6 +42,7 @@ if TYPE_CHECKING: from typing import ( Callable, + Optional, Sequence, Tuple, Type, @@ -332,6 +333,15 @@ def genParserSub( help="Max reproducibility level to be tried", ) + if command in (WfExS_Commands.Stage, WfExS_Commands.Execute): + ap_.add_argument( + "--paranoid", + dest="secure", + action="store_true", + default=False, + help="Force secured working directory", + ) + if preStageParams or exportParams or command == WfExS_Commands.ReStage: ap_.add_argument( "-Z", @@ -568,8 +578,14 @@ def processCacheCommand( the_path = "(not recorded)" the_type = "???" + if "clonable" in entryI[1]: + clonable = entryI[1]["clonable"] + else: + clonable = True + the_clonable = "yes" if clonable else "no" + print( - f"({entryI[1]['stamp']}) {entryI[0].uri} => {the_type} {the_path}" + f"({entryI[1]['stamp']}) {entryI[0].uri} => {the_type} {the_path} (clonable: {the_clonable})" ) else: json.dump( @@ -606,8 +622,14 @@ def processCacheCommand( the_path = "(not recorded)" the_type = "???" 
+ if "clonable" in entryD[1]: + clonable = entryD[1]["clonable"] + else: + clonable = True + the_clonable = "yes" if clonable else "no" + print( - f"({entryD[1]['stamp']}) {entryD[0].uri} => {the_type} {the_path}" + f"({entryD[1]['stamp']}) {entryD[0].uri} => {the_type} {the_path} (clonable: {the_clonable})" ) else: print(entryD[0]) @@ -628,18 +650,27 @@ def processCacheCommand( ) ) elif args.cache_command == WfExS_Cache_Commands.Inject: - if len(args.cache_command_args) == 2: - injected_uri = args.cache_command_args[0] - finalCachedFilename = args.cache_command_args[1] + if len(args.cache_command_args) in (2, 3): + injected_uri: "str" = args.cache_command_args[0] + finalCachedFilename: "str" = args.cache_command_args[1] + if len(args.cache_command_args) == 3: + clonable = args.cache_command_args[2] != "false" + else: + # If we have injected anything by hand, most probably + # we do not want it cloned in the working directories. + clonable = False # # First, remove old occurrence # cH.remove(cPath, injected_uri) # Then, inject new occurrence cH.inject( - injected_uri, destdir=cPath, finalCachedFilename=finalCachedFilename + cast("URIType", injected_uri), + destdir=cPath, + finalCachedFilename=pathlib.Path(finalCachedFilename), + clonable=clonable, ) else: print( - f"ERROR: subcommand {args.cache_command} takes two positional parameters: the URI to be injected, and the path to the local content to be associated to that URI", + f"ERROR: subcommand {args.cache_command} takes two required positional parameters: the URI to be injected, and the path to the local content to be associated to that URI. 
A third optional parameter, which is either 'true' or 'false', tells whether it is allowed to clone the injected content into the working directories.", file=sys.stderr, ) retval = 1 @@ -653,16 +684,19 @@ def processCacheCommand( print(f"\t- {metaUri.uri} {validated}") # pass elif args.cache_command == WfExS_Cache_Commands.Fetch: - if len(args.cache_command_args) == 1 or len(args.cache_command_args) == 3: - uri_to_fetch = args.cache_command_args[0] + if len(args.cache_command_args) >= 1 and len(args.cache_command_args) <= 4: + uri_to_fetch: "str" = args.cache_command_args[0] vault = SecurityContextVault() - if len(args.cache_command_args) == 3: - secContextFilename = args.cache_command_args[1] + secContextName: "Optional[str]" + if len(args.cache_command_args) >= 3: + secContextFilename: "str" = args.cache_command_args[1] secContextName = args.cache_command_args[2] if os.path.exists(secContextFilename): try: - vault = SecurityContextVault(secContextFilename) + vault = SecurityContextVault.FromFile( + pathlib.Path(secContextFilename) + ) except: logging.exception( f"ERROR: security context file {secContextFilename} is corrupted" @@ -684,21 +718,31 @@ def processCacheCommand( file=sys.stderr, ) retval = 1 + else: + secContextName = None + + if len(args.cache_command_args) in (2, 4): + default_clonable = args.cache_command_args[-1] != "false" + else: + # If we are fetching anything by hand, most probably + # we do not mind it cloned in the working directories. 
+ default_clonable = True if retval == 0: cached_content = wfBackend.cacheFetch( - uri_to_fetch, + cast("URIType", uri_to_fetch), args.cache_type, offline=False, vault=vault, sec_context_name=secContextName, + default_clonable=default_clonable, ) print( - f"{cached_content.kind}\t{cached_content.path}\t{cached_content.licences}\t{cached_content.metadata_array}" + f"{cached_content.kind}\t{cached_content.path}\t{cached_content.licences}\t{cached_content.metadata_array}\t{cached_content.clonable}" ) else: print( - f"ERROR: subcommand {args.cache_command} takes either one or three positional parameters: the URI to be fetched, the path to a security context file and the security context to be used for the fetch operation", + f"ERROR: subcommand {args.cache_command} takes either one or three positional parameters: the URI to be fetched, the path to a security context file and the security context to be used for the fetch operation. An optional last parameter tells whether the fetched content should be allowed to be cloned in working directories", file=sys.stderr, ) retval = 1 @@ -1369,12 +1413,23 @@ def main() -> None: file=sys.stderr, ) + # A filename is needed later, in order to initialize installation keys + if not localConfigFilename: + config_directory = None + config_relname = os.path.basename(defaultLocalConfigFilename) + else: + # Hints for the the default path for the Crypt4GH keys + config_directory = localConfigFilename.parent + config_relname = localConfigFilename.name + if args.cacheDir: local_config["cacheDir"] = args.cacheDir # In any case, assuring the cache directory does exist cacheDir = local_config.get("cacheDir") if cacheDir: + if not os.path.isabs(cacheDir) and config_directory is not None: + cacheDir = os.path.normpath(os.path.join(config_directory, cacheDir)) os.makedirs(cacheDir, exist_ok=True) else: cacheDir = tempfile.mkdtemp(prefix="wfexs", suffix="tmpcache") @@ -1386,15 +1441,6 @@ def main() -> None: file=sys.stderr, ) - # A filename is 
needed later, in order to initialize installation keys - if not localConfigFilename: - config_directory = None - config_relname = os.path.basename(defaultLocalConfigFilename) - else: - # Hints for the the default path for the Crypt4GH keys - config_directory = localConfigFilename.parent - config_relname = localConfigFilename.name - # Initialize (and create config file) if command in ( WfExS_Commands.Init, @@ -1539,6 +1585,7 @@ def main() -> None: private_key_filename=args.private_key_file, private_key_passphrase=private_key_passphrase, orcids=op_orcids, + paranoidMode=args.secure, ) elif command == WfExS_Commands.Import: wfInstance = wfBackend.fromPreviousROCrate( diff --git a/wfexs_backend/cache_handler.py b/wfexs_backend/cache_handler.py index 54d344c..ed0cf93 100644 --- a/wfexs_backend/cache_handler.py +++ b/wfexs_backend/cache_handler.py @@ -103,6 +103,7 @@ class CacheMetadataDict(TypedDict): licences: Tuple[URIType, ...] attributions: Sequence[Mapping[str, Any]] fingerprint: Fingerprint + clonable: bool from .common import ( @@ -145,6 +146,7 @@ class CachedContent(NamedTuple): metadata_array: "Sequence[URIWithMetadata]" licences: "Tuple[URIType, ...]" fingerprint: "Optional[Fingerprint]" = None + clonable: "bool" = True class CacheHandlerException(AbstractWfExSException): @@ -567,6 +569,7 @@ def inject( finalCachedFilename: "Optional[pathlib.Path]" = None, tempCachedFilename: "Optional[pathlib.Path]" = None, inputKind: "Optional[ContentKind]" = None, + clonable: "bool" = True, ) -> "Tuple[Optional[pathlib.Path], Optional[Fingerprint]]": if destdir is None: destdir = self.cacheDir @@ -582,6 +585,7 @@ def inject( finalCachedFilename=finalCachedFilename, tempCachedFilename=tempCachedFilename, inputKind=inputKind, + clonable=clonable, ) assert newFinalCachedFilename is not None @@ -618,6 +622,7 @@ def _inject( finalCachedFilename: "Optional[pathlib.Path]" = None, tempCachedFilename: "Optional[pathlib.Path]" = None, inputKind: "Optional[Union[ContentKind, AnyURI, 
Sequence[AnyURI]]]" = None, + clonable: "bool" = True, ) -> "Tuple[Optional[pathlib.Path], Optional[Fingerprint]]": """ This method has been created to be able to inject a cached metadata entry @@ -724,6 +729,7 @@ def _inject( "relative": os.path.relpath(finalCachedFilename, hashDir), "absolute": finalCachedFilename.as_posix(), } + metaStructure["clonable"] = clonable else: metaStructure["resolves_to"] = inputKind @@ -840,6 +846,7 @@ def fetch( registerInCache: "bool" = True, vault: "Optional[SecurityContextVault]" = None, sec_context_name: "Optional[str]" = None, + default_clonable: "bool" = True, ) -> "CachedContent": if destdir is None: destdir = self.cacheDir @@ -902,6 +909,7 @@ def fetch( relFinalCachedFilename: "Optional[RelPath]" finalCachedFilename: "Optional[pathlib.Path]" final_fingerprint: "Optional[Fingerprint]" + clonable: "bool" = default_clonable while not isinstance(inputKind, ContentKind): # These elements are alternative URIs. Any of them should # provide the very same content @@ -1064,6 +1072,7 @@ def fetch( licences.extend(the_licences) if "fingerprint" in metaStructure: final_fingerprint = metaStructure["fingerprint"] + clonable = metaStructure.get("clonable", True) elif offline: # As this is a handler for online resources, comply with offline mode raise CacheOfflineException( @@ -1133,6 +1142,7 @@ def fetch( fetched_metadata_array=pfr.metadata_array, tempCachedFilename=tempCachedFilename, inputKind=inputKind, + clonable=clonable, ) final_fingerprint = fingerprint @@ -1215,4 +1225,5 @@ def fetch( metadata_array=metadata_array, licences=tuple(licences), fingerprint=final_fingerprint, + clonable=clonable, ) diff --git a/wfexs_backend/common.py b/wfexs_backend/common.py index 7570da7..f45a820 100644 --- a/wfexs_backend/common.py +++ b/wfexs_backend/common.py @@ -366,6 +366,9 @@ class MaterializedContent(NamedTuple): of the execution environment fingerprint: If it is available, propagate the computed fingerprint from the cache. 
+ clonable: If it is true, copies of this materialized content can be + performed. Otherwise, content should remain in the original place + represented by "local". """ local: "PathlibLike" @@ -375,6 +378,7 @@ class MaterializedContent(NamedTuple): metadata_array: "Optional[Sequence[URIWithMetadata]]" = None extrapolated_local: "Optional[PathlibLike]" = None fingerprint: "Optional[Fingerprint]" = None + clonable: "bool" = True @classmethod def _mapping_fixes( @@ -449,6 +453,7 @@ class MaterializedInput(NamedTuple): autoFilled: "bool" = False implicit: "bool" = False contentWithURIs: "Optional[ContentWithURIsDesc]" = None + disclosable: "bool" = True if TYPE_CHECKING: diff --git a/wfexs_backend/container_factories/abstract_docker_container.py b/wfexs_backend/container_factories/abstract_docker_container.py index 7031557..9a403e8 100644 --- a/wfexs_backend/container_factories/abstract_docker_container.py +++ b/wfexs_backend/container_factories/abstract_docker_container.py @@ -305,12 +305,14 @@ def _load( with package.open( archivefile.as_posix(), mode="rb" - ) as d_in, tempfile.NamedTemporaryFile() as d_out, tempfile.NamedTemporaryFile() as d_err: - self.logger.debug(f"loading {self.variant_name()} container {dockerTag}") + ) as d_in, tempfile.NamedTemporaryFile() as d_int, tempfile.NamedTemporaryFile() as d_out, tempfile.NamedTemporaryFile() as d_err: + shutil.copyfileobj(d_in, d_int) + self.logger.debug( + f"loading {self.variant_name()} container {dockerTag} from {archivefile.as_posix()} {d_int.name}" + ) with subprocess.Popen( - [self.runtime_cmd, "load"], + [self.runtime_cmd, "load", "-i", d_int.name], env=matEnv, - stdin=d_in, stdout=d_out, stderr=d_err, ) as sp: @@ -367,11 +369,13 @@ def _save( def _version( self, + matEnv: "Mapping[str, str]", ) -> "Tuple[ExitVal, str, str]": with tempfile.NamedTemporaryFile() as d_out, tempfile.NamedTemporaryFile() as d_err: self.logger.debug(f"querying {self.variant_name()} version and details") d_retval = subprocess.Popen( 
[self.runtime_cmd, "version", "--format", "{{json .}}"], + env=matEnv, stdout=d_out, stderr=d_err, ).wait() diff --git a/wfexs_backend/container_factories/docker_container.py b/wfexs_backend/container_factories/docker_container.py index 6f4e5f2..aeef286 100644 --- a/wfexs_backend/container_factories/docker_container.py +++ b/wfexs_backend/container_factories/docker_container.py @@ -125,7 +125,10 @@ def variant_name(self) -> "str": @property def architecture(self) -> "Tuple[ContainerOperatingSystem, ProcessorArchitecture]": - v_retval, payload, v_stderr = self._version() + matEnv = dict(os.environ) + matEnv.update(self.environment) + + v_retval, payload, v_stderr = self._version(matEnv) if v_retval != 0: errstr = """Could not get docker version. Retval {} @@ -613,11 +616,14 @@ def deploySingleContainer( raise ContainerFactoryException(errmsg) from e # Let's load then - do_redeploy = manifestsImageSignature != self._gen_trimmed_manifests_signature( + ins_trimmed_manifests_signature = self._gen_trimmed_manifests_signature( ins_manifests ) + do_redeploy = manifestsImageSignature != ins_trimmed_manifests_signature if do_redeploy: - self.logger.debug(f"Redeploying {dockerTag}") + self.logger.debug( + f"Redeploying {dockerTag} {manifestsImageSignature} != {ins_trimmed_manifests_signature}" + ) # Should we load the image? 
d_retval, d_out_v, d_err_v = self._load(containerPath, dockerTag, matEnv) diff --git a/wfexs_backend/container_factories/podman_container.py b/wfexs_backend/container_factories/podman_container.py index 274e8fb..5cf99b0 100644 --- a/wfexs_backend/container_factories/podman_container.py +++ b/wfexs_backend/container_factories/podman_container.py @@ -142,7 +142,10 @@ def variant_name(self) -> "str": @property def architecture(self) -> "Tuple[ContainerOperatingSystem, ProcessorArchitecture]": - v_retval, payload, v_stderr = self._version() + matEnv = dict(os.environ) + matEnv.update(self.environment) + + v_retval, payload, v_stderr = self._version(matEnv) if v_retval != 0: errstr = """Could not get podman version. Retval {} @@ -624,11 +627,14 @@ def deploySingleContainer( raise ContainerFactoryException(errmsg) from e # Let's load then - do_redeploy = manifestsImageSignature != self._gen_trimmed_manifests_signature( + trimmed_manifests_image_signature = self._gen_trimmed_manifests_signature( ins_manifests ) + do_redeploy = manifestsImageSignature != trimmed_manifests_image_signature if do_redeploy: - self.logger.debug(f"Redeploying {dockerTag}") + self.logger.debug( + f"Redeploying {dockerTag} as {manifestsImageSignature} != {trimmed_manifests_image_signature}" + ) # Should we load the image? 
d_retval, d_out_v, d_err_v = self._load(containerPath, dockerTag, matEnv) diff --git a/wfexs_backend/encrypted_fs.py b/wfexs_backend/encrypted_fs.py index c2f68fb..6a7ddf1 100644 --- a/wfexs_backend/encrypted_fs.py +++ b/wfexs_backend/encrypted_fs.py @@ -166,6 +166,7 @@ def _mountGoCryptFS( gocryptfs_cmd.as_posix(), "-i", str(gocryptfs_idleMinutes) + "m", + "-nosyslog", ] if allowOther: @@ -178,10 +179,10 @@ def _mountGoCryptFS( gocryptfsCommand = gocryptfsMount efs = subprocess.Popen( - gocryptfsMount, + gocryptfsCommand, stdin=subprocess.PIPE, stdout=gocryptfs_init_stdout, - stderr=gocryptfs_init_stdout, + stderr=gocryptfs_init_stderr, cwd=uniqueRawWorkDir, ) efs.communicate(input=clearPass.encode("utf-8")) diff --git a/wfexs_backend/ro_crate.py b/wfexs_backend/ro_crate.py index 7273c7a..07197b5 100644 --- a/wfexs_backend/ro_crate.py +++ b/wfexs_backend/ro_crate.py @@ -1577,7 +1577,9 @@ def addWorkflowInputs( ), the_signature=the_signature, the_licences=itemInURILicences, - do_attach=do_attach, + do_attach=do_attach + and in_item.disclosable + and itemInValues.clonable, ) # An extrapolated input, which needs special handling @@ -1594,7 +1596,8 @@ def addWorkflowInputs( self.work_dir, ), ), - do_attach=True, + do_attach=in_item.disclosable + and itemInValues.clonable, ) crate_extrapolated_file[ "description" @@ -1652,7 +1655,9 @@ def addWorkflowInputs( ).as_posix() + "/", ), - do_attach=do_attach, + do_attach=do_attach + and in_item.disclosable + and itemInValues.clonable, ) # crate_dataset = self.crate.add_dataset_ext( # source=itemInURISource, @@ -1808,14 +1813,18 @@ def addWorkflowInputs( ), the_signature=the_sec_signature, the_licences=secInputURILicences, - do_attach=do_attach, + do_attach=do_attach + and in_item.disclosable + and secInput.clonable, ) elif os.path.isdir(secInputLocalSource): sec_crate_elem, _ = self._add_directory_as_dataset( secInputLocalSource, secInputURISource, - do_attach=do_attach, + do_attach=do_attach + and in_item.disclosable + and 
secInput.clonable, ) # crate_dataset = self.crate.add_dataset_ext( # source=secInputURISource, @@ -2721,6 +2730,7 @@ def addWorkflowExecution( self.logger.error(f"'{' '.join(dot_cmd)}' stderr: {d_err_v}") # Associating the diagram to the main workflow + # https://about.workflowhub.eu/Workflow-RO-Crate/#main-workflow-diagram self.wf_file.append_to("image", the_diagram, compact=True) # Processing the log files diff --git a/wfexs_backend/schemas/stage-definition.json b/wfexs_backend/schemas/stage-definition.json index 9babe79..b391cfa 100644 --- a/wfexs_backend/schemas/stage-definition.json +++ b/wfexs_backend/schemas/stage-definition.json @@ -241,6 +241,21 @@ "type": "string", "minLength": 1 }, + "disclosable": { + "description": "Whether this input contents can be included in RO-Crates or exported", + "type": "boolean", + "default": true + }, + "cacheable": { + "description": "Whether a copy of this input contents can be kept locally outside the working directory", + "type": "boolean", + "default": true + }, + "clonable": { + "description": "Whether a copy of this input contents can be kept locally inside the working directory", + "type": "boolean", + "default": true + }, "globExplode": { "description": "When this is set, and the class is directory, it helps filtering in what it should be included", "type": "string" @@ -421,7 +436,9 @@ "security-context", "globExplode", "autoFill", - "autoPrefix" + "autoPrefix", + "cacheable", + "clonable" ] } } diff --git a/wfexs_backend/utils/contents.py b/wfexs_backend/utils/contents.py index 9ec3763..e81a78f 100644 --- a/wfexs_backend/utils/contents.py +++ b/wfexs_backend/utils/contents.py @@ -431,6 +431,106 @@ def link_or_copy_pathlib( ) +def link_or_symlink_pathlib( + src: "pathlib.Path", + dest: "pathlib.Path", + force_symlink: "bool" = False, +) -> None: + assert ( + src.exists() + ), f"File {src.as_posix()} must exist to be linked or copied {src.exists()} {src.is_symlink()}" + + if isinstance(src, ZipfilePath): + raise 
Exception(f"Unable to symlink {src}, as it is within a ZIP archive") + + # We should not deal with symlinks + src = src.resolve() + dest = dest.resolve() + # Avoid losing everything by overwriting itself + dest_exists = dest.exists() + if dest_exists and src.samefile(dest): + return + + # First, check whether inputs and content + # are in the same filesystem + # as of https://unix.stackexchange.com/a/44250 + dest_or_ancestor_exists = dest_exists + dest_or_ancestor = dest + while not dest_or_ancestor_exists: + dest_or_ancestor = dest_or_ancestor.parent + dest_or_ancestor_exists = dest_or_ancestor.exists() + dest_st_dev = dest_or_ancestor.lstat().st_dev + + # It could be a subtree of not existing directories + if not dest_exists: + dest_parent = dest.parent + if not dest_parent.is_dir(): + dest_parent.mkdir(parents=True) + + # Now, link or symlink + link_condition = False + try: + link_condition = ( + not isinstance(src, ZipfilePath) + and src.lstat().st_dev == dest_st_dev + and not force_symlink + ) + except: + pass + + if link_condition: + try: + if src.is_file(): + if dest_exists: + dest.unlink() + # link_to appeared in Python 3.8 + # hardlink_to appeared in Python 3.10 + # dest.hardlink_to(src) + os.link(src, dest) + else: + # Recursively hardlinking + # as of https://stackoverflow.com/a/10778930 + if dest_exists: + shutil.rmtree(dest) + + # TODO: study passing link_or_copy as copy_function + shutil.copytree(src, dest, copy_function=link_or_copy) # type: ignore[arg-type] + except OSError as ose: + # Even when we are detecting whether it is the same + # device, it can happen both paths are in different + # bind mounts, which forbid hard links + if ose.errno != 18: + if ose.errno == 1 and src.is_file(): + try: + with src.open(mode="rb") as dummy: + readable = dummy.readable() + except OSError as dummy_err: + readable = False + else: + # Too difficult to guess + readable = False + else: + readable = True + + if not readable: + raise ose + + force_symlink = True + 
else: + # Be sure to enable to symlink, to avoid a no-op + force_symlink = True + + if force_symlink: + # Symlinking the content + if dest_exists: + if dest.is_file(): + dest.unlink() + else: + shutil.rmtree(dest) + + dest.symlink_to(src) + + def real_unlink_if_exists(the_path: "PathLikePath", fail_ok: "bool" = False) -> "None": if os.path.lexists(the_path): try: diff --git a/wfexs_backend/wfexs_backend.py b/wfexs_backend/wfexs_backend.py index a819b7d..7e2c457 100644 --- a/wfexs_backend/wfexs_backend.py +++ b/wfexs_backend/wfexs_backend.py @@ -1916,6 +1916,8 @@ def shellFirstStagedWorkflow( # Setting a custom symbol theEnv["PROMPT_COMMAND"] = f"echo \"(WfExS '{nickname}')\"" theEnv["PROMPT_DIRTRIM"] = "2" + # The default (for sh) + theEnv["PS1"] = f"(WfExS '{nickname}') {instance_id} " cp = subprocess.run( command, @@ -1944,6 +1946,7 @@ def cacheFetch( registerInCache: "bool" = True, vault: "Optional[SecurityContextVault]" = None, sec_context_name: "Optional[str]" = None, + default_clonable: "bool" = True, ) -> "CachedContent": """ This is a pass-through method to the cache handler, which translates from symbolic types of cache to their corresponding directories @@ -1967,6 +1970,7 @@ def cacheFetch( registerInCache=registerInCache, vault=vault, sec_context_name=sec_context_name, + default_clonable=default_clonable, ) else: workflow_dir, repo, _, effective_checkout = self.cacheWorkflow( @@ -2779,6 +2783,7 @@ def downloadContent( ignoreCache: "bool" = False, registerInCache: "bool" = True, keep_cache_licence: "bool" = True, + default_clonable: "bool" = True, ) -> "MaterializedContent": """ Download remote file or directory / dataset. 
@@ -2844,6 +2849,7 @@ def downloadContent( ignoreCache=ignoreCache, registerInCache=registerInCache, vault=vault, + default_clonable=default_clonable, ) # TODO: Properly test alternatives downloaded_uri = firstURI.uri @@ -2896,6 +2902,8 @@ def downloadContent( kind=cached_content.kind, metadata_array=cached_content.metadata_array, fingerprint=cached_content.fingerprint, + # We are returning with the most restrictive setting + clonable=cached_content.clonable and default_clonable, ) _LicenceMatcher: "ClassVar[Optional[LicenceMatcher]]" = None diff --git a/wfexs_backend/workflow.py b/wfexs_backend/workflow.py index a2c3b2f..e52a61c 100644 --- a/wfexs_backend/workflow.py +++ b/wfexs_backend/workflow.py @@ -189,6 +189,9 @@ "preferred-name": Union[Literal[False], str], "relative-dir": Union[Literal[False], str], "security-context": str, + "disclosable": bool, + "cacheable": bool, + "clonable": bool, "globExplode": str, "autoFill": bool, "autoPrefix": bool, @@ -330,6 +333,7 @@ bin2dataurl, link_or_copy, link_or_copy_pathlib, + link_or_symlink_pathlib, ) from .utils.marshalling_handling import marshall_namedtuple, unmarshall_namedtuple from .utils.misc import ( @@ -1056,6 +1060,17 @@ def setupWorkdir( raise WFException(errmsg) from e was_setup = False else: + # IMPORTANT: There can be a race condition in some containerised + # scenarios where the FUSE mount process goes to background, but + # mounting itself has not finished. This check helps + # both to detect and to avoid that corner case. 
+ if not os.path.ismount(uniqueWorkDir): + errmsg = f"Corner case: cannot keep mounted FUSE mount {uniqueWorkDir} with {encfs_cmd}" + self.logger.exception(errmsg) + if not fail_ok: + raise WFException(errmsg) + was_setup = False + was_setup = True # and start the thread which keeps the mount working self.encfsCond = threading.Condition() @@ -2376,6 +2391,7 @@ def _fetchRemoteFile( hardenPrettyLocal: "bool" = False, prettyRelname: "Optional[RelPath]" = None, ignoreCache: "bool" = False, + cloneToStore: "bool" = True, ) -> "Sequence[MaterializedContent]": # Embedding the context alt_remote_file, alt_is_plain = self._buildLicensedURI( @@ -2388,9 +2404,10 @@ def _fetchRemoteFile( dest=storeDir, offline=offline, vault=self.vault, - ignoreCache=ignoreCache or not cacheable, + ignoreCache=ignoreCache, registerInCache=cacheable, keep_cache_licence=alt_is_plain, + default_clonable=cloneToStore, ) # Now, time to create the link @@ -2427,8 +2444,13 @@ def _fetchRemoteFile( prettyLocal = inputDestDir / (prefix + prettyRelname) if not prettyLocal.exists(): - # We are either hardlinking or copying here - link_or_copy_pathlib(matContent.local, prettyLocal) + # Are we allowed to make a copy of the input in the working directory? 
+ if matContent.clonable: + # We are either hardlinking or copying here + link_or_copy_pathlib(matContent.local, prettyLocal) + else: + # We are either hardlinking or symlinking here + link_or_symlink_pathlib(matContent.local, prettyLocal) remote_pairs = [] if globExplode is not None: @@ -2801,6 +2823,7 @@ def _fetchContentWithURIs( lastInput: "int" = 0, offline: "bool" = False, ignoreCache: "bool" = False, + cloneToStore: "bool" = True, ) -> "Tuple[Sequence[MaterializedInput], int, Sequence[str]]": # Current code for ContentWithURIs is only implemented for # tabular contents @@ -2842,12 +2865,19 @@ def _fetchContentWithURIs( # We are sending the context name thinking in the future, # as it could contain potential hints for authenticated access contextName = inputs.get("security-context") - cacheable = not self.paranoidMode if inputs.get("cache", True) else False + # This is only for the paranoid mode + cacheable = inputs.get("cacheable", True) + if self.paranoidMode: + ignoreCache = False + + if not cacheable and not cloneToStore: + self.logger.warning( + "Current staging scenario can lead to unexpected errors in case of cache miss, as neither caching nor cloning are allowed" + ) + if remote_files is not None: - this_cacheable = cacheable this_ignoreCache = ignoreCache else: - this_cacheable = False this_ignoreCache = True preferred_name_conf = cast("Optional[RelPath]", inputs.get("preferred-name")) @@ -2875,7 +2905,7 @@ def _fetchContentWithURIs( extrapolatedInputDestDir / relative_dir ).resolve() - # The storage dir depends on whether it can be cached or not + # The storage dir depends on whether it can be cloned or not storeDir: "Union[CacheType, pathlib.Path]" = ( CacheType.Input if cacheable else workflowInputs_destdir ) @@ -2912,12 +2942,13 @@ def _fetchContentWithURIs( contextName, offline, storeDir, - cacheable, - inputDestDir, + cacheable=cacheable, + inputDestDir=inputDestDir, globExplode=None, prefix=str(lastInput) + "_", prettyRelname=pretty_relname, 
ignoreCache=this_ignoreCache, + cloneToStore=cloneToStore, ) except: self.logger.exception( @@ -2971,7 +3002,8 @@ def _fetchContentWithURIs( inputDestDir, globExplode=None, prefix=str(lastInput) + "_", - ignoreCache=ignoreCache, + ignoreCache=this_ignoreCache, + cloneToStore=cloneToStore, ) except: self.logger.exception( @@ -3065,6 +3097,7 @@ def _fetchContentWithURIs( "uriColumns": t_uri_cols, }, ), + disclosable=inputs.get("disclosable", True), ) ) @@ -3150,6 +3183,7 @@ def fetchInputs( if isinstance(inputs, dict): inputClass = inputs.get("c-l-a-s-s") if inputClass is not None: + clonable = inputs.get("clonable", True) if inputClass in ( ContentKind.File.name, ContentKind.Directory.name, @@ -3201,6 +3235,10 @@ def fetchInputs( name=linearKey, values=[autoFilledDir], autoFilled=True, + # What it is autofilled is probably + # an output, so it should not be + # automatically disclosable + disclosable=False, ) ) continue @@ -3236,6 +3274,10 @@ def fetchInputs( # TODO: do it in a more elegant way values=[autoFilledFile.as_posix()], autoFilled=True, + # What it is autofilled is probably + # an output, so it should not be + # automatically disclosable + disclosable=False, ) ) continue @@ -3256,12 +3298,10 @@ def fetchInputs( contextName = inputs.get("security-context") secondary_remote_files = inputs.get("secondary-urls") - cacheable = ( - not self.paranoidMode - if inputs.get("cache", True) - else False + cacheable = inputs.get("cacheable", True) + this_ignoreCache = ( + False if self.paranoidMode else ignoreCache ) - this_ignoreCache = ignoreCache else: contextName = None secondary_remote_files = None @@ -3359,6 +3399,7 @@ def fetchInputs( prefix=str(lastInput) + "_", prettyRelname=pretty_relname, ignoreCache=this_ignoreCache, + cloneToStore=clonable, ) remote_pairs.extend(t_remote_pairs) except: @@ -3402,7 +3443,8 @@ def fetchInputs( inputDestDir, globExplode, prefix=str(lastInput) + "_", - ignoreCache=ignoreCache, + ignoreCache=this_ignoreCache, + 
cloneToStore=clonable, ) ) secondary_remote_pairs.extend( @@ -3421,6 +3463,7 @@ def fetchInputs( name=linearKey, values=remote_pairs, secondaryInputs=secondary_remote_pairs, + disclosable=inputs.get("disclosable", True), ) ) else: @@ -3453,6 +3496,7 @@ def fetchInputs( kind=contentKind, ) ], + disclosable=inputs.get("disclosable", True), ) ) @@ -3468,7 +3512,8 @@ def fetchInputs( workflowExtrapolatedInputs_destdir, lastInput=lastInput, offline=offline, - ignoreCache=ignoreCache, + ignoreCache=this_ignoreCache, + cloneToStore=clonable, ) theInputs.extend(theNewInputs) the_failed_uris.extend(new_failed_uris) @@ -3483,6 +3528,7 @@ def fetchInputs( MaterializedInput( name=linearKey, values=input_val, + disclosable=inputs.get("disclosable", True), ) ) else: @@ -3512,6 +3558,7 @@ def fetchInputs( MaterializedInput( name=linearKey, values=inputs, + disclosable=True, ) ) @@ -5073,6 +5120,10 @@ def locateExportItems( raise KeyError( f"Param {item.name} to be exported does not exist" ) + if not materializedParam.disclosable: + raise PermissionError( + f"Param {item.name} contents have export restrictions" + ) retval.extend( cast( "Iterable[MaterializedContent]", @@ -5125,6 +5176,10 @@ def locateExportItems( raise KeyError( f"Environment variable {item.name} to be exported does not exist" ) + if not materializedEnvVar.disclosable: + raise PermissionError( + f"Environment variable {item.name} contents have export restrictions" + ) retval.extend( cast( "Iterable[MaterializedContent]", diff --git a/wfexs_backend/workflow_engines/nextflow_engine.py b/wfexs_backend/workflow_engines/nextflow_engine.py index 7b8a329..6e6c844 100644 --- a/wfexs_backend/workflow_engines/nextflow_engine.py +++ b/wfexs_backend/workflow_engines/nextflow_engine.py @@ -1731,7 +1731,13 @@ def augmentNextflowInputs( # Time to update an existing materialized input theValues = val if isinstance(val, list) else [val] augmentedInput = MaterializedInput( - name=augmentedInput.name, values=theValues, 
autoFilled=True + name=augmentedInput.name, + values=theValues, + autoFilled=True, + # What it is autofilled is probably + # an output, so it should not be + # automatically disclosable + disclosable=False, ) augmentedInputs.append(augmentedInput) diff --git a/workflow_examples/hello/hellow_cwl_singularity.wfex.stage b/workflow_examples/hello/hellow_cwl_singularity.wfex.stage new file mode 100644 index 0000000..37e0831 --- /dev/null +++ b/workflow_examples/hello/hellow_cwl_singularity.wfex.stage @@ -0,0 +1,14 @@ +workflow_id: github:inab/hello-workflows/b0afc5871c6fdbd66576fcc5a3813ea49aca5104/cwl/hello-workflow.cwl +workflow_config: + secure: false + containerType: singularity +# All the inputs must be URLs or CURIEs from identifiers.org +params: + an_input: + c-l-a-s-s: File + url: github:inab/hello-workflows/b0afc5871c6fdbd66576fcc5a3813ea49aca5104/cwl/hello.yml +environment: + SECRET_VARIABLE: "The secret content" +outputs: + hello_output: + c-l-a-s-s: File