diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 19693bad..fc75ce2a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -85,10 +85,16 @@ repos:
       args: [ wfexs_backend/schemas ]
 
   - repo: https://github.com/jmfernandez/pre-commit_mirrors-actionlint.git
-    rev: v1.6.25
+    rev: v1.7.1
    hooks:
      - id: actionlint
 
+  - repo: https://github.com/jmfernandez/pre-commit_mirrors-hadolint.git
+    rev: v2.12.0
+    hooks:
+      - id: hadolint
+        args: [ -t, warning ]
+
   - repo: https://github.com/ambv/black.git
     rev: 23.3.0
     hooks:
diff --git a/CITATION.cff b/CITATION.cff
index c37c0fa1..e0469a5d 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -28,4 +28,4 @@ message: "If you use this software, please cite it using these metadata."
 repository-code: "https://github.com/inab/WfExS-backend"
 type: software
 title: "WfExS-backend"
-version: 0.99.9
+version: 1.0.0a0
diff --git a/INSTALL.md b/INSTALL.md
index 776442be..6f061a81 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,11 +1,139 @@
 # "Easy" install and setup a Workflow Execution Service backend instance
 
-## "Easy" setup of core and main software dependencies
+## Easy creation of WfExS container image
 
-There is an automated installer at [full-installer.bash](full-installer.bash):
+This section describes how to build a container image containing WfExS and its preconditions.
+
+### Docker
+
+The precondition is having Docker properly set up and running.
+
+You can build the Docker image for a specific version (release, tag, branch or commit)
+without fetching a full copy of the repo or the Dockerfile recipe,
+just using the following bash pattern:
+
+```bash
+# WFEXS_VER can be either a branch, a tag or a commit hash
+WFEXS_VER=574fe343c0b59eecd95afbc67894456359ebe649
+docker build -t inab/wfexs-backend:${WFEXS_VER} \
+--build-arg wfexs_checkout="${WFEXS_VER}" \
+https://raw.githubusercontent.com/inab/WfExS-backend/${WFEXS_VER}/container_recipes/Dockerfile
+```
+
+Alternatively, if the docker client does not accept URLs, you need to fetch
+a local copy of the recipe first; the following commands will help you:
+
+```bash
+# WFEXS_VER can be either a branch, a tag or a commit hash
+WFEXS_VER=574fe343c0b59eecd95afbc67894456359ebe649
+mkdir WfExS_docker_build
+cd WfExS_docker_build
+curl -O https://raw.githubusercontent.com/inab/WfExS-backend/${WFEXS_VER}/container_recipes/Dockerfile
+
+docker build -t inab/wfexs-backend:${WFEXS_VER} \
+--build-arg wfexs_checkout="${WFEXS_VER}" \
+.
+```
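+
+Once built, you can give the image a quick smoke test. This is a minimal
+sketch, assuming only what the recipe itself provides: the `wfexs_backend`
+Python package is pip-installed inside the image, so its module entry point
+can be invoked directly:
+
+```bash
+# Print the WfExS version from inside the freshly built image
+docker run --rm -ti inab/wfexs-backend:${WFEXS_VER} python -m wfexs_backend -V
+```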
+
+### Podman
+
+The precondition is having Podman properly set up and running.
+
+Mirroring what can be done with Docker, you can build the Podman
+image for a specific version (release, tag, branch or commit)
+without fetching a full copy of the repo or the recipe,
+just using the following bash pattern:
+
+```bash
+# WFEXS_VER can be either a branch, a tag or a commit hash
+WFEXS_VER=574fe343c0b59eecd95afbc67894456359ebe649
+podman build -t inab/wfexs-backend:${WFEXS_VER} \
+--build-arg wfexs_checkout="${WFEXS_VER}" \
+https://raw.githubusercontent.com/inab/WfExS-backend/${WFEXS_VER}/container_recipes/Dockerfile
+```
+
+Alternatively, if the podman client does not accept URLs, you need to fetch
+a local copy of the recipe first; the following commands will help you:
+
+```bash
+# WFEXS_VER can be either a branch, a tag or a commit hash
+WFEXS_VER=574fe343c0b59eecd95afbc67894456359ebe649
+mkdir WfExS_podman_build
+cd WfExS_podman_build
+curl -O https://raw.githubusercontent.com/inab/WfExS-backend/${WFEXS_VER}/container_recipes/Dockerfile
+
+podman build -t inab/wfexs-backend:${WFEXS_VER} \
+--build-arg wfexs_checkout="${WFEXS_VER}" \
+.
+```
+
+### SIF image
+
+The precondition is having either Apptainer or Singularity properly set up. There are three different routes to create a SIF image of WfExS:
+
+* The first approach requires either using curl or having a local copy of the repository,
+  **and** a modern enough version of either apptainer (1.3 or later)
+  or singularity (4.0 or later). Without a local copy, fetch the recipe with curl:
+
+  ```bash
+  # WFEXS_VER can be either a branch, a tag or a commit hash
+  WFEXS_VER=574fe343c0b59eecd95afbc67894456359ebe649
+  mkdir WfExS_SIF_build
+  cd WfExS_SIF_build
+  curl -O https://raw.githubusercontent.com/inab/WfExS-backend/${WFEXS_VER}/container_recipes/Singularity.def
+  singularity build \
+  --build-arg wfexs_checkout="${WFEXS_VER}" \
+  wfexs-backend-${WFEXS_VER}.sif Singularity.def
+  ```
+
+  With a local copy of the repository, run instead from the project root:
+
+  ```bash
+  # WFEXS_VER can be either a branch, a tag or a commit hash
+  WFEXS_VER=574fe343c0b59eecd95afbc67894456359ebe649
+  singularity build \
+  --build-arg wfexs_checkout="${WFEXS_VER}" \
+  wfexs-backend-${WFEXS_VER}.sif container_recipes/Singularity.def
+  ```
+
+* The second approach involves first creating the WfExS docker image locally,
+  following the pattern previously described, and then telling apptainer / singularity
+  to build from it:
+
+  ```bash
+  # Remember to use the correct tag!!!
+  WFEXS_VER=574fe343c0b59eecd95afbc67894456359ebe649
+  singularity build wfexs-${WFEXS_VER}.sif docker-daemon://inab/wfexs-backend:${WFEXS_VER}
+  ```
+
+* The third approach involves first creating either the local docker or podman image,
+  as described above. Then, you have to save it to an image file,
+  which will be used to build the SIF image:
+
+  ```bash
+  mkdir WfExS_SIF_build
+  cd WfExS_SIF_build
+
+  # Remember to use the correct tag!!!
+  WFEXS_VER=574fe343c0b59eecd95afbc67894456359ebe649
+
+  # Next command should be used if you used podman to build the local image
+  podman save -o wfexs-backend-${WFEXS_VER}.tar inab/wfexs-backend:${WFEXS_VER}
+
+  # Next command should be used if you used docker to build the local image
+  docker save -o wfexs-backend-${WFEXS_VER}.tar inab/wfexs-backend:${WFEXS_VER}
+
+  singularity build wfexs-${WFEXS_VER}.sif docker-archive:wfexs-backend-${WFEXS_VER}.tar
+  ```
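+
+Whichever route you took, the resulting SIF file can be smoke-tested. A minimal
+sketch, assuming the `wfexs_backend` Python package is installed inside the
+image (adjust the `.sif` file name to the one you actually built):
+
+```bash
+singularity exec wfexs-backend-${WFEXS_VER}.sif python -m wfexs_backend -V
+```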
+
+## "Easy" local setup of core and main software dependencies
+
+There is an automated installer at [full-installer.bash](container_recipes/full-installer.bash), which is also used inside the Docker image:
 
 ```bash
-./full-installer.bash
+container_recipes/full-installer.bash
 ```
 
 which assumes both essential build dependencies
@@ -17,8 +145,8 @@ The automated installer installs both core dependencies and it fetches and insta
  * A static bash copy: needed by Nextflow runner to monkey-patch some containers which do not have bash, or whose bash copy is buggy.
 
 If you also want to install [singularity](https://sylabs.io/singularity/) or
-[apptainer](https://apptainer.org) at the WfExS-backend virtual environment, and you are using Ubuntu Linux, a rootless setup is achieved using either [singularity-local-installer.bash](singularity-local-installer.bash)
-or [apptainer-local-installer.bash](apptainer-local-installer.bash).
+[apptainer](https://apptainer.org) at the WfExS-backend virtual environment, and you are using Ubuntu Linux, a rootless setup is achieved using either [singularity-local-installer.bash](container_recipes/singularity-local-installer.bash)
+or [apptainer-local-installer.bash](container_recipes/apptainer-local-installer.bash).
 At most only one of them can be locally installed, because as of September 2022
 workflow engines like `cwltool` or `nextflow` still use the hardcoded name of
 `singularity`. So, the apptainer installer has to create
@@ -26,12 +154,12 @@ a "singularity" symlink pointing to "apptainer".
 
 ```bash
 # For singularity
-./singularity-local-installer.bash
+container_recipes/singularity-local-installer.bash
 ```
 
 ```bash
 # For apptainer
-./apptainer-local-installer.bash
+container_recipes/apptainer-local-installer.bash
 ```
 
 This setup will only work on Linux systems with cgroups v2 enabled. You will also need to install the package which provides `mksquashfs`, which is `squashfs-tools` both in Debian and Ubuntu.
 
@@ -40,12 +168,12 @@ The scripts only install singularity or apptainer when it is not available. If y
 
 ```bash
 # For singularity
-./singularity-local-installer.bash force
+container_recipes/singularity-local-installer.bash force
 ```
 
 ```bash
 # For apptainer
-./apptainer-local-installer.bash force
+container_recipes/apptainer-local-installer.bash force
 ```
 
 ## Core Dependencies
 
@@ -59,7 +187,7 @@ This workflow execution service backend is written for Python 3.7 and later.
 
 * The creation of a virtual environment in which to install the WfExS backend dependencies can be done by running:
 
 ```bash
-./basic-installer.bash
+container_recipes/basic-installer.bash
 ```
 
 * If you upgrade your Python installation (from version 3.8 to 3.9 or later, for instance), or you move this folder to a different location after following these instructions, you may need to remove and reinstall the virtual environment.
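+
+In that situation, a minimal recovery sketch, assuming the default virtual
+environment location `.pyWEenv` used by the installers, run from the
+checkout's root:
+
+```bash
+# Remove the stale virtual environment and let the installer recreate it
+rm -rf .pyWEenv
+container_recipes/basic-installer.bash
+```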
@@ -101,10 +229,8 @@ All the development dependencies are declared at [dev-requirements.txt](dev-requ
 ```bash
 python3 -m venv .pyWEenv
 source .pyWEenv/bin/activate
-pip install --upgrade pip wheel
-pip install -r requirements.txt
-pip install -r dev-requirements.txt
-pip install -r mypy-requirements.txt
+pip install --require-virtualenv --upgrade pip wheel
+pip install --require-virtualenv -r requirements.txt -r dev-requirements.txt -r mypy-requirements.txt
 ```
 
 One of these dependencies is [pre-commit](https://pre-commit.com/), whose rules are declared at [.pre-commit-config.yaml](.pre-commit-config.yaml) (there are special versions of these rules for GitHub).
@@ -147,6 +273,6 @@ flake8 --ignore E501 wfexs_backend
 ```
 
 # License
-* © 2020-2022 Barcelona Supercomputing Center (BSC), ES
+* © 2020-2024 Barcelona Supercomputing Center (BSC), ES
 
 Licensed under the Apache License, version 2.0, see the file `LICENSE.txt` for details.
diff --git a/README.md b/README.md
index a7e7a5b2..df1ce874 100644
--- a/README.md
+++ b/README.md
@@ -58,794 +58,9 @@ EOSC-Life deliverable D8.1, _Zenodo_
 
 ## WfExS-backend Usage
 
-```bash
-python WfExS-backend.py --full-help
-```
-
-General command line usage +An automatically generated description of the command line directives is available [at the CLI section of the documentation](https://wfexs-backend.readthedocs.io/en/latest/cli.html). -``` -usage: WfExS-backend.py [-h] [--log-file LOGFILENAME] [-q] [-v] [-d] - [-L LOCALCONFIGFILENAME] [--cache-dir CACHEDIR] [-V] - [--full-help] - {init,cache,staged-workdir,export,list-fetchers,list-exporters,list-container-factories,list-workflow-engines,list-licences,config-validate,stage,re-stage,import,mount-workdir,export-stage,offline-execute,execute,export-results,export-crate} - ... - -WfExS (workflow execution service) backend 0.99.0-43-g5058e32 -(5058e32bba74aecc3fef81c9b954b80afbbbb146, branch full_circle) - -options: - -h, --help show this help message and exit - --log-file LOGFILENAME - Store messages in a file instead of using standard - error and standard output (default: None) - -q, --quiet Only show engine warnings and errors (default: None) - -v, --verbose Show verbose (informational) messages (default: None) - -d, --debug Show debug messages (use with care, as it can disclose - passphrases and passwords) (default: None) - -L LOCALCONFIGFILENAME, --local-config LOCALCONFIGFILENAME - Local installation configuration file (can also be set - up through WFEXS_CONFIG_FILE environment variable) - (default: /home/jmfernandez/projects/WfExS/WfExS- - backend_full_circle/wfexs_config.yml) - --cache-dir CACHEDIR Caching directory (default: None) - -V, --version show program's version number and exit - --full-help It returns full help (default: False) - -commands: - Command to run. It must be one of these - - {init,cache,staged-workdir,export,list-fetchers,list-exporters,list-container-factories,list-workflow-engines,list-licences,config-validate,stage,re-stage,import,mount-workdir,export-stage,offline-execute,execute,export-results,export-crate} - init Init local setup - cache Cache handling subcommands - staged-workdir Staged working directories handling subcommands - export Staged working directories export subcommands - list-fetchers List the supported fetchers / schemes - list-exporters List the supported export plugins - list-container-factories - List the supported container factories - list-workflow-engines - List the supported workflow engines - list-licences List the documented licences, both embedded and - fetched from SPDX release 3.23 - config-validate Validate the configuration files to be used for - staging and execution - stage Prepare the staging (working) directory for workflow - execution, fetching dependencies and contents - re-stage Prepare a new staging (working) directory for workflow - execution, repeating the fetch of dependencies and - contents - import Workflow Run RO-Crate import into a new staged working - directory - mount-workdir Mount the encrypted staging directory on secure - staging scenarios - export-stage Export the staging directory as an RO-Crate - offline-execute Execute an already prepared workflow in the staging - directory - execute Execute the stage + offline-execute + export steps - export-results Export the results to a remote location, gathering - their public ids - export-crate Export an already executed workflow in the staging - directory as an RO-Crate -``` -
-
-Subparser init - -``` -usage: WfExS-backend.py init [-h] - -optional arguments: - -h, --help show this help message and exit -``` -Subparser cache - -``` -
-
-usage: WfExS-backend.py cache [-h] [-r] [--cascade] [-g] - {ls,status,inject,fetch,rm,validate} - {input,ro-crate,ga4gh-trs,workflow} - [cache_command_args [cache_command_args ...]] - -positional arguments: - {ls,status,inject,fetch,rm,validate} - Cache command to perform - - ls List the cache entries - status Show the cache entries metadata - inject Inject a new entry in the cache - fetch Fetch a new cache entry, giving as input both the URI and optionally both a security context file and a security context name - rm Remove an entry from the cache - validate Validate the consistency of the cache - {input,ro-crate,ga4gh-trs,workflow} - Cache type to perform the cache command - - input Cached or injected inputs - ro-crate Cached RO-Crates (usually from WorkflowHub) - ga4gh-trs Cached files from tools described at GA4GH TRS repositories - workflow Cached workflows, which come from a git repository - cache_command_args Optional cache element names (default: None) - -optional arguments: - -h, --help show this help message and exit - -r Try doing the operation recursively (i.e. both - metadata and data) (default: False) - --cascade Try doing the operation in cascade (including the URIs - which resolve to other URIs) (default: False) - -g, --glob Given cache element names are globs (default: False) -``` -
-
-Subparser staged-workdir - -``` -usage: WfExS-backend.py staged-workdir [-h] [--orcid ORCIDS] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - [--inputs] [--outputs] [--workflow] - [--containers] [--prospective] [--full] - [--licence LICENCES] - [--crate-pid CRATE_PID] [-g] - {offline-exec,ls,mount,rm,shell,status,create-staged-crate,create-prov-crate} - [staged_workdir_command_args [staged_workdir_command_args ...]] - -positional arguments: - {offline-exec,ls,mount,rm,shell,status,create-staged-crate,create-prov-crate} - Staged working directory command to perform - - offline-exec Offline execute the staged instances which match the input pattern - ls List the staged instances - It shows the instance id, nickname, - encryption and whether they are damaged - mount Mount the staged instances which match the input pattern - rm Removes the staged instances which match the input pattern - shell Launches a command in the workdir - First parameter is either the staged instance id or the nickname. - It launches the command specified after the id. - If there is no additional parameters, it launches a shell - in the mounted working directory of the instance - status Shows staged instances status - create-staged-crateIt creates an RO-Crate from the prospective provenance - create-prov-crateIt creates an RO-Crate from the retrospective provenance (after a workflow execution) - staged_workdir_command_args - Optional staged working directory element names - (default: None) - -optional arguments: - -h, --help show this help message and exit - --orcid ORCIDS ORCID(s) of the person(s) staging, running or - exporting the workflow scenario (default: []) - -g, --glob Given staged workflow names are globs (default: False) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -ro-crate-payload: - What to include in the RO-Crate - - --inputs Should the RO-Crate contain a inputs copy (of - everything)? (default: []) - --outputs Should the RO-Crate contain a outputs copy (of - everything)? (default: []) - --workflow Should the RO-Crate contain a workflow copy (of - everything)? (default: []) - --containers Should the RO-Crate contain a containers copy (of - everything)? (default: []) - --prospective Should the RO-Crate contain a prospective copy (of - everything)? (default: []) - --full Should the RO-Crate contain a full copy (of - everything)? (default: []) - --licence LICENCES Licence(s) to attach to the generated RO-Crate - (default: []) - --crate-pid CRATE_PID - Permanent identifier to embed within the generated RO- - Crate metadata, like a pre-generated DOI (default: - None) -``` -
-
-Subparser export - -``` -usage: WfExS-backend.py export [-h] [-Z SECURITYCONTEXTSCONFIGFILENAME] - [-E EXPORTSCONFIGFILENAME] [--orcid ORCIDS] - [--public-key-file PUBLIC_KEY_FILES] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - -J WORKFLOWWORKINGDIRECTORY - [--licence LICENCES] - {ls,run} - [export_contents_command_args [export_contents_command_args ...]] - -positional arguments: - {ls,run} Export operations from staged working directory to perform - - ls List the public identifiers obtained from previous export actions - run Run the different export actions, pushing the exported content and gathering the obtained permanent / public identifiers - export_contents_command_args - Optional export names (default: None) - -optional arguments: - -h, --help show this help message and exit - -Z SECURITYCONTEXTSCONFIGFILENAME, --creds-config SECURITYCONTEXTSCONFIGFILENAME - Configuration file, describing security contexts, - which hold credentials and similar (default: None) - -E EXPORTSCONFIGFILENAME, --exports-config EXPORTSCONFIGFILENAME - Configuration file, describing exports which can be - done (default: None) - --orcid ORCIDS ORCID(s) of the person(s) staging, running or - exporting the workflow scenario (default: []) - --public-key-file PUBLIC_KEY_FILES - This parameter switches on secure processing. Path to - the public key(s) to be used to encrypt the working - directory (default: []) - -J WORKFLOWWORKINGDIRECTORY, --staged-job-dir WORKFLOWWORKINGDIRECTORY - Already staged job directory (default: None) - --licence LICENCES Licence(s) to attach to the exported contents - (default: []) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -``` -
-
-Subparser list-fetchers - -``` -usage: WfExS-backend.py list-fetchers [-h] - -optional arguments: - -h, --help show this help message and exit - -``` -
-
-Subparser list-exporters - -``` -usage: WfExS-backend.py list-exporters [-h] - -optional arguments: - -h, --help show this help message and exit - -``` -
-
-Subparser list-container-factories - -``` -usage: WfExS-backend.py list-container-factories [-h] - -optional arguments: - -h, --help show this help message and exit - -``` -
-
-Subparser list-workflow-engines - -``` -usage: WfExS-backend.py list-workflow-engines [-h] - -optional arguments: - -h, --help show this help message and exit - -``` -
-
-Subparser list-licences - -``` -usage: WfExS-backend.py list-licences [-h] - -optional arguments: - -h, --help show this help message and exit - -``` -
-
-Subparser config-validate - -``` -usage: WfExS-backend.py config-validate [-h] -W WORKFLOWCONFIGFILENAME - [-Z SECURITYCONTEXTSCONFIGFILENAME] - -optional arguments: - -h, --help show this help message and exit - -W WORKFLOWCONFIGFILENAME, --workflow-config WORKFLOWCONFIGFILENAME - Configuration file, describing workflow and inputs - (default: None) - -Z SECURITYCONTEXTSCONFIGFILENAME, --creds-config SECURITYCONTEXTSCONFIGFILENAME - Configuration file, describing security contexts, - which hold credentials and similar (default: None) - -``` -
-
-Subparser stage - -``` -usage: WfExS-backend.py stage [-h] -W WORKFLOWCONFIGFILENAME - [-Z SECURITYCONTEXTSCONFIGFILENAME] - [-n NICKNAME_PREFIX] [--orcid ORCIDS] - [--public-key-file PUBLIC_KEY_FILES] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - -optional arguments: - -h, --help show this help message and exit - -W WORKFLOWCONFIGFILENAME, --workflow-config WORKFLOWCONFIGFILENAME - Configuration file, describing workflow and inputs - (default: None) - -Z SECURITYCONTEXTSCONFIGFILENAME, --creds-config SECURITYCONTEXTSCONFIGFILENAME - Configuration file, describing security contexts, - which hold credentials and similar (default: None) - -n NICKNAME_PREFIX, --nickname-prefix NICKNAME_PREFIX - Nickname prefix to be used on staged workdir creation - (default: None) - --orcid ORCIDS ORCID(s) of the person(s) staging, running or - exporting the workflow scenario (default: []) - --public-key-file PUBLIC_KEY_FILES - This parameter switches on secure processing. Path to - the public key(s) to be used to encrypt the working - directory (default: []) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -``` -
-
-Subparser re-stage - -``` -Subparser 're-stage' -usage: WfExS-backend.py re-stage [-h] [-W WORKFLOWCONFIGFILENAME] [-s] [-S] - [-Z SECURITYCONTEXTSCONFIGFILENAME] - [-n NICKNAME_PREFIX] [--orcid ORCIDS] - [--public-key-file PUBLIC_KEY_FILES] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - -J WORKFLOWWORKINGDIRECTORY - -options: - -h, --help show this help message and exit - -W WORKFLOWCONFIGFILENAME, --workflow-config WORKFLOWCONFIGFILENAME - Optional configuration file, describing some inputs - which will replace the base, original ones (default: - None) - -s, --no-secure Make unsecured working directory (default: True) - -S, --secure Make secured working directory (default) (default: - False) - -Z SECURITYCONTEXTSCONFIGFILENAME, --creds-config SECURITYCONTEXTSCONFIGFILENAME - Configuration file, describing security contexts, - which hold credentials and similar (default: None) - -n NICKNAME_PREFIX, --nickname-prefix NICKNAME_PREFIX - Nickname prefix to be used on staged workdir creation - (default: None) - --orcid ORCIDS ORCID(s) of the person(s) staging, running or - exporting the workflow scenario (default: []) - --public-key-file PUBLIC_KEY_FILES - This parameter switches on secure processing. Path to - the public key(s) to be used to encrypt the working - directory (default: []) - -J WORKFLOWWORKINGDIRECTORY, --staged-job-dir WORKFLOWWORKINGDIRECTORY - Already staged job directory (default: None) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -``` -
-
-Subparser import - -``` -Subparser 'import' -usage: WfExS-backend.py import [-h] -R WORKFLOWROCRATEFILENAMEORURI - [-W WORKFLOWCONFIGFILENAME] [-s] [-S] - [-Z SECURITYCONTEXTSCONFIGFILENAME] - [-n NICKNAME_PREFIX] [--orcid ORCIDS] - [--public-key-file PUBLIC_KEY_FILES] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - -options: - -h, --help show this help message and exit - -R WORKFLOWROCRATEFILENAMEORURI, --workflow-rocrate WORKFLOWROCRATEFILENAMEORURI - Workflow Run RO-Crate describing a previous workflow - execution. It can be either a local path or an URI - resolvable from WfExS with no authentication (default: - None) - -W WORKFLOWCONFIGFILENAME, --workflow-config WORKFLOWCONFIGFILENAME - Optional configuration file, describing some inputs - which will replace the base, original ones (default: - None) - -s, --no-secure Make unsecured working directory (default: True) - -S, --secure Make secured working directory (default) (default: - False) - -Z SECURITYCONTEXTSCONFIGFILENAME, --creds-config SECURITYCONTEXTSCONFIGFILENAME - Configuration file, describing security contexts, - which hold credentials and similar (default: None) - -n NICKNAME_PREFIX, --nickname-prefix NICKNAME_PREFIX - Nickname prefix to be used on staged workdir creation - (default: None) - --orcid ORCIDS ORCID(s) of the person(s) staging, running or - exporting the workflow scenario (default: []) - --public-key-file PUBLIC_KEY_FILES - This parameter switches on secure processing. Path to - the public key(s) to be used to encrypt the working - directory (default: []) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -``` -
-
-Subparser mount-workdir - -``` -usage: WfExS-backend.py mount-workdir [-h] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - -J WORKFLOWWORKINGDIRECTORY - -optional arguments: - -h, --help show this help message and exit - -J WORKFLOWWORKINGDIRECTORY, --staged-job-dir WORKFLOWWORKINGDIRECTORY - Already staged job directory (default: None) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -``` -
-
-Subparser export-stage - -``` -usage: WfExS-backend.py export-stage [-h] [--orcid ORCIDS] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - -J WORKFLOWWORKINGDIRECTORY [--inputs] - [--outputs] [--workflow] [--containers] - [--prospective] [--full] - [--licence LICENCES] - [--crate-pid CRATE_PID] - -optional arguments: - -h, --help show this help message and exit - --orcid ORCIDS ORCID(s) of the person(s) staging, running or - exporting the workflow scenario (default: []) - -J WORKFLOWWORKINGDIRECTORY, --staged-job-dir WORKFLOWWORKINGDIRECTORY - Already staged job directory (default: None) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -ro-crate-payload: - What to include in the RO-Crate - - --inputs Should the RO-Crate contain a inputs copy (of - everything)? (default: []) - --outputs Should the RO-Crate contain a outputs copy (of - everything)? (default: []) - --workflow Should the RO-Crate contain a workflow copy (of - everything)? (default: []) - --containers Should the RO-Crate contain a containers copy (of - everything)? (default: []) - --prospective Should the RO-Crate contain a prospective copy (of - everything)? (default: []) - --full Should the RO-Crate contain a full copy (of - everything)? (default: []) - --licence LICENCES Licence(s) to attach to the generated RO-Crate - (default: []) - --crate-pid CRATE_PID - Permanent identifier to embed within the generated RO- - Crate metadata, like a pre-generated DOI (default: - None) - -``` -
-
-Subparser offline-execute - -``` -usage: WfExS-backend.py offline-execute [-h] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - -J WORKFLOWWORKINGDIRECTORY - -optional arguments: - -h, --help show this help message and exit - -J WORKFLOWWORKINGDIRECTORY, --staged-job-dir WORKFLOWWORKINGDIRECTORY - Already staged job directory (default: None) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -``` -
-
-Subparser execute - -``` -usage: WfExS-backend.py execute [-h] -W WORKFLOWCONFIGFILENAME - [-Z SECURITYCONTEXTSCONFIGFILENAME] - [-E EXPORTSCONFIGFILENAME] - [-n NICKNAME_PREFIX] [--orcid ORCIDS] - [--public-key-file PUBLIC_KEY_FILES] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - [--inputs] [--outputs] [--workflow] - [--containers] [--prospective] [--full] - [--licence LICENCES] [--crate-pid CRATE_PID] - -optional arguments: - -h, --help show this help message and exit - -W WORKFLOWCONFIGFILENAME, --workflow-config WORKFLOWCONFIGFILENAME - Configuration file, describing workflow and inputs - (default: None) - -Z SECURITYCONTEXTSCONFIGFILENAME, --creds-config SECURITYCONTEXTSCONFIGFILENAME - Configuration file, describing security contexts, - which hold credentials and similar (default: None) - -E EXPORTSCONFIGFILENAME, --exports-config EXPORTSCONFIGFILENAME - Configuration file, describing exports which can be - done (default: None) - -n NICKNAME_PREFIX, --nickname-prefix NICKNAME_PREFIX - Nickname prefix to be used on staged workdir creation - (default: None) - --orcid ORCIDS ORCID(s) of the person(s) staging, running or - exporting the workflow scenario (default: []) - --public-key-file PUBLIC_KEY_FILES - This parameter switches on secure processing. Path to - the public key(s) to be used to encrypt the working - directory (default: []) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -ro-crate-payload: - What to include in the RO-Crate - - --inputs Should the RO-Crate contain a inputs copy (of - everything)? (default: []) - --outputs Should the RO-Crate contain a outputs copy (of - everything)? (default: []) - --workflow Should the RO-Crate contain a workflow copy (of - everything)? (default: []) - --containers Should the RO-Crate contain a containers copy (of - everything)? (default: []) - --prospective Should the RO-Crate contain a prospective copy (of - everything)? (default: []) - --full Should the RO-Crate contain a full copy (of - everything)? (default: []) - --licence LICENCES Licence(s) to attach to the generated RO-Crate - (default: []) - --crate-pid CRATE_PID - Permanent identifier to embed within the generated RO- - Crate metadata, like a pre-generated DOI (default: - None) - -``` -
-
-Subparser export-results - -``` -usage: WfExS-backend.py export-results [-h] [--orcid ORCIDS] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - -J WORKFLOWWORKINGDIRECTORY - [--licence LICENCES] - -optional arguments: - -h, --help show this help message and exit - --orcid ORCIDS ORCID(s) of the person(s) staging, running or - exporting the workflow scenario (default: []) - -J WORKFLOWWORKINGDIRECTORY, --staged-job-dir WORKFLOWWORKINGDIRECTORY - Already staged job directory (default: None) - --licence LICENCES Licence(s) to attach to the exported contents - (default: []) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -``` -
-
-Subparser export-crate - -``` -usage: WfExS-backend.py export-crate [-h] [--orcid ORCIDS] - [--private-key-file PRIVATE_KEY_FILE] - [--private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR] - -J WORKFLOWWORKINGDIRECTORY [--inputs] - [--outputs] [--workflow] [--containers] - [--prospective] [--full] - [--licence LICENCES] - [--crate-pid CRATE_PID] - -optional arguments: - -h, --help show this help message and exit - --orcid ORCIDS ORCID(s) of the person(s) staging, running or - exporting the workflow scenario (default: []) - -J WORKFLOWWORKINGDIRECTORY, --staged-job-dir WORKFLOWWORKINGDIRECTORY - Already staged job directory (default: None) - -secure workdir arguments: - Private key and passphrase to access secured working directories - - --private-key-file PRIVATE_KEY_FILE - This parameter passes the name of the file containing - the private key needed to unlock an encrypted working - directory. (default: None) - --private-key-passphrase-envvar PRIVATE_KEY_PASSPHRASE_ENVVAR - This parameter passes the name of the environment - variable containing the passphrase needed to decrypt - the private key needed to unlock an encrypted working - directory. (default: ) - -ro-crate-payload: - What to include in the RO-Crate - - --inputs Should the RO-Crate contain a inputs copy (of - everything)? (default: []) - --outputs Should the RO-Crate contain a outputs copy (of - everything)? (default: []) - --workflow Should the RO-Crate contain a workflow copy (of - everything)? (default: []) - --containers Should the RO-Crate contain a containers copy (of - everything)? (default: []) - --prospective Should the RO-Crate contain a prospective copy (of - everything)? (default: []) - --full Should the RO-Crate contain a full copy (of - everything)? (default: []) - --licence LICENCES Licence(s) to attach to the generated RO-Crate - (default: []) - --crate-pid CRATE_PID - Permanent identifier to embed within the generated RO- - Crate metadata, like a pre-generated DOI (default: - None) -``` -
- -WfExS commands are: - -![WfExS-backend commands](development-docs/wfexs-commands.svg) - -* `init`: This command is used to initialize a WfExS installation. It takes a local configuration file through `-L` parameter, and it can both generate crypt4gh paired keys for installation work and identification purposes and update the path to them in case they are not properly defined. Those keys are needed to decrypt encrypted working directories, and in the future to decrypt secure requests and encrypt secure results. - -* `config-validate`: This command is used to validate workflow staging configuration file, as well as its paired security context configuration file using the corresponding JSON Schemas. It honours `-L`, `-W`, `-Z` parameters and `WFEXS_CONFIG_FILE` environment variable. If command is not set, this is the default command to be run. - -* `cache`: This command is used to manage the different caches, helping in their own lifecycle (list, fetch, inject, validate, remove). It recognizes both `-L` parameter and `WFEXS_CONFIG_FILE` environment variable. - -* `stage`: This command is used to first validate workflow staging and security context configuration files, then fetch all the workflow preconditions and files, staging them for an execution. It honours `-L`, `-W`, `-Z` parameters and `WFEXS_CONFIG_FILE` environment variable, and once the staging is finished it prints the path to the parent execution environment. - -* `re-stage`: This command is used to reuse an already staged workflow in a completely uncoupled working directory. The command allows replacing some of the parameters. - -* `import`: This command is used to fetch and import a previously generated Workflow Run RO-Crate, for reproducibility. The command allows replacing some of the original parameters, for replicability. - -* `staged-workdir`: This command is complementary to `stage`. It recognizes both `-L` parameter and `WFEXS_CONFIG_FILE` environment variable. This command has several subcommands which help on the workflow execution lifecycle (list available working directories and their statuses, remove some of them, execute either a shell or a custom command in a working directory context, execute, export prospective and retrospective provenance to RO-Crate, ...). - -* `export`: This command is complementary to `stage`. It recognizes both `-L` parameter and `WFEXS_CONFIG_FILE` environment variable, and depends on `-J` parameter to locate the execution environment directory to be used, properly staged through `stage`. It also depends on both -E and -Z parameters, to declare the different export patterns and the needed credentials to complete the rules. This command has a couple of subcommands to list previously exported items and to do those exports. - -* `export-stage` _(to be done)_: This command is complementary to `stage`. It recognizes both `-L` parameter and `WFEXS_CONFIG_FILE` environment variable, and depends on `-J` parameter to locate the execution environment directory to be used, properly staged through `stage`. It will bundle the description of the staged environment in an RO-Crate, in order to be reused later, or uploaded to places like WorkflowHub. All of this assuming there is an stage there. - -* `offline-execute`: This command is complementary to `stage`. It recognizes both `-L` parameter and `WFEXS_CONFIG_FILE` environment variable, and depends on `-J` parameter to locate the execution environment directory to be used, properly staged through `stage`. 
It executes the workflow, assuming all the preconditions are in place.
-
-* `export-results`: This command is complementary to `offline-execute`. It recognizes both `-L` parameter and `WFEXS_CONFIG_FILE` environment variable, and depends on `-J` parameter to locate the execution environment directory to be used, properly staged through `stage` and executed through `offline-execute`. It export the results from an execution at a working directory, assuming there is an execution there. Export rules should be described in the file used in `-W` parameter when the working directory was staged.
-
-* `export-crate`: This command is complementary to `export-results`. It recognizes both `-L` parameter and `WFEXS_CONFIG_FILE` environment variable, and depends on `-J` parameter to locate the execution environment directory to be used, properly staged through `stage` and executed through `offline-execute` and `export-results`. It bundles the metadata and provenance results from an execution at a working directory in an RO-Crate, assuming there is an execution there.
-
-* `mount-workdir`: This command is a helper to inspect encrypted execution environments, as it mounts its working directory for a limited time. As `export-stage`, `offline-execute` or `export-results`, it recognizes both `-L` parameter and `WFEXS_CONFIG_FILE` environment variable, and depends on `-J` parameter.
-
-* `execute`: This command's behaviour is equivalent to `stage` followed by `export-stage`, `offline-execute`, `export-results` and `export-crate`.
-
-When the execution has finished properly, the working directory `outputs` subdirectory should contain both the outputs and an `execution.crate.zip`, which can be used to create a workflow entry in .
+Also, a description of the different WfExS commands is available [at the command line section of the documentation](https://wfexs-backend.readthedocs.io/en/latest/command-line.html).
 
 ## Configuration files
 
diff --git a/TODO.md b/TODO.md
index c2dd40f5..5056b8de 100644
--- a/TODO.md
+++ b/TODO.md
@@ -65,7 +65,7 @@ references to the inputs, that we are going to use to instantiate the workflows.
 
   - [x] **Step 9.d**: Add full circle capabilities. Re-execute workflow with the very same parameters from previously generated RO-Crate (only metadata).
 
-  - [ ] **Step 9.e**: Add full circle capabilities. Re-execute workflow with the very same parameters from previously generated RO-Crate (reusing payloads).
+  - [x] **Step 9.e**: Add full circle capabilities. Re-execute workflow with the very same parameters from previously generated RO-Crate (reusing payloads).
 
 ## Other features
 
diff --git a/container_recipes/Dockerfile b/container_recipes/Dockerfile
new file mode 100644
index 00000000..e22174b6
--- /dev/null
+++ b/container_recipes/Dockerfile
@@ -0,0 +1,67 @@
+# The default images of python are based on debian
+FROM python:3.12
+# These arguments help customize what is included in the generated image
+ARG wfexs_checkout=574fe343c0b59eecd95afbc67894456359ebe649
+ARG apptainer_version=1.3.2
+# JDK version parameters
+ARG JDK_MAJOR_VER=11
+ARG JDK_VER=${JDK_MAJOR_VER}.0.11
+ARG JDK_REV=9
+ARG OPENJ9_VER=0.26.0
+# Go version to compile
+ARG GO_VER=1.17.13
+# gocryptfs version
+ARG GOCRYPTFS_VER=v2.4.0
+# static bash version
+ARG STATIC_BASH_VER=5.1.004-1.2.2
+# static busybox version
+ARG BUSYBOX_VER=1.35.0
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+# Install apptainer
+RUN DPKG_ARCH=$(dpkg --print-architecture) && \
+    wget -nv \
+    https://github.com/apptainer/apptainer/releases/download/v${apptainer_version}/apptainer_${apptainer_version}_${DPKG_ARCH}.deb \
+    https://github.com/apptainer/apptainer/releases/download/v${apptainer_version}/apptainer-suid_${apptainer_version}_${DPKG_ARCH}.deb && \
+    apt-get update && \
+    apt-get install -y ./*.deb && \
+    rm -f *.deb
+# Install docker-ce-cli
+# hadolint ignore=DL3008
+RUN wget -nv -O /etc/apt/keyrings/docker.asc https://download.docker.com/linux/debian/gpg && \
+    chmod a+r /etc/apt/keyrings/docker.asc && \
+    echo \
+    "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian \
+    $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
+    tee /etc/apt/sources.list.d/docker.list > /dev/null && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends docker-ce-cli
+# Install both podman and encfs
+# hadolint ignore=DL3008
+RUN apt-get install -y podman encfs
+# Install WfExS, trusting the installers in the commit rather than the
+# ones in the docker recipe
+WORKDIR /
+# hadolint ignore=DL3003
+RUN git clone --filter=blob:none --no-checkout https://github.com/inab/WfExS-backend.git && \
+    cd WfExS-backend && \
+    git sparse-checkout init --cone && \
+    PYVER=$(python -c 'import sys; print("{}.{}".format(sys.version_info.major, sys.version_info.minor))') && \
+    git sparse-checkout set constraints-${PYVER}.txt container_recipes/basic-installer.bash container_recipes/full-installer.bash && \
+    git checkout "${wfexs_checkout}" && \
+    pip install --no-cache-dir "git+https://github.com/inab/WfExS-backend.git@${wfexs_checkout}" -c constraints-${PYVER}.txt && \
+    mv container_recipes/* / && \
+    cd .. && \
+    rm -rf WfExS-backend
+# Transfer other third party installation script
+# COPY basic-installer.bash full-installer.bash ./
+# Install third party software
+RUN JDK_MAJOR_VER="${JDK_MAJOR_VER}" \
+    JDK_VER="${JDK_VER}" \
+    JDK_REV="${JDK_REV}" \
+    OPENJ9_VER="${OPENJ9_VER}" \
+    GO_VER="${GO_VER}" \
+    GOCRYPTFS_VER="${GOCRYPTFS_VER}" \
+    STATIC_BASH_VER="${STATIC_BASH_VER}" \
+    BUSYBOX_VER="${BUSYBOX_VER}" \
+    bash /full-installer.bash
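+# Usage sketch (hypothetical tag and values): every ARG declared above can be
+# overridden at build time with --build-arg, e.g.
+#   docker build -t inab/wfexs-backend:custom \
+#     --build-arg wfexs_checkout=main \
+#     --build-arg apptainer_version=1.3.2 \
+#     container_recipes/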
+CMD ["bash"]
\ No newline at end of file
diff --git a/container_recipes/Singularity.def b/container_recipes/Singularity.def
new file mode 100644
index 00000000..5f069878
--- /dev/null
+++ b/container_recipes/Singularity.def
@@ -0,0 +1,83 @@
+Bootstrap: docker
+From: python:3.12
+Stage: spython-base
+
+%arguments
+# The default images of python are based on debian
+# These arguments help customize what is included in the generated image
+wfexs_checkout=574fe343c0b59eecd95afbc67894456359ebe649
+apptainer_version=1.3.2
+# JDK version parameters
+JDK_MAJOR_VER=11
+# Nested arguments are not allowed
+JDK_VER=11.0.11
+JDK_REV=9
+OPENJ9_VER=0.26.0
+# Go version to compile
+GO_VER=1.17.13
+# gocryptfs version
+GOCRYPTFS_VER=v2.4.0
+# static bash version
+STATIC_BASH_VER=5.1.004-1.2.2
+# static busybox version
+BUSYBOX_VER=1.35.0
+
+
+%post
+set -ue
+# Install apptainer
+DPKG_ARCH=$(dpkg --print-architecture) && \
+wget -nv \
+https://github.com/apptainer/apptainer/releases/download/v{{ apptainer_version }}/apptainer_{{ apptainer_version }}_${DPKG_ARCH}.deb \
+https://github.com/apptainer/apptainer/releases/download/v{{ apptainer_version }}/apptainer-suid_{{ apptainer_version }}_${DPKG_ARCH}.deb && \
+apt-get update && \
+apt-get install -y ./*.deb && \
+rm -f *.deb
+# Install docker-ce-cli
+# hadolint ignore=DL3008
+wget -nv -O /etc/apt/keyrings/docker.asc https://download.docker.com/linux/debian/gpg && \
+chmod a+r /etc/apt/keyrings/docker.asc && \
+echo \
+"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian \
+$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
+tee /etc/apt/sources.list.d/docker.list > /dev/null && \
+apt-get update && \
+apt-get install -y --no-install-recommends docker-ce-cli
+# Install both podman and encfs
+# hadolint ignore=DL3008
+apt-get install -y podman encfs
+# Install WfExS, trusting the installers in the commit rather than the
+# ones in the docker recipe
+mkdir -p /
+cd /
+# hadolint ignore=DL3003
+git clone --filter=blob:none --no-checkout https://github.com/inab/WfExS-backend.git && \
+cd WfExS-backend && \
+git sparse-checkout init --cone && \
+PYVER=$(python -c 'import sys; print("{}.{}".format(sys.version_info.major, sys.version_info.minor))') && \
+git sparse-checkout set constraints-${PYVER}.txt container_recipes/basic-installer.bash container_recipes/full-installer.bash && \
+git checkout "{{ wfexs_checkout }}" && \
+pip install --no-cache-dir "git+https://github.com/inab/WfExS-backend.git@{{ wfexs_checkout }}" -c constraints-${PYVER}.txt && \
+mv container_recipes/* / && \
+cd .. && \
+rm -rf WfExS-backend
+# Transfer other third party installation script
+# COPY basic-installer.bash full-installer.bash ./
+# Install third party software
+JDK_MAJOR_VER="{{ JDK_MAJOR_VER }}" \
+JDK_VER="{{ JDK_VER }}" \
+JDK_REV="{{ JDK_REV }}" \
+OPENJ9_VER="{{ OPENJ9_VER }}" \
+GO_VER="{{ GO_VER }}" \
+GOCRYPTFS_VER="{{ GOCRYPTFS_VER }}" \
+STATIC_BASH_VER="{{ STATIC_BASH_VER }}" \
+BUSYBOX_VER="{{ BUSYBOX_VER }}" \
+bash /full-installer.bash
+
+%runscript
+cd /
+exec /bin/bash "$@"
+
+%startscript
+cd /
+exec /bin/bash "$@"
diff --git a/apptainer-local-installer.bash b/container_recipes/apptainer-local-installer.bash
similarity index 100%
rename from apptainer-local-installer.bash
rename to container_recipes/apptainer-local-installer.bash
diff --git a/apptainer12-local-installer.bash b/container_recipes/apptainer12-local-installer.bash
similarity index 100%
rename from apptainer12-local-installer.bash
rename to container_recipes/apptainer12-local-installer.bash
diff --git a/basic-installer.bash b/container_recipes/basic-installer.bash
similarity index 65%
rename from basic-installer.bash
rename to container_recipes/basic-installer.bash
index 79553e5e..56f5d62d 100755
--- a/basic-installer.bash
+++ b/container_recipes/basic-installer.bash
@@ -16,15 +16,15 @@
 # limitations under the License.
 
 # Getting the installation directory
-wfexsDir="$(dirname "$0")"
-case "${wfexsDir}" in
+scriptDir="$(dirname "$0")"
+case "${scriptDir}" in
 	/*)
 		# Path is absolute
 		true
 		;;
 	*)
 		# Path is relative
-		wfexsDir="$(readlink -f "${wfexsDir}")"
+		scriptDir="$(readlink -f "${scriptDir}")"
 		;;
 esac
 
@@ -32,8 +32,10 @@ set -eu
 
 failed=
 for cmd in curl tar gzip mktemp grep ; do
+	set +e
 	type -a "$cmd" 2> /dev/null
 	retval=$?
+	set -e
 	if [ "$retval" -ne 0 ] ; then
 		failed=1
 		echo "ERROR: Command $cmd not found in PATH and needed for the installation"
@@ -89,8 +91,10 @@ checkInstallGO() {
 }
 
 for cmd in python3 pip ; do
+	set +e
 	type -a "$cmd" 2> /dev/null
 	retval=$?
+	set -e
 	if [ "$retval" -ne 0 ] ; then
 		failed=1
 		echo "ERROR: Command $cmd not found in PATH and needed for the installation"
@@ -99,8 +103,10 @@ done
 
 failed=
 for lib in libmagic.so ; do
+	set +e
 	ldconfig -p | grep -qF "/${lib}"
 	retval=$?
+	set -e
 	if [ "$retval" -ne 0 ] ; then
 		failed=1
 		echo "ERROR: Library $lib not found in ldconfig cache and needed for the installation"
@@ -111,40 +117,62 @@ if [ -n "$failed" ] ; then
 	exit 1
 fi
 
-#if declare -F deactivate >& /dev/null ; then
+# Detect whether WfExS is already installed
 is_minimal_ver="$(python3 -c 'import sys; print("{}.{}".format(sys.version_info.major, sys.version_info.minor) if tuple(sys.version_info) >= (3, 7, 0, "final", 0) else "")')"
 if [ -z "$is_minimal_ver" ] ; then
 	echo "ERROR: Python 3.7 or newer is required, but $(python3 -V) was detected" 1>&2
 	exit 1
 fi
-envDir="$(python3 -c 'import sys; print("" if sys.prefix==sys.base_prefix else sys.prefix)')"
-if [ -n "${envDir}" ] ; then
-	echo "Using currently active environment ${envDir} to install the dependencies"
+# Is WfExS already installed??? (case of Docker)
+set +eu
+python3 -P -c "import sys"$'\n'"try:"$'\n'"    import wfexs_backend"$'\n'"except:"$'\n'"    sys.exit(1)"$'\n'"sys.exit(0)"
+retval=$?
+set -eu
+if [ "$retval" -eq 0 ] ; then
+	envDir="$(python3 -c 'import sys; print(sys.prefix)')"
 else
-	envDir="${wfexsDir}/.pyWEenv"
+	envDir=""
+fi
+
+# Try installing WfExS in an environment in case it is not
+# already installed.
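+# At this point "$envDir" is non-empty only when the wfexs_backend module was
+# already importable (e.g. inside the image built from container_recipes/Dockerfile);
+# otherwise a virtual environment is created or reused below.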
+if [ -z "$envDir" ]; then
+#if declare -F deactivate >& /dev/null ; then
+	requirementsFile="$(readlink -f "${scriptDir}"/../requirements.txt)"
+	wfexsDir="$(dirname "${requirementsFile}")"
+
+	envDir="$(python3 -c 'import sys; print("" if sys.prefix==sys.base_prefix else sys.prefix)')"
+	if [ -n "${envDir}" ] ; then
+		echo "Using currently active environment ${envDir} to install the dependencies"
+	elif [ ! -f "${requirementsFile}" ] ; then
+		echo "ERROR: Requirements file needed for the installation is not available at $requirementsFile."
+		exit 1
+	else
+		envDir="${wfexsDir}/.pyWEenv"
 
-	echo "Creating WfExS-backend python virtual environment at ${envDir}"
+		echo "Creating WfExS-backend python virtual environment at ${envDir}"
 
-	# Checking whether the environment exists
-	if [ ! -f "${envDir}" ] ; then
-		python3 -m venv "${envDir}"
-	fi
+		# Checking whether the environment directory exists
+		if [ ! -d "${envDir}" ] ; then
+			python3 -m venv "${envDir}"
+		fi
 
-	# Activating the python environment
-	envActivate="${envDir}/bin/activate"
-	source "${envActivate}"
-	pip install --upgrade pip wheel
-fi
+		# Activating the python environment
+		envActivate="${envDir}/bin/activate"
+		source "${envActivate}"
+		pip install --require-virtualenv --upgrade pip wheel
+	fi
 
-# Checking whether the modules were already installed
-echo "Installing WfExS-backend python dependencies"
-pip install -r "${wfexsDir}"/requirements.txt
+	# Checking whether the modules were already installed
+	echo "Installing WfExS-backend python dependencies"
+	pip install --require-virtualenv -r "${requirementsFile}"
 
-# Now, should we run something wrapped?
-if [ $# != 0 ] ; then
-	pip install -r "${wfexsDir}"/dev-requirements.txt -r "${wfexsDir}"/mypy-requirements.txt
-	"$@"
+	# Now, should we run something wrapped?
+	if [ $# != 0 ] ; then
+		pip install --require-virtualenv -r "${wfexsDir}"/dev-requirements.txt -r "${wfexsDir}"/mypy-requirements.txt
+		"$@"
+	fi
 fi
 
 declare -a platformSuffixes=(
diff --git a/full-installer.bash b/container_recipes/full-installer.bash
similarity index 90%
rename from full-installer.bash
rename to container_recipes/full-installer.bash
index ff2af12c..212ca696 100755
--- a/full-installer.bash
+++ b/container_recipes/full-installer.bash
@@ -17,33 +17,35 @@
 
 # These are the software versions being installed
 # in the virtual environment
-JDK_MAJOR_VER=11
-JDK_VER=${JDK_MAJOR_VER}.0.11
-JDK_REV=9
-OPENJ9_VER=0.26.0
+# The ${VAR:=default} form only assigns when the variable is not already set,
+# so callers (e.g. the Dockerfile) can override these versions
+: ${JDK_MAJOR_VER:=11}
+: ${JDK_VER:=${JDK_MAJOR_VER}.0.11}
+: ${JDK_REV:=9}
+: ${OPENJ9_VER:=0.26.0}
 
-GO_VER=1.17.13
-GOCRYPTFS_VER=v2.3.1
-STATIC_BASH_VER=5.1.004-1.2.2
-BUSYBOX_VER=1.35.0
+: ${GO_VER:=1.17.13}
+: ${GOCRYPTFS_VER:=v2.4.0}
+: ${STATIC_BASH_VER:=5.1.004-1.2.2}
+: ${BUSYBOX_VER:=1.35.0}
 
 # Getting the installation directory
-wfexsDir="$(dirname "$0")"
-case "${wfexsDir}" in
+scriptDir="$(dirname "$0")"
+case "${scriptDir}" in
 	/*)
 		# Path is absolute
 		true
 		;;
 	*)
 		# Path is relative
-		wfexsDir="$(readlink -f "${wfexsDir}")"
+		scriptDir="$(readlink -f "${scriptDir}")"
 		;;
 esac
 
 failed=
 for cmd in mktemp ; do
+	set +e
 	type -a "$cmd" 2> /dev/null
 	retval=$?
+ set -e if [ "$retval" -ne 0 ] ; then failed=1 echo "ERROR: Command $cmd not found in PATH and needed for the installation" @@ -66,7 +68,13 @@ cleanup() { rm -rf "${downloadDir}" } -trap cleanup EXIT ERR +cleanuperr() { + cleanup + exit 1 +} + +trap cleanup EXIT +trap cleanuperr ERR set -eu @@ -76,11 +84,9 @@ declare -a input_params=( "$@" ) if [ $# -gt 0 ]; then shift $# fi -source "${wfexsDir}/basic-installer.bash" - -GO_VER=1.17.13 - - +trap - ERR +source "${scriptDir}/basic-installer.bash" +trap cleanuperr ERR declare -A archesJDK=( [x86_64]=x64 @@ -147,10 +153,8 @@ else gocryptfs_url="https://github.com/rfjakob/gocryptfs/releases/download/${GOCRYPTFS_VER}/gocryptfs_${GOCRYPTFS_VER}_${platformOS}-static_${platformArchGO}.tar.gz" echo "Installing static gocryptfs ${GOCRYPTFS_VER} from ${gocryptfs_url}" set +e - trap - ERR ( trap - EXIT ERR ; cd "${downloadDir}" && curl -f -L -O "${gocryptfs_url}" && tar -x -C "${envDir}/bin" -f "${downloadDir}"/gocryptfs*.tar.gz ) retval=$? - trap cleanup ERR set -e if [ "$retval" -ne 0 ] ; then # Get compiler @@ -168,10 +172,10 @@ if [ -x "${envDir}/bin/${staticBash}" ] ; then else static_bash_url="https://github.com/robxu9/bash-static/releases/download/${STATIC_BASH_VER}/${staticBash}" echo "Installing static bash ${STATIC_BASH_VER} from ${static_bash_url}" - trap - ERR - ( cd "${downloadDir}" && curl -f -L -O "${static_bash_url}" ) + set +e + ( trap - EXIT ERR ; cd "${downloadDir}" && curl -f -L -O "${static_bash_url}" ) retval=$? - trap cleanup ERR + set -e if [ "$retval" -eq 0 ] ; then mv "${downloadDir}/${staticBash}" "${envDir}/bin/${staticBash}" chmod +x "${envDir}/bin/${staticBash}" @@ -188,10 +192,10 @@ for binName in ps ; do else static_bin_url="https://busybox.net/downloads/binaries/${BUSYBOX_VER}-${platformSuffixRev}-musl/busybox_${binName^^}" echo "Installing busybox ${binName} ${BUSYBOX_VER} from ${static_bin_url}" - trap - ERR - ( cd "${downloadDir}" && curl -f -L -o "${staticBin}" "${static_bin_url}" ) + set +e + ( trap - EXIT ERR ; cd "${downloadDir}" && curl -f -L -o "${staticBin}" "${static_bin_url}" ) retval=$? 
- trap cleanup ERR + set -e if [ "$retval" -eq 0 ] ; then mv "${downloadDir}/${staticBin}" "${envDir}/bin/${staticBin}" chmod +x "${envDir}/bin/${staticBin}"
diff --git a/fuse2fs-local-installer.bash b/container_recipes/fuse2fs-local-installer.bash similarity index 100% rename from fuse2fs-local-installer.bash rename to container_recipes/fuse2fs-local-installer.bash
diff --git a/singularity-local-installer.bash b/container_recipes/singularity-local-installer.bash similarity index 100% rename from singularity-local-installer.bash rename to container_recipes/singularity-local-installer.bash
diff --git a/squashfuse-local-installer.bash b/container_recipes/squashfuse-local-installer.bash similarity index 100% rename from squashfuse-local-installer.bash rename to container_recipes/squashfuse-local-installer.bash
diff --git a/development-docs/wfexs-commands.graphml b/development-docs/wfexs-commands.graphml deleted file mode 100644 index ade39ce7..00000000 --- a/development-docs/wfexs-commands.graphml +++ /dev/null @@ -1,988 +0,0 @@
[988 lines of deleted yEd GraphML markup omitted here: the removed diagram mapped the WfExS commands (init, config-validate, stage, re-stage, import, execute, offline-execute, mount-workdir, export-stage, export-results, export-crate, plus the cache, staged-workdir and export subcommand families) and their relation to the shared caching directory and the encrypted working directory.]
diff --git a/development-docs/wfexs-commands.svg b/development-docs/wfexs-commands.svg deleted file mode 100644 index 35511f8e..00000000 --- a/development-docs/wfexs-commands.svg +++ /dev/null @@ -1,589 +0,0 @@
[589 lines of deleted SVG markup omitted here: the removed file was the rendered counterpart of wfexs-commands.graphml, i.e. the same WfExS command diagram exported as SVG.]
diff --git a/installer.bash b/installer.bash index 4f3db3cc..ea26958e 120000 --- a/installer.bash +++ b/installer.bash @@ -1 +1 @@ -full-installer.bash \ No newline at end of file +container_recipes/full-installer.bash \ No newline at end of file
diff --git a/tests/pushers/test_cache.py b/tests/pushers/test_cache.py index 419ea01c..9015a99b 100644 --- a/tests/pushers/test_cache.py +++ b/tests/pushers/test_cache.py @@ -120,7 +120,7 @@ def test_cache_push(tmpdir) -> "None": # type: ignore[no-untyped-def] # First, instantiate WfExS backend temppath = tmpdir.mkdir("TEMP") - bootstrap_ok, test_local_config, config_directory = WfExSBackend.bootstrap( + bootstrap_ok, test_local_config, config_directory = WfExSBackend.bootstrap_config( local_config_ro={ "cacheDir": "CACHE", "workDir": "WORKDIRS",
diff --git a/wfexs_backend/__init__.py b/wfexs_backend/__init__.py index d0443b02..5f736922 100644 --- a/wfexs_backend/__init__.py +++ b/wfexs_backend/__init__.py @@ -21,7 +21,7 @@ __license__ = "Apache 2.0" # https://www.python.org/dev/peps/pep-0396/ -__version__ = "0.99.9" +__version__ = "1.0.0a0" __url__ = "https://github.com/inab/WfExS-backend" __official_name__ = "WfExS-backend"
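The tests/pushers/test_cache.py hunk above and the __main__.py hunk below are the two visible call sites of the `WfExSBackend.bootstrap` to `bootstrap_config` rename; the returned triple is unchanged. A hedged migration sketch — only the method name, keyword names and return shape come from this diff, while the import path and all argument values are assumptions:

```python
# Sketch of the renamed classmethod; nothing here beyond the call shape
# is guaranteed by this diff.
from wfexs_backend.wfexs import WfExSBackend  # import path assumed

updated_config, local_config, config_directory = WfExSBackend.bootstrap_config(
    {"cacheDir": "CACHE", "workDir": "WORKDIRS"},  # illustrative local configuration
    None,  # illustrative config directory; __main__.py may pass None here
    key_prefix="local_config",  # illustrative; __main__.py passes config_relname
)
```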
diff --git a/wfexs_backend/__main__.py b/wfexs_backend/__main__.py index 790e5498..219d29bb 100644 --- a/wfexs_backend/__main__.py +++ b/wfexs_backend/__main__.py @@ -79,6 +79,7 @@ class BasicLoggingConfigDict(TypedDict): from yaml import Loader as YAMLLoader, Dumper as YAMLDumper from .security_context import SecurityContextVault +from .side_caches import populate_side_caches from .utils.rocrate import ( ReproducibilityLevel, ) @@ -92,6 +93,10 @@ class WfExS_Commands(StrDocEnum): + PopulateSideCaches = ( + "populate-side-caches", + "Populate read-only side caches which live in XDG_CACHE_HOME (shared by all the WfExS instances of the same user)", + ) Init = ("init", "Init local setup") Cache = ("cache", "Cache handling subcommands") ConfigValidate = ( @@ -696,6 +701,7 @@ def processStagedWorkdirCommand( private_key_passphrase = "" # Getting the list of licences (in case they are needed) + op_licences: "Sequence[str]" if hasattr(args, "licences") and args.licences is not None: op_licences = args.licences else: @@ -928,10 +934,11 @@ def processStagedWorkdirCommand( wfSetup.nickname, ) ) + expanded_licences = wB.curate_licence_list(op_licences) wfInstance.createStageResearchObject( filename=args.staged_workdir_command_args[1], payloads=doMaterializedROCrate, - licences=op_licences, + licences=expanded_licences, resolved_orcids=resolved_orcids, crate_pid=op_crate_pid, ) @@ -944,10 +951,11 @@ def processStagedWorkdirCommand( wfSetup.nickname, ) ) + expanded_licences = wB.curate_licence_list(op_licences) wfInstance.createResultsResearchObject( filename=args.staged_workdir_command_args[1], payloads=doMaterializedROCrate, - licences=op_licences, + licences=expanded_licences, resolved_orcids=resolved_orcids, crate_pid=op_crate_pid, ) @@ -1089,6 +1097,8 @@ def _get_wfexs_argparse_internal( ap_i = genParserSub(sp, WfExS_Commands.Init) + ap_psd = genParserSub(sp, WfExS_Commands.PopulateSideCaches) + ap_c = genParserSub(sp, WfExS_Commands.Cache) ap_c.add_argument( "cache_command", @@ -1269,6 +1279,11 @@ def main() -> None: logLevel if logLevel > logging.WARNING else logging.WARNING ) + # Very early command + if command == WfExS_Commands.PopulateSideCaches: + populate_side_caches() + sys.exit(0) + # First, try loading the configuration file localConfigFilename = args.localConfigFilename if localConfigFilename and os.path.exists(localConfigFilename): @@ -1321,7 +1336,7 @@ def main() -> None: WfExS_Commands.Import, WfExS_Commands.Execute, ): - updated_config, local_config, config_directory = WfExSBackend.bootstrap( + updated_config, local_config, config_directory = WfExSBackend.bootstrap_config( local_config, config_directory, key_prefix=config_relname ) # This is needed because config directory could have been empty
diff --git a/wfexs_backend/common.py b/wfexs_backend/common.py index 13417850..717b386f 100644 --- a/wfexs_backend/common.py +++ b/wfexs_backend/common.py @@ -565,8 +565,8 @@ class LocalWorkflow(NamedTuple): relPath: Inside the checkout, the relative path to the workflow definition effectiveCheckout: hex hash of the materialized checkout langVersion: workflow language version / revision - relPathFiles: files composing the workflow, which can be either local - or remote ones (i.e. CWL) + relPathFiles: files and directories composing the workflow, which can + be either local or remote ones (e.g. CWL) """ dir: "PathlibLike" @@ -751,3 +751,4 @@ class StagedExecution(NamedTuple): outputMetaDir: "Optional[RelPath]" = None diagram: "Optional[RelPath]" = None logfile: "Sequence[RelPath]" = [] + profiles: "Optional[Sequence[str]]" = None
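The container_factories hunks just below replace per-field comparison and list membership with an engine-specific canonical tag (the new `generateCanonicalTag` abstract method), so that spellings such as `ubuntu:22.04` and `docker.io/library/ubuntu:22.04` collapse into one materialized container. A minimal standalone sketch of that pattern, with plain strings standing in for the real `Container` and `ContainerTaggedName` types:

```python
# Standalone sketch of the canonical-tag deduplication introduced in the
# following hunks; names mirror the diff, types are deliberately simplified.
from typing import Callable, Dict, List, Optional

def materialize_unique(
    tags: List[str],
    canonical: Callable[[str], str],        # stands in for generateCanonicalTag
    materialize: Callable[[str], Optional[str]],
) -> List[str]:
    materialized: Dict[str, str] = {}       # canonical tag -> materialized container
    missing: List[str] = []
    for tag in tags:
        key = canonical(tag)
        if key in materialized:
            continue  # "ubuntu" and "docker.io/library/ubuntu" land on the same key
        container = materialize(tag)
        if container is None:
            missing.append(tag)
        else:
            materialized[key] = container
    if missing:
        raise ValueError(f"not found: {', '.join(missing)}")
    return list(materialized.values())
```

Injectable containers are matched the same way in the diff: both sides are first reduced to their canonical tag, instead of comparing `origTaggedName` and `registries` field by field.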
diff --git a/wfexs_backend/container_factories/__init__.py b/wfexs_backend/container_factories/__init__.py index fe916881..30117a80 100644 --- a/wfexs_backend/container_factories/__init__.py +++ b/wfexs_backend/container_factories/__init__.py @@ -700,7 +700,7 @@ def materializeContainers( """ It is assured the containers are materialized """ - materialized_containers: "MutableSequence[Container]" = [] + materialized_containers_h: "MutableMapping[str, Container]" = {} not_found_containers: "MutableSequence[str]" = [] if containers_dir is None: @@ -709,13 +709,16 @@ if self.AcceptsContainer(tag): # This one can provide partial or complete information tag_to_use: "ContainerTaggedName" = tag + contTag = self.generateCanonicalTag(tag) + if contTag in materialized_containers_h: + continue for injectable_container in injectable_containers: - if ( - injectable_container.origTaggedName == tag.origTaggedName - and injectable_container.source_type == tag.type - and injectable_container.registries == tag.registries - ): + if injectable_container.source_type != tag.type: + continue + inj_tag = self.generateCanonicalTag(injectable_container) + if contTag == inj_tag: tag_to_use = injectable_container + self.logger.info(f"Matched injected container {contTag}") break container: "Optional[Container]" @@ -730,18 +733,19 @@ offline=offline, force=force, ) - if container is not None: - if container not in materialized_containers: - materialized_containers.append(container) - else: - not_found_containers.append(tag.origTaggedName) + if container is not None: + matched_tag = self.generateCanonicalTag(container) + if matched_tag not in materialized_containers_h: + materialized_containers_h[matched_tag] = container + else: + not_found_containers.append(tag.origTaggedName) if len(not_found_containers) > 0: raise ContainerNotFoundException( f"Could not fetch metadata for next tags because they were not found:\n{', '.join(not_found_containers)}" ) - return materialized_containers + return list(materialized_containers_h.values()) @abc.abstractmethod def materializeSingleContainer( @@ -793,6 +797,13 @@ def deploySingleContainer( """ pass + @abc.abstractmethod + def generateCanonicalTag(self, container: "ContainerTaggedName") -> "str": + """ + It provides a way to help comparing two container tags + """ + pass + def supportsFeature(self, feat: "str") -> "bool": """ Checking whether some feature is supported by this container
diff --git a/wfexs_backend/container_factories/docker_container.py b/wfexs_backend/container_factories/docker_container.py index 308656bd..575707d5 100644 --- a/wfexs_backend/container_factories/docker_container.py +++ b/wfexs_backend/container_factories/docker_container.py @@ -17,6 +17,7 @@ # limitations under the License. from __future__ import absolute_import +import dataclasses import json import os from typing import ( @@ -163,16 +164,24 @@ def _genDockerTag( else tag_name ) + registries = tag.registries + if registries is None: + registries = { + ContainerType.Docker: DEFAULT_DOCKER_REGISTRY, + } # Should we enrich the tag with the registry?
- if isinstance(tag.registries, dict) and ( - ContainerType.Docker in tag.registries - ): - registry = tag.registries[ContainerType.Docker] + registry = registries.get(ContainerType.Docker) + if registry is not None: # Bare case if "/" not in dockerTag: dockerTag = f"{registry}/library/{dockerTag}" elif dockerTag.find("/") == dockerTag.rfind("/"): - dockerTag = f"{registry}/{dockerTag}" + slash_pos = dockerTag.find("/") + possible_registry = dockerTag[0:slash_pos] + if "." in possible_registry: + dockerTag = f"{possible_registry}/library/{dockerTag[slash_pos+1:]}" + else: + dockerTag = f"{registry}/{dockerTag}" # Last case, it already has a registry declared # This is needed .... @@ -306,6 +315,13 @@ def materializeSingleContainer( # And now, let's materialize the new world d_retval, d_out_v, d_err_v = self._pull(dockerPullTag, matEnv) + if d_retval != 0 and dockerTag != dockerPullTag: + self.logger.warning( + f"Unable to pull {dockerPullTag}. Degrading to {dockerTag}" + ) + dockerPullTag = dockerTag + d_retval, d_out_v, d_err_v = self._pull(dockerTag, matEnv) + if d_retval == 0 and dockerTag != dockerPullTag: # Second try d_retval, d_out_v, d_err_v = self._tag(dockerPullTag, dockerTag, matEnv) @@ -327,6 +343,7 @@ def materializeSingleContainer( {}""".format( dockerTag, d_retval, d_out_v, d_err_v ) + self.logger.error(errstr) raise ContainerEngineException(errstr) # Parsing the output from docker inspect @@ -461,6 +478,7 @@ def deploySingleContainer( manifestsImageSignature: "Optional[Fingerprint]" = None manifests = None manifest = None + was_redeployed = False if ( not containerPath.is_file() and isinstance(container, Container) @@ -468,6 +486,7 @@ def deploySingleContainer( ): # Time to inject the image! link_or_copy_pathlib(container.localPath, containerPath, force_copy=True) + was_redeployed = True if not containerPath.is_file(): errmsg = f"Docker saved image {containerPath.name} is not in the staged working dir for {tag_name}" @@ -483,6 +502,7 @@ def deploySingleContainer( link_or_copy_pathlib( container.metadataLocalPath, containerPathMeta, force_copy=True ) + was_redeployed = True if not containerPathMeta.is_file(): errmsg = f"Docker saved image metadata {containerPathMeta.name} is not in the staged working dir for {tag_name}" @@ -497,8 +517,16 @@ def deploySingleContainer( manifests = signaturesAndManifest["manifests"] if isinstance(container, Container): - # Reuse the input container instance - rebuilt_container = container + if was_redeployed: + rebuilt_container = dataclasses.replace( + container, + localPath=containerPath, + metadataLocalPath=containerPathMeta, + image_signature=imageSignature_in_metadata, + ) + else: + # Reuse the input container instance + rebuilt_container = container dockerTag = rebuilt_container.taggedName else: manifest = manifests[0] @@ -601,3 +629,10 @@ def deploySingleContainer( raise ContainerEngineException(errstr) return rebuilt_container, do_redeploy + + def generateCanonicalTag(self, container: "ContainerTaggedName") -> "str": + """ + It provides a way to help comparing two container tags + """ + retval, _ = self._genDockerTag(container) + return retval diff --git a/wfexs_backend/container_factories/no_container.py b/wfexs_backend/container_factories/no_container.py index 684fca46..ff9795f3 100644 --- a/wfexs_backend/container_factories/no_container.py +++ b/wfexs_backend/container_factories/no_container.py @@ -108,3 +108,9 @@ def deploySingleContainer( assert isinstance(container, Container) return container, False + + def 
generateCanonicalTag(self, container: "ContainerTaggedName") -> "str": + """ + This is a no-op + """ + return container.origTaggedName diff --git a/wfexs_backend/container_factories/podman_container.py b/wfexs_backend/container_factories/podman_container.py index 051a5ec5..1f63538a 100644 --- a/wfexs_backend/container_factories/podman_container.py +++ b/wfexs_backend/container_factories/podman_container.py @@ -17,6 +17,7 @@ # limitations under the License. from __future__ import absolute_import +import dataclasses import json import os from typing import ( @@ -68,6 +69,7 @@ Container, ContainerEngineException, ContainerFactoryException, + DEFAULT_DOCKER_REGISTRY, DOCKER_URI_PREFIX, ) from .abstract_docker_container import ( @@ -172,7 +174,7 @@ def architecture(self) -> "Tuple[ContainerOperatingSystem, ProcessorArchitecture def _genPodmanTag( self, tag: "ContainerTaggedName", - ) -> "Tuple[URIType, str, str]": + ) -> "Tuple[URIType, str, str, str]": # It is an absolute URL, we are removing the docker:// tag_name = tag.origTaggedName if tag_name.startswith(DOCKER_PROTO): @@ -180,21 +182,27 @@ def _genPodmanTag( else: dockerTag = tag_name + registries = tag.registries + if registries is None: + registries = { + ContainerType.Docker: DEFAULT_DOCKER_REGISTRY, + } # Should we enrich the tag with the registry? - if isinstance(tag.registries, dict) and ( - (ContainerType.Docker in tag.registries) - or (ContainerType.Podman in tag.registries) - ): - if ContainerType.Podman in tag.registries: - registry = tag.registries[ContainerType.Podman] - else: - registry = tag.registries[ContainerType.Docker] + registry = registries.get(ContainerType.Podman) + if registry is None: + registry = registries.get(ContainerType.Docker) + if registry is not None: # Bare case if "/" not in dockerTag: dockerTag = f"{registry}/library/{dockerTag}" elif dockerTag.find("/") == dockerTag.rfind("/"): - dockerTag = f"{registry}/{dockerTag}" + slash_pos = dockerTag.find("/") + possible_registry = dockerTag[0:slash_pos] + if "." in possible_registry: + dockerTag = f"{possible_registry}/library/{dockerTag[slash_pos+1:]}" + else: + dockerTag = f"{registry}/{dockerTag}" # Last case, it already has a registry declared # This is needed .... @@ -216,9 +224,10 @@ def _genPodmanTag( else: dockerPullTag = dockerTag + podmanTag = DOCKER_PROTO + dockerTag podmanPullTag = DOCKER_PROTO + dockerPullTag - return cast("URIType", dockerTag), dockerPullTag, podmanPullTag + return cast("URIType", dockerTag), dockerPullTag, podmanTag, podmanPullTag def materializeSingleContainer( self, @@ -236,7 +245,7 @@ def materializeSingleContainer( # It is an absolute URL, we are removing the docker:// tag_name = tag.origTaggedName - dockerTag, dockerPullTag, podmanPullTag = self._genPodmanTag(tag) + dockerTag, dockerPullTag, podmanTag, podmanPullTag = self._genPodmanTag(tag) self.logger.info(f"downloading podman container: {tag_name} => {podmanPullTag}") @@ -312,6 +321,13 @@ def materializeSingleContainer( # And now, let's materialize the new world d_retval, d_out_v, d_err_v = self._pull(podmanPullTag, matEnv) + if d_retval != 0 and dockerTag != dockerPullTag: + self.logger.warning( + f"Unable to pull {podmanPullTag}. 
Degrading to {podmanTag}" + ) + dockerPullTag = dockerTag + d_retval, d_out_v, d_err_v = self._pull(podmanTag, matEnv) + if d_retval == 0 and dockerTag != dockerPullTag: # Second try d_retval, d_out_v, d_err_v = self._tag(dockerPullTag, dockerTag, matEnv) @@ -468,6 +484,7 @@ def deploySingleContainer( manifestsImageSignature: "Optional[Fingerprint]" = None manifests = None manifest = None + was_redeployed = False if ( not containerPath.is_file() and isinstance(container, Container) @@ -475,6 +492,7 @@ def deploySingleContainer( ): # Time to inject the image! link_or_copy_pathlib(container.localPath, containerPath, force_copy=True) + was_redeployed = True if not containerPath.is_file(): errmsg = f"Podman saved image {containerPath.name} is not in the staged working dir for {tag_name}" @@ -490,6 +508,7 @@ def deploySingleContainer( link_or_copy_pathlib( container.metadataLocalPath, containerPathMeta, force_copy=True ) + was_redeployed = True if not containerPathMeta.is_file(): errmsg = f"FATAL ERROR: Podman saved image metadata {containerPathMeta.name} is not in the staged working dir for {tag_name}" @@ -504,15 +523,26 @@ def deploySingleContainer( manifests = signaturesAndManifest["manifests"] if isinstance(container, Container): - # Reuse the input container instance - rebuilt_container = container + if was_redeployed: + rebuilt_container = dataclasses.replace( + container, + localPath=containerPath, + metadataLocalPath=containerPathMeta, + image_signature=imageSignature_in_metadata, + ) + else: + # Reuse the input container instance + rebuilt_container = container dockerTag = rebuilt_container.taggedName else: manifest = manifests[0] - dockerTag, dockerPullTag, podmanPullTag = self._genPodmanTag( - container - ) + ( + dockerTag, + dockerPullTag, + podmanTag, + podmanPullTag, + ) = self._genPodmanTag(container) image_id = signaturesAndManifest["image_id"] @@ -609,3 +639,10 @@ def deploySingleContainer( raise ContainerEngineException(errstr) return rebuilt_container, do_redeploy + + def generateCanonicalTag(self, container: "ContainerTaggedName") -> "str": + """ + It provides a way to help comparing two container tags + """ + retval, _, _, _ = self._genPodmanTag(container) + return retval diff --git a/wfexs_backend/container_factories/singularity_container.py b/wfexs_backend/container_factories/singularity_container.py index 5afb9021..38b0eac9 100644 --- a/wfexs_backend/container_factories/singularity_container.py +++ b/wfexs_backend/container_factories/singularity_container.py @@ -17,6 +17,7 @@ # limitations under the License. from __future__ import absolute_import +import dataclasses import json import os import os.path @@ -44,6 +45,7 @@ from typing import ( Any, Mapping, + MutableMapping, MutableSequence, Optional, Sequence, @@ -405,56 +407,28 @@ def _genSingTag( # Assuming it is docker isDocker = True - # Should we enrich the tag with the registry? 
- if ( - isDocker - and isinstance(tag.registries, dict) - and (common.ContainerType.Docker in tag.registries) - ): - registry = tag.registries[common.ContainerType.Docker] - # Bare case - if len(parsedTag.path) <= 1: - singTag = f"docker://{registry}/library/{parsedTag.netloc}" - parsedTag = parse.urlparse(singTag) - elif "/" not in parsedTag.path[1:]: - singTag = f"docker://{registry}/{parsedTag.netloc}{parsedTag.path}" - parsedTag = parse.urlparse(singTag) - # Last case, it already has a registry declared - # It is not an absolute URL, we are prepending the docker:// - tag_name = tag.origTaggedName - parsedTag = parse.urlparse(tag_name) - if parsedTag.scheme in self.ACCEPTED_SING_SCHEMES: - singTag = tag_name - isDocker = parsedTag.scheme == DOCKER_SCHEME - else: - if parsedTag.scheme == "": - singTag = "docker://" + tag_name - parsedTag = parse.urlparse(singTag) - else: - parsedTag = parsedTag._replace( - scheme=DOCKER_SCHEME, - netloc=parsedTag.scheme + ":" + parsedTag.path, - path="", - ) - singTag = parse.urlunparse(parsedTag) - # Assuming it is docker - isDocker = True - - # Should we enrich the tag with the registry? - if ( - isDocker - and isinstance(tag.registries, dict) - and (common.ContainerType.Docker in tag.registries) - ): - registry = tag.registries[common.ContainerType.Docker] - # Bare case - if len(parsedTag.path) <= 1: - singTag = f"docker://{registry}/library/{parsedTag.netloc}" - parsedTag = parse.urlparse(singTag) - elif "/" not in parsedTag.path[1:]: - singTag = f"docker://{registry}/{parsedTag.netloc}{parsedTag.path}" - parsedTag = parse.urlparse(singTag) - # Last case, it already has a registry declared + registries = tag.registries + if isDocker: + if registries is None: + registries = { + common.ContainerType.Docker: DEFAULT_DOCKER_REGISTRY, + } + # Should we enrich the tag with the registry? + registry = registries.get(common.ContainerType.Docker) + if registry is not None: + # Bare case + if len(parsedTag.path) <= 1: + singTag = f"docker://{registry}/library/{parsedTag.netloc}" + parsedTag = parse.urlparse(singTag) + elif "/" not in parsedTag.path[1:]: + if "." in parsedTag.netloc: + singTag = f"docker://{parsedTag.netloc}/library{parsedTag.path}" + else: + singTag = ( + f"docker://{registry}/{parsedTag.netloc}{parsedTag.path}" + ) + parsedTag = parse.urlparse(singTag) + # Last case, it already has a registry declared # Now, the singPullTag if isDocker and isinstance(tag, Container) and tag.fingerprint is not None: @@ -526,6 +500,7 @@ def _materializeSingleContainerSing( tag_name = tag.origTaggedName singTag, parsedTag, singPullTag, isDocker = self._genSingTag(tag) + singPullTagOrig = singPullTag fetch_metadata = True trusted_copy = False @@ -614,6 +589,15 @@ def _materializeSingleContainerSing( singPullTag, tmpContainerPath, matEnv ) + if s_retval != 0 and singPullTag != singTag: + self.logger.warning( + f"Unable to pull {singPullTag}. Degrading to {singTag}" + ) + singPullTag = singTag + s_retval, s_out_v, s_err_v = self._pull( + singTag, tmpContainerPath, matEnv + ) + # Reading the output and error for the report if s_retval == 0: if not tmpContainerPath.exists(): @@ -627,7 +611,7 @@ def _materializeSingleContainerSing( imageSignature = self.cc_handler._computeFingerprint(tmpContainerPath) else: errstr = f"""\ -Could not materialize singularity image {singTag} ({singPullTag}). Retval {s_retval} +Could not materialize singularity image {singTag} ({singPullTagOrig}). 
Retval {s_retval} ====== STDOUT ====== @@ -647,7 +631,7 @@ def _materializeSingleContainerSing( return FailedContainerTag( tag=tag_name, - sing_tag=singPullTag, + sing_tag=singTag, ) # At this point we should always have a image signature @@ -679,12 +663,10 @@ def _materializeSingleContainerSing( if isDocker: tag_details = dhelp.query_tag(singTag) if tag_details is None: - self.logger.error(f"FALLA {singTag}") return FailedContainerTag(tag=tag_name, sing_tag=singTag) if singTag != singPullTag: tag_pull_details = dhelp.query_tag(singPullTag) if tag_pull_details is None: - self.logger.error(f"CANALLA {singPullTag}") return FailedContainerTag(tag=tag_name, sing_tag=singPullTag) else: tag_pull_details = tag_details @@ -777,7 +759,7 @@ def materializeContainers( """ It is assured the containers are materialized """ - containersList: "MutableSequence[Container]" = [] + containersHash: "MutableMapping[str, Container]" = {} notFoundContainersList: "MutableSequence[FailedContainerTag]" = [] if containers_dir is None: @@ -793,13 +775,16 @@ def materializeContainers( continue tag_to_use: "ContainerTaggedName" = tag + singTag = self.generateCanonicalTag(tag) + if singTag in containersHash: + continue for injectable_container in injectable_containers: - if ( - injectable_container.origTaggedName == tag.origTaggedName - and injectable_container.source_type == tag.type - and injectable_container.registries == tag.registries - ): + if injectable_container.source_type != tag.type: + continue + inj_tag = self.generateCanonicalTag(injectable_container) + if singTag == inj_tag: tag_to_use = injectable_container + self.logger.info(f"Matched injected container {singTag}") break matched_container: "Union[Container, FailedContainerTag]" @@ -818,8 +803,9 @@ def materializeContainers( ) if isinstance(matched_container, Container): - if matched_container not in containersList: - containersList.append(matched_container) + matched_tag = self.generateCanonicalTag(matched_container) + if matched_tag not in containersHash: + containersHash[matched_tag] = matched_container else: notFoundContainersList.append(matched_container) @@ -834,7 +820,7 @@ def materializeContainers( ) ) - return containersList + return list(containersHash.values()) def deploySingleContainer( self, @@ -886,11 +872,33 @@ def deploySingleContainer( try: with containerPathMeta.open(mode="r", encoding="utf-8") as mH: signaturesAndManifest = cast("SingularityManifest", json.load(mH)) - imageSignature_in_metadata = signaturesAndManifest["image_signature"] + imageSignature_in_metadata = signaturesAndManifest.get( + "image_signature" + ) + # Degraded paths + if imageSignature_in_metadata is None and isinstance( + container, Container + ): + imageSignature_in_metadata = container.image_signature + # Last resort + if imageSignature_in_metadata is None: + imageSignature_in_metadata = self.cc_handler._computeFingerprint( + containerPath + ) if isinstance(container, Container): - # Reuse the input container instance - rebuilt_container = container + if was_redeployed: + rebuilt_container = dataclasses.replace( + container, + signature=imageSignature_in_metadata, + architecture=self._getContainerArchitecture(containerPath), + localPath=containerPath, + metadataLocalPath=containerPathMeta, + image_signature=imageSignature_in_metadata, + ) + else: + # Reuse the input container instance + rebuilt_container = container else: singTag, parsedTag, singPullTag, isDocker = self._genSingTag( container @@ -947,3 +955,10 @@ def deploySingleContainer( raise 
ContainerFactoryException(errmsg) return rebuilt_container, was_redeployed + + def generateCanonicalTag(self, container: "ContainerTaggedName") -> "str": + """ + It provides a way to help comparing two container tags + """ + retval, _, _, _ = self._genSingTag(container) + return retval diff --git a/wfexs_backend/ro_crate.py b/wfexs_backend/ro_crate.py index 863acdcc..b8d0f6f4 100644 --- a/wfexs_backend/ro_crate.py +++ b/wfexs_backend/ro_crate.py @@ -19,10 +19,14 @@ import atexit import copy +import errno +import http.client import inspect +import io import logging import os import pathlib +import shutil import subprocess import tempfile from typing import ( @@ -59,14 +63,13 @@ ExpectedOutput, Fingerprint, LocalWorkflow, - MaterializedInput, - MaterializedOutput, ProgsMapping, RelPath, RepoTag, RepoURL, StagedExecution, StagedSetup, + SymbolicParamName, SymbolicOutputName, URIType, WFLangVersion, @@ -96,7 +99,10 @@ import urllib.parse import uuid -from .utils.misc import lazy_import +from .utils.misc import ( + is_uri, + lazy_import, +) from .utils.rocrate import ( ContainerType2AdditionalType, ContainerTypeMetadata, @@ -107,6 +113,10 @@ WORKFLOW_RUN_NAMESPACE, ) +from .workflow_engines import ( + WorkflowEngine, +) + magic = lazy_import("magic") # import magic @@ -125,7 +135,7 @@ from rocrate.utils import ( get_norm_value, - is_url, + iso_now, ) from .fetchers import ( @@ -136,6 +146,11 @@ validate_orcid, ) +from .utils.contents import ( + MaterializedContent2AbstractGeneratedContent, + Path2AbstractGeneratedContent, +) + from .utils.digests import ( ComputeDigestFromDirectory, ComputeDigestFromFile, @@ -161,6 +176,8 @@ GeneratedDirectoryContent, LicenceDescription, MaterializedContent, + MaterializedInput, + MaterializedOutput, META_JSON_POSTFIX, NoCratableItem, NoLicence, @@ -334,18 +351,19 @@ def __init__( self.fetch_remote = fetch_remote self.validate_url = validate_url self.source = source + self.dest_path = dest_path if dest_path is not None: dest_path = pathlib.Path(dest_path) if dest_path.is_absolute(): raise ValueError("if provided, dest_path must be relative") if identifier is None: - identifier = dest_path.as_posix() + identifier = urllib.parse.quote(dest_path.as_posix()) elif identifier is None: if not isinstance(source, (str, pathlib.Path)): raise ValueError( "dest_path must be provided if source is not a path or URI" ) - elif is_url(str(source)): + elif is_uri(str(source)): identifier = os.path.basename(source) if fetch_remote else str(source) else: identifier = "./" if source == "./" else os.path.basename(source) @@ -355,7 +373,47 @@ def __init__( class FixedFile(FixedMixin, rocrate.model.file.File): # type: ignore[misc] - pass + def write(self, base_path: "Union[str, os.PathLike[str]]") -> "None": + base_path_p: "pathlib.Path" + if isinstance(base_path, pathlib.Path): + base_path_p = base_path + else: + base_path_p = pathlib.Path(base_path) + if self.dest_path is not None: + out_file_path = base_path_p / self.dest_path + else: + out_file_path = base_path_p / self.id + if isinstance(self.source, (io.BytesIO, io.StringIO)): + out_file_path.parent.mkdir(parents=True, exist_ok=True) + mode = "w" + ("b" if isinstance(self.source, io.BytesIO) else "t") + with out_file_path.open(mode=mode) as out_file: + out_file.write(self.source.getvalue()) + elif self.source is None: + # Allows to record a File entity whose @id does not exist, see #73 + warnings.warn(f"No source for {self.id}") + elif is_uri(str(self.source)): + if self.fetch_remote or self.validate_url: + with 
urllib.request.urlopen(str(self.source)) as response: + if self.validate_url: + if isinstance(response, http.client.HTTPResponse): + self._jsonld.update( # type: ignore[attr-defined] + { + "contentSize": response.getheader("Content-Length"), + "encodingFormat": response.getheader( + "Content-Type" + ), + } + ) + if not self.fetch_remote: + self._jsonld["sdDatePublished"] = iso_now() # type: ignore[attr-defined] + if self.fetch_remote: + out_file_path.parent.mkdir(parents=True, exist_ok=True) + urllib.request.urlretrieve(response.url, out_file_path) + self._jsonld["contentUrl"] = str(self.source) # type: ignore[attr-defined] + else: + out_file_path.parent.mkdir(parents=True, exist_ok=True) + if not out_file_path.exists() or not out_file_path.samefile(self.source): + shutil.copy(self.source, out_file_path) class ContainerImage(rocrate.model.entity.Entity): # type: ignore[misc] @@ -474,7 +532,32 @@ def _empty(self) -> "Mapping[str, Any]": class FixedDataset(FixedMixin, rocrate.model.dataset.Dataset): # type: ignore[misc] - pass + def write(self, base_path: "Union[str, os.PathLike[str]]") -> "None": + if isinstance(base_path, pathlib.Path): + base_path_p = base_path + else: + base_path_p = pathlib.Path(base_path) + if self.dest_path is not None: + out_path = base_path_p / self.dest_path + else: + out_path = base_path_p / self.id + if self.source is None: + pass + # out_path.mkdir(parents=True, exist_ok=True) + elif is_uri(str(self.source)): + if self.validate_url and not self.fetch_remote: + with urllib.request.urlopen(str(self.source)) as _: + self._jsonld["sdDatePublished"] = iso_now() # type: ignore[attr-defined] + if self.fetch_remote: + self.__get_parts(out_path) # type: ignore[attr-defined] + else: + if not pathlib.Path(self.source).exists(): + raise FileNotFoundError( + errno.ENOENT, os.strerror(errno.ENOENT), str(self.source) + ) + out_path.mkdir(parents=True, exist_ok=True) + if not self.crate.source: + self.crate._copy_unlisted(self.source, out_path) # type: ignore[attr-defined] class FixedWorkflow(FixedMixin, rocrate.model.computationalworkflow.ComputationalWorkflow): # type: ignore[misc] @@ -764,8 +847,8 @@ def __init__( self.crate: "FixedROCrate" self.compLang: "rocrate.model.computerlanguage.ComputerLanguage" + self.workflow_type = materializedEngine.instance.workflowType self._init_empty_crate_and_ComputerLanguage( - materializedEngine.instance.workflowType, localWorkflow.langVersion, licences, crate_pid=crate_pid, @@ -914,11 +997,11 @@ def __init__( def _init_empty_crate_and_ComputerLanguage( self, - wf_type: "WorkflowType", langVersion: "Optional[Union[EngineVersion, WFLangVersion]]", licences: "Sequence[LicenceDescription]", crate_pid: "Optional[str]", ) -> "None": + wf_type = self.workflow_type """ Due the internal synergies between an instance of ComputerLanguage and the RO-Crate it is attached to, both of them should be created @@ -932,25 +1015,25 @@ def _init_empty_crate_and_ComputerLanguage( RO_licences = self._process_licences(licences) # Add extra terms - self.crate.metadata.extra_terms.update( - { - # "sha256": WORKFLOW_RUN_NAMESPACE + "sha256", - # # Next ones are experimental - # ContainerImageAdditionalType.Docker.value: WORKFLOW_RUN_NAMESPACE - # + ContainerImageAdditionalType.Docker.value, - # ContainerImageAdditionalType.Singularity.value: WORKFLOW_RUN_NAMESPACE - # + ContainerImageAdditionalType.Singularity.value, - # "containerImage": WORKFLOW_RUN_NAMESPACE + "containerImage", - # "ContainerImage": WORKFLOW_RUN_NAMESPACE + "ContainerImage", - # "registry": 
WORKFLOW_RUN_NAMESPACE + "registry", - # "tag": WORKFLOW_RUN_NAMESPACE + "tag", - "syntheticOutput": WFEXS_TERMS_NAMESPACE + "syntheticOutput", - "globPattern": WFEXS_TERMS_NAMESPACE + "globPattern", - "filledFrom": WFEXS_TERMS_NAMESPACE + "filledFrom", - } - ) + # self.crate.metadata.extra_terms.update( + # { + # # "sha256": WORKFLOW_RUN_NAMESPACE + "sha256", + # # # Next ones are experimental + # # ContainerImageAdditionalType.Docker.value: WORKFLOW_RUN_NAMESPACE + # # + ContainerImageAdditionalType.Docker.value, + # # ContainerImageAdditionalType.Singularity.value: WORKFLOW_RUN_NAMESPACE + # # + ContainerImageAdditionalType.Singularity.value, + # # "containerImage": WORKFLOW_RUN_NAMESPACE + "containerImage", + # # "ContainerImage": WORKFLOW_RUN_NAMESPACE + "ContainerImage", + # # "registry": WORKFLOW_RUN_NAMESPACE + "registry", + # # "tag": WORKFLOW_RUN_NAMESPACE + "tag", + # "syntheticOutput": WFEXS_TERMS_NAMESPACE + "syntheticOutput", + # "globPattern": WFEXS_TERMS_NAMESPACE + "globPattern", + # "filledFrom": WFEXS_TERMS_NAMESPACE + "filledFrom", + # } + # ) self.crate.metadata.extra_contexts.append(WORKFLOW_RUN_CONTEXT) - # self.crate.metadata.extra_contexts.append(WFEXS_TERMS_CONTEXT) + self.crate.metadata.extra_contexts.append(WFEXS_TERMS_CONTEXT) self.compLang = rocrate.model.computerlanguage.ComputerLanguage( self.crate, @@ -1021,20 +1104,20 @@ def _add_wfexs_to_crate( wrroc_profiles = [ rocrate.model.creativework.CreativeWork( self.crate, - identifier="https://w3id.org/ro/wfrun/process/0.3", - properties={"name": "ProcessRun Crate", "version": "0.3"}, + identifier="https://w3id.org/ro/wfrun/process/0.5", + properties={"name": "ProcessRun Crate", "version": "0.5"}, ), rocrate.model.creativework.CreativeWork( self.crate, - identifier="https://w3id.org/ro/wfrun/workflow/0.3", - properties={"name": "Workflow Run Crate", "version": "0.3"}, + identifier="https://w3id.org/ro/wfrun/workflow/0.5", + properties={"name": "Workflow Run Crate", "version": "0.5"}, ), # TODO: This one can be enabled only when proper provenance # describing the execution steps is implemented # rocrate.model.creativework.CreativeWork( # self.crate, - # identifier="https://w3id.org/ro/wfrun/provenance/0.3", - # properties={"name": "Provenance Run Crate", "version": "0.3"}, + # identifier="https://w3id.org/ro/wfrun/provenance/0.5", + # properties={"name": "Provenance Run Crate", "version": "0.5"}, # ), rocrate.model.creativework.CreativeWork( self.crate, @@ -1359,12 +1442,13 @@ def addWorkflowInputs( itemInValue0 = in_item.values[0] additional_type: "Optional[str]" = None - if isinstance(itemInValue0, int): + # A bool is an instance of int in Python + if isinstance(itemInValue0, bool): + additional_type = "Boolean" + elif isinstance(itemInValue0, int): additional_type = "Integer" elif isinstance(itemInValue0, str): additional_type = "Text" - elif isinstance(itemInValue0, bool): - additional_type = "Boolean" elif isinstance(itemInValue0, float): additional_type = "Float" elif isinstance(itemInValue0, MaterializedContent): @@ -1776,7 +1860,7 @@ def _add_file_to_crate( the_file_crate = self.crate.add_file_ext( identifier=the_id, source=the_path if do_attach else None, - dest_path=the_name if do_attach else None, + dest_path=the_name, clazz=SourceCodeFile if is_soft_source else FixedFile, ) if the_uri is not None: @@ -1836,9 +1920,15 @@ def _add_directory_as_dataset( # Describe datasets referred from DOIs # as in https://github.com/ResearchObject/ro-crate/pull/255/files - if not os.path.isdir(the_path): + if not 
the_path.is_dir(): return None, None + if the_name is not None and not the_name.endswith("/"): + the_name = cast("RelPath", the_name + "/") + + if the_alternate_name is not None and not the_alternate_name.endswith("/"): + the_alternate_name = cast("RelPath", the_alternate_name + "/") + assert not do_attach or ( the_name is not None ), "A name must be provided for local directories" @@ -1851,7 +1941,7 @@ def _add_directory_as_dataset( crate_dataset = self.crate.add_dataset_ext( identifier=the_id, source=the_path if do_attach else None, - dest_path=the_name if do_attach else None, + dest_path=the_name, fetch_remote=False, validate_url=False, # properties=file_properties, @@ -1880,6 +1970,12 @@ def _add_directory_as_dataset( the_file_crate = self._add_file_to_crate( the_path=pathlib.Path(the_file.path), the_uri=the_item_uri, + the_name=None + if the_name is None + else cast("RelPath", the_name + the_file.name), + the_alternate_name=None + if the_alternate_name is None + else cast("RelPath", the_alternate_name + the_file.name), the_size=the_file.stat().st_size, do_attach=do_attach, ) @@ -1895,6 +1991,12 @@ def _add_directory_as_dataset( ) = self._add_directory_as_dataset( the_path=pathlib.Path(the_file.path), the_uri=the_item_uri, + the_name=None + if the_name is None + else cast("RelPath", the_name + the_file.name), + the_alternate_name=None + if the_alternate_name is None + else cast("RelPath", the_alternate_name + the_file.name), do_attach=do_attach, ) if the_dir_crate is not None: @@ -2042,7 +2144,7 @@ def _add_workflow_to_crate( the_workflow_crate = self.crate.add_workflow_ext( identifier=the_id, source=the_path if do_attach else None, - dest_path=the_name if do_attach else None, + dest_path=the_name, main=main, lang=lang, gen_cwl=gen_cwl, @@ -2144,7 +2246,7 @@ def _add_workflow_to_crate( the_workflow_crate["url"] = wf_url if the_workflow.relPathFiles: - rel_entities: "MutableSequence[Union[FixedFile, rocrate.model.creativework.CreativeWork]]" = ( + rel_entities: "MutableSequence[Union[FixedFile, rocrate.model.creativework.CreativeWork, FixedDataset]]" = ( [] ) for rel_file in the_workflow.relPathFiles: @@ -2154,7 +2256,7 @@ def _add_workflow_to_crate( # First, are we dealing with relative files or with URIs? 
p_rel_file = urllib.parse.urlparse(rel_file) - the_entity: "Union[FixedFile, rocrate.model.creativework.CreativeWork]" + the_entity: "Union[FixedFile, rocrate.model.creativework.CreativeWork, FixedDataset]" if p_rel_file.scheme != "": the_entity = rocrate.model.creativework.CreativeWork( self.crate, @@ -2170,14 +2272,37 @@ def _add_workflow_to_crate( os.path.join(the_workflow.dir, rel_file), self.staged_setup.workflow_dir, ) - the_entity = self._add_file_to_crate( - the_path=pathlib.Path(the_workflow.dir) / rel_file, - the_name=the_s_name, - the_alternate_name=cast("RelPath", the_alternate_name), - the_uri=cast("URIType", rocrate_file_id), - do_attach=do_attach, - is_soft_source=True, - ) + abs_file = pathlib.Path(the_workflow.dir) / rel_file + if abs_file.is_file(): + the_entity = self._add_file_to_crate( + the_path=abs_file, + the_name=the_s_name, + the_alternate_name=cast("RelPath", the_alternate_name), + the_uri=cast("URIType", rocrate_file_id), + do_attach=do_attach, + is_soft_source=True, + ) + elif abs_file.is_dir(): + ( + the_possible_entity, + the_files_within_the_entity, + ) = self._add_directory_as_dataset( + the_path=abs_file, + the_name=the_s_name, + the_alternate_name=cast("RelPath", the_alternate_name), + the_uri=cast("URIType", rocrate_file_id), + do_attach=do_attach, + ) + if the_possible_entity is None: + raise ROCrateGenerationException( + f"Unable to include {abs_file} directory into the RO-Crate being generated" + ) + + the_entity = the_possible_entity + else: + raise ROCrateGenerationException( + f"Unable to include {abs_file} into the RO-Crate being generated (unmanaged file object)" + ) rel_entities.append(the_entity) @@ -2258,11 +2383,23 @@ def addStagedWorkflowDetails( inputs: "Sequence[MaterializedInput]", environment: "Sequence[MaterializedInput]", outputs: "Optional[Sequence[ExpectedOutput]]", + profiles: "Optional[Sequence[str]]" = None, ) -> None: """ This method is used for WRROCs with only prospective provenance """ - self.addWorkflowInputs(inputs, are_envvars=False) + augmented_inputs: "Sequence[MaterializedInput]" + if profiles: + augmented_inputs = [ + MaterializedInput( + name=cast("SymbolicParamName", "-profile"), + values=profiles, + ), + *inputs, + ] + else: + augmented_inputs = inputs + self.addWorkflowInputs(augmented_inputs, are_envvars=False) if len(environment) > 0: self.addWorkflowInputs(environment, are_envvars=True) @@ -2273,6 +2410,7 @@ def addStagedWorkflowDetails( def addWorkflowExecution( self, stagedExec: "StagedExecution", + expected_outputs: "Optional[Sequence[ExpectedOutput]]" = None, ) -> None: # TODO: Add a new CreateAction for each stagedExec # as it is explained at https://www.researchobject.org/workflow-run-crate/profiles/workflow_run_crate @@ -2312,13 +2450,26 @@ def addWorkflowExecution( crate_action.append_to("actionStatus", {"@id": action_status}, compact=True) + augmented_inputs: "Sequence[MaterializedInput]" + if stagedExec.profiles: + # Profiles are represented as this custom parameter + # assuming no parameter name can start with a minus + augmented_inputs = [ + MaterializedInput( + name=cast("SymbolicParamName", "-profile"), + values=stagedExec.profiles, + ), + *stagedExec.augmentedInputs, + ] + else: + augmented_inputs = stagedExec.augmentedInputs crate_inputs = self.addWorkflowInputs( - stagedExec.augmentedInputs, + augmented_inputs, are_envvars=False, ) crate_action["object"] = crate_inputs - # Add environment, according to WRROC 0.4 + # Add environment, according to WRROC 0.5 if len(stagedExec.environment) > 0: 
crate_envvars = self.addWorkflowInputs( stagedExec.environment, @@ -2330,8 +2481,93 @@ # see https://www.researchobject.org/workflow-run-crate/profiles/workflow_run_crate#adding-engine-specific-traces # TODO: Add "augmented environment variables" + augmented_outputs: "Sequence[MaterializedOutput]" + if not self.workflow_type.has_explicit_outputs: + if expected_outputs is None: + expected_outputs = [] + expected_outputs_h: "Mapping[str, ExpectedOutput]" = { + expected_output.name: expected_output + for expected_output in expected_outputs + } + # This code is needed to heal old nextflow-like executions. + # First, identify what should be transferred, + # in case it does not appear yet + not_synthetic_inputs: "MutableMapping[str, MaterializedInput]" = {} + for augmented_input in stagedExec.augmentedInputs: + if augmented_input.autoFilled: + not_synthetic_inputs[augmented_input.name] = augmented_input + + the_augmented_outputs: "MutableSequence[MaterializedOutput]" = [] + for mat_output in stagedExec.matCheckOutputs: + if ( + mat_output.name not in not_synthetic_inputs + and mat_output.syntheticOutput is None + ): + augmented_output = mat_output._replace(syntheticOutput=True) + else: + not_synthetic_inputs.pop(mat_output.name, None) + augmented_output = mat_output + + the_augmented_outputs.append(augmented_output) + + # Whatever is still in not_synthetic_inputs is what + # has to be injected as an output + for augmented_input in not_synthetic_inputs.values(): + preferred_filename: "Optional[RelPath]" = None + expected_output = expected_outputs_h.get(augmented_input.name) + if expected_output is not None: + preferred_filename = expected_output.preferredFilename + + assert len(augmented_input.values) > 0 + if isinstance(augmented_input.values[0], MaterializedContent): + kind = augmented_input.values[0].kind + elif isinstance(augmented_input.values[0], str): + # It is a bare path (sigh, technical debt) + the_path = augmented_input.values[0] + assert os.path.exists(the_path) + kind = ( + ContentKind.Directory + if os.path.isdir(the_path) + else ContentKind.File + ) + else: + raise ROCrateGenerationException( + "Unexpected type of augmented input for expected output healing" + ) + + non_synthetic_values: "MutableSequence[AbstractGeneratedContent]" = [] + for mat_content in cast( + "Sequence[Union[str, MaterializedContent]]", augmented_input.values ): + non_synthetic_values.append( + MaterializedContent2AbstractGeneratedContent( + mat_content, preferred_filename + ) + if isinstance(mat_content, MaterializedContent) + else Path2AbstractGeneratedContent( + pathlib.Path(mat_content), preferred_filename + ) + ) + + the_augmented_outputs.append( + MaterializedOutput( + name=cast("SymbolicOutputName", augmented_input.name), + kind=kind, + expectedCardinality=WorkflowEngine.GuessedCardinalityMapping[ + len(non_synthetic_values) > 1 + ], + values=non_synthetic_values, + syntheticOutput=False, + filledFrom=augmented_input.name, + ) + ) + augmented_outputs = the_augmented_outputs + else: + # No healing should be needed + augmented_outputs = stagedExec.matCheckOutputs + crate_outputs = self._add_workflow_execution_outputs( - stagedExec.matCheckOutputs, + augmented_outputs, rel_work_dir=stagedExec.outputsDir, ) @@ -2524,12 +2760,12 @@ def _add_workflow_execution_outputs( ) elif len(out_item.values) > 0: itemOutValue0 = out_item.values[0] - if isinstance(itemOutValue0, int): + if isinstance(itemOutValue0, bool): + additional_type = "Boolean" + elif isinstance(itemOutValue0, int):
additional_type = "Integer" elif isinstance(itemOutValue0, str): additional_type = "Text" - elif isinstance(itemOutValue0, bool): - additional_type = "Boolean" elif isinstance(itemOutValue0, float): additional_type = "Float" @@ -2756,7 +2992,7 @@ def _add_GeneratedDirectoryContent_as_dataset( crate_dataset = self.crate.add_dataset_ext( identifier=the_id, source=the_content.local if do_attach else None, - dest_path=dest_path if do_attach else None, + dest_path=dest_path, fetch_remote=False, validate_url=False, # properties=file_properties, diff --git a/wfexs_backend/schemas/stage-definition.json b/wfexs_backend/schemas/stage-definition.json index 012f0269..f0456658 100644 --- a/wfexs_backend/schemas/stage-definition.json +++ b/wfexs_backend/schemas/stage-definition.json @@ -622,7 +622,7 @@ ] }, "profile": { - "description": "If defined, the profile/s to enable", + "description": "(deprecated) If defined, the profile/s to enable", "oneOf": [ { "type": "string", @@ -667,6 +667,25 @@ "params": { "$ref": "#/definitions/Params" }, + "profile": { + "description": "For engines which support profiles, like Nextflow or snakemake, if defined, the profile/s to enable", + "oneOf": [ + { + "type": "string", + "pattern": "^[^, \t]+[ \t]*(,[ \t]*[^, \t]+[ \t]*)*$", + "minLength": 1 + }, + { + "type": "array", + "items": { + "type": "string", + "pattern": "^[^,]+$", + "minLength": 1 + }, + "minItems": 1 + } + ] + }, "environment": { "type": "object", "patternProperties": { diff --git a/wfexs_backend/side_caches.py b/wfexs_backend/side_caches.py new file mode 100644 index 00000000..756a0e4f --- /dev/null +++ b/wfexs_backend/side_caches.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# SPDX-License-Identifier: Apache-2.0 +# Copyright 2020-2024 Barcelona Supercomputing Center (BSC), Spain +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import TYPE_CHECKING + +from rocrate.model.metadata import ( + LegacyMetadata, + Metadata, +) + +from .utils.licences import LicenceMatcherSingleton +from .utils.passphrase_wrapper import WfExSPassGenSingleton +from .utils.pyld_caching import pyld_cache_initialize + +if TYPE_CHECKING: + from typing import ( + Sequence, + ) + + +def populate_side_caches() -> "None": + """ + This method populates some side caches which are used in several parts + of WfExS-backend code. They are residing at specific subdirectories + from XDG_CACHE_DIR, and they are: + + * The lists of words used to generate random passphrases, + * Most common JSON-LD contexts. + * The list of licences from SPDX. + """ + + # First, the list of words + logging.info("Populating passphrase generator cache (lists of words)") + pw = WfExSPassGenSingleton() + pw.initialize() + + # Then, most common JSON-LD contexts. 
diff --git a/wfexs_backend/utils/contents.py b/wfexs_backend/utils/contents.py index 120acca8..8258e70d 100644 --- a/wfexs_backend/utils/contents.py +++ b/wfexs_backend/utils/contents.py @@ -59,6 +59,7 @@ ExpectedOutput, Fingerprint, LicensedURI, + MaterializedContent, RelPath, URIType, ) @@ -160,6 +161,63 @@ def GetGeneratedDirectoryContentFromList( ) + +def MaterializedContent2AbstractGeneratedContent( + mat_content: "MaterializedContent", + preferredFilename: "Optional[RelPath]" = None, + signatureMethod: "Optional[FingerprintMethod]" = nihDigester, +) -> "AbstractGeneratedContent": + """ + This method generates either a GeneratedContent + or a GeneratedDirectoryContent from a MaterializedContent + """ + local = ( + mat_content.extrapolated_local + if mat_content.extrapolated_local is not None + else mat_content.local + ) + if mat_content.kind == ContentKind.File: + return GeneratedContent( + local=local, + uri=mat_content.licensed_uri, + # This might be in the wrong representation + signature=mat_content.fingerprint, + preferredFilename=preferredFilename, + ) + else: + return GetGeneratedDirectoryContent( + thePath=local, + uri=mat_content.licensed_uri, + preferredFilename=preferredFilename, + signatureMethod=signatureMethod, + ) + + +def Path2AbstractGeneratedContent( + content: "pathlib.Path", + preferredFilename: "Optional[RelPath]" = None, + signatureMethod: "Optional[FingerprintMethod]" = nihDigester, +) -> "AbstractGeneratedContent": + """ + This method generates either a GeneratedContent + or a GeneratedDirectoryContent from a local path + """ + if content.is_dir(): + return GetGeneratedDirectoryContent( + thePath=content, + preferredFilename=preferredFilename, + signatureMethod=signatureMethod, + ) + else: + return GeneratedContent( + local=content, + signature=cast( + "Optional[Fingerprint]", + ComputeDigestFromFile(content, repMethod=signatureMethod), + ), + preferredFilename=preferredFilename, + ) + + CWLClass2WfExS = { "Directory": ContentKind.Directory, "File": ContentKind.File @@ -243,20 +301,31 @@ def copy2_nofollow( shutil.copy2(src, dest, follow_symlinks=False) + +def copy_nofollow( + src: "Union[str, os.PathLike[str]]", dest: "Union[str, os.PathLike[str]]" +) -> "None": + shutil.copy(src, dest, follow_symlinks=False) + + def link_or_copy( src: "Union[AnyPath, os.PathLike[str]]", dest: "Union[AnyPath, os.PathLike[str]]", force_copy: "bool" = False, + preserve_attrs: "bool" = True, ) -> None: link_or_copy_pathlib( src if isinstance(src, pathlib.Path) else pathlib.Path(src), dest if isinstance(dest, pathlib.Path) else pathlib.Path(dest), force_copy=force_copy, + preserve_attrs=preserve_attrs, ) def link_or_copy_pathlib( - src: "pathlib.Path", dest: "pathlib.Path", force_copy: "bool" = False + src: "pathlib.Path", + dest: "pathlib.Path", + force_copy: "bool" = False, + preserve_attrs: "bool" = True, ) -> None: assert ( src.exists() @@ -345,18 +414,24 @@ def link_or_copy_pathlib( if dest_exists: dest.unlink() if isinstance(src, ZipfilePath): - src.copy_to(dest) - else: + src.copy_to(dest, preserve_attrs=preserve_attrs) + elif preserve_attrs: shutil.copy2(src, dest) + else: +
shutil.copy(src, dest) else: # Recursively copying the content # as it is in a separated filesystem if dest_exists: shutil.rmtree(dest) if isinstance(src, ZipfilePath): - src.copy_to(dest) + src.copy_to(dest, preserve_attrs=preserve_attrs) else: - shutil.copytree(src, dest, copy_function=copy2_nofollow) + shutil.copytree( + src, + dest, + copy_function=copy2_nofollow if preserve_attrs else copy_nofollow, + ) def real_unlink_if_exists( diff --git a/wfexs_backend/utils/marshalling_handling.py b/wfexs_backend/utils/marshalling_handling.py index 5bf43af8..f830fece 100644 --- a/wfexs_backend/utils/marshalling_handling.py +++ b/wfexs_backend/utils/marshalling_handling.py @@ -190,20 +190,24 @@ def recurse_u( else: c_objn_keys = c_objn.keys() - fields = dict(zip(c_objn_keys, recurse_u(c_objn.values(), myglobals))) - # print("{} {} {}".format(clazz, theTypeName, fields)) - - # Deactivated for now, as the code is not ready for this magic - # if hasattr(clazz, '_unmarshall'): - # return clazz._unmarshall(**fields) - # else: - # return clazz(**fields) - - try: - objres = clazz(**fields) - except: - logger.exception(f"Unmarshalling Error instantiating {clazz.__name__}") - raise + fields_list = list(zip(c_objn_keys, recurse_u(c_objn.values(), myglobals))) + if issubclass(clazz, dict): + objres = clazz(fields_list) + else: + fields = dict(fields_list) + # print("{} {} {}".format(clazz, theTypeName, fields)) + + # Deactivated for now, as the code is not ready for this magic + # if hasattr(clazz, '_unmarshall'): + # return clazz._unmarshall(**fields) + # else: + # return clazz(**fields) + + try: + objres = clazz(**fields) + except: + logger.exception(f"Unmarshalling Error instantiating {clazz.__name__}") + raise elif obj_is(collections.abc.Iterable) and not obj_is(str): # print(type(obj)) return type(obj)(recurse_u(obj, myglobals)) diff --git a/wfexs_backend/utils/pyld_caching.py b/wfexs_backend/utils/pyld_caching.py index 12c77c7a..98cf174c 100644 --- a/wfexs_backend/utils/pyld_caching.py +++ b/wfexs_backend/utils/pyld_caching.py @@ -27,16 +27,19 @@ Callable, Mapping, Optional, + Sequence, ) import asyncio import aiohttp from aiohttp_client_cache.session import CachedSession from aiohttp_client_cache.backends.sqlite import SQLiteBackend +import os.path import pyld # type: ignore[import, import-untyped] import re import string import urllib.parse +import xdg.BaseDirectory def aiohttp_caching_document_loader( @@ -193,3 +196,23 @@ def hook_pyld_cache(cache_file: "str") -> "None": timeout=10, ) ) + + +def pyld_cache_initialize(initial_contexts: "Sequence[str]" = []) -> "None": + """ + This method hooks the caching system to pyld, so context resolution + does not need to connect to internet. + And, if the list of initial contexts is not empty, populate the cache + with them. 
+ """ + cache_path = xdg.BaseDirectory.save_cache_path("es.elixir.WfExSJSONLD") + hook_pyld_cache(os.path.join(cache_path, "contexts.db")) + + if len(initial_contexts) > 0: + mock_jsonld = { + "@context": initial_contexts, + "@graph": [], + } + + # This line should perform the magic + pyld.jsonld.expand(mock_jsonld, {"keepFreeFloatingNodes": True}) diff --git a/wfexs_backend/utils/rocrate.py b/wfexs_backend/utils/rocrate.py index 1796110f..9dee2796 100644 --- a/wfexs_backend/utils/rocrate.py +++ b/wfexs_backend/utils/rocrate.py @@ -99,7 +99,6 @@ import pyld # type: ignore[import, import-untyped] import rdflib import rdflib.plugins.sparql -import xdg.BaseDirectory # This code needs exception groups if sys.version_info[:2] < (3, 11): @@ -127,7 +126,7 @@ ) from .pyld_caching import ( - hook_pyld_cache, + pyld_cache_initialize, ) from ..fetchers import ( @@ -189,11 +188,15 @@ class ROCratePayload(NamedTuple): for container_type, container_type_metadata in ContainerTypeMetadataDetails.items() } -WORKFLOW_RUN_CONTEXT: "Final[str]" = "https://w3id.org/ro/terms/workflow-run" -WORKFLOW_RUN_NAMESPACE: "Final[str]" = WORKFLOW_RUN_CONTEXT + "#" +WORKFLOW_RUN_BASE: "Final[str]" = "https://w3id.org/ro/terms/workflow-run" +WORKFLOW_RUN_CONTEXT: "Final[str]" = WORKFLOW_RUN_BASE + "/context" +WORKFLOW_RUN_NAMESPACE: "Final[str]" = WORKFLOW_RUN_BASE + "#" -WFEXS_TERMS_CONTEXT: "Final[str]" = "https://w3id.org/ro/terms/wfexs" -WFEXS_TERMS_NAMESPACE: "Final[str]" = WFEXS_TERMS_CONTEXT + "#" +WFEXS_TERMS_BASE: "Final[str]" = "https://w3id.org/ro/terms/wfexs" +# Not yet .... +# WFEXS_TERMS_CONTEXT: "Final[str]" = WFEXS_TERMS_BASE + "/context" +WFEXS_TERMS_CONTEXT: "Final[str]" = WFEXS_TERMS_BASE +WFEXS_TERMS_NAMESPACE: "Final[str]" = WFEXS_TERMS_BASE + "#" CONTAINER_DOCKERIMAGE_SHORT: "Final[str]" = "DockerImage" CONTAINER_SIFIMAGE_SHORT: "Final[str]" = "SIFImage" @@ -338,6 +341,7 @@ class ROCrateToolbox(abc.ABC): SCHEMA_ORG_PREFIX + "Text": "Text", SCHEMA_ORG_PREFIX + "Boolean": "Boolean", SCHEMA_ORG_PREFIX + "Float": "Float", + SCHEMA_ORG_PREFIX + "PropertyValue": "PropertyValue", SCHEMA_ORG_PREFIX + "MediaObject": "File", SCHEMA_ORG_PREFIX + "Dataset": "Directory", } @@ -365,8 +369,7 @@ def __init__(self, wfexs: "WfExSBackend"): self.wfexs = wfexs # Caching path for the contexts - cache_path = xdg.BaseDirectory.save_cache_path("es.elixir.WfExSJSONLD") - hook_pyld_cache(os.path.join(cache_path, "contexts.db")) + pyld_cache_initialize() # This is needed for proper behaviour # https://stackoverflow.com/a/6264214 @@ -751,7 +754,7 @@ def identifyROCrate( a s:PropertyValue . } UNION { # A combination of files or directories or property values - VALUES ( ?leaf_type ) { ( s:Integer ) ( s:Text ) ( s:Boolean ) ( s:Float ) ( s:MediaObject ) ( s:Dataset ) } + VALUES ( ?leaf_type ) { ( s:PropertyValue ) ( s:MediaObject ) ( s:Dataset ) } ?input a s:Collection ; s:hasPart+ ?component . @@ -820,7 +823,7 @@ def identifyROCrate( BIND (?env AS ?fileid) } UNION { # A combination of files or directories or property values - VALUES ( ?leaf_type ) { ( s:Integer ) ( s:Text ) ( s:Boolean ) ( s:Float ) ( s:MediaObject ) ( s:Dataset ) } + VALUES ( ?leaf_type ) { ( s:PropertyValue ) ( s:MediaObject ) ( s:Dataset ) } ?env a s:Collection ; s:name ?name_env ; @@ -929,8 +932,8 @@ def identifyROCrate( s:additionalType ?additional_type . 
} UNION { # A combination of files or directories or property values - BIND ( "Collection" AS ?additional_type ) - VALUES ( ?leaf_type ) { ( s:Integer ) ( s:Text ) ( s:Boolean ) ( s:Float ) ( s:MediaObject ) ( s:Dataset ) } + VALUES (?additional_type) { ( "Integer" ) ( "Text" ) ( "Boolean" ) ( "Float" ) ( "Collection" ) } + VALUES ( ?leaf_type ) { ( s:PropertyValue ) ( s:MediaObject ) ( s:Dataset ) } ?input a s:Collection ; s:exampleOfWork ?inputfp ; @@ -1017,8 +1020,8 @@ def identifyROCrate( s:additionalType ?additional_type . } UNION { # A combination of files or directories or property values - BIND ( "Collection" AS ?additional_type ) - VALUES ( ?leaf_type ) { ( s:Integer ) ( s:Text ) ( s:Boolean ) ( s:Float ) ( s:MediaObject ) ( s:Dataset ) } + VALUES (?additional_type) { ( "Integer" ) ( "Text" ) ( "Boolean" ) ( "Float" ) ( "Collection" ) } + VALUES ( ?leaf_type ) { ( s:PropertyValue ) ( s:MediaObject ) ( s:Dataset ) } ?env a s:Collection ; s:name ?name_env ; @@ -1117,8 +1120,8 @@ def identifyROCrate( s:additionalType ?additional_type . } UNION { # A combination of files or directories or property values - BIND ( "Collection" AS ?additional_type ) - VALUES ( ?leaf_type ) { ( s:Integer ) ( s:Text ) ( s:Boolean ) ( s:Float ) ( s:MediaObject ) ( s:Dataset ) } + VALUES (?additional_type) { ( "Integer" ) ( "Text" ) ( "Boolean" ) ( "Float" ) ( "Collection" ) } + VALUES ( ?leaf_type ) { ( s:PropertyValue ) ( s:MediaObject ) ( s:Dataset ) } ?output a s:Collection ; s:exampleOfWork ?outputfp ; @@ -1551,7 +1554,9 @@ def __parseOutputsResults( additional_type = str(outputrow.additional_type) # Is it a nested one? cardinality = "1" - if additional_type == "Collection": + if ( + hasattr(outputrow, "leaf_type") and outputrow.leaf_type is not None + ) or additional_type == "Collection": if not hasattr(outputrow, "leaf_type"): raise ROCrateToolboxException( f"Unable to handle Collections of unknown type in output {str(outputrow.name)}" @@ -1761,14 +1766,15 @@ def __parseInputsResults( valobj: "Optional[MutableMapping[str, Any]]" = None kindobj: "Optional[ContentKind]" = None # Is it a nested one? - if additional_type == "Collection": + if inputrow.leaf_type is not None: leaf_type = str(inputrow.leaf_type) leaf_additional_type = self.LEAF_TYPE_2_ADDITIONAL_TYPE.get(leaf_type) if leaf_additional_type is None: raise ROCrateToolboxException( f"Unable to handle contents of type {leaf_type} in input Collection {str(inputrow.name)}" ) - additional_type = leaf_additional_type + if additional_type == "Collection": + additional_type = leaf_additional_type if leaf_additional_type not in ("File", "Dataset"): valarr = base.setdefault(param_last, []) @@ -1953,14 +1959,15 @@ def __parseEnvResults( valobj: "Optional[MutableMapping[str, Any]]" = None kindobj: "Optional[ContentKind]" = None # Is it a nested one? 
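+            # ?leaf_type is only bound by the Collection branch of the query,
+            # so a non-null leaf_type (rather than additional_type itself)
+            # now signals a nested value.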
- if additional_type == "Collection": + if envrow.leaf_type is not None: leaf_type = str(envrow.leaf_type) leaf_additional_type = self.LEAF_TYPE_2_ADDITIONAL_TYPE.get(leaf_type) if leaf_additional_type is None: raise ROCrateToolboxException( f"Unable to handle contents of type {leaf_type} in Collection reflecting contents pointed by environment variable {env_name}" ) - additional_type = leaf_additional_type + if additional_type == "Collection": + additional_type = leaf_additional_type if leaf_additional_type not in ("File", "Dataset"): valarr = environment.setdefault(env_name, []) @@ -2122,7 +2129,7 @@ def __processPayloadEntity( entity_path: "Optional[str]" = None located_entity: "Optional[pathlib.Path]" = None if include_entity: - entity_path = entity_parsed_uri.path + entity_path = urllib.parse.unquote(entity_parsed_uri.path) if entity_path.startswith("/"): entity_path = entity_path[1:] @@ -2437,7 +2444,6 @@ def extractWorkflowMetadata( else None, relPathFiles=rel_path_files, ) - self.logger.error(f"POZI {cached_workflow}") return repo, workflow_type, cached_workflow @@ -2449,7 +2455,7 @@ def generateWorkflowMetaFromJSONLD( reproducibility_level: "ReproducibilityLevel" = ReproducibilityLevel.Metadata, strict_reproducibility_level: "bool" = False, payload_dir: "Optional[pathlib.Path]" = None, - ) -> "Tuple[RemoteRepo, WorkflowType, ContainerType, ParamsBlock, EnvironmentBlock, OutputsBlock, Optional[LocalWorkflow], Sequence[Container], Optional[Sequence[MaterializedInput]], Optional[Sequence[MaterializedInput]]]": + ) -> "Tuple[RemoteRepo, WorkflowType, ContainerType, ParamsBlock, Optional[Sequence[str]], EnvironmentBlock, OutputsBlock, Optional[LocalWorkflow], Sequence[Container], Optional[Sequence[MaterializedInput]], Optional[Sequence[MaterializedInput]]]": matched_crate, g = self.identifyROCrate(jsonld_obj, public_name) # Is it an RO-Crate? if matched_crate is None: @@ -2640,11 +2646,19 @@ def generateWorkflowMetaFromJSONLD( } params = new_params + # Beware!! This is a tweak!! 
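+        # "-profile" is a Nextflow engine flag rather than a real workflow
+        # input, so it is split out of params and returned separately, letting
+        # callers record it as workflow_meta["profile"].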
+        profiles: "Optional[Sequence[str]]" = params.get("-profile")
+        if profiles is not None:
+            new_params = cast("MutableParamsBlock", copy.copy(params))
+            del new_params["-profile"]
+            params = new_params
+
         return (
             repo,
             workflow_type,
             container_type,
             params,
+            profiles,
             environment,
             outputs,
             cached_workflow,
diff --git a/wfexs_backend/utils/zipfile_path.py b/wfexs_backend/utils/zipfile_path.py
index 4788e1a9..c45d9da2 100644
--- a/wfexs_backend/utils/zipfile_path.py
+++ b/wfexs_backend/utils/zipfile_path.py
@@ -21,7 +21,9 @@
 import pathlib
 import posixpath
 import shutil
+import stat
 import sys
+import time
 
 from typing import (
     cast,
@@ -281,8 +283,13 @@ class ZipfilePath(pathlib.Path):
     'mem'
     """
 
+    # _flavour = pathlib._posix_flavour
     __repr = "{self.__class__.__name__}({self._root.filename!r}, {self._at!r})"
 
+    def __new__(cls, *args: "Any", **kwargs: "Any") -> "ZipfilePath":
+        self = object.__new__(cls)
+        return self
+
     def __init__(
         self,
         root: "Union[str, CompleteDirs, os.PathLike[str], zipfile.ZipFile]",
@@ -306,6 +313,7 @@ def __init__(
 
         self._root = FastLookup.make(root)
         self._at = at
+        # super().__init__(self._root.filename)
 
     def open(  # type: ignore[override]
         self,
@@ -427,6 +435,8 @@ def _extract_member(
         member: "Union[zipfile.ZipInfo, str]",
         targetpath: "Union[str, os.PathLike[str]]",
         pwd: "Optional[bytes]" = None,
+        preserve_attrs: "bool" = True,
+        aggressive_attrs: "bool" = False,
     ) -> "str":
         """
         Method partially borrowed from python 3.12
@@ -474,11 +484,43 @@ def _extract_member(
             ) as target:
                 shutil.copyfileobj(source, target)
 
+        # Used bits from
+        # https://stackoverflow.com/a/39296577
+        # and
+        # https://stackoverflow.com/a/9813471
+        if preserve_attrs or aggressive_attrs:
+            if member.external_attr > 0xFFFF:
+                if aggressive_attrs:
+                    os.chmod(targetpath, member.external_attr >> 16)
+                elif (member.external_attr >> 16) & (
+                    stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+                ):
+                    os.chmod(
+                        targetpath,
+                        os.stat(targetpath).st_mode
+                        | stat.S_IXUSR
+                        | stat.S_IXGRP
+                        | stat.S_IXOTH,
+                    )
+
+            date_time = time.mktime(member.date_time + (0, 0, -1))
+            os.utime(targetpath, (date_time, date_time))
+
         return targetpath
 
-    def copy_to(self, dest: "pathlib.Path") -> "None":
+    def copy_to(
+        self,
+        dest: "pathlib.Path",
+        preserve_attrs: "bool" = True,
+        aggressive_attrs: "bool" = False,
+    ) -> "None":
         if self.is_file():
-            self._extract_member(self._at, dest)
+            self._extract_member(
+                self._at,
+                dest,
+                preserve_attrs=preserve_attrs,
+                aggressive_attrs=aggressive_attrs,
+            )
         else:
             the_members: "Sequence[str]"
             if self._at != "":
@@ -491,7 +533,12 @@ def copy_to(self, dest: "pathlib.Path") -> "None":
                 the_members = self._root.namelist()
             for the_member in the_members:
                 the_partial_member = the_member[len(self._at) :]
-                self._extract_member(the_member, dest / the_partial_member)
+                self._extract_member(
+                    the_member,
+                    dest / the_partial_member,
+                    preserve_attrs=preserve_attrs,
+                    aggressive_attrs=aggressive_attrs,
+                )
 
     def with_name(self, name: "Union[str, os.PathLike[str]]") -> "ZipfilePath":
         return self.parent.joinpath(name)
diff --git a/wfexs_backend/wfexs_backend.py b/wfexs_backend/wfexs_backend.py
index 04f9fc95..8db0ba87 100644
--- a/wfexs_backend/wfexs_backend.py
+++ b/wfexs_backend/wfexs_backend.py
@@ -289,7 +289,7 @@ def generate_passphrase(cls) -> "str":
         )
 
     @classmethod
-    def bootstrap(
+    def bootstrap_config(
         cls,
         local_config_ro: "WfExSConfigBlock",
         config_directory: "Optional[AnyPath]" = None,
@@ -311,7 +311,7 @@ def bootstrap(
         valErrors = config_validate(local_config_ro, cls.CONFIG_SCHEMA)
         if
len(valErrors) > 0: logging.error( - f"ERROR on incoming local configuration block for bootstrap: {valErrors}" + f"ERROR on incoming local configuration block for bootstrap config: {valErrors}" ) sys.exit(1) @@ -459,16 +459,29 @@ def FromDescription( stacklevel=2, ) - _, updated_local_config, config_directory = cls.bootstrap( + _, updated_local_config, config_directory = cls.bootstrap_config( local_config, config_directory=config_directory ) + profiles: "Optional[Union[str, Sequence[str]]]" = workflow_meta.get("profile") + enabled_profiles: "Optional[Sequence[str]]" = None + if profiles is not None: + if isinstance(profiles, list): + enabled_profiles = profiles + elif isinstance(profiles, str): + split_by_comma = re.compile(r"[ \t]*,[ \t]*") + enabled_profiles = split_by_comma.split(profiles) + else: + # It should not happen + enabled_profiles = [str(profiles)] + return cls(updated_local_config, config_directory=config_directory).newSetup( workflow_meta["workflow_id"], workflow_meta.get("version"), descriptor_type=workflow_meta.get("workflow_type"), trs_endpoint=workflow_meta.get("trs_endpoint", WF.DEFAULT_TRS_ENDPOINT), params=workflow_meta.get("params", {}), + enabled_profiles=enabled_profiles, environment=workflow_meta.get("environment", {}), outputs=workflow_meta.get("outputs", {}), default_actions=workflow_meta.get("default_actions", []), @@ -499,7 +512,9 @@ def __init__( if not isinstance(local_config, dict): # Minimal bootstrapping for embedded cases - _, local_config, config_directory = self.bootstrap({}, config_directory) + _, local_config, config_directory = self.bootstrap_config( + {}, config_directory + ) # validate the local configuration object valErrors = config_validate(local_config, self.CONFIG_SCHEMA) @@ -1123,6 +1138,7 @@ def newSetup( descriptor_type: "Optional[TRS_Workflow_Descriptor]" = None, trs_endpoint: "str" = WF.DEFAULT_TRS_ENDPOINT, params: "Optional[ParamsBlock]" = None, + enabled_profiles: "Optional[Sequence[str]]" = None, environment: "Optional[EnvironmentBlock]" = None, outputs: "Optional[OutputsBlock]" = None, default_actions: "Optional[Sequence[ExportActionBlock]]" = None, @@ -1142,6 +1158,7 @@ def newSetup( descriptor_type=descriptor_type, trs_endpoint=trs_endpoint, params=params, + enabled_profiles=enabled_profiles, environment=environment, outputs=outputs, default_actions=default_actions, diff --git a/wfexs_backend/workflow.py b/wfexs_backend/workflow.py index 849d1377..3ccbe09b 100644 --- a/wfexs_backend/workflow.py +++ b/wfexs_backend/workflow.py @@ -447,6 +447,7 @@ def __init__( descriptor_type: "Optional[TRS_Workflow_Descriptor]" = None, trs_endpoint: "str" = DEFAULT_TRS_ENDPOINT, params: "Optional[ParamsBlock]" = None, + enabled_profiles: "Optional[Sequence[str]]" = None, environment: "Optional[EnvironmentBlock]" = None, outputs: "Optional[OutputsBlock]" = None, placeholders: "Optional[PlaceHoldersBlock]" = None, @@ -560,6 +561,7 @@ def __init__( # This property should mutate after unmarshalling the config self.container_type_str = container_type_str + self.enabled_profiles: "Optional[Sequence[str]]" = None self.expected_outputs: "Optional[Sequence[ExpectedOutput]]" = None self.default_actions: "Optional[Sequence[ExportAction]]" self.trs_endpoint: "Optional[str]" @@ -594,6 +596,8 @@ def __init__( workflow_meta["workflow_config"] = workflow_config if params is not None: workflow_meta["params"] = params + if enabled_profiles is not None: + workflow_meta["profile"] = enabled_profiles if outputs is not None: workflow_meta["outputs"] = outputs if 
placeholders is not None: @@ -627,6 +631,7 @@ def __init__( self.version_id = str(version_id) if version_id is not None else None self.descriptor_type = descriptor_type self.params = params + self.enabled_profiles = enabled_profiles self.environment = environment self.placeholders = placeholders self.formatted_params, self.outputs_to_inject = self.formatParams(params) @@ -1618,6 +1623,7 @@ def FromPreviousROCrate( workflow_type, container_type, params, + profiles, environment, outputs, cached_workflow, @@ -1646,6 +1652,8 @@ def FromPreviousROCrate( "secure": secure, }, } + if profiles is not None: + workflow_meta["profile"] = profiles if container_type is not None: workflow_meta["workflow_config"]["containerType"] = container_type.value @@ -1770,6 +1778,18 @@ def FromDescription( if preserved_paranoid_mode is not None: paranoidMode = preserved_paranoid_mode + profiles: "Optional[Union[str, Sequence[str]]]" = workflow_meta.get("profile") + enabled_profiles: "Optional[Sequence[str]]" = None + if profiles is not None: + if isinstance(profiles, list): + enabled_profiles = profiles + elif isinstance(profiles, str): + split_by_comma = re.compile(r"[ \t]*,[ \t]*") + enabled_profiles = split_by_comma.split(profiles) + else: + # It should not happen + enabled_profiles = [str(profiles)] + return cls( wfexs, workflow_meta["workflow_id"], @@ -1777,6 +1797,7 @@ def FromDescription( descriptor_type=workflow_meta.get("workflow_type"), trs_endpoint=workflow_meta.get("trs_endpoint", cls.DEFAULT_TRS_ENDPOINT), params=workflow_meta.get("params", dict()), + enabled_profiles=enabled_profiles, environment=workflow_meta.get("environment", dict()), outputs=workflow_meta.get("outputs", dict()), placeholders=workflow_meta.get("placeholders", dict()), @@ -1821,6 +1842,18 @@ def FromForm( :return: Workflow configuration """ + profiles: "Optional[Union[str, Sequence[str]]]" = workflow_meta.get("profile") + enabled_profiles: "Optional[Sequence[str]]" = None + if profiles is not None: + if isinstance(profiles, list): + enabled_profiles = profiles + elif isinstance(profiles, str): + split_by_comma = re.compile(r"[ \t]*,[ \t]*") + enabled_profiles = split_by_comma.split(profiles) + else: + # It should not happen + enabled_profiles = [str(profiles)] + return cls( wfexs, workflow_meta["workflow_id"], @@ -1828,6 +1861,7 @@ def FromForm( descriptor_type=workflow_meta.get("workflow_type"), trs_endpoint=workflow_meta.get("trs_endpoint", cls.DEFAULT_TRS_ENDPOINT), params=workflow_meta.get("params", dict()), + enabled_profiles=enabled_profiles, environment=workflow_meta.get("environment", dict()), placeholders=workflow_meta.get("placeholders", dict()), default_actions=workflow_meta.get("default_actions"), @@ -1964,6 +1998,7 @@ def fetchWorkflow( injected_workflow.dir / injected_workflow.relPath, workflow_dir / repo.rel_path, force_copy=True, + preserve_attrs=True, ) if rel_path_files is not None: @@ -1977,6 +2012,7 @@ def fetchWorkflow( injected_workflow.dir / inj, workflow_dir / dest_inj, force_copy=True, + preserve_attrs=True, ) elif repoDir.is_dir(): link_or_copy_pathlib(repoDir, workflow_dir, force_copy=True) @@ -2049,6 +2085,31 @@ def fetchWorkflow( ) ) + enabled_profiles: "Optional[Sequence[str]]" = None + if self.enabled_profiles is not None: + enabled_profiles = self.enabled_profiles + elif ( + self.staged_setup.workflow_config is not None + and self.engineDesc is not None + ): + profiles: "Optional[Union[str, Sequence[str]]]" = ( + self.staged_setup.workflow_config.get( + self.engineDesc.engineName, {} + ).get("profile") 
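+                # Legacy location: the per-engine section of workflow_config
+                # (e.g. workflow_config["nextflow"]["profile"])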
+ ) + if profiles is not None: + if isinstance(profiles, list): + enabled_profiles = profiles + elif isinstance(profiles, str): + split_by_comma = re.compile(r"[ \t]*,[ \t]*") + enabled_profiles = split_by_comma.split(profiles) + else: + # It should not happen + enabled_profiles = [str(profiles)] + + # Backward <=> forward compatibility + self.enabled_profiles = enabled_profiles + self.engine = engine self.engineVer = engineVer self.localWorkflow = candidateLocalWorkflow @@ -2178,6 +2239,7 @@ def materializeWorkflowAndContainers( offline=offline, injectable_containers=injectable_containers, injectable_operational_containers=injectable_operational_containers, + profiles=self.enabled_profiles, ) # DEPRECATED? @@ -3708,6 +3770,7 @@ def executeWorkflow(self, offline: "bool" = False) -> "ExitVal": self.materializedParams, self.materializedEnvironment, self.expected_outputs, + self.enabled_profiles, ) self.stagedExecutions.append(stagedExec) @@ -4050,6 +4113,8 @@ def staging_recipe(self) -> "WritableWorkflowMetaConfigBlock": workflow_meta["workflow_config"] = self.workflow_config if self.params is not None: workflow_meta["params"] = self.params + if self.enabled_profiles is not None: + workflow_meta["profile"] = self.enabled_profiles if self.environment is not None: workflow_meta["environment"] = self.environment if self.placeholders is not None: @@ -4155,6 +4220,21 @@ def unmarshallConfig( self.trs_endpoint = workflow_meta.get("trs_endpoint") self.workflow_config = workflow_meta.get("workflow_config") self.params = workflow_meta.get("params") + profiles: "Optional[Union[str, Sequence[str]]]" = workflow_meta.get( + "profile" + ) + enabled_profiles: "Optional[Sequence[str]]" = None + if profiles is not None: + if isinstance(profiles, list): + enabled_profiles = profiles + elif isinstance(profiles, str): + split_by_comma = re.compile(r"[ \t]*,[ \t]*") + enabled_profiles = split_by_comma.split(profiles) + else: + # It should not happen + enabled_profiles = [str(profiles)] + + self.enabled_profiles = enabled_profiles self.environment = workflow_meta.get("environment") self.placeholders = workflow_meta.get("placeholders") self.outputs = workflow_meta.get("outputs") @@ -4412,6 +4492,28 @@ def unmarshallStage( if do_full_setup: self.setupEngine(offline=True) elif self.engineDesc is not None: + enabled_profiles: "Optional[Sequence[str]]" = None + if self.enabled_profiles is not None: + enabled_profiles = self.enabled_profiles + elif self.staged_setup.workflow_config is not None: + profiles: "Optional[Union[str, Sequence[str]]]" = ( + self.staged_setup.workflow_config.get( + self.engineDesc.engineName, {} + ).get("profile") + ) + if profiles is not None: + if isinstance(profiles, list): + enabled_profiles = profiles + elif isinstance(profiles, str): + split_by_comma = re.compile(r"[ \t]*,[ \t]*") + enabled_profiles = split_by_comma.split(profiles) + else: + # It should not happen + enabled_profiles = [str(profiles)] + + # Backward <=> forward compatibility + self.enabled_profiles = enabled_profiles + self.engine = self.wfexs.instantiateEngine( self.engineDesc, self.staged_setup ) @@ -4639,6 +4741,11 @@ def unmarshallExecute( os.path.relpath(putative_diagram, self.workDir), ) + profiles: "Optional[Sequence[str]]" = execution.get("profiles") + # Backward <=> forward compatibility + if profiles is None: + profiles = self.enabled_profiles + stagedExec = StagedExecution( exitVal=execution["exitVal"], augmentedInputs=execution["augmentedInputs"], @@ -4648,6 +4755,7 @@ def unmarshallExecute( 
ended=execution.get("ended", executionMarshalled), logfile=logfile, diagram=diagram, + profiles=profiles, ) self.stagedExecutions.append(stagedExec) except Exception as e: @@ -5101,6 +5209,7 @@ def createStageResearchObject( self.materializedParams, self.materializedEnvironment, self.expected_outputs, + profiles=self.enabled_profiles, ) # Save RO-crate as execution.crate.zip @@ -5174,6 +5283,7 @@ def createResultsResearchObject( for stagedExec in self.stagedExecutions: wrroc.addWorkflowExecution( stagedExec=stagedExec, + expected_outputs=self.expected_outputs, ) # Save RO-crate as execution.crate.zip diff --git a/wfexs_backend/workflow_engines/__init__.py b/wfexs_backend/workflow_engines/__init__.py index 46ec8e76..7cde8a3a 100644 --- a/wfexs_backend/workflow_engines/__init__.py +++ b/wfexs_backend/workflow_engines/__init__.py @@ -294,6 +294,7 @@ def materializeWorkflow( matWorfklowEngine: "MaterializedWorkflowEngine", consolidatedWorkflowDir: "AbsPath", offline: "bool" = False, + profiles: "Optional[Sequence[str]]" = None, ) -> "Tuple[MaterializedWorkflowEngine, Sequence[ContainerTaggedName]]": """ Method to ensure the workflow has been materialized. It returns a @@ -311,6 +312,7 @@ def launchWorkflow( inputs: "Sequence[MaterializedInput]", environment: "Sequence[MaterializedInput]", outputs: "Sequence[ExpectedOutput]", + profiles: "Optional[Sequence[str]]" = None, ) -> "StagedExecution": pass @@ -740,6 +742,7 @@ def materializeWorkflow( matWorfklowEngine: "MaterializedWorkflowEngine", consolidatedWorkflowDir: "AbsPath", offline: "bool" = False, + profiles: "Optional[Sequence[str]]" = None, ) -> "Tuple[MaterializedWorkflowEngine, Sequence[ContainerTaggedName]]": """ Method to ensure the workflow has been materialized. It returns the @@ -831,6 +834,7 @@ def launchWorkflow( inputs: "Sequence[MaterializedInput]", environment: "Sequence[MaterializedInput]", outputs: "Sequence[ExpectedOutput]", + profiles: "Optional[Sequence[str]]" = None, ) -> "StagedExecution": pass @@ -841,6 +845,7 @@ def ExecuteWorkflow( inputs: "Sequence[MaterializedInput]", environment: "Sequence[MaterializedInput]", outputs: "Sequence[ExpectedOutput]", + profiles: "Optional[Sequence[str]]" = None, ) -> "StagedExecution": # Now, deploy the containers to the local registry (needed for Docker) if matWfEng.containers is not None: @@ -854,7 +859,11 @@ def ExecuteWorkflow( # And once deployed, let's run the workflow! 
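+        # The enabled profiles (if any) are forwarded to the engine's
+        # launchWorkflow; engines without a notion of profiles ignore them.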
         stagedExec = matWfEng.instance.launchWorkflow(
-            matWfEng, inputs, environment, outputs
+            matWfEng,
+            inputs,
+            environment,
+            outputs,
+            profiles,
         )
 
         return stagedExec
@@ -868,9 +877,13 @@ def MaterializeWorkflowAndContainers(
         offline: "bool" = False,
         injectable_containers: "Sequence[Container]" = [],
         injectable_operational_containers: "Sequence[Container]" = [],
+        profiles: "Optional[Sequence[str]]" = None,
     ) -> "Tuple[MaterializedWorkflowEngine, ContainerEngineVersionStr, ContainerOperatingSystem, ProcessorArchitecture]":
         matWfEngV2, listOfContainerTags = matWfEng.instance.materializeWorkflow(
-            matWfEng, consolidatedWorkflowDir, offline=offline
+            matWfEng,
+            consolidatedWorkflowDir,
+            offline=offline,
+            profiles=profiles,
         )
 
         (
diff --git a/wfexs_backend/workflow_engines/cwl_engine.py b/wfexs_backend/workflow_engines/cwl_engine.py
index 2fd6132f..478fb740 100644
--- a/wfexs_backend/workflow_engines/cwl_engine.py
+++ b/wfexs_backend/workflow_engines/cwl_engine.py
@@ -399,6 +399,12 @@ def _materializeEngineVersionLocal(
             cwltoolPackage = self.DEVEL_CWLTOOL_PACKAGE
             cwltoolMatchOp = "@"
             inst_engineVersion = engineVersion[len(self.DEVEL_CWLTOOL_PACKAGE) + 1 :]
+        elif engineVersion.startswith("git+https") and "@" in engineVersion:
+            # This is for foreign development versions of cwltool
+            at_place = engineVersion.find("@")
+            cwltoolPackage = engineVersion[0:at_place]
+            cwltoolMatchOp = "@"
+            inst_engineVersion = engineVersion[at_place + 1 :]
         else:
             cwltoolPackage = self.CWLTOOL_PYTHON_PACKAGE
             cwltoolMatchOp = "=="
@@ -693,6 +699,7 @@ def materializeWorkflow(
         matWorkflowEngine: "MaterializedWorkflowEngine",
         consolidatedWorkflowDir: "AbsPath",
         offline: "bool" = False,
+        profiles: "Optional[Sequence[str]]" = None,
     ) -> "Tuple[MaterializedWorkflowEngine, Sequence[ContainerTaggedName]]":
         """
         Method to ensure the workflow has been materialized. In the case
@@ -920,6 +927,7 @@ def launchWorkflow(
         matInputs: "Sequence[MaterializedInput]",
         matEnvironment: "Sequence[MaterializedInput]",
         outputs: "Sequence[ExpectedOutput]",
+        profiles: "Optional[Sequence[str]]" = None,
     ) -> "StagedExecution":
         """
         Method to execute the workflow
diff --git a/wfexs_backend/workflow_engines/nextflow_engine.py b/wfexs_backend/workflow_engines/nextflow_engine.py
index 70d84671..8dcc5edd 100644
--- a/wfexs_backend/workflow_engines/nextflow_engine.py
+++ b/wfexs_backend/workflow_engines/nextflow_engine.py
@@ -298,8 +298,12 @@ def __init__(
         self.nxf_profile: "Sequence[str]"
         if isinstance(nxf_profile, list):
             self.nxf_profile = nxf_profile
+        elif isinstance(nxf_profile, str):
+            split_by_comma = re.compile(r"[ \t]*,[ \t]*")
+            self.nxf_profile = split_by_comma.split(nxf_profile)
         else:
-            self.nxf_profile = [cast("str", nxf_profile)]
+            # It should not happen
+            self.nxf_profile = [str(nxf_profile)]
 
         # Setting the assets directory
         self.nxf_assets = os.path.join(self.engineTweaksDir, "assets")
@@ -534,7 +538,7 @@ def identifyWorkflow(
                                     putativeEngineVerVal[1]
                                 )
                                 if matched:
-                                    if engineVer is None or engineVer < matched.group(
+                                    if engineVer is None or engineVer <= matched.group(
                                         1
                                     ):
                                         engineVer = cast(
@@ -687,6 +691,19 @@ def identifyWorkflow(
 
             candidateNf = cast("RelPath", os.path.relpath(entrypoint, nfDir))
 
+        # Last, as there is no safe way to learn about other needed
+        # files and directories, just include all the ones which are not
+        # hidden. In previous iterations the code looked for bin,
+        # templates, lib and nextflow_schema.json, in part based on
+        # https://training.nextflow.io/advanced/structure/
+        # But real-life workflows were using local files and directories
+        # relative to the workflow directory, from custom locations.
+        for child in nfDir.iterdir():
+            if child.name.startswith("."):
+                continue
+            if child.name not in nxfScripts:
+                nxfScripts.append(cast("RelPath", child.name))
+
         # The engine version should be used to create the id of the workflow language
         return engineVer, LocalWorkflow(
             dir=nfDir,
@@ -1336,6 +1353,7 @@ def materializeWorkflow(
         matWorkflowEngine: "MaterializedWorkflowEngine",
         consolidatedWorkflowDir: "AbsPath",
         offline: "bool" = False,
+        profiles: "Optional[Sequence[str]]" = None,
     ) -> "Tuple[MaterializedWorkflowEngine, Sequence[ContainerTaggedName]]":
         """
         Method to ensure the workflow has been materialized. In the case
@@ -1348,8 +1366,10 @@
         # nextflow config -flat
         localWf = matWorkflowEngine.workflow
         nxf_params = ["config", "-flat"]
-        if self.nxf_profile:
-            nxf_params.extend(["-profile", ",".join(self.nxf_profile)])
+        if profiles is None:
+            profiles = self.nxf_profile
+        if profiles:
+            nxf_params.extend(["-profile", ",".join(profiles)])
         else:
             nxf_params.extend(["-show-profiles"])
         nxf_params.append(localWf.dir.as_posix())
@@ -1427,9 +1447,13 @@
             if not relNxfScript.endswith(".nf"):
                 continue
 
-            nxfScript = os.path.normpath(os.path.join(nfDir, relNxfScript))
+            nxfScript = (nfDir / relNxfScript).resolve(strict=False)
+            # If it is a special directory, skip it!
+            if nxfScript.is_dir():
+                continue
+
             self.logger.debug(f"Searching container declarations at {relNxfScript}")
-            with open(nxfScript, encoding="utf-8") as wfH:
+            with nxfScript.open(mode="rt", encoding="utf-8") as wfH:
                 # This is needed for multi-line pattern matching
                 content = wfH.read()
@@ -1603,6 +1627,7 @@ def launchWorkflow(
         matInputs: "Sequence[MaterializedInput]",
         matEnvironment: "Sequence[MaterializedInput]",
         outputs: "Sequence[ExpectedOutput]",
+        profiles: "Optional[Sequence[str]]" = None,
     ) -> "StagedExecution":
         # TODO: implement usage of materialized environment variables
         if len(matInputs) == 0:  # Is list of materialized inputs empty?
@@ -1874,10 +1899,12 @@ def wfexs_allParams()
         ]
 
         profile_input: "Optional[MaterializedInput]" = None
-        if self.nxf_profile:
+        if profiles is None:
+            profiles = self.nxf_profile
+        if profiles:
             profile_input = MaterializedInput(
                 name=cast("SymbolicParamName", "-profile"),
-                values=[",".join(self.nxf_profile)],
+                values=[",".join(profiles)],
             )
             nxf_params.extend(
                 [profile_input.name, cast("str", profile_input.values[0])]
@@ -1947,4 +1974,5 @@
                 cast("RelPath", os.path.relpath(stdoutFilename, self.workDir)),
                 cast("RelPath", os.path.relpath(stderrFilename, self.workDir)),
             ],
+            profiles=profiles,
         )
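
Editor's note: the same profile-normalization snippet is repeated verbatim in `FromDescription`, `FromForm`, `fetchWorkflow`, `unmarshallConfig` and `unmarshallStage`. As a minimal sketch of what that shared logic amounts to (the helper name `normalize_profiles` is hypothetical, not part of this patch):

```python
import re
from typing import Optional, Sequence, Union

# Same separator the patch accepts: commas, optionally padded with spaces/tabs
_SPLIT_BY_COMMA = re.compile(r"[ \t]*,[ \t]*")


def normalize_profiles(
    profiles: "Optional[Union[str, Sequence[str]]]",
) -> "Optional[Sequence[str]]":
    """Normalize a raw `profile` declaration into a list of profile names."""
    if profiles is None:
        return None
    if isinstance(profiles, list):
        return profiles
    if isinstance(profiles, str):
        # "docker, test" -> ["docker", "test"]
        return _SPLIT_BY_COMMA.split(profiles)
    # It should not happen
    return [str(profiles)]
```

For instance, `normalize_profiles("docker, test")` yields `["docker", "test"]`, which `NextflowWorkflowEngine` later re-joins with `",".join(profiles)` when building the `-profile` argument.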