diff --git a/README.md b/README.md index 9499dff..82eecbb 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,95 @@ bioconductor-customprodb 1.28.0,1.26.0,1.24.0,1.22.0,1.14.0 Generate customized ... ``` +### bioconda2cwldocker + +This script reads a conda env yaml file and replaces, for each package in the environment, the images defined +in the CWL or YAML files inside the directory passed via the option **cwl_path**. + +#### Example + +We would like to use the CWLs defined in the repo https://github.com/ncbi/cwl-ngs-workflows-cbb with the versions +defined in this conda env: + +###### Conda env file + +```yaml +name: rnaseq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bedtools=2.29.2 + - fastqc=0.11.9 + - sra-tools=2.10.8 + - star=2.7.5a +``` + +###### Cloning the repo + +```bash +$ git clone https://github.com/ncbi/cwl-ngs-workflows-cbb +Cloning into 'cwl-ngs-workflows-cbb'... +remote: Enumerating objects: 94, done. +remote: Counting objects: 100% (94/94), done. +remote: Compressing objects: 100% (69/69), done. +remote: Total 1924 (delta 47), reused 50 (delta 25), pack-reused 1830 +Receiving objects: 100% (1924/1924), 319.82 KiB | 3.48 MiB/s, done. +Resolving deltas: 100% (1216/1216), done. 
+``` + +###### Print defined images for the tools in the CWLs + +```bash +$ cat cwl-ngs-workflows-cbb/tools/bedtools/bedtools.yml + class: DockerRequirement + dockerPull: quay.io/biocontainers/bedtools:2.28.0--hdf88d34_0 + +$ cat cwl-ngs-workflows-cbb/tools/fastqc/fastqc.yml + class: DockerRequirement + dockerPull: quay.io/biocontainers/fastqc:0.11.8--1 + +$ cat cwl-ngs-workflows-cbb/tools/sra-toolkit/sra-toolkit.yml + class: DockerRequirement + dockerPull: quay.io/biocontainers/sra-tools:2.10.7--pl526haddd2b5_1 + +$ cat cwl-ngs-workflows-cbb/tools/star/star.yml + class: DockerRequirement + dockerPull: quay.io/biocontainers/star:2.7.5a--0 +``` + +###### Running bioconda2cwldocker + +```bash +$ bioconda2cwldocker --conda_env_file conda-env.yaml --cwl_path cwl-ngs-workflows-cbb/ +bedtools with version 2.29.2 update image to: quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0 + cwl-ngs-workflows-cbb/tools/bedtools/bedtools.yml with old image replaced: quay.io/biocontainers/bedtools:2.28.0--hdf88d34_0 +fastqc with version 0.11.9 update image to: quay.io/biocontainers/fastqc:0.11.9--0 + cwl-ngs-workflows-cbb/tools/fastqc/fastqc.yml with old image replaced: quay.io/biocontainers/fastqc:0.11.8--1 +sra-tools with version 2.10.8 update image to: quay.io/biocontainers/sra-tools:2.10.8--pl526haddd2b5_0 + cwl-ngs-workflows-cbb/tools/sra-toolkit/sra-toolkit.yml with old image replaced: quay.io/biocontainers/sra-tools:2.10.7--pl526haddd2b5_1 +``` + +###### Print new defined images for the tools in the CWLs + +```bash +$ cat cwl-ngs-workflows-cbb/tools/bedtools/bedtools.yml + class: DockerRequirement + dockerPull: quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0 + +$ cat cwl-ngs-workflows-cbb/tools/fastqc/fastqc.yml + class: DockerRequirement + dockerPull: quay.io/biocontainers/fastqc:0.11.9--0 + +$ cat cwl-ngs-workflows-cbb/tools/sra-toolkit/sra-toolkit.yml + class: DockerRequirement + dockerPull: quay.io/biocontainers/sra-tools:2.10.8--pl526haddd2b5_0 + +$ cat 
#!/usr/bin/env python
# ---------------------------------------------------------------------------
# File: src/bioconda2biocontainer/entry_point_update_cwl_docker.py
# Console entry point installed as ``bioconda2cwldocker``.
# ---------------------------------------------------------------------------
import argparse

import yaml

from bioconda2biocontainer.update_cwl_docker_image import update_cwl_docker_from_tool_name


def main():
    """Update the Docker images of the CWL files from a conda env yaml file.

    Parses ``--conda_env_file`` and ``--cwl_path`` from the command line,
    loads the conda env file and calls ``update_cwl_docker_from_tool_name``
    once for every entry listed under its ``dependencies`` key.
    """
    parser = argparse.ArgumentParser(
        description='Replace Docker image in CWL from conda env yaml file')

    parser.add_argument('--conda_env_file', help='Conda env yaml file',
                        required=True)
    parser.add_argument('--cwl_path', help='Path to the CWL directory',
                        required=True)
    args = parser.parse_args()

    with open(args.conda_env_file) as fin:
        # A conda env file is plain data, so the safe loader is sufficient
        # and never instantiates arbitrary Python objects.
        conda_env = yaml.safe_load(fin)

    # An empty file loads as None and an empty "dependencies:" key loads as
    # None as well; both mean "nothing to update" rather than an error.
    dependencies = None
    if isinstance(conda_env, dict):
        dependencies = conda_env.get('dependencies')
    if not isinstance(dependencies, list):
        return
    for d in dependencies:
        update_cwl_docker_from_tool_name(d, args.cwl_path)


if __name__ == '__main__':
    main()


# ---------------------------------------------------------------------------
# File: src/bioconda2biocontainer/update_cwl_docker_image.py
# Helpers that rewrite the ``dockerPull`` image of CWL/YAML files.
# ---------------------------------------------------------------------------
import os

import yaml

from bioconda2biocontainer.biocontainer import find_latest_image

# True until the "<package> with version ... update image to" header has been
# printed for the package currently being processed; reset to True by
# update_cwl_docker_from_tool_name at the start of every package.
PRINT_HEADER = True


def __replace_docker_image(f, old, new, package_name, package_version):
    """Replace every occurrence of the image ``old`` by ``new`` in file ``f``.

    The replacement is textual, so the rest of the file (comments, key
    order, indentation) is left untouched. The package header is printed
    only for the first file modified for a package, followed by one line
    per modified file.

    :param f: path of the file to rewrite in place
    :param old: image string currently present in the file
    :param new: image string to write instead
    :param package_name: package name, used only for the printed header
    :param package_version: package version, used only for the printed header
    """
    global PRINT_HEADER
    if PRINT_HEADER:
        print('{} with version {} update image to: {}'.format(
            package_name, package_version,
            new))
        PRINT_HEADER = False
    print('\t{} with old image replaced: {}'.format(f, old))
    with open(f) as fin:
        content = fin.read()
    with open(f, 'w') as fout:
        fout.write(content.replace(old, new))


def __load_cwl(f, package_name, package_version, image_name):
    """Parse file ``f`` as YAML and return the loaded document.

    Returns ``None`` when the file is not valid YAML, so a single broken
    file does not abort the walk over the whole directory. The remaining
    parameters are unused and kept for signature compatibility.
    """
    with open(f) as fin:
        try:
            return yaml.safe_load(fin)
        except yaml.YAMLError:
            # Covers scanner, parser and constructor errors alike.
            return None


def __image_repository(image):
    """Return ``image`` without its tag or digest.

    A ``:`` is treated as the tag separator only when it appears after the
    last ``/``, so a registry port (``host:5000/name``) is not mistaken
    for a tag.
    """
    name = image.split('@', 1)[0]
    colon = name.rfind(':')
    if colon > name.rfind('/'):
        return name[:colon]
    return name


def __is_outdated_image(current, image_name):
    """Tell whether ``current`` is the same repository as ``image_name`` but a different image."""
    return isinstance(current, str) and current != image_name and \
        __image_repository(current) == __image_repository(image_name)


def __docker_pulls(section):
    """Yield the ``dockerPull`` strings of the DockerRequirement entries in ``section``.

    ``section`` is the value of a ``hints`` or ``requirements`` key. Both the
    map form (``DockerRequirement: {dockerPull: ...}``) and the list form
    (``- class: DockerRequirement``) are handled; anything else yields nothing.
    """
    if isinstance(section, dict):
        entries = [section.get('DockerRequirement')]
    elif isinstance(section, list):
        entries = [e for e in section
                   if isinstance(e, dict) and e.get('class') == 'DockerRequirement']
    else:
        entries = []
    for entry in entries:
        if isinstance(entry, dict) and isinstance(entry.get('dockerPull'), str):
            yield entry['dockerPull']


def __replace_in_cwl(f, package_name, package_version, image_name):
    """Update the DockerRequirement image declared in the CWL file ``f``.

    The file is rewritten when a DockerRequirement found under ``hints`` or
    ``requirements`` pulls the same repository as ``image_name`` with a
    different tag. Files that cannot be parsed, or whose top level is not a
    mapping, are skipped.
    """
    y = __load_cwl(f, package_name, package_version, image_name)
    if not isinstance(y, dict):
        return
    replaced = set()
    for key in ('hints', 'requirements'):
        for current in __docker_pulls(y.get(key)):
            # The textual replacement already covers every occurrence in the
            # file, so each old image is handled only once.
            if current not in replaced and __is_outdated_image(current, image_name):
                replaced.add(current)
                __replace_docker_image(f, current, image_name,
                                       package_name, package_version)


def __replace_in_yml(f, package_name, package_version, image_name):
    """Update the top-level ``dockerPull`` image declared in the YAML file ``f``.

    The file is rewritten when its top level is a mapping whose
    ``dockerPull`` value pulls the same repository as ``image_name`` with a
    different tag. Any other document is skipped.
    """
    y = __load_cwl(f, package_name, package_version, image_name)
    # A scalar or list document has no "dockerPull" key to update.
    if not isinstance(y, dict):
        return
    current = y.get('dockerPull')
    if __is_outdated_image(current, image_name):
        __replace_docker_image(f, current, image_name,
                               package_name, package_version)


def update_cwl_docker_from_biocontainers(package_name, package_version, cwl_path):
    """Replace the images of ``package_name`` in every CWL/YAML file under ``cwl_path``.

    The image is looked up with ``find_latest_image``; when the result is
    not a dict, a message is printed and no file is touched. Otherwise
    ``cwl_path`` is walked recursively and every ``.cwl``, ``.yml`` and
    ``.yaml`` file is checked against the ``image_name`` entry of the result.

    :param package_name: bioconda package name
    :param package_version: package version to look up
    :param cwl_path: directory walked recursively
    """
    # NOTE(review): the meaning of the positional flags is defined by
    # find_latest_image, which is not visible here - confirm against it.
    biocontainer_image = find_latest_image(package_name, package_version, False,
                                           False, False, 'Docker', None)
    if not isinstance(biocontainer_image, dict):
        print('There is not biocontainer image for {} version {}'.format(
            package_name, package_version))
        return
    image_name = biocontainer_image['image_name']
    for root, dirs, files in os.walk(cwl_path):
        for f in files:
            f = os.path.join(root, f)
            if f.endswith('.cwl'):
                __replace_in_cwl(f, package_name, package_version, image_name)
            elif f.endswith(('.yml', '.yaml')):
                __replace_in_yml(f, package_name, package_version, image_name)


def __parse_conda_dependency(tool):
    """Split a conda dependency entry into ``(name, version)``.

    Accepts ``name=version``, ``name==version`` and ``name=version=build``,
    each optionally prefixed with ``channel::``. Returns ``None`` for
    anything else: non-string entries (such as the nested ``pip`` section),
    entries without a version, and range constraints such as ``name>=1.0``,
    which do not name a single version.
    """
    if not isinstance(tool, str):
        return None
    spec = tool.strip().split('::')[-1]
    name, separator, rest = spec.partition('=')
    name = name.strip()
    if not separator or not name or name[-1] in '<>!~':
        return None
    # lstrip drops the second "=" of "==", the split drops a build string.
    version = rest.lstrip('=').split('=')[0].strip()
    if not version:
        return None
    return name, version


def update_cwl_docker_from_tool_name(tool, cwl_path):
    """Update the CWL/YAML files under ``cwl_path`` for one conda dependency.

    :param tool: one entry of the ``dependencies`` list of a conda env
        file, e.g. ``star=2.7.5a``; entries that do not pin a version
        are ignored
    :param cwl_path: directory holding the CWL/YAML files
    """
    global PRINT_HEADER
    # Each package prints its own header before its first replaced file.
    PRINT_HEADER = True
    parsed = __parse_conda_dependency(tool)
    if parsed is not None:
        update_cwl_docker_from_biocontainers(parsed[0], parsed[1], cwl_path)