Skip to content

Commit

Permalink
Add Dockerfile and Docker integration test; update samtools dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
fgypas committed Oct 20, 2024
1 parent b631f14 commit caefd94
Show file tree
Hide file tree
Showing 8 changed files with 426 additions and 3 deletions.
35 changes: 32 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: CI

on:
# push:
# branches:
# - '*'
push:
branches:
- '*'
pull_request:
branches:
- main
Expand Down Expand Up @@ -214,3 +214,32 @@ jobs:
- name: Run SRA downloads workflow
run: bash tests/test_sra_download_with_conda/test.local.sh

# Job: run the Docker-based integration test of the workflow
# (tests/test_integration_workflow_with_docker/test.local.sh).
integration-docker:
# Wait for the snakemake-graphs-format job before starting.
needs:
- snakemake-graphs-format
runs-on: ubuntu-20.04
defaults:
run:
# Every `run` step is executed in a bash login shell (`-l`).
# NOTE(review): presumably required so that the conda environment configured
# by setup-miniconda is activated in each step -- confirm against the action's
# documentation.
shell: bash -l {0}
steps:

- name: Checkout zarp repository
uses: actions/checkout@v4

# Create and activate the "zarp" conda environment from install/environment.yml.
- name: Setup miniconda & zarp env
uses: conda-incubator/setup-miniconda@v3
with:
python-version: "3.10"
mamba-version: "*"
channels: conda-forge
channel-priority: true
auto-update-conda: false
activate-environment: zarp
environment-file: install/environment.yml
auto-activate-base: false

# Add the packages listed in install/environment.dev.yml to the active
# environment (addressed through $CONDA_PREFIX).
- name: Update zarp env with dev. packages
run: mamba env update -p $CONDA_PREFIX -f install/environment.dev.yml

# NOTE(review): the test script calls `docker pull` / `docker run`, so this
# step relies on Docker being available on the runner and is executed
# without an interactive terminal.
- name: Run test script
run: bash tests/test_integration_workflow_with_docker/test.local.sh
37 changes: 37 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Image for running the ZARP Snakemake workflow. The "zarp" conda environment
# and all per-rule conda environments are created at build time, so that a
# container only needs the user's data mounted at /data.
FROM continuumio/miniconda3:24.7.1-0

# Build the "zarp" environment first. This layer depends only on
# environment.yml, so changes to the workflow or to the test inputs copied
# further down do not force the environment to be solved and downloaded again.
COPY install/environment.yml /environment.yml
RUN conda install -c conda-forge mamba --yes && \
    mamba env create -f /environment.yml && \
    conda clean --all --yes

# Activate the zarp environment in bash shells that read ~/.bashrc.
RUN echo "source activate zarp" > ~/.bashrc

# SNAKEMAKE_CONDA_PREFIX: directory in which Snakemake creates the per-rule
#   conda environments (see the snakemake call below).
# PATH: put the executables of the zarp environment (including snakemake)
#   first, also for shells that do not read ~/.bashrc.
ENV SNAKEMAKE_CONDA_PREFIX="/conda_envs" \
    PATH=/opt/conda/envs/zarp/bin:$PATH

# Workflow code and resources.
COPY workflow /workflow
COPY resources /resources

# Test inputs: Snakemake needs a valid configuration and sample table to
# resolve the rules whose environments have to be created.
COPY tests/input_files/config.yaml /config.yaml
COPY tests/input_files/samples.tsv /samples.tsv
COPY tests/input_files/rule_config.yaml /rule_config.yaml
COPY tests/input_files/project1/synthetic.mate_1.fastq.gz /project1/synthetic.mate_1.fastq.gz
COPY tests/input_files/project1/synthetic.mate_2.fastq.gz /project1/synthetic.mate_2.fastq.gz
COPY tests/input_files/project2/synthetic.mate_1.fastq.gz /project2/synthetic.mate_1.fastq.gz
COPY tests/input_files/homo_sapiens/annotation.gtf /annotation.gtf
COPY tests/input_files/homo_sapiens/genome.fa /genome.fa

# Patch the STAR environment (drop the conda-forge channel entry, 2.7.11 ->
# 2.7.10) and rewrite the relative test paths to the locations used above.
# Absolute paths are used so the step does not depend on the working directory.
RUN sed -i 's# - conda-forge##' /workflow/envs/STAR.yaml && \
    sed -i 's#2.7.11#2.7.10#' /workflow/envs/STAR.yaml && \
    sed -i 's#../input_files/project1/#/project1/#g' /samples.tsv && \
    sed -i 's#../input_files/project2/#/project2/#g' /samples.tsv && \
    sed -i 's#../input_files/homo_sapiens/##g' /samples.tsv && \
    sed -i 's#../input_files/##' /config.yaml

# Create all per-rule conda environments without running the workflow, and
# drop the package caches in the same layer that filled them.
RUN snakemake -p --snakefile /workflow/Snakefile --configfile /config.yaml --cores 4 --use-conda --conda-create-envs-only --verbose && \
    conda clean --all --yes

# Remove the test configuration and reads, and create the mount point for user
# data. The removal only hides the files from the final filesystem; they are
# still stored in the COPY layers above and do not reduce the image size.
RUN rm /config.yaml /samples.tsv /rule_config.yaml /project1/synthetic.mate_1.fastq.gz /project1/synthetic.mate_2.fastq.gz /project2/synthetic.mate_1.fastq.gz && \
    mkdir -p /data
18 changes: 18 additions & 0 deletions tests/input_files/config_docker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
# Workflow configuration for the Docker integration test
# (tests/test_integration_workflow_with_docker/test.local.sh), which copies this
# file into a data/ directory and bind-mounts that directory at /data in the
# container.
# NOTE(review): the "data/..." paths are relative; presumably they resolve
# against the container's working directory (/) -- confirm.

# Required fields
samples: "data/samples_docker.tsv"
output_dir: "data/results"
log_dir: "data/logs"
cluster_log_dir: "data/logs/cluster"
kallisto_indexes: "data/results/kallisto_indexes"
salmon_indexes: "data/results/salmon_indexes"
star_indexes: "data/results/star_indexes"
alfa_indexes: "data/results/alfa_indexes"
# Optional fields
rule_config: "data/rule_config.yaml"
report_description: "No description provided by user"
# NOTE(review): unlike the other paths this one is not under data/; verify that
# it resolves to an existing file inside the container.
report_logo: "../../images/logo.128px.png"
report_url: "https://zavolan.biozentrum.unibas.ch/"
author_name: "NA"
author_email: "NA"
...
3 changes: 3 additions & 0 deletions tests/input_files/samples_docker.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
sample seqmode fq1 index_size kmer fq1_3p fq1_5p organism gtf genome sd mean libtype fq1_polya_3p fq1_polya_5p fq2 fq2_3p fq2_5p fq2_polya_3p fq2_polya_5p
synthetic_10_reads_paired_synthetic_10_reads_paired pe data/project1/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens data/annotation.gtf data/genome.fa 100 250 ISF AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX data/project1/synthetic.mate_2.fastq.gz AGATCGGAAGAGCGT XXXXXXXXXXXXX XXXXXXXXXXXXXXXXX TTTTTTTTTTTTTTTTT
synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se data/project2/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens data/annotation.gtf data/genome.fa 100 250 SF AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX
151 changes: 151 additions & 0 deletions tests/test_integration_workflow_with_docker/expected_output.files

Large diffs are not rendered by default.

133 changes: 133 additions & 0 deletions tests/test_integration_workflow_with_docker/expected_output.md5

Large diffs are not rendered by default.

51 changes: 51 additions & 0 deletions tests/test_integration_workflow_with_docker/test.local.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash
#
# Integration test: run the workflow inside the zarp Docker image on the
# synthetic test data and compare the md5 sums of selected output files with
# the expected values in expected_output.md5.

# Set up test environment
set -eo pipefail  # ensures that script exits at first command that exits with non-zero status
set -u  # ensures that script exits when unset variables are used
set -x  # facilitates debugging by printing out executed commands
user_dir=$PWD
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
image="zavolab/zarp:1.0.0-rc.1"

# Tear down test environment.
# Paths are anchored at $script_dir so that nothing outside the test directory
# is removed, whatever the current directory is when the script exits.
cleanup () {
    rc=$?
    rm -rf "$script_dir/.cache/"
    rm -rf "$script_dir/.config/"
    rm -rf "$script_dir/.fontconfig/"
    rm -rf "$script_dir/.java/"
    rm -rf "$script_dir/.snakemake/"
    rm -rf "$script_dir/data/"
    cd "$user_dir"
    echo "Exit status: $rc"
}
# Installed only after user_dir and script_dir are set: the handler reads both
# and the script runs with `set -u`.
trap cleanup EXIT

cd "$script_dir"

# Assemble the directory that is mounted into the container at /data
mkdir -p data
cp ../../tests/input_files/homo_sapiens/genome.fa data/genome.fa
cp ../../tests/input_files/homo_sapiens/annotation.gtf data/annotation.gtf
cp -r ../../tests/input_files/project1 data/project1
cp -r ../../tests/input_files/project2 data/project2
cp -r ../../tests/input_files/config_docker.yaml data/config_docker.yaml
cp ../../tests/input_files/rule_config.yaml data/rule_config.yaml
cp ../../tests/input_files/samples_docker.tsv data/samples_docker.tsv

# Pull the zarp container
docker pull "$image"

# Run tests with Docker.
# No -i/-t: the test is non-interactive and is also run from CI, where no
# terminal is attached and `docker run -it` aborts with
# "the input device is not a TTY".
docker run \
    --platform linux/x86_64 \
    --mount type=bind,source="$script_dir/data",target=/data \
    "$image" \
    snakemake \
    -p \
    --snakefile /workflow/Snakefile \
    --configfile data/config_docker.yaml \
    --cores 4 --use-conda --verbose

# Check md5 sum of some output files
find data/results/ -type f -name '*.gz' -exec gunzip '{}' \;
# The archive path is passed as a positional argument instead of being pasted
# into the shell command, so names with spaces or shell metacharacters are safe.
find data/results/homo_sapiens/ -type f -name '*.zip' \
    -exec sh -c 'unzip -o "$1" -d "$(dirname "$1")"' _ '{}' \;
md5sum --check "expected_output.md5"
1 change: 1 addition & 0 deletions workflow/envs/samtools.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
channels:
- conda-forge
- bioconda
dependencies:
- samtools=1.19.2
Expand Down

0 comments on commit caefd94

Please sign in to comment.