From aa006c82a57b854f324ce25f6fd09c7a71689bb5 Mon Sep 17 00:00:00 2001 From: Miguel Brown Date: Fri, 17 May 2024 11:28:53 -0400 Subject: [PATCH 1/4] :wrench: dropped pipefail in favor of no pipe with shell quote :hammer: some minor refactoring --- tools/bcftools_filter_vcf.cwl | 67 +++++++++++++++++++++-------------- tools/kf_mskcc_vcf2maf.cwl | 5 +-- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/tools/bcftools_filter_vcf.cwl b/tools/bcftools_filter_vcf.cwl index 1c5e5b3..c076b7a 100644 --- a/tools/bcftools_filter_vcf.cwl +++ b/tools/bcftools_filter_vcf.cwl @@ -5,46 +5,59 @@ doc: "More generic tool to take in an include expression and optionally an exclu requirements: - class: ShellCommandRequirement - class: DockerRequirement - dockerPull: 'pgc-images.sbgenomics.com/d3b-bixu/bvcftools:latest' + dockerPull: 'pgc-images.sbgenomics.com/d3b-bixu/bcftools:1.20' - class: ResourceRequirement - ramMin: 1000 - coresMin: 1 + ramMin: 16000 + coresMin: 8 - class: InlineJavascriptRequirement -baseCommand: [bash -c 'set -eo pipefail &&] +baseCommand: [] arguments: - - position: 1 + - position: 0 shellQuote: false valueFrom: >- ${ - var out_base = inputs.output_basename; - if (out_base == null){ - out_base = inputs.input_vcf.nameroot + ".bcf_filtered" + var cmd = "bcftools view" + if ( inputs.sample_name != null ){ + cmd += " --threads " + inputs.threads + " -s " + inputs.sample_name + " " + inputs.input_vcf.path + " | bcftools view "; } - var cmd = "bcftools view "; - if (inputs.include_expression != null){ - cmd += "--include \"" + inputs.include_expression + "\" " + inputs.input_vcf.path; - if (inputs.exclude_expression != null){ - cmd += " | bcftools view --exclude \"" + inputs.exclude_expression + "\"' -O z > " + out_base + ".vcf.gz;"; - } else { - cmd += " -O z > " + out_base + ".vcf.gz;"; - } - } else if (inputs.include_expression == null && inputs.exclude_expression != null){ - cmd += "--exclude \"" + inputs.exclude_expression + "\" " + 
inputs.input_vcf.path + " -O z > " + out_base + ".vcf.gz;"; - } else if (inputs.include_expression == null && inputs.exclude_expression == null){ - cmd = "cp " + inputs.input_vcf.path + " ./" + out_base + ".vcf.gz;"; - } - cmd += "tabix " + out_base + ".vcf.gz;'" return cmd; } + - position: 2 + shellQuote: false + valueFrom: >- + ${ + var arg = " -o " + inputs.output_basename + ".bcf_filtered" + if (inputs.output_type == "v"){ + arg += ".vcf" + } else if (inputs.output_type == "z"){ + arg += ".vcf.gz && tabix " + inputs.output_basename + ".bcf_filtered.vcf.gz" + } else if (inputs.output_type == "b"){ + arg += ".bcf.gz" + } else{ + arg += ".bcf" + } + if (inputs.sample_name == null){ + arg = inputs.input_vcf.path + arg; + } + return arg; + } inputs: input_vcf: File - include_expression: ['null', string] - exclude_expression: ['null', string] - output_basename: ['null', string] + include_expression: { type: 'string?', doc: "See bcftools docs for valid expression. Can't be used at the same time as exclude_expression. Use double quotes when a string needs to be quoted", + inputBinding: { position: 1, prefix: "--include", shellQuote: true} } + threads: { type: 'int?', default: 4, inputBinding: {position: 1, prefix: "--threads"} } + exclude_expression: { type: 'string?', doc: "See bcftools docs for valid expression. Can't be used at the same time as include_expression. 
Use double quotes when a string needs to be quoted", + inputBinding: { position: 1, prefix: "--exclude", shellQuote: true} } + filter_expression: { type: 'string?', doc: "Add values from FILTER field to subset on", + inputBinding: { position: 1, prefix: "-f"}} + output_type: { type: [ 'null', {type: enum, name: output_type, symbols: [ "u", "b", "v", "z"]}], + inputBinding: { position: 1, prefix: "-O"}, default: "z" } + sample_name: { type: 'string?', doc: "csv string of samples if user wishes to apply filtering to and output specific samples"} + output_basename: string outputs: filtered_vcf: type: File outputBinding: - glob: '*.vcf.gz' - secondaryFiles: [.tbi] + glob: '*bcf_filtered*' + secondaryFiles: ['.tbi?'] diff --git a/tools/kf_mskcc_vcf2maf.cwl b/tools/kf_mskcc_vcf2maf.cwl index bbfabcd..46f7f16 100644 --- a/tools/kf_mskcc_vcf2maf.cwl +++ b/tools/kf_mskcc_vcf2maf.cwl @@ -20,7 +20,7 @@ arguments: - position: 1 shellQuote: false valueFrom: >- - $(inputs.input_vcf.path) > input_file.vcf + > input_file.vcf && perl vcf2maf.pl --input-vcf input_file.vcf --output-maf $(inputs.output_basename).$(inputs.tool_name).vep.maf @@ -28,7 +28,8 @@ arguments: inputs: reference: { type: 'File', secondaryFiles: [.fai], doc: "Fasta genome assembly with index", inputBinding: {position: 2, prefix: "--ref-fasta"} } - input_vcf: { type: 'File', secondaryFiles: [.tbi], doc: "VEP annotated vcf file." 
} + input_vcf: { type: File, doc: "VEP annotated vcf file.", + inputBinding: { position: 0 } } output_basename: string tumor_id: { type: string, inputBinding: {position: 3, prefix: "--tumor-id"} } normal_id: { type: string, inputBinding: {position: 4, prefix: "--normal-id"} } From 92f0e8f9a6aa20ffed90eb5735fcaabf5366ba47 Mon Sep 17 00:00:00 2001 From: Miguel Brown Date: Fri, 17 May 2024 14:18:12 -0400 Subject: [PATCH 2/4] :wrench: fixed bug from permissive glob --- tools/bcftools_filter_vcf.cwl | 2 +- workflows/kfdrc-germline-snv-annot-workflow.cwl | 2 +- workflows/kfdrc-somatic-snv-annot-workflow.cwl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/bcftools_filter_vcf.cwl b/tools/bcftools_filter_vcf.cwl index c076b7a..241c563 100644 --- a/tools/bcftools_filter_vcf.cwl +++ b/tools/bcftools_filter_vcf.cwl @@ -59,5 +59,5 @@ outputs: filtered_vcf: type: File outputBinding: - glob: '*bcf_filtered*' + glob: "*.{v,b}cf{,.gz}" secondaryFiles: ['.tbi?'] diff --git a/workflows/kfdrc-germline-snv-annot-workflow.cwl b/workflows/kfdrc-germline-snv-annot-workflow.cwl index 2c00964..c761209 100644 --- a/workflows/kfdrc-germline-snv-annot-workflow.cwl +++ b/workflows/kfdrc-germline-snv-annot-workflow.cwl @@ -361,6 +361,6 @@ sbg:license: Apache License 2.0 sbg:publisher: KFDRC "sbg:links": -- id: 'https://github.com/kids-first/kids-first/kf-annotation-tools/releases/tag/v1.1.0' +- id: 'https://github.com/kids-first/kf-annotation-tools/releases/tag/v1.2.2' label: github-release diff --git a/workflows/kfdrc-somatic-snv-annot-workflow.cwl b/workflows/kfdrc-somatic-snv-annot-workflow.cwl index 909df6f..53a6cc3 100644 --- a/workflows/kfdrc-somatic-snv-annot-workflow.cwl +++ b/workflows/kfdrc-somatic-snv-annot-workflow.cwl @@ -352,5 +352,5 @@ $namespaces: "sbg:license": Apache License 2.0 "sbg:publisher": KFDRC "sbg:links": -- id: 
'https://github.com/kids-first/kf-annotation-tools/releases/tag/v1.2.2' label: github-release From 3d0e9298d928f06f84be9c2d4ac1f6860d16f81a Mon Sep 17 00:00:00 2001 From: migbro Date: Fri, 17 May 2024 18:25:24 +0000 Subject: [PATCH 3/4] update docker table --- docs/dockers_consensus.md | 2 +- docs/dockers_somatic.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/dockers_consensus.md b/docs/dockers_consensus.md index 349d1e7..230e064 100644 --- a/docs/dockers_consensus.md +++ b/docs/dockers_consensus.md @@ -3,7 +3,7 @@ TOOL|DOCKER -|- bcftools_annotate.cwl|pgc-images.sbgenomics.com/d3b-bixu/vcfutils:latest -bcftools_filter_vcf.cwl|pgc-images.sbgenomics.com/d3b-bixu/bvcftools:latest +bcftools_filter_vcf.cwl|pgc-images.sbgenomics.com/d3b-bixu/bcftools:1.20 bcftools_strip_ann.cwl|pgc-images.sbgenomics.com/d3b-bixu/vcfutils:latest echtvar_anno.cwl|pgc-images.sbgenomics.com/d3b-bixu/echtvar:0.2.0 generic_rename_outputs.cwl|None diff --git a/docs/dockers_somatic.md b/docs/dockers_somatic.md index cc99a97..cfc0b32 100644 --- a/docs/dockers_somatic.md +++ b/docs/dockers_somatic.md @@ -3,7 +3,7 @@ TOOL|DOCKER -|- add_strelka2_fields.cwl|pgc-images.sbgenomics.com/d3b-bixu/add-strelka2-fields:1.0.0 -bcftools_filter_vcf.cwl|pgc-images.sbgenomics.com/d3b-bixu/bvcftools:latest +bcftools_filter_vcf.cwl|pgc-images.sbgenomics.com/d3b-bixu/bcftools:1.20 bcftools_strip_ann.cwl|pgc-images.sbgenomics.com/d3b-bixu/vcfutils:latest echtvar_anno.cwl|pgc-images.sbgenomics.com/d3b-bixu/echtvar:0.2.0 gatk_variant_filter.cwl|pgc-images.sbgenomics.com/d3b-bixu/gatk:4.1.1.0 From 44cd510e5e80cce022572c0f16f18b10d8bacd32 Mon Sep 17 00:00:00 2001 From: Dan Miller Date: Fri, 17 May 2024 21:24:08 +0000 Subject: [PATCH 4/4] :wrench: return pipefail --- tools/bcftools_filter_vcf.cwl | 66 ++++++++++++++++------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/tools/bcftools_filter_vcf.cwl b/tools/bcftools_filter_vcf.cwl index 241c563..4039317 100644 
--- a/tools/bcftools_filter_vcf.cwl +++ b/tools/bcftools_filter_vcf.cwl @@ -10,49 +10,45 @@ requirements: ramMin: 16000 coresMin: 8 - class: InlineJavascriptRequirement + - class: InitialWorkDirRequirement + listing: + - entryname: "run_filter.sh" + entry: | + #!/usr/bin/env bash + set -xeo pipefail + + command=cat + if [[ $(inputs.sample_name) != null ]] + then + command="bcftools view --threads $(inputs.threads) -s $(inputs.sample_name)" + fi + $command $(inputs.input_vcf.path) \ + | bcftools view \ + --threads $(inputs.threads) \ + -O $(inputs.output_type) \ + -o $(inputs.output_basename).bcf_filtered.$(inputs.output_type == "v" ? "vcf" : inputs.output_type == "z" ? "vcf.gz" : inputs.output_type == "b" ? "bcf.gz" : "bcf") \ + $(inputs.exclude_expression == null ? "" : "--exclude " + "'" + inputs.exclude_expression + "'") \ + $(inputs.include_expression == null ? "" : "--include " + "'" + inputs.include_expression + "'") \ + $(inputs.filter_expression == null ? "" : "-f " + inputs.filter_expression) + if [[ $(inputs.output_type) == z ]] + then + tabix $(inputs.output_basename).bcf_filtered.vcf.gz + fi + baseCommand: [] arguments: - position: 0 shellQuote: false valueFrom: >- - ${ - var cmd = "bcftools view" - if ( inputs.sample_name != null ){ - cmd += " --threads " + inputs.threads + " -s " + inputs.sample_name + " " + inputs.input_vcf.path + " | bcftools view "; - } - return cmd; - } - - position: 2 - shellQuote: false - valueFrom: >- - ${ - var arg = " -o " + inputs.output_basename + ".bcf_filtered" - if (inputs.output_type == "v"){ - arg += ".vcf" - } else if (inputs.output_type == "z"){ - arg += ".vcf.gz && tabix " + inputs.output_basename + ".bcf_filtered.vcf.gz" - } else if (inputs.output_type == "b"){ - arg += ".bcf.gz" - } else{ - arg += ".bcf" - } - if (inputs.sample_name == null){ - arg = inputs.input_vcf.path + arg; - } - return arg; - } + /bin/bash run_filter.sh inputs: input_vcf: File - include_expression: { type: 'string?', doc: "See bcftools docs 
for valid expression. Can't be used at the same time as exclude_expression. Use double quotes when a string needs to be quoted", - inputBinding: { position: 1, prefix: "--include", shellQuote: true} } - threads: { type: 'int?', default: 4, inputBinding: {position: 1, prefix: "--threads"} } - exclude_expression: { type: 'string?', doc: "See bcftools docs for valid expression. Can't be used at the same time as include_expression. Use double quotes when a string needs to be quoted", - inputBinding: { position: 1, prefix: "--exclude", shellQuote: true} } - filter_expression: { type: 'string?', doc: "Add values from FILTER field to subset on", - inputBinding: { position: 1, prefix: "-f"}} - output_type: { type: [ 'null', {type: enum, name: output_type, symbols: [ "u", "b", "v", "z"]}], - inputBinding: { position: 1, prefix: "-O"}, default: "z" } + include_expression: { type: 'string?', doc: "See bcftools docs for valid expression. Can't be used at the same time as exclude_expression. Use double quotes when a string needs to be quoted"} + threads: { type: 'int?', default: 4 } + exclude_expression: { type: 'string?', doc: "See bcftools docs for valid expression. Can't be used at the same time as include_expression. Use double quotes when a string needs to be quoted"} + filter_expression: { type: 'string?', doc: "Add values from FILTER field to subset on"} + output_type: { type: [ 'null', {type: enum, name: output_type, symbols: [ "u", "b", "v", "z"]}], doc: "Passed to bcftools view -O and used to pick the output extension: v=.vcf, z=.vcf.gz (tabix indexed), b=.bcf.gz, u=.bcf", default: "z" } sample_name: { type: 'string?', doc: "csv string of samples if user wishes to apply filtering to and output specific samples"} output_basename: string outputs: