Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a few new functionalities to the DIAMOND tool #5864

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
211 changes: 145 additions & 66 deletions tools/diamond/diamond.xml

Large diffs are not rendered by default.

56 changes: 23 additions & 33 deletions tools/diamond/diamond_makedb.xml
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>Build database from a FASTA file</description>
<macros>
<import>macros.xml</import>
</macros>

<expand macro="requirements" />
<expand macro="stdio" />
<expand macro="version_command" />

<expand macro="requirements"/>
<expand macro="stdio"/>
<expand macro="version_command"/>
<command detect_errors="aggressive">
<!-- DB has two files, *.dmnd and *.tx -->
<!-- DB has two files, *.dmnd and *.tx -->
<![CDATA[
diamond makedb
--threads "\${GALAXY_SLOTS:-12}"
--threads \${GALAXY_SLOTS:-12}
--in '$infile'
--db ./database

Expand All @@ -23,30 +21,24 @@
#end if
]]>
</command>

<inputs>
<param name="infile" type="data" format="fasta" label="Input reference file in FASTA format" />
<conditional name="tax_cond">
<param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner">
<option value="yes">Yes</option>
<option value="no" selected="true">No</option>
</param>
<when value="yes">
<param argument="--taxonmap" type="data" format="tabular"
label="Protein accession to taxid mapping file"
help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features.
A custom file following the same format may be supplied here. Note that the first line of this file is assumed to contain headings and will be ignored" />
<param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" />
<param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" />
</when>
<when value="no"/>
</conditional>
<param name="infile" type="data" format="fasta" label="Input reference file in FASTA format"/>
<conditional name="tax_cond">
<param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner">
<option value="yes">Yes</option>
<option value="no" selected="true">No</option>
</param>
<when value="yes">
<param argument="--taxonmap" type="data" format="tabular" label="Protein accession to taxid mapping file" help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. A custom file following the same format may be supplied here. Note that the first line of this file is assumed to contain headings and will be ignored"/>
<param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features"/>
<param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features"/>
</when>
<when value="no"/>
</conditional>
</inputs>

<outputs>
<data format="dmnd" name="outfile" from_work_dir="database.dmnd" label="${tool.name} on ${on_string}"/>
</outputs>

<tests>
<test>
<param name="infile" value="db.fasta" ftype="fasta"/>
Expand All @@ -56,14 +48,13 @@
<param name="infile" value="db.fasta" ftype="fasta"/>
<conditional name="tax_cond">
<param name="tax_select" value="yes"/>
<param name="taxonmap" ftype="tabular" value="prot.accession2taxid" />
<param name="taxonnodes" ftype="tabular" value="nodes.dmp" />
<param name="taxonnames" ftype="tabular" value="names.dmp" />
<param name="taxonmap" ftype="tabular" value="prot.accession2taxid"/>
<param name="taxonnodes" ftype="tabular" value="nodes.dmp"/>
<param name="taxonnames" ftype="tabular" value="names.dmp"/>
</conditional>
<output name="outfile" value="db-wtax.dmnd" compare="sim_size" delta="2"/>
</test>
</tests>

<help>
<![CDATA[

Expand All @@ -86,6 +77,5 @@ times faster than BLASTX, finding more than 94% of all matches.
- taxonnodes: Path to the nodes.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
]]>
</help>

<expand macro="citations" />
<expand macro="citations"/>
</tool>
35 changes: 17 additions & 18 deletions tools/diamond/diamond_view.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>generate formatted output from DAA files</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements" />
<expand macro="stdio" />
<expand macro="version_command" />
<expand macro="requirements"/>
<expand macro="stdio"/>
<expand macro="version_command"/>
<command detect_errors="aggressive"><![CDATA[
## need to link because diamond tries to open dataset_xxx.dat.daa
ln -s '$daa' input.daa &&
Expand All @@ -16,36 +16,36 @@
@OUTPUT_ARGS@
@HITFILTER_ARGS@
$forwardonly
--compress '0'
--verbose
]]>
</command>
<inputs>
<param argument="--daa" type="data" format="daa" label="input file in DAA format" />
<param argument="--daa" type="data" format="daa" label="input file in DAA format"/>
<section name="output_section" title="Output options">
<expand macro="output_type_macro" />
<expand macro="output_type_macro"/>
</section>
<expand macro="hit_filter_macro" />
<param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand" help=""/>
<expand macro="hit_filter_macro"/>
<param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand"/>
</inputs>
<outputs>
<expand macro="output_macro" />
<expand macro="output_macro"/>
</outputs>
<tests>
<test expect_num_outputs="1">
<param name="daa" ftype="daa" value="diamond_results.daa" />
<param name="daa" ftype="daa" value="diamond_results.daa"/>
<section name="output_section">
<conditional name="output">
<param name="outfmt" value="5"/>
</conditional>
</section>
<conditional name="hit_filter">
<param name="hit_filter_select" value="max"/>
<param name="max_target_seqs" value="1" />
<param name="max_target_seqs" value="1"/>
</conditional>
<output name="blast_tabular" file="diamond_results.xml"/>
</test>
<test expect_num_outputs="1">
<param name="daa" ftype="daa" value="diamond_results.daa" />
<param name="daa" ftype="daa" value="diamond_results.daa"/>
<section name="output_section">
<conditional name="output">
<param name="outfmt" value="6"/>
Expand All @@ -55,21 +55,20 @@
<output name="blast_tabular" file="diamond_view_results.tabular"/>
</test>
<test expect_num_outputs="1">
<param name="daa" ftype="daa" value="diamond_results.daa" />
<param name="daa" ftype="daa" value="diamond_results.daa"/>
<section name="output_section">
<conditional name="output">
<param name="outfmt" value="101"/>
</conditional>
</section>
<conditional name="hit_filter">
<param name="hit_filter_select" value="top"/>
<param name="max_target_seqs" value="1" />
<param name="max_target_seqs" value="1"/>
</conditional>
<param name="forwardonly" value="--forwardonly" />
<param name="forwardonly" value="--forwardonly"/>
<output name="blast_tabular" file="diamond_results.sam" lines_diff="2"/>
</test>
</tests>

<help>
<![CDATA[

Expand Down Expand Up @@ -103,5 +102,5 @@ Column Description
12 Bit score
]]>
</help>
<expand macro="citations" />
<expand macro="citations"/>
</tool>
67 changes: 25 additions & 42 deletions tools/diamond/macros.xml
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
<macros>
<token name="@TOOL_VERSION@">2.0.15</token>
<token name="@TOOL_VERSION@">2.1.9</token>
<token name="@VERSION_SUFFIX@">0</token>
<xml name="requirements">
<requirements>
<requirement type="package" version="@TOOL_VERSION@">diamond</requirement>
<requirement type="package" version="@TOOL_VERSION@">diamond</requirement>
</requirements>
</xml>

<xml name="stdio">
<stdio>
<regex match="Failed to allocate" source="stderr" level="fatal_oom" />
<regex match="Failed to allocate" source="stderr" level="fatal_oom"/>
</stdio>
</xml>

<xml name="version_command">
<version_command>diamond version | cut -d" " -f 3</version_command>
</xml>

<xml name="output_type_macro">
<conditional name="output">
<param argument="--outfmt" type="select" label="Format of output file" help="">
Expand All @@ -26,6 +23,7 @@
<option value="100">DAA</option>
<option value="101">SAM</option>
<option value="102">Taxonomic classification</option>
<option value="104">JSON (flat)</option>
</param>
<when value="0"/>
<when value="5"/>
Expand Down Expand Up @@ -69,78 +67,68 @@
<option value="cigar">Cigar</option>
<yield/>
</param>
<param argument="--unal" type="boolean" label="Report unaligned queries" truevalue="1" falsevalue="0" checked="false"/>
</when>
<when value="100">
<param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/>
<param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/>
</when>
<when value="101">
<param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/>
<param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/>
</when>
<when value="102"/>
<when value="102">
<param argument="--include-lineage" type="boolean" truevalue="--include-lineage" falsevalue="" checked="false" label="Include lineage in the taxonomic classification format"/>
</when>
<when value="104"/>
</conditional>
</xml>

<xml name="hit_filter_macro">
<conditional name="hit_filter">
<param name="hit_filter_select" type="select" label="Method to restrict the number of hits?">
<option value="max">Maximum number of target sequences</option>
<option value="top">Percentage of top alignment score</option>
</param>
<when value="max">
<param name="max_target_seqs" argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for"
help="Setting this to 0 will report all alignments that were found." />
<param argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for" help="Setting this to 0 will report all alignments that were found."/>
</when>
<when value="top">
<param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query"
help="For example, setting this to 10 will report all alignments whose score is at most 10% lower than the best alignment score for a query." />
<param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query" help="For example, setting this to 10 will report all alignments whose score is at most 10% lower than the best alignment score for a query."/>
</when>
</conditional>
</xml>

<xml name="block_size_low_sens">
<param argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time"
help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary
disk space, but also improve performance" />
<param argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time"
help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary disk space, but also improve performance"/>
</xml>

<xml name="block_size_hi_sens">
<param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time"
help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary
disk space, but also improve performance" />
<param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time"
help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary disk space, but also improve performance"/>
</xml>

<xml name="citations">
<citations>
<citation type="doi">10.1038/nmeth.3176</citation>
<citation type="doi">10.1038/s41592-021-01101-x</citation>
</citations>
</xml>


<xml name="output_macro">
<data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}">
<data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}: Blast pairwise">
<filter>output_section["output"]["outfmt"] == "0"</filter>
</data>
<data format="xml" name="blast_xml" label="${tool.name} on ${on_string}">
<data format="xml" name="blast_xml" label="${tool.name} on ${on_string}: Blast XML">
<filter>output_section["output"]["outfmt"] == "5"</filter>
</data>
<data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}">
<data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}: Blast Tabular">
<filter>output_section["output"]["outfmt"] == "6"</filter>
</data>
<!-- for daa diamond appends the .daa extension -> hence from_work_dir -->
<data format="daa" name="daa_output" label="${tool.name} on ${on_string}" from_work_dir="output.daa">
<data format="daa" name="daa_output" label="${tool.name} on ${on_string}: DAA" from_work_dir="output.daa">
<filter>output_section["output"]["outfmt"] == "100"</filter>
</data>
<data format="sam" name="sam_output" label="${tool.name} on ${on_string}">
<data format="sam" name="sam_output" label="${tool.name} on ${on_string}: SAM">
<filter>output_section["output"]["outfmt"] == "101"</filter>
</data>
<data format="tabular" name="tax_output" label="${tool.name} on ${on_string}">
<data format="tabular" name="tax_output" label="${tool.name} on ${on_string}: Taxonomic classification">
<filter>output_section["output"]["outfmt"] == "102"</filter>
</data>
<data format="json" name="json_output" label="${tool.name} on ${on_string}: Json flat">
<filter>output_section["output"]["outfmt"] == "104"</filter>
</data>
</xml>

<token name="@OUTPUT_ARGS@">
#if $output_section.output.outfmt == "0"
--outfmt '0'
Expand All @@ -151,23 +139,18 @@
#else if $output_section.output.outfmt == "6"
--outfmt '6' #echo ' '.join(str($output_section.output.fields).split(','))
--out '$blast_tabular'
--unal $output_section.output.unal
#else if $output_section.output.outfmt == "100"
--outfmt '100'
$output_section.output.salltitles
$output_section.output.sallseqid
--out output.daa
#else if $output_section.output.outfmt == "101"
--outfmt '101'
$output_section.output.salltitles
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you checked where those parameters have been moved to? I remember they are still in Diamond, but maybe in another section or another tool?

$output_section.output.sallseqid
--out '$sam_output'
#else if $output_section.output.outfmt == "102"
--outfmt '102'
--out '$tax_output'
$output_section.output.include_lineage
#end if
</token>

<token name="@HITFILTER_ARGS@">
#if str($hit_filter.hit_filter_select) == 'max':
--max-target-seqs '$hit_filter.max_target_seqs'
Expand Down
Binary file modified tools/diamond/test-data/diamond_results.daa
Binary file not shown.
1 change: 0 additions & 1 deletion tools/diamond/test-data/diamond_results.tabular
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 100 0 0 0
sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 100 0 0 0
shuffled * -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 * * *
2 changes: 2 additions & 0 deletions tools/diamond/test-data/diamond_results_soft_masking.tabular
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409
2 changes: 2 additions & 0 deletions tools/diamond/test-data/diamond_results_swipe.tabular
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409
Loading