galaxyproject · SaimMomin12 · Feb 27, 2023 · Mar 6, 2023 · Mar 20, 2023 · Jun 5, 2023
diff --git a/tools/diamond/diamond.xml b/tools/diamond/diamond.xml
diff --git a/tools/diamond/diamond_makedb.xml b/tools/diamond/diamond_makedb.xml
@@ -1,18 +1,16 @@
-<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
+<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
  <description>Build database from a FASTA file</description>
  <macros>
  <import>macros.xml</import>
  </macros>
-
- <expand macro="requirements" />
- <expand macro="stdio" />
- <expand macro="version_command" />
-
+ <expand macro="requirements"/>
+ <expand macro="stdio"/>
+ <expand macro="version_command"/>
  <command detect_errors="aggressive">
- <!-- DB has two files, *.dmnd and *.tx -->
+  <!-- DB has two files, *.dmnd and *.tx -->
  <![CDATA[
  diamond makedb
- --threads "\${GALAXY_SLOTS:-12}"
+ --threads \${GALAXY_SLOTS:-12}
  --in '$infile'
  --db ./database
 
@@ -23,30 +21,24 @@
  #end if
  ]]>
  </command>
-
  <inputs>
- <param name="infile" type="data" format="fasta" label="Input reference file in FASTA format" />
- <conditional name="tax_cond">
- <param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner">
- <option value="yes">Yes</option>
- <option value="no" selected="true">No</option>
- </param>
- <when value="yes">
- <param argument="--taxonmap" type="data" format="tabular" 
- label="Protein accession to taxid mapping file" 
- help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. 
- A custom file following the same format may be supplied here. Note that the first line of this file is assumed to contain headings and will be ignored" />
- <param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" />
- <param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features" />
- </when>
- <when value="no"/>
- </conditional>
+ <param name="infile" type="data" format="fasta" label="Input reference file in FASTA format"/>
+ <conditional name="tax_cond">
+ <param name="tax_select" type="select" label="Add taxonomic data?" help="Needs to be supplied in order to provide taxonomy features of the aligner">
+ <option value="yes">Yes</option>
+ <option value="no" selected="true">No</option>
+ </param>
+ <when value="yes">
+ <param argument="--taxonmap" type="data" format="tabular" label="Protein accession to taxid mapping file" help="Path to mapping file that maps NCBI protein accession numbers to taxon ids (gzip compressed). This parameter is optional and needs to be supplied in order to provide taxonomy features. A custom file following the same format may be supplied here. Note that the first line of this file is assumed to contain headings and will be ignored"/>
+ <param argument="--taxonnodes" type="data" format="tabular" label="Taxonomy nodes.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features"/>
+ <param argument="--taxonnames" type="data" format="tabular" label="Taxonomy names.dmp from NCBI" help="This parameter is optional and needs to be supplied in order to provide taxonomy features"/>
+ </when>
+ <when value="no"/>
+ </conditional>
  </inputs>
-
  <outputs>
  <data format="dmnd" name="outfile" from_work_dir="database.dmnd" label="${tool.name} on ${on_string}"/>
  </outputs>
-
  <tests>
  <test>
  <param name="infile" value="db.fasta" ftype="fasta"/>
@@ -56,14 +48,13 @@
  <param name="infile" value="db.fasta" ftype="fasta"/>
  <conditional name="tax_cond">
  <param name="tax_select" value="yes"/>
- <param name="taxonmap" ftype="tabular" value="prot.accession2taxid" />
- <param name="taxonnodes" ftype="tabular" value="nodes.dmp" />
- <param name="taxonnames" ftype="tabular" value="names.dmp" />
+ <param name="taxonmap" ftype="tabular" value="prot.accession2taxid"/>
+ <param name="taxonnodes" ftype="tabular" value="nodes.dmp"/>
+ <param name="taxonnames" ftype="tabular" value="names.dmp"/>
  </conditional>
  <output name="outfile" value="db-wtax.dmnd" compare="sim_size" delta="2"/>
  </test>
  </tests>
-
  <help>
 <![CDATA[
 
@@ -86,6 +77,5 @@ times faster than BLASTX, finding more than 94% of all matches.
 - taxonnodes: Path to the nodes.dmp file from the NCBI taxonomy. This parameter is optional and needs to be supplied in order to provide taxonomy features. The file is contained within this archive downloadable at NCBI: ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
 ]]>
  </help>
-
- <expand macro="citations" />
+ <expand macro="citations"/>
 </tool>
diff --git a/tools/diamond/diamond_view.xml b/tools/diamond/diamond_view.xml
@@ -1,11 +1,11 @@
-<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
+<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
  <description>generate formatted output from DAA files</description>
  <macros>
  <import>macros.xml</import>
  </macros>
- <expand macro="requirements" />
- <expand macro="stdio" />
- <expand macro="version_command" />
+ <expand macro="requirements"/>
+ <expand macro="stdio"/>
+ <expand macro="version_command"/>
  <command detect_errors="aggressive"><![CDATA[
- ## need to link because diamont tries to open dataset_xxx.dat.daa
+ ## need to link because diamond tries to open dataset_xxx.dat.daa
  ln -s '$daa' input.daa &&
@@ -16,36 +16,36 @@
  @OUTPUT_ARGS@
  @HITFILTER_ARGS@
  $forwardonly
- --compress '0'
+ --verbose
  ]]>
  </command>
  <inputs>
- <param argument="--daa" type="data" format="daa" label="input file in DAA format" />
+ <param argument="--daa" type="data" format="daa" label="input file in DAA format"/>
  <section name="output_section" title="Output options">
- <expand macro="output_type_macro" />
+ <expand macro="output_type_macro"/>
  </section>
- <expand macro="hit_filter_macro" />
- <param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand" help=""/>
+ <expand macro="hit_filter_macro"/>
+ <param argument="--forwardonly" type="boolean" truevalue="--forwardonly" falsevalue="" checked="false" label="only show alignments of forward strand"/>
  </inputs>
  <outputs>
- <expand macro="output_macro" />
+ <expand macro="output_macro"/>
  </outputs>
  <tests>
  <test expect_num_outputs="1">
- <param name="daa" ftype="daa" value="diamond_results.daa" />
+ <param name="daa" ftype="daa" value="diamond_results.daa"/>
  <section name="output_section">
  <conditional name="output">
  <param name="outfmt" value="5"/>
  </conditional>
  </section>
  <conditional name="hit_filter">
  <param name="hit_filter_select" value="max"/>
- <param name="max_target_seqs" value="1" />
+ <param name="max_target_seqs" value="1"/>
  </conditional>
- <output name="blast_tabular" file="diamond_results.xml"/>
+ <output name="blast_xml" file="diamond_results.xml"/>
  </test>
  <test expect_num_outputs="1">
- <param name="daa" ftype="daa" value="diamond_results.daa" />
+ <param name="daa" ftype="daa" value="diamond_results.daa"/>
  <section name="output_section">
  <conditional name="output">
  <param name="outfmt" value="6"/>
@@ -55,21 +55,20 @@
  <output name="blast_tabular" file="diamond_view_results.tabular"/>
  </test>
  <test expect_num_outputs="1">
- <param name="daa" ftype="daa" value="diamond_results.daa" />
+ <param name="daa" ftype="daa" value="diamond_results.daa"/>
  <section name="output_section">
  <conditional name="output">
  <param name="outfmt" value="101"/>
  </conditional>
  </section>
  <conditional name="hit_filter">
  <param name="hit_filter_select" value="top"/>
- <param name="max_target_seqs" value="1" />
+ <param name="max_target_seqs" value="1"/>
  </conditional>
- <param name="forwardonly" value="--forwardonly" />
+ <param name="forwardonly" value="--forwardonly"/>
- <output name="blast_tabular" file="diamond_results.sam" lines_diff="2"/>
+ <output name="sam_output" file="diamond_results.sam" lines_diff="2"/>
  </test>
  </tests>
-
  <help>
 <![CDATA[
 
@@ -103,5 +102,5 @@ Column Description
 12 Bit score
 ]]>
  </help>
- <expand macro="citations" />
+ <expand macro="citations"/>
 </tool>
diff --git a/tools/diamond/macros.xml b/tools/diamond/macros.xml
@@ -1,22 +1,19 @@
 <macros>
- <token name="@TOOL_VERSION@">2.0.15</token>
+ <token name="@TOOL_VERSION@">2.1.9</token>
  <token name="@VERSION_SUFFIX@">0</token>
  <xml name="requirements">
  <requirements>
- <requirement type="package" version="@TOOL_VERSION@">diamond</requirement>
+  <requirement type="package" version="@TOOL_VERSION@">diamond</requirement>
  </requirements>
  </xml>
-
  <xml name="stdio">
  <stdio>
- <regex match="Failed to allocate" source="stderr" level="fatal_oom" />
+ <regex match="Failed to allocate" source="stderr" level="fatal_oom"/>
  </stdio>
  </xml>
-
  <xml name="version_command">
  <version_command>diamond version | cut -d" " -f 3</version_command>
  </xml>
-
  <xml name="output_type_macro">
  <conditional name="output">
  <param argument="--outfmt" type="select" label="Format of output file" help="">
@@ -26,6 +23,7 @@
  <option value="100">DAA</option>
  <option value="101">SAM</option>
  <option value="102">Taxonomic classification</option>
+ <option value="104">JSON (flat)</option>
  </param>
  <when value="0"/>
  <when value="5"/>
@@ -69,78 +67,68 @@
  <option value="cigar">Cigar</option>
  <yield/>
  </param>
- <param argument="--unal" type="boolean" label="Report unaligned queries" truevalue="1" falsevalue="0" checked="false"/>
  </when>
  <when value="100">
- <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/>
- <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/>
  </when>
  <when value="101">
- <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/>
- <param argument="--sallseqid" type="boolean" truevalue="--sallseqid" falsevalue="" checked="true" label="Include all subject ids in DAA file?" help=""/>
  </when>
- <when value="102"/>
+ <when value="102">
+ <param argument="--include-lineage" type="boolean" truevalue="--include-lineage" falsevalue="" checked="false" label="Include lineage in the taxonomic classification format"/>
+ </when>
+ <when value="104"/>
  </conditional>
  </xml>
-
  <xml name="hit_filter_macro">
  <conditional name="hit_filter">
  <param name="hit_filter_select" type="select" label="Method to restrict the number of hits?">
  <option value="max">Maximum number of target sequences</option>
  <option value="top">Percentage of top alignment score</option>
  </param>
  <when value="max">
- <param name="max_target_seqs" argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for" 
- help="Setting this to 0 will report all alignments that were found." />
+ <param argument="--max-target-seqs" type="integer" value="25" label="The maximum number of target sequences per query to report alignments for" help="Setting this to 0 will report all alignments that were found."/>
  </when>
  <when value="top">
- <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query" 
- help="For example, setting this to 10 will report all alignments whose score is at most 10% lower than the best alignment score for a query." />
+ <param argument="--top" type="integer" value="0" label="Keep alignments within the given percentage range of the top alignment score for a query" help="For example, setting this to 10 will report all alignments whose score is at most 10% lower than the best alignment score for a query."/>
  </when>
  </conditional>
  </xml>
-
  <xml name="block_size_low_sens">
- <param argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" 
- help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary 
- disk space, but also improve performance" />
+ <param argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time"
+ help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary disk space, but also improve performance"/>
  </xml>
-
  <xml name="block_size_hi_sens">
- <param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time" 
- help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary 
- disk space, but also improve performance" />
+ <param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time"
+ help="This is the main parameter for controlling the program’s memory and disk space usage. Bigger numbers will increase the use of memory and temporary disk space, but also improve performance"/>
  </xml>
-
  <xml name="citations">
  <citations>
- <citation type="doi">10.1038/nmeth.3176</citation>
+ <citation type="doi">10.1038/s41592-021-01101-x</citation>
  </citations>
  </xml>
-
-
  <xml name="output_macro">
- <data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}">
+ <data format="txt" name="blast_pairw" label="${tool.name} on ${on_string}: Blast pairwise">
  <filter>output_section["output"]["outfmt"] == "0"</filter>
  </data>
- <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}">
+ <data format="xml" name="blast_xml" label="${tool.name} on ${on_string}: Blast XML">
  <filter>output_section["output"]["outfmt"] == "5"</filter>
  </data>
- <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}">
+ <data format="tabular" name="blast_tabular" label="${tool.name} on ${on_string}: Blast Tabular">
  <filter>output_section["output"]["outfmt"] == "6"</filter>
  </data>
  <!-- for daa diamond appends the .daa extension -> hence from_work_dir -->
- <data format="daa" name="daa_output" label="${tool.name} on ${on_string}" from_work_dir="output.daa">
+ <data format="daa" name="daa_output" label="${tool.name} on ${on_string}: DAA" from_work_dir="output.daa">
  <filter>output_section["output"]["outfmt"] == "100"</filter>
  </data>
- <data format="sam" name="sam_output" label="${tool.name} on ${on_string}">
+ <data format="sam" name="sam_output" label="${tool.name} on ${on_string}: SAM">
  <filter>output_section["output"]["outfmt"] == "101"</filter>
  </data>
- <data format="tabular" name="tax_output" label="${tool.name} on ${on_string}">
+ <data format="tabular" name="tax_output" label="${tool.name} on ${on_string}: Taxonomic classification">
  <filter>output_section["output"]["outfmt"] == "102"</filter>
  </data>
+ <data format="json" name="json_output" label="${tool.name} on ${on_string}: Json flat">
+ <filter>output_section["output"]["outfmt"] == "104"</filter>
+ </data>
  </xml>
-
  <token name="@OUTPUT_ARGS@">
  #if $output_section.output.outfmt == "0"
  --outfmt '0'
@@ -151,23 +139,22 @@
  #else if $output_section.output.outfmt == "6"
  --outfmt '6' #echo ' '.join(str($output_section.output.fields).split(','))
  --out '$blast_tabular'
- --unal $output_section.output.unal
  #else if $output_section.output.outfmt == "100"
  --outfmt '100'
- $output_section.output.salltitles
- $output_section.output.sallseqid
  --out output.daa
  #else if $output_section.output.outfmt == "101"
  --outfmt '101'
- $output_section.output.salltitles
- $output_section.output.sallseqid
  --out '$sam_output'
  #else if $output_section.output.outfmt == "102"
  --outfmt '102'
  --out '$tax_output'
+ $output_section.output.include_lineage
+ ## JSON (flat): without this branch neither --outfmt nor --out is passed and json_output stays empty
+ #else if $output_section.output.outfmt == "104"
+ --outfmt '104'
+ --out '$json_output'
  #end if
  </token>
-
  <token name="@HITFILTER_ARGS@">
  #if str($hit_filter.hit_filter_select) == 'max':
  --max-target-seqs '$hit_filter.max_target_seqs'

diff --git a/tools/diamond/test-data/diamond_results.daa b/tools/diamond/test-data/diamond_results.daa
diff --git a/tools/diamond/test-data/diamond_results.tabular b/tools/diamond/test-data/diamond_results.tabular
@@ -1,3 +1,2 @@
 sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 100 0 0 0
 sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 100 0 0 0
-shuffled * -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 * * *
diff --git a/tools/diamond/test-data/diamond_results_soft_masking.tabular b/tools/diamond/test-data/diamond_results_soft_masking.tabular
@@ -0,0 +1,2 @@
+sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
+sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409
diff --git a/tools/diamond/test-data/diamond_results_soft_masking_memory.tabular b/tools/diamond/test-data/diamond_results_soft_masking_memory.tabular
@@ -0,0 +1,2 @@
+sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
+sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409
diff --git a/tools/diamond/test-data/diamond_results_swipe.tabular b/tools/diamond/test-data/diamond_results_swipe.tabular
@@ -0,0 +1,2 @@
+sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 849 1 284 1.44e-205 550
+sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 849 1 284 5.77e-150 409