Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: move partitioning and collating actions to q2-types #175

Merged
merged 12 commits into from
Sep 20, 2024
5 changes: 2 additions & 3 deletions q2_moshpit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from . import abundance
from . import busco
from . import eggnog
from . import partition
from . import prodigal
from ._version import get_versions
from .dereplication import dereplicate_mags
Expand All @@ -31,8 +30,8 @@
__all__ = [
'metabat2', 'bracken', 'kraken_class', 'kraken_db',
'kaiju_class', 'kaiju_db', 'dereplicate_mags', 'eggnog',
'busco', 'prodigal', 'kraken_helpers', 'partition',
'filter_derep_mags', 'filter_mags', 'get_feature_lengths',
'busco', 'prodigal', 'kraken_helpers', 'filter_derep_mags',
'filter_mags', 'get_feature_lengths',
'multiply_tables', '_multiply_tables', '_multiply_tables_pa',
'_multiply_tables_relative', 'abundance', 'filter_reads_pangenome'
]
4 changes: 3 additions & 1 deletion q2_moshpit/busco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

from .busco import evaluate_busco, _evaluate_busco, _visualize_busco
from .database import fetch_busco_db
from .partition import collate_busco_results

__all__ = [
"evaluate_busco", "_evaluate_busco", "_visualize_busco", "fetch_busco_db"
"evaluate_busco", "_evaluate_busco", "_visualize_busco", "fetch_busco_db",
"collate_busco_results"
]
2 changes: 1 addition & 1 deletion q2_moshpit/busco/busco.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def evaluate_busco(
partition_action = "partition_sample_data_mags"
else:
partition_action = "partition_feature_data_mags"
partition_mags = ctx.get_action("moshpit", partition_action)
partition_mags = ctx.get_action("types", partition_action)

(partitioned_mags, ) = partition_mags(bins, num_partitions)
results = []
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
mag_id sample_id input_file dataset complete single duplicated fragmented missing n_markers scaffold_n50 contigs_n50 percent_gaps scaffolds length
bec9c09a-62c3-4fbb-8f7f-9fdf9aefc02f SRR13221817 bec9c09a-62c3-4fbb-8f7f-9fdf9aefc02f.fasta bacteria_odb10 28.2 27.4 0.8 8.9 62.9 124 4785 4785 0.000% 265 1219165
5978e667-0476-4921-8cc2-34b9d1b508c1 SRR13221817 5978e667-0476-4921-8cc2-34b9d1b508c1.fasta bacteria_odb10 1.6 1.6 0.0 1.6 96.8 124 3548 3548 0.000% 67 245922
625c95e6-ac2f-4e6e-9470-af8cd11c75dd SRR13221817 625c95e6-ac2f-4e6e-9470-af8cd11c75dd.fasta bacteria_odb10 26.6 26.6 0.0 3.2 70.2 124 78679 78679 0.000% 17 714893
6ed8c097-1c87-4019-8b38-b95507011b41 SRR14143412 6ed8c097-1c87-4019-8b38-b95507011b41.fasta bacteria_odb10 8.1 8.1 0.0 0.8 91.1 124 74198 74198 0.000% 11 560715
bf2c0af0-83ba-44a6-b550-3b7884a62a82 SRR14143412 bf2c0af0-83ba-44a6-b550-3b7884a62a82.fasta bacteria_odb10 95.9 93.5 2.4 2.4 1.7 124 80820 80820 0.000% 92 4253319
a2401d15-802f-42c3-9eb4-c282e2141b14 SRR14143412 a2401d15-802f-42c3-9eb4-c282e2141b14.fasta bacteria_odb10 89.5 89.5 0.0 1.6 8.9 124 31708 31708 0.000% 106 2120157
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mag_id sample_id input_file dataset complete single duplicated fragmented missing n_markers scaffold_n50 contigs_n50 percent_gaps scaffolds length
bec9c09a-62c3-4fbb-8f7f-9fdf9aefc02f SRR13221817 bec9c09a-62c3-4fbb-8f7f-9fdf9aefc02f.fasta bacteria_odb10 28.2 27.4 0.8 8.9 62.9 124 4785 4785 0.000% 265 1219165
5978e667-0476-4921-8cc2-34b9d1b508c1 SRR13221817 5978e667-0476-4921-8cc2-34b9d1b508c1.fasta bacteria_odb10 1.6 1.6 0.0 1.6 96.8 124 3548 3548 0.000% 67 245922
625c95e6-ac2f-4e6e-9470-af8cd11c75dd SRR13221817 625c95e6-ac2f-4e6e-9470-af8cd11c75dd.fasta bacteria_odb10 26.6 26.6 0.0 3.2 70.2 124 78679 78679 0.000% 17 714893
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mag_id sample_id input_file dataset complete single duplicated fragmented missing n_markers scaffold_n50 contigs_n50 percent_gaps scaffolds length
6ed8c097-1c87-4019-8b38-b95507011b41 SRR14143412 6ed8c097-1c87-4019-8b38-b95507011b41.fasta bacteria_odb10 8.1 8.1 0.0 0.8 91.1 124 74198 74198 0.000% 11 560715
bf2c0af0-83ba-44a6-b550-3b7884a62a82 SRR14143412 bf2c0af0-83ba-44a6-b550-3b7884a62a82.fasta bacteria_odb10 95.9 93.5 2.4 2.4 1.7 124 80820 80820 0.000% 92 4253319
a2401d15-802f-42c3-9eb4-c282e2141b14 SRR14143412 a2401d15-802f-42c3-9eb4-c282e2141b14.fasta bacteria_odb10 89.5 89.5 0.0 1.6 8.9 124 31708 31708 0.000% 106 2120157
36 changes: 36 additions & 0 deletions q2_moshpit/busco/tests/test_partition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2022-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os

import pandas as pd
from qiime2.plugin.testing import TestPluginBase

from q2_moshpit.busco.partition import collate_busco_results
from q2_moshpit.busco.types import BUSCOResultsDirectoryFormat


class TestBUSCOPlots(TestPluginBase):
    """Tests for collating partitioned BUSCO results."""

    package = "q2_moshpit.busco.tests"

    def test_collate_busco_results(self):
        """Check collation of two BUSCO result directories.

        Loads the ``sample1`` and ``sample2`` result directories from the
        test data, collates them with ``collate_busco_results`` and
        compares the resulting ``busco_results.tsv`` against the
        ``collated`` fixture.
        """
        busco_results = [
            BUSCOResultsDirectoryFormat(
                self.get_data_path(f"busco_results/{sample}"), mode="r"
            )
            for sample in ("sample1", "sample2")
        ]

        collated_busco_result = collate_busco_results(busco_results)

        # The results file is a TSV: parse it with an explicit tab
        # separator so the comparison is made on real columns and dtypes.
        # With the default comma separator every row would be read as a
        # single string column.
        obs = pd.read_csv(
            os.path.join(str(collated_busco_result), "busco_results.tsv"),
            sep="\t",
        )
        exp = pd.read_csv(
            self.get_data_path("busco_results/collated/busco_results.tsv"),
            sep="\t",
        )

        pd.testing.assert_frame_equal(obs, exp)
6 changes: 4 additions & 2 deletions q2_moshpit/eggnog/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,12 @@ def eggnog_annotate(
num_partitions=None
):
_eggnog_annotate = ctx.get_action("moshpit", "_eggnog_annotate")
collate_annotations = ctx.get_action("moshpit", "collate_annotations")
collate_annotations = ctx.get_action(
"types", "collate_ortholog_annotations"
)

if eggnog_hits.type <= SampleData[Orthologs]:
partition_method = ctx.get_action("moshpit", "partition_orthologs")
partition_method = ctx.get_action("types", "partition_orthologs")
else:
raise NotImplementedError()

Expand Down
6 changes: 3 additions & 3 deletions q2_moshpit/eggnog/orthologs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,17 @@ def _run_eggnog_search_pipeline(
- collated_tables: The collated feature tables.
"""
if sequences.type <= FeatureData[MAG]:
plugin, action_name = "moshpit", "partition_feature_data_mags"
plugin, action_name = "types", "partition_feature_data_mags"
elif sequences.type <= SampleData[Contigs]:
plugin, action_name = "assembly", "partition_contigs"
elif sequences.type <= SampleData[MAGs]:
plugin, action_name = "moshpit", "partition_sample_data_mags"
plugin, action_name = "types", "partition_sample_data_mags"
else:
raise NotImplementedError()

partition_method = ctx.get_action(plugin, action_name)
_eggnog_search = ctx.get_action("moshpit", search_action)
collate_hits = ctx.get_action("moshpit", "collate_orthologs")
collate_hits = ctx.get_action("types", "collate_orthologs")
_eggnog_feature_table = ctx.get_action("moshpit", "_eggnog_feature_table")
(partitioned_sequences,) = partition_method(sequences, num_partitions)

Expand Down
2 changes: 1 addition & 1 deletion q2_moshpit/kraken2/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def classify_kraken2(
partition_method = ctx.get_action("assembly", "partition_contigs")
elif seqs.type <= SampleData[MAGs]:
partition_method = ctx.get_action(
"moshpit", "partition_sample_data_mags"
"types", "partition_sample_data_mags"
)
# FeatureData[MAG] is not parallelized
elif seqs.type <= FeatureData[MAG]:
Expand Down
24 changes: 0 additions & 24 deletions q2_moshpit/partition/__init__.py

This file was deleted.

23 changes: 0 additions & 23 deletions q2_moshpit/partition/annotations.py

This file was deleted.

141 changes: 0 additions & 141 deletions q2_moshpit/partition/mags.py

This file was deleted.

Loading
Loading