diff --git a/ci/recipe/meta.yaml b/ci/recipe/meta.yaml index 20490046..9096e130 100644 --- a/ci/recipe/meta.yaml +++ b/ci/recipe/meta.yaml @@ -32,6 +32,7 @@ requirements: - ipywidgets - scikit-bio - scikit-learn + - psutil - biom-format >=2.1.5,<2.2.0 - unifrac >=0.9.2,<0.10.0 - qiime2 {{ release }}.* diff --git a/q2_diversity/__init__.py b/q2_diversity/__init__.py index 2515cc09..eaee8fc9 100644 --- a/q2_diversity/__init__.py +++ b/q2_diversity/__init__.py @@ -8,8 +8,9 @@ from ._alpha import (alpha, alpha_phylogenetic, alpha_group_significance, alpha_correlation, alpha_rarefaction) -from ._beta import (beta, beta_phylogenetic, bioenv, beta_group_significance, - beta_correlation, beta_rarefaction) +from ._beta import (beta, beta_phylogenetic, beta_phylogenetic_alt, bioenv, + beta_group_significance, beta_correlation, + beta_rarefaction) from ._ordination import pcoa from ._core_metrics import core_metrics_phylogenetic, core_metrics from ._filter import filter_distance_matrix @@ -19,8 +20,10 @@ __version__ = get_versions()['version'] del get_versions -__all__ = ['beta', 'beta_phylogenetic', 'alpha', 'alpha_phylogenetic', 'pcoa', - 'alpha_group_significance', 'bioenv', 'beta_group_significance', - 'alpha_correlation', 'core_metrics_phylogenetic', 'core_metrics', + +__all__ = ['beta', 'beta_phylogenetic', 'beta_phylogenetic_alt', 'alpha', + 'alpha_phylogenetic', 'pcoa', 'alpha_group_significance', 'bioenv', + 'beta_group_significance', 'alpha_correlation', + 'core_metrics_phylogenetic', 'core_metrics', 'filter_distance_matrix', 'beta_correlation', 'alpha_rarefaction', 'beta_rarefaction'] diff --git a/q2_diversity/_beta/__init__.py b/q2_diversity/_beta/__init__.py index 465fe317..134f2158 100644 --- a/q2_diversity/_beta/__init__.py +++ b/q2_diversity/_beta/__init__.py @@ -6,11 +6,13 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- -from ._method import (beta_phylogenetic, beta, phylogenetic_metrics, - non_phylogenetic_metrics, all_metrics) +from ._method import (beta_phylogenetic, beta_phylogenetic_alt, beta, + phylogenetic_metrics, non_phylogenetic_metrics, + all_metrics, phylogenetic_metrics_alt_dict) from ._visualizer import (bioenv, beta_group_significance, beta_correlation, beta_rarefaction) -__all__ = ['beta_phylogenetic', 'beta', 'bioenv', 'beta_group_significance', - 'phylogenetic_metrics', 'non_phylogenetic_metrics', 'all_metrics', - 'beta_correlation', 'beta_rarefaction'] +__all__ = ['beta_phylogenetic', 'beta_phylogenetic_alt', 'beta', 'bioenv', + 'beta_group_significance', 'phylogenetic_metrics', + 'non_phylogenetic_metrics', 'all_metrics', 'beta_correlation', + 'beta_rarefaction', 'phylogenetic_metrics_alt_dict'] diff --git a/q2_diversity/_beta/_method.py b/q2_diversity/_beta/_method.py index 7e616ce8..97293c5f 100644 --- a/q2_diversity/_beta/_method.py +++ b/q2_diversity/_beta/_method.py @@ -12,6 +12,13 @@ import skbio.diversity import skbio.tree import sklearn.metrics +import unifrac +import psutil + +from q2_types.feature_table import BIOMV210Format +from q2_types.tree import NewickFormat + +from functools import partial # We should consider moving these functions to scikit-bio. They're part of @@ -20,6 +27,13 @@ def phylogenetic_metrics(): return {'unweighted_unifrac', 'weighted_unifrac'} +def phylogenetic_metrics_alt_dict(): + return {'unweighted_unifrac': unifrac.unweighted, + 'weighted_unifrac': unifrac.weighted_unnormalized, + 'weighted_normalized_unifrac': unifrac.weighted_normalized, + 'generalized_unifrac': unifrac.generalized} + + def non_phylogenetic_metrics(): return {'cityblock', 'euclidean', 'seuclidean', 'sqeuclidean', 'cosine', 'correlation', 'hamming', 'jaccard', 'chebyshev', 'canberra', @@ -63,6 +77,39 @@ def beta_phylogenetic(table: biom.Table, phylogeny: skbio.TreeNode, return results +def beta_phylogenetic_alt(table: BIOMV210Format, phylogeny: NewickFormat, + metric: str, n_jobs: int=1, + variance_adjusted: bool=False, + alpha=None, + bypass_tips: bool=False) -> skbio.DistanceMatrix: + + metrics = phylogenetic_metrics_alt_dict() + generalized_unifrac = 'generalized_unifrac' + + if metric not in metrics: + raise ValueError("Unknown metric: %s" % metric) + + if alpha is not None and metric != generalized_unifrac: + raise ValueError('The alpha parameter is only allowed when the choice' + ' of metric is generalized_unifrac') + + # this behaviour is undefined, so let's avoid a seg fault + cpus = psutil.cpu_count(logical=False) + if n_jobs > cpus: + raise ValueError('The value of n_jobs cannot exceed the number of ' + 'processors (%d) available in this system.' % cpus) + + if metric == generalized_unifrac: + alpha = 1.0 if alpha is None else alpha + f = partial(metrics[metric], alpha=alpha) + else: + f = metrics[metric] + + # unifrac processes tables and trees should be filenames + return f(str(table), str(phylogeny), threads=n_jobs, + variance_adjusted=variance_adjusted, bypass_tips=bypass_tips) + + def beta(table: biom.Table, metric: str, n_jobs: int=1)-> skbio.DistanceMatrix: if metric not in non_phylogenetic_metrics(): raise ValueError("Unknown metric: %s" % metric) diff --git a/q2_diversity/plugin_setup.py b/q2_diversity/plugin_setup.py index 4918ce07..867f05cf 100644 --- a/q2_diversity/plugin_setup.py +++ b/q2_diversity/plugin_setup.py @@ -7,7 +7,7 @@ # ---------------------------------------------------------------------------- from qiime2.plugin import (Plugin, Str, Properties, MetadataCategory, Choices, - Metadata, Int, Bool, Range) + Metadata, Int, Bool, Range, Float) import q2_diversity from q2_diversity import _alpha as alpha @@ -38,7 +38,19 @@ 'and exploring community alpha and beta diversity through ' 'statistics and visualizations in the context of sample ' 'metadata.'), - short_description='Plugin for exploring community diversity.' + short_description='Plugin for exploring community diversity.', + citation_text=('Unweighted UniFrac: ' + 'Lozupone and Knight 2005 Appl Environ Microbiol; DOI: ' + '10.1128/AEM.71.12.8228-8235.2005.\n' + 'Weighted UniFrac: ' + 'Lozupone et al. 2007 Appl Environ Microbiol; DOI: ' + '10.1128/AEM.01996-06.\n' + 'Variance adjusted UniFrac: ' + 'Chang et al. BMC Bioinformatics 2011 ' + 'https://doi.org/10.1186/1471-2105-12-118.\n' + 'Generalized UniFrac: ' + 'Chen et al. 2012 Bioinformatics; DOI: ' + '10.1093/bioinformatics/bts342') ) plugin.methods.register_function( @@ -46,7 +58,7 @@ inputs={'table': FeatureTable[Frequency], 'phylogeny': Phylogeny[Rooted]}, parameters={'metric': Str % Choices(beta.phylogenetic_metrics()), - 'n_jobs': Int}, + 'n_jobs': Int % Range(1, None)}, outputs=[('distance_matrix', DistanceMatrix % Properties('phylogenetic'))], input_descriptions={ 'table': ('The feature table containing the samples over which beta ' @@ -68,6 +80,61 @@ " for all pairs of samples in a feature table.") ) + +plugin.methods.register_function( + function=q2_diversity.beta_phylogenetic_alt, + inputs={'table': FeatureTable[Frequency], + 'phylogeny': Phylogeny[Rooted]}, + parameters={'metric': Str % Choices(beta.phylogenetic_metrics_alt_dict()), + 'n_jobs': Int, + 'variance_adjusted': Bool, + 'alpha': Float % Range(0, 1, inclusive_end=True), + 'bypass_tips': Bool}, + outputs=[('distance_matrix', DistanceMatrix % Properties('phylogenetic'))], + input_descriptions={ + 'table': ('The feature table containing the samples over which beta ' + 'diversity should be computed.'), + 'phylogeny': ('Phylogenetic tree containing tip identifiers that ' + 'correspond to the feature identifiers in the table. ' + 'This tree can contain tip ids that are not present in ' + 'the table, but all feature ids in the table must be ' + 'present in this tree.') + }, + parameter_descriptions={ + 'metric': 'The beta diversity metric to be computed.', + 'n_jobs': 'The number of workers to use.', + 'variance_adjusted': ('Perform variance adjustment based on Chang et ' + 'al. BMC Bioinformatics 2011. Weights distances ' + 'based on the proportion of the relative ' + 'abundance represented between the samples at a' + ' given node under evaluation.'), + 'alpha': ('This parameter is only used when the choice of metric is ' + 'generalized_unifrac. The value of alpha controls importance' + ' of sample proportions. 1.0 is weighted normalized UniFrac.' + ' 0.0 is close to unweighted UniFrac, but only if the sample' + ' proportions are dichotomized.'), + 'bypass_tips': ('In a bifurcating tree, the tips make up about 50% of ' + 'the nodes in a tree. By ignoring them, specificity ' + 'can be traded for reduced compute time. This has the' + ' effect of collapsing the phylogeny, and is analogous' + ' (in concept) to moving from 99% to 97% OTUs') + }, + output_descriptions={'distance_matrix': 'The resulting distance matrix.'}, + name='Beta diversity (phylogenetic) - High Performance Computation', + description=("Computes a user-specified phylogenetic beta diversity metric" + " for all pairs of samples in a feature table. This " + "implementation is recommended for large datasets, otherwise " + "the results are identical to beta_phylogenetic.\n\n" + "This method is an implementation of the Strided State " + "UniFrac algorithm. Multiple variants of the UniFrac metric " + "are available, including Generalized UniFrac (Chen et al. " + "2012), Variance Adjusted UniFrac (Chang et al. 2011), " + "as well as Weighted normalized and unnormalized UniFrac " + "(Lozupone et al. 2007) and unweighted UniFrac " + "(Lozupone et al. 2005)") +) + + plugin.methods.register_function( function=q2_diversity.beta, inputs={'table': FeatureTable[Frequency]}, diff --git a/q2_diversity/tests/data/crawford.biom b/q2_diversity/tests/data/crawford.biom new file mode 100644 index 00000000..fa5bf1bc Binary files /dev/null and b/q2_diversity/tests/data/crawford.biom differ diff --git a/q2_diversity/tests/data/crawford.nwk b/q2_diversity/tests/data/crawford.nwk new file mode 100644 index 00000000..c70c30fb --- /dev/null +++ b/q2_diversity/tests/data/crawford.nwk @@ -0,0 +1 @@ +((169901:0.39755,(100344:0.37296,((1684221:0.33084,((1136443:0.00014,4374042:0.00204):0.2205,((((263705:0.02657,376397:0.05743):0.02493,263452:0.09328):0.09446,((((229459:0.04726,1571092:0.0371):0.03291,827195:0.03981):0.16617,(((162991:0.01263,318370:0.02928):0.00873,(267689:0.0087,335952:0.01692):0.00962):0.08285,276044:0.25511):0.02402)'c__Clostridia; o__Clostridiales':0.01046,(((186521:0.09255,(((195840:0.05097,181603:0.04737):0.0487,(269378:0.05555,(((215193:0.01971,199307:0.02787):0.03584,344527:0.06642):0.01912,187790:0.06549):0.05223):0.06458):0.01861,(267041:0.06668,441494:0.29606):0.00852):0.00686):0.01105,((274438:0.08266,((268121:0.04956,(194787:0.05527,(337331:0.0305,267411:0.12086):0.00656):0.01187):0.00293,181205:0.07086):0.00228):0.00953,(4418586:0.09245,(((((((4471135:0.04783,195445:0.06353):0.00973,(172705:0.09236,2575787:0.21784):0.01157):0.00173,(269576:0.04917,(((259593:0.02332,1105328:0.012):0.00102,208571:0.01117):0.0008,(182621:0.08672,271449:0.02772):0.00713):0.01471):0.01507):0.00147,181419:0.03032)'c__Clostridia; o__Clostridiales':0.00169,(((((((191958:0.01976,((316842:0.02735,194822:0.01128):0.00721,175432:0.01301):0.01834):0.00247,343906:0.02151):0.00921,3392842:0.00869):0.00315,(351881:0.04144,346098:0.02522):0.00695):0.048,(((((191077:0.05103,351794:0.07438):0.00467,(((4364243:0.04249,261419:0.04021):0.01097,231169:0.01264):0.0132,(403497:0.04479,(261334:0.01174,340189:0.05963):0.01024):0.00256):0.00753):0.00429,(293580:0.02795,3265889:0.05414):0.00656):0.00543,292745:0.04202):0.00014,(322062:0.01797,190273:0.01058):0.04098):0.00564):0.00451,(288931:0.03379,((553395:0.05483,302407:0.0199):0.01312,(833390:0.00627,(275707:0.02319,450047:0.00348):0.01342):0.0492):0.0115):0.01294)'c__Clostridia; o__Clostridiales; f__Lachnospiraceae':0.00448,((132114:0.18434,((274422:0.21213,(314810:0.04013,422727:0.06862):0.08472)'c__Clostridia; o__Clostridiales':0.00016,(((((((((190242:0.01884,(265940:0.00636,170950:0.02803):0.03275):0.05634,187133:0.06732):0.03409,4417539:0.16601):0.01312,(267452:0.01884,231021:0.02579):0.22655)'f__Ruminococcaceae; g__Ruminococcus':0.01162,((550807:0.14219,((((((((4484382:0.03105,(((276404:0.01013,(258725:0.01776,4403349:0.01479):0.01233):0.02161,321484:0.05463):0.00374,263546:0.05758):0.04859):0.00344,196194:0.03084):0.01292,269019:0.02212):0.00207,270385:0.07485):0.02724,((182033:0.00895,263165:0.00701):0.0503,(260753:0.01819,191816:0.05696):0.0122):0.05687):0.00674,270519:0.05105):0.022,((((262677:0.00993,272953:0.00844):0.01976,(187807:0.0179,276985:0.03212):0.00809):0.0029,263106:0.01522):0.06462,176118:0.04609):0.04356)'g__Oscillospira':0.02244,(170555:0.01542,263044:0.02129):0.19443):0.02295):0.02515,(301870:0.15284,(259212:0.12329,185222:0.23345):0.03418)'f__Clostridiaceae':0.02365):0.01303):0.00843,((732128:0.0582,166099:0.08664):0.00226,4480176:0.02225):0.21389):0.0112,((179188:0.04915,194662:0.09909):0.00318,261511:0.06272):0.01032)'c__Clostridia; o__Clostridiales':0.00284,(273084:0.05892,(((((260663:0.02738,(265786:0.02702,260397:0.00524):0.00102):0.00014,275627:0.06381):0.14991,4396877:0.05675):0.01023,828435:0.05156):0.08237,((45363:0.01629,259266:0.05658):0.01623,4407703:0.06932):0.13663)'c__Erysipelotrichi':0.11955):0.00127):0.01284,262409:0.18866):0.00839):0.01056):0.00805,((262766:0.04848,(180206:0.05998,(((262869:0.07152,260655:0.03801):0.00395,(((349142:0.02154,195385:0.00634):0.00159,((176858:0.01629,263362:0.01476):0.0004,233313:0.00963):0.00408):0.00147,(((178031:0.01612,(259263:0.0181,263946:0.02447):0.07026):0.00147,268416:0.00842):0.00532,(97294:0.02181,258969:0.03813):0.00502):0.0039):0.00851):0.01331,(((187703:0.03164,259056:0.01433):0.01444,191772:0.02529):0.01041,276260:0.03673):0.0168):0.02587):0.00758):0.00603,(((184151:0.03171,176039:0.02716):0.05897,310490:0.04338):0.00551,((333053:0.04164,((((((((331965:0.08704,261409:0.01737):0.01822,185777:0.01487):0.04786,(259434:0.02291,276531:0.02123):0.01824):0.00853,303479:0.02542):0.00508,263908:0.04835):0.01457,((258522:0.02949,274021:0.05646):0.00554,181344:0.04727):0.00566):0.00176,(272454:0.03068,176850:0.06171):0.01689):0.01607,275563:0.01872):0.00741):0.00923,318563:0.06893):0.01131):0.00257):0.0081):0.0007):0.0016):0.00336,((((((312476:0.02917,174272:0.04651):0.00785,(((268755:0.01177,268581:0.02385):0.00878,(179719:0.04377,264021:0.05912):0.00417):0.00394,(260058:0.03496,(167204:0.01011,829401:0.01565):0.03983):0.00244):0.00992):0.01245,((((((((178926:0.02199,196825:0.01879):0.00883,(443945:0.02387,310748:0.00905):0.03013):0.01777,260756:0.01673):0.00669,((((233817:0.00475,259910:0.01695):0.0001,197775:0.02372):0.00786,130335:0.03254):0.00866,4365109:0.0344):0.00354):0.00157,((334365:0.03359,271766:0.03304):0.00976,(206494:0.02039,181249:0.04899):0.01657):0.00257):0.00474,(((((265828:0.01611,216403:0.03131):0.00681,180919:0.07082):0.00659,((270662:0.04106,229386:0.01722):0.04132,(187989:0.01675,164308:0.02502):0.01745):0.01998):0.00272,((307595:0.04286,178779:0.01324):0.01155,259888:0.06879):0.0098):0.01198,173417:0.03133):0.0056):0.00441,(186526:0.01221,233411:0.0164):0.06098):0.00533,(301012:0.03892,(266483:0.02079,839215:0.0076):0.02063):0.00659):0.01471):0.00433,(((((((175416:0.06589,(((181141:0.02134,197216:0.04356):0.00062,(273515:0.06915,188536:0.03918):0.0068):0.009,697874:0.0303):0.0203):0.00368,(179181:0.08572,(268923:0.11712,351859:0.05093):0.03893):0.00565):0.00429,((((185743:0.07302,274844:0.06304):0.02997,(190460:0.01175,192971:0.02492):0.0557):0.00889,270396:0.13586):0.00157,336145:0.06697):0.00809):0.00759,(((274597:0.05276,((259228:0.09494,264373:0.04067):0.00411,(((343581:0.00883,266595:0.03526):0.00128,269359:0.02279):0.00928,275150:0.04595):0.01442):0.01243):0.00596,(197790:0.07695,((274106:0.03127,260828:0.02915):0.00728,((180105:0.00572,(196777:0.02608,179063:0.02119):0.02629):0.00667,177802:0.01713):0.01597):0.00679):0.0135):0.01445,((((((275078:0.05198,271378:0.04527):0.00987,276580:0.06941):0.00745,260653:0.01951):0.00577,(259335:0.00857,269992:0.01649):0.02089):0.00607,(261606:0.01776,((260205:0.01675,837473:0.0327):0.01931,259175:0.0661):0.01057):0.03771):0.01365,(272812:0.01332,(275136:0.0504,265641:0.04142):0.01641):0.05327):0.01447):0.01207):0.01462,(180362:0.0488,214471:0.03306):0.02282):0.00489,((276663:0.07249,(1108453:0.12347,267457:0.04345):0.01011):0.00558,(174959:0.05484,185754:0.01372):0.08833):0.00679):0.00361,(181091:0.05605,(319909:0.01099,4462541:0.01663):0.05346):0.0137):0.0035):0.00421,(((((291750:0.01414,178659:0.02705):0.00913,350381:0.03212):0.01138,(((((314963:0.02373,(234121:0.00158,274018:0.01522):0.01383):0.00336,((330296:0.00877,343420:0.00828):0.00199,267388:0.05277):0.00813):0.00348,((175573:0.00729,(((274521:0.02619,191887:0.02143):0.00412,2120775:0.03204):0.0148,436341:0.01208):0.02026):0.04471,(199181:0.05741,266816:0.03119):0.01288):0.0044):0.00294,((180972:0.00911,183211:0.01607):0.01057,227886:0.01216):0.00806):0.04236,(348398:0.00318,270491:0.02772):0.02149):0.016):0.00697,(327236:0.072,182016:0.06278):0.02379):0.00015,(258250:0.03667,(275819:0.02306,275470:0.00947):0.0143):0.01639):0.00479):0.00187,((320490:0.04718,311174:0.01972):0.00144,(193463:0.0092,191398:0.04605):0.03482):0.05332):0.00971):0.00744,((354957:0.05068,183390:0.04344):0.00734,275869:0.07717):0.0236):0.03292):0.00127):0.01695):0.01217,(461524:0.11627,((265106:0.03592,(178735:0.03407,267123:0.02167):0.02808):0.01912,199403:0.02374):0.05723):0.23533):0.03382):0.06256)'p__Firmicutes':0.00964,((115186:0.01033,687185:0.04697):0.18622,(214919:0.07235,(((((4397402:0.02449,187233:0.04148):0.00016,166911:0.01405):0.01203,4338733:0.18171):0.05395,(((452823:0.00238,259372:0.01508):0.00255,239571:0.02438):0.00333,135956:0.03044):0.04831):0.05536,303652:0.15985):0.07926):0.0439):0.05025):0.04842):0.02284):0.01143,(((((((1107945:0.00015,((2137001:0.03611,4468234:0.02727):0.00026,191483:0.02217):0.03914):0.08626,353782:0.02512):0.08438,(((4346374:0.02476,184567:0.04246):0.01612,4414420:0.02265):0.01346,(4372578:0.00847,3621189:0.03839):0.04814):0.0072):0.11232,(4449524:0.14634,173807:0.22514):0.01949):0.08539,(((((((3206355:0.05575,(260387:0.01306,269902:0.00981):0.02831):0.01394,179069:0.04071):0.01129,182995:0.11183):0.00805,174663:0.06637):0.00966,196138:0.06604):0.02537,(262399:0.01744,262166:0.00828):0.09631):0.0058,(((((194978:0.0156,187078:0.01205):0.00763,177205:0.03615):0.01066,259249:0.01811):0.01125,(187644:0.03502,163862:0.01646):0.02417):0.01184,(((((((197318:0.0139,186497:0.02106):0.03022,167078:0.05416):0.00273,(176886:0.02207,(174754:0.014,174791:0.0416):0.0392):0.01009):0.00569,264496:0.0376):0.00496,(259609:0.02492,380534:0.02199):0.01009):0.0004,(((275339:0.01699,264787:0.00088):0.01257,(210950:0.01363,(204144:0.006,195005:0.0172):0.02314):0.01172):0.02039,((261177:0.03474,(174056:0.03568,199698:0.08361):0.01406):0.00627,259012:0.05891):0.02085):0.00235):0.00568,((183106:0.04258,177427:0.02913):0.01459,(270984:0.02958,259859:0.03521):0.02522):0.00961):0.00583):0.00502):0.0886):0.05356,(((((199534:0.02102,4127460:0.03563):0.0077,(169398:0.01634,336214:0.01107):0.02565):0.00367,276172:0.01705):0.00497,270391:0.0711):0.02348,4331760:0.04459):0.11283):0.02804,(((2112006:0.09488,170335:0.05985):0.03176,3117556:0.13918)'f__[Odoribacteraceae]':0.0905,(847228:0.03654,(4329571:0.02548,4442459:0.01181):0.03366):0.14176):0.02217):0.39352):0.02085):0.08322):0.19527); \ No newline at end of file diff --git a/q2_diversity/tests/data/vaw.biom b/q2_diversity/tests/data/vaw.biom new file mode 100644 index 00000000..b3c019bf Binary files /dev/null and b/q2_diversity/tests/data/vaw.biom differ diff --git a/q2_diversity/tests/data/vaw.nwk b/q2_diversity/tests/data/vaw.nwk new file mode 100644 index 00000000..1ba14a5b --- /dev/null +++ b/q2_diversity/tests/data/vaw.nwk @@ -0,0 +1 @@ +(GG_OTU_1:1,(GG_OTU_2:1,GG_OTU_3:1):1,(GG_OTU_5:1,GG_OTU_4:1):1); diff --git a/q2_diversity/tests/test_beta.py b/q2_diversity/tests/test_beta.py index 8e9f38a5..dd055deb 100644 --- a/q2_diversity/tests/test_beta.py +++ b/q2_diversity/tests/test_beta.py @@ -21,9 +21,11 @@ import pandas as pd import scipy.misc import qiime2 +from qiime2.plugin.testing import TestPluginBase -from q2_diversity import (beta, beta_phylogenetic, bioenv, - beta_group_significance, beta_correlation, + +from q2_diversity import (beta, beta_phylogenetic, beta_phylogenetic_alt, + bioenv, beta_group_significance, beta_correlation, beta_rarefaction) from q2_diversity._beta._visualizer import (_get_distance_boxplot_data, _metadata_distance, @@ -161,6 +163,222 @@ def test_beta_phylogenetic_weighted_unifrac_threads_error(self): metric='weighted_unifrac', n_jobs=-1) +class BetaDiversityAltTests(TestPluginBase): + # Note that some of these tests replicate the cases in biocore/unifrac + package = 'q2_diversity.tests' + + def test_beta_unweighted(self): + bt_fp = self.get_data_path('crawford.biom') + tree_fp = self.get_data_path('crawford.nwk') + + actual = beta_phylogenetic_alt(table=bt_fp, + phylogeny=tree_fp, + metric='unweighted_unifrac') + + # computed with beta-phylogenetic + data = np.array([0.71836067, 0.71317361, 0.69746044, 0.62587207, + 0.72826674, 0.72065895, 0.72640581, 0.73606053, + 0.70302967, 0.73407301, 0.6548042, 0.71547381, + 0.78397813, 0.72318399, 0.76138933, 0.61041275, + 0.62331299, 0.71848305, 0.70416337, 0.75258475, + 0.79249029, 0.64392779, 0.70052733, 0.69832716, + 0.77818938, 0.72959894, 0.75782689, 0.71005144, + 0.75065046, 0.78944369, 0.63593642, 0.71283615, + 0.58314638, 0.69200762, 0.68972056, 0.71514083]) + ids = ('10084.PC.481', '10084.PC.593', '10084.PC.356', '10084.PC.355', + '10084.PC.354', '10084.PC.636', '10084.PC.635', '10084.PC.607', + '10084.PC.634') + expected = skbio.DistanceMatrix(data, ids=ids) + + self.assertEqual(actual.ids, expected.ids) + for id1 in actual.ids: + for id2 in actual.ids: + npt.assert_almost_equal(actual[id1, id2], expected[id1, id2]) + + def test_beta_unweighted_parallel(self): + bt_fp = self.get_data_path('crawford.biom') + tree_fp = self.get_data_path('crawford.nwk') + + actual = beta_phylogenetic_alt(table=bt_fp, + phylogeny=tree_fp, + metric='unweighted_unifrac', + n_jobs=2) + + # computed with beta-phylogenetic + data = np.array([0.71836067, 0.71317361, 0.69746044, 0.62587207, + 0.72826674, 0.72065895, 0.72640581, 0.73606053, + 0.70302967, 0.73407301, 0.6548042, 0.71547381, + 0.78397813, 0.72318399, 0.76138933, 0.61041275, + 0.62331299, 0.71848305, 0.70416337, 0.75258475, + 0.79249029, 0.64392779, 0.70052733, 0.69832716, + 0.77818938, 0.72959894, 0.75782689, 0.71005144, + 0.75065046, 0.78944369, 0.63593642, 0.71283615, + 0.58314638, 0.69200762, 0.68972056, 0.71514083]) + ids = ('10084.PC.481', '10084.PC.593', '10084.PC.356', '10084.PC.355', + '10084.PC.354', '10084.PC.636', '10084.PC.635', '10084.PC.607', + '10084.PC.634') + expected = skbio.DistanceMatrix(data, ids=ids) + + self.assertEqual(actual.ids, expected.ids) + for id1 in actual.ids: + for id2 in actual.ids: + npt.assert_almost_equal(actual[id1, id2], expected[id1, id2]) + + def test_beta_weighted(self): + bt_fp = self.get_data_path('crawford.biom') + tree_fp = self.get_data_path('crawford.nwk') + + actual = beta_phylogenetic_alt(table=bt_fp, + phylogeny=tree_fp, + metric='weighted_unifrac') + + # computed with beta-phylogenetic (weighted_unifrac) + data = np.array([0.44656238, 0.23771096, 0.30489123, 0.23446002, + 0.65723575, 0.44911772, 0.381904, 0.69144829, + 0.39611776, 0.36568012, 0.53377975, 0.48908025, + 0.35155196, 0.28318669, 0.57376916, 0.23395746, + 0.24658122, 0.60271637, 0.39802552, 0.36567394, + 0.68062701, 0.36862049, 0.48350632, 0.33024631, + 0.33266697, 0.53464744, 0.74605075, 0.53951035, + 0.49680733, 0.79178838, 0.37109012, 0.52629343, + 0.22118218, 0.32400805, 0.43189708, 0.59705893]) + ids = ('10084.PC.481', '10084.PC.593', '10084.PC.356', '10084.PC.355', + '10084.PC.354', '10084.PC.636', '10084.PC.635', '10084.PC.607', + '10084.PC.634') + expected = skbio.DistanceMatrix(data, ids=ids) + + self.assertEqual(actual.ids, expected.ids) + for id1 in actual.ids: + for id2 in actual.ids: + npt.assert_almost_equal(actual[id1, id2], expected[id1, id2]) + + def test_variance_adjusted_normalized(self): + bt_fp = self.get_data_path('vaw.biom') + tree_fp = self.get_data_path('vaw.nwk') + + actual = beta_phylogenetic_alt(table=bt_fp, + phylogeny=tree_fp, + metric='weighted_normalized_unifrac', + variance_adjusted=True) + + data = np.array([[0.0000000, 0.4086040, 0.6240185, 0.4639481, + 0.2857143, 0.2766318], + [0.4086040, 0.0000000, 0.3798594, 0.6884992, + 0.6807616, 0.4735781], + [0.6240185, 0.3798594, 0.0000000, 0.7713254, + 0.8812897, 0.5047114], + [0.4639481, 0.6884992, 0.7713254, 0.0000000, + 0.6666667, 0.2709298], + [0.2857143, 0.6807616, 0.8812897, 0.6666667, + 0.0000000, 0.4735991], + [0.2766318, 0.4735781, 0.5047114, 0.2709298, + 0.4735991, 0.0000000]]) + ids = ('Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5', + 'Sample6') + expected = skbio.DistanceMatrix(data, ids=ids) + + self.assertEqual(actual.ids, expected.ids) + for id1 in actual.ids: + for id2 in actual.ids: + npt.assert_almost_equal(actual[id1, id2], expected[id1, id2]) + + def test_generalized_unifrac(self): + bt_fp = self.get_data_path('vaw.biom') + tree_fp = self.get_data_path('vaw.nwk') + + actual = beta_phylogenetic_alt(table=bt_fp, + phylogeny=tree_fp, + metric='generalized_unifrac', + alpha=0.5) + + data = np.array([[0.0000000, 0.4040518, 0.6285560, 0.5869439, + 0.4082483, 0.2995673], + [0.4040518, 0.0000000, 0.4160597, 0.7071068, + 0.7302479, 0.4860856], + [0.6285560, 0.4160597, 0.0000000, 0.8005220, + 0.9073159, 0.5218198], + [0.5869439, 0.7071068, 0.8005220, 0.0000000, + 0.4117216, 0.3485667], + [0.4082483, 0.7302479, 0.9073159, 0.4117216, + 0.0000000, 0.6188282], + [0.2995673, 0.4860856, 0.5218198, 0.3485667, + 0.6188282, 0.0000000]]) + ids = ('Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5', + 'Sample6') + expected = skbio.DistanceMatrix(data, ids=ids) + + self.assertEqual(actual.ids, expected.ids) + for id1 in actual.ids: + for id2 in actual.ids: + npt.assert_almost_equal(actual[id1, id2], expected[id1, id2]) + + def test_generalized_unifrac_no_alpha(self): + bt_fp = self.get_data_path('crawford.biom') + tree_fp = self.get_data_path('crawford.nwk') + + actual = beta_phylogenetic_alt(table=bt_fp, + phylogeny=tree_fp, + metric='generalized_unifrac', + alpha=None) + + # alpha=1 should be equal to weighted normalized UniFrac + data = np.array([0.2821874, 0.16148405, 0.20186143, 0.1634832, + 0.40351108, 0.29135056, 0.24790944, 0.41967404, + 0.24642185, 0.22218489, 0.34007547, 0.27722011, + 0.20963881, 0.16897221, 0.3217958, 0.15237816, + 0.16899207, 0.36445044, 0.25408941, 0.23358681, + 0.4069374, 0.24615927, 0.28573888, 0.20578184, + 0.20742006, 0.31249151, 0.46169893, 0.35294595, + 0.32522355, 0.48437103, 0.21534558, 0.30558908, + 0.12091004, 0.19817777, 0.24792853, 0.34293674]) + ids = ('10084.PC.481', '10084.PC.593', '10084.PC.356', '10084.PC.355', + '10084.PC.354', '10084.PC.636', '10084.PC.635', '10084.PC.607', + '10084.PC.634') + expected = skbio.DistanceMatrix(data, ids=ids) + + self.assertEqual(actual.ids, expected.ids) + for id1 in actual.ids: + for id2 in actual.ids: + npt.assert_almost_equal(actual[id1, id2], expected[id1, id2]) + + def test_beta_phylogenetic_alpha_on_non_generalized(self): + bt_fp = self.get_data_path('crawford.biom') + tree_fp = self.get_data_path('tree.nwk') + + with self.assertRaisesRegex(ValueError, 'The alpha parameter is only ' + 'allowed when the choice of metric is ' + 'generalized_unifrac'): + beta_phylogenetic_alt(table=bt_fp, phylogeny=tree_fp, + metric='unweighted_unifrac', + alpha=0.11) + + def test_beta_phylogenetic_non_phylo_metric(self): + bt_fp = self.get_data_path('crawford.biom') + tree_fp = self.get_data_path('tree.nwk') + + with self.assertRaises(ValueError): + beta_phylogenetic_alt(table=bt_fp, phylogeny=tree_fp, + metric='braycurtis') + + def test_beta_phylogenetic_unknown_metric(self): + bt_fp = self.get_data_path('crawford.biom') + tree_fp = self.get_data_path('tree.nwk') + + with self.assertRaises(ValueError): + beta_phylogenetic_alt(table=bt_fp, phylogeny=tree_fp, + metric='not-a-metric') + + def test_beta_phylogenetic_too_many_jobs(self): + bt_fp = self.get_data_path('crawford.biom') + tree_fp = self.get_data_path('tree.nwk') + + with self.assertRaises(ValueError): + # cannot guarantee that this will always be true, but it would be + # odd to see a machine with these many CPUs + beta_phylogenetic_alt(table=bt_fp, phylogeny=tree_fp, + metric='unweighted_unifrac', n_jobs=11117) + + class BioenvTests(unittest.TestCase): def test_bioenv(self):