From 99a0ccaaec14838b95845dbfe57f874d092b65c7 Mon Sep 17 00:00:00 2001
From: Hannah Hagen <125509369+hagenjp@users.noreply.github.com>
Date: Fri, 23 Feb 2024 15:19:06 -0700
Subject: [PATCH] BUG: improves error message to describe too few samples
 retained (#351)

---
 q2_diversity/_alpha/_visualizer.py | 18 +++++++++++++-----
 q2_diversity/tests/test_alpha.py   | 20 ++++++++++++++++++--
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/q2_diversity/_alpha/_visualizer.py b/q2_diversity/_alpha/_visualizer.py
index 4c5864c..bbf0263 100644
--- a/q2_diversity/_alpha/_visualizer.py
+++ b/q2_diversity/_alpha/_visualizer.py
@@ -48,12 +48,20 @@ def alpha_group_significance(output_dir: str, alpha_diversity: pd.Series,
     filtered_columns = pre_filtered_cols - set(metadata.columns)
 
     if len(metadata.columns) == 0:
+        sample_ids = ', '.join(alpha_diversity.index)
         raise ValueError(
-            "Metadata does not contain any columns that satisfy this "
-            "visualizer's requirements. There must be at least one metadata "
-            "column that contains categorical data, isn't empty, doesn't "
-            "consist of unique values, and doesn't consist of exactly one "
-            "value.")
+            "Either the metadata file does not meet the requirements of this "
+            "visualizer, or the samples associated with the metadata do not "
+            "meet the requirements. The visualizer requires at least one "
+            "metadata column that contains categorical data, isn't empty, "
+            "doesn't consist of unique values, and doesn't consist of exactly "
+            "one value. The contents of the metadata file associated with the "
+            "samples present in the alpha-diversity metric are: "
+            + sample_ids +
+            ". Please check your metadata file and the diversity metric to "
+            "ensure an appropriate sampling depth was selected. If your "
+            "sampling depth is too deep, it may result in too few samples "
+            "being retained for the visualizer.")
 
     metric_name = alpha_diversity.name
 
diff --git a/q2_diversity/tests/test_alpha.py b/q2_diversity/tests/test_alpha.py
index 937d3f3..91e8139 100644
--- a/q2_diversity/tests/test_alpha.py
+++ b/q2_diversity/tests/test_alpha.py
@@ -401,8 +401,8 @@ def test_alpha_group_significance_numeric_only(self):
                 index=pd.Index(['sample1', 'sample2', 'sample3'], name='id')))
 
         with tempfile.TemporaryDirectory() as output_dir:
-            err_msg = ("does not contain any columns that satisfy this "
-                       "visualizer's requirements")
+            err_msg = ("Either the metadata file does not meet the "
+                       "requirements of this visualizer")
             with self.assertRaisesRegex(ValueError, err_msg):
                 alpha_group_significance(output_dir, alpha_div, md)
 
@@ -439,6 +439,22 @@ def test_alpha_group_significance_forward_slash_in_metadata_col(self):
                                   'kruskal-wallis-pairwise-a%2Fb.csv')
             self.assertTrue(os.path.exists(csv_fp))
 
+    def test_sample_ids_in_error_message(self):
+        # Numeric-only metadata leaves no usable categorical column.
+        ids = ['sample1', 'sample2', 'sample3']
+        alpha_div = pd.Series([2.0, 4.0, 6.0], name='alpha-div', index=ids)
+        md = qiime2.Metadata(pd.DataFrame(
+            {'col1': [1, 2, 1], 'col2': [4.2, 4.2, 4.3]},
+            index=pd.Index(ids, name='id')))
+
+        with self.assertRaises(ValueError) as ve:
+            alpha_group_significance('output_dir', alpha_div, md)
+
+        # Every sample ID must be echoed back in the error message.
+        message = str(ve.exception)
+        for sample_id in ids:
+            self.assertIn(sample_id, message)
+
 
 if __name__ == '__main__':
     unittest.main()