diff --git a/dataprofiler/tests/profilers/test_profile_builder.py b/dataprofiler/tests/profilers/test_profile_builder.py
index 06d3f7b6e..71f664e1d 100644
--- a/dataprofiler/tests/profilers/test_profile_builder.py
+++ b/dataprofiler/tests/profilers/test_profile_builder.py
@@ -88,43 +88,6 @@ def setUpClass(cls):
             cls.aws_dataset, len(cls.aws_dataset), options=profiler_options
         )
 
-    def test_auto_multiprocess_toggle(self, *mocks):
-        rows_threshold = 5
-        cols_threshold = 10
-
-        # Test for no multiprocessing for sufficiently small datasets
-        data = pd.DataFrame(np.random.random((2, 5)))
-        profiler = dp.StructuredProfiler(data)
-        self.assertFalse(
-            profiler._auto_multiprocess_toggle(data, rows_threshold, cols_threshold)
-        )
-        data = pd.DataFrame(np.random.random((5, 10)))
-        profiler = dp.StructuredProfiler(data)
-        self.assertFalse(
-            profiler._auto_multiprocess_toggle(data, rows_threshold, cols_threshold)
-        )
-
-        # Test for multiprocessing with only rows passing threshold
-        data = pd.DataFrame(np.random.random((6, 10)))
-        profiler = dp.StructuredProfiler(data)
-        self.assertTrue(
-            profiler._auto_multiprocess_toggle(data, rows_threshold, cols_threshold)
-        )
-
-        # Test for multiprocessing with only columns passing threshold
-        data = pd.DataFrame(np.random.random((5, 11)))
-        profiler = dp.StructuredProfiler(data)
-        self.assertTrue(
-            profiler._auto_multiprocess_toggle(data, rows_threshold, cols_threshold)
-        )
-
-        # Test for multiprocessing with both rows and columns passing threshold
-        data = pd.DataFrame(np.random.random((6, 11)))
-        profiler = dp.StructuredProfiler(data)
-        self.assertTrue(
-            profiler._auto_multiprocess_toggle(data, rows_threshold, cols_threshold)
-        )
-
     @mock.patch(
         "dataprofiler.profilers.profile_builder.ColumnPrimitiveTypeProfileCompiler"
     )
diff --git a/dataprofiler/tests/profilers/test_profiler_utils.py b/dataprofiler/tests/profilers/test_profiler_utils.py
index fcef4fb57..7d6bc6f83 100644
--- a/dataprofiler/tests/profilers/test_profiler_utils.py
+++ b/dataprofiler/tests/profilers/test_profiler_utils.py
@@ -469,3 +469,35 @@ def test_odd_merge_profile_list(self, mock_data_labeler, *mocks):
 
         self.assertEqual(1, single_report["data_stats"][0]["statistics"]["min"])
         self.assertEqual(60.0, single_report["data_stats"][0]["statistics"]["max"])
+
+
+class TestAutoMultiProcessToggle(unittest.TestCase):
+    """
+    Validate profiler_utils.auto_multiprocess_toggle is properly working.
+    """
+
+    def test_auto_multiprocess_toggle(self, *mocks):
+        """Check the toggle for shapes below, at, and just past the thresholds."""
+        rows_threshold = 5
+        cols_threshold = 10
+
+        # (data shape, expected toggle result, scenario description)
+        cases = [
+            ((2, 5), False, "rows and columns below thresholds"),
+            ((5, 10), False, "rows and columns equal to thresholds"),
+            ((6, 10), True, "only rows passing threshold"),
+            ((5, 11), True, "only columns passing threshold"),
+            ((6, 11), True, "both rows and columns passing threshold"),
+        ]
+
+        for shape, expected, scenario in cases:
+            # subTest reports each failing shape instead of stopping at the first.
+            with self.subTest(shape=shape, scenario=scenario):
+                data = pd.DataFrame(np.random.random(shape))
+                result = profiler_utils.auto_multiprocess_toggle(
+                    data, rows_threshold, cols_threshold
+                )
+                if expected:
+                    self.assertTrue(result)
+                else:
+                    self.assertFalse(result)