Skip to content

Commit

Permalink
Moved test_auto_multiprocess_toggle to test_profiler_utils.
Browse files Browse the repository at this point in the history
  • Loading branch information
clee1152 committed Jul 31, 2023
1 parent f9b5f96 commit 427a172
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 37 deletions.
37 changes: 0 additions & 37 deletions dataprofiler/tests/profilers/test_profile_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,43 +88,6 @@ def setUpClass(cls):
cls.aws_dataset, len(cls.aws_dataset), options=profiler_options
)

def test_auto_multiprocess_toggle(self, *mocks):
    """
    Exercise StructuredProfiler._auto_multiprocess_toggle with fixed thresholds.

    Each scenario builds a random DataFrame of the given (rows, cols) shape,
    wraps it in a StructuredProfiler, and checks the toggle result against
    rows_threshold=5 and cols_threshold=10.
    """
    rows_threshold = 5
    cols_threshold = 10

    # (shape, expected toggle result), exercised in this order
    scenarios = [
        # no multiprocessing for sufficiently small datasets
        ((2, 5), False),
        ((5, 10), False),
        # multiprocessing with only rows passing threshold
        ((6, 10), True),
        # multiprocessing with only columns passing threshold
        ((5, 11), True),
        # multiprocessing with both rows and columns passing threshold
        ((6, 11), True),
    ]

    for shape, expect_multiprocess in scenarios:
        frame = pd.DataFrame(np.random.random(shape))
        structured = dp.StructuredProfiler(frame)
        verify = self.assertTrue if expect_multiprocess else self.assertFalse
        verify(
            structured._auto_multiprocess_toggle(
                frame, rows_threshold, cols_threshold
            )
        )

@mock.patch(
"dataprofiler.profilers.profile_builder.ColumnPrimitiveTypeProfileCompiler"
)
Expand Down
49 changes: 49 additions & 0 deletions dataprofiler/tests/profilers/test_profiler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,3 +469,52 @@ def test_odd_merge_profile_list(self, mock_data_labeler, *mocks):

self.assertEqual(1, single_report["data_stats"][0]["statistics"]["min"])
self.assertEqual(60.0, single_report["data_stats"][0]["statistics"]["max"])


class TestAutoMultiProcessToggle(unittest.TestCase):
    """
    Check that profiler_utils.auto_multiprocess_toggle responds to thresholds.
    """

    def test_auto_multiprocess_toggle(self, *mocks):
        """
        Compare the toggle result against expectations for several shapes.

        Uses rows_threshold=5 and cols_threshold=10 with random DataFrames
        whose (rows, cols) shapes sit at or just past those thresholds.
        """
        rows_threshold = 5
        cols_threshold = 10

        # (shape, expected toggle result), exercised in this order
        scenarios = [
            # no multiprocessing for sufficiently small datasets
            ((2, 5), False),
            ((5, 10), False),
            # multiprocessing with only rows passing threshold
            ((6, 10), True),
            # multiprocessing with only columns passing threshold
            ((5, 11), True),
            # multiprocessing with both rows and columns passing threshold
            ((6, 11), True),
        ]

        for shape, expect_multiprocess in scenarios:
            frame = pd.DataFrame(np.random.random(shape))
            verify = self.assertTrue if expect_multiprocess else self.assertFalse
            verify(
                profiler_utils.auto_multiprocess_toggle(
                    frame, rows_threshold, cols_threshold
                )
            )

0 comments on commit 427a172

Please sign in to comment.