From af9f275716113fa7d6a1e9b75be77ea5438a558d Mon Sep 17 00:00:00 2001 From: Taylor Turner Date: Wed, 5 Jun 2024 11:59:52 -0500 Subject: [PATCH] pre-commit fix (#1122) --- .pre-commit-config.yaml | 2 +- dataprofiler/__init__.py | 16 ++++++++++ dataprofiler/tests/test_data_profiler.py | 40 ++++++++++++++++++++++++ requirements.txt | 2 +- 4 files changed, 58 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f36c52663..7baeb59ec 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,7 +55,7 @@ repos: pyarrow>=1.0.1, chardet>=3.0.4, fastavro>=1.0.0.post1, - cramjam>=2.7.0, + python-snappy>=0.5.4, charset-normalizer>=1.3.6, psutil>=4.0.0, scipy>=1.4.1, diff --git a/dataprofiler/__init__.py b/dataprofiler/__init__.py index 5f218bd85..2e89d3e2b 100644 --- a/dataprofiler/__init__.py +++ b/dataprofiler/__init__.py @@ -20,6 +20,22 @@ from .validators.base_validators import Validator from .version import __version__ +try: + import snappy +except ImportError: + import warnings + + warnings.warn( + "Snappy must be installed to use parquet/avro datasets." 
+ "\n\n" + "For macOS use Homebrew:\n" + "\t`brew install snappy`" + "\n\n" + "For linux use apt-get:\n" + "\t`sudo apt-get -y install libsnappy-dev`\n", + ImportWarning, + ) + def set_seed(seed=None): # also check it's an integer diff --git a/dataprofiler/tests/test_data_profiler.py b/dataprofiler/tests/test_data_profiler.py index 9ebdfa039..ef7664cea 100644 --- a/dataprofiler/tests/test_data_profiler.py +++ b/dataprofiler/tests/test_data_profiler.py @@ -56,6 +56,46 @@ def test_data_profiling(self): self.assertIsNotNone(profile.profile) self.assertIsNotNone(profile.report()) + def test_no_snappy(self): + import importlib + import sys + import types + + orig_import = __import__ + # necessary for any wrapper around the library to test if snappy caught + # as an issue + + def reload_data_profiler(): + """Reload dataprofiler and its first-level submodules.""" + sys_modules = sys.modules.copy() + for module_name, module in sys_modules.items(): + # Only reload top level of the dataprofiler + if "dataprofiler" in module_name and len(module_name.split(".")) < 3: + if isinstance(module, types.ModuleType): + importlib.reload(module) + + def import_mock(name, *args, **kwargs): + if name == "snappy": + raise ImportError("test") + return orig_import(name, *args, **kwargs) + + with mock.patch("builtins.__import__", side_effect=import_mock): + with self.assertWarns(ImportWarning) as w: + import dataprofiler + + reload_data_profiler() + + self.assertEqual( + str(w.warning), + "Snappy must be installed to use parquet/avro datasets." 
+ "\n\n" + "For macOS use Homebrew:\n" + "\t`brew install snappy`" + "\n\n" + "For linux use apt-get:\n" + "\t`sudo apt-get -y install libsnappy-dev`\n", + ) + def test_no_tensorflow(self): import sys diff --git a/requirements.txt b/requirements.txt index 405f808b3..a45dc34ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ pytz>=2020.1 pyarrow>=1.0.1 chardet>=3.0.4 fastavro>=1.0.0.post1 -cramjam>=2.7.0 +python-snappy>=0.5.4 charset-normalizer>=1.3.6 psutil>=4.0.0 scipy>=1.10.0