diff --git a/python/mlcroissant/mlcroissant/_src/datasets_test.py b/python/mlcroissant/mlcroissant/_src/datasets_test.py index a23c9cee..89973de0 100644 --- a/python/mlcroissant/mlcroissant/_src/datasets_test.py +++ b/python/mlcroissant/mlcroissant/_src/datasets_test.py @@ -125,6 +125,7 @@ def load_records_and_test_equality( @pytest.mark.parametrize( ["dataset_name", "record_set_name", "num_records"], [ + ["audio_test/metadata.json", "records", 2], ["coco2014-mini/metadata.json", "bounding_boxes", -1], ["coco2014-mini/metadata.json", "captions", -1], ["coco2014-mini/metadata.json", "images", -1], @@ -164,11 +165,6 @@ def test_hermetic_loading(version, dataset_name, record_set_name, num_records): ["huggingface-c4/metadata.json", "en", 1], ["huggingface-mnist/metadata.json", "default", 10], ["titanic/metadata.json", "passengers", -1], - [ - "audio_test/metadata.json", - "records", - 10, - ], ], ) def test_nonhermetic_loading(version, dataset_name, record_set_name, num_records): @@ -181,7 +177,6 @@ def test_nonhermetic_loading(version, dataset_name, record_set_name, num_records ["dataset_name", "record_set_name", "num_records"], [ ["huggingface-anthropic-hh-rlhf/metadata.json", "red-team-attempts", 10], - ["huggingface-tgqa/metadata.json", "TGSR_test", 10], ], ) def test_nonhermetic_loading_1_0(dataset_name, record_set_name, num_records): diff --git a/python/mlcroissant/pyproject.toml b/python/mlcroissant/pyproject.toml index dcd7f2d1..f798dffc 100644 --- a/python/mlcroissant/pyproject.toml +++ b/python/mlcroissant/pyproject.toml @@ -51,7 +51,11 @@ dev = [ "pytype", "torchdata", ] -audio = ["librosa"] +# Pin soxr to a pre-release as a workaround, following https://github.com/librosa/librosa/issues/1831#issuecomment-2176274560 +audio = [ + "librosa", + "soxr==0.4.0b1", +] git = ["GitPython"] image = ["Pillow"] parquet = ["pyarrow"]