From 84cde6dc7ebd16c0ffec9fb8f68700b1d01b8e10 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 1 Aug 2023 14:03:31 -0700 Subject: [PATCH] CI/TST: Cleanups --- .circleci/setup_env.sh | 4 +- .github/workflows/unit-tests.yml | 2 - pandas/tests/arrays/test_datetimelike.py | 69 --------------------- pandas/tests/io/parser/test_upcast.py | 3 - pandas/tests/io/test_gcs.py | 10 ++-- pandas/tests/test_downstream.py | 76 ++++++++++++++++++++++++ 6 files changed, 83 insertions(+), 81 deletions(-) diff --git a/.circleci/setup_env.sh b/.circleci/setup_env.sh index e41650870bd70..4f81acb6d2099 100755 --- a/.circleci/setup_env.sh +++ b/.circleci/setup_env.sh @@ -48,10 +48,10 @@ source activate pandas-dev # downstream CI jobs that may also build pandas from source. export PANDAS_CI=1 -if pip list | grep -q ^pandas; then +if pip show pandas 1>/dev/null; then echo echo "remove any installed pandas package w/o removing anything else" - pip uninstall -y pandas || true + pip uninstall -y pandas fi echo "Install pandas" diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a9651ae26934b..1770d18d4eb41 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -333,7 +333,6 @@ jobs: PYTEST_WORKERS: "auto" PANDAS_CI: 1 PATTERN: "not slow and not network and not clipboard and not single_cpu" - COVERAGE: true PYTEST_TARGET: pandas steps: @@ -351,7 +350,6 @@ jobs: python --version python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.0.1 meson-python==0.13.1 python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy - python -m pip install git+https://github.com/nedbat/coveragepy.git python -m pip install versioneer[toml] python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17 python -m pip list diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index fd133b104b380..a4fbc8df4a8fa 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1,6 +1,5 @@ from __future__ import annotations -import array import re import warnings @@ -12,7 +11,6 @@ OutOfBoundsDatetime, Timestamp, ) -import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -1328,70 +1326,3 @@ def test_from_pandas_array(dtype): result = idx_cls(arr) expected = idx_cls(data) tm.assert_index_equal(result, expected) - - -@pytest.fixture( - params=[ - "memoryview", - "array", - pytest.param("dask", marks=td.skip_if_no("dask.array")), - pytest.param("xarray", marks=td.skip_if_no("xarray")), - ] -) -def array_likes(request): - """ - Fixture giving a numpy array and a parametrized 'data' object, which can - be a memoryview, array, dask or xarray object created from the numpy array. - """ - # GH#24539 recognize e.g xarray, dask, ... - arr = np.array([1, 2, 3], dtype=np.int64) - - name = request.param - if name == "memoryview": - data = memoryview(arr) - elif name == "array": - data = array.array("i", arr) - elif name == "dask": - import dask.array - - data = dask.array.array(arr) - elif name == "xarray": - import xarray as xr - - data = xr.DataArray(arr) - - return arr, data - - -@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) -def test_from_obscure_array(dtype, array_likes): - # GH#24539 recognize e.g xarray, dask, ... - # Note: we dont do this for PeriodArray bc _from_sequence won't accept - # an array of integers - # TODO: could check with arraylike of Period objects - arr, data = array_likes - - cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype] - - expected = cls(arr) - result = cls._from_sequence(data) - tm.assert_extension_array_equal(result, expected) - - func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype] - result = func(arr)[0] - expected = func(data)[0] - tm.assert_equal(result, expected) - - if not isinstance(data, memoryview): - # FIXME(GH#44431) these raise on memoryview and attempted fix - # fails on py3.10 - func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype] - result = func(arr).array - expected = func(data).array - tm.assert_equal(result, expected) - - # Let's check the Indexes while we're here - idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype] - result = idx_cls(arr) - expected = idx_cls(data) - tm.assert_index_equal(result, expected) diff --git a/pandas/tests/io/parser/test_upcast.py b/pandas/tests/io/parser/test_upcast.py index 558822b84620a..7cfaac997e3b1 100644 --- a/pandas/tests/io/parser/test_upcast.py +++ b/pandas/tests/io/parser/test_upcast.py @@ -38,9 +38,6 @@ def test_maybe_upcast(any_real_numpy_dtype): def test_maybe_upcast_no_na(any_real_numpy_dtype): # GH#36712 - if any_real_numpy_dtype == "float32": - pytest.skip() - arr = np.array([1, 2, 3], dtype=any_real_numpy_dtype) result = _maybe_upcast(arr, use_dtype_backend=True) diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index bdea24f7bb5aa..89655e8693d7f 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -22,7 +22,8 @@ @pytest.fixture def gcs_buffer(): """Emulate GCS using a binary buffer.""" - import fsspec + pytest.importorskip("gcsfs") + fsspec = pytest.importorskip("fsspec") gcs_buffer = BytesIO() gcs_buffer.close = lambda: True @@ -43,7 +44,6 @@ def ls(self, path, **kwargs): return gcs_buffer -@td.skip_if_no("gcsfs") # Patches pyarrow; other processes should not pick up change @pytest.mark.single_cpu @pytest.mark.parametrize("format", ["csv", "json", "parquet", "excel", "markdown"]) @@ -131,7 +131,6 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str): assert result == expected -@td.skip_if_no("gcsfs") @pytest.mark.parametrize("encoding", ["utf-8", "cp1251"]) def test_to_csv_compression_encoding_gcs( gcs_buffer, compression_only, encoding, compression_to_extension @@ -177,10 +176,11 @@ def test_to_csv_compression_encoding_gcs( tm.assert_frame_equal(df, read_df) -@td.skip_if_no("fastparquet") -@td.skip_if_no("gcsfs") def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): """Regression test for writing to a not-yet-existent GCS Parquet file.""" + pytest.importorskip("fastparquet") + pytest.importorskip("gcsfs") + from fsspec import AbstractFileSystem df1 = DataFrame( diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 09594588be81c..01efb01e63e1c 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -1,6 +1,7 @@ """ Testing that we work in the downstream packages """ +import array import importlib import subprocess import sys @@ -14,9 +15,17 @@ import pandas as pd from pandas import ( DataFrame, + DatetimeIndex, Series, + TimedeltaIndex, ) import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, +) +from pandas.core.arrays.datetimes import _sequence_to_dt64ns +from pandas.core.arrays.timedeltas import sequence_to_td64ns def import_module(name): @@ -277,3 +286,70 @@ def __radd__(self, other): assert right.__add__(left) is NotImplemented assert right + left is left + + +@pytest.fixture( + params=[ + "memoryview", + "array", + pytest.param("dask", marks=td.skip_if_no("dask.array")), + pytest.param("xarray", marks=td.skip_if_no("xarray")), + ] +) +def array_likes(request): + """ + Fixture giving a numpy array and a parametrized 'data' object, which can + be a memoryview, array, dask or xarray object created from the numpy array. + """ + # GH#24539 recognize e.g xarray, dask, ... + arr = np.array([1, 2, 3], dtype=np.int64) + + name = request.param + if name == "memoryview": + data = memoryview(arr) + elif name == "array": + data = array.array("i", arr) + elif name == "dask": + import dask.array + + data = dask.array.array(arr) + elif name == "xarray": + import xarray as xr + + data = xr.DataArray(arr) + + return arr, data + + +@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) +def test_from_obscure_array(dtype, array_likes): + # GH#24539 recognize e.g xarray, dask, ... + # Note: we dont do this for PeriodArray bc _from_sequence won't accept + # an array of integers + # TODO: could check with arraylike of Period objects + arr, data = array_likes + + cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype] + + expected = cls(arr) + result = cls._from_sequence(data) + tm.assert_extension_array_equal(result, expected) + + func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype] + result = func(arr)[0] + expected = func(data)[0] + tm.assert_equal(result, expected) + + if not isinstance(data, memoryview): + # FIXME(GH#44431) these raise on memoryview and attempted fix + # fails on py3.10 + func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype] + result = func(arr).array + expected = func(data).array + tm.assert_equal(result, expected) + + # Let's check the Indexes while we're here + idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype] + result = idx_cls(arr) + expected = idx_cls(data) + tm.assert_index_equal(result, expected)