diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
index 8e29d56f47dcf..24b2de251ce8e 100644
--- a/.github/workflows/code-checks.yml
+++ b/.github/workflows/code-checks.yml
@@ -86,7 +86,7 @@ jobs:
         if: ${{ steps.build.outcome == 'success' && always() }}

       - name: Typing + pylint
-        uses: pre-commit/action@v3.0.0
+        uses: pre-commit/action@v3.0.1
        with:
          extra_args: --verbose --hook-stage manual --all-files
        if: ${{ steps.build.outcome == 'success' && always() }}
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index eead38dffaff4..ce31d63f0b70f 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -159,12 +159,6 @@ def setup(self):

     def time_items(self):
         # (monitor no-copying behaviour)
-        if hasattr(self.df, "_item_cache"):
-            self.df._item_cache.clear()
-        for name, col in self.df.items():
-            pass
-
-    def time_items_cached(self):
         for name, col in self.df.items():
             pass

diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py
index d5c58033c1157..30cab00e4d3f9 100644
--- a/asv_bench/benchmarks/inference.py
+++ b/asv_bench/benchmarks/inference.py
@@ -22,23 +22,20 @@


 class ToNumeric:
-    params = ["ignore", "coerce"]
-    param_names = ["errors"]
-
-    def setup(self, errors):
+    def setup(self):
         N = 10000
         self.float = Series(np.random.randn(N))
         self.numstr = self.float.astype("str")
         self.str = Series(Index([f"i-{i}" for i in range(N)], dtype=object))

-    def time_from_float(self, errors):
-        to_numeric(self.float, errors=errors)
+    def time_from_float(self):
+        to_numeric(self.float, errors="coerce")

-    def time_from_numeric_str(self, errors):
-        to_numeric(self.numstr, errors=errors)
+    def time_from_numeric_str(self):
+        to_numeric(self.numstr, errors="coerce")

-    def time_from_str(self, errors):
-        to_numeric(self.str, errors=errors)
+    def time_from_str(self):
+        to_numeric(self.str, errors="coerce")


 class ToNumericDowncast:
@@ -187,7 +184,7 @@ def time_iso8601_tz_spaceformat(self):

     def time_iso8601_infer_zero_tz_fromat(self):
         # GH 41047
-        to_datetime(self.strings_zero_tz, infer_datetime_format=True)
+        to_datetime(self.strings_zero_tz)


 class ToDatetimeNONISO8601:
@@ -271,16 +268,6 @@ def time_dup_string_tzoffset_dates(self, cache):
         to_datetime(self.dup_string_with_tz, cache=cache)


-# GH 43901
-class ToDatetimeInferDatetimeFormat:
-    def setup(self):
-        rng = date_range(start="1/1/2000", periods=100000, freq="h")
-        self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
-
-    def time_infer_datetime_format(self):
-        to_datetime(self.strings, infer_datetime_format=True)
-
-
 class ToTimedelta:
     def setup(self):
         self.ints = np.random.randint(0, 60, size=10000)
@@ -301,16 +288,13 @@ def time_convert_string_seconds(self):


 class ToTimedeltaErrors:
-    params = ["coerce", "ignore"]
-    param_names = ["errors"]
-
-    def setup(self, errors):
+    def setup(self):
         ints = np.random.randint(0, 60, size=10000)
         self.arr = [f"{i} days" for i in ints]
         self.arr[-1] = "apple"

-    def time_convert(self, errors):
-        to_timedelta(self.arr, errors=errors)
+    def time_convert(self):
+        to_timedelta(self.arr, errors="coerce")


 from .pandas_vb_common import setup  # noqa: F401 isort:skip
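The benchmark changes above hard-code ``errors="coerce"`` because the ``errors="ignore"`` option is removed in 3.0 (see the whatsnew entry later in this diff). A minimal sketch of the surviving behavior, for reference:

```python
import pandas as pd

s = pd.Series(["1.5", "2.0", "apple"])

# errors="raise" (the default) would raise ValueError on "apple";
# errors="coerce" marks unparseable entries as missing instead.
print(pd.to_numeric(s, errors="coerce"))
# 0    1.5
# 1    2.0
# 2    NaN
# dtype: float64
```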
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 6460c430275f4..39ccf904858d5 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -136,13 +136,74 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.Interval\
         pandas.Grouper\
         pandas.core.groupby.DataFrameGroupBy.nth\
-        pandas.core.groupby.DataFrameGroupBy.rolling\
         pandas.core.groupby.SeriesGroupBy.nth\
-        pandas.core.groupby.SeriesGroupBy.rolling\
         pandas.core.groupby.DataFrameGroupBy.plot\
         pandas.core.groupby.SeriesGroupBy.plot # There should be no backslash in the final line, please keep this comment in the last ignored function
     RET=$(($RET + $?)) ; echo $MSG "DONE"

+    MSG='Partially validate docstrings (SA05)' ; echo $MSG
+    $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=SA05 --ignore_functions \
+        pandas.DataFrame.agg\
+        pandas.DataFrame.aggregate\
+        pandas.DataFrame.boxplot\
+        pandas.PeriodIndex.asfreq\
+        pandas.arrays.ArrowStringArray\
+        pandas.arrays.StringArray\
+        pandas.core.groupby.DataFrameGroupBy.first\
+        pandas.core.groupby.DataFrameGroupBy.last\
+        pandas.core.groupby.SeriesGroupBy.first\
+        pandas.core.groupby.SeriesGroupBy.last\
+        pandas.core.resample.Resampler.first\
+        pandas.core.resample.Resampler.last\
+        pandas.core.window.ewm.ExponentialMovingWindow.corr\
+        pandas.core.window.ewm.ExponentialMovingWindow.cov\
+        pandas.core.window.ewm.ExponentialMovingWindow.mean\
+        pandas.core.window.ewm.ExponentialMovingWindow.std\
+        pandas.core.window.ewm.ExponentialMovingWindow.sum\
+        pandas.core.window.ewm.ExponentialMovingWindow.var\
+        pandas.core.window.expanding.Expanding.aggregate\
+        pandas.core.window.expanding.Expanding.apply\
+        pandas.core.window.expanding.Expanding.corr\
+        pandas.core.window.expanding.Expanding.count\
+        pandas.core.window.expanding.Expanding.cov\
+        pandas.core.window.expanding.Expanding.kurt\
+        pandas.core.window.expanding.Expanding.max\
+        pandas.core.window.expanding.Expanding.mean\
+        pandas.core.window.expanding.Expanding.median\
+        pandas.core.window.expanding.Expanding.min\
+        pandas.core.window.expanding.Expanding.quantile\
+        pandas.core.window.expanding.Expanding.rank\
+        pandas.core.window.expanding.Expanding.sem\
+        pandas.core.window.expanding.Expanding.skew\
+        pandas.core.window.expanding.Expanding.std\
+        pandas.core.window.expanding.Expanding.sum\
+        pandas.core.window.expanding.Expanding.var\
+        pandas.core.window.rolling.Rolling.aggregate\
+        pandas.core.window.rolling.Rolling.apply\
+        pandas.core.window.rolling.Rolling.corr\
+        pandas.core.window.rolling.Rolling.count\
+        pandas.core.window.rolling.Rolling.cov\
+        pandas.core.window.rolling.Rolling.kurt\
+        pandas.core.window.rolling.Rolling.max\
+        pandas.core.window.rolling.Rolling.mean\
+        pandas.core.window.rolling.Rolling.median\
+        pandas.core.window.rolling.Rolling.min\
+        pandas.core.window.rolling.Rolling.quantile\
+        pandas.core.window.rolling.Rolling.rank\
+        pandas.core.window.rolling.Rolling.sem\
+        pandas.core.window.rolling.Rolling.skew\
+        pandas.core.window.rolling.Rolling.std\
+        pandas.core.window.rolling.Rolling.sum\
+        pandas.core.window.rolling.Rolling.var\
+        pandas.core.window.rolling.Window.mean\
+        pandas.core.window.rolling.Window.std\
+        pandas.core.window.rolling.Window.sum\
+        pandas.core.window.rolling.Window.var\
+        pandas.plotting.bootstrap_plot\
+        pandas.plotting.boxplot\
+        pandas.plotting.radviz # There should be no backslash in the final line, please keep this comment in the last ignored function
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
 fi

 ### DOCUMENTATION NOTEBOOKS ###
diff --git a/doc/redirects.csv b/doc/redirects.csv
index 8c46cb520cc95..e71a031bd67fd 100644
--- a/doc/redirects.csv
+++ b/doc/redirects.csv
@@ -238,7 +238,6 @@ generated/pandas.core.resample.Resampler.backfill,../reference/api/pandas.core.r
 generated/pandas.core.resample.Resampler.bfill,../reference/api/pandas.core.resample.Resampler.bfill
 generated/pandas.core.resample.Resampler.count,../reference/api/pandas.core.resample.Resampler.count
 generated/pandas.core.resample.Resampler.ffill,../reference/api/pandas.core.resample.Resampler.ffill
-generated/pandas.core.resample.Resampler.fillna,../reference/api/pandas.core.resample.Resampler.fillna
 generated/pandas.core.resample.Resampler.first,../reference/api/pandas.core.resample.Resampler.first
 generated/pandas.core.resample.Resampler.get_group,../reference/api/pandas.core.resample.Resampler.get_group
 generated/pandas.core.resample.Resampler.groups,../reference/api/pandas.core.resample.Resampler.groups
@@ -481,7 +480,6 @@ generated/pandas.DataFrame.style,../reference/api/pandas.DataFrame.style
 generated/pandas.DataFrame.sub,../reference/api/pandas.DataFrame.sub
 generated/pandas.DataFrame.subtract,../reference/api/pandas.DataFrame.subtract
 generated/pandas.DataFrame.sum,../reference/api/pandas.DataFrame.sum
-generated/pandas.DataFrame.swapaxes,../reference/api/pandas.DataFrame.swapaxes
 generated/pandas.DataFrame.swaplevel,../reference/api/pandas.DataFrame.swaplevel
 generated/pandas.DataFrame.tail,../reference/api/pandas.DataFrame.tail
 generated/pandas.DataFrame.take,../reference/api/pandas.DataFrame.take
@@ -1206,7 +1204,6 @@ generated/pandas.Series.str.zfill,../reference/api/pandas.Series.str.zfill
 generated/pandas.Series.sub,../reference/api/pandas.Series.sub
 generated/pandas.Series.subtract,../reference/api/pandas.Series.subtract
 generated/pandas.Series.sum,../reference/api/pandas.Series.sum
-generated/pandas.Series.swapaxes,../reference/api/pandas.Series.swapaxes
 generated/pandas.Series.swaplevel,../reference/api/pandas.Series.swaplevel
 generated/pandas.Series.tail,../reference/api/pandas.Series.tail
 generated/pandas.Series.take,../reference/api/pandas.Series.take
diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
index d09ca416e32a6..9cd9d6a157aee 100644
--- a/doc/source/reference/frame.rst
+++ b/doc/source/reference/frame.rst
@@ -235,7 +235,6 @@ Reshaping, sorting, transposing
    DataFrame.swaplevel
    DataFrame.stack
    DataFrame.unstack
-   DataFrame.swapaxes
    DataFrame.melt
    DataFrame.explode
    DataFrame.squeeze
diff --git a/doc/source/reference/resampling.rst b/doc/source/reference/resampling.rst
index edbc8090fc849..2e0717081b129 100644
--- a/doc/source/reference/resampling.rst
+++ b/doc/source/reference/resampling.rst
@@ -38,7 +38,6 @@ Upsampling
    Resampler.ffill
    Resampler.bfill
    Resampler.nearest
-   Resampler.fillna
    Resampler.asfreq
    Resampler.interpolate
diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
index eb6e50c1be160..94e26ff6aa46a 100644
--- a/doc/source/whatsnew/v2.2.1.rst
+++ b/doc/source/whatsnew/v2.2.1.rst
@@ -17,6 +17,7 @@ Fixed regressions
 - Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
 - Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`)
 - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
+- Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`)
 - Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
 - Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
 - Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
@@ -24,6 +25,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.groupby` raising ``ValueError`` when grouping by a :class:`Series` in some cases (:issue:`57276`)
 - Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
 - Fixed regression in :meth:`DataFrame.merge` raising ``ValueError`` for certain types of 3rd-party extension arrays (:issue:`57316`)
+- Fixed regression in :meth:`DataFrame.shift` raising ``AssertionError`` for ``axis=1`` and empty :class:`DataFrame` (:issue:`57301`)
 - Fixed regression in :meth:`DataFrame.sort_index` not producing a stable sort for an index with duplicates (:issue:`57151`)
 - Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`)
 - Fixed regression in :meth:`DataFrame.to_json` converting nullable integers to floats (:issue:`57224`)
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index aa378faac2a00..0758d58ed3912 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -30,6 +30,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
 - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
+- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
 -

 .. ---------------------------------------------------------------------------
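Per the new enhancement entry above, :meth:`pandas.Series.str.replace` accepts a dictionary for ``pat``. A hedged sketch of the intended usage — the exact semantics are those of GH 51748; the pre-3.0 chained form is shown for comparison:

```python
import pandas as pd

s = pd.Series(["A|B", "B|C"])

# With the enhancement, one call with a mapping of pattern -> replacement
# should stand in for a chain of individual replacements:
result = s.str.replace({"A": "1", "B": "2"})

# Pre-3.0 equivalent:
chained = s.str.replace("A", "1", regex=False).str.replace("B", "2", regex=False)
```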
@@ -106,29 +107,36 @@ Removal of prior version deprecations/changes
 - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
 - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
 - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
-- Removed :meth:`DataFrame.applymap`, :meth:`Styler.applymap` and :meth:`Styler.applymap_index` (:issue:`52364`)
+- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`)
 - Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`)
 - Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`)
+- Removed ``DataFrame.swapaxes`` and ``Series.swapaxes`` (:issue:`51946`)
 - Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`)
 - Removed ``DataFrameGroupBy.fillna`` and ``SeriesGroupBy.fillna`` (:issue:`55719`)
 - Removed ``Index.format``, use :meth:`Index.astype` with ``str`` or :meth:`Index.map` with a ``formatter`` function instead (:issue:`55439`)
+- Removed ``Resampler.fillna`` (:issue:`55719`)
 - Removed ``Series.__int__`` and ``Series.__float__``. Call ``int(Series.iloc[0])`` or ``float(Series.iloc[0])`` instead. (:issue:`51131`)
 - Removed ``Series.ravel`` (:issue:`56053`)
 - Removed ``Series.view`` (:issue:`56054`)
+- Removed ``StataReader.close`` (:issue:`49228`)
 - Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`)
 - Removed ``axis`` argument from all groupby operations (:issue:`50405`)
+- Removed ``convert_dtype`` from :meth:`Series.apply` (:issue:`52257`)
 - Removed ``pandas.api.types.is_interval`` and ``pandas.api.types.is_period``, use ``isinstance(obj, pd.Interval)`` and ``isinstance(obj, pd.Period)`` instead (:issue:`55264`)
 - Removed ``pandas.io.sql.execute`` (:issue:`50185`)
 - Removed ``pandas.value_counts``, use :meth:`Series.value_counts` instead (:issue:`53493`)
 - Removed ``read_gbq`` and ``DataFrame.to_gbq``. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`)
+- Removed ``use_nullable_dtypes`` from :func:`read_parquet` (:issue:`51853`)
 - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
 - Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`)
+- Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`)
 - Removed the ``ArrayManager`` (:issue:`55043`)
 - Removed the ``fastpath`` argument from the :class:`Series` constructor (:issue:`55466`)
 - Removed the ``is_boolean``, ``is_integer``, ``is_floating``, ``holds_integer``, ``is_numeric``, ``is_categorical``, ``is_object``, and ``is_interval`` attributes of :class:`Index` (:issue:`50042`)
 - Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`)
 - Unrecognized timezones when parsing strings to datetimes now raise a ``ValueError`` (:issue:`51477`)
+
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.performance:
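The removals above drop ``errors="ignore"`` from the conversion functions. A sketch of the migration, assuming code previously relied on "ignore" returning the input unchanged when parsing failed:

```python
import pandas as pd

data = ["2024-01-01", "not-a-date"]

# Stay vectorized and mark failures as NaT:
parsed = pd.to_datetime(data, errors="coerce")

# Or reproduce the old "ignore" behavior explicitly:
try:
    parsed = pd.to_datetime(data)  # errors="raise" is the default
except (ValueError, TypeError):
    parsed = data
```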
diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c
index 8bba95dd456de..74ca8ead3d936 100644
--- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c
@@ -447,8 +447,15 @@ static void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) {
   npyarr->curdim--;
   npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim];
   npyarr->stridedim -= npyarr->inc;
-  npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim);
-  npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim);
+
+  if (!PyArray_Check(npyarr->array)) {
+    PyErr_SetString(PyExc_TypeError,
+                    "NpyArrPassThru_iterEnd received a non-array object");
+    return;
+  }
+  const PyArrayObject *arrayobj = (const PyArrayObject *)npyarr->array;
+  npyarr->dim = PyArray_DIM(arrayobj, npyarr->stridedim);
+  npyarr->stride = PyArray_STRIDE(arrayobj, npyarr->stridedim);
   npyarr->dataptr += npyarr->stride;

   NpyArr_freeItemValue(obj, tc);
@@ -467,12 +474,19 @@ static int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {

   NpyArr_freeItemValue(obj, tc);

-  if (PyArray_ISDATETIME(npyarr->array)) {
+  if (!PyArray_Check(npyarr->array)) {
+    PyErr_SetString(PyExc_TypeError,
+                    "NpyArr_iterNextItem received a non-array object");
+    return 0;
+  }
+  PyArrayObject *arrayobj = (PyArrayObject *)npyarr->array;
+
+  if (PyArray_ISDATETIME(arrayobj)) {
     GET_TC(tc)->itemValue = obj;
     Py_INCREF(obj);
-    ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array);
+    ((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(arrayobj);
     // Also write the resolution (unit) of the ndarray
-    PyArray_Descr *dtype = PyArray_DESCR(npyarr->array);
+    PyArray_Descr *dtype = PyArray_DESCR(arrayobj);
     ((PyObjectEncoder *)tc->encoder)->valueUnit =
         get_datetime_metadata_from_dtype(dtype).base;
     ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr;
@@ -505,8 +519,15 @@ static int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) {

   npyarr->curdim++;
   npyarr->stridedim += npyarr->inc;
-  npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim);
-  npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim);
+  if (!PyArray_Check(npyarr->array)) {
+    PyErr_SetString(PyExc_TypeError,
+                    "NpyArr_iterNext received a non-array object");
+    return 0;
+  }
+  const PyArrayObject *arrayobj = (const PyArrayObject *)npyarr->array;
+
+  npyarr->dim = PyArray_DIM(arrayobj, npyarr->stridedim);
+  npyarr->stride = PyArray_STRIDE(arrayobj, npyarr->stridedim);
   npyarr->index[npyarr->stridedim] = 0;

   ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
@@ -1610,7 +1631,14 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
       if (!values) {
         goto INVALID;
       }
-      pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0);
+
+      if (!PyArray_Check(pc->newObj)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "Object_beginTypeContext received a non-array object");
+        goto INVALID;
+      }
+      const PyArrayObject *arrayobj = (const PyArrayObject *)pc->newObj;
+      pc->columnLabelsLen = PyArray_DIM(arrayobj, 0);
       pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc,
                                              pc->columnLabelsLen);
       if (!pc->columnLabels) {
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 017fdc4bc834f..4f70b77780c2b 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -272,14 +272,13 @@ def array_with_unit_to_datetime(
     """
     cdef:
         Py_ssize_t i, n=len(values)
-        bint is_ignore = errors == "ignore"
         bint is_coerce = errors == "coerce"
         bint is_raise = errors == "raise"
         ndarray[int64_t] iresult
         tzinfo tz = None
         float fval

-    assert is_ignore or is_coerce or is_raise
+    assert is_coerce or is_raise

     if unit == "ns":
         result, tz = array_to_datetime(
@@ -342,11 +341,6 @@ def array_with_unit_to_datetime(
                 if is_raise:
                     err.args = (f"{err}, at position {i}",)
                     raise
-                elif is_ignore:
-                    # we have hit an exception
-                    # and are in ignore mode
-                    # redo as object
-                    return _array_with_unit_to_datetime_object_fallback(values, unit)
                 else:
                     # is_coerce
                     iresult[i] = NPY_NAT
@@ -461,7 +455,6 @@ cpdef array_to_datetime(
         bint utc_convert = bool(utc)
         bint seen_datetime_offset = False
         bint is_raise = errors == "raise"
-        bint is_ignore = errors == "ignore"
         bint is_coerce = errors == "coerce"
         bint is_same_offsets
         _TSObject tsobj
@@ -475,7 +468,7 @@ cpdef array_to_datetime(
         str abbrev

     # specify error conditions
-    assert is_raise or is_ignore or is_coerce
+    assert is_raise or is_coerce

     if infer_reso:
         abbrev = "ns"
@@ -687,7 +680,6 @@ cdef _array_to_datetime_object(
     cdef:
         Py_ssize_t i, n = values.size
         object val
-        bint is_ignore = errors == "ignore"
         bint is_coerce = errors == "coerce"
         bint is_raise = errors == "raise"
         ndarray oresult_nd
@@ -696,7 +688,7 @@ cdef _array_to_datetime_object(
         cnp.broadcast mi
        _TSObject tsobj

-    assert is_raise or is_ignore or is_coerce
+    assert is_raise or is_coerce

     oresult_nd = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
     mi = cnp.PyArray_MultiIterNew2(oresult_nd, values)
diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx
index c09835c9661f3..cfced4ab44aa0 100644
--- a/pandas/_libs/tslibs/strptime.pyx
+++ b/pandas/_libs/tslibs/strptime.pyx
@@ -310,7 +310,7 @@ def array_strptime(
     values : ndarray of string-like objects
     fmt : string-like regex
     exact : matches must be exact if True, search if False
-    errors : string specifying error handling, {'raise', 'ignore', 'coerce'}
+    errors : string specifying error handling, {'raise', 'coerce'}
     creso : NPY_DATETIMEUNIT, default NPY_FR_ns
         Set to NPY_FR_GENERIC to infer a resolution.
     """
@@ -322,7 +322,6 @@ def array_strptime(
         object val
         bint seen_datetime_offset = False
         bint is_raise = errors=="raise"
-        bint is_ignore = errors=="ignore"
         bint is_coerce = errors=="coerce"
         bint is_same_offsets
         set out_tzoffset_vals = set()
@@ -334,7 +333,7 @@ def array_strptime(
         bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
         DatetimeParseState state = DatetimeParseState(creso)

-    assert is_raise or is_ignore or is_coerce
+    assert is_raise or is_coerce

     _validate_fmt(fmt)
     format_regex, locale_time = _get_format_regex(fmt)
@@ -806,14 +805,13 @@ def _array_strptime_object_fallback(
         object val
         tzinfo tz
         bint is_raise = errors=="raise"
-        bint is_ignore = errors=="ignore"
         bint is_coerce = errors=="coerce"
         bint iso_format = format_is_iso(fmt)
         NPY_DATETIMEUNIT creso, out_bestunit, item_reso
         int out_local = 0, out_tzoffset = 0
         bint string_to_dts_succeeded = 0

-    assert is_raise or is_ignore or is_coerce
+    assert is_raise or is_coerce

     item_reso = NPY_DATETIMEUNIT.NPY_FR_GENERIC
     format_regex, locale_time = _get_format_regex(fmt)
@@ -922,9 +920,8 @@ def _array_strptime_object_fallback(
             if is_coerce:
                 result[i] = NaT
                 continue
-            elif is_raise:
+            else:
                 raise
-    return values


 import warnings
diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
index 4aea85d50c352..3aacd3099c334 100644
--- a/pandas/_testing/asserters.py
+++ b/pandas/_testing/asserters.py
@@ -902,6 +902,7 @@ def assert_series_equal(
     >>> tm.assert_series_equal(a, b)
     """
     __tracebackhide__ = True
+    check_exact_index = False if check_exact is lib.no_default else check_exact
     if (
         check_exact is lib.no_default
         and rtol is lib.no_default
@@ -944,7 +945,7 @@ def assert_series_equal(
             right.index,
             exact=check_index_type,
             check_names=check_names,
-            check_exact=check_exact,
+            check_exact=check_exact_index,
             check_categorical=check_categorical,
             check_order=not check_like,
             rtol=rtol,
+ """ + mpl = pytest.importorskip("matplotlib") + mpl_units = pytest.importorskip("matplotlib.units") + plt = pytest.importorskip("matplotlib.pyplot") + orig_units_registry = mpl_units.registry.copy() + try: + with mpl.rc_context(): + mpl.use("template") + yield + finally: + mpl_units.registry.clear() + mpl_units.registry.update(orig_units_registry) + plt.close("all") + # https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.6.0.html#garbage-collection-is-no-longer-run-on-figure-close # noqa: E501 + gc.collect(1) + + @pytest.fixture( params=[ getattr(pd.offsets, o) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 500f805499a8b..3672cdb13d4a3 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1635,7 +1635,6 @@ def map_array( arr: ArrayLike, mapper, na_action: Literal["ignore"] | None = None, - convert: bool = True, ) -> np.ndarray | ExtensionArray | Index: """ Map values using an input mapping or function. @@ -1647,9 +1646,6 @@ def map_array( na_action : {None, 'ignore'}, default None If 'ignore', propagate NA values, without passing them to the mapping correspondence. - convert : bool, default True - Try to find better dtype for elementwise function results. If - False, leave as dtype=object. Returns ------- @@ -1707,8 +1703,6 @@ def map_array( # we must convert to python types values = arr.astype(object, copy=False) if na_action is None: - return lib.map_infer(values, mapper, convert=convert) + return lib.map_infer(values, mapper) else: - return lib.map_infer_mask( - values, mapper, mask=isna(values).view(np.uint8), convert=convert - ) + return lib.map_infer_mask(values, mapper, mask=isna(values).view(np.uint8)) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 6265faade2281..0dabb76c2581d 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -16,7 +16,6 @@ import numpy as np -from pandas._libs import lib from pandas._libs.internals import BlockValuesRefs from pandas._typing import ( AggFuncType, @@ -1361,7 +1360,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: result.index = res_index # infer dtypes - result = result.infer_objects(copy=False) + result = result.infer_objects() return result @@ -1376,23 +1375,10 @@ def __init__( obj: Series, func: AggFuncType, *, - convert_dtype: bool | lib.NoDefault = lib.no_default, by_row: Literal[False, "compat", "_compat"] = "compat", args, kwargs, ) -> None: - if convert_dtype is lib.no_default: - convert_dtype = True - else: - warnings.warn( - "the convert_dtype parameter is deprecated and will be removed in a " - "future version. Do ``ser.astype(object).apply()`` " - "instead if you want ``convert_dtype=False``.", - FutureWarning, - stacklevel=find_stack_level(), - ) - self.convert_dtype = convert_dtype - super().__init__( obj, func, @@ -1486,9 +1472,7 @@ def curried(x): # TODO: remove the `na_action="ignore"` when that default has been changed in # Categorical (GH51645). action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None - mapped = obj._map_values( - mapper=curried, na_action=action, convert=self.convert_dtype - ) + mapped = obj._map_values(mapper=curried, na_action=action) if len(mapped) and isinstance(mapped[0], ABCSeries): # GH#43986 Need to do list(mapped) in order to get treated as nested @@ -1873,7 +1857,7 @@ def relabel_result( # assign the new user-provided "named aggregation" as index names, and reindex # it based on the whole user-provided names. 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 500f805499a8b..3672cdb13d4a3 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1635,7 +1635,6 @@ def map_array(
     arr: ArrayLike,
     mapper,
     na_action: Literal["ignore"] | None = None,
-    convert: bool = True,
 ) -> np.ndarray | ExtensionArray | Index:
     """
     Map values using an input mapping or function.
@@ -1647,9 +1646,6 @@ def map_array(
     na_action : {None, 'ignore'}, default None
         If 'ignore', propagate NA values, without passing them to the
         mapping correspondence.
-    convert : bool, default True
-        Try to find better dtype for elementwise function results. If
-        False, leave as dtype=object.

     Returns
     -------
@@ -1707,8 +1703,6 @@ def map_array(
         # we must convert to python types
         values = arr.astype(object, copy=False)
         if na_action is None:
-            return lib.map_infer(values, mapper, convert=convert)
+            return lib.map_infer(values, mapper)
         else:
-            return lib.map_infer_mask(
-                values, mapper, mask=isna(values).view(np.uint8), convert=convert
-            )
+            return lib.map_infer_mask(values, mapper, mask=isna(values).view(np.uint8))
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 6265faade2281..0dabb76c2581d 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -16,7 +16,6 @@

 import numpy as np

-from pandas._libs import lib
 from pandas._libs.internals import BlockValuesRefs
 from pandas._typing import (
     AggFuncType,
@@ -1361,7 +1360,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
         result.index = res_index

         # infer dtypes
-        result = result.infer_objects(copy=False)
+        result = result.infer_objects()

         return result

@@ -1376,23 +1375,10 @@ def __init__(
         obj: Series,
         func: AggFuncType,
         *,
-        convert_dtype: bool | lib.NoDefault = lib.no_default,
         by_row: Literal[False, "compat", "_compat"] = "compat",
         args,
         kwargs,
     ) -> None:
-        if convert_dtype is lib.no_default:
-            convert_dtype = True
-        else:
-            warnings.warn(
-                "the convert_dtype parameter is deprecated and will be removed in a "
-                "future version. Do ``ser.astype(object).apply()`` "
-                "instead if you want ``convert_dtype=False``.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-        self.convert_dtype = convert_dtype
-
         super().__init__(
             obj,
             func,
@@ -1486,9 +1472,7 @@ def curried(x):
         # TODO: remove the `na_action="ignore"` when that default has been changed in
         #  Categorical (GH51645).
         action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
-        mapped = obj._map_values(
-            mapper=curried, na_action=action, convert=self.convert_dtype
-        )
+        mapped = obj._map_values(mapper=curried, na_action=action)

         if len(mapped) and isinstance(mapped[0], ABCSeries):
             # GH#43986 Need to do list(mapped) in order to get treated as nested
@@ -1873,7 +1857,7 @@ def relabel_result(
     # assign the new user-provided "named aggregation" as index names, and reindex
     # it based on the whole user-provided names.
     s.index = reordered_indexes[idx : idx + len(fun)]
-    reordered_result_in_dict[col] = s.reindex(columns, copy=False)
+    reordered_result_in_dict[col] = s.reindex(columns)
     idx = idx + len(fun)
     return reordered_result_in_dict
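The deprecation message deleted above already named the migration for ``convert_dtype=False``: cast to ``object`` before applying. A minimal sketch:

```python
import pandas as pd

ser = pd.Series([1, 2, 3])

# Previously: ser.apply(str, convert_dtype=False)
# Now, per the removed warning text, cast up front to keep object dtype:
result = ser.astype(object).apply(str)
```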
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index bc8d170b73fd0..29681539d146b 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -2394,7 +2394,7 @@ def objects_to_datetime64(
     yearfirst : bool
     utc : bool, default False
         Whether to convert/localize timestamps to UTC.
-    errors : {'raise', 'ignore', 'coerce'}
+    errors : {'raise', 'coerce'}
     allow_object : bool
         Whether to return an object-dtype ndarray instead of raising if the
         data contains more than one timezone.
@@ -2414,7 +2414,7 @@ def objects_to_datetime64(
     ValueError : if data cannot be converted to datetimes
     TypeError : When a type cannot be converted to datetime
     """
-    assert errors in ["raise", "ignore", "coerce"]
+    assert errors in ["raise", "coerce"]

     # if str-dtype, convert
     data = np.array(data, copy=False, dtype=np.object_)
diff --git a/pandas/core/base.py b/pandas/core/base.py
index cb8848db9af60..b9a57de073595 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -8,6 +8,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    Callable,
     Generic,
     Literal,
     cast,
@@ -18,8 +19,6 @@

 import numpy as np

-from pandas._config import using_copy_on_write
-
 from pandas._libs import lib
 from pandas._typing import (
     AxisInt,
@@ -106,7 +105,7 @@ class PandasObject(DirNamesMixin):
     _cache: dict[str, Any]

     @property
-    def _constructor(self):
+    def _constructor(self) -> Callable[..., Self]:
         """
         Class constructor (for this class it's just `__class__`).
         """
@@ -661,10 +660,10 @@ def to_numpy(

         result = np.asarray(values, dtype=dtype)

-        if (copy and not fillna) or (not copy and using_copy_on_write()):
+        if (copy and not fillna) or not copy:
             if np.shares_memory(self._values[:2], result[:2]):
                 # Take slices to improve performance of check
-                if using_copy_on_write() and not copy:
+                if not copy:
                     result = result.view()
                     result.flags.writeable = False
                 else:
@@ -802,7 +801,7 @@ def argmin(
         #  "int")
         return result  # type: ignore[return-value]

-    def tolist(self):
+    def tolist(self) -> list:
         """
         Return a list of the values.

@@ -896,7 +895,7 @@ def hasnans(self) -> bool:
         return bool(isna(self).any())  # type: ignore[union-attr]

     @final
-    def _map_values(self, mapper, na_action=None, convert: bool = True):
+    def _map_values(self, mapper, na_action=None):
         """
         An internal function that maps values using the input correspondence
         (which can be a dict, Series, or function).
@@ -908,10 +907,6 @@ def _map_values(self, mapper, na_action=None):
         na_action : {None, 'ignore'}
             If 'ignore', propagate NA values, without passing them to the
             mapping function
-        convert : bool, default True
-            Try to find better dtype for elementwise function results. If
-            False, leave as dtype=object. Note that the dtype is always
-            preserved for some extension array dtypes, such as Categorical.

         Returns
         -------
@@ -925,7 +920,7 @@ def _map_values(self, mapper, na_action=None):
         if isinstance(arr, ExtensionArray):
             return arr.map(mapper, na_action=na_action)

-        return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)
+        return algorithms.map_array(arr, mapper, na_action=na_action)

     @final
     def value_counts(
diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py
index ea293538bae8f..a666e3e61d98a 100644
--- a/pandas/core/computation/align.py
+++ b/pandas/core/computation/align.py
@@ -134,7 +134,7 @@ def _align_core(terms):
                         w, category=PerformanceWarning, stacklevel=find_stack_level()
                     )

-                obj = ti.reindex(reindexer, axis=axis, copy=False)
+                obj = ti.reindex(reindexer, axis=axis)
                 terms[i].update(obj)

         terms[i].update(terms[i].value.values)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a62c3c8cd537a..3a207a7e57fe8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5545,6 +5545,9 @@ def shift(
             )
             fill_value = lib.no_default

+        if self.empty:
+            return self.copy()
+
         axis = self._get_axis_number(axis)

         if is_list_like(periods):
@@ -10560,7 +10563,7 @@ def round(
         """
         from pandas.core.reshape.concat import concat

-        def _dict_round(df: DataFrame, decimals):
+        def _dict_round(df: DataFrame, decimals) -> Iterator[Series]:
             for col, vals in df.items():
                 try:
                     yield _series_round(vals, decimals[col])
@@ -10996,7 +10999,7 @@ def c(x):
     # ----------------------------------------------------------------------
     # ndarray-like stats methods

-    def count(self, axis: Axis = 0, numeric_only: bool = False):
+    def count(self, axis: Axis = 0, numeric_only: bool = False) -> Series:
         """
         Count non-NA cells for each column or row.

@@ -11242,9 +11245,42 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
         res_ser = self._constructor_sliced(result, index=self.index, copy=False)
         return res_ser

-    @doc(make_doc("any", ndim=2))
     # error: Signature of "any" incompatible with supertype "NDFrame"
-    def any(  # type: ignore[override]
+    @overload  # type: ignore[override]
+    def any(
+        self,
+        *,
+        axis: Axis = ...,
+        bool_only: bool = ...,
+        skipna: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def any(
+        self,
+        *,
+        axis: None,
+        bool_only: bool = ...,
+        skipna: bool = ...,
+        **kwargs,
+    ) -> bool:
+        ...
+
+    @overload
+    def any(
+        self,
+        *,
+        axis: Axis | None,
+        bool_only: bool = ...,
+        skipna: bool = ...,
+        **kwargs,
+    ) -> Series | bool:
+        ...
+
+    @doc(make_doc("any", ndim=2))
+    def any(
         self,
         *,
         axis: Axis | None = 0,
@@ -11259,6 +11295,39 @@ def any(  # type: ignore[override]
             result = result.__finalize__(self, method="any")
         return result

+    @overload
+    def all(
+        self,
+        *,
+        axis: Axis = ...,
+        bool_only: bool = ...,
+        skipna: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def all(
+        self,
+        *,
+        axis: None,
+        bool_only: bool = ...,
+        skipna: bool = ...,
+        **kwargs,
+    ) -> bool:
+        ...
+
+    @overload
+    def all(
+        self,
+        *,
+        axis: Axis | None,
+        bool_only: bool = ...,
+        skipna: bool = ...,
+        **kwargs,
+    ) -> Series | bool:
+        ...
+
     @doc(make_doc("all", ndim=2))
     def all(
         self,
@@ -11274,6 +11343,40 @@ def all(
             result = result.__finalize__(self, method="all")
         return result

+    # error: Signature of "min" incompatible with supertype "NDFrame"
+    @overload  # type: ignore[override]
+    def min(
+        self,
+        *,
+        axis: Axis = ...,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def min(
+        self,
+        *,
+        axis: None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Any:
+        ...
+
+    @overload
+    def min(
+        self,
+        *,
+        axis: Axis | None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series | Any:
+        ...
+
     @doc(make_doc("min", ndim=2))
     def min(
         self,
@@ -11281,12 +11384,48 @@ def min(
         skipna: bool = True,
         numeric_only: bool = False,
         **kwargs,
-    ):
-        result = super().min(axis, skipna, numeric_only, **kwargs)
+    ) -> Series | Any:
+        result = super().min(
+            axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
+        )
         if isinstance(result, Series):
             result = result.__finalize__(self, method="min")
         return result

+    # error: Signature of "max" incompatible with supertype "NDFrame"
+    @overload  # type: ignore[override]
+    def max(
+        self,
+        *,
+        axis: Axis = ...,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def max(
+        self,
+        *,
+        axis: None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Any:
+        ...
+
+    @overload
+    def max(
+        self,
+        *,
+        axis: Axis | None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series | Any:
+        ...
+
     @doc(make_doc("max", ndim=2))
     def max(
         self,
@@ -11294,8 +11433,10 @@ def max(
         skipna: bool = True,
         numeric_only: bool = False,
         **kwargs,
-    ):
-        result = super().max(axis, skipna, numeric_only, **kwargs)
+    ) -> Series | Any:
+        result = super().max(
+            axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
+        )
         if isinstance(result, Series):
             result = result.__finalize__(self, method="max")
         return result
@@ -11308,8 +11449,14 @@ def sum(
         numeric_only: bool = False,
         min_count: int = 0,
         **kwargs,
-    ):
-        result = super().sum(axis, skipna, numeric_only, min_count, **kwargs)
+    ) -> Series:
+        result = super().sum(
+            axis=axis,
+            skipna=skipna,
+            numeric_only=numeric_only,
+            min_count=min_count,
+            **kwargs,
+        )
         return result.__finalize__(self, method="sum")

     @doc(make_doc("prod", ndim=2))
@@ -11320,10 +11467,50 @@ def prod(
         numeric_only: bool = False,
         min_count: int = 0,
         **kwargs,
-    ):
-        result = super().prod(axis, skipna, numeric_only, min_count, **kwargs)
+    ) -> Series:
+        result = super().prod(
+            axis=axis,
+            skipna=skipna,
+            numeric_only=numeric_only,
+            min_count=min_count,
+            **kwargs,
+        )
         return result.__finalize__(self, method="prod")

+    # error: Signature of "mean" incompatible with supertype "NDFrame"
+    @overload  # type: ignore[override]
+    def mean(
+        self,
+        *,
+        axis: Axis = ...,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def mean(
+        self,
+        *,
+        axis: None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Any:
+        ...
+
+    @overload
+    def mean(
+        self,
+        *,
+        axis: Axis | None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series | Any:
+        ...
+
     @doc(make_doc("mean", ndim=2))
     def mean(
         self,
@@ -11331,12 +11518,48 @@ def mean(
         skipna: bool = True,
         numeric_only: bool = False,
         **kwargs,
-    ):
-        result = super().mean(axis, skipna, numeric_only, **kwargs)
+    ) -> Series | Any:
+        result = super().mean(
+            axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
+        )
         if isinstance(result, Series):
             result = result.__finalize__(self, method="mean")
         return result

+    # error: Signature of "median" incompatible with supertype "NDFrame"
+    @overload  # type: ignore[override]
+    def median(
+        self,
+        *,
+        axis: Axis = ...,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def median(
+        self,
+        *,
+        axis: None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Any:
+        ...
+
+    @overload
+    def median(
+        self,
+        *,
+        axis: Axis | None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series | Any:
+        ...
+
     @doc(make_doc("median", ndim=2))
     def median(
         self,
@@ -11344,12 +11567,51 @@ def median(
         skipna: bool = True,
         numeric_only: bool = False,
         **kwargs,
-    ):
-        result = super().median(axis, skipna, numeric_only, **kwargs)
+    ) -> Series | Any:
+        result = super().median(
+            axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
+        )
         if isinstance(result, Series):
             result = result.__finalize__(self, method="median")
         return result

+    # error: Signature of "sem" incompatible with supertype "NDFrame"
+    @overload  # type: ignore[override]
+    def sem(
+        self,
+        *,
+        axis: Axis = ...,
+        skipna: bool = ...,
+        ddof: int = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def sem(
+        self,
+        *,
+        axis: None,
+        skipna: bool = ...,
+        ddof: int = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Any:
+        ...
+
+    @overload
+    def sem(
+        self,
+        *,
+        axis: Axis | None,
+        skipna: bool = ...,
+        ddof: int = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series | Any:
+        ...
+
     @doc(make_doc("sem", ndim=2))
     def sem(
         self,
@@ -11358,12 +11620,51 @@ def sem(
         skipna: bool = True,
         ddof: int = 1,
         numeric_only: bool = False,
         **kwargs,
-    ):
-        result = super().sem(axis, skipna, ddof, numeric_only, **kwargs)
+    ) -> Series | Any:
+        result = super().sem(
+            axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs
+        )
         if isinstance(result, Series):
             result = result.__finalize__(self, method="sem")
         return result

+    # error: Signature of "var" incompatible with supertype "NDFrame"
+    @overload  # type: ignore[override]
+    def var(
+        self,
+        *,
+        axis: Axis = ...,
+        skipna: bool = ...,
+        ddof: int = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def var(
+        self,
+        *,
+        axis: None,
+        skipna: bool = ...,
+        ddof: int = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Any:
+        ...
+
+    @overload
+    def var(
+        self,
+        *,
+        axis: Axis | None,
+        skipna: bool = ...,
+        ddof: int = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series | Any:
+        ...
+
     @doc(make_doc("var", ndim=2))
     def var(
         self,
@@ -11372,12 +11673,51 @@ def var(
         skipna: bool = True,
         ddof: int = 1,
         numeric_only: bool = False,
         **kwargs,
-    ):
-        result = super().var(axis, skipna, ddof, numeric_only, **kwargs)
+    ) -> Series | Any:
+        result = super().var(
+            axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs
+        )
         if isinstance(result, Series):
             result = result.__finalize__(self, method="var")
         return result

+    # error: Signature of "std" incompatible with supertype "NDFrame"
+    @overload  # type: ignore[override]
+    def std(
+        self,
+        *,
+        axis: Axis = ...,
+        skipna: bool = ...,
+        ddof: int = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def std(
+        self,
+        *,
+        axis: None,
+        skipna: bool = ...,
+        ddof: int = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Any:
+        ...
+
+    @overload
+    def std(
+        self,
+        *,
+        axis: Axis | None,
+        skipna: bool = ...,
+        ddof: int = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series | Any:
+        ...
+
     @doc(make_doc("std", ndim=2))
     def std(
         self,
@@ -11386,12 +11726,48 @@ def std(
         skipna: bool = True,
         ddof: int = 1,
         numeric_only: bool = False,
         **kwargs,
-    ):
-        result = super().std(axis, skipna, ddof, numeric_only, **kwargs)
+    ) -> Series | Any:
+        result = super().std(
+            axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs
+        )
         if isinstance(result, Series):
             result = result.__finalize__(self, method="std")
         return result

+    # error: Signature of "skew" incompatible with supertype "NDFrame"
+    @overload  # type: ignore[override]
+    def skew(
+        self,
+        *,
+        axis: Axis = ...,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def skew(
+        self,
+        *,
+        axis: None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Any:
+        ...
+
+    @overload
+    def skew(
+        self,
+        *,
+        axis: Axis | None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series | Any:
+        ...
+
     @doc(make_doc("skew", ndim=2))
     def skew(
         self,
@@ -11399,12 +11775,48 @@ def skew(
         skipna: bool = True,
         numeric_only: bool = False,
         **kwargs,
-    ):
-        result = super().skew(axis, skipna, numeric_only, **kwargs)
+    ) -> Series | Any:
+        result = super().skew(
+            axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
+        )
         if isinstance(result, Series):
             result = result.__finalize__(self, method="skew")
         return result

+    # error: Signature of "kurt" incompatible with supertype "NDFrame"
+    @overload  # type: ignore[override]
+    def kurt(
+        self,
+        *,
+        axis: Axis = ...,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series:
+        ...
+
+    @overload
+    def kurt(
+        self,
+        *,
+        axis: None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Any:
+        ...
+
+    @overload
+    def kurt(
+        self,
+        *,
+        axis: Axis | None,
+        skipna: bool = ...,
+        numeric_only: bool = ...,
+        **kwargs,
+    ) -> Series | Any:
+        ...
+
     @doc(make_doc("kurt", ndim=2))
     def kurt(
         self,
@@ -11412,13 +11824,16 @@ def kurt(
         skipna: bool = True,
         numeric_only: bool = False,
         **kwargs,
-    ):
-        result = super().kurt(axis, skipna, numeric_only, **kwargs)
+    ) -> Series | Any:
+        result = super().kurt(
+            axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
+        )
         if isinstance(result, Series):
             result = result.__finalize__(self, method="kurt")
         return result

-    kurtosis = kurt
+    # error: Incompatible types in assignment
+    kurtosis = kurt  # type: ignore[assignment]
     product = prod

     @doc(make_doc("cummin", ndim=2))
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 8e0767d9b3699..af09eb1656683 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -161,7 +161,6 @@
     ensure_index,
 )
 from pandas.core.internals import BlockManager
-from pandas.core.internals.construction import ndarray_to_mgr
 from pandas.core.methods.describe import describe_ndframe
 from pandas.core.missing import (
     clean_fill_method,
@@ -755,66 +754,6 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None:
         labels = ensure_index(labels)
         self._mgr.set_axis(axis, labels)

-    @final
-    def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool | None = None) -> Self:
-        """
-        Interchange axes and swap values axes appropriately.
-
-        .. deprecated:: 2.1.0
-            ``swapaxes`` is deprecated and will be removed.
-            Please use ``transpose`` instead.
-
-        Returns
-        -------
-        same as input
-
-        Examples
-        --------
-        Please see examples for :meth:`DataFrame.transpose`.
-        """
-        warnings.warn(
-            # GH#51946
-            f"'{type(self).__name__}.swapaxes' is deprecated and "
-            "will be removed in a future version. "
-            f"Please use '{type(self).__name__}.transpose' instead.",
-            FutureWarning,
-            stacklevel=find_stack_level(),
-        )
-
-        i = self._get_axis_number(axis1)
-        j = self._get_axis_number(axis2)
-
-        if i == j:
-            return self.copy(deep=False)
-
-        mapping = {i: j, j: i}
-
-        new_axes = [self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)]
-        new_values = self._values.swapaxes(i, j)  # type: ignore[union-attr]
-        if self._mgr.is_single_block and isinstance(self._mgr, BlockManager):
-            # This should only get hit in case of having a single block, otherwise a
-            # copy is made, we don't have to set up references.
-            new_mgr = ndarray_to_mgr(
-                new_values,
-                new_axes[0],
-                new_axes[1],
-                dtype=None,
-                copy=False,
-            )
-            assert isinstance(new_mgr, BlockManager)
-            assert isinstance(self._mgr, BlockManager)
-            new_mgr.blocks[0].refs = self._mgr.blocks[0].refs
-            new_mgr.blocks[0].refs.add_reference(new_mgr.blocks[0])
-            out = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
-            return out.__finalize__(self, method="swapaxes")
-
-        return self._constructor(
-            new_values,
-            *new_axes,
-            # The no-copy case for CoW is handled above
-            copy=False,
-        ).__finalize__(self, method="swapaxes")
-
     @final
     @doc(klass=_shared_doc_kwargs["klass"])
     def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self:
@@ -11607,6 +11546,7 @@ def _logical_func(

     def any(
         self,
+        *,
         axis: Axis | None = 0,
         bool_only: bool = False,
         skipna: bool = True,
@@ -11618,6 +11558,7 @@ def any(

     def all(
         self,
+        *,
         axis: Axis = 0,
         bool_only: bool = False,
         skipna: bool = True,
@@ -11721,6 +11662,7 @@ def _stat_function_ddof(

     def sem(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         ddof: int = 1,
@@ -11733,6 +11675,7 @@ def sem(

     def var(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         ddof: int = 1,
@@ -11745,6 +11688,7 @@ def var(

     def std(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         ddof: int = 1,
@@ -11776,6 +11720,7 @@ def _stat_function(

     def min(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         numeric_only: bool = False,
@@ -11792,6 +11737,7 @@ def min(

     def max(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         numeric_only: bool = False,
@@ -11808,6 +11754,7 @@ def max(

     def mean(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         numeric_only: bool = False,
@@ -11819,6 +11766,7 @@ def mean(

     def median(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         numeric_only: bool = False,
@@ -11830,6 +11778,7 @@ def median(

     def skew(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         numeric_only: bool = False,
@@ -11841,6 +11790,7 @@ def skew(

     def kurt(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         numeric_only: bool = False,
@@ -11893,6 +11843,7 @@ def _min_count_stat_function(

     def sum(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         numeric_only: bool = False,
@@ -11905,6 +11856,7 @@ def sum(

     def prod(
         self,
+        *,
         axis: Axis | None = 0,
         skipna: bool = True,
         numeric_only: bool = False,
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index ca8bfcb0c81af..30559ae91ce41 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1648,7 +1648,7 @@ def _wrap_applied_output(
                 res_index = self._grouper.result_index

             result = self.obj._constructor(index=res_index, columns=data.columns)
-            result = result.astype(data.dtypes, copy=False)
+            result = result.astype(data.dtypes)
             return result

         # GH12824
@@ -1815,7 +1815,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):
             concat_index = obj.columns
             concatenated = concat(applied, axis=0, verify_integrity=False)
-            concatenated = concatenated.reindex(concat_index, axis=1, copy=False)
+            concatenated = concatenated.reindex(concat_index, axis=1)
             return self._set_result_index_ordered(concatenated)

     __examples_dataframe_doc = dedent(
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 2ac069deaad36..1be1cdfcd0f50 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -47,6 +47,7 @@ class providing the base-class of operations.
     DtypeObj,
     FillnaOptions,
     IndexLabel,
+    IntervalClosedType,
     NDFrameT,
     PositionalIndexer,
     RandomState,
@@ -139,6 +140,7 @@ class providing the base-class of operations.
 )

 if TYPE_CHECKING:
+    from pandas._libs.tslibs import BaseOffset
     from pandas._typing import (
         Any,
         Concatenate,
@@ -147,6 +149,7 @@ class providing the base-class of operations.
         T,
     )

+    from pandas.core.indexers.objects import BaseIndexer
     from pandas.core.resample import Resampler
     from pandas.core.window import (
         ExpandingGroupby,
@@ -1241,7 +1244,7 @@ def _concat_objects(
                     indexer, _ = result.index.get_indexer_non_unique(target)
                     result = result.take(indexer, axis=0)
             else:
-                result = result.reindex(ax, axis=0, copy=False)
+                result = result.reindex(ax, axis=0)

         else:
             result = concat(values, axis=0)
@@ -1269,18 +1272,18 @@ def _set_result_index_ordered(

         if self._grouper.is_monotonic and not self._grouper.has_dropped_na:
             # shortcut if we have an already ordered grouper
-            result = result.set_axis(index, axis=0, copy=False)
+            result = result.set_axis(index, axis=0)
             return result

         # row order is scrambled => sort the rows by position in original index
         original_positions = Index(self._grouper.result_ilocs)
-        result = result.set_axis(original_positions, axis=0, copy=False)
+        result = result.set_axis(original_positions, axis=0)
         result = result.sort_index(axis=0)
         if self._grouper.has_dropped_na:
             # Add back in any missing rows due to dropna - index here is integral
             # with values referring to the row of the input so can use RangeIndex
             result = result.reindex(RangeIndex(len(index)), axis=0)
-        result = result.set_axis(index, axis=0, copy=False)
+        result = result.set_axis(index, axis=0)

         return result

@@ -1913,7 +1916,7 @@ def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT:

             # for each col, reshape to size of original frame by take operation
             ids = self._grouper.ids
-            result = result.reindex(self._grouper.result_index, axis=0, copy=False)
+            result = result.reindex(self._grouper.result_index, axis=0)

             if self.obj.ndim == 1:
                 # i.e. SeriesGroupBy
@@ -3616,7 +3619,16 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp
         )

     @final
-    def rolling(self, *args, **kwargs) -> RollingGroupby:
+    def rolling(
+        self,
+        window: int | datetime.timedelta | str | BaseOffset | BaseIndexer,
+        min_periods: int | None = None,
+        center: bool = False,
+        win_type: str | None = None,
+        on: str | None = None,
+        closed: IntervalClosedType | None = None,
+        method: str = "single",
+    ) -> RollingGroupby:
         """
         Return a rolling grouper, providing rolling functionality per group.
@@ -3746,10 +3758,15 @@ def rolling(self, *args, **kwargs) -> RollingGroupby:

         return RollingGroupby(
             self._selected_obj,
-            *args,
+            window=window,
+            min_periods=min_periods,
+            center=center,
+            win_type=win_type,
+            on=on,
+            closed=closed,
+            method=method,
             _grouper=self._grouper,
             _as_index=self.as_index,
-            **kwargs,
         )

     @final
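Replacing ``*args, **kwargs`` in the groupby ``rolling`` method with an explicit signature documents the supported options and makes typos fail at call time. Usage is unchanged; a minimal sketch:

```python
import pandas as pd

df = pd.DataFrame({"g": ["a", "a", "b", "b"], "x": [1.0, 2.0, 3.0, 4.0]})

# All parameters now named in the signature are ordinary rolling options:
out = df.groupby("g")["x"].rolling(window=2, min_periods=1).mean()
```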
b/pandas/core/indexing.py index b127f62faf827..c6759a9f54509 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -13,8 +13,6 @@ import numpy as np -from pandas._config import using_copy_on_write - from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim from pandas.compat import PYPY @@ -894,7 +892,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None: @final def __setitem__(self, key, value) -> None: - if not PYPY and using_copy_on_write(): + if not PYPY: if sys.getrefcount(self.obj) <= 2: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 @@ -2107,7 +2105,7 @@ def _setitem_with_indexer_frame_value( tuple(sub_indexer), value[item], multiindex_indexer, - using_cow=using_copy_on_write(), + using_cow=True, ) else: val = np.nan @@ -2280,7 +2278,7 @@ def _setitem_with_indexer_missing(self, indexer, value): has_dtype = hasattr(value, "dtype") if isinstance(value, ABCSeries): # append a Series - value = value.reindex(index=self.obj.columns, copy=True) + value = value.reindex(index=self.obj.columns) value.name = indexer elif isinstance(value, dict): value = Series( @@ -2310,7 +2308,7 @@ def _setitem_with_indexer_missing(self, indexer, value): if not has_dtype: # i.e. if we already had a Series or ndarray, keep that # dtype. But if we had a list or dict, then do inference - df = df.infer_objects(copy=False) + df = df.infer_objects() self.obj._mgr = df._mgr else: self.obj._mgr = self.obj._append(value)._mgr @@ -2383,7 +2381,8 @@ def ravel(i): # we have a frame, with multiple indexers on both axes; and a # series, so need to broadcast (see GH5206) if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer): - ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values + # TODO(CoW): copy shouldn't be needed here + ser_values = ser.reindex(obj.axes[0][indexer[0]]).copy()._values # single indexer if len(indexer) > 1 and not multiindex_indexer: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index ff5e8e35f92ab..047c25f4931a6 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -519,11 +519,11 @@ def _homogenize( for val in data: if isinstance(val, (ABCSeries, Index)): if dtype is not None: - val = val.astype(dtype, copy=False) + val = val.astype(dtype) if isinstance(val, ABCSeries) and val.index is not index: # Forces alignment. No need to copy data since we # are putting it into an ndarray later - val = val.reindex(index, copy=False) + val = val.reindex(index) refs.append(val._references) val = val._values else: diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index c620bb9d17976..337b2f7952213 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -173,7 +173,7 @@ def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame: col_names = reorder_columns(ldesc) d = concat( - [x.reindex(col_names, copy=False) for x in ldesc], + [x.reindex(col_names) for x in ldesc], axis=1, sort=False, ) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 88c2762e12940..cbe280b9184c3 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -631,8 +631,8 @@ def nearest(self, limit: int | None = None): See Also -------- - backfill : Backward fill the new missing values in the resampled data. - pad : Forward fill ``NaN`` values. + bfill : Backward fill the new missing values in the resampled data. 
+ ffill : Forward fill ``NaN`` values. Examples -------- @@ -686,9 +686,6 @@ def bfill(self, limit: int | None = None): See Also -------- - bfill : Alias of backfill. - fillna : Fill NaN values using the specified method, which can be - 'backfill'. nearest : Fill NaN values with nearest neighbor starting from center. ffill : Forward fill NaN values. Series.fillna : Fill NaN values in the Series using the @@ -767,177 +764,6 @@ def bfill(self, limit: int | None = None): """ return self._upsample("bfill", limit=limit) - @final - def fillna(self, method, limit: int | None = None): - """ - Fill missing values introduced by upsampling. - - In statistics, imputation is the process of replacing missing data with - substituted values [1]_. When resampling data, missing values may - appear (e.g., when the resampling frequency is higher than the original - frequency). - - Missing values that existed in the original data will - not be modified. - - Parameters - ---------- - method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'} - Method to use for filling holes in resampled data - - * 'pad' or 'ffill': use previous valid observation to fill gap - (forward fill). - * 'backfill' or 'bfill': use next valid observation to fill gap. - * 'nearest': use nearest valid observation to fill gap. - - limit : int, optional - Limit of how many consecutive missing values to fill. - - Returns - ------- - Series or DataFrame - An upsampled Series or DataFrame with missing values filled. - - See Also - -------- - bfill : Backward fill NaN values in the resampled data. - ffill : Forward fill NaN values in the resampled data. - nearest : Fill NaN values in the resampled data - with nearest neighbor starting from center. - interpolate : Fill NaN values using interpolation. - Series.fillna : Fill NaN values in the Series using the - specified method, which can be 'bfill' and 'ffill'. - DataFrame.fillna : Fill NaN values in the DataFrame using the - specified method, which can be 'bfill' and 'ffill'. - - References - ---------- - .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics) - - Examples - -------- - Resampling a Series: - - >>> s = pd.Series( - ... [1, 2, 3], index=pd.date_range("20180101", periods=3, freq="h") - ... 
) - >>> s - 2018-01-01 00:00:00 1 - 2018-01-01 01:00:00 2 - 2018-01-01 02:00:00 3 - Freq: h, dtype: int64 - - Without filling the missing values you get: - - >>> s.resample("30min").asfreq() - 2018-01-01 00:00:00 1.0 - 2018-01-01 00:30:00 NaN - 2018-01-01 01:00:00 2.0 - 2018-01-01 01:30:00 NaN - 2018-01-01 02:00:00 3.0 - Freq: 30min, dtype: float64 - - >>> s.resample("30min").fillna("backfill") - 2018-01-01 00:00:00 1 - 2018-01-01 00:30:00 2 - 2018-01-01 01:00:00 2 - 2018-01-01 01:30:00 3 - 2018-01-01 02:00:00 3 - Freq: 30min, dtype: int64 - - >>> s.resample("15min").fillna("backfill", limit=2) - 2018-01-01 00:00:00 1.0 - 2018-01-01 00:15:00 NaN - 2018-01-01 00:30:00 2.0 - 2018-01-01 00:45:00 2.0 - 2018-01-01 01:00:00 2.0 - 2018-01-01 01:15:00 NaN - 2018-01-01 01:30:00 3.0 - 2018-01-01 01:45:00 3.0 - 2018-01-01 02:00:00 3.0 - Freq: 15min, dtype: float64 - - >>> s.resample("30min").fillna("pad") - 2018-01-01 00:00:00 1 - 2018-01-01 00:30:00 1 - 2018-01-01 01:00:00 2 - 2018-01-01 01:30:00 2 - 2018-01-01 02:00:00 3 - Freq: 30min, dtype: int64 - - >>> s.resample("30min").fillna("nearest") - 2018-01-01 00:00:00 1 - 2018-01-01 00:30:00 2 - 2018-01-01 01:00:00 2 - 2018-01-01 01:30:00 3 - 2018-01-01 02:00:00 3 - Freq: 30min, dtype: int64 - - Missing values present before the upsampling are not affected. - - >>> sm = pd.Series( - ... [1, None, 3], index=pd.date_range("20180101", periods=3, freq="h") - ... ) - >>> sm - 2018-01-01 00:00:00 1.0 - 2018-01-01 01:00:00 NaN - 2018-01-01 02:00:00 3.0 - Freq: h, dtype: float64 - - >>> sm.resample("30min").fillna("backfill") - 2018-01-01 00:00:00 1.0 - 2018-01-01 00:30:00 NaN - 2018-01-01 01:00:00 NaN - 2018-01-01 01:30:00 3.0 - 2018-01-01 02:00:00 3.0 - Freq: 30min, dtype: float64 - - >>> sm.resample("30min").fillna("pad") - 2018-01-01 00:00:00 1.0 - 2018-01-01 00:30:00 1.0 - 2018-01-01 01:00:00 NaN - 2018-01-01 01:30:00 NaN - 2018-01-01 02:00:00 3.0 - Freq: 30min, dtype: float64 - - >>> sm.resample("30min").fillna("nearest") - 2018-01-01 00:00:00 1.0 - 2018-01-01 00:30:00 NaN - 2018-01-01 01:00:00 NaN - 2018-01-01 01:30:00 3.0 - 2018-01-01 02:00:00 3.0 - Freq: 30min, dtype: float64 - - DataFrame resampling is done column-wise. All the same options are - available. - - >>> df = pd.DataFrame( - ... {"a": [2, np.nan, 6], "b": [1, 3, 5]}, - ... index=pd.date_range("20180101", periods=3, freq="h"), - ... ) - >>> df - a b - 2018-01-01 00:00:00 2.0 1 - 2018-01-01 01:00:00 NaN 3 - 2018-01-01 02:00:00 6.0 5 - - >>> df.resample("30min").fillna("bfill") - a b - 2018-01-01 00:00:00 2.0 1 - 2018-01-01 00:30:00 NaN 3 - 2018-01-01 01:00:00 NaN 3 - 2018-01-01 01:30:00 6.0 5 - 2018-01-01 02:00:00 6.0 5 - """ - warnings.warn( - f"{type(self).__name__}.fillna is deprecated and will be removed " - "in a future version. Use obj.ffill(), obj.bfill(), " - "or obj.nearest() instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._upsample(method, limit=limit) - @final def interpolate( self, @@ -1808,11 +1634,6 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): Maximum size gap to fill when reindexing fill_value : scalar, default None Value to use for missing values - - See Also - -------- - .fillna: Fill NA/NaN values using the specified method. - """ if self._from_selection: raise ValueError( @@ -1961,11 +1782,6 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): Maximum size gap to fill when reindexing. fill_value : scalar, default None Value to use for missing values. 
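With `Resampler.fillna` removed above, the dedicated fill methods named in the removed deprecation message take over. A minimal migration sketch, reusing the illustrative data from the deleted doctests:

```python
import pandas as pd

s = pd.Series([1, 2, 3], index=pd.date_range("20180101", periods=3, freq="h"))

# previously: s.resample("30min").fillna("backfill")
s.resample("30min").bfill()

# previously: s.resample("30min").fillna("pad")
s.resample("30min").ffill()

# previously: s.resample("30min").fillna("nearest")
s.resample("30min").nearest()

# the limit keyword carries over unchanged
s.resample("15min").bfill(limit=2)
```

As with the removed method, missing values that existed in the original data before upsampling are left untouched by all three fill methods.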
- - See Also - -------- - .fillna: Fill NA/NaN values using the specified method. - """ # we may need to actually resample as if we are timestamps if self.kind == "timestamp": diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index d47cb3f40f341..88323e5304cc4 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -15,8 +15,6 @@ import numpy as np -from pandas._config import using_copy_on_write - from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level @@ -370,13 +368,6 @@ def concat( 0 1 2 1 3 4 """ - if copy is None: - if using_copy_on_write(): - copy = False - else: - copy = True - elif copy and using_copy_on_write(): - copy = False op = _Concatenator( objs, @@ -387,7 +378,6 @@ def concat( levels=levels, names=names, verify_integrity=verify_integrity, - copy=copy, sort=sort, ) @@ -411,7 +401,6 @@ def __init__( names: list[HashableT] | None = None, ignore_index: bool = False, verify_integrity: bool = False, - copy: bool = True, sort: bool = False, ) -> None: if isinstance(objs, (ABCSeries, ABCDataFrame, str)): @@ -439,7 +428,6 @@ def __init__( self.ignore_index = ignore_index self.verify_integrity = verify_integrity - self.copy = copy objs, keys = self._clean_keys_and_objs(objs, keys) @@ -656,7 +644,7 @@ def get_result(self): cons = sample._constructor_expanddim index, columns = self.new_axes - df = cons(data, index=index, copy=self.copy) + df = cons(data, index=index, copy=False) df.columns = columns return df.__finalize__(self, method="concat") @@ -681,10 +669,8 @@ def get_result(self): mgrs_indexers.append((obj._mgr, indexers)) new_data = concatenate_managers( - mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy + mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=False ) - if not self.copy and not using_copy_on_write(): - new_data._consolidate_inplace() out = sample._constructor_from_mgr(new_data, axes=new_data.axes) return out.__finalize__(self, method="concat") @@ -710,7 +696,6 @@ def _get_comb_axis(self, i: AxisInt) -> Index: axis=data_axis, intersect=self.intersect, sort=self.sort, - copy=self.copy, ) @cache_readonly diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index fae5c082c72a0..d6473f695a667 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -6,10 +6,7 @@ Iterable, ) import itertools -from typing import ( - TYPE_CHECKING, - cast, -) +from typing import TYPE_CHECKING import numpy as np @@ -339,7 +336,7 @@ def get_empty_frame(data) -> DataFrame: ) sparse_series.append(Series(data=sarr, index=index, name=col, copy=False)) - return concat(sparse_series, axis=1, copy=False) + return concat(sparse_series, axis=1) else: # ensure ndarray layout is column-major @@ -492,7 +489,7 @@ def from_dummies( f"Received 'data' of type: {type(data).__name__}" ) - col_isna_mask = cast(Series, data.isna().any()) + col_isna_mask = data.isna().any() if col_isna_mask.any(): raise ValueError( diff --git a/pandas/core/series.py b/pandas/core/series.py index d5eaa2125b301..de76c05ef7943 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4481,7 +4481,6 @@ def transform( def apply( self, func: AggFuncType, - convert_dtype: bool | lib.NoDefault = lib.no_default, args: tuple[Any, ...] = (), *, by_row: Literal[False, "compat"] = "compat", @@ -4497,14 +4496,6 @@ def apply( ---------- func : function Python function or NumPy ufunc to apply. 
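The `Series.apply` hunks that follow drop the deprecated `convert_dtype` keyword. Per the removed deprecation note, casting to object first recovers the old `convert_dtype=False` behaviour; a minimal sketch, where `func` is an arbitrary illustrative function:

```python
import numpy as np
import pandas as pd

ser = pd.Series([1.5, -0.5, 2.0])

def func(x):
    # illustrative elementwise function
    return x if x > 0 else np.nan

# previously: ser.apply(func, convert_dtype=False)
result = ser.astype(object).apply(func)  # result keeps dtype=object
```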
- convert_dtype : bool, default True - Try to find better dtype for elementwise function results. If - False, leave as dtype=object. Note that the dtype is always - preserved for some extension array dtypes, such as Categorical. - - .. deprecated:: 2.1.0 - ``convert_dtype`` has been deprecated. Do ``ser.astype(object).apply()`` - instead if you want ``convert_dtype=False``. args : tuple Positional arguments passed to func after the series value. by_row : False or "compat", default "compat" @@ -4608,7 +4599,6 @@ def apply( return SeriesApply( self, func, - convert_dtype=convert_dtype, by_row=by_row, args=args, kwargs=kwargs, @@ -6251,7 +6241,9 @@ def min( numeric_only: bool = False, **kwargs, ): - return NDFrame.min(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.min( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) @doc(make_doc("max", ndim=1)) def max( @@ -6261,7 +6253,9 @@ def max( numeric_only: bool = False, **kwargs, ): - return NDFrame.max(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.max( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) @doc(make_doc("sum", ndim=1)) def sum( @@ -6272,7 +6266,14 @@ def sum( min_count: int = 0, **kwargs, ): - return NDFrame.sum(self, axis, skipna, numeric_only, min_count, **kwargs) + return NDFrame.sum( + self, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) @doc(make_doc("prod", ndim=1)) def prod( @@ -6283,7 +6284,14 @@ def prod( min_count: int = 0, **kwargs, ): - return NDFrame.prod(self, axis, skipna, numeric_only, min_count, **kwargs) + return NDFrame.prod( + self, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) @doc(make_doc("mean", ndim=1)) def mean( @@ -6293,7 +6301,9 @@ def mean( numeric_only: bool = False, **kwargs, ): - return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.mean( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) @doc(make_doc("median", ndim=1)) def median( @@ -6303,7 +6313,9 @@ def median( numeric_only: bool = False, **kwargs, ): - return NDFrame.median(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.median( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) @doc(make_doc("sem", ndim=1)) def sem( @@ -6314,7 +6326,14 @@ def sem( numeric_only: bool = False, **kwargs, ): - return NDFrame.sem(self, axis, skipna, ddof, numeric_only, **kwargs) + return NDFrame.sem( + self, + axis=axis, + skipna=skipna, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) @doc(make_doc("var", ndim=1)) def var( @@ -6325,7 +6344,14 @@ def var( numeric_only: bool = False, **kwargs, ): - return NDFrame.var(self, axis, skipna, ddof, numeric_only, **kwargs) + return NDFrame.var( + self, + axis=axis, + skipna=skipna, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) @doc(make_doc("std", ndim=1)) def std( @@ -6336,7 +6362,14 @@ def std( numeric_only: bool = False, **kwargs, ): - return NDFrame.std(self, axis, skipna, ddof, numeric_only, **kwargs) + return NDFrame.std( + self, + axis=axis, + skipna=skipna, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) @doc(make_doc("skew", ndim=1)) def skew( @@ -6346,7 +6379,9 @@ def skew( numeric_only: bool = False, **kwargs, ): - return NDFrame.skew(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.skew( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) @doc(make_doc("kurt", ndim=1)) def kurt( @@ -6356,7 
+6391,9 @@ def kurt( numeric_only: bool = False, **kwargs, ): - return NDFrame.kurt(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.kurt( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) kurtosis = kurt product = prod diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 601150ba7a85a..fe68107a953bb 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -661,7 +661,6 @@ def cat( join=(join if join == "inner" else "outer"), keys=range(len(others)), sort=False, - copy=False, ) data, others = data.align(others, join=join) others = [others[x] for x in others] # again list of Series @@ -1427,8 +1426,8 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None): @forbid_nonstring_types(["bytes"]) def replace( self, - pat: str | re.Pattern, - repl: str | Callable, + pat: str | re.Pattern | dict, + repl: str | Callable | None = None, n: int = -1, case: bool | None = None, flags: int = 0, @@ -1442,11 +1441,14 @@ def replace( Parameters ---------- - pat : str or compiled regex + pat : str, compiled regex, or a dict String can be a character sequence or regular expression. + A dictionary maps each string to be replaced + to its replacement value. repl : str or callable Replacement string or a callable. The callable is passed the regex match object and must return a replacement string to be used. + Must be None if `pat` is a dict. See :func:`re.sub`. n : int, default -1 (all) Number of replacements to make from start. @@ -1480,6 +1482,7 @@ def replace( * if `regex` is False and `repl` is a callable or `pat` is a compiled regex * if `pat` is a compiled regex and `case` or `flags` is set + * if `pat` is a dictionary and `repl` is not None Notes ----- @@ -1489,6 +1492,15 @@ def replace( Examples -------- + When `pat` is a dictionary, every key in `pat` is replaced + with its corresponding value: + + >>> pd.Series(["A", "B", np.nan]).str.replace(pat={"A": "a", "B": "b"}) + 0 a + 1 b + 2 NaN + dtype: object + When `pat` is a string and `regex` is True, the given `pat` is compiled as a regex. When `repl` is a string, it replaces matching regex patterns as with :meth:`re.sub`.
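A short usage sketch of the new dict form documented above; as the implementation below shows, the pairs are applied one after another through repeated `_str_replace` calls, so later pairs see the output of earlier ones:

```python
import numpy as np
import pandas as pd

ser = pd.Series(["A", "B", np.nan])

# each key in pat is replaced by its corresponding value
ser.str.replace(pat={"A": "a", "B": "b"})

# a dict pat cannot be combined with repl
try:
    ser.str.replace(pat={"A": "a"}, repl="x")
except ValueError as err:
    print(err)  # "repl cannot be used when pat is a dictionary"
```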
NaN value(s) in the Series are @@ -1551,8 +1563,11 @@ def replace( 2 NaN dtype: object """ + if isinstance(pat, dict) and repl is not None: + raise ValueError("repl cannot be used when pat is a dictionary") + # Check whether repl is valid (GH 13438, GH 15055) - if not (isinstance(repl, str) or callable(repl)): + if not isinstance(pat, dict) and not (isinstance(repl, str) or callable(repl)): raise TypeError("repl must be a string or callable") is_compiled_re = is_re(pat) @@ -1572,10 +1587,17 @@ def replace( if case is None: case = True - result = self._data.array._str_replace( - pat, repl, n=n, case=case, flags=flags, regex=regex - ) - return self._wrap_result(result) + res_output = self._data + if not isinstance(pat, dict): + pat = {pat: repl} + + for key, value in pat.items(): + result = res_output.array._str_replace( + key, value, n=n, case=case, flags=flags, regex=regex + ) + res_output = self._wrap_result(result) + + return res_output @forbid_nonstring_types(["bytes"]) def repeat(self, repeats): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6c8c2c7e5009e..325ba90c21c29 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -337,7 +337,7 @@ def _convert_listlike_datetimes( unit : str None or string of the frequency of the passed data errors : str - error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' + error handling behaviors from to_datetime, 'raise', 'coerce' dayfirst : bool dayfirst parsing behavior from to_datetime yearfirst : bool @@ -387,7 +387,6 @@ def _convert_listlike_datetimes( if not is_supported_dtype(arg_dtype): # We go to closest supported reso, i.e. "s" arg = astype_overflowsafe( - # TODO: looks like we incorrectly raise with errors=="ignore" np.asarray(arg), np.dtype("M8[s]"), is_coerce=errors == "coerce", @@ -418,9 +417,6 @@ if errors == "coerce": npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) return DatetimeIndex(npvalues, name=name) - elif errors == "ignore": - idx = Index(arg, name=name) - return idx raise arg = ensure_object(arg) @@ -525,12 +521,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: arg = arg.astype(object, copy=False) arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) - if errors == "ignore": - # Index constructor _may_ infer to DatetimeIndex - result = Index._with_infer(arr, name=name) - else: - result = DatetimeIndex(arr, name=name) - + result = DatetimeIndex(arr, name=name) if not isinstance(result, DatetimeIndex): return result @@ -629,7 +620,6 @@ def to_datetime( format: str | None = ..., exact: bool = ..., unit: str | None = ..., - infer_datetime_format: bool = ..., origin=..., cache: bool = ..., ) -> Timestamp: @@ -646,7 +636,6 @@ format: str | None = ..., exact: bool = ..., unit: str | None = ..., - infer_datetime_format: bool = ..., origin=..., cache: bool = ..., ) -> Series: @@ -663,7 +652,6 @@ format: str | None = ..., exact: bool = ..., unit: str | None = ..., - infer_datetime_format: bool = ..., origin=..., cache: bool = ..., ) -> DatetimeIndex: @@ -679,7 +667,6 @@ format: str | None = None, exact: bool | lib.NoDefault = lib.no_default, unit: str | None = None, - infer_datetime_format: lib.NoDefault | bool = lib.no_default, origin: str = "unix", cache: bool = True, ) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None: @@ -696,10 +683,9 @@ method expects minimally the
following columns: :const:`"year"`, :const:`"month"`, :const:`"day"`. The column "year" must be specified in 4-digit format. - errors : {'ignore', 'raise', 'coerce'}, default 'raise' + errors : {'raise', 'coerce'}, default 'raise' - If :const:`'raise'`, then invalid parsing will raise an exception. - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`. - - If :const:`'ignore'`, then invalid parsing will return the input. dayfirst : bool, default False Specify a date parse order if `arg` is str or is list-like. If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"` @@ -780,16 +766,6 @@ def to_datetime( integer or float number. This will be based off the origin. Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate the number of milliseconds to the unix epoch start. - infer_datetime_format : bool, default False - If :const:`True` and no `format` is given, attempt to infer the format - of the datetime strings based on the first non-NaN element, - and if it can be inferred, switch to a faster method of parsing them. - In some cases this can increase the parsing speed by ~5-10x. - - .. deprecated:: 2.0.0 - A strict version of this argument is now the default, passing it has - no effect. - origin : scalar, default 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. @@ -1012,25 +988,6 @@ def to_datetime( """ if exact is not lib.no_default and format in {"mixed", "ISO8601"}: raise ValueError("Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'") - if infer_datetime_format is not lib.no_default: - warnings.warn( - "The argument 'infer_datetime_format' is deprecated and will " - "be removed in a future version. " - "A strict version of it is now the default, see " - "https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. " - "You can safely remove this argument.", - stacklevel=find_stack_level(), - ) - if errors == "ignore": - # GH#54467 - warnings.warn( - "errors='ignore' is deprecated and will raise in a future version. " - "Use to_datetime without passing `errors` and catch exceptions " - "explicitly instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - if arg is None: return None @@ -1141,11 +1098,10 @@ def _assemble_from_unit_mappings( Parameters ---------- arg : DataFrame - errors : {'ignore', 'raise', 'coerce'}, default 'raise' + errors : {'raise', 'coerce'}, default 'raise' - If :const:`'raise'`, then invalid parsing will raise an exception - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT` - - If :const:`'ignore'`, then invalid parsing will return the input utc : bool Whether to convert/localize timestamps to UTC. diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 2ae57d3c8508e..3d28a73df99d1 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -4,12 +4,10 @@ TYPE_CHECKING, Literal, ) -import warnings import numpy as np from pandas._libs import lib -from pandas.util._exceptions import find_stack_level from pandas.util._validators import check_dtype_backend from pandas.core.dtypes.cast import maybe_downcast_numeric @@ -66,14 +64,9 @@ def to_numeric( ---------- arg : scalar, list, tuple, 1-d array, or Series Argument to be converted. - errors : {'ignore', 'raise', 'coerce'}, default 'raise' + errors : {'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception. 
- If 'coerce', then invalid parsing will be set as NaN. - - If 'ignore', then invalid parsing will return the input. - - .. versionchanged:: 2.2 - - "ignore" is deprecated. Catch exceptions explicitly instead. downcast : str, default None Can be 'integer', 'signed', 'unsigned', or 'float'. @@ -166,17 +159,8 @@ def to_numeric( if downcast not in (None, "integer", "signed", "unsigned", "float"): raise ValueError("invalid downcasting method provided") - if errors not in ("ignore", "raise", "coerce"): + if errors not in ("raise", "coerce"): raise ValueError("invalid error value specified") - if errors == "ignore": - # GH#54467 - warnings.warn( - "errors='ignore' is deprecated and will raise in a future version. " - "Use to_numeric without passing `errors` and catch exceptions " - "explicitly instead", - FutureWarning, - stacklevel=find_stack_level(), - ) check_dtype_backend(dtype_backend) @@ -207,8 +191,6 @@ def to_numeric( else: values = arg - orig_values = values - # GH33013: for IntegerArray & FloatingArray extract non-null values for casting # save mask to reconstruct the full array after casting mask: npt.NDArray[np.bool_] | None = None @@ -227,20 +209,15 @@ def to_numeric( values = values.view(np.int64) else: values = ensure_object(values) - coerce_numeric = errors not in ("ignore", "raise") - try: - values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload] - values, - set(), - coerce_numeric=coerce_numeric, - convert_to_masked_nullable=dtype_backend is not lib.no_default - or isinstance(values_dtype, StringDtype) - and not values_dtype.storage == "pyarrow_numpy", - ) - except (ValueError, TypeError): - if errors == "raise": - raise - values = orig_values + coerce_numeric = errors != "raise" + values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload] + values, + set(), + coerce_numeric=coerce_numeric, + convert_to_masked_nullable=dtype_backend is not lib.no_default + or isinstance(values_dtype, StringDtype) + and not values_dtype.storage == "pyarrow_numpy", + ) if new_mask is not None: # Remove unnecessary values, is expected later anyway and enables diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 47dfae3c6cadd..5f3963c3d405e 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -8,7 +8,6 @@ Any, overload, ) -import warnings import numpy as np @@ -22,7 +21,6 @@ disallow_ambiguous_unit, parse_timedelta_unit, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.dtypes import ArrowDtype @@ -90,7 +88,6 @@ def to_timedelta( | Series, unit: UnitChoices | None = None, errors: DateTimeErrorChoices = "raise", - # returning Any for errors="ignore" ) -> Timedelta | TimedeltaIndex | Series | NaTType | Any: """ Convert argument to timedelta. @@ -130,10 +127,9 @@ def to_timedelta( in a future version. Please use 'h', 'min', 's', 'ms', 'us', and 'ns' instead of 'H', 'T', 'S', 'L', 'U' and 'N'. - errors : {'ignore', 'raise', 'coerce'}, default 'raise' + errors : {'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception. - If 'coerce', then invalid parsing will be set as NaT. - - If 'ignore', then invalid parsing will return the input. 
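With the `errors='ignore'` option removed from `to_numeric` above (and from `to_timedelta` and `to_time` below), the pattern suggested by the deleted warnings is to catch the exception and keep the input explicitly; a sketch for `to_numeric`:

```python
import pandas as pd

values = ["1", "2", "apple"]

# errors="coerce" still maps unparseable entries to NaN
pd.to_numeric(values, errors="coerce")

# the removed errors="ignore" becomes explicit exception handling
try:
    converted = pd.to_numeric(values)
except (ValueError, TypeError):
    converted = values  # fall back to the original input, as "ignore" did
```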
Returns ------- @@ -185,17 +181,8 @@ to_timedelta unit = parse_timedelta_unit(unit) disallow_ambiguous_unit(unit) - if errors not in ("ignore", "raise", "coerce"): - raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.") - if errors == "ignore": - # GH#54467 - warnings.warn( - "errors='ignore' is deprecated and will raise in a future version. " - "Use to_timedelta without passing `errors` and catch exceptions " - "explicitly instead", - FutureWarning, - stacklevel=find_stack_level(), - ) + if errors not in ("raise", "coerce"): + raise ValueError("errors must be one of 'raise' or 'coerce'.") if arg is None: return arg @@ -236,9 +223,6 @@ def _coerce_scalar_to_timedelta_type( except ValueError: if errors == "raise": raise - if errors == "ignore": - return r - # coerce result = NaT @@ -254,30 +238,11 @@ def _convert_listlike( """Convert a list of objects to a timedelta index object.""" arg_dtype = getattr(arg, "dtype", None) if isinstance(arg, (list, tuple)) or arg_dtype is None: - # This is needed only to ensure that in the case where we end up - # returning arg (errors == "ignore"), and where the input is a - # generator, we return a useful list-like instead of a - # used-up generator - if not hasattr(arg, "__array__"): - arg = list(arg) arg = np.array(arg, dtype=object) elif isinstance(arg_dtype, ArrowDtype) and arg_dtype.kind == "m": return arg - try: - td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] - except ValueError: - if errors == "ignore": - return arg - else: - # This else-block accounts for the cases when errors='raise' - # and errors='coerce'. If errors == 'raise', these errors - # should be raised. If errors == 'coerce', we shouldn't - # expect any errors to be raised, since all parsing errors - # cause coercion to pd.NaT. However, if an error / bug is - # introduced that causes an Exception to be raised, we would - # like to surface it. - raise + td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] from pandas import TimedeltaIndex diff --git a/pandas/core/tools/times.py b/pandas/core/tools/times.py index d77bcc91df709..f6a610ebc36ab 100644 --- a/pandas/core/tools/times.py +++ b/pandas/core/tools/times.py @@ -5,12 +5,10 @@ time, ) from typing import TYPE_CHECKING -import warnings import numpy as np from pandas._libs.lib import is_list_like -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.generic import ( ABCIndex, @@ -45,24 +43,16 @@ def to_time( infer_time_format: bool, default False Infer the time format based on the first non-NaN element. If all strings are in the same format, this will speed up conversion. - errors : {'ignore', 'raise', 'coerce'}, default 'raise' + errors : {'raise', 'coerce'}, default 'raise' - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as None - - If 'ignore', then invalid parsing will return the input Returns ------- datetime.time """ - if errors == "ignore": - # GH#54467 - warnings.warn( - "errors='ignore' is deprecated and will raise in a future version.
" - "Use to_time without passing `errors` and catch exceptions " - "explicitly instead", - FutureWarning, - stacklevel=find_stack_level(), - ) + if errors not in ("raise", "coerce"): + raise ValueError("errors must be one of 'raise', or 'coerce'.") def _convert_listlike(arg, format): if isinstance(arg, (list, tuple)): @@ -90,10 +80,7 @@ def _convert_listlike(arg, format): f"format {format}" ) raise ValueError(msg) from err - if errors == "ignore": - return arg - else: - times.append(None) + times.append(None) else: formats = _time_formats[:] format_found = False @@ -118,8 +105,6 @@ def _convert_listlike(arg, format): times.append(time_object) elif errors == "raise": raise ValueError(f"Cannot convert arg {arg} to a time") - elif errors == "ignore": - return arg else: times.append(None) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 01d1787d46ca0..9b21a23b1aefe 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -1072,7 +1072,7 @@ def mean(self, *args, update=None, update_times=None, **kwargs): result_kwargs["columns"] = self._selected_obj.columns else: result_kwargs["name"] = self._selected_obj.name - np_array = self._selected_obj.astype(np.float64, copy=False).to_numpy() + np_array = self._selected_obj.astype(np.float64).to_numpy() ewma_func = generate_online_numba_ewma_func( **get_jit_arguments(self.engine_kwargs) ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b0048d5024064..9e42a9033fb66 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -270,7 +270,7 @@ def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT: """ # filter out the on from the object if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2: - obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) + obj = obj.reindex(columns=obj.columns.difference([self.on])) if obj.ndim > 1 and numeric_only: obj = self._make_numeric_only(obj) return obj diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index e9f2e319c0136..8052da25f0368 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -9,7 +9,6 @@ Any, Literal, ) -import warnings from warnings import catch_warnings from pandas._config import using_pyarrow_string_dtype @@ -18,7 +17,6 @@ from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError from pandas.util._decorators import doc -from pandas.util._exceptions import find_stack_level from pandas.util._validators import check_dtype_backend import pandas as pd @@ -240,7 +238,6 @@ def read( path, columns=None, filters=None, - use_nullable_dtypes: bool = False, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, storage_options: StorageOptions | None = None, filesystem=None, @@ -357,15 +354,9 @@ def read( **kwargs, ) -> DataFrame: parquet_kwargs: dict[str, Any] = {} - use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False) dtype_backend = kwargs.pop("dtype_backend", lib.no_default) # We are disabling nullable dtypes for fastparquet pending discussion parquet_kwargs["pandas_nulls"] = False - if use_nullable_dtypes: - raise ValueError( - "The 'use_nullable_dtypes' argument is not supported for the " - "fastparquet engine" - ) if dtype_backend is not lib.no_default: raise ValueError( "The 'dtype_backend' argument is not supported for the " @@ -493,7 +484,6 @@ def read_parquet( engine: str = "auto", columns: list[str] | None = None, storage_options: StorageOptions | None = None, - 
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, filesystem: Any = None, filters: list[tuple] | list[list[tuple]] | None = None, @@ -534,17 +524,6 @@ def read_parquet( .. versionadded:: 1.3.0 - use_nullable_dtypes : bool, default False - If True, use dtypes that use ``pd.NA`` as missing value indicator - for the resulting DataFrame. (only applicable for the ``pyarrow`` - engine) - As new dtypes are added that support ``pd.NA`` in the future, the - output with this option will change to use those dtypes. - Note: this is an experimental option, and behaviour (e.g. additional - support dtypes) may change without notice. - - .. deprecated:: 2.0 - dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' Back-end data type applied to the resultant :class:`DataFrame` (still experimental). Behaviour is as follows: @@ -643,19 +622,6 @@ def read_parquet( """ impl = get_engine(engine) - - if use_nullable_dtypes is not lib.no_default: - msg = ( - "The argument 'use_nullable_dtypes' is deprecated and will be removed " - "in a future version." - ) - if use_nullable_dtypes is True: - msg += ( - "Use dtype_backend='numpy_nullable' instead of use_nullable_dtype=True." - ) - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) - else: - use_nullable_dtypes = False check_dtype_backend(dtype_backend) return impl.read( @@ -663,7 +629,6 @@ def read_parquet( columns=columns, filters=filters, storage_options=storage_options, - use_nullable_dtypes=use_nullable_dtypes, dtype_backend=dtype_backend, filesystem=filesystem, **kwargs, diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 3aef0692d5f59..6a3834be20d39 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1354,7 +1354,7 @@ def _isindex(colspec): date_cols.update(old_names) if isinstance(data_dict, DataFrame): - data_dict = concat([DataFrame(new_data), data_dict], axis=1, copy=False) + data_dict = concat([DataFrame(new_data), data_dict], axis=1) else: data_dict.update(new_data) new_cols.extend(columns) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 07920eb1750f2..d9dc5b41a81c2 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -26,8 +26,6 @@ import numpy as np -from pandas._config import using_copy_on_write - from pandas._libs import lib from pandas._libs.parsers import STR_NA_VALUES from pandas.errors import ( @@ -1967,7 +1965,7 @@ def read(self, nrows: int | None = None) -> DataFrame: new_col_dict, columns=columns, index=index, - copy=not using_copy_on_write(), + copy=False, ) self._currow += new_rows diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1e11a9783f0e1..97b9b905dfd62 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -30,7 +30,6 @@ from pandas._config import ( config, get_option, - using_copy_on_write, using_pyarrow_string_dtype, ) @@ -3294,13 +3293,8 @@ def read( dfs.append(df) if len(dfs) > 0: - out = concat(dfs, axis=1, copy=True) - if using_copy_on_write(): - # with CoW, concat ignores the copy keyword. 
Here, we still want - # to copy to enforce optimized column-major layout - out = out.copy() - out = out.reindex(columns=items, copy=False) - return out + out = concat(dfs, axis=1).copy() + return out.reindex(columns=items, copy=False) return DataFrame(columns=axes[0], index=axes[1]) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 08f99a4d3093a..70a111a8f5c56 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -79,7 +79,6 @@ ) from pandas._typing import ( - DateTimeErrorChoices, DtypeArg, DtypeBackend, IndexLabel, @@ -111,14 +110,7 @@ def _handle_date_column( # read_sql like functions. # Format can take on custom to_datetime argument values such as # {"errors": "coerce"} or {"dayfirst": True} - error: DateTimeErrorChoices = format.pop("errors", None) or "ignore" - if error == "ignore": - try: - return to_datetime(col, **format) - except (TypeError, ValueError): - # TODO: not reached 2023-10-27; needed? - return col - return to_datetime(col, errors=error, **format) + return to_datetime(col, **format) else: # Allow passing of formatting string for integers # GH17855 diff --git a/pandas/io/stata.py b/pandas/io/stata.py index c2a3db2d44b16..37ea940b3938a 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1181,24 +1181,6 @@ def __exit__( if self._close_file: self._close_file() - def close(self) -> None: - """Close the handle if its open. - - .. deprecated: 2.0.0 - - The close method is not part of the public API. - The only supported way to use StataReader is to use it as a context manager. - """ - warnings.warn( - "The StataReader.close() method is not part of the public API and " - "will be removed in a future version without notice. " - "Using StataReader as a context manager is the only supported method.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if self._close_file: - self._close_file() - def _set_encoding(self) -> None: """ Set string encoding which depends on file version diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index be771a4029b4f..dbd2743345a38 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -678,7 +678,7 @@ def _compute_plot_data(self) -> None: # GH16953, infer_objects is needed as fallback, for ``Series`` # with ``dtype == object`` - data = data.infer_objects(copy=False) + data = data.infer_objects() include_type = [np.number, "datetime", "datetimetz", "timedelta"] # GH23719, allow plotting boolean diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index d521d91ab3b7b..d9d1df128d199 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -94,7 +94,7 @@ def _adjust_bins(self, bins: int | np.ndarray | list[np.ndarray]): def _calculate_bins(self, data: Series | DataFrame, bins) -> np.ndarray: """Calculate bins given data""" - nd_values = data.infer_objects(copy=False)._get_numeric_data() + nd_values = data.infer_objects()._get_numeric_data() values = np.ravel(nd_values) values = values[~isna(values)] diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 531045b16fee0..2dd7f9902c78d 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1487,7 +1487,7 @@ def test_apply_dtype(col): tm.assert_series_equal(result, expected) -def test_apply_mutating(using_copy_on_write): +def test_apply_mutating(): # GH#35462 case where applied func pins a new BlockManager to a row df = DataFrame({"a": range(100), "b": 
range(100, 200)}) df_orig = df.copy() @@ -1504,11 +1504,7 @@ def func(row): result = df.apply(func, axis=1) tm.assert_frame_equal(result, expected) - if using_copy_on_write: - # INFO(CoW) With copy on write, mutating a viewing row doesn't mutate the parent - tm.assert_frame_equal(df, df_orig) - else: - tm.assert_frame_equal(df, result) + tm.assert_frame_equal(df, df_orig) def test_apply_empty_list_reduce(): diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 1e41e54bff6fc..cd1547620e208 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -75,17 +75,6 @@ def f(x): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("convert_dtype", [True, False]) -def test_apply_convert_dtype_deprecated(convert_dtype): - ser = Series(np.random.default_rng(2).standard_normal(10)) - - def func(x): - return x if x > 0 else np.nan - - with tm.assert_produces_warning(FutureWarning): - ser.apply(func, convert_dtype=convert_dtype, by_row="compat") - - def test_apply_args(): s = Series(["foo,bar"]) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 7b1f8b22301a1..8e018db05877f 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1964,7 +1964,7 @@ def test_eval_no_support_column_name(request, column): tm.assert_frame_equal(result, expected) -def test_set_inplace(using_copy_on_write): +def test_set_inplace(): # https://github.com/pandas-dev/pandas/issues/47449 # Ensure we don't only update the DataFrame inplace, but also the actual # column values, such that references to this column also get updated @@ -1974,13 +1974,9 @@ def test_set_inplace(using_copy_on_write): df.eval("A = B + C", inplace=True) expected = DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]}) tm.assert_frame_equal(df, expected) - if not using_copy_on_write: - tm.assert_series_equal(ser, expected["A"]) - tm.assert_series_equal(result_view["A"], expected["A"]) - else: - expected = Series([1, 2, 3], name="A") - tm.assert_series_equal(ser, expected) - tm.assert_series_equal(result_view["A"], expected) + expected = Series([1, 2, 3], name="A") + tm.assert_series_equal(ser, expected) + tm.assert_series_equal(result_view["A"], expected) class TestValidate: diff --git a/pandas/tests/copy_view/index/test_datetimeindex.py b/pandas/tests/copy_view/index/test_datetimeindex.py index 5dd1f45a94ff3..6194ea8b122c9 100644 --- a/pandas/tests/copy_view/index/test_datetimeindex.py +++ b/pandas/tests/copy_view/index/test_datetimeindex.py @@ -14,50 +14,43 @@ @pytest.mark.parametrize("box", [lambda x: x, DatetimeIndex]) -def test_datetimeindex(using_copy_on_write, box): +def test_datetimeindex(box): dt = date_range("2019-12-31", periods=3, freq="D") ser = Series(dt) idx = box(DatetimeIndex(ser)) expected = idx.copy(deep=True) ser.iloc[0] = Timestamp("2020-12-31") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) -def test_datetimeindex_tz_convert(using_copy_on_write): +def test_datetimeindex_tz_convert(): dt = date_range("2019-12-31", periods=3, freq="D", tz="Europe/Berlin") ser = Series(dt) idx = DatetimeIndex(ser).tz_convert("US/Eastern") expected = idx.copy(deep=True) ser.iloc[0] = Timestamp("2020-12-31", tz="Europe/Berlin") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) -def test_datetimeindex_tz_localize(using_copy_on_write): +def 
test_datetimeindex_tz_localize(): dt = date_range("2019-12-31", periods=3, freq="D") ser = Series(dt) idx = DatetimeIndex(ser).tz_localize("Europe/Berlin") expected = idx.copy(deep=True) ser.iloc[0] = Timestamp("2020-12-31") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) -def test_datetimeindex_isocalendar(using_copy_on_write): +def test_datetimeindex_isocalendar(): dt = date_range("2019-12-31", periods=3, freq="D") ser = Series(dt) df = DatetimeIndex(ser).isocalendar() expected = df.index.copy(deep=True) ser.iloc[0] = Timestamp("2020-12-31") - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) + tm.assert_index_equal(df.index, expected) -def test_index_values(using_copy_on_write): +def test_index_values(): idx = date_range("2019-12-31", periods=3, freq="D") result = idx.values - if using_copy_on_write: - assert result.flags.writeable is False - else: - assert result.flags.writeable is True + assert result.flags.writeable is False diff --git a/pandas/tests/copy_view/index/test_index.py b/pandas/tests/copy_view/index/test_index.py index 9a788c5fd4193..e51f5658cf437 100644 --- a/pandas/tests/copy_view/index/test_index.py +++ b/pandas/tests/copy_view/index/test_index.py @@ -19,18 +19,15 @@ def index_view(index_data): return idx, view -def test_set_index_update_column(using_copy_on_write): +def test_set_index_update_column(): df = DataFrame({"a": [1, 2], "b": 1}) df = df.set_index("a", drop=False) expected = df.index.copy(deep=True) df.iloc[0, 0] = 100 - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) - else: - tm.assert_index_equal(df.index, Index([100, 2], name="a")) + tm.assert_index_equal(df.index, expected) -def test_set_index_drop_update_column(using_copy_on_write): +def test_set_index_drop_update_column(): df = DataFrame({"a": [1, 2], "b": 1.5}) view = df[:] df = df.set_index("a", drop=True) @@ -39,31 +36,25 @@ def test_set_index_drop_update_column(using_copy_on_write): tm.assert_index_equal(df.index, expected) -def test_set_index_series(using_copy_on_write): +def test_set_index_series(): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) df = df.set_index(ser) expected = df.index.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) - else: - tm.assert_index_equal(df.index, Index([100, 11])) + tm.assert_index_equal(df.index, expected) -def test_assign_index_as_series(using_copy_on_write): +def test_assign_index_as_series(): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) df.index = ser expected = df.index.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) - else: - tm.assert_index_equal(df.index, Index([100, 11])) + tm.assert_index_equal(df.index, expected) -def test_assign_index_as_index(using_copy_on_write): +def test_assign_index_as_index(): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) rhs_index = Index(ser) @@ -71,24 +62,18 @@ def test_assign_index_as_index(using_copy_on_write): rhs_index = None # overwrite to clear reference expected = df.index.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - tm.assert_index_equal(df.index, expected) - else: - tm.assert_index_equal(df.index, Index([100, 11])) + tm.assert_index_equal(df.index, expected) -def test_index_from_series(using_copy_on_write): +def test_index_from_series(): ser = Series([1, 2]) idx = Index(ser) expected = idx.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - 
tm.assert_index_equal(idx, expected) - else: - tm.assert_index_equal(idx, Index([100, 2])) + tm.assert_index_equal(idx, expected) -def test_index_from_series_copy(using_copy_on_write): +def test_index_from_series_copy(): ser = Series([1, 2]) idx = Index(ser, copy=True) # noqa: F841 arr = get_array(ser) @@ -96,16 +81,13 @@ def test_index_from_series_copy(using_copy_on_write): assert np.shares_memory(get_array(ser), arr) -def test_index_from_index(using_copy_on_write): +def test_index_from_index(): ser = Series([1, 2]) idx = Index(ser) idx = Index(idx) expected = idx.copy(deep=True) ser.iloc[0] = 100 - if using_copy_on_write: - tm.assert_index_equal(idx, expected) - else: - tm.assert_index_equal(idx, Index([100, 2])) + tm.assert_index_equal(idx, expected) @pytest.mark.parametrize( @@ -135,44 +117,36 @@ def test_index_from_index(using_copy_on_write): "astype", ], ) -def test_index_ops(using_copy_on_write, func, request): +def test_index_ops(func, request): idx, view_ = index_view([1, 2]) expected = idx.copy(deep=True) if "astype" in request.node.callspec.id: expected = expected.astype("Int64") idx = func(idx) view_.iloc[0, 0] = 100 - if using_copy_on_write: - tm.assert_index_equal(idx, expected, check_names=False) + tm.assert_index_equal(idx, expected, check_names=False) -def test_infer_objects(using_copy_on_write): +def test_infer_objects(): idx, view_ = index_view(["a", "b"]) expected = idx.copy(deep=True) idx = idx.infer_objects(copy=False) view_.iloc[0, 0] = "aaaa" - if using_copy_on_write: - tm.assert_index_equal(idx, expected, check_names=False) + tm.assert_index_equal(idx, expected, check_names=False) -def test_index_to_frame(using_copy_on_write): +def test_index_to_frame(): idx = Index([1, 2, 3], name="a") expected = idx.copy(deep=True) df = idx.to_frame() - if using_copy_on_write: - assert np.shares_memory(get_array(df, "a"), idx._values) - assert not df._mgr._has_no_reference(0) - else: - assert not np.shares_memory(get_array(df, "a"), idx._values) + assert np.shares_memory(get_array(df, "a"), idx._values) + assert not df._mgr._has_no_reference(0) df.iloc[0, 0] = 100 tm.assert_index_equal(idx, expected) -def test_index_values(using_copy_on_write): +def test_index_values(): idx = Index([1, 2, 3]) result = idx.values - if using_copy_on_write: - assert result.flags.writeable is False - else: - assert result.flags.writeable is True + assert result.flags.writeable is False diff --git a/pandas/tests/copy_view/index/test_periodindex.py b/pandas/tests/copy_view/index/test_periodindex.py index 753304a1a8963..2887b191038d2 100644 --- a/pandas/tests/copy_view/index/test_periodindex.py +++ b/pandas/tests/copy_view/index/test_periodindex.py @@ -14,11 +14,10 @@ @pytest.mark.parametrize("box", [lambda x: x, PeriodIndex]) -def test_periodindex(using_copy_on_write, box): +def test_periodindex(box): dt = period_range("2019-12-31", periods=3, freq="D") ser = Series(dt) idx = box(PeriodIndex(ser)) expected = idx.copy(deep=True) ser.iloc[0] = Period("2020-12-31") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) diff --git a/pandas/tests/copy_view/index/test_timedeltaindex.py b/pandas/tests/copy_view/index/test_timedeltaindex.py index 5b9832093fded..6984df86b00e3 100644 --- a/pandas/tests/copy_view/index/test_timedeltaindex.py +++ b/pandas/tests/copy_view/index/test_timedeltaindex.py @@ -20,11 +20,10 @@ lambda x: TimedeltaIndex(TimedeltaIndex(x)), ], ) -def test_timedeltaindex(using_copy_on_write, cons): +def test_timedeltaindex(cons): dt = 
timedelta_range("1 day", periods=3) ser = Series(dt) idx = cons(ser) expected = idx.copy(deep=True) ser.iloc[0] = Timedelta("5 days") - if using_copy_on_write: - tm.assert_index_equal(idx, expected) + tm.assert_index_equal(idx, expected) diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index 02941a2fc3481..bb238d08bd9bd 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -18,29 +18,24 @@ [lambda ser: ser.values, lambda ser: np.asarray(ser)], ids=["values", "asarray"], ) -def test_series_values(using_copy_on_write, method): +def test_series_values(method): ser = Series([1, 2, 3], name="name") ser_orig = ser.copy() arr = method(ser) - if using_copy_on_write: - # .values still gives a view but is read-only - assert np.shares_memory(arr, get_array(ser, "name")) - assert arr.flags.writeable is False - - # mutating series through arr therefore doesn't work - with pytest.raises(ValueError, match="read-only"): - arr[0] = 0 - tm.assert_series_equal(ser, ser_orig) - - # mutating the series itself still works - ser.iloc[0] = 0 - assert ser.values[0] == 0 - else: - assert arr.flags.writeable is True + # .values still gives a view but is read-only + assert np.shares_memory(arr, get_array(ser, "name")) + assert arr.flags.writeable is False + + # mutating series through arr therefore doesn't work + with pytest.raises(ValueError, match="read-only"): arr[0] = 0 - assert ser.iloc[0] == 0 + tm.assert_series_equal(ser, ser_orig) + + # mutating the series itself still works + ser.iloc[0] = 0 + assert ser.values[0] == 0 @pytest.mark.parametrize( @@ -48,54 +43,44 @@ def test_series_values(using_copy_on_write, method): [lambda df: df.values, lambda df: np.asarray(df)], ids=["values", "asarray"], ) -def test_dataframe_values(using_copy_on_write, method): +def test_dataframe_values(method): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() arr = method(df) - if using_copy_on_write: - # .values still gives a view but is read-only - assert np.shares_memory(arr, get_array(df, "a")) - assert arr.flags.writeable is False - - # mutating series through arr therefore doesn't work - with pytest.raises(ValueError, match="read-only"): - arr[0, 0] = 0 - tm.assert_frame_equal(df, df_orig) - - # mutating the series itself still works - df.iloc[0, 0] = 0 - assert df.values[0, 0] == 0 - else: - assert arr.flags.writeable is True + # .values still gives a view but is read-only + assert np.shares_memory(arr, get_array(df, "a")) + assert arr.flags.writeable is False + + # mutating series through arr therefore doesn't work + with pytest.raises(ValueError, match="read-only"): arr[0, 0] = 0 - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) + # mutating the series itself still works + df.iloc[0, 0] = 0 + assert df.values[0, 0] == 0 -def test_series_to_numpy(using_copy_on_write): + +def test_series_to_numpy(): ser = Series([1, 2, 3], name="name") ser_orig = ser.copy() # default: copy=False, no dtype or NAs arr = ser.to_numpy() - if using_copy_on_write: - # to_numpy still gives a view but is read-only - assert np.shares_memory(arr, get_array(ser, "name")) - assert arr.flags.writeable is False - - # mutating series through arr therefore doesn't work - with pytest.raises(ValueError, match="read-only"): - arr[0] = 0 - tm.assert_series_equal(ser, ser_orig) - - # mutating the series itself still works - ser.iloc[0] = 0 - assert ser.values[0] == 0 - else: - assert arr.flags.writeable is True + # to_numpy still gives a view but is 
read-only + assert np.shares_memory(arr, get_array(ser, "name")) + assert arr.flags.writeable is False + + # mutating series through arr therefore doesn't work + with pytest.raises(ValueError, match="read-only"): arr[0] = 0 - assert ser.iloc[0] == 0 + tm.assert_series_equal(ser, ser_orig) + + # mutating the series itself still works + ser.iloc[0] = 0 + assert ser.values[0] == 0 # specify copy=False gives a writeable array ser = Series([1, 2, 3], name="name") @@ -110,48 +95,33 @@ def test_series_to_numpy(using_copy_on_write): assert arr.flags.writeable is True -def test_series_array_ea_dtypes(using_copy_on_write): +def test_series_array_ea_dtypes(): ser = Series([1, 2, 3], dtype="Int64") arr = np.asarray(ser, dtype="int64") assert np.shares_memory(arr, get_array(ser)) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False arr = np.asarray(ser) assert np.shares_memory(arr, get_array(ser)) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False -def test_dataframe_array_ea_dtypes(using_copy_on_write): +def test_dataframe_array_ea_dtypes(): df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") arr = np.asarray(df, dtype="int64") assert np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False arr = np.asarray(df) assert np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False -def test_dataframe_array_string_dtype(using_copy_on_write): +def test_dataframe_array_string_dtype(): df = DataFrame({"a": ["a", "b"]}, dtype="string") arr = np.asarray(df) assert np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False def test_dataframe_multiple_numpy_dtypes(): @@ -161,13 +131,10 @@ def test_dataframe_multiple_numpy_dtypes(): assert arr.flags.writeable is True -def test_values_is_ea(using_copy_on_write): +def test_values_is_ea(): df = DataFrame({"a": date_range("2012-01-01", periods=3)}) arr = np.asarray(df) - if using_copy_on_write: - assert arr.flags.writeable is False - else: - assert arr.flags.writeable is True + assert arr.flags.writeable is False def test_empty_dataframe(): diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index f280e2143fee0..5a9b3463cf63f 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -17,22 +17,17 @@ from pandas.tests.copy_view.util import get_array -def test_astype_single_dtype(using_copy_on_write): +def test_astype_single_dtype(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1.5}) df_orig = df.copy() df2 = df.astype("float64") - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for 
that column/block df2.iloc[0, 2] = 5.5 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) # mutating parent also doesn't update result @@ -43,22 +38,17 @@ def test_astype_single_dtype(using_copy_on_write): @pytest.mark.parametrize("dtype", ["int64", "Int64"]) @pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"]) -def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype): +def test_astype_avoids_copy(dtype, new_dtype): if new_dtype == "int64[pyarrow]": pytest.importorskip("pyarrow") df = DataFrame({"a": [1, 2, 3]}, dtype=dtype) df_orig = df.copy() df2 = df.astype(new_dtype) - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column/block df2.iloc[0, 0] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) # mutating parent also doesn't update result @@ -68,7 +58,7 @@ def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype): @pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"]) -def test_astype_different_target_dtype(using_copy_on_write, dtype): +def test_astype_different_target_dtype(dtype): if dtype == "int32[pyarrow]": pytest.importorskip("pyarrow") df = DataFrame({"a": [1, 2, 3]}) @@ -76,8 +66,7 @@ def test_astype_different_target_dtype(using_copy_on_write, dtype): df2 = df.astype(dtype) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - if using_copy_on_write: - assert df2._mgr._has_no_reference(0) + assert df2._mgr._has_no_reference(0) df2.iloc[0, 0] = 5 tm.assert_frame_equal(df, df_orig) @@ -98,15 +87,11 @@ def test_astype_numpy_to_ea(): @pytest.mark.parametrize( "dtype, new_dtype", [("object", "string"), ("string", "object")] ) -def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype): +def test_astype_string_and_object(dtype, new_dtype): df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype) df_orig = df.copy() df2 = df.astype(new_dtype) - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = "x" tm.assert_frame_equal(df, df_orig) @@ -115,17 +100,11 @@ def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype): @pytest.mark.parametrize( "dtype, new_dtype", [("object", "string"), ("string", "object")] ) -def test_astype_string_and_object_update_original( - using_copy_on_write, dtype, new_dtype -): +def test_astype_string_and_object_update_original(dtype, new_dtype): df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype) df2 = df.astype(new_dtype) df_orig = df2.copy() - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df.iloc[0, 0] = "x" tm.assert_frame_equal(df2, df_orig) @@ -151,63 +130,53 @@ def test_astype_string_read_only_on_pickle_roundrip(): 
tm.assert_series_equal(base, base_copy) -def test_astype_dict_dtypes(using_copy_on_write): +def test_astype_dict_dtypes(): df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")} ) df_orig = df.copy() df2 = df.astype({"a": "float64", "c": "float64"}) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) # mutating df2 triggers a copy-on-write for that column/block df2.iloc[0, 2] = 5.5 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) df2.iloc[0, 1] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) tm.assert_frame_equal(df, df_orig) -def test_astype_different_datetime_resos(using_copy_on_write): +def test_astype_different_datetime_resos(): df = DataFrame({"a": date_range("2019-12-31", periods=2, freq="D")}) result = df.astype("datetime64[ms]") assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) - if using_copy_on_write: - assert result._mgr._has_no_reference(0) + assert result._mgr._has_no_reference(0) -def test_astype_different_timezones(using_copy_on_write): +def test_astype_different_timezones(): df = DataFrame( {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")} ) result = df.astype("datetime64[ns, Europe/Berlin]") - if using_copy_on_write: - assert not result._mgr._has_no_reference(0) - assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert not result._mgr._has_no_reference(0) + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) -def test_astype_different_timezones_different_reso(using_copy_on_write): +def test_astype_different_timezones_different_reso(): df = DataFrame( {"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")} ) result = df.astype("datetime64[ms, Europe/Berlin]") - if using_copy_on_write: - assert result._mgr._has_no_reference(0) - assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert result._mgr._has_no_reference(0) + assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) -def test_astype_arrow_timestamp(using_copy_on_write): +def test_astype_arrow_timestamp(): pytest.importorskip("pyarrow") df = DataFrame( { @@ -219,19 +188,16 @@ def test_astype_arrow_timestamp(using_copy_on_write): dtype="M8[ns]", ) result = df.astype("timestamp[ns][pyarrow]") - if using_copy_on_write: - assert not result._mgr._has_no_reference(0) - if pa_version_under12p0: - assert not np.shares_memory( - get_array(df, "a"), get_array(result, "a")._pa_array - ) - else: - assert np.shares_memory( - get_array(df, "a"), get_array(result, "a")._pa_array - ) - - -def test_convert_dtypes_infer_objects(using_copy_on_write): + assert not result._mgr._has_no_reference(0) + if pa_version_under12p0: + assert 
not np.shares_memory( + get_array(df, "a"), get_array(result, "a")._pa_array + ) + else: + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array) + + +def test_convert_dtypes_infer_objects(): ser = Series(["a", "b", "c"]) ser_orig = ser.copy() result = ser.convert_dtypes( @@ -241,30 +207,19 @@ def test_convert_dtypes_infer_objects(using_copy_on_write): convert_string=False, ) - if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(result)) - else: - assert not np.shares_memory(get_array(ser), get_array(result)) - + assert np.shares_memory(get_array(ser), get_array(result)) result.iloc[0] = "x" tm.assert_series_equal(ser, ser_orig) -def test_convert_dtypes(using_copy_on_write): +def test_convert_dtypes(): df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]}) df_orig = df.copy() df2 = df.convert_dtypes() - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert np.shares_memory(get_array(df2, "d"), get_array(df, "d")) - assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) - assert not np.shares_memory(get_array(df2, "d"), get_array(df, "d")) - + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "d"), get_array(df, "d")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) df2.iloc[0, 0] = "x" tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index e1a76e66c107f..a54ecce4ffbec 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -7,30 +7,11 @@ import pandas._testing as tm -def test_methods_iloc_warn(using_copy_on_write): - if not using_copy_on_write: - df = DataFrame({"a": [1, 2, 3], "b": 1}) - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].replace(1, 5, inplace=True) - - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].fillna(1, inplace=True) - - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].interpolate(inplace=True) - - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].ffill(inplace=True) - - with tm.assert_cow_warning(match="A value"): - df.iloc[:, 0].bfill(inplace=True) - - # TODO(CoW-warn) expand the cases @pytest.mark.parametrize( "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] ) -def test_series_setitem(indexer, using_copy_on_write): +def test_series_setitem(indexer): # ensure we only get a single warning for those typical cases of chained # assignment df = DataFrame({"a": [1, 2, 3], "b": 1}) @@ -40,11 +21,7 @@ def test_series_setitem(indexer, using_copy_on_write): with pytest.warns() as record: df["a"][indexer] = 0 assert len(record) == 1 - if using_copy_on_write: - assert record[0].category == ChainedAssignmentError - else: - assert record[0].category == FutureWarning - assert "ChainedAssignmentError" in record[0].message.args[0] + assert record[0].category == ChainedAssignmentError @pytest.mark.parametrize( diff --git a/pandas/tests/copy_view/test_clip.py 
b/pandas/tests/copy_view/test_clip.py index c18a2e1e65d26..56df33db6d416 100644 --- a/pandas/tests/copy_view/test_clip.py +++ b/pandas/tests/copy_view/test_clip.py @@ -5,23 +5,20 @@ from pandas.tests.copy_view.util import get_array -def test_clip_inplace_reference(using_copy_on_write): +def test_clip_inplace_reference(): df = DataFrame({"a": [1.5, 2, 3]}) df_copy = df.copy() arr_a = get_array(df, "a") view = df[:] df.clip(lower=2, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr_a) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - tm.assert_frame_equal(df_copy, view) - else: - assert np.shares_memory(get_array(df, "a"), arr_a) + assert not np.shares_memory(get_array(df, "a"), arr_a) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) + tm.assert_frame_equal(df_copy, view) -def test_clip_inplace_reference_no_op(using_copy_on_write): +def test_clip_inplace_reference_no_op(): df = DataFrame({"a": [1.5, 2, 3]}) df_copy = df.copy() arr_a = get_array(df, "a") @@ -30,63 +27,46 @@ def test_clip_inplace_reference_no_op(using_copy_on_write): assert np.shares_memory(get_array(df, "a"), arr_a) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) - assert not view._mgr._has_no_reference(0) - tm.assert_frame_equal(df_copy, view) + assert not df._mgr._has_no_reference(0) + assert not view._mgr._has_no_reference(0) + tm.assert_frame_equal(df_copy, view) -def test_clip_inplace(using_copy_on_write): +def test_clip_inplace(): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") df.clip(lower=2, inplace=True) assert np.shares_memory(get_array(df, "a"), arr_a) + assert df._mgr._has_no_reference(0) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - -def test_clip(using_copy_on_write): +def test_clip(): df = DataFrame({"a": [1.5, 2, 3]}) df_orig = df.copy() df2 = df.clip(lower=2) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) tm.assert_frame_equal(df_orig, df) -def test_clip_no_op(using_copy_on_write): +def test_clip_no_op(): df = DataFrame({"a": [1.5, 2, 3]}) df2 = df.clip(lower=0) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not df._mgr._has_no_reference(0) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) -def test_clip_chained_inplace(using_copy_on_write): +def test_clip_chained_inplace(): df = DataFrame({"a": [1, 4, 2], "b": 1}) df_orig = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].clip(1, 2, inplace=True) - tm.assert_frame_equal(df, df_orig) - - with tm.raises_chained_assignment_error(): - df[["a"]].clip(1, 2, inplace=True) - tm.assert_frame_equal(df, df_orig) - else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - df["a"].clip(1, 2, inplace=True) - - with tm.assert_produces_warning(None): - df[["a"]].clip(1, 2, inplace=True) - - with tm.assert_produces_warning(None): - df[df["a"] > 1].clip(1, 2, inplace=True) + with tm.raises_chained_assignment_error(): + df["a"].clip(1, 2, inplace=True) + tm.assert_frame_equal(df, df_orig) + + with tm.raises_chained_assignment_error(): + df[["a"]].clip(1, 2, inplace=True) + tm.assert_frame_equal(df, df_orig) diff --git 
a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 77c07d11e3381..f7e78146c86eb 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -21,7 +21,7 @@ @pytest.mark.parametrize("dtype", [None, "int64"]) -def test_series_from_series(dtype, using_copy_on_write): +def test_series_from_series(dtype): # Case: constructing a Series from another Series object follows CoW rules: # a new object is returned and thus mutations are not propagated ser = Series([1, 2, 3], name="name") @@ -32,36 +32,23 @@ def test_series_from_series(dtype, using_copy_on_write): # the shallow copy still shares memory assert np.shares_memory(get_array(ser), get_array(result)) - if using_copy_on_write: - assert result._mgr.blocks[0].refs.has_reference() + assert result._mgr.blocks[0].refs.has_reference() - if using_copy_on_write: - # mutating new series copy doesn't mutate original - result.iloc[0] = 0 - assert ser.iloc[0] == 1 - # mutating triggered a copy-on-write -> no longer shares memory - assert not np.shares_memory(get_array(ser), get_array(result)) - else: - # mutating shallow copy does mutate original - result.iloc[0] = 0 - assert ser.iloc[0] == 0 - # and still shares memory - assert np.shares_memory(get_array(ser), get_array(result)) + # mutating new series copy doesn't mutate original + result.iloc[0] = 0 + assert ser.iloc[0] == 1 + # mutating triggered a copy-on-write -> no longer shares memory + assert not np.shares_memory(get_array(ser), get_array(result)) # the same when modifying the parent result = Series(ser, dtype=dtype) - if using_copy_on_write: - # mutating original doesn't mutate new series - ser.iloc[0] = 0 - assert result.iloc[0] == 1 - else: - # mutating original does mutate shallow copy - ser.iloc[0] = 0 - assert result.iloc[0] == 0 + # mutating original doesn't mutate new series + ser.iloc[0] = 0 + assert result.iloc[0] == 1 -def test_series_from_series_with_reindex(using_copy_on_write): +def test_series_from_series_with_reindex(): # Case: constructing a Series from another Series with specifying an index # that potentially requires a reindex of the values ser = Series([1, 2, 3], name="name") @@ -77,17 +64,13 @@ def test_series_from_series_with_reindex(using_copy_on_write): result = Series(ser, index=index) assert np.shares_memory(ser.values, result.values) result.iloc[0] = 0 - if using_copy_on_write: - assert ser.iloc[0] == 1 - else: - assert ser.iloc[0] == 0 + assert ser.iloc[0] == 1 # ensure that if an actual reindex is needed, we don't have any refs # (mutating the result wouldn't trigger CoW) result = Series(ser, index=[0, 1, 2, 3]) assert not np.shares_memory(ser.values, result.values) - if using_copy_on_write: - assert not result._mgr.blocks[0].refs.has_reference() + assert not result._mgr.blocks[0].refs.has_reference() @pytest.mark.parametrize("dtype", [None, "int64"]) @@ -95,25 +78,18 @@ def test_series_from_series_with_reindex(using_copy_on_write): @pytest.mark.parametrize( "arr", [np.array([1, 2, 3], dtype="int64"), pd.array([1, 2, 3], dtype="Int64")] ) -def test_series_from_array(using_copy_on_write, idx, dtype, arr): +def test_series_from_array(idx, dtype, arr): ser = Series(arr, dtype=dtype, index=idx) ser_orig = ser.copy() data = getattr(arr, "_data", arr) - if using_copy_on_write: - assert not np.shares_memory(get_array(ser), data) - else: - assert np.shares_memory(get_array(ser), data) + assert not np.shares_memory(get_array(ser), data) arr[0] = 100 - if using_copy_on_write: - 
tm.assert_series_equal(ser, ser_orig) - else: - expected = Series([100, 2, 3], dtype=dtype if dtype is not None else arr.dtype) - tm.assert_series_equal(ser, expected) + tm.assert_series_equal(ser, ser_orig) @pytest.mark.parametrize("copy", [True, False, None]) -def test_series_from_array_different_dtype(using_copy_on_write, copy): +def test_series_from_array_different_dtype(copy): arr = np.array([1, 2, 3], dtype="int64") ser = Series(arr, dtype="int32", copy=copy) assert not np.shares_memory(get_array(ser), arr) @@ -128,39 +104,34 @@ def test_series_from_array_different_dtype(using_copy_on_write, copy): TimedeltaIndex([Timedelta("1 days"), Timedelta("2 days")]), ], ) -def test_series_from_index(using_copy_on_write, idx): +def test_series_from_index(idx): ser = Series(idx) expected = idx.copy(deep=True) - if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(idx)) - assert not ser._mgr._has_no_reference(0) - else: - assert not np.shares_memory(get_array(ser), get_array(idx)) + assert np.shares_memory(get_array(ser), get_array(idx)) + assert not ser._mgr._has_no_reference(0) ser.iloc[0] = ser.iloc[1] tm.assert_index_equal(idx, expected) -def test_series_from_index_different_dtypes(using_copy_on_write): +def test_series_from_index_different_dtypes(): idx = Index([1, 2, 3], dtype="int64") ser = Series(idx, dtype="int32") assert not np.shares_memory(get_array(ser), get_array(idx)) - if using_copy_on_write: - assert ser._mgr._has_no_reference(0) + assert ser._mgr._has_no_reference(0) -def test_series_from_block_manager_different_dtype(using_copy_on_write): +def test_series_from_block_manager_different_dtype(): ser = Series([1, 2, 3], dtype="int64") msg = "Passing a SingleBlockManager to Series" with tm.assert_produces_warning(DeprecationWarning, match=msg): ser2 = Series(ser._mgr, dtype="int32") assert not np.shares_memory(get_array(ser), get_array(ser2)) - if using_copy_on_write: - assert ser2._mgr._has_no_reference(0) + assert ser2._mgr._has_no_reference(0) @pytest.mark.parametrize("use_mgr", [True, False]) @pytest.mark.parametrize("columns", [None, ["a"]]) -def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): +def test_dataframe_constructor_mgr_or_df(columns, use_mgr): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() @@ -177,18 +148,14 @@ def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) new_df.iloc[0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) - tm.assert_frame_equal(df, df_orig) - else: - assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) - tm.assert_frame_equal(df, new_df) + assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("dtype", [None, "int64", "Int64"]) @pytest.mark.parametrize("index", [None, [0, 1, 2]]) @pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]]) -def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtype): +def test_dataframe_from_dict_of_series(columns, index, dtype): # Case: constructing a DataFrame from Series objects with copy=False # has to do a lazy following CoW rules # (the default for DataFrame(dict) is still to copy to ensure consolidation) @@ -208,11 +175,8 @@ def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtyp # mutating the new dataframe doesn't mutate original result.iloc[0, 
0] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(s1)) - tm.assert_series_equal(s1, s1_orig) - else: - assert s1.iloc[0] == 10 + assert not np.shares_memory(get_array(result, "a"), get_array(s1)) + tm.assert_series_equal(s1, s1_orig) # the same when modifying the parent series s1 = Series([1, 2, 3]) @@ -221,11 +185,8 @@ def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtyp {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False ) s1.iloc[0] = 10 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(s1)) - tm.assert_frame_equal(result, expected) - else: - assert result.iloc[0, 0] == 10 + assert not np.shares_memory(get_array(result, "a"), get_array(s1)) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype", [None, "int64"]) @@ -249,38 +210,30 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype): @pytest.mark.parametrize( "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)] ) -def test_dataframe_from_series_or_index( - using_copy_on_write, data, dtype, index_or_series -): +def test_dataframe_from_series_or_index(data, dtype, index_or_series): obj = index_or_series(data, dtype=dtype) obj_orig = obj.copy() df = DataFrame(obj, dtype=dtype) assert np.shares_memory(get_array(obj), get_array(df, 0)) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) + assert not df._mgr._has_no_reference(0) df.iloc[0, 0] = data[-1] - if using_copy_on_write: - tm.assert_equal(obj, obj_orig) + tm.assert_equal(obj, obj_orig) -def test_dataframe_from_series_or_index_different_dtype( - using_copy_on_write, index_or_series -): +def test_dataframe_from_series_or_index_different_dtype(index_or_series): obj = index_or_series([1, 2], dtype="int64") df = DataFrame(obj, dtype="int32") assert not np.shares_memory(get_array(obj), get_array(df, 0)) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) -def test_dataframe_from_series_infer_datetime(using_copy_on_write): +def test_dataframe_from_series_infer_datetime(): ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object) with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): df = DataFrame(ser) assert not np.shares_memory(get_array(ser), get_array(df, 0)) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) @pytest.mark.parametrize("index", [None, [0, 1, 2]]) @@ -301,38 +254,33 @@ def test_dataframe_from_dict_of_series_with_dtype(index): @pytest.mark.parametrize("copy", [False, None, True]) -def test_frame_from_numpy_array(using_copy_on_write, copy): +def test_frame_from_numpy_array(copy): arr = np.array([[1, 2], [3, 4]]) df = DataFrame(arr, copy=copy) - if using_copy_on_write and copy is not False or copy is True: + if copy is not False: assert not np.shares_memory(get_array(df, 0), arr) else: assert np.shares_memory(get_array(df, 0), arr) -def test_dataframe_from_records_with_dataframe(using_copy_on_write): +def test_dataframe_from_records_with_dataframe(): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() with tm.assert_produces_warning(FutureWarning): df2 = DataFrame.from_records(df) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) + assert not df._mgr._has_no_reference(0) assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df2.iloc[0, 0] = 100 - if using_copy_on_write: - tm.assert_frame_equal(df,
df_orig) - else: - tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df, df_orig) -def test_frame_from_dict_of_index(using_copy_on_write): +def test_frame_from_dict_of_index(): idx = Index([1, 2, 3]) expected = idx.copy(deep=True) df = DataFrame({"a": idx}, copy=False) assert np.shares_memory(get_array(df, "a"), idx._values) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) + assert not df._mgr._has_no_reference(0) - df.iloc[0, 0] = 100 - tm.assert_index_equal(idx, expected) + df.iloc[0, 0] = 100 + tm.assert_index_equal(idx, expected) diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py index b37e1a3718ac1..70d7112ddbd89 100644 --- a/pandas/tests/copy_view/test_core_functionalities.py +++ b/pandas/tests/copy_view/test_core_functionalities.py @@ -6,18 +6,17 @@ from pandas.tests.copy_view.util import get_array -def test_assigning_to_same_variable_removes_references(using_copy_on_write): +def test_assigning_to_same_variable_removes_references(): df = DataFrame({"a": [1, 2, 3]}) df = df.reset_index() - if using_copy_on_write: - assert df._mgr._has_no_reference(1) + assert df._mgr._has_no_reference(1) arr = get_array(df, "a") df.iloc[0, 1] = 100 # Write into a assert np.shares_memory(arr, get_array(df, "a")) -def test_setitem_dont_track_unnecessary_references(using_copy_on_write): +def test_setitem_dont_track_unnecessary_references(): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) df["b"] = 100 @@ -28,7 +27,7 @@ def test_setitem_dont_track_unnecessary_references(using_copy_on_write): assert np.shares_memory(arr, get_array(df, "a")) -def test_setitem_with_view_copies(using_copy_on_write): +def test_setitem_with_view_copies(): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) view = df[:] expected = df.copy() @@ -36,12 +35,11 @@ def test_setitem_with_view_copies(using_copy_on_write): df["b"] = 100 arr = get_array(df, "a") df.iloc[0, 0] = 100 # Check that we correctly track reference - if using_copy_on_write: - assert not np.shares_memory(arr, get_array(df, "a")) - tm.assert_frame_equal(view, expected) + assert not np.shares_memory(arr, get_array(df, "a")) + tm.assert_frame_equal(view, expected) -def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, request): +def test_setitem_with_view_invalidated_does_not_copy(request): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) view = df[:] @@ -51,19 +49,16 @@ def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, reques # TODO(CoW-warn) false positive? -> block gets split because of `df["b"] = 100` # which introduces additional refs, even when those of `view` go out of scopes df.iloc[0, 0] = 100 - if using_copy_on_write: - # Setitem split the block. Since the old block shared data with view - # all the new blocks are referencing view and each other. When view - # goes out of scope, they don't share data with any other block, - # so we should not trigger a copy - mark = pytest.mark.xfail( - reason="blk.delete does not track references correctly" - ) - request.applymarker(mark) - assert np.shares_memory(arr, get_array(df, "a")) - - -def test_out_of_scope(using_copy_on_write): + # Setitem split the block. Since the old block shared data with view + # all the new blocks are referencing view and each other. 
When view + # goes out of scope, they don't share data with any other block, + # so we should not trigger a copy + mark = pytest.mark.xfail(reason="blk.delete does not track references correctly") + request.applymarker(mark) + assert np.shares_memory(arr, get_array(df, "a")) + + +def test_out_of_scope(): def func(): df = DataFrame({"a": [1, 2], "b": 1.5, "c": 1}) # create some subset @@ -71,32 +66,28 @@ def func(): return result result = func() - if using_copy_on_write: - assert not result._mgr.blocks[0].refs.has_reference() - assert not result._mgr.blocks[1].refs.has_reference() + assert not result._mgr.blocks[0].refs.has_reference() + assert not result._mgr.blocks[1].refs.has_reference() -def test_delete(using_copy_on_write): +def test_delete(): df = DataFrame( np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"] ) del df["b"] - if using_copy_on_write: - assert not df._mgr.blocks[0].refs.has_reference() - assert not df._mgr.blocks[1].refs.has_reference() + assert not df._mgr.blocks[0].refs.has_reference() + assert not df._mgr.blocks[1].refs.has_reference() df = df[["a"]] - if using_copy_on_write: - assert not df._mgr.blocks[0].refs.has_reference() + assert not df._mgr.blocks[0].refs.has_reference() -def test_delete_reference(using_copy_on_write): +def test_delete_reference(): df = DataFrame( np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"] ) x = df[:] del df["b"] - if using_copy_on_write: - assert df._mgr.blocks[0].refs.has_reference() - assert df._mgr.blocks[1].refs.has_reference() - assert x._mgr.blocks[0].refs.has_reference() + assert df._mgr.blocks[0].refs.has_reference() + assert df._mgr.blocks[1].refs.has_reference() + assert x._mgr.blocks[0].refs.has_reference() diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index da72e89b23ca0..09d13677eef62 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -48,7 +48,7 @@ def make_series(*args, **kwargs): # Indexing operations taking subset + modifying the subset/parent -def test_subset_column_selection(backend, using_copy_on_write): +def test_subset_column_selection(backend): # Case: taking a subset of the columns of a DataFrame # + afterwards modifying the subset _, DataFrame, _ = backend @@ -69,7 +69,7 @@ def test_subset_column_selection(backend, using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_subset_column_selection_modify_parent(backend, using_copy_on_write): +def test_subset_column_selection_modify_parent(backend): # Case: taking a subset of the columns of a DataFrame # + afterwards modifying the parent _, DataFrame, _ = backend @@ -77,22 +77,20 @@ def test_subset_column_selection_modify_parent(backend, using_copy_on_write): subset = df[["a", "c"]] - if using_copy_on_write: - # the subset shares memory ... - assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - # ... but parent uses CoW parent when it is modified + # the subset shares memory ... + assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + # ... 
but the parent uses CoW when it is modified df.iloc[0, 0] = 0 assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - if using_copy_on_write: - # different column/block still shares memory - assert np.shares_memory(get_array(subset, "c"), get_array(df, "c")) + # different column/block still shares memory + assert np.shares_memory(get_array(subset, "c"), get_array(df, "c")) expected = DataFrame({"a": [1, 2, 3], "c": [0.1, 0.2, 0.3]}) tm.assert_frame_equal(subset, expected) -def test_subset_row_slice(backend, using_copy_on_write): +def test_subset_row_slice(backend): # Case: taking a subset of the rows of a DataFrame using a slice # + afterwards modifying the subset _, DataFrame, _ = backend @@ -160,7 +158,6 @@ def test_subset_loc_rows_columns( dtype, row_indexer, column_indexer, - using_copy_on_write, ): # Case: taking a subset of the rows+columns of a DataFrame using .loc # + afterwards modifying the subset @@ -176,14 +173,6 @@ def test_subset_loc_rows_columns( subset = df.loc[row_indexer, column_indexer] - # a few corner cases _do_ actually modify the parent (with both row and column - # slice, and in case of BlockManager with single block) - mutate_parent = ( - isinstance(row_indexer, slice) - and isinstance(column_indexer, slice) - and (dtype == "int64" and dtype_backend == "numpy" and not using_copy_on_write) - ) - # modifying the subset never modifies the parent subset.iloc[0, 0] = 0 expected = DataFrame( {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) ) tm.assert_frame_equal(subset, expected) - if mutate_parent: - df_orig.iloc[1, 1] = 0 tm.assert_frame_equal(df, df_orig) @@ -214,7 +201,6 @@ def test_subset_iloc_rows_columns( dtype, row_indexer, column_indexer, - using_copy_on_write, ): # Case: taking a subset of the rows+columns of a DataFrame using .iloc # + afterwards modifying the subset @@ -230,14 +216,6 @@ def test_subset_iloc_rows_columns( subset = df.iloc[row_indexer, column_indexer] - # a few corner cases _do_ actually modify the parent (with both row and column - # slice, and in case of BlockManager with single block) - mutate_parent = ( - isinstance(row_indexer, slice) - and isinstance(column_indexer, slice) - and (dtype == "int64" and dtype_backend == "numpy" and not using_copy_on_write) - ) - # modifying the subset never modifies the parent subset.iloc[0, 0] = 0 expected = DataFrame( {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) ) tm.assert_frame_equal(subset, expected) - if mutate_parent: - df_orig.iloc[1, 1] = 0 tm.assert_frame_equal(df, df_orig) @@ -322,7 +298,7 @@ def test_subset_set_column(backend): @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_set_column_with_loc(backend, using_copy_on_write, dtype): +def test_subset_set_column_with_loc(backend, dtype): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value _, DataFrame, _ = backend @@ -332,12 +308,7 @@ def test_subset_set_column_with_loc(backend, dtype): df_orig = df.copy() subset = df[1:3] - if using_copy_on_write: - subset.loc[:, "a"] = np.array([10, 11], dtype="int64") - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(None): - subset.loc[:, "a"] = np.array([10, 11], dtype="int64") + subset.loc[:, "a"] = np.array([10, 11], dtype="int64") subset._mgr._verify_integrity() expected = DataFrame( @@ -345,16 +316,11 @@ def
test_subset_set_column_with_loc(backend, using_copy_on_write, dtype): index=range(1, 3), ) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig.loc[1:3, "a"] = np.array([10, 11], dtype="int64") - tm.assert_frame_equal(df, df_orig) + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) -def test_subset_set_column_with_loc2(backend, using_copy_on_write): +def test_subset_set_column_with_loc2(backend): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value # separate test for case of DataFrame of a single column -> takes a separate @@ -364,29 +330,19 @@ def test_subset_set_column_with_loc2(backend, using_copy_on_write): df_orig = df.copy() subset = df[1:3] - if using_copy_on_write: - subset.loc[:, "a"] = 0 - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(None): - subset.loc[:, "a"] = 0 + subset.loc[:, "a"] = 0 subset._mgr._verify_integrity() expected = DataFrame({"a": [0, 0]}, index=range(1, 3)) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig.loc[1:3, "a"] = 0 - tm.assert_frame_equal(df, df_orig) + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_set_columns(backend, using_copy_on_write, dtype): +def test_subset_set_columns(backend, dtype): # Case: setting multiple columns on a viewing subset # -> subset[[col1, col2]] = value dtype_backend, DataFrame, _ = backend @@ -417,7 +373,7 @@ def test_subset_set_columns(backend, using_copy_on_write, dtype): [slice("a", "b"), np.array([True, True, False]), ["a", "b"]], ids=["slice", "mask", "array"], ) -def test_subset_set_with_column_indexer(backend, indexer, using_copy_on_write): +def test_subset_set_with_column_indexer(backend, indexer): # Case: setting multiple columns with a column indexer on a viewing subset # -> subset.loc[:, [col1, col2]] = value _, DataFrame, _ = backend @@ -425,25 +381,12 @@ def test_subset_set_with_column_indexer(backend, indexer, using_copy_on_write): df_orig = df.copy() subset = df[1:3] - if using_copy_on_write: - subset.loc[:, indexer] = 0 - else: - with pd.option_context("chained_assignment", "warn"): - # As of 2.0, this setitem attempts (successfully) to set values - # inplace, so the assignment is not chained. - subset.loc[:, indexer] = 0 + subset.loc[:, indexer] = 0 subset._mgr._verify_integrity() expected = DataFrame({"a": [0, 0], "b": [0.0, 0.0], "c": [5, 6]}, index=range(1, 3)) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - # pre-2.0, in the mixed case with BlockManager, only column "a" - # would be mutated in the parent frame. 
this changed with the - # enforcement of GH#45333 - df_orig.loc[1:2, ["a", "b"]] = 0 - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( @@ -473,7 +416,6 @@ def test_subset_chained_getitem( backend, method, dtype, - using_copy_on_write, ): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour @@ -483,31 +425,17 @@ def test_subset_chained_getitem( ) df_orig = df.copy() - # when not using CoW, it depends on whether we have a single block or not - # and whether we are slicing the columns -> in that case we have a view - test_callspec = request.node.callspec.id - subset_is_view = test_callspec in ( - "numpy-single-block-column-iloc-slice", - "numpy-single-block-column-loc-slice", - ) - # modify subset -> don't modify parent subset = method(df) subset.iloc[0, 0] = 0 - if using_copy_on_write or (not subset_is_view): - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) # modify parent -> don't modify subset subset = method(df) df.iloc[0, 0] = 0 expected = DataFrame({"a": [1, 2], "b": [4, 5]}) - if using_copy_on_write or not subset_is_view: - tm.assert_frame_equal(subset, expected) - else: - assert subset.iloc[0, 0] == 0 + tm.assert_frame_equal(subset, expected) @pytest.mark.parametrize( @@ -548,7 +476,7 @@ def test_subset_chained_getitem_column(backend, dtype): ], ids=["getitem", "iloc", "loc", "long-chain"], ) -def test_subset_chained_getitem_series(backend, method, using_copy_on_write): +def test_subset_chained_getitem_series(backend, method): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour _, _, Series = backend @@ -558,22 +486,16 @@ def test_subset_chained_getitem_series(backend, method, using_copy_on_write): # modify subset -> don't modify parent subset = method(s) subset.iloc[0] = 0 - if using_copy_on_write: - tm.assert_series_equal(s, s_orig) - else: - assert s.iloc[0] == 0 + tm.assert_series_equal(s, s_orig) # modify parent -> don't modify subset subset = s.iloc[0:3].iloc[0:2] s.iloc[0] = 0 expected = Series([1, 2], index=["a", "b"]) - if using_copy_on_write: - tm.assert_series_equal(subset, expected) - else: - assert subset.iloc[0] == 0 + tm.assert_series_equal(subset, expected) -def test_subset_chained_single_block_row(using_copy_on_write): +def test_subset_chained_single_block_row(): # not parametrizing this for dtype backend, since this explicitly tests single block df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() @@ -581,19 +503,13 @@ def test_subset_chained_single_block_row(using_copy_on_write): # modify subset -> don't modify parent subset = df[:].iloc[0].iloc[0:2] subset.iloc[0] = 0 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) # modify parent -> don't modify subset subset = df[:].iloc[0].iloc[0:2] df.iloc[0, 0] = 0 expected = Series([1, 4], index=["a", "b"], name=0) - if using_copy_on_write: - tm.assert_series_equal(subset, expected) - else: - assert subset.iloc[0] == 0 + tm.assert_series_equal(subset, expected) @pytest.mark.parametrize( @@ -607,7 +523,7 @@ def test_subset_chained_single_block_row(using_copy_on_write): ], ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"], ) -def test_null_slice(backend, method, using_copy_on_write): +def test_null_slice(backend, method): # Case: also all variants of 
indexing with a null slice (:) should return # new objects to ensure we correctly use CoW for the results dtype_backend, DataFrame, _ = backend @@ -621,10 +537,7 @@ def test_null_slice(backend, method, using_copy_on_write): # and those trigger CoW when mutated df2.iloc[0, 0] = 0 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( @@ -636,7 +549,7 @@ def test_null_slice(backend, method, using_copy_on_write): ], ids=["getitem", "loc", "iloc"], ) -def test_null_slice_series(backend, method, using_copy_on_write): +def test_null_slice_series(backend, method): _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) s_orig = s.copy() @@ -648,10 +561,7 @@ def test_null_slice_series(backend, method, using_copy_on_write): # and those trigger CoW when mutated s2.iloc[0] = 0 - if using_copy_on_write: - tm.assert_series_equal(s, s_orig) - else: - assert s.iloc[0] == 0 + tm.assert_series_equal(s, s_orig) # TODO add more tests modifying the parent @@ -661,7 +571,7 @@ def test_null_slice_series(backend, method, using_copy_on_write): # Series -- Indexing operations taking subset + modifying the subset/parent -def test_series_getitem_slice(backend, using_copy_on_write): +def test_series_getitem_slice(backend): # Case: taking a slice of a Series + afterwards modifying the subset _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) @@ -672,21 +582,16 @@ def test_series_getitem_slice(backend, using_copy_on_write): subset.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(subset), get_array(s)) + assert not np.shares_memory(get_array(subset), get_array(s)) expected = Series([0, 2, 3], index=["a", "b", "c"]) tm.assert_series_equal(subset, expected) - if using_copy_on_write: - # original parent series is not modified (CoW) - tm.assert_series_equal(s, s_orig) - else: - # original parent series is actually updated - assert s.iloc[0] == 0 + # original parent series is not modified (CoW) + tm.assert_series_equal(s, s_orig) -def test_series_getitem_ellipsis(using_copy_on_write): +def test_series_getitem_ellipsis(): # Case: taking a view of a Series using Ellipsis + afterwards modifying the subset s = Series([1, 2, 3]) s_orig = s.copy() @@ -696,18 +601,13 @@ def test_series_getitem_ellipsis(using_copy_on_write): subset.iloc[0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(subset), get_array(s)) + assert not np.shares_memory(get_array(subset), get_array(s)) expected = Series([0, 2, 3]) tm.assert_series_equal(subset, expected) - if using_copy_on_write: - # original parent series is not modified (CoW) - tm.assert_series_equal(s, s_orig) - else: - # original parent series is actually updated - assert s.iloc[0] == 0 + # original parent series is not modified (CoW) + tm.assert_series_equal(s, s_orig) @pytest.mark.parametrize( @@ -715,9 +615,7 @@ def test_series_getitem_ellipsis(using_copy_on_write): [slice(0, 2), np.array([True, True, False]), np.array([0, 1])], ids=["slice", "mask", "array"], ) -def test_series_subset_set_with_indexer( - backend, indexer_si, indexer, using_copy_on_write -): +def test_series_subset_set_with_indexer(backend, indexer_si, indexer): # Case: setting values in a viewing Series with an indexer _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) @@ -737,17 +635,14 @@ def test_series_subset_set_with_indexer( expected = Series([0, 0, 3], index=["a", "b", "c"]) tm.assert_series_equal(subset, expected) 
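Note on the indexing hunks above: they all collapse the old two-branch assertions into the copy-on-write branch, since CoW is now the only behaviour. The contract they assert is a single one: a subset lazily shares blocks with its parent until the first write, and that write copies the touched block instead of mutating the parent. A minimal standalone sketch of that contract, illustrative only and not part of the patch, assuming a pandas build where the copy_on_write option exists (opt-in on 2.x, the default afterwards):

import numpy as np
import pandas as pd

pd.options.mode.copy_on_write = True  # assumption: opt-in on 2.x builds

df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
subset = df[1:3]  # row slice: a new object that lazily shares blocks with df
assert np.shares_memory(subset["a"].to_numpy(), df["a"].to_numpy())

subset.iloc[0, 0] = 100  # the first write copies only the touched column/block
assert df.loc[1, "a"] == 2  # the parent never observes mutations of a subset
assert not np.shares_memory(subset["a"].to_numpy(), df["a"].to_numpy())

Untouched columns keep sharing memory after such a write, which is what the remaining shares_memory assertions on the other columns in these tests verify.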
- if using_copy_on_write: - tm.assert_series_equal(s, s_orig) - else: - tm.assert_series_equal(s, expected) + tm.assert_series_equal(s, s_orig) # ----------------------------------------------------------------------------- # del operator -def test_del_frame(backend, using_copy_on_write): +def test_del_frame(backend): # Case: deleting a column with `del` on a viewing child dataframe should # not modify parent + update the references dtype_backend, DataFrame, _ = backend @@ -769,11 +664,8 @@ def test_del_frame(backend, using_copy_on_write): df_orig = df.copy() df2.loc[0, "a"] = 100 - if using_copy_on_write: - # modifying child after deleting a column still doesn't update parent - tm.assert_frame_equal(df, df_orig) - else: - assert df.loc[0, "a"] == 100 + # modifying child after deleting a column still doesn't update parent + tm.assert_frame_equal(df, df_orig) def test_del_series(backend): @@ -873,7 +765,7 @@ def test_column_as_series_no_item_cache(request, backend, method): # TODO add tests for other indexing methods on the Series -def test_dataframe_add_column_from_series(backend, using_copy_on_write): +def test_dataframe_add_column_from_series(backend): # Case: adding a new column to a DataFrame from an existing column/series # -> delays copy under CoW _, DataFrame, Series = backend @@ -881,10 +773,7 @@ def test_dataframe_add_column_from_series(backend, using_copy_on_write): s = Series([10, 11, 12]) df["new"] = s - if using_copy_on_write: - assert np.shares_memory(get_array(df, "new"), get_array(s)) - else: - assert not np.shares_memory(get_array(df, "new"), get_array(s)) + assert np.shares_memory(get_array(df, "new"), get_array(s)) # editing series -> doesn't modify column in frame s[0] = 0 @@ -907,9 +796,7 @@ def test_dataframe_add_column_from_series(backend, using_copy_on_write): @pytest.mark.parametrize( "col", [[0.1, 0.2, 0.3], [7, 8, 9]], ids=["mixed-block", "single-block"] ) -def test_set_value_copy_only_necessary_column( - using_copy_on_write, indexer_func, indexer, val, col -): +def test_set_value_copy_only_necessary_column(indexer_func, indexer, val, col): # When setting inplace, only copy column that is modified instead of the whole # block (by splitting the block) df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": col}) @@ -924,31 +811,18 @@ def test_set_value_copy_only_necessary_column( indexer_func(df)[indexer] = val - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "c"), get_array(view, "c")) - if val == "a": - assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) - else: - assert np.shares_memory(get_array(df, "a"), get_array(view, "a")) + assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) + assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) + tm.assert_frame_equal(view, df_orig) -def test_series_midx_slice(using_copy_on_write): +def test_series_midx_slice(): ser = Series([1, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]])) ser_orig = ser.copy() result = ser[1] assert np.shares_memory(get_array(ser), get_array(result)) result.iloc[0] = 100 - if using_copy_on_write: - tm.assert_series_equal(ser, ser_orig) - else: - expected = Series( - [100, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]]) - ) - tm.assert_series_equal(ser, expected) + tm.assert_series_equal(ser, ser_orig) def 
test_getitem_midx_slice(): @@ -963,7 +837,7 @@ def test_getitem_midx_slice(): tm.assert_frame_equal(df_orig, df) -def test_series_midx_tuples_slice(using_copy_on_write): +def test_series_midx_tuples_slice(): ser = Series( [1, 2, 3], index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]), @@ -971,12 +845,11 @@ def test_series_midx_tuples_slice(using_copy_on_write): result = ser[(1, 2)] assert np.shares_memory(get_array(ser), get_array(result)) result.iloc[0] = 100 - if using_copy_on_write: - expected = Series( - [1, 2, 3], - index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]), - ) - tm.assert_series_equal(ser, expected) + expected = Series( + [1, 2, 3], + index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]), + ) + tm.assert_series_equal(ser, expected) def test_midx_read_only_bool_indexer(): @@ -1000,17 +873,14 @@ def mklbl(prefix, n): tm.assert_series_equal(mask, expected_mask) -def test_loc_enlarging_with_dataframe(using_copy_on_write): +def test_loc_enlarging_with_dataframe(): df = DataFrame({"a": [1, 2, 3]}) rhs = DataFrame({"b": [1, 2, 3], "c": [4, 5, 6]}) rhs_orig = rhs.copy() df.loc[:, ["b", "c"]] = rhs - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(rhs, "b")) - assert np.shares_memory(get_array(df, "c"), get_array(rhs, "c")) - assert not df._mgr._has_no_reference(1) - else: - assert not np.shares_memory(get_array(df, "b"), get_array(rhs, "b")) + assert np.shares_memory(get_array(df, "b"), get_array(rhs, "b")) + assert np.shares_memory(get_array(df, "c"), get_array(rhs, "c")) + assert not df._mgr._has_no_reference(1) df.iloc[0, 1] = 100 tm.assert_frame_equal(rhs, rhs_orig) diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py index f1a4decce623f..07447ab827cec 100644 --- a/pandas/tests/copy_view/test_internals.py +++ b/pandas/tests/copy_view/test_internals.py @@ -6,7 +6,7 @@ from pandas.tests.copy_view.util import get_array -def test_consolidate(using_copy_on_write): +def test_consolidate(): # create unconsolidated DataFrame df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) df["c"] = [4, 5, 6] @@ -35,10 +35,9 @@ def test_consolidate(using_copy_on_write): assert not df._mgr.blocks[2].refs.has_reference() # and modifying subset still doesn't modify parent - if using_copy_on_write: - subset.iloc[0, 1] = 0.0 - assert not df._mgr.blocks[1].refs.has_reference() - assert df.loc[0, "b"] == 0.1 + subset.iloc[0, 1] = 0.0 + assert not df._mgr.blocks[1].refs.has_reference() + assert df.loc[0, "b"] == 0.1 @pytest.mark.parametrize("dtype", [np.intp, np.int8]) @@ -55,7 +54,7 @@ def test_consolidate(using_copy_on_write): ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T), ], ) -def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype): +def test_iset_splits_blocks_inplace(locs, arr, dtype): # Nothing currently calls iset with # more than 1 loc with inplace=True (only happens with inplace=False) # but ensure that it works @@ -75,14 +74,9 @@ def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype): df2._mgr.iset(locs, arr, inplace=True) tm.assert_frame_equal(df, df_orig) - - if using_copy_on_write: - for i, col in enumerate(df.columns): - if i not in locs: - assert np.shares_memory(get_array(df, col), get_array(df2, col)) - else: - for col in df.columns: - assert not np.shares_memory(get_array(df, col), get_array(df2, col)) + for i, col in enumerate(df.columns): + if i not in locs: + assert np.shares_memory(get_array(df, col), 
get_array(df2, col)) def test_exponential_backoff(): diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index d72600956a6d6..733c6ddb9bd8a 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -3,7 +3,6 @@ from pandas import ( NA, - ArrowDtype, DataFrame, Interval, NaT, @@ -16,7 +15,7 @@ @pytest.mark.parametrize("method", ["pad", "nearest", "linear"]) -def test_interpolate_no_op(using_copy_on_write, method): +def test_interpolate_no_op(method): df = DataFrame({"a": [1, 2]}) df_orig = df.copy() @@ -27,35 +26,26 @@ def test_interpolate_no_op(using_copy_on_write, method): with tm.assert_produces_warning(warn, match=msg): result = df.interpolate(method=method) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) result.iloc[0, 0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("func", ["ffill", "bfill"]) -def test_interp_fill_functions(using_copy_on_write, func): +def test_interp_fill_functions(func): # Check that these takes the same code paths as interpolate df = DataFrame({"a": [1, 2]}) df_orig = df.copy() result = getattr(df, func)() - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) result.iloc[0, 0] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @@ -63,54 +53,48 @@ def test_interp_fill_functions(using_copy_on_write, func): @pytest.mark.parametrize( "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] ) -def test_interpolate_triggers_copy(using_copy_on_write, vals, func): +def test_interpolate_triggers_copy(vals, func): df = DataFrame({"a": vals}) result = getattr(df, func)() assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - if using_copy_on_write: - # Check that we don't have references when triggering a copy - assert result._mgr._has_no_reference(0) + # Check that we don't have references when triggering a copy + assert result._mgr._has_no_reference(0) @pytest.mark.parametrize( "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] ) -def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals): +def test_interpolate_inplace_no_reference_no_copy(vals): df = DataFrame({"a": vals}) arr = get_array(df, "a") df.interpolate(method="linear", inplace=True) assert np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - # Check that we don't have references when triggering a copy - assert df._mgr._has_no_reference(0) + # Check that we don't have references when triggering a copy + assert df._mgr._has_no_reference(0) @pytest.mark.parametrize( "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] ) -def test_interpolate_inplace_with_refs(using_copy_on_write, vals): +def test_interpolate_inplace_with_refs(vals): 
df = DataFrame({"a": [1, np.nan, 2]}) df_orig = df.copy() arr = get_array(df, "a") view = df[:] df.interpolate(method="linear", inplace=True) - - if using_copy_on_write: - # Check that copy was triggered in interpolate and that we don't - # have any references left - assert not np.shares_memory(arr, get_array(df, "a")) - tm.assert_frame_equal(df_orig, view) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - else: - assert np.shares_memory(arr, get_array(df, "a")) + # Check that copy was triggered in interpolate and that we don't + # have any references left + assert not np.shares_memory(arr, get_array(df, "a")) + tm.assert_frame_equal(df_orig, view) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) @pytest.mark.parametrize("func", ["ffill", "bfill"]) @pytest.mark.parametrize("dtype", ["float64", "Float64"]) -def test_interp_fill_functions_inplace(using_copy_on_write, func, dtype): +def test_interp_fill_functions_inplace(func, dtype): # Check that these takes the same code paths as interpolate df = DataFrame({"a": [1, np.nan, 2]}, dtype=dtype) df_orig = df.copy() @@ -119,18 +103,15 @@ def test_interp_fill_functions_inplace(using_copy_on_write, func, dtype): getattr(df, func)(inplace=True) - if using_copy_on_write: - # Check that copy was triggered in interpolate and that we don't - # have any references left - assert not np.shares_memory(arr, get_array(df, "a")) - tm.assert_frame_equal(df_orig, view) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - else: - assert np.shares_memory(arr, get_array(df, "a")) is (dtype == "float64") + # Check that copy was triggered in interpolate and that we don't + # have any references left + assert not np.shares_memory(arr, get_array(df, "a")) + tm.assert_frame_equal(df_orig, view) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) -def test_interpolate_cleaned_fill_method(using_copy_on_write): +def test_interpolate_cleaned_fill_method(): # Check that "method is set to None" case works correctly df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) df_orig = df.copy() @@ -139,19 +120,14 @@ def test_interpolate_cleaned_fill_method(using_copy_on_write): with tm.assert_produces_warning(FutureWarning, match=msg): result = df.interpolate(method="linear") - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) result.iloc[0, 0] = Timestamp("2021-12-31") - if using_copy_on_write: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) tm.assert_frame_equal(df, df_orig) -def test_interpolate_object_convert_no_op(using_copy_on_write): +def test_interpolate_object_convert_no_op(): df = DataFrame({"a": ["a", "b", "c"], "b": 1}) arr_a = get_array(df, "a") msg = "DataFrame.interpolate with method=pad is deprecated" @@ -159,36 +135,33 @@ def test_interpolate_object_convert_no_op(using_copy_on_write): df.interpolate(method="pad", inplace=True) # Now CoW makes a copy, it should not! 
- if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert np.shares_memory(arr_a, get_array(df, "a")) + assert df._mgr._has_no_reference(0) + assert np.shares_memory(arr_a, get_array(df, "a")) -def test_interpolate_object_convert_copies(using_copy_on_write): +def test_interpolate_object_convert_copies(): df = DataFrame({"a": Series([1, 2], dtype=object), "b": 1}) arr_a = get_array(df, "a") msg = "DataFrame.interpolate with method=pad is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): df.interpolate(method="pad", inplace=True) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert not np.shares_memory(arr_a, get_array(df, "a")) + assert df._mgr._has_no_reference(0) + assert not np.shares_memory(arr_a, get_array(df, "a")) -def test_interpolate_downcast(using_copy_on_write): +def test_interpolate_downcast(): df = DataFrame({"a": [1, np.nan, 2.5], "b": 1}) arr_a = get_array(df, "a") msg = "DataFrame.interpolate with method=pad is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): df.interpolate(method="pad", inplace=True, downcast="infer") - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) assert np.shares_memory(arr_a, get_array(df, "a")) -def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write): +def test_interpolate_downcast_reference_triggers_copy(): df = DataFrame({"a": [1, np.nan, 2.5], "b": 1}) df_orig = df.copy() arr_a = get_array(df, "a") @@ -197,45 +170,35 @@ def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write): with tm.assert_produces_warning(FutureWarning, match=msg): df.interpolate(method="pad", inplace=True, downcast="infer") - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert not np.shares_memory(arr_a, get_array(df, "a")) - tm.assert_frame_equal(df_orig, view) - else: - tm.assert_frame_equal(df, view) + assert df._mgr._has_no_reference(0) + assert not np.shares_memory(arr_a, get_array(df, "a")) + tm.assert_frame_equal(df_orig, view) -def test_fillna(using_copy_on_write): +def test_fillna(): df = DataFrame({"a": [1.5, np.nan], "b": 1}) df_orig = df.copy() df2 = df.fillna(5.5) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - else: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) df2.iloc[0, 1] = 100 tm.assert_frame_equal(df_orig, df) -def test_fillna_dict(using_copy_on_write): +def test_fillna_dict(): df = DataFrame({"a": [1.5, np.nan], "b": 1}) df_orig = df.copy() df2 = df.fillna({"a": 100.5}) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - else: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df2.iloc[0, 1] = 100 tm.assert_frame_equal(df_orig, df) @pytest.mark.parametrize("downcast", [None, False]) -def test_fillna_inplace(using_copy_on_write, downcast): +def test_fillna_inplace(downcast): df = DataFrame({"a": [1.5, np.nan], "b": 1}) arr_a = get_array(df, "a") arr_b = get_array(df, "b") @@ -245,12 +208,11 @@ def test_fillna_inplace(using_copy_on_write, downcast): df.fillna(5.5, inplace=True, downcast=downcast) assert np.shares_memory(get_array(df, "a"), arr_a) assert 
np.shares_memory(get_array(df, "b"), arr_b) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert df._mgr._has_no_reference(1) + assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(1) -def test_fillna_inplace_reference(using_copy_on_write): +def test_fillna_inplace_reference(): df = DataFrame({"a": [1.5, np.nan], "b": 1}) df_orig = df.copy() arr_a = get_array(df, "a") @@ -258,20 +220,16 @@ def test_fillna_inplace_reference(using_copy_on_write): view = df[:] df.fillna(5.5, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr_a) - assert np.shares_memory(get_array(df, "b"), arr_b) - assert view._mgr._has_no_reference(0) - assert df._mgr._has_no_reference(0) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "a"), arr_a) - assert np.shares_memory(get_array(df, "b"), arr_b) + assert not np.shares_memory(get_array(df, "a"), arr_a) + assert np.shares_memory(get_array(df, "b"), arr_b) + assert view._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) + tm.assert_frame_equal(view, df_orig) expected = DataFrame({"a": [1.5, 5.5], "b": 1}) tm.assert_frame_equal(df, expected) -def test_fillna_interval_inplace_reference(using_copy_on_write): +def test_fillna_interval_inplace_reference(): # Set dtype explicitly to avoid implicit cast when setting nan ser = Series( interval_range(start=0, end=5), name="a", dtype="interval[float64, right]" @@ -282,94 +240,62 @@ def test_fillna_interval_inplace_reference(using_copy_on_write): view = ser[:] ser.fillna(value=Interval(left=0, right=5), inplace=True) - if using_copy_on_write: - assert not np.shares_memory( - get_array(ser, "a").left.values, get_array(view, "a").left.values - ) - tm.assert_series_equal(view, ser_orig) - else: - assert np.shares_memory( - get_array(ser, "a").left.values, get_array(view, "a").left.values - ) + assert not np.shares_memory( + get_array(ser, "a").left.values, get_array(view, "a").left.values + ) + tm.assert_series_equal(view, ser_orig) -def test_fillna_series_empty_arg(using_copy_on_write): +def test_fillna_series_empty_arg(): ser = Series([1, np.nan, 2]) ser_orig = ser.copy() result = ser.fillna({}) - - if using_copy_on_write: - assert np.shares_memory(get_array(ser), get_array(result)) - else: - assert not np.shares_memory(get_array(ser), get_array(result)) + assert np.shares_memory(get_array(ser), get_array(result)) ser.iloc[0] = 100.5 tm.assert_series_equal(ser_orig, result) -def test_fillna_series_empty_arg_inplace(using_copy_on_write): +def test_fillna_series_empty_arg_inplace(): ser = Series([1, np.nan, 2]) arr = get_array(ser) ser.fillna({}, inplace=True) assert np.shares_memory(get_array(ser), arr) - if using_copy_on_write: - assert ser._mgr._has_no_reference(0) + assert ser._mgr._has_no_reference(0) -def test_fillna_ea_noop_shares_memory( - using_copy_on_write, any_numeric_ea_and_arrow_dtype -): +def test_fillna_ea_noop_shares_memory(any_numeric_ea_and_arrow_dtype): df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) df_orig = df.copy() df2 = df.fillna(100) assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not df2._mgr._has_no_reference(1) - elif isinstance(df.dtypes.iloc[0], ArrowDtype): - # arrow is immutable, so no-ops do not need to copy underlying array - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - else: - assert not 
np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not df2._mgr._has_no_reference(1) tm.assert_frame_equal(df_orig, df) df2.iloc[0, 1] = 100 - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert df2._mgr._has_no_reference(1) - assert df._mgr._has_no_reference(1) + assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert df2._mgr._has_no_reference(1) + assert df._mgr._has_no_reference(1) tm.assert_frame_equal(df_orig, df) -def test_fillna_inplace_ea_noop_shares_memory( - using_copy_on_write, any_numeric_ea_and_arrow_dtype -): +def test_fillna_inplace_ea_noop_shares_memory(any_numeric_ea_and_arrow_dtype): df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) df_orig = df.copy() view = df[:] df.fillna(100, inplace=True) - - if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) - else: - # MaskedArray can actually respect inplace=True - assert np.shares_memory(get_array(df, "a"), get_array(view, "a")) + assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) - if using_copy_on_write: - assert not df._mgr._has_no_reference(1) - assert not view._mgr._has_no_reference(1) + assert not df._mgr._has_no_reference(1) + assert not view._mgr._has_no_reference(1) df.iloc[0, 1] = 100 - if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: - tm.assert_frame_equal(df_orig, view) - else: - # we actually have a view - tm.assert_frame_equal(df, view) + tm.assert_frame_equal(df_orig, view) def test_fillna_chained_assignment(): diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 356b31a82f5ff..73919997fa7fd 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -86,7 +86,6 @@ def test_copy_shallow(using_copy_on_write): lambda df, copy: df.rename_axis(columns="test", copy=copy), lambda df, copy: df.astype({"b": "int64"}, copy=copy), # lambda df, copy: df.swaplevel(0, 0, copy=copy), - lambda df, copy: df.swapaxes(0, 0, copy=copy), lambda df, copy: df.truncate(0, 5, copy=copy), lambda df, copy: df.infer_objects(copy=copy), lambda df, copy: df.to_timestamp(copy=copy), @@ -105,7 +104,6 @@ def test_copy_shallow(using_copy_on_write): "rename_axis1", "astype", # "swaplevel", # only series - "swapaxes", "truncate", "infer_objects", "to_timestamp", @@ -127,13 +125,7 @@ def test_methods_copy_keyword(request, method, copy, using_copy_on_write): index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels") df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index) - - if "swapaxes" in request.node.callspec.id: - msg = "'DataFrame.swapaxes' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df2 = method(df, copy=copy) - else: - df2 = method(df, copy=copy) + df2 = method(df, copy=copy) share_memory = using_copy_on_write or copy is False @@ -161,7 +153,6 @@ def test_methods_copy_keyword(request, method, copy, using_copy_on_write): lambda ser, copy: ser.rename_axis(index="test", copy=copy), lambda ser, copy: ser.astype("int64", copy=copy), lambda ser, copy: ser.swaplevel(0, 1, copy=copy), - lambda ser, copy: ser.swapaxes(0, 0, copy=copy), lambda ser, copy: ser.truncate(0, 5, copy=copy), lambda ser, copy: 
ser.infer_objects(copy=copy), lambda ser, copy: ser.to_timestamp(copy=copy), @@ -180,7 +171,6 @@ def test_methods_copy_keyword(request, method, copy, using_copy_on_write): "rename_axis0", "astype", "swaplevel", - "swapaxes", "truncate", "infer_objects", "to_timestamp", @@ -204,13 +194,7 @@ def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write) index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]]) ser = Series([1, 2, 3], index=index) - - if "swapaxes" in request.node.callspec.id: - msg = "'Series.swapaxes' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - ser2 = method(ser, copy=copy) - else: - ser2 = method(ser, copy=copy) + ser2 = method(ser, copy=copy) share_memory = using_copy_on_write or copy is False @@ -688,55 +672,6 @@ def test_to_frame(using_copy_on_write): tm.assert_frame_equal(df, expected) -@pytest.mark.parametrize("ax", ["index", "columns"]) -def test_swapaxes_noop(using_copy_on_write, ax): - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) - df_orig = df.copy() - msg = "'DataFrame.swapaxes' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df2 = df.swapaxes(ax, ax) - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - - # mutating df2 triggers a copy-on-write for that column/block - df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - - -def test_swapaxes_single_block(using_copy_on_write): - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["x", "y", "z"]) - df_orig = df.copy() - msg = "'DataFrame.swapaxes' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df2 = df.swapaxes("index", "columns") - - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "x"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a")) - - # mutating df2 triggers a copy-on-write for that column/block - df2.iloc[0, 0] = 0 - if using_copy_on_write: - assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - - -def test_swapaxes_read_only_array(): - df = DataFrame({"a": [1, 2], "b": 3}) - msg = "'DataFrame.swapaxes' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df = df.swapaxes(axis1="index", axis2="columns") - df.iloc[0, 0] = 100 - expected = DataFrame({0: [100, 3], 1: [2, 3]}, index=["a", "b"]) - tm.assert_frame_equal(df, expected) - - @pytest.mark.parametrize( "method, idx", [ diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index f2ee26c0b9009..63254f1244a2e 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -24,21 +24,19 @@ # 1 ], ) -def test_replace(using_copy_on_write, replace_kwargs): +def test_replace(replace_kwargs): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ["foo", "bar", "baz"]}) df_orig = df.copy() df_replaced = df.replace(**replace_kwargs) - if using_copy_on_write: - if (df_replaced["b"] == df["b"]).all(): - assert np.shares_memory(get_array(df_replaced, "b"), get_array(df, "b")) - assert np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) + if (df_replaced["b"] == df["b"]).all(): + assert np.shares_memory(get_array(df_replaced, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(df_replaced, 
"c"), get_array(df, "c")) # mutating squeezed df triggers a copy-on-write for that column/block df_replaced.loc[0, "c"] = -1 - if using_copy_on_write: - assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) + assert not np.shares_memory(get_array(df_replaced, "c"), get_array(df, "c")) if "a" in replace_kwargs["to_replace"]: arr = get_array(df_replaced, "a") @@ -47,26 +45,22 @@ def test_replace(using_copy_on_write, replace_kwargs): tm.assert_frame_equal(df, df_orig) -def test_replace_regex_inplace_refs(using_copy_on_write): +def test_replace_regex_inplace_refs(): df = DataFrame({"a": ["aaa", "bbb"]}) df_orig = df.copy() view = df[:] arr = get_array(df, "a") df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) - if using_copy_on_write: - assert not np.shares_memory(arr, get_array(df, "a")) - assert df._mgr._has_no_reference(0) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(arr, get_array(df, "a")) + assert not np.shares_memory(arr, get_array(df, "a")) + assert df._mgr._has_no_reference(0) + tm.assert_frame_equal(view, df_orig) -def test_replace_regex_inplace(using_copy_on_write): +def test_replace_regex_inplace(): df = DataFrame({"a": ["aaa", "bbb"]}) arr = get_array(df, "a") df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) assert np.shares_memory(arr, get_array(df, "a")) df_orig = df.copy() @@ -75,89 +69,64 @@ def test_replace_regex_inplace(using_copy_on_write): assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) -def test_replace_regex_inplace_no_op(using_copy_on_write): +def test_replace_regex_inplace_no_op(): df = DataFrame({"a": [1, 2]}) arr = get_array(df, "a") df.replace(to_replace=r"^a.$", value="new", inplace=True, regex=True) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) assert np.shares_memory(arr, get_array(df, "a")) df_orig = df.copy() df2 = df.replace(to_replace=r"^x.$", value="new", regex=True) tm.assert_frame_equal(df_orig, df) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) -def test_replace_mask_all_false_second_block(using_copy_on_write): +def test_replace_mask_all_false_second_block(): df = DataFrame({"a": [1.5, 2, 3], "b": 100.5, "c": 1, "d": 2}) df_orig = df.copy() df2 = df.replace(to_replace=1.5, value=55.5) - if using_copy_on_write: - # TODO: Block splitting would allow us to avoid copying b - assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - - else: - assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + # TODO: Block splitting would allow us to avoid copying b + assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df2.loc[0, "c"] = 1 tm.assert_frame_equal(df, df_orig) # Original is unchanged - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - # TODO: This should split and not copy the whole block - # assert np.shares_memory(get_array(df, "d"), get_array(df2, "d")) + assert not np.shares_memory(get_array(df, "c"), 
get_array(df2, "c")) + assert np.shares_memory(get_array(df, "d"), get_array(df2, "d")) -def test_replace_coerce_single_column(using_copy_on_write): +def test_replace_coerce_single_column(): df = DataFrame({"a": [1.5, 2, 3], "b": 100.5}) df_orig = df.copy() df2 = df.replace(to_replace=1.5, value="a") + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - - else: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - - if using_copy_on_write: - df2.loc[0, "b"] = 0.5 - tm.assert_frame_equal(df, df_orig) # Original is unchanged - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + df2.loc[0, "b"] = 0.5 + tm.assert_frame_equal(df, df_orig) # Original is unchanged + assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) -def test_replace_to_replace_wrong_dtype(using_copy_on_write): +def test_replace_to_replace_wrong_dtype(): df = DataFrame({"a": [1.5, 2, 3], "b": 100.5}) df_orig = df.copy() df2 = df.replace(to_replace="xxx", value=1.5) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - - else: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) - assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) + assert np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df2.loc[0, "b"] = 0.5 tm.assert_frame_equal(df, df_orig) # Original is unchanged - - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) + assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b")) -def test_replace_list_categorical(using_copy_on_write): +def test_replace_list_categorical(): df = DataFrame({"a": ["a", "b", "c"]}, dtype="category") arr = get_array(df, "a") msg = ( @@ -167,8 +136,7 @@ def test_replace_list_categorical(using_copy_on_write): with tm.assert_produces_warning(FutureWarning, match=msg): df.replace(["c"], value="a", inplace=True) assert np.shares_memory(arr.codes, get_array(df, "a").codes) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) df_orig = df.copy() with tm.assert_produces_warning(FutureWarning, match=msg): @@ -178,7 +146,7 @@ def test_replace_list_categorical(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_replace_list_inplace_refs_categorical(using_copy_on_write): +def test_replace_list_inplace_refs_categorical(): df = DataFrame({"a": ["a", "b", "c"]}, dtype="category") view = df[:] df_orig = df.copy() @@ -188,60 +156,47 @@ def test_replace_list_inplace_refs_categorical(using_copy_on_write): ) with tm.assert_produces_warning(FutureWarning, match=msg): df.replace(["c"], value="a", inplace=True) - if using_copy_on_write: - assert not np.shares_memory( - get_array(view, "a").codes, get_array(df, "a").codes - ) - tm.assert_frame_equal(df_orig, view) - else: - # This could be inplace - assert not np.shares_memory( - get_array(view, "a").codes, get_array(df, "a").codes - ) + assert not np.shares_memory(get_array(view, "a").codes, get_array(df, "a").codes) + tm.assert_frame_equal(df_orig, view) 
@pytest.mark.parametrize("to_replace", [1.5, [1.5], []]) -def test_replace_inplace(using_copy_on_write, to_replace): +def test_replace_inplace(to_replace): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") df.replace(to_replace=1.5, value=15.5, inplace=True) assert np.shares_memory(get_array(df, "a"), arr_a) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) @pytest.mark.parametrize("to_replace", [1.5, [1.5]]) -def test_replace_inplace_reference(using_copy_on_write, to_replace): +def test_replace_inplace_reference(to_replace): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") view = df[:] df.replace(to_replace=to_replace, value=15.5, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr_a) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - else: - assert np.shares_memory(get_array(df, "a"), arr_a) + assert not np.shares_memory(get_array(df, "a"), arr_a) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) @pytest.mark.parametrize("to_replace", ["a", 100.5]) -def test_replace_inplace_reference_no_op(using_copy_on_write, to_replace): +def test_replace_inplace_reference_no_op(to_replace): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") view = df[:] df.replace(to_replace=to_replace, value=15.5, inplace=True) assert np.shares_memory(get_array(df, "a"), arr_a) - if using_copy_on_write: - assert not df._mgr._has_no_reference(0) - assert not view._mgr._has_no_reference(0) + assert not df._mgr._has_no_reference(0) + assert not view._mgr._has_no_reference(0) @pytest.mark.parametrize("to_replace", [1, [1]]) @pytest.mark.parametrize("val", [1, 1.5]) -def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_replace): +def test_replace_categorical_inplace_reference(val, to_replace): df = DataFrame({"a": Categorical([1, 2, 3])}) df_orig = df.copy() arr_a = get_array(df, "a") @@ -254,17 +209,14 @@ def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_repl with tm.assert_produces_warning(warn, match=msg): df.replace(to_replace=to_replace, value=val, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "a").codes, arr_a.codes) + assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) + tm.assert_frame_equal(view, df_orig) @pytest.mark.parametrize("val", [1, 1.5]) -def test_replace_categorical_inplace(using_copy_on_write, val): +def test_replace_categorical_inplace(val): df = DataFrame({"a": Categorical([1, 2, 3])}) arr_a = get_array(df, "a") msg = ( @@ -276,15 +228,14 @@ def test_replace_categorical_inplace(using_copy_on_write, val): df.replace(to_replace=1, value=val, inplace=True) assert np.shares_memory(get_array(df, "a").codes, arr_a.codes) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) expected = DataFrame({"a": Categorical([val, 2, 3])}) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("val", [1, 1.5]) -def test_replace_categorical(using_copy_on_write, val): +def test_replace_categorical(val): df = DataFrame({"a": Categorical([1, 2, 3])}) df_orig = df.copy() msg = ( @@ -295,9 +246,8 @@ def 
test_replace_categorical(using_copy_on_write, val): with tm.assert_produces_warning(warn, match=msg): df2 = df.replace(to_replace=1, value=val) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert df2._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) + assert df2._mgr._has_no_reference(0) assert not np.shares_memory(get_array(df, "a").codes, get_array(df2, "a").codes) tm.assert_frame_equal(df, df_orig) @@ -307,7 +257,7 @@ def test_replace_categorical(using_copy_on_write, val): @pytest.mark.parametrize("method", ["where", "mask"]) -def test_masking_inplace(using_copy_on_write, method): +def test_masking_inplace(method): df = DataFrame({"a": [1.5, 2, 3]}) df_orig = df.copy() arr_a = get_array(df, "a") @@ -316,59 +266,43 @@ def test_masking_inplace(using_copy_on_write, method): method = getattr(df, method) method(df["a"] > 1.6, -1, inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr_a) - assert df._mgr._has_no_reference(0) - assert view._mgr._has_no_reference(0) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "a"), arr_a) + assert not np.shares_memory(get_array(df, "a"), arr_a) + assert df._mgr._has_no_reference(0) + assert view._mgr._has_no_reference(0) + tm.assert_frame_equal(view, df_orig) -def test_replace_empty_list(using_copy_on_write): +def test_replace_empty_list(): df = DataFrame({"a": [1, 2]}) df2 = df.replace([], []) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - assert not df._mgr._has_no_reference(0) - else: - assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) - + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert not df._mgr._has_no_reference(0) arr_a = get_array(df, "a") df.replace([], []) - if using_copy_on_write: - assert np.shares_memory(get_array(df, "a"), arr_a) - assert not df._mgr._has_no_reference(0) - assert not df2._mgr._has_no_reference(0) + assert np.shares_memory(get_array(df, "a"), arr_a) + assert not df._mgr._has_no_reference(0) + assert not df2._mgr._has_no_reference(0) @pytest.mark.parametrize("value", ["d", None]) -def test_replace_object_list_inplace(using_copy_on_write, value): +def test_replace_object_list_inplace(value): df = DataFrame({"a": ["a", "b", "c"]}) arr = get_array(df, "a") df.replace(["c"], value, inplace=True) - if using_copy_on_write or value is None: - assert np.shares_memory(arr, get_array(df, "a")) - else: - # This could be inplace - assert not np.shares_memory(arr, get_array(df, "a")) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert np.shares_memory(arr, get_array(df, "a")) + assert df._mgr._has_no_reference(0) -def test_replace_list_multiple_elements_inplace(using_copy_on_write): +def test_replace_list_multiple_elements_inplace(): df = DataFrame({"a": [1, 2, 3]}) arr = get_array(df, "a") df.replace([1, 2], 4, inplace=True) - if using_copy_on_write: - assert np.shares_memory(arr, get_array(df, "a")) - assert df._mgr._has_no_reference(0) - else: - assert np.shares_memory(arr, get_array(df, "a")) + assert np.shares_memory(arr, get_array(df, "a")) + assert df._mgr._has_no_reference(0) -def test_replace_list_none(using_copy_on_write): +def test_replace_list_none(): df = DataFrame({"a": ["a", "b", "c"]}) df_orig = df.copy() @@ -378,37 +312,32 @@ def test_replace_list_none(using_copy_on_write): assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) -def 
test_replace_list_none_inplace_refs(using_copy_on_write): +def test_replace_list_none_inplace_refs(): df = DataFrame({"a": ["a", "b", "c"]}) arr = get_array(df, "a") df_orig = df.copy() view = df[:] df.replace(["a"], value=None, inplace=True) - if using_copy_on_write: - assert df._mgr._has_no_reference(0) - assert not np.shares_memory(arr, get_array(df, "a")) - tm.assert_frame_equal(df_orig, view) - else: - assert np.shares_memory(arr, get_array(df, "a")) + assert df._mgr._has_no_reference(0) + assert not np.shares_memory(arr, get_array(df, "a")) + tm.assert_frame_equal(df_orig, view) -def test_replace_columnwise_no_op_inplace(using_copy_on_write): +def test_replace_columnwise_no_op_inplace(): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) view = df[:] df_orig = df.copy() df.replace({"a": 10}, 100, inplace=True) - if using_copy_on_write: - assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) - df.iloc[0, 0] = 100 - tm.assert_frame_equal(view, df_orig) + assert np.shares_memory(get_array(view, "a"), get_array(df, "a")) + df.iloc[0, 0] = 100 + tm.assert_frame_equal(view, df_orig) -def test_replace_columnwise_no_op(using_copy_on_write): +def test_replace_columnwise_no_op(): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) df_orig = df.copy() df2 = df.replace({"a": 10}, 100) - if using_copy_on_write: - assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 0] = 100 tm.assert_frame_equal(df, df_orig) @@ -425,15 +354,12 @@ def test_replace_chained_assignment(): tm.assert_frame_equal(df, df_orig) -def test_replace_listlike(using_copy_on_write): +def test_replace_listlike(): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) df_orig = df.copy() result = df.replace([200, 201], [11, 11]) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - else: - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) result.iloc[0, 0] = 100 tm.assert_frame_equal(df, df) @@ -443,7 +369,7 @@ def test_replace_listlike(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_replace_listlike_inplace(using_copy_on_write): +def test_replace_listlike_inplace(): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) arr = get_array(df, "a") df.replace([200, 2], [10, 11], inplace=True) @@ -452,9 +378,5 @@ def test_replace_listlike_inplace(using_copy_on_write): view = df[:] df_orig = df.copy() df.replace([200, 3], [10, 11], inplace=True) - if using_copy_on_write: - assert not np.shares_memory(get_array(df, "a"), arr) - tm.assert_frame_equal(view, df_orig) - else: - assert np.shares_memory(get_array(df, "a"), arr) - tm.assert_frame_equal(df, view) + assert not np.shares_memory(get_array(df, "a"), arr) + tm.assert_frame_equal(view, df_orig) diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py index 6104699cbc51b..2f28e9826c7a1 100644 --- a/pandas/tests/copy_view/test_setitem.py +++ b/pandas/tests/copy_view/test_setitem.py @@ -28,7 +28,7 @@ def test_set_column_with_array(): tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c")) -def test_set_column_with_series(using_copy_on_write): +def test_set_column_with_series(): # Case: setting a series as a new column (df[col] = s) copies that data # (with delayed copy with CoW) df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) @@ -36,11 +36,7 @@ def test_set_column_with_series(using_copy_on_write): df["c"] = 
ser - if using_copy_on_write: - assert np.shares_memory(get_array(df, "c"), get_array(ser)) - else: - # the series data is copied - assert not np.shares_memory(get_array(df, "c"), get_array(ser)) + assert np.shares_memory(get_array(df, "c"), get_array(ser)) # and modifying the series does not modify the DataFrame ser.iloc[0] = 0 @@ -48,7 +44,7 @@ def test_set_column_with_series(using_copy_on_write): tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c")) -def test_set_column_with_index(using_copy_on_write): +def test_set_column_with_index(): # Case: setting an index as a new column (df[col] = idx) copies that data df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) idx = Index([1, 2, 3]) @@ -66,7 +62,7 @@ def test_set_column_with_index(using_copy_on_write): assert not np.shares_memory(get_array(df, "d"), arr) -def test_set_columns_with_dataframe(using_copy_on_write): +def test_set_columns_with_dataframe(): # Case: setting a DataFrame as new columns copies that data # (with delayed copy with CoW) df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) @@ -74,18 +70,13 @@ def test_set_columns_with_dataframe(using_copy_on_write): df[["c", "d"]] = df2 - if using_copy_on_write: - assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - else: - # the data is copied - assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c")) - + assert np.shares_memory(get_array(df, "c"), get_array(df2, "c")) # and modifying the set DataFrame does not modify the original DataFrame df2.iloc[0, 0] = 0 tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c")) -def test_setitem_series_no_copy(using_copy_on_write): +def test_setitem_series_no_copy(): # Case: setting a Series as column into a DataFrame can delay copying that data df = DataFrame({"a": [1, 2, 3]}) rhs = Series([4, 5, 6]) @@ -93,42 +84,39 @@ def test_setitem_series_no_copy(using_copy_on_write): # adding a new column df["b"] = rhs - if using_copy_on_write: - assert np.shares_memory(get_array(rhs), get_array(df, "b")) + assert np.shares_memory(get_array(rhs), get_array(df, "b")) df.iloc[0, 1] = 100 tm.assert_series_equal(rhs, rhs_orig) -def test_setitem_series_no_copy_single_block(using_copy_on_write): +def test_setitem_series_no_copy_single_block(): # Overwriting an existing column that is a single block df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) rhs = Series([4, 5, 6]) rhs_orig = rhs.copy() df["a"] = rhs - if using_copy_on_write: - assert np.shares_memory(get_array(rhs), get_array(df, "a")) + assert np.shares_memory(get_array(rhs), get_array(df, "a")) df.iloc[0, 0] = 100 tm.assert_series_equal(rhs, rhs_orig) -def test_setitem_series_no_copy_split_block(using_copy_on_write): +def test_setitem_series_no_copy_split_block(): # Overwriting an existing column that is part of a larger block df = DataFrame({"a": [1, 2, 3], "b": 1}) rhs = Series([4, 5, 6]) rhs_orig = rhs.copy() df["b"] = rhs - if using_copy_on_write: - assert np.shares_memory(get_array(rhs), get_array(df, "b")) + assert np.shares_memory(get_array(rhs), get_array(df, "b")) df.iloc[0, 1] = 100 tm.assert_series_equal(rhs, rhs_orig) -def test_setitem_series_column_midx_broadcasting(using_copy_on_write): +def test_setitem_series_column_midx_broadcasting(): # Setting a Series to multiple columns will repeat the data # (currently copying the data eagerly) df = DataFrame( @@ -138,11 +126,10 @@ def test_setitem_series_column_midx_broadcasting(using_copy_on_write): rhs = Series([10, 11]) df["a"] = rhs assert not np.shares_memory(get_array(rhs), df._get_column_array(0)) - if 
using_copy_on_write: - assert df._mgr._has_no_reference(0) + assert df._mgr._has_no_reference(0) -def test_set_column_with_inplace_operator(using_copy_on_write): +def test_set_column_with_inplace_operator(): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) # this should not raise any warning diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index f8bb0c92cf59e..3fb2fc09eaa79 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -398,10 +398,6 @@ def test_setitem_series(self, data, full_indexer): def test_setitem_frame_2d_values(self, data): # GH#44514 df = pd.DataFrame({"A": data}) - using_copy_on_write = pd.options.mode.copy_on_write - - blk_data = df._mgr.arrays[0] - orig = df.copy() df.iloc[:] = df.copy() @@ -412,9 +408,6 @@ def test_setitem_frame_2d_values(self, data): df.iloc[:] = df.values tm.assert_frame_equal(df, orig) - if not using_copy_on_write: - # GH#33457 Check that this setting occurred in-place - assert df._mgr.arrays[0] is blk_data df.iloc[:-1] = df.values[:-1] tm.assert_frame_equal(df, orig) diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index 3a3844d5a8b7a..5ae0864190f10 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -212,11 +212,3 @@ def invalid_scalar(data): If the array can hold any item (i.e. object dtype), then use pytest.skip. """ return object.__new__(object) - - -@pytest.fixture -def using_copy_on_write() -> bool: - """ - Fixture to check if Copy-on-Write is enabled. - """ - return True diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 69958b51c9e47..816b7ace69300 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -245,18 +245,6 @@ def test_fillna_series_method(self, data_missing, fillna_method): ): super().test_fillna_series_method(data_missing, fillna_method) - def test_fillna_copy_frame(self, data_missing, using_copy_on_write): - warn = DeprecationWarning if not using_copy_on_write else None - msg = "ExtensionArray.fillna added a 'copy' keyword" - with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): - super().test_fillna_copy_frame(data_missing) - - def test_fillna_copy_series(self, data_missing, using_copy_on_write): - warn = DeprecationWarning if not using_copy_on_write else None - msg = "ExtensionArray.fillna added a 'copy' keyword" - with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): - super().test_fillna_copy_series(data_missing) - @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna): all_data = all_data[:10] @@ -554,12 +542,11 @@ def test_to_numpy_keyword(): tm.assert_numpy_array_equal(result, expected) -def test_array_copy_on_write(using_copy_on_write): +def test_array_copy_on_write(): df = pd.DataFrame({"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype="object") df2 = df.astype(DecimalDtype()) df.iloc[0, 0] = 0 - if using_copy_on_write: - expected = pd.DataFrame( - {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype() - ) - tm.assert_equal(df2.values, expected.values) + expected = pd.DataFrame( + {"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype() + ) + tm.assert_equal(df2.values, expected.values) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 50f2fe03cfa13..1e4897368cc7a 100644 
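# ---------------------------------------------------------------------------
# [editor's note: illustrative sketch, not part of the patch] The setitem
# tests above rely on the delayed-copy behaviour of `df[col] = ser` under
# Copy-on-Write: the new column initially shares the Series' buffer, and the
# copy happens only when one side is mutated. Assumes a CoW-enabled pandas
# and the `get_array` helper from pandas.tests.copy_view.util.
import numpy as np
import pandas as pd
from pandas.tests.copy_view.util import get_array

if pd.__version__.startswith("2."):
    pd.set_option("mode.copy_on_write", True)  # always on from pandas 3.0

df = pd.DataFrame({"a": [1, 2, 3]})
rhs = pd.Series([4, 5, 6])
df["b"] = rhs  # no copy yet: column "b" shares rhs's buffer
assert np.shares_memory(get_array(rhs), get_array(df, "b"))

df.iloc[0, 1] = 100  # mutating df triggers the deferred copy
assert not np.shares_memory(get_array(rhs), get_array(df, "b"))
assert rhs.iloc[0] == 4  # the original Series is untouched
# ---------------------------------------------------------------------------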
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -217,12 +217,8 @@ def test_equals(self, data, na_value, as_series):
     def test_fillna_copy_frame(self, data_missing):
         super().test_fillna_copy_frame(data_missing)

-    def test_equals_same_data_different_object(
-        self, data, using_copy_on_write, request
-    ):
-        if using_copy_on_write:
-            mark = pytest.mark.xfail(reason="Fails with CoW")
-            request.applymarker(mark)
+    @pytest.mark.xfail(reason="Fails with CoW")
+    def test_equals_same_data_different_object(self, data, request):
         super().test_equals_same_data_different_object(data)

     @pytest.mark.xfail(reason="failing on np.array(self, dtype=str)")
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index d8f14383ef114..3ec1d3c8eae3d 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -277,7 +277,7 @@ def test_fillna_frame(self, data_missing):

     _combine_le_expected_dtype = "Sparse[bool]"

-    def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
+    def test_fillna_copy_frame(self, data_missing):
         arr = data_missing.take([1, 1])
         df = pd.DataFrame({"A": arr}, copy=False)

@@ -285,24 +285,17 @@ def test_fillna_copy_frame(self, data_missing):
         result = df.fillna(filled_val)

         if hasattr(df._mgr, "blocks"):
-            if using_copy_on_write:
-                assert df.values.base is result.values.base
-            else:
-                assert df.values.base is not result.values.base
+            assert df.values.base is result.values.base
         assert df.A._values.to_dense() is arr.to_dense()

-    def test_fillna_copy_series(self, data_missing, using_copy_on_write):
+    def test_fillna_copy_series(self, data_missing):
         arr = data_missing.take([1, 1])
         ser = pd.Series(arr, copy=False)

         filled_val = ser[0]
         result = ser.fillna(filled_val)

-        if using_copy_on_write:
-            assert ser._values is result._values
-
-        else:
-            assert ser._values is not result._values
+        assert ser._values is result._values

         assert ser._values.to_dense() is arr.to_dense()

     @pytest.mark.xfail(reason="Not Applicable")
diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py
index 87d1745774487..f6b71626b6fee 100644
--- a/pandas/tests/frame/methods/test_asfreq.py
+++ b/pandas/tests/frame/methods/test_asfreq.py
@@ -8,6 +8,7 @@
 from pandas import (
     DataFrame,
     DatetimeIndex,
+    PeriodIndex,
     Series,
     date_range,
     period_range,
@@ -257,3 +258,28 @@ def test_asfreq_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr):
         with tm.assert_produces_warning(FutureWarning, match=depr_msg):
             result = df.asfreq(freq=freq_depr)
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "freq, error_msg",
+        [
+            (
+                "2MS",
+                "MS is not supported as period frequency",
+            ),
+            (
+                offsets.MonthBegin(),
+                r"\<MonthBegin\> is not supported as period frequency",
+            ),
+            (
+                offsets.DateOffset(months=2),
+                r"\<DateOffset: months=2\> is not supported as period frequency",
+            ),
+        ],
+    )
+    def test_asfreq_unsupported_freq(self, freq, error_msg):
+        # https://github.com/pandas-dev/pandas/issues/56718
+        index = PeriodIndex(["2020-01-01", "2021-01-01"], freq="M")
+        df = DataFrame({"a": Series([0, 1], index=index)})
+
+        with pytest.raises(ValueError, match=error_msg):
+            df.asfreq(freq=freq)
diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py
index c477c9c1852b7..5bcfaf8b199bf 100644
--- a/pandas/tests/frame/methods/test_shift.py
+++ b/pandas/tests/frame/methods/test_shift.py
@@ -751,3 +751,9 @@ def
test_shift_with_iterable_check_other_arguments(self): msg = "Cannot specify `suffix` if `periods` is an int." with pytest.raises(ValueError, match=msg): df.shift(1, suffix="fails") + + def test_shift_axis_one_empty(self): + # GH#57301 + df = DataFrame() + result = df.shift(1, axis=1) + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/frame/methods/test_swapaxes.py b/pandas/tests/frame/methods/test_swapaxes.py deleted file mode 100644 index 53a4691d48b1c..0000000000000 --- a/pandas/tests/frame/methods/test_swapaxes.py +++ /dev/null @@ -1,37 +0,0 @@ -import numpy as np -import pytest - -from pandas import DataFrame -import pandas._testing as tm - - -class TestSwapAxes: - def test_swapaxes(self): - df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) - msg = "'DataFrame.swapaxes' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) - tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) - - def test_swapaxes_noop(self): - df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) - msg = "'DataFrame.swapaxes' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - tm.assert_frame_equal(df, df.swapaxes(0, 0)) - - def test_swapaxes_invalid_axis(self): - df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) - msg = "'DataFrame.swapaxes' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - msg = "No axis named 2 for object type DataFrame" - with pytest.raises(ValueError, match=msg): - df.swapaxes(2, 5) - - def test_round_empty_not_input(self): - # GH#51032 - df = DataFrame({"a": [1, 2]}) - msg = "'DataFrame.swapaxes' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.swapaxes("index", "index") - tm.assert_frame_equal(df, result) - assert df is not result diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 277ea4e72c6a2..cdb34a00b952c 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -232,11 +232,8 @@ def test_size_compat(self, frame_or_series): def test_split_compat(self, frame_or_series): # xref GH8846 o = construct(frame_or_series, shape=10) - with tm.assert_produces_warning( - FutureWarning, match=".swapaxes' is deprecated", check_stacklevel=False - ): - assert len(np.array_split(o, 5)) == 5 - assert len(np.array_split(o, 2)) == 2 + assert len(np.array_split(o, 5)) == 5 + assert len(np.array_split(o, 2)) == 2 # See gh-12301 def test_stat_unexpected_keyword(self, frame_or_series): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index db45067162bc3..2d06c2f258539 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2423,7 +2423,9 @@ def test_rolling_wrong_param_min_period(): test_df = DataFrame([name_l, val_l]).T test_df.columns = ["name", "val"] - result_error_msg = r"__init__\(\) got an unexpected keyword argument 'min_period'" + result_error_msg = ( + r"^[a-zA-Z._]*\(\) got an unexpected keyword argument 'min_period'$" + ) with pytest.raises(TypeError, match=result_error_msg): test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum() diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index e5af61618882c..c2456790e4953 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -363,7 +363,7 @@ def test_groupby_raises_timedelta(func): 
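# ---------------------------------------------------------------------------
# [editor's note: illustrative sketch, not part of the patch] Context for the
# loosened `min_period` pattern in test_rolling_wrong_param_min_period above:
# newer CPython releases prefix "got an unexpected keyword argument" errors
# with the qualified callable name, so the message reads "__init__() got ..."
# on older interpreters and "Rolling.__init__() got ..." on newer ones. The
# generalized regex (letters, dots and underscores before the parentheses)
# accepts both spellings:
import re

pattern = r"^[a-zA-Z._]*\(\) got an unexpected keyword argument 'min_period'$"
for msg in (
    "__init__() got an unexpected keyword argument 'min_period'",
    "Rolling.__init__() got an unexpected keyword argument 'min_period'",
):
    assert re.search(pattern, msg)  # pytest.raises(match=...) also uses re.search
# ---------------------------------------------------------------------------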
@pytest.mark.parametrize("how", ["method", "agg", "transform"]) def test_groupby_raises_category( - how, by, groupby_series, groupby_func, using_copy_on_write, df_with_cat_col + how, by, groupby_series, groupby_func, df_with_cat_col ): # GH#50749 df = df_with_cat_col @@ -416,13 +416,7 @@ def test_groupby_raises_category( r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'", ), "ffill": (None, ""), - "fillna": ( - TypeError, - r"Cannot setitem on a Categorical with a new category \(0\), " - "set the categories first", - ) - if not using_copy_on_write - else (None, ""), # no-op with CoW + "fillna": (None, ""), # no-op with CoW "first": (None, ""), "idxmax": (None, ""), "idxmin": (None, ""), @@ -555,7 +549,6 @@ def test_groupby_raises_category_on_category( groupby_series, groupby_func, observed, - using_copy_on_write, df_with_cat_col, ): # GH#50749 @@ -616,13 +609,7 @@ def test_groupby_raises_category_on_category( ), "diff": (TypeError, "unsupported operand type"), "ffill": (None, ""), - "fillna": ( - TypeError, - r"Cannot setitem on a Categorical with a new category \(0\), " - "set the categories first", - ) - if not using_copy_on_write - else (None, ""), # no-op with CoW + "fillna": (None, ""), # no-op with CoW "first": (None, ""), "idxmax": (ValueError, "empty group due to unobserved categories") if empty_groups diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index ef7bfb11d81d8..70ddd65c02d14 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -1,5 +1,3 @@ -import gc - import numpy as np import pytest @@ -16,25 +14,7 @@ from pandas.io.formats.style import Styler - -@pytest.fixture(autouse=True) -def mpl_cleanup(): - # matplotlib/testing/decorators.py#L24 - # 1) Resets units registry - # 2) Resets rc_context - # 3) Closes all figures - mpl = pytest.importorskip("matplotlib") - mpl_units = pytest.importorskip("matplotlib.units") - plt = pytest.importorskip("matplotlib.pyplot") - orig_units_registry = mpl_units.registry.copy() - with mpl.rc_context(): - mpl.use("template") - yield - mpl_units.registry.clear() - mpl_units.registry.update(orig_units_registry) - plt.close("all") - # https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.6.0.html#garbage-collection-is-no-longer-run-on-figure-close # noqa: E501 - gc.collect(1) +pytestmark = pytest.mark.usefixtures("mpl_cleanup") @pytest.fixture diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b56993829b0ae..6f6252e3929fb 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1273,17 +1273,6 @@ def test_timezone_aware_index(self, fp, timezone_aware_date_list): expected.index.name = "index" check_round_trip(df, fp, expected=expected) - def test_use_nullable_dtypes_not_supported(self, fp): - df = pd.DataFrame({"a": [1, 2]}) - - with tm.ensure_clean() as path: - df.to_parquet(path) - with pytest.raises(ValueError, match="not supported for the fastparquet"): - with tm.assert_produces_warning(FutureWarning): - read_parquet(path, engine="fastparquet", use_nullable_dtypes=True) - with pytest.raises(ValueError, match="not supported for the fastparquet"): - read_parquet(path, engine="fastparquet", dtype_backend="pyarrow") - def test_close_file_handle_on_read_error(self): with tm.ensure_clean("test.parquet") as path: pathlib.Path(path).write_bytes(b"breakit") diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py 
index 8bb67fac19c65..c8f4d68230e5b 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1776,7 +1776,7 @@ def test_api_date_parsing(conn, request): @pytest.mark.parametrize("conn", all_connectable_types) -@pytest.mark.parametrize("error", ["ignore", "raise", "coerce"]) +@pytest.mark.parametrize("error", ["raise", "coerce"]) @pytest.mark.parametrize( "read_sql, text, mode", [ diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index c12bcfb91a4c7..42a9e84218a81 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -2001,21 +2001,6 @@ def test_direct_read(datapath, monkeypatch): assert reader._path_or_buf is bio -def test_statareader_warns_when_used_without_context(datapath): - file_path = datapath("io", "data", "stata", "stata-compat-118.dta") - with tm.assert_produces_warning( - ResourceWarning, - match="without using a context manager", - ): - sr = StataReader(file_path) - sr.read() - with tm.assert_produces_warning( - FutureWarning, - match="is not part of the public API", - ): - sr.close() - - @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) @pytest.mark.parametrize("use_dict", [True, False]) @pytest.mark.parametrize("infer", [True, False]) diff --git a/pandas/tests/plotting/conftest.py b/pandas/tests/plotting/conftest.py index d688bbd47595c..eb5a1f1f6382e 100644 --- a/pandas/tests/plotting/conftest.py +++ b/pandas/tests/plotting/conftest.py @@ -1,5 +1,3 @@ -import gc - import numpy as np import pytest @@ -10,23 +8,8 @@ @pytest.fixture(autouse=True) -def mpl_cleanup(): - # matplotlib/testing/decorators.py#L24 - # 1) Resets units registry - # 2) Resets rc_context - # 3) Closes all figures - mpl = pytest.importorskip("matplotlib") - mpl_units = pytest.importorskip("matplotlib.units") - plt = pytest.importorskip("matplotlib.pyplot") - orig_units_registry = mpl_units.registry.copy() - with mpl.rc_context(): - mpl.use("template") - yield - mpl_units.registry.clear() - mpl_units.registry.update(orig_units_registry) - plt.close("all") - # https://matplotlib.org/stable/users/prev_whats_new/whats_new_3.6.0.html#garbage-collection-is-no-longer-run-on-figure-close # noqa: E501 - gc.collect(1) +def autouse_mpl_cleanup(mpl_cleanup): + pass @pytest.fixture diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index bf0543ffcc4bb..465d287fd8eff 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -296,32 +296,6 @@ def test_transform_frame(on): tm.assert_frame_equal(result, expected) -def test_fillna(): - # need to upsample here - rng = date_range("1/1/2012", periods=10, freq="2s") - ts = Series(np.arange(len(rng), dtype="int64"), index=rng) - r = ts.resample("s") - - expected = r.ffill() - msg = "DatetimeIndexResampler.fillna is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = r.fillna(method="ffill") - tm.assert_series_equal(result, expected) - - expected = r.bfill() - with tm.assert_produces_warning(FutureWarning, match=msg): - result = r.fillna(method="bfill") - tm.assert_series_equal(result, expected) - - msg2 = ( - r"Invalid fill method\. Expecting pad \(ffill\), backfill " - r"\(bfill\) or nearest\. 
Got 0" - ) - with pytest.raises(ValueError, match=msg2): - with tm.assert_produces_warning(FutureWarning, match=msg): - r.fillna(0) - - @pytest.mark.parametrize( "func", [ diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 876906cd76e3f..e104b99370f07 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -43,24 +43,15 @@ def test_append_concat(self): assert isinstance(result.index, PeriodIndex) assert result.index[0] == s1.index[0] - def test_concat_copy(self, using_copy_on_write): + def test_concat_copy(self): df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1)) df3 = DataFrame({5: "foo"}, index=range(4)) # These are actual copies. result = concat([df, df2, df3], axis=1, copy=True) - - if not using_copy_on_write: - for arr in result._mgr.arrays: - assert not any( - np.shares_memory(arr, y) - for x in [df, df2, df3] - for y in x._mgr.arrays - ) - else: - for arr in result._mgr.arrays: - assert arr.base is not None + for arr in result._mgr.arrays: + assert arr.base is not None # These are the same. result = concat([df, df2, df3], axis=1, copy=False) @@ -78,15 +69,11 @@ def test_concat_copy(self, using_copy_on_write): result = concat([df, df2, df3, df4], axis=1, copy=False) for arr in result._mgr.arrays: if arr.dtype.kind == "f": - if using_copy_on_write: - # this is a view on some array in either df or df4 - assert any( - np.shares_memory(arr, other) - for other in df._mgr.arrays + df4._mgr.arrays - ) - else: - # the block was consolidated, so we got a copy anyway - assert arr.base is None + # this is a view on some array in either df or df4 + assert any( + np.shares_memory(arr, other) + for other in df._mgr.arrays + df4._mgr.arrays + ) elif arr.dtype.kind in ["i", "u"]: assert arr.base is df2._mgr.arrays[0].base elif arr.dtype == object: diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py index 8aefa9262dbc5..2ad46ac009928 100644 --- a/pandas/tests/reshape/concat/test_dataframe.py +++ b/pandas/tests/reshape/concat/test_dataframe.py @@ -192,19 +192,6 @@ def test_concat_duplicates_in_index_with_keys(self): tm.assert_frame_equal(result, expected) tm.assert_index_equal(result.index.levels[1], Index([1, 3], name="date")) - @pytest.mark.parametrize("ignore_index", [True, False]) - @pytest.mark.parametrize("order", ["C", "F"]) - def test_concat_copies(self, axis, order, ignore_index, using_copy_on_write): - # based on asv ConcatDataFrames - df = DataFrame(np.zeros((10, 5), dtype=np.float32, order=order)) - - res = concat([df] * 5, axis=axis, ignore_index=ignore_index, copy=True) - - if not using_copy_on_write: - for arr in res._iter_column_arrays(): - for arr2 in df._iter_column_arrays(): - assert not np.shares_memory(arr, arr2) - def test_outer_sort_columns(self): # GH#47127 df1 = DataFrame({"A": [0], "B": [1], 0: 1}) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 52bb9fa0f151b..c27d60fa3b175 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -100,23 +100,20 @@ def test_concat_rename_index(self): tm.assert_frame_equal(result, exp) assert result.index.names == exp.index.names - def test_concat_copy_index_series(self, axis, using_copy_on_write): + def test_concat_copy_index_series(self, axis): # GH 29879 ser = Series([1, 2]) comb = 
concat([ser, ser], axis=axis, copy=True) - if not using_copy_on_write or axis in [0, "index"]: + if axis in [0, "index"]: assert comb.index is not ser.index else: assert comb.index is ser.index - def test_concat_copy_index_frame(self, axis, using_copy_on_write): + def test_concat_copy_index_frame(self, axis): # GH 29879 df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) comb = concat([df, df], axis=axis, copy=True) - if not using_copy_on_write: - assert not comb.index.is_(df.index) - assert not comb.columns.is_(df.columns) - elif axis in [0, "index"]: + if axis in [0, "index"]: assert not comb.index.is_(df.index) assert comb.columns.is_(df.columns) elif axis in [1, "columns"]: diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 9f0994b968a47..f2233a1110059 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -355,6 +355,21 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na): # -------------------------------------------------------------------------------------- # str.replace # -------------------------------------------------------------------------------------- +def test_replace_dict_invalid(any_string_dtype): + # GH 51914 + series = Series(data=["A", "B_junk", "C_gunk"], name="my_messy_col") + msg = "repl cannot be used when pat is a dictionary" + + with pytest.raises(ValueError, match=msg): + series.str.replace(pat={"A": "a", "B": "b"}, repl="A") + + +def test_replace_dict(any_string_dtype): + # GH 51914 + series = Series(data=["A", "B", "C"], name="my_messy_col") + new_series = series.str.replace(pat={"A": "a", "B": "b"}) + expected = Series(data=["a", "b", "C"], name="my_messy_col") + tm.assert_series_equal(new_series, expected) def test_replace(any_string_dtype): diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index ead36ee08b407..7186056f90299 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -155,7 +155,7 @@ def test_scikit_learn(): clf.predict(digits.data[-1:]) -def test_seaborn(): +def test_seaborn(mpl_cleanup): seaborn = pytest.importorskip("seaborn") tips = DataFrame( {"day": pd.date_range("2023", freq="D", periods=5), "total_bill": range(5)} diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 0cbdba874c5f7..0ed61fdd0ce45 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -57,10 +57,6 @@ r"alongside this." 
) -pytestmark = pytest.mark.filterwarnings( - "ignore:errors='ignore' is deprecated:FutureWarning" -) - class TestTimeConversionFormats: def test_to_datetime_readonly(self, writable): @@ -154,28 +150,6 @@ def test_to_datetime_format_YYYYMM_with_nat(self, cache): result = to_datetime(ser, format="%Y%m", cache=cache) tm.assert_series_equal(result, expected) - def test_to_datetime_format_YYYYMMDD_ignore(self, cache): - # coercion - # GH 7930, GH 14487 - ser = Series([20121231, 20141231, 99991231]) - result = to_datetime(ser, format="%Y%m%d", errors="ignore", cache=cache) - expected = Series( - [20121231, 20141231, 99991231], - dtype=object, - ) - tm.assert_series_equal(result, expected) - - def test_to_datetime_format_YYYYMMDD_ignore_with_outofbounds(self, cache): - # https://github.com/pandas-dev/pandas/issues/26493 - result = to_datetime( - ["15010101", "20150101", np.nan], - format="%Y%m%d", - errors="ignore", - cache=cache, - ) - expected = Index(["15010101", "20150101", np.nan], dtype=object) - tm.assert_index_equal(result, expected) - def test_to_datetime_format_YYYYMMDD_coercion(self, cache): # coercion # GH 7930 @@ -282,28 +256,6 @@ def test_to_datetime_format_integer(self, cache): result = to_datetime(ser, format="%Y%m", cache=cache) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize( - "int_date, expected", - [ - # valid date, length == 8 - [20121030, datetime(2012, 10, 30)], - # short valid date, length == 6 - [199934, datetime(1999, 3, 4)], - # long integer date partially parsed to datetime(2012,1,1), length > 8 - [2012010101, 2012010101], - # invalid date partially parsed to datetime(2012,9,9), length == 8 - [20129930, 20129930], - # short integer date partially parsed to datetime(2012,9,9), length < 8 - [2012993, 2012993], - # short invalid date, length == 4 - [2121, 2121], - ], - ) - def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, expected): - # GH 26583 - result = to_datetime(int_date, format="%Y%m%d", errors="ignore") - assert result == expected - def test_to_datetime_format_microsecond(self, cache): month_abbr = calendar.month_abbr[4] val = f"01-{month_abbr}-2011 00:00:01.978" @@ -584,11 +536,6 @@ def test_to_datetime_overflow(self): res = to_datetime([arg], errors="coerce") tm.assert_index_equal(res, Index([NaT])) - res = to_datetime(arg, errors="ignore") - assert isinstance(res, str) and res == arg - res = to_datetime([arg], errors="ignore") - tm.assert_index_equal(res, Index([arg], dtype=object)) - def test_to_datetime_mixed_datetime_and_string(self): # GH#47018 adapted old doctest with new behavior d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1))) @@ -961,7 +908,7 @@ def test_to_datetime_iso_week_year_format(self, s, _format, dt): ], ], ) - @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"]) + @pytest.mark.parametrize("errors", ["raise", "coerce"]) def test_error_iso_week_year(self, msg, s, _format, errors): # See GH#16607, GH#50308 # This test checks for errors thrown when giving the wrong format @@ -1019,11 +966,6 @@ def test_to_datetime_YYYYMMDD(self): actual = to_datetime("20080115") assert actual == datetime(2008, 1, 15) - def test_to_datetime_unparsable_ignore(self): - # unparsable - ser = "Month 1, 1999" - assert to_datetime(ser, errors="ignore") == ser - @td.skip_if_windows # `tm.set_timezone` does not work in windows def test_to_datetime_now(self): # See GH#18666 @@ -1118,7 +1060,7 @@ def test_to_datetime_dt64s_and_str(self, arg, format): @pytest.mark.parametrize( "dt", 
[np.datetime64("1000-01-01"), np.datetime64("5000-01-02")] ) - @pytest.mark.parametrize("errors", ["raise", "ignore", "coerce"]) + @pytest.mark.parametrize("errors", ["raise", "coerce"]) def test_to_datetime_dt64s_out_of_ns_bounds(self, cache, dt, errors): # GH#50369 We cast to the nearest supported reso, i.e. "s" ts = to_datetime(dt, errors=errors, cache=cache) @@ -1178,31 +1120,6 @@ def test_to_datetime_array_of_dt64s(self, cache, unit): expected = DatetimeIndex(np.array(dts_with_oob, dtype="M8[s]")) tm.assert_index_equal(result, expected) - # With errors='ignore', out of bounds datetime64s - # are converted to their .item(), which depending on the version of - # numpy is either a python datetime.datetime or datetime.date - result = to_datetime(dts_with_oob, errors="ignore", cache=cache) - if not cache: - # FIXME: shouldn't depend on cache! - expected = Index(dts_with_oob) - tm.assert_index_equal(result, expected) - - def test_out_of_bounds_errors_ignore(self): - # https://github.com/pandas-dev/pandas/issues/50587 - result = to_datetime(np.datetime64("9999-01-01"), errors="ignore") - expected = np.datetime64("9999-01-01") - assert result == expected - - def test_out_of_bounds_errors_ignore2(self): - # GH#12424 - msg = "errors='ignore' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = to_datetime( - Series(["2362-01-01", np.nan], dtype=object), errors="ignore" - ) - exp = Series(["2362-01-01", np.nan], dtype=object) - tm.assert_series_equal(res, exp) - def test_to_datetime_tz(self, cache): # xref 8260 # uniform returns a DatetimeIndex @@ -1230,17 +1147,6 @@ def test_to_datetime_tz_mixed(self, cache): with pytest.raises(ValueError, match=msg): to_datetime(arr, cache=cache) - depr_msg = "errors='ignore' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - result = to_datetime(arr, cache=cache, errors="ignore") - expected = Index( - [ - Timestamp("2013-01-01 13:00:00-08:00"), - Timestamp("2013-01-02 14:00:00-05:00"), - ], - dtype="object", - ) - tm.assert_index_equal(result, expected) result = to_datetime(arr, cache=cache, errors="coerce") expected = DatetimeIndex( ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[ns, US/Pacific]" @@ -1390,7 +1296,6 @@ def test_datetime_bool(self, cache, arg): with pytest.raises(TypeError, match=msg): to_datetime(arg) assert to_datetime(arg, errors="coerce", cache=cache) is NaT - assert to_datetime(arg, errors="ignore", cache=cache) is arg def test_datetime_bool_arrays_mixed(self, cache): msg = f"{type(cache)} is not convertible to datetime" @@ -1418,7 +1323,7 @@ def test_datetime_invalid_datatype(self, arg): with pytest.raises(TypeError, match=msg): to_datetime(arg) - @pytest.mark.parametrize("errors", ["coerce", "raise", "ignore"]) + @pytest.mark.parametrize("errors", ["coerce", "raise"]) def test_invalid_format_raises(self, errors): # https://github.com/pandas-dev/pandas/issues/50255 with pytest.raises( @@ -1430,9 +1335,6 @@ def test_invalid_format_raises(self, errors): @pytest.mark.parametrize("format", [None, "%H:%M:%S"]) def test_datetime_invalid_scalar(self, value, format): # GH24763 - res = to_datetime(value, errors="ignore", format=format) - assert res == value - res = to_datetime(value, errors="coerce", format=format) assert res is NaT @@ -1453,9 +1355,6 @@ def test_datetime_invalid_scalar(self, value, format): @pytest.mark.parametrize("format", [None, "%H:%M:%S"]) def test_datetime_outofbounds_scalar(self, value, format): # GH24763 - res = to_datetime(value, errors="ignore", 
format=format) - assert res == value - res = to_datetime(value, errors="coerce", format=format) assert res is NaT @@ -1480,11 +1379,6 @@ def test_datetime_invalid_index(self, values, format): warn = UserWarning else: warn = None - with tm.assert_produces_warning( - warn, match="Could not infer format", raise_on_extra_warnings=False - ): - res = to_datetime(values, errors="ignore", format=format) - tm.assert_index_equal(res, Index(values, dtype=object)) with tm.assert_produces_warning( warn, match="Could not infer format", raise_on_extra_warnings=False @@ -1657,22 +1551,15 @@ def test_to_datetime_coerce_oob(self, string_arg, format, outofbounds): expected = DatetimeIndex([datetime(2018, 3, 1), NaT]) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( - "errors, expected", - [ - ("coerce", Index([NaT, NaT])), - ("ignore", Index(["200622-12-31", "111111-24-11"], dtype=object)), - ], - ) - def test_to_datetime_malformed_no_raise(self, errors, expected): + def test_to_datetime_malformed_no_raise(self): # GH 28299 # GH 48633 ts_strings = ["200622-12-31", "111111-24-11"] with tm.assert_produces_warning( UserWarning, match="Could not infer format", raise_on_extra_warnings=False ): - result = to_datetime(ts_strings, errors=errors) - tm.assert_index_equal(result, expected) + result = to_datetime(ts_strings, errors="coerce") + tm.assert_index_equal(result, Index([NaT, NaT])) def test_to_datetime_malformed_raise(self): # GH 48633 @@ -1876,11 +1763,6 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit): with tm.assert_produces_warning(FutureWarning, match=warn_msg): to_datetime(["1.5"], unit=unit, errors="raise") - # with errors="ignore" we also end up raising within the Timestamp - # constructor; this may not be ideal - with pytest.raises(ValueError, match=msg): - to_datetime([1.5], unit=unit, errors="ignore") - res = to_datetime([1.5], unit=unit, errors="coerce") expected = Index([NaT], dtype="M8[ns]") tm.assert_index_equal(res, expected) @@ -1903,21 +1785,6 @@ def test_unit(self, cache): def test_unit_array_mixed_nans(self, cache): values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] - result = to_datetime(values, unit="D", errors="ignore", cache=cache) - expected = Index( - [ - 11111111111111111, - Timestamp("1970-01-02"), - Timestamp("1970-01-02"), - NaT, - NaT, - NaT, - NaT, - NaT, - ], - dtype=object, - ) - tm.assert_index_equal(result, expected) result = to_datetime(values, unit="D", errors="coerce", cache=cache) expected = DatetimeIndex( @@ -1933,10 +1800,6 @@ def test_unit_array_mixed_nans(self, cache): def test_unit_array_mixed_nans_large_int(self, cache): values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"] - result = to_datetime(values, errors="ignore", unit="s", cache=cache) - expected = Index([1420043460000000000000000, NaT, NaT, NaT, NaT], dtype=object) - tm.assert_index_equal(result, expected) - result = to_datetime(values, errors="coerce", unit="s", cache=cache) expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[ns]") tm.assert_index_equal(result, expected) @@ -1952,7 +1815,7 @@ def test_to_datetime_invalid_str_not_out_of_bounds_valuerror(self, cache): with pytest.raises(ValueError, match=msg): to_datetime("foo", errors="raise", unit="s", cache=cache) - @pytest.mark.parametrize("error", ["raise", "coerce", "ignore"]) + @pytest.mark.parametrize("error", ["raise", "coerce"]) def test_unit_consistency(self, cache, error): # consistency of conversions expected = Timestamp("1970-05-09 14:25:11") @@ 
-1960,7 +1823,7 @@ def test_unit_consistency(self, cache, error): assert result == expected assert isinstance(result, Timestamp) - @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"]) + @pytest.mark.parametrize("errors", ["raise", "coerce"]) @pytest.mark.parametrize("dtype", ["float64", "int64"]) def test_unit_with_numeric(self, cache, errors, dtype): # GH 13180 @@ -2030,18 +1893,6 @@ def test_unit_rounding(self, cache): alt = Timestamp(value, unit="s") assert alt == result - def test_unit_ignore_keeps_name(self, cache): - # GH 21697 - expected = Index([15e9] * 2, name="name") - result = to_datetime(expected, errors="ignore", unit="s", cache=cache) - tm.assert_index_equal(result, expected) - - def test_to_datetime_errors_ignore_utc_true(self): - # GH#23758 - result = to_datetime([1], unit="s", utc=True, errors="ignore") - expected = DatetimeIndex(["1970-01-01 00:00:01"], dtype="M8[ns, UTC]") - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("dtype", [int, float]) def test_to_datetime_unit(self, dtype): epoch = 1370745748 @@ -2119,7 +1970,7 @@ def test_float_to_datetime_raise_near_bounds(self): [0, tsmax_in_days - 0.005, -tsmax_in_days + 0.005], dtype=float ) expected = (should_succeed * oneday_in_ns).astype(np.int64) - for error_mode in ["raise", "coerce", "ignore"]: + for error_mode in ["raise", "coerce"]: result1 = to_datetime(should_succeed, unit="D", errors=error_mode) # Cast to `np.float64` so that `rtol` and inexact checking kick in # (`check_exact` doesn't take place for integer dtypes) @@ -2548,8 +2399,6 @@ def test_to_datetime_with_space_in_series(self, cache): result_coerce = to_datetime(ser, errors="coerce", cache=cache) expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT]) tm.assert_series_equal(result_coerce, expected_coerce) - result_ignore = to_datetime(ser, errors="ignore", cache=cache) - tm.assert_series_equal(result_ignore, ser) @td.skip_if_not_us_locale def test_to_datetime_with_apply(self, cache): @@ -2568,7 +2417,7 @@ def test_to_datetime_timezone_name(self): assert result == expected @td.skip_if_not_us_locale - @pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"]) + @pytest.mark.parametrize("errors", ["raise", "coerce"]) def test_to_datetime_with_apply_with_empty_str(self, cache, errors): # this is only locale tested with US/None locales # GH 5195, GH50251 @@ -2616,19 +2465,10 @@ def test_to_datetime_strings_vs_constructor(self, result): def test_to_datetime_unprocessable_input(self, cache): # GH 4928 # GH 21864 - result = to_datetime([1, "1"], errors="ignore", cache=cache) - - expected = Index(np.array([1, "1"], dtype="O")) - tm.assert_equal(result, expected) msg = '^Given date string "1" not likely a datetime, at position 1$' with pytest.raises(ValueError, match=msg): to_datetime([1, "1"], errors="raise", cache=cache) - def test_to_datetime_unhashable_input(self, cache): - series = Series([["a"]] * 100) - result = to_datetime(series, errors="ignore", cache=cache) - tm.assert_series_equal(series, result) - def test_to_datetime_other_datetime64_units(self): # 5/25/2012 scalar = np.int64(1337904000000000).view("M8[us]") @@ -2691,11 +2531,6 @@ def test_string_na_nat_conversion_malformed(self, cache): with pytest.raises(ValueError, match=msg): to_datetime(malformed, errors="raise", cache=cache) - result = to_datetime(malformed, errors="ignore", cache=cache) - # GH 21864 - expected = Index(malformed, dtype=object) - tm.assert_index_equal(result, expected) - with pytest.raises(ValueError, match=msg): 
to_datetime(malformed, errors="raise", cache=cache) @@ -2968,14 +2803,6 @@ def test_to_datetime_iso8601_noleading_0s(self, cache, format): result = to_datetime(ser, format=format, cache=cache) tm.assert_series_equal(result, expected) - def test_parse_dates_infer_datetime_format_warning(self): - # GH 49024 - with tm.assert_produces_warning( - UserWarning, - match="The argument 'infer_datetime_format' is deprecated", - ): - to_datetime(["10-10-2000"], infer_datetime_format=True) - class TestDaysInMonth: # tests for issue #10154 @@ -3039,18 +2866,6 @@ def test_day_not_in_month_raise_value(self, cache, arg, format, msg): with pytest.raises(ValueError, match=msg): to_datetime(arg, errors="raise", format=format, cache=cache) - @pytest.mark.parametrize( - "expected, format", - [ - ["2015-02-29", None], - ["2015-02-29", "%Y-%m-%d"], - ["2015-04-31", "%Y-%m-%d"], - ], - ) - def test_day_not_in_month_ignore(self, cache, expected, format): - result = to_datetime(expected, errors="ignore", format=format, cache=cache) - assert result == expected - class TestDatetimeParsingWrappers: @pytest.mark.parametrize( @@ -3537,7 +3352,7 @@ def test_na_to_datetime(nulls_fixture, klass): assert result[0] is NaT -@pytest.mark.parametrize("errors", ["raise", "coerce", "ignore"]) +@pytest.mark.parametrize("errors", ["raise", "coerce"]) @pytest.mark.parametrize( "args, format", [ @@ -3598,15 +3413,6 @@ def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length): tm.assert_series_equal(result1, expected1) - result2 = to_datetime(ser, errors="ignore", utc=True) - - expected2 = Series( - [datetime.fromisoformat("1446-04-12 00:00:00+00:00")] - + ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length) - ) - - tm.assert_series_equal(result2, expected2) - with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"): to_datetime(ser, errors="raise", utc=True) @@ -3659,17 +3465,12 @@ def test_to_datetime_mixed_not_necessarily_iso8601_raise(): to_datetime(["2020-01-01", "01-01-2000"], format="ISO8601") -@pytest.mark.parametrize( - ("errors", "expected"), - [ - ("coerce", DatetimeIndex(["2020-01-01 00:00:00", NaT])), - ("ignore", Index(["2020-01-01", "01-01-2000"], dtype=object)), - ], -) -def test_to_datetime_mixed_not_necessarily_iso8601_coerce(errors, expected): +def test_to_datetime_mixed_not_necessarily_iso8601_coerce(): # https://github.com/pandas-dev/pandas/issues/50411 - result = to_datetime(["2020-01-01", "01-01-2000"], format="ISO8601", errors=errors) - tm.assert_index_equal(result, expected) + result = to_datetime( + ["2020-01-01", "01-01-2000"], format="ISO8601", errors="coerce" + ) + tm.assert_index_equal(result, DatetimeIndex(["2020-01-01 00:00:00", NaT])) def test_unknown_tz_raises(): diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 105aaedb21b27..585b7ca94f730 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -18,7 +18,7 @@ import pandas._testing as tm -@pytest.fixture(params=[None, "ignore", "raise", "coerce"]) +@pytest.fixture(params=[None, "raise", "coerce"]) def errors(request): return request.param @@ -116,15 +116,11 @@ def test_error(data, msg): to_numeric(ser, errors="raise") -@pytest.mark.parametrize( - "errors,exp_data", [("ignore", [1, -3.14, "apple"]), ("coerce", [1, -3.14, np.nan])] -) -@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") -def test_ignore_error(errors, exp_data): +def test_ignore_error(): ser = Series([1, -3.14, 
"apple"]) - result = to_numeric(ser, errors=errors) + result = to_numeric(ser, errors="coerce") - expected = Series(exp_data) + expected = Series([1, -3.14, np.nan]) tm.assert_series_equal(result, expected) @@ -132,12 +128,10 @@ def test_ignore_error(errors, exp_data): "errors,exp", [ ("raise", 'Unable to parse string "apple" at position 2'), - ("ignore", [True, False, "apple"]), # Coerces to float. ("coerce", [1.0, 0.0, np.nan]), ], ) -@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_bool_handling(errors, exp): ser = Series([True, False, "apple"]) @@ -236,7 +230,6 @@ def test_all_nan(): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_type_check(errors): # see gh-11776 df = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]}) @@ -251,7 +244,6 @@ def test_scalar(val, signed, transform): assert to_numeric(transform(val)) == float(val) -@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_really_large_scalar(large_val, signed, transform, errors): # see gh-24910 kwargs = {"errors": errors} if errors is not None else {} @@ -269,7 +261,6 @@ def test_really_large_scalar(large_val, signed, transform, errors): tm.assert_almost_equal(to_numeric(val, **kwargs), expected) -@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors): # see gh-24910 kwargs = {"errors": errors} if errors is not None else {} @@ -309,7 +300,6 @@ def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype)) -@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors): # see gh-24910 # @@ -329,13 +319,8 @@ def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors to_numeric(arr, **kwargs) else: result = to_numeric(arr, **kwargs) - - if errors == "coerce": - expected = [float(i) for i in arr] - exp_dtype = float - else: - expected = arr - exp_dtype = object + expected = [float(i) for i in arr] + exp_dtype = float tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype)) @@ -344,11 +329,9 @@ def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors "errors,checker", [ ("raise", 'Unable to parse string "fail" at position 0'), - ("ignore", lambda x: x == "fail"), ("coerce", lambda x: np.isnan(x)), ], ) -@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_scalar_fail(errors, checker): scalar = "fail" @@ -420,11 +403,9 @@ def test_period(request, transform_assert_equal): "errors,expected", [ ("raise", "Invalid object type at position 0"), - ("ignore", Series([[10.0, 2], 1.0, "apple"])), ("coerce", Series([np.nan, 1.0, np.nan])), ], ) -@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") def test_non_hashable(errors, expected): # see gh-13324 ser = Series([[10.0, 2], 1.0, "apple"]) @@ -502,19 +483,6 @@ def test_signed_downcast(data, signed_downcast): tm.assert_numpy_array_equal(res, expected) -def test_ignore_downcast_invalid_data(): - # If we can't successfully cast the given - # data to a numeric dtype, do not bother - # with the downcast parameter. 
- data = ["foo", 2, 3] - expected = np.array(data, dtype=object) - - msg = "errors='ignore' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = to_numeric(data, errors="ignore", downcast="unsigned") - tm.assert_numpy_array_equal(res, expected) - - def test_ignore_downcast_neg_to_unsigned(): # Cannot cast to an unsigned integer # because we have a negative number. @@ -526,7 +494,6 @@ def test_ignore_downcast_neg_to_unsigned(): # Warning in 32 bit platforms -@pytest.mark.filterwarnings("ignore:invalid value encountered in cast:RuntimeWarning") @pytest.mark.parametrize("downcast", ["integer", "signed", "unsigned"]) @pytest.mark.parametrize( "data,expected", @@ -628,26 +595,14 @@ def test_coerce_uint64_conflict(data, exp_data): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "errors,exp", - [ - ("ignore", Series(["12345678901234567890", "1234567890", "ITEM"])), - ("raise", "Unable to parse string"), - ], -) -@pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") -def test_non_coerce_uint64_conflict(errors, exp): +def test_non_coerce_uint64_conflict(): # see gh-17007 and gh-17125 # # For completeness. ser = Series(["12345678901234567890", "1234567890", "ITEM"]) - if isinstance(exp, str): - with pytest.raises(ValueError, match=exp): - to_numeric(ser, errors=errors) - else: - result = to_numeric(ser, errors=errors) - tm.assert_series_equal(result, ser) + with pytest.raises(ValueError, match="Unable to parse string"): + to_numeric(ser, errors="raise") @pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"]) @@ -757,17 +712,6 @@ def test_to_numeric_from_nullable_string_coerce(nullable_string_dtype): tm.assert_series_equal(result, expected) -def test_to_numeric_from_nullable_string_ignore(nullable_string_dtype): - # GH#52146 - values = ["a", "1"] - ser = Series(values, dtype=nullable_string_dtype) - expected = ser.copy() - msg = "errors='ignore' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_numeric(ser, errors="ignore") - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( "data, input_dtype, downcast, expected_dtype", ( @@ -934,11 +878,6 @@ def test_to_numeric_dtype_backend_error(dtype_backend): with pytest.raises(ValueError, match="Unable to parse string"): to_numeric(ser, dtype_backend=dtype_backend) - msg = "errors='ignore' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_numeric(ser, dtype_backend=dtype_backend, errors="ignore") - tm.assert_series_equal(result, expected) - result = to_numeric(ser, dtype_backend=dtype_backend, errors="coerce") if dtype_backend == "pyarrow": dtype = "double[pyarrow]" diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py index b673bd9c2ec71..5e61f5e1a3029 100644 --- a/pandas/tests/tools/test_to_time.py +++ b/pandas/tests/tools/test_to_time.py @@ -54,10 +54,8 @@ def test_arraylike(self): assert to_time(arg, infer_time_format=True) == expected_arr assert to_time(arg, format="%I:%M%p", errors="coerce") == [None, None] - msg = "errors='ignore' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = to_time(arg, format="%I:%M%p", errors="ignore") - tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) + with pytest.raises(ValueError, match="errors must be"): + to_time(arg, format="%I:%M%p", errors="ignore") msg = "Cannot convert.+to a time with given format" with pytest.raises(ValueError, match=msg): diff 
--git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 073533c38f430..c052ca58f5873 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -99,8 +99,7 @@ def test_to_timedelta_oob_non_nano(self): TimedeltaArray._from_sequence(arr, dtype="m8[s]") @pytest.mark.parametrize("box", [lambda x: x, pd.DataFrame]) - @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"]) - @pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning") + @pytest.mark.parametrize("errors", ["raise", "coerce"]) def test_to_timedelta_dataframe(self, box, errors): # GH 11776 arg = box(np.arange(10).reshape(2, 5)) @@ -145,32 +144,6 @@ def test_to_timedelta_bad_value_coerce(self): to_timedelta(["1 day", "bar", "1 min"], errors="coerce"), ) - def test_to_timedelta_invalid_errors_ignore(self): - # gh-13613: these should not error because errors='ignore' - msg = "errors='ignore' is deprecated" - invalid_data = "apple" - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_timedelta(invalid_data, errors="ignore") - assert invalid_data == result - - invalid_data = ["apple", "1 days"] - expected = np.array(invalid_data, dtype=object) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_timedelta(invalid_data, errors="ignore") - tm.assert_numpy_array_equal(expected, result) - - invalid_data = pd.Index(["apple", "1 days"]) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_timedelta(invalid_data, errors="ignore") - tm.assert_index_equal(invalid_data, result) - - invalid_data = Series(["apple", "1 days"]) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_timedelta(invalid_data, errors="ignore") - tm.assert_series_equal(invalid_data, result) - @pytest.mark.parametrize( "val, errors", [ @@ -255,13 +228,6 @@ def test_to_timedelta_coerce_strings_unit(self): expected = to_timedelta([1, 2, pd.NaT], unit="ns") tm.assert_index_equal(result, expected) - def test_to_timedelta_ignore_strings_unit(self): - arr = np.array([1, 2, "error"], dtype=object) - msg = "errors='ignore' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_timedelta(arr, unit="ns", errors="ignore") - tm.assert_numpy_array_equal(result, arr) - @pytest.mark.parametrize( "expected_val, result_val", [[timedelta(days=2), 2], [None, None]] ) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 3798e68b3780b..f8939d1d8ccd4 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -215,20 +215,6 @@ def test_parsing_different_timezone_offsets(): assert result_tz is None -@pytest.mark.parametrize( - "data", [["-352.737091", "183.575577"], ["1", "2", "3", "4", "5"]] -) -def test_number_looking_strings_not_into_datetime(data): - # see gh-4601 - # - # These strings don't look like datetimes, so - # they shouldn't be attempted to be converted. 
- arr = np.array(data, dtype=object) - result, _ = tslib.array_to_datetime(arr, errors="ignore") - - tm.assert_numpy_array_equal(result, arr) - - @pytest.mark.parametrize( "invalid_date", [ @@ -266,22 +252,12 @@ def test_coerce_outside_ns_bounds_one_valid(): tm.assert_numpy_array_equal(result, expected) -@pytest.mark.parametrize("errors", ["ignore", "coerce"]) -def test_coerce_of_invalid_datetimes(errors): +def test_coerce_of_invalid_datetimes(): arr = np.array(["01-01-2013", "not_a_date", "1"], dtype=object) - kwargs = {"values": arr, "errors": errors} - - if errors == "ignore": - # Without coercing, the presence of any invalid - # dates prevents any values from being converted. - result, _ = tslib.array_to_datetime(**kwargs) - tm.assert_numpy_array_equal(result, arr) - - else: # coerce. - # With coercing, the invalid dates becomes iNaT - result, _ = tslib.array_to_datetime(arr, errors="coerce") - expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] - - tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]")) + # With coercion, the invalid dates become iNaT + result, _ = tslib.array_to_datetime(arr, errors="coerce") + expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] + tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]")) def test_to_datetime_barely_out_of_bounds(): diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 4c695190c1353..0b3bc07c17452 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -473,3 +473,11 @@ def test_assert_series_equal_int_tol(): tm.assert_extension_array_equal( left.astype("Int64").values, right.astype("Int64").values, rtol=1.5 ) + + +def test_assert_series_equal_index_exact_default(): + # GH#57067 + ser1 = Series(np.zeros(6, dtype=int), [0, 0.2, 0.4, 0.6, 0.8, 1]) + ser2 = Series(np.zeros(6, dtype=int), np.linspace(0, 1, 6)) + tm.assert_series_equal(ser1, ser2) + tm.assert_frame_equal(ser1.to_frame(), ser2.to_frame())
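Note on the pattern these test changes encode: with errors="ignore" removed from pandas' conversion functions, the supported modes are errors="raise" and errors="coerce". A minimal standalone sketch of the migration, separate from the patch itself (variable names are illustrative):

import pandas as pd

raw = ["2013-01-01", "not_a_date"]

# errors="coerce" turns unparseable entries into NaT and keeps going.
parsed = pd.to_datetime(raw, errors="coerce")

# The old errors="ignore" (return the input unchanged on failure) can be
# emulated with errors="raise" plus exception handling.
try:
    result = pd.to_datetime(raw, errors="raise")
except ValueError:
    result = raw  # keep the original, unconverted values

# The same substitution applies to to_numeric and to_timedelta.
pd.to_numeric(["1", "-3.14", "apple"], errors="coerce")  # 1.0, -3.14, NaN
pd.to_timedelta(["1 days", "apple"], errors="coerce")    # 1 days, NaT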
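The new test_find_replace.py tests above also exercise the dictionary form of Series.str.replace added for GH 51914: when pat is a dict, each key is replaced by its mapped value and repl must not be passed. A short usage sketch mirroring what those tests assert (behavior as exercised by the tests, not independently verified here):

import pandas as pd

ser = pd.Series(["A", "B", "C"], name="my_messy_col")

# Dict keys are the patterns, dict values their replacements;
# entries matching no key ("C") pass through unchanged.
ser.str.replace(pat={"A": "a", "B": "b"})  # -> ["a", "b", "C"]

# Combining a dict pat with repl is rejected.
try:
    ser.str.replace(pat={"A": "a"}, repl="x")
except ValueError as err:
    print(err)  # repl cannot be used when pat is a dictionary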