CLN: Enforce deprecation of using alias for builtin/NumPy funcs #57444

Merged: 17 commits, Feb 27, 2024
Changes from all commits
5 changes: 2 additions & 3 deletions doc/source/whatsnew/v0.15.1.rst
@@ -92,7 +92,7 @@ API changes

.. code-block:: ipython

In [4]: gr.apply(sum)
In [4]: gr.apply("sum")
Out[4]:
joe
jim
@@ -102,9 +102,8 @@ API changes
current behavior:

.. ipython:: python
:okwarning:

gr.apply(sum)
gr.apply("sum")

- Support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is
not found in the index (:issue:`7860`):
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -199,6 +199,7 @@ Removal of prior version deprecations/changes
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
- Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`)
- Passing both ``freq`` and ``fill_value`` in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`.DataFrameGroupBy.shift` now raises a ``ValueError`` (:issue:`54818`)
- Removed :meth:`DateOffset.is_anchored` and :meth:`offsets.Tick.is_anchored` (:issue:`56594`)
- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`)
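The whatsnew entry above is the core of this PR. A minimal sketch of the enforced behavior (illustrative only, not part of the diff; assumes pandas 3.0 semantics):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
gr = df.groupby("key")["val"]

# 2.x silently replaced the builtin/NumPy callable with the pandas method
# (with a FutureWarning); now the callable is used as-is.
gr.agg(np.sum)   # np.sum applied to each group
gr.agg(sum)      # Python builtin sum applied to each group

# To keep the pandas (cython) implementation, spell out the string alias.
gr.agg("sum")    # dispatches to SeriesGroupBy.sum
```
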
8 changes: 0 additions & 8 deletions pandas/_testing/__init__.py
@@ -398,9 +398,6 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager
return pytest.raises(expected_exception, match=None)


cython_table = pd.core.common._cython_table.items()


def get_cython_table_params(ndframe, func_names_and_expected):
"""
Combine frame, functions from com._cython_table
@@ -421,11 +418,6 @@ def get_cython_table_params(ndframe, func_names_and_expected):
results = []
for func_name, expected in func_names_and_expected:
results.append((ndframe, func_name, expected))
results += [
(ndframe, func, expected)
for func, name in cython_table
if name == func_name
]
return results


15 changes: 0 additions & 15 deletions pandas/core/apply.py
@@ -175,10 +175,7 @@ def agg(self) -> DataFrame | Series | None:
Result of aggregation, or None if agg cannot be performed by
this method.
"""
obj = self.obj
func = self.func
args = self.args
kwargs = self.kwargs

if isinstance(func, str):
return self.apply_str()
@@ -189,12 +186,6 @@
# we require a list, but not a 'str'
return self.agg_list_like()

if callable(func):
f = com.get_cython_func(func)
if f and not args and not kwargs:
warn_alias_replacement(obj, func, f)
return getattr(obj, f)()

# caller can react
return None

@@ -300,12 +291,6 @@ def transform_str_or_callable(self, func) -> DataFrame | Series:
if isinstance(func, str):
return self._apply_str(obj, func, *args, **kwargs)

if not args and not kwargs:
f = com.get_cython_func(func)
if f:
warn_alias_replacement(obj, func, f)
return getattr(obj, f)()

# Two possible ways to use a UDF - apply or call directly
try:
return obj.apply(func, args=args, **kwargs)
24 changes: 0 additions & 24 deletions pandas/core/common.py
@@ -608,22 +608,6 @@ def require_length_match(data, index: Index) -> None:
)


# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
builtins.sum: np.sum,
builtins.max: np.maximum.reduce,
builtins.min: np.minimum.reduce,
}

# GH#53425: Only for deprecation
_builtin_table_alias = {
builtins.sum: "np.sum",
builtins.max: "np.maximum.reduce",
builtins.min: "np.minimum.reduce",
}

_cython_table = {
builtins.sum: "sum",
builtins.max: "max",
@@ -660,14 +644,6 @@ def get_cython_func(arg: Callable) -> str | None:
return _cython_table.get(arg)


def is_builtin_func(arg):
"""
if we define a builtin function for this argument, return it,
otherwise return the arg
"""
return _builtin_table.get(arg, arg)


def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
"""
If a name is missing then replace it by level_n, where n is the count
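For context on the removed ``_builtin_table`` above: the deleted comment notes that the ufunc reductions and the top-level NumPy functions have different axis defaults. A quick illustration in plain NumPy (not part of the diff):

```python
import numpy as np

arr = np.arange(6).reshape(3, 2)  # [[0, 1], [2, 3], [4, 5]]

np.maximum.reduce(arr)  # defaults to axis=0 -> array([4, 5]), column-wise maxima
np.max(arr)             # defaults to axis=None -> 5, reduction over the whole array
```
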
16 changes: 0 additions & 16 deletions pandas/core/groupby/generic.py
@@ -59,7 +59,6 @@
maybe_mangle_lambdas,
reconstruct_func,
validate_func_kwargs,
warn_alias_replacement,
)
import pandas.core.common as com
from pandas.core.frame import DataFrame
@@ -357,11 +356,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
return ret

else:
cyfunc = com.get_cython_func(func)
if cyfunc and not args and not kwargs:
warn_alias_replacement(self, func, cyfunc)
return getattr(self, cyfunc)()

if maybe_use_numba(engine):
return self._aggregate_with_numba(
func, *args, engine_kwargs=engine_kwargs, **kwargs
@@ -409,11 +403,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
agg = aggregate

def _python_agg_general(self, func, *args, **kwargs):
orig_func = func
func = com.is_builtin_func(func)
if orig_func != func:
alias = com._builtin_table_alias[func]
warn_alias_replacement(self, orig_func, alias)
f = lambda x: func(x, *args, **kwargs)

obj = self._obj_with_exclusions
@@ -1656,11 +1645,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
agg = aggregate

def _python_agg_general(self, func, *args, **kwargs):
orig_func = func
func = com.is_builtin_func(func)
if orig_func != func:
alias = com._builtin_table_alias[func]
warn_alias_replacement(self, orig_func, alias)
f = lambda x: func(x, *args, **kwargs)

if self.ngroups == 0:
12 changes: 0 additions & 12 deletions pandas/core/groupby/groupby.py
@@ -94,7 +94,6 @@ class providing the base-class of operations.
sample,
)
from pandas.core._numba import executor
from pandas.core.apply import warn_alias_replacement
from pandas.core.arrays import (
ArrowExtensionArray,
BaseMaskedArray,
@@ -1647,12 +1646,6 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
b 2
dtype: int64
"""
orig_func = func
func = com.is_builtin_func(func)
if orig_func != func:
alias = com._builtin_table_alias[orig_func]
warn_alias_replacement(self, orig_func, alias)

if isinstance(func, str):
if hasattr(self, func):
res = getattr(self, func)
@@ -1868,11 +1861,6 @@ def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs):
@final
def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
# optimized transforms
orig_func = func
func = com.get_cython_func(func) or func
if orig_func != func:
warn_alias_replacement(self, orig_func, func)

if not isinstance(func, str):
return self._transform_general(func, engine, engine_kwargs, *args, **kwargs)

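With ``get_cython_func`` no longer consulted in ``_transform`` above, a NumPy reducer passed to ``transform`` goes through the general per-group path instead of being rewritten to the cython kernel. A hedged sketch of the two spellings (illustrative, reusing a hypothetical ``key``/``val`` frame):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
gb = df.groupby("key")["val"]

gb.transform("sum")   # cython-optimized groupby transform
gb.transform(np.sum)  # np.sum evaluated per group and broadcast back;
                      # no longer silently rewritten to the "sum" kernel
```
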
14 changes: 1 addition & 13 deletions pandas/core/resample.py
@@ -45,16 +45,12 @@
)

import pandas.core.algorithms as algos
from pandas.core.apply import (
ResamplerWindowApply,
warn_alias_replacement,
)
from pandas.core.apply import ResamplerWindowApply
from pandas.core.arrays import ArrowExtensionArray
from pandas.core.base import (
PandasObject,
SelectionMixin,
)
import pandas.core.common as com
from pandas.core.generic import (
NDFrame,
_shared_docs,
@@ -1609,10 +1605,6 @@ def _downsample(self, how, **kwargs):
how : string / cython mapped function
**kwargs : kw args passed to how function
"""
orig_how = how
how = com.get_cython_func(how) or how
if orig_how != how:
warn_alias_replacement(self, orig_how, how)
ax = self.ax

# Excludes `on` column when provided
@@ -1775,10 +1767,6 @@ def _downsample(self, how, **kwargs):
if self.kind == "timestamp":
return super()._downsample(how, **kwargs)

orig_how = how
how = com.get_cython_func(how) or how
if orig_how != how:
warn_alias_replacement(self, orig_how, how)
ax = self.ax

if is_subperiod(ax.freq, self.freq):
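The resampler loses the same rewrite in ``_downsample`` above. A sketch of how the two spellings now differ on a resampler (illustrative; assumes a small DatetimeIndex frame):

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2024-01-01", periods=6, freq="h")
df = pd.DataFrame({"val": range(6)}, index=idx)

r = df.resample("2h")
r.agg("sum")    # pandas Resampler.sum (cython path)
r.agg(np.sum)   # np.sum applied to each bin, no longer re-routed to Resampler.sum
```
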
6 changes: 2 additions & 4 deletions pandas/tests/apply/test_frame_apply.py
@@ -1699,13 +1699,11 @@ def foo2(x, b=2, c=0):
def test_agg_std():
df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"])

with tm.assert_produces_warning(FutureWarning, match="using DataFrame.std"):
result = df.agg(np.std)
result = df.agg(np.std, ddof=1)
expected = Series({"A": 2.0, "B": 2.0}, dtype=float)
tm.assert_series_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match="using Series.std"):
result = df.agg([np.std])
result = df.agg([np.std], ddof=1)
expected = DataFrame({"A": 2.0, "B": 2.0}, index=["std"])
tm.assert_frame_equal(result, expected)

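On the ``ddof=1`` added in the test above: without the alias replacement, ``np.std`` is no longer swapped for ``DataFrame.std``, and NumPy defaults to the population standard deviation (``ddof=0``) while pandas defaults to the sample version (``ddof=1``). A quick comparison of the defaults (illustrative only):

```python
import numpy as np
import pandas as pd

s = pd.Series([0.0, 2.0, 4.0])

np.std(s.to_numpy())           # ~1.633, NumPy default ddof=0 (population std)
np.std(s.to_numpy(), ddof=1)   # 2.0, sample std
s.std()                        # 2.0, pandas default ddof=1
```
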
22 changes: 9 additions & 13 deletions pandas/tests/apply/test_frame_apply_relabeling.py
@@ -49,24 +49,20 @@ def test_agg_relabel_multi_columns_multi_methods():
def test_agg_relabel_partial_functions():
# GH 26513, test on partial, functools or more complex cases
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
msg = "using Series.[mean|min]"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
expected = pd.DataFrame(
{"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
)
tm.assert_frame_equal(result, expected)

msg = "using Series.[mean|min|max|sum]"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.agg(
foo=("A", min),
bar=("A", np.min),
cat=("B", max),
dat=("C", "min"),
f=("B", np.sum),
kk=("B", lambda x: min(x)),
)
result = df.agg(
foo=("A", min),
bar=("A", np.min),
cat=("B", max),
dat=("C", "min"),
f=("B", np.sum),
kk=("B", lambda x: min(x)),
)
expected = pd.DataFrame(
{
"A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
10 changes: 2 additions & 8 deletions pandas/tests/apply/test_series_apply.py
@@ -547,10 +547,7 @@ def test_apply_listlike_reducer(string_series, ops, names, how, kwargs):
# GH 39140
expected = Series({name: op(string_series) for name, op in zip(names, ops)})
expected.name = "series"
warn = FutureWarning if how == "agg" else None
msg = f"using Series.[{'|'.join(names)}]"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(string_series, how)(ops, **kwargs)
result = getattr(string_series, how)(ops, **kwargs)
tm.assert_series_equal(result, expected)


@@ -571,10 +568,7 @@ def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row):
# GH 39140
expected = Series({name: op(string_series) for name, op in ops.items()})
expected.name = string_series.name
warn = FutureWarning if how == "agg" else None
msg = "using Series.[sum|mean]"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(string_series, how)(ops, **kwargs)
result = getattr(string_series, how)(ops, **kwargs)
tm.assert_series_equal(result, expected)


12 changes: 3 additions & 9 deletions pandas/tests/apply/test_series_apply_relabeling.py
@@ -14,12 +14,8 @@ def test_relabel_no_duplicated_method():
expected = df["B"].agg({"foo": "min", "bar": "max"})
tm.assert_series_equal(result, expected)

msg = "using Series.[sum|min|max]"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df["B"].agg(foo=sum, bar=min, cat="max")
msg = "using Series.[sum|min|max]"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
result = df["B"].agg(foo=sum, bar=min, cat="max")
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
tm.assert_series_equal(result, expected)


@@ -32,8 +28,6 @@ def test_relabel_duplicated_method():
expected = pd.Series([6, 6], index=["foo", "bar"], name="A")
tm.assert_series_equal(result, expected)

msg = "using Series.min"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df["B"].agg(foo=min, bar="min")
result = df["B"].agg(foo=min, bar="min")
expected = pd.Series([1, 1], index=["foo", "bar"], name="B")
tm.assert_series_equal(result, expected)
19 changes: 4 additions & 15 deletions pandas/tests/groupby/aggregate/test_aggregate.py
@@ -289,9 +289,7 @@ def func(ser):
def test_agg_multiple_functions_maintain_order(df):
# GH #610
funcs = [("mean", np.mean), ("max", np.max), ("min", np.min)]
msg = "is currently using SeriesGroupBy.mean"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby("A")["C"].agg(funcs)
result = df.groupby("A")["C"].agg(funcs)
exp_cols = Index(["mean", "max", "min"])

tm.assert_index_equal(result.columns, exp_cols)
@@ -881,11 +879,9 @@ def test_agg_relabel_multiindex_column(
expected = DataFrame({"a_max": [1, 3]}, index=idx)
tm.assert_frame_equal(result, expected)

msg = "is currently using SeriesGroupBy.mean"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby(("x", "group")).agg(
col_1=agg_col1, col_2=agg_col2, col_3=agg_col3
)
result = df.groupby(("x", "group")).agg(
col_1=agg_col1, col_2=agg_col2, col_3=agg_col3
)
expected = DataFrame(
{"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx
)
@@ -1036,13 +1032,6 @@ def test_groupby_as_index_agg(df):
gr = df.groupby(ts)
gr.nth(0) # invokes set_selection_from_grouper internally

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
res = gr.apply(sum)
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
alt = df.groupby(ts).apply(sum)
tm.assert_frame_equal(res, alt)

for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]:
gr = df.groupby(ts, as_index=False)
left = getattr(gr, attr)()