CLN: Enforce deprecation of using alias for builtin/NumPy funcs #57444

Merged: 17 commits, Feb 27, 2024
Changes from all commits
5 changes: 2 additions & 3 deletions doc/source/whatsnew/v0.15.1.rst
@@ -92,7 +92,7 @@ API changes

.. code-block:: ipython

In [4]: gr.apply(sum)
In [4]: gr.apply("sum")
Out[4]:
joe
jim
@@ -102,9 +102,8 @@ API changes
current behavior:

.. ipython:: python
:okwarning:

gr.apply(sum)
gr.apply("sum")

- Support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is
not found in the index (:issue:`7860`):
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -199,6 +199,7 @@ Removal of prior version deprecations/changes
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
- Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`)
- Passing both ``freq`` and ``fill_value`` in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`.DataFrameGroupBy.shift` now raises a ``ValueError`` (:issue:`54818`)
- Removed :meth:`DateOffset.is_anchored` and :meth:`offsets.Tick.is_anchored` (:issue:`56594`)
- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`)
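The whatsnew entry above is the core of this PR. A minimal sketch of the enforced behavior (illustrative only, not part of the diff; assumes pandas 3.0 semantics):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
gr = df.groupby("key")["val"]

# 2.x silently replaced the builtin/NumPy callable with the pandas method
# (with a FutureWarning); now the callable is used as-is.
gr.agg(np.sum)   # np.sum applied to each group
gr.agg(sum)      # Python builtin sum applied to each group

# To keep the pandas (cython) implementation, spell out the string alias.
gr.agg("sum")    # dispatches to SeriesGroupBy.sum
```
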
8 changes: 0 additions & 8 deletions pandas/_testing/__init__.py
@@ -398,9 +398,6 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager
return pytest.raises(expected_exception, match=None)


cython_table = pd.core.common._cython_table.items()


def get_cython_table_params(ndframe, func_names_and_expected):
"""
Combine frame, functions from com._cython_table
@@ -421,11 +418,6 @@ def get_cython_table_params(ndframe, func_names_and_expected):
results = []
for func_name, expected in func_names_and_expected:
results.append((ndframe, func_name, expected))
results += [
(ndframe, func, expected)
for func, name in cython_table
if name == func_name
]
return results


15 changes: 0 additions & 15 deletions pandas/core/apply.py
@@ -175,10 +175,7 @@ def agg(self) -> DataFrame | Series | None:
Result of aggregation, or None if agg cannot be performed by
this method.
"""
obj = self.obj
func = self.func
args = self.args
kwargs = self.kwargs

if isinstance(func, str):
return self.apply_str()
@@ -189,12 +186,6 @@
# we require a list, but not a 'str'
return self.agg_list_like()

if callable(func):
f = com.get_cython_func(func)
if f and not args and not kwargs:
warn_alias_replacement(obj, func, f)
return getattr(obj, f)()

# caller can react
return None

@@ -300,12 +291,6 @@ def transform_str_or_callable(self, func) -> DataFrame | Series:
if isinstance(func, str):
return self._apply_str(obj, func, *args, **kwargs)

if not args and not kwargs:
f = com.get_cython_func(func)
if f:
warn_alias_replacement(obj, func, f)
return getattr(obj, f)()

# Two possible ways to use a UDF - apply or call directly
try:
return obj.apply(func, args=args, **kwargs)
24 changes: 0 additions & 24 deletions pandas/core/common.py
@@ -608,22 +608,6 @@ def require_length_match(data, index: Index) -> None:
)


# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
builtins.sum: np.sum,
builtins.max: np.maximum.reduce,
builtins.min: np.minimum.reduce,
}

# GH#53425: Only for deprecation
_builtin_table_alias = {
builtins.sum: "np.sum",
builtins.max: "np.maximum.reduce",
builtins.min: "np.minimum.reduce",
}

_cython_table = {
builtins.sum: "sum",
builtins.max: "max",
@@ -660,14 +644,6 @@ def get_cython_func(arg: Callable) -> str | None:
return _cython_table.get(arg)


def is_builtin_func(arg):
"""
if we define a builtin function for this argument, return it,
otherwise return the arg
"""
return _builtin_table.get(arg, arg)


def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
"""
If a name is missing then replace it by level_n, where n is the count
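For context on the removed ``_builtin_table`` above: the deleted comment notes that the ufunc reductions and the top-level NumPy functions have different axis defaults. A quick illustration in plain NumPy (not part of the diff):

```python
import numpy as np

arr = np.arange(6).reshape(3, 2)  # [[0, 1], [2, 3], [4, 5]]

np.maximum.reduce(arr)  # defaults to axis=0 -> array([4, 5]), column-wise maxima
np.max(arr)             # defaults to axis=None -> 5, reduction over the whole array
```
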
16 changes: 0 additions & 16 deletions pandas/core/groupby/generic.py
@@ -59,7 +59,6 @@
maybe_mangle_lambdas,
reconstruct_func,
validate_func_kwargs,
warn_alias_replacement,
)
import pandas.core.common as com
from pandas.core.frame import DataFrame
@@ -357,11 +356,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
return ret

else:
cyfunc = com.get_cython_func(func)
if cyfunc and not args and not kwargs:
warn_alias_replacement(self, func, cyfunc)
return getattr(self, cyfunc)()

if maybe_use_numba(engine):
return self._aggregate_with_numba(
func, *args, engine_kwargs=engine_kwargs, **kwargs
@@ -409,11 +403,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
agg = aggregate

def _python_agg_general(self, func, *args, **kwargs):
orig_func = func
func = com.is_builtin_func(func)
if orig_func != func:
alias = com._builtin_table_alias[func]
warn_alias_replacement(self, orig_func, alias)
f = lambda x: func(x, *args, **kwargs)

obj = self._obj_with_exclusions
@@ -1656,11 +1645,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
agg = aggregate

def _python_agg_general(self, func, *args, **kwargs):
orig_func = func
func = com.is_builtin_func(func)
if orig_func != func:
alias = com._builtin_table_alias[func]
warn_alias_replacement(self, orig_func, alias)
f = lambda x: func(x, *args, **kwargs)

if self.ngroups == 0:
12 changes: 0 additions & 12 deletions pandas/core/groupby/groupby.py
@@ -94,7 +94,6 @@ class providing the base-class of operations.
sample,
)
from pandas.core._numba import executor
from pandas.core.apply import warn_alias_replacement
from pandas.core.arrays import (
ArrowExtensionArray,
BaseMaskedArray,
@@ -1647,12 +1646,6 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
b 2
dtype: int64
"""
orig_func = func
func = com.is_builtin_func(func)
if orig_func != func:
alias = com._builtin_table_alias[orig_func]
warn_alias_replacement(self, orig_func, alias)

if isinstance(func, str):
if hasattr(self, func):
res = getattr(self, func)
@@ -1868,11 +1861,6 @@ def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs):
@final
def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
# optimized transforms
orig_func = func
func = com.get_cython_func(func) or func
if orig_func != func:
warn_alias_replacement(self, orig_func, func)

if not isinstance(func, str):
return self._transform_general(func, engine, engine_kwargs, *args, **kwargs)

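With ``get_cython_func`` no longer consulted in ``_transform`` above, a NumPy reducer passed to ``transform`` goes through the general per-group path instead of being rewritten to the cython kernel. A hedged sketch of the two spellings (illustrative, reusing a hypothetical ``key``/``val`` frame):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
gb = df.groupby("key")["val"]

gb.transform("sum")   # cython-optimized groupby transform
gb.transform(np.sum)  # np.sum evaluated per group and broadcast back;
                      # no longer silently rewritten to the "sum" kernel
```
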
14 changes: 1 addition & 13 deletions pandas/core/resample.py
@@ -45,16 +45,12 @@
)

import pandas.core.algorithms as algos
from pandas.core.apply import (
ResamplerWindowApply,
warn_alias_replacement,
)
from pandas.core.apply import ResamplerWindowApply
from pandas.core.arrays import ArrowExtensionArray
from pandas.core.base import (
PandasObject,
SelectionMixin,
)
import pandas.core.common as com
from pandas.core.generic import (
NDFrame,
_shared_docs,
@@ -1609,10 +1605,6 @@ def _downsample(self, how, **kwargs):
how : string / cython mapped function
**kwargs : kw args passed to how function
"""
orig_how = how
how = com.get_cython_func(how) or how
if orig_how != how:
warn_alias_replacement(self, orig_how, how)
ax = self.ax

# Excludes `on` column when provided
@@ -1775,10 +1767,6 @@ def _downsample(self, how, **kwargs):
if self.kind == "timestamp":
return super()._downsample(how, **kwargs)

orig_how = how
how = com.get_cython_func(how) or how
if orig_how != how:
warn_alias_replacement(self, orig_how, how)
ax = self.ax

if is_subperiod(ax.freq, self.freq):
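The resampler loses the same rewrite in ``_downsample`` above. A sketch of how the two spellings now differ on a resampler (illustrative; assumes a small DatetimeIndex frame):

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2024-01-01", periods=6, freq="h")
df = pd.DataFrame({"val": range(6)}, index=idx)

r = df.resample("2h")
r.agg("sum")    # pandas Resampler.sum (cython path)
r.agg(np.sum)   # np.sum applied to each bin, no longer re-routed to Resampler.sum
```
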
6 changes: 2 additions & 4 deletions pandas/tests/apply/test_frame_apply.py
@@ -1699,13 +1699,11 @@ def foo2(x, b=2, c=0):
def test_agg_std():
df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"])

with tm.assert_produces_warning(FutureWarning, match="using DataFrame.std"):
result = df.agg(np.std)
result = df.agg(np.std, ddof=1)
expected = Series({"A": 2.0, "B": 2.0}, dtype=float)
tm.assert_series_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match="using Series.std"):
result = df.agg([np.std])
result = df.agg([np.std], ddof=1)
expected = DataFrame({"A": 2.0, "B": 2.0}, index=["std"])
tm.assert_frame_equal(result, expected)

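On the ``ddof=1`` added in the test above: without the alias replacement, ``np.std`` is no longer swapped for ``DataFrame.std``, and NumPy defaults to the population standard deviation (``ddof=0``) while pandas defaults to the sample version (``ddof=1``). A quick comparison of the defaults (illustrative only):

```python
import numpy as np
import pandas as pd

s = pd.Series([0.0, 2.0, 4.0])

np.std(s.to_numpy())           # ~1.633, NumPy default ddof=0 (population std)
np.std(s.to_numpy(), ddof=1)   # 2.0, sample std
s.std()                        # 2.0, pandas default ddof=1
```
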
22 changes: 9 additions & 13 deletions pandas/tests/apply/test_frame_apply_relabeling.py
@@ -49,24 +49,20 @@ def test_agg_relabel_multi_columns_multi_methods():
def test_agg_relabel_partial_functions():
# GH 26513, test on partial, functools or more complex cases
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
msg = "using Series.[mean|min]"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
expected = pd.DataFrame(
{"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
)
tm.assert_frame_equal(result, expected)

msg = "using Series.[mean|min|max|sum]"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.agg(
foo=("A", min),
bar=("A", np.min),
cat=("B", max),
dat=("C", "min"),
f=("B", np.sum),
kk=("B", lambda x: min(x)),
)
result = df.agg(
foo=("A", min),
bar=("A", np.min),
cat=("B", max),
dat=("C", "min"),
f=("B", np.sum),
kk=("B", lambda x: min(x)),
)
expected = pd.DataFrame(
{
"A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
10 changes: 2 additions & 8 deletions pandas/tests/apply/test_series_apply.py
@@ -547,10 +547,7 @@ def test_apply_listlike_reducer(string_series, ops, names, how, kwargs):
# GH 39140
expected = Series({name: op(string_series) for name, op in zip(names, ops)})
expected.name = "series"
warn = FutureWarning if how == "agg" else None
msg = f"using Series.[{'|'.join(names)}]"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(string_series, how)(ops, **kwargs)
result = getattr(string_series, how)(ops, **kwargs)
tm.assert_series_equal(result, expected)


@@ -571,10 +568,7 @@ def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row):
# GH 39140
expected = Series({name: op(string_series) for name, op in ops.items()})
expected.name = string_series.name
warn = FutureWarning if how == "agg" else None
msg = "using Series.[sum|mean]"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(string_series, how)(ops, **kwargs)
result = getattr(string_series, how)(ops, **kwargs)
tm.assert_series_equal(result, expected)


12 changes: 3 additions & 9 deletions pandas/tests/apply/test_series_apply_relabeling.py
@@ -14,12 +14,8 @@ def test_relabel_no_duplicated_method():
expected = df["B"].agg({"foo": "min", "bar": "max"})
tm.assert_series_equal(result, expected)

msg = "using Series.[sum|min|max]"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df["B"].agg(foo=sum, bar=min, cat="max")
msg = "using Series.[sum|min|max]"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
result = df["B"].agg(foo=sum, bar=min, cat="max")
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
tm.assert_series_equal(result, expected)


@@ -32,8 +28,6 @@ def test_relabel_duplicated_method():
expected = pd.Series([6, 6], index=["foo", "bar"], name="A")
tm.assert_series_equal(result, expected)

msg = "using Series.min"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df["B"].agg(foo=min, bar="min")
result = df["B"].agg(foo=min, bar="min")
expected = pd.Series([1, 1], index=["foo", "bar"], name="B")
tm.assert_series_equal(result, expected)
19 changes: 4 additions & 15 deletions pandas/tests/groupby/aggregate/test_aggregate.py
@@ -289,9 +289,7 @@ def func(ser):
def test_agg_multiple_functions_maintain_order(df):
# GH #610
funcs = [("mean", np.mean), ("max", np.max), ("min", np.min)]
msg = "is currently using SeriesGroupBy.mean"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby("A")["C"].agg(funcs)
result = df.groupby("A")["C"].agg(funcs)
exp_cols = Index(["mean", "max", "min"])

tm.assert_index_equal(result.columns, exp_cols)
@@ -881,11 +879,9 @@ def test_agg_relabel_multiindex_column(
expected = DataFrame({"a_max": [1, 3]}, index=idx)
tm.assert_frame_equal(result, expected)

msg = "is currently using SeriesGroupBy.mean"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby(("x", "group")).agg(
col_1=agg_col1, col_2=agg_col2, col_3=agg_col3
)
result = df.groupby(("x", "group")).agg(
col_1=agg_col1, col_2=agg_col2, col_3=agg_col3
)
expected = DataFrame(
{"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx
)
@@ -1036,13 +1032,6 @@ def test_groupby_as_index_agg(df):
gr = df.groupby(ts)
gr.nth(0) # invokes set_selection_from_grouper internally

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
res = gr.apply(sum)
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
alt = df.groupby(ts).apply(sum)
tm.assert_frame_equal(res, alt)

for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]:
gr = df.groupby(ts, as_index=False)
left = getattr(gr, attr)()