Skip to content

Commit

Permalink
DEPR: DataFrameGroupBy.corrwith (#58732)
Browse files Browse the repository at this point in the history
  • Loading branch information
pedrocariellof authored Jun 3, 2024
1 parent ff550e6 commit 76c7274
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 25 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ starting with 3.0, so it can be safely removed from your code.
Other Deprecations
^^^^^^^^^^^^^^^^^^

- Deprecated :meth:`.DataFrameGroupBy.corrwith` (:issue:`57158`)
- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`)
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
Expand Down
1 change: 1 addition & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def pytest_collection_modifyitems(items, config) -> None:
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
("is_sparse", "is_sparse is deprecated"),
("DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna is deprecated"),
("DataFrameGroupBy.corrwith", "DataFrameGroupBy.corrwith is deprecated"),
("NDFrame.replace", "Series.replace without 'value'"),
("NDFrame.clip", "Downcasting behavior in Series and DataFrame methods"),
("Series.idxmin", "The behavior of Series.idxmin"),
Expand Down
9 changes: 9 additions & 0 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
Union,
cast,
)
import warnings

import numpy as np

Expand All @@ -32,6 +33,7 @@
Substitution,
doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
ensure_int64,
Expand Down Expand Up @@ -2726,6 +2728,8 @@ def corrwith(
"""
Compute pairwise correlation.

.. deprecated:: 3.0.0

Pairwise correlation is computed between rows or columns of
DataFrame with rows or columns of Series or DataFrame. DataFrames
are first aligned along both axes before computing the
Expand Down Expand Up @@ -2785,6 +2789,11 @@ def corrwith(
2 0.755929 NaN
3 0.576557 NaN
"""
warnings.warn(
"DataFrameGroupBy.corrwith is deprecated",
FutureWarning,
stacklevel=find_stack_level(),
)
result = self._op_via_apply(
"corrwith",
other=other,
Expand Down
27 changes: 14 additions & 13 deletions pandas/tests/groupby/test_all_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ def test_multiindex_group_all_columns_when_empty(groupby_func):
gb = df.groupby(["a", "b", "c"], group_keys=False)
method = getattr(gb, groupby_func)
args = get_groupby_method_args(groupby_func, df)

warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
if groupby_func == "corrwith":
warn = FutureWarning
warn_msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
warn_msg = ""
with tm.assert_produces_warning(warn, match=warn_msg):
result = method(*args).index
expected = df.index
Expand All @@ -42,18 +45,12 @@ def test_duplicate_columns(request, groupby_func, as_index):
df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
args = get_groupby_method_args(groupby_func, df)
gb = df.groupby("a", as_index=as_index)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = getattr(gb, groupby_func)(*args)
result = getattr(gb, groupby_func)(*args)

expected_df = df.set_axis(["a", "b", "c"], axis=1)
expected_args = get_groupby_method_args(groupby_func, expected_df)
expected_gb = expected_df.groupby("a", as_index=as_index)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
expected = getattr(expected_gb, groupby_func)(*expected_args)
expected = getattr(expected_gb, groupby_func)(*expected_args)
if groupby_func not in ("size", "ngroup", "cumcount"):
expected = expected.rename(columns={"c": "b"})
tm.assert_equal(result, expected)
Expand All @@ -74,8 +71,12 @@ def test_dup_labels_output_shape(groupby_func, idx):
grp_by = df.groupby([0])

args = get_groupby_method_args(groupby_func, df)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
if groupby_func == "corrwith":
warn = FutureWarning
warn_msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
warn_msg = ""
with tm.assert_produces_warning(warn, match=warn_msg):
result = getattr(grp_by, groupby_func)(*args)

Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,7 +1197,14 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
# Check output when another method is called before .apply()
grp = df.groupby(by="a")
args = get_groupby_method_args(reduction_func, df)
_ = getattr(grp, reduction_func)(*args)
if reduction_func == "corrwith":
warn = FutureWarning
msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
msg = ""
with tm.assert_produces_warning(warn, match=msg):
_ = getattr(grp, reduction_func)(*args)
result = grp.apply(np.sum, axis=0, include_groups=False)
tm.assert_frame_equal(result, expected)

Expand Down
28 changes: 24 additions & 4 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1473,7 +1473,14 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun
df_grp = df.groupby(["cat_1", "cat_2"], observed=True)

args = get_groupby_method_args(reduction_func, df)
res = getattr(df_grp, reduction_func)(*args)
if reduction_func == "corrwith":
warn = FutureWarning
warn_msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
warn_msg = ""
with tm.assert_produces_warning(warn, match=warn_msg):
res = getattr(df_grp, reduction_func)(*args)

for cat in unobserved_cats:
assert cat not in res.index
Expand Down Expand Up @@ -1512,7 +1519,14 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
getattr(df_grp, reduction_func)(*args)
return

res = getattr(df_grp, reduction_func)(*args)
if reduction_func == "corrwith":
warn = FutureWarning
warn_msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
warn_msg = ""
with tm.assert_produces_warning(warn, match=warn_msg):
res = getattr(df_grp, reduction_func)(*args)

expected = _results_for_groupbys_with_missing_categories[reduction_func]

Expand Down Expand Up @@ -1904,8 +1918,14 @@ def test_category_order_reducer(
):
getattr(gb, reduction_func)(*args)
return

op_result = getattr(gb, reduction_func)(*args)
if reduction_func == "corrwith":
warn = FutureWarning
warn_msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
warn_msg = ""
with tm.assert_produces_warning(warn, match=warn_msg):
op_result = getattr(gb, reduction_func)(*args)
if as_index:
result = op_result.index.get_level_values("a").categories
else:
Expand Down
18 changes: 16 additions & 2 deletions pandas/tests/groupby/test_groupby_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,14 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki
return

gb_filled = df_filled.groupby(keys, observed=observed, sort=sort, as_index=True)
expected = getattr(gb_filled, reduction_func)(*args_filled).reset_index()
if reduction_func == "corrwith":
warn = FutureWarning
msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
msg = ""
with tm.assert_produces_warning(warn, match=msg):
expected = getattr(gb_filled, reduction_func)(*args_filled).reset_index()
expected["x"] = expected["x"].cat.remove_categories([4])
if index_kind == "multi":
expected["x2"] = expected["x2"].cat.remove_categories([4])
Expand All @@ -567,7 +574,14 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki
if as_index:
expected = expected["size"].rename(None)

result = getattr(gb_keepna, reduction_func)(*args)
if reduction_func == "corrwith":
warn = FutureWarning
msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
msg = ""
with tm.assert_produces_warning(warn, match=msg):
result = getattr(gb_keepna, reduction_func)(*args)

# size will return a Series, others are DataFrame
tm.assert_equal(result, expected)
Expand Down
18 changes: 16 additions & 2 deletions pandas/tests/groupby/test_numeric_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,14 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
method = getattr(gb, kernel)
if has_arg and numeric_only is True:
# Cases where b does not appear in the result
result = method(*args, **kwargs)
if kernel == "corrwith":
warn = FutureWarning
msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
msg = ""
with tm.assert_produces_warning(warn, match=msg):
result = method(*args, **kwargs)
assert "b" not in result.columns
elif (
# kernels that work on any dtype and have numeric_only arg
Expand Down Expand Up @@ -296,7 +303,14 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
elif kernel == "idxmax":
msg = "'>' not supported between instances of 'type' and 'type'"
with pytest.raises(exception, match=msg):
method(*args, **kwargs)
if kernel == "corrwith":
warn = FutureWarning
msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
msg = ""
with tm.assert_produces_warning(warn, match=msg):
method(*args, **kwargs)
elif not has_arg and numeric_only is not lib.no_default:
with pytest.raises(
TypeError, match="got an unexpected keyword argument 'numeric_only'"
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/groupby/test_raises.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ def test_groupby_raises_string(
if groupby_func == "fillna":
kind = "Series" if groupby_series else "DataFrame"
warn_msg = f"{kind}GroupBy.fillna is deprecated"
elif groupby_func == "corrwith":
warn_msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn_msg = ""
_call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg)
Expand Down Expand Up @@ -288,6 +290,8 @@ def test_groupby_raises_datetime(
if groupby_func == "fillna":
kind = "Series" if groupby_series else "DataFrame"
warn_msg = f"{kind}GroupBy.fillna is deprecated"
elif groupby_func == "corrwith":
warn_msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn_msg = ""
_call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=warn_msg)
Expand Down Expand Up @@ -485,6 +489,8 @@ def test_groupby_raises_category(
if groupby_func == "fillna":
kind = "Series" if groupby_series else "DataFrame"
warn_msg = f"{kind}GroupBy.fillna is deprecated"
elif groupby_func == "corrwith":
warn_msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn_msg = ""
_call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg)
Expand Down Expand Up @@ -658,6 +664,8 @@ def test_groupby_raises_category_on_category(
if groupby_func == "fillna":
kind = "Series" if groupby_series else "DataFrame"
warn_msg = f"{kind}GroupBy.fillna is deprecated"
elif groupby_func == "corrwith":
warn_msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn_msg = ""
_call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg)
17 changes: 14 additions & 3 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,14 @@ def test_transform_agg_by_name(request, reduction_func, frame_or_series):
return

args = get_groupby_method_args(reduction_func, obj)
result = g.transform(func, *args)
if func == "corrwith":
warn = FutureWarning
msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
msg = ""
with tm.assert_produces_warning(warn, match=msg):
result = g.transform(func, *args)

# this is the *definition* of a transformation
tm.assert_index_equal(result.index, obj.index)
Expand Down Expand Up @@ -1468,8 +1475,12 @@ def test_as_index_no_change(keys, df, groupby_func):
args = get_groupby_method_args(groupby_func, df)
gb_as_index_true = df.groupby(keys, as_index=True)
gb_as_index_false = df.groupby(keys, as_index=False)
warn = FutureWarning if groupby_func == "fillna" else None
msg = "DataFrameGroupBy.fillna is deprecated"
if groupby_func == "corrwith":
warn = FutureWarning
msg = "DataFrameGroupBy.corrwith is deprecated"
else:
warn = None
msg = ""
with tm.assert_produces_warning(warn, match=msg):
result = gb_as_index_true.transform(groupby_func, *args)
with tm.assert_produces_warning(warn, match=msg):
Expand Down

0 comments on commit 76c7274

Please sign in to comment.