From 03f46acf2cb3d0cc9e2cb827e6c65c9179513b18 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sat, 24 Feb 2024 23:29:56 -0700 Subject: [PATCH 1/2] resolve GL08 for pandas.core.groupby.SeriesGroupBy.value_counts --- ci/code_checks.sh | 1 - pandas/core/groupby/generic.py | 79 ++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 47a2cf93a4f89..7e622bb0d74a9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -179,7 +179,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Timestamp.tzinfo\ pandas.Timestamp.value\ pandas.Timestamp.year\ - pandas.core.groupby.SeriesGroupBy.value_counts\ pandas.tseries.offsets.BQuarterBegin.is_anchored\ pandas.tseries.offsets.BQuarterBegin.is_on_offset\ pandas.tseries.offsets.BQuarterBegin.n\ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3011fa235b22d..f8114b2325a6d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -801,6 +801,85 @@ def value_counts( bins=None, dropna: bool = True, ) -> Series | DataFrame: + """ + Return a Series or DataFrame containing counts of unique values. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + normalize : bool, default False + Return proportions rather than frequencies. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. + bins : int or list of ints, optional + Rather than count values, group them into half-open bins, + a convenience for pd.cut, only works with numeric data. + dropna : bool, default True + Don't include counts of rows that contain NA values. + + Returns + ------- + Series or DataFrame + Series if the groupby as_index is True, otherwise DataFrame. + + See Also + -------- + Series.value_counts: Equivalent method on Series. + DataFrame.value_counts: Equivalent method on DataFrame.
+ DataFrameGroupBy.value_counts: Equivalent method on DataFrameGroupBy. + + Notes + ----- + - If the groupby as_index is True then the returned Series will have a + MultiIndex with one level per input column. + - If the groupby as_index is False then the returned DataFrame will have an + additional column with the value_counts. The column is labelled 'count' or + 'proportion', depending on the ``normalize`` parameter. + + By default, rows that contain any NA values are omitted from + the result. + + By default, the result will be in descending order so that the + first element of each group is the most frequently-occurring row. + + Examples + -------- + >>> s = pd.Series( + ... [1, 1, 2, 3, 2, 3, 3, 1, 1, 3, 3, 3], + ... index=["A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"], + ... ) + >>> s + A 1 + A 1 + A 2 + A 3 + A 2 + A 3 + B 3 + B 1 + B 1 + B 3 + B 3 + B 3 + dtype: int64 + >>> g1 = s.groupby(s.index) + >>> g1.value_counts(bins=2) + A (0.997, 2.0] 4 + (2.0, 3.0] 2 + B (2.0, 3.0] 4 + (0.997, 2.0] 2 + Name: count, dtype: int64 + >>> g1.value_counts(normalize=True) + A 1 0.333333 + 2 0.333333 + 3 0.333333 + B 3 0.666667 + 1 0.333333 + Name: proportion, dtype: float64 + """ name = "proportion" if normalize else "count" if bins is None: From bc5d86f71839569b3c9657eb8d014ff529046bd9 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 25 Feb 2024 14:36:46 -0700 Subject: [PATCH 2/2] update as_index to ``as_index`` --- pandas/core/groupby/generic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f8114b2325a6d..390b698503adb 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -823,7 +823,7 @@ def value_counts( Returns ------- Series or DataFrame - Series if the groupby as_index is True, otherwise DataFrame. 
+ Series if the groupby ``as_index`` is True, otherwise DataFrame. See Also -------- @@ -833,9 +833,9 @@ def value_counts( Notes ----- - - If the groupby as_index is True then the returned Series will have a + - If the groupby ``as_index`` is True then the returned Series will have a MultiIndex with one level per input column. - - If the groupby as_index is False then the returned DataFrame will have an + - If the groupby ``as_index`` is False then the returned DataFrame will have an additional column with the value_counts. The column is labelled 'count' or 'proportion', depending on the ``normalize`` parameter. @@ -2382,7 +2382,7 @@ def value_counts( Returns ------- Series or DataFrame - Series if the groupby as_index is True, otherwise DataFrame. + Series if the groupby ``as_index`` is True, otherwise DataFrame. See Also -------- @@ -2392,9 +2392,9 @@ def value_counts( Notes ----- - - If the groupby as_index is True then the returned Series will have a + - If the groupby ``as_index`` is True then the returned Series will have a MultiIndex with one level per input column. - - If the groupby as_index is False then the returned DataFrame will have an + - If the groupby ``as_index`` is False then the returned DataFrame will have an additional column with the value_counts. The column is labelled 'count' or 'proportion', depending on the ``normalize`` parameter.