From 2d9c95ddb70f9c68e1ad4893d07bf0f68a23316e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 10:00:48 -0700 Subject: [PATCH 01/20] Bump mamba-org/setup-micromamba from 1 to 2 (#59988) Bumps [mamba-org/setup-micromamba](https://github.com/mamba-org/setup-micromamba) from 1 to 2. - [Release notes](https://github.com/mamba-org/setup-micromamba/releases) - [Commits](https://github.com/mamba-org/setup-micromamba/compare/v1...v2) --- updated-dependencies: - dependency-name: mamba-org/setup-micromamba dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/package-checks.yml | 2 +- .github/workflows/wheels.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package-checks.yml b/.github/workflows/package-checks.yml index 6748832903e30..331af6e05b650 100644 --- a/.github/workflows/package-checks.yml +++ b/.github/workflows/package-checks.yml @@ -67,7 +67,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: mamba-org/setup-micromamba@v1 + uses: mamba-org/setup-micromamba@v2 with: environment-name: recipe-test create-args: >- diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 2aaec8c9b56b0..de59a454c827c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -165,7 +165,7 @@ jobs: CIBW_PLATFORM: ${{ matrix.buildplat[1] == 'pyodide_wasm32' && 'pyodide' || 'auto' }} - name: Set up Python - uses: mamba-org/setup-micromamba@v1 + uses: mamba-org/setup-micromamba@v2 with: environment-name: wheel-env # Use a fixed Python, since we might have an unreleased Python not From e5dc0646bb4b945cec03cc328ac0989cfe0fa60a Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Mon, 7 Oct 2024 22:44:19 +0530 Subject: [PATCH 02/20] DOC: fix RT03,SA01,ES01 for pandas.io.stata.StataReader.value_labels (#59991) --- ci/code_checks.sh | 1 - pandas/io/stata.py | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 5487dc19338da..102abf4be187c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -174,7 +174,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.errors.ValueLabelTypeMismatch SA01" \ -i "pandas.infer_freq SA01" \ -i "pandas.io.json.build_table_schema PR07,RT03,SA01" \ - -i "pandas.io.stata.StataReader.value_labels RT03,SA01" \ -i "pandas.io.stata.StataReader.variable_labels RT03,SA01" \ -i "pandas.io.stata.StataWriter.write_file SA01" \ -i "pandas.json_normalize RT03,SA01" \ diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 6b988d8fed6bf..f1d289726c9c8 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2076,9 +2076,19 @@ def value_labels(self) -> dict[str, dict[int, str]]: """ Return a nested dict associating each variable name to its value and label. + This method retrieves the value labels from a Stata file. Value labels are + mappings between the coded values and their corresponding descriptive labels + in a Stata dataset. + Returns ------- dict + A python dictionary. + + See Also + -------- + read_stata : Read Stata file into DataFrame. + DataFrame.to_stata : Export DataFrame object to Stata dta format. Examples -------- From b3d0b9622bcd5bdf9733100407bd8b2695bc9af6 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Mon, 7 Oct 2024 22:45:35 +0530 Subject: [PATCH 03/20] DOC: fix RT03,SA01,ES01 for pandas.plotting.lag_plot (#59990) --- ci/code_checks.sh | 1 - pandas/plotting/_misc.py | 11 +++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 102abf4be187c..6a1b613eccb8b 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -178,7 +178,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.io.stata.StataWriter.write_file SA01" \ -i "pandas.json_normalize RT03,SA01" \ -i "pandas.plotting.andrews_curves RT03,SA01" \ - -i "pandas.plotting.lag_plot RT03,SA01" \ -i "pandas.plotting.scatter_matrix PR07,SA01" \ -i "pandas.set_eng_float_format RT03,SA01" \ -i "pandas.tseries.offsets.BDay PR02,SA01" \ diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 03701f8778065..81940613dd2b0 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -549,6 +549,10 @@ def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Ax """ Lag plot for time series. + A lag plot is a scatter plot of a time series against a lag of itself. It helps + in visualizing the temporal dependence between observations by plotting the values + at time `t` on the x-axis and the values at time `t + lag` on the y-axis. + Parameters ---------- series : Series @@ -563,6 +567,13 @@ def lag_plot(series: Series, lag: int = 1, ax: Axes | None = None, **kwds) -> Ax Returns ------- matplotlib.axes.Axes + The matplotlib Axes object containing the lag plot. + + See Also + -------- + plotting.autocorrelation_plot : Autocorrelation plot for time series. + matplotlib.pyplot.scatter : A scatter plot of y vs. x with varying marker size + and/or color in Matplotlib. Examples -------- From 02267e55586c33a4724dd5e9dbaecfe12e3aa8b4 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Mon, 7 Oct 2024 19:22:27 +0200 Subject: [PATCH 04/20] Typing: Added missing methods to `NaTType` stub (#59995) added missing methods to NaTType stub --- pandas/_libs/tslibs/nattype.pyi | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi index f49e894a0bfec..fa1577f033fff 100644 --- a/pandas/_libs/tslibs/nattype.pyi +++ b/pandas/_libs/tslibs/nattype.pyi @@ -9,6 +9,7 @@ from typing import ( Literal, NoReturn, TypeAlias, + overload, ) import numpy as np @@ -159,15 +160,31 @@ class NaTType: # inject Period properties @property def qyear(self) -> float: ... + # comparisons def __eq__(self, other: object) -> bool: ... def __ne__(self, other: object) -> bool: ... __lt__: _NatComparison __le__: _NatComparison __gt__: _NatComparison __ge__: _NatComparison + # unary operators + def __pos__(self) -> Self: ... + def __neg__(self) -> Self: ... + # binary operators def __sub__(self, other: Self | timedelta | datetime) -> Self: ... def __rsub__(self, other: Self | timedelta | datetime) -> Self: ... def __add__(self, other: Self | timedelta | datetime) -> Self: ... def __radd__(self, other: Self | timedelta | datetime) -> Self: ... + def __mul__(self, other: float) -> Self: ... # analogous to timedelta + def __rmul__(self, other: float) -> Self: ... + @overload # analogous to timedelta + def __truediv__(self, other: Self | timedelta) -> float: ... # Literal[NaN] + @overload + def __truediv__(self, other: float) -> Self: ... + @overload # analogous to timedelta + def __floordiv__(self, other: Self | timedelta) -> float: ... # Literal[NaN] + @overload + def __floordiv__(self, other: float) -> Self: ... + # other def __hash__(self) -> int: ... def as_unit(self, unit: str, round_ok: bool = ...) -> NaTType: ... From 37c31afa1be8b51af545a2dc3354acaf42a9c95e Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Mon, 7 Oct 2024 18:30:40 -0400 Subject: [PATCH 05/20] REGR: groupby.value_counts with all NA values (#59999) * REGR: groupby.value_counts with all NA values * Better implementation --- pandas/core/groupby/ops.py | 2 +- .../groupby/methods/test_value_counts.py | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index a82e77140d274..b32119a2ddbde 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -867,7 +867,7 @@ def _ob_index_and_ids( names=names, verify_integrity=False, ) - if not consistent_sorting: + if not consistent_sorting and len(ob_index) > 0: # Sort by the levels where the corresponding sort argument is True n_levels = len(sorts) drop_levels = [ diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 8f3022fbe551c..8ca6593a19f20 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -1219,3 +1219,25 @@ def test_value_counts_sort_categorical(sort, vc_sort, normalize): expected = expected.take(taker) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("groupby_sort", [True, False]) +def test_value_counts_all_na(sort, dropna, groupby_sort): + # GH#59989 + df = DataFrame({"a": [2, 1, 1], "b": np.nan}) + gb = df.groupby("a", sort=groupby_sort) + result = gb.value_counts(sort=sort, dropna=dropna) + + kwargs = {"levels": [[1, 2], [np.nan]], "names": ["a", "b"]} + if dropna: + data = [] + index = MultiIndex(codes=[[], []], **kwargs) + elif not groupby_sort and not sort: + data = [1, 2] + index = MultiIndex(codes=[[1, 0], [0, 0]], **kwargs) + else: + data = [2, 1] + index = MultiIndex(codes=[[0, 1], [0, 0]], **kwargs) + expected = Series(data, index=index, dtype="int64", name="count") + + tm.assert_series_equal(result, expected) From 5126dcaf88167ff869db874be40a520bb86a27ed Mon Sep 17 00:00:00 2001 From: Deepak Saldanha Date: Tue, 8 Oct 2024 08:05:04 +0530 Subject: [PATCH 06/20] Doc: Update docstring for `dummy_na` parameter (#60000) * update docstring for dummy_na parameter * Update pandas/core/reshape/encoding.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --------- Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/core/reshape/encoding.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index c397c1c2566a5..33ff182f5baee 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -68,7 +68,8 @@ def get_dummies( If appending prefix, separator/delimiter to use. Or pass a list or dictionary as with `prefix`. dummy_na : bool, default False - Add a column to indicate NaNs, if False NaNs are ignored. + If True, a NaN indicator column will be added even if no NaN values are present. + If False, NA values are encoded as all zero. columns : list-like, default None Column names in the DataFrame to be encoded. If `columns` is None then all the columns with From 5ea5bd95d5bb93434fb5f1686f50b176c46dbac8 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Tue, 8 Oct 2024 20:36:41 +0200 Subject: [PATCH 07/20] Typing: More precise NaT stub (#60002) * more precise NaT stub * ruff format * updated == and != to return literal --- pandas/_libs/tslibs/nattype.pyi | 40 +++++++++++++++------------------ 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyi b/pandas/_libs/tslibs/nattype.pyi index fa1577f033fff..d3b10fbe79cb9 100644 --- a/pandas/_libs/tslibs/nattype.pyi +++ b/pandas/_libs/tslibs/nattype.pyi @@ -25,12 +25,8 @@ NaT: NaTType iNaT: int nat_strings: set[str] -_NaTComparisonTypes: TypeAlias = ( - datetime | timedelta | Period | np.datetime64 | np.timedelta64 -) - -class _NatComparison: - def __call__(self, other: _NaTComparisonTypes) -> bool: ... +_TimeLike: TypeAlias = datetime | timedelta | Period | np.datetime64 | np.timedelta64 +_TimeDelta: TypeAlias = timedelta | np.timedelta64 class NaTType: _value: np.int64 @@ -161,30 +157,30 @@ class NaTType: @property def qyear(self) -> float: ... # comparisons - def __eq__(self, other: object) -> bool: ... - def __ne__(self, other: object) -> bool: ... - __lt__: _NatComparison - __le__: _NatComparison - __gt__: _NatComparison - __ge__: _NatComparison + def __eq__(self, other: object, /) -> Literal[False]: ... + def __ne__(self, other: object, /) -> Literal[True]: ... + def __lt__(self, other: Self | _TimeLike, /) -> Literal[False]: ... + def __le__(self, other: Self | _TimeLike, /) -> Literal[False]: ... + def __gt__(self, other: Self | _TimeLike, /) -> Literal[False]: ... + def __ge__(self, other: Self | _TimeLike, /) -> Literal[False]: ... # unary operators def __pos__(self) -> Self: ... def __neg__(self) -> Self: ... # binary operators - def __sub__(self, other: Self | timedelta | datetime) -> Self: ... - def __rsub__(self, other: Self | timedelta | datetime) -> Self: ... - def __add__(self, other: Self | timedelta | datetime) -> Self: ... - def __radd__(self, other: Self | timedelta | datetime) -> Self: ... - def __mul__(self, other: float) -> Self: ... # analogous to timedelta - def __rmul__(self, other: float) -> Self: ... + def __sub__(self, other: Self | _TimeLike, /) -> Self: ... + def __rsub__(self, other: Self | _TimeLike, /) -> Self: ... + def __add__(self, other: Self | _TimeLike, /) -> Self: ... + def __radd__(self, other: Self | _TimeLike, /) -> Self: ... + def __mul__(self, other: float, /) -> Self: ... # analogous to timedelta + def __rmul__(self, other: float, /) -> Self: ... @overload # analogous to timedelta - def __truediv__(self, other: Self | timedelta) -> float: ... # Literal[NaN] + def __truediv__(self, other: Self | _TimeDelta, /) -> float: ... # Literal[NaN] @overload - def __truediv__(self, other: float) -> Self: ... + def __truediv__(self, other: float, /) -> Self: ... @overload # analogous to timedelta - def __floordiv__(self, other: Self | timedelta) -> float: ... # Literal[NaN] + def __floordiv__(self, other: Self | _TimeDelta, /) -> float: ... # Literal[NaN] @overload - def __floordiv__(self, other: float) -> Self: ... + def __floordiv__(self, other: float, /) -> Self: ... # other def __hash__(self) -> int: ... def as_unit(self, unit: str, round_ok: bool = ...) -> NaTType: ... From f94860e1ce75b57db9eda2c37154c5b22b661121 Mon Sep 17 00:00:00 2001 From: Deepak Saldanha Date: Wed, 9 Oct 2024 00:11:39 +0530 Subject: [PATCH 08/20] DOC: Refactor _create_delegator_method using functools (#59878) * add tag dt.to_timestamp, series.rst * add doc strings for dt.to_timestamp * update datetimes.py * refactor _create_delegator_method to use functools wrap * changes to accessor.py * remove from code_checks.sh * update code_checks.sh * update code_checks.sh * rewrite functools, adjust unit tests * update change log * remove dup entry * update code_checks.sh * update * revert all dt related changes * update series.rst * update imports * format use of functools import --- ci/code_checks.sh | 20 -------------------- pandas/core/accessor.py | 7 ++++--- pandas/core/arrays/categorical.py | 14 ++++++++++++-- 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 6a1b613eccb8b..6fb675069e81d 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -73,27 +73,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Period.freq GL08" \ -i "pandas.Period.ordinal GL08" \ -i "pandas.RangeIndex.from_range PR01,SA01" \ - -i "pandas.Series.cat.add_categories PR01,PR02" \ - -i "pandas.Series.cat.as_ordered PR01" \ - -i "pandas.Series.cat.as_unordered PR01" \ - -i "pandas.Series.cat.remove_categories PR01,PR02" \ - -i "pandas.Series.cat.remove_unused_categories PR01" \ - -i "pandas.Series.cat.rename_categories PR01,PR02" \ - -i "pandas.Series.cat.reorder_categories PR01,PR02" \ - -i "pandas.Series.cat.set_categories PR01,PR02" \ - -i "pandas.Series.dt.as_unit PR01,PR02" \ - -i "pandas.Series.dt.ceil PR01,PR02" \ - -i "pandas.Series.dt.day_name PR01,PR02" \ - -i "pandas.Series.dt.floor PR01,PR02" \ -i "pandas.Series.dt.freq GL08" \ - -i "pandas.Series.dt.month_name PR01,PR02" \ - -i "pandas.Series.dt.normalize PR01" \ - -i "pandas.Series.dt.round PR01,PR02" \ - -i "pandas.Series.dt.strftime PR01,PR02" \ - -i "pandas.Series.dt.to_period PR01,PR02" \ - -i "pandas.Series.dt.total_seconds PR01" \ - -i "pandas.Series.dt.tz_convert PR01,PR02" \ - -i "pandas.Series.dt.tz_localize PR01,PR02" \ -i "pandas.Series.dt.unit GL08" \ -i "pandas.Series.pad PR01,SA01" \ -i "pandas.Timedelta.max PR02" \ diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index d8463fda34caa..78684eacf2d66 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -7,6 +7,7 @@ from __future__ import annotations +import functools from typing import ( TYPE_CHECKING, final, @@ -117,12 +118,12 @@ def _setter(self, new_values): ) def _create_delegator_method(name: str): + method = getattr(delegate, accessor_mapping(name)) + + @functools.wraps(method) def f(self, *args, **kwargs): return self._delegate_method(name, *args, **kwargs) - f.__name__ = name - f.__doc__ = getattr(delegate, accessor_mapping(name)).__doc__ - return f for name in accessors: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a69e197df851d..0484ef89f61c2 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1155,6 +1155,12 @@ def rename_categories(self, new_categories) -> Self: """ Rename categories. + This method is commonly used to re-label or adjust the + category names in categorical data without changing the + underlying data. It is useful in situations where you want + to modify the labels used for clarity, consistency, + or readability. + Parameters ---------- new_categories : list-like, dict-like or callable @@ -1371,8 +1377,8 @@ def remove_categories(self, removals) -> Self: """ Remove the specified categories. - `removals` must be included in the old categories. Values which were in - the removed categories will be set to NaN + The ``removals`` argument must be a subset of the current categories. + Any values that were part of the removed categories will be set to NaN. Parameters ---------- @@ -1431,6 +1437,10 @@ def remove_unused_categories(self) -> Self: """ Remove categories which are not used. + This method is useful when working with datasets + that undergo dynamic changes where categories may no longer be + relevant, allowing to maintain a clean, efficient data structure. + Returns ------- Categorical From b975191afe1401f13ab5e15d3df83b5d95dffe75 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 8 Oct 2024 22:00:43 +0200 Subject: [PATCH 09/20] Fix Styler docstring (#60001) * Fix Styler docstring * Remove blankspaces --- pandas/io/formats/style.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 6e5ae09485951..eb6773310da69 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -222,6 +222,7 @@ class Styler(StylerRenderer): * ``level`` where `k` is the level in a MultiIndex * Column label cells include + * ``col_heading`` * ``col`` where `n` is the numeric position of the column * ``level`` where `k` is the level in a MultiIndex @@ -231,7 +232,7 @@ class Styler(StylerRenderer): * Trimmed cells include ``col_trim`` or ``row_trim``. Any, or all, or these classes can be renamed by using the ``css_class_names`` - argument in ``Styler.set_table_classes``, giving a value such as + argument in ``Styler.set_table_styles``, giving a value such as *{"row": "MY_ROW_CLASS", "col_trim": "", "row_trim": ""}*. Examples From a0f9140b942d9f596889cd26ac395551dcdf3afb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Oct 2024 14:51:49 -0700 Subject: [PATCH 10/20] [pre-commit.ci] pre-commit autoupdate (#59998) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.0 → v0.6.9](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.0...v0.6.9) - [github.com/jendrikseipp/vulture: v2.11 → v2.13](https://github.com/jendrikseipp/vulture/compare/v2.11...v2.13) - [github.com/pre-commit/pre-commit-hooks: v4.6.0 → v5.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.6.0...v5.0.0) - [github.com/asottile/pyupgrade: v3.16.0 → v3.17.0](https://github.com/asottile/pyupgrade/compare/v3.16.0...v3.17.0) - [github.com/sphinx-contrib/sphinx-lint: v0.9.1 → v1.0.0](https://github.com/sphinx-contrib/sphinx-lint/compare/v0.9.1...v1.0.0) - [github.com/pre-commit/mirrors-clang-format: v18.1.8 → v19.1.1](https://github.com/pre-commit/mirrors-clang-format/compare/v18.1.8...v19.1.1) * Update .pre-commit-config.yaml * fix style.ipynb, ignore some pylint * pyupgrade * Revert "pyupgrade" This reverts commit b539c71009ff15769c501cf170ed9894a49ddcfb. * don't bump pyupgrade * Typo in random call * Delete hidden cell * Undo max/min rule from ruff --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- .pre-commit-config.yaml | 10 +- doc/source/user_guide/style.ipynb | 689 ++++++++++++++------------ pandas/core/arrays/string_.py | 2 +- pandas/tests/indexes/test_old_base.py | 2 +- pyproject.toml | 9 +- 5 files changed, 396 insertions(+), 316 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f6717dd503c9b..7c9ebf7d94173 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ ci: skip: [pyright, mypy] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.0 + rev: v0.6.9 hooks: - id: ruff args: [--exit-non-zero-on-fix] @@ -34,7 +34,7 @@ repos: - id: ruff-format exclude: ^scripts|^pandas/tests/frame/test_query_eval.py - repo: https://github.com/jendrikseipp/vulture - rev: 'v2.11' + rev: 'v2.13' hooks: - id: vulture entry: python scripts/run_vulture.py @@ -52,7 +52,7 @@ repos: - id: cython-lint - id: double-quote-cython-strings - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: check-case-conflict - id: check-toml @@ -90,12 +90,12 @@ repos: types: [text] # overwrite types: [rst] types_or: [python, rst] - repo: https://github.com/sphinx-contrib/sphinx-lint - rev: v0.9.1 + rev: v1.0.0 hooks: - id: sphinx-lint args: ["--enable", "all", "--disable", "line-too-long"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v18.1.8 + rev: v19.1.1 hooks: - id: clang-format files: ^pandas/_libs/src|^pandas/_libs/include diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index daecfce6ecebc..abb7181fc8d72 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -38,19 +38,6 @@ "[concatfunc]: ../reference/api/pandas.io.formats.style.Styler.concat.rst" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "nbsphinx": "hidden" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot\n", - "# We have this here to trigger matplotlib's font cache stuff.\n", - "# This cell is hidden from the output" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -78,17 +65,13 @@ "source": [ "import pandas as pd\n", "import numpy as np\n", - "import matplotlib as mpl\n", "\n", - "df = pd.DataFrame({\n", - " \"strings\": [\"Adam\", \"Mike\"],\n", - " \"ints\": [1, 3],\n", - " \"floats\": [1.123, 1000.23]\n", - "})\n", - "df.style \\\n", - " .format(precision=3, thousands=\".\", decimal=\",\") \\\n", - " .format_index(str.upper, axis=1) \\\n", - " .relabel_index([\"row 1\", \"row 2\"], axis=0)" + "df = pd.DataFrame(\n", + " {\"strings\": [\"Adam\", \"Mike\"], \"ints\": [1, 3], \"floats\": [1.123, 1000.23]}\n", + ")\n", + "df.style.format(precision=3, thousands=\".\", decimal=\",\").format_index(\n", + " str.upper, axis=1\n", + ").relabel_index([\"row 1\", \"row 2\"], axis=0)" ] }, { @@ -104,17 +87,21 @@ "metadata": {}, "outputs": [], "source": [ - "weather_df = pd.DataFrame(np.random.rand(10,2)*5, \n", - " index=pd.date_range(start=\"2021-01-01\", periods=10),\n", - " columns=[\"Tokyo\", \"Beijing\"])\n", + "weather_df = pd.DataFrame(\n", + " np.random.default_rng().random((10, 2)) * 5,\n", + " index=pd.date_range(start=\"2021-01-01\", periods=10),\n", + " columns=[\"Tokyo\", \"Beijing\"],\n", + ")\n", + "\n", "\n", - "def rain_condition(v): \n", + "def rain_condition(v):\n", " if v < 1.75:\n", " return \"Dry\"\n", " elif v < 2.75:\n", " return \"Rain\"\n", " return \"Heavy Rain\"\n", "\n", + "\n", "def make_pretty(styler):\n", " styler.set_caption(\"Weather Conditions\")\n", " styler.format(rain_condition)\n", @@ -122,6 +109,7 @@ " styler.background_gradient(axis=None, vmin=1, vmax=5, cmap=\"YlGnBu\")\n", " return styler\n", "\n", + "\n", "weather_df" ] }, @@ -157,10 +145,8 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.DataFrame(np.random.randn(5, 5))\n", - "df.style \\\n", - " .hide(subset=[0, 2, 4], axis=0) \\\n", - " .hide(subset=[0, 2, 4], axis=1)" + "df = pd.DataFrame(np.random.default_rng().standard_normal((5, 5)))\n", + "df.style.hide(subset=[0, 2, 4], axis=0).hide(subset=[0, 2, 4], axis=1)" ] }, { @@ -177,9 +163,9 @@ "outputs": [], "source": [ "show = [0, 2, 4]\n", - "df.style \\\n", - " .hide([row for row in df.index if row not in show], axis=0) \\\n", - " .hide([col for col in df.columns if col not in show], axis=1)" + "df.style.hide([row for row in df.index if row not in show], axis=0).hide(\n", + " [col for col in df.columns if col not in show], axis=1\n", + ")" ] }, { @@ -199,9 +185,9 @@ "metadata": {}, "outputs": [], "source": [ - "summary_styler = df.agg([\"sum\", \"mean\"]).style \\\n", - " .format(precision=3) \\\n", - " .relabel_index([\"Sum\", \"Average\"])\n", + "summary_styler = (\n", + " df.agg([\"sum\", \"mean\"]).style.format(precision=3).relabel_index([\"Sum\", \"Average\"])\n", + ")\n", "df.style.format(precision=1).concat(summary_styler)" ] }, @@ -227,9 +213,16 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.DataFrame([[38.0, 2.0, 18.0, 22.0, 21, np.nan],[19, 439, 6, 452, 226,232]], \n", - " index=pd.Index(['Tumour (Positive)', 'Non-Tumour (Negative)'], name='Actual Label:'), \n", - " columns=pd.MultiIndex.from_product([['Decision Tree', 'Regression', 'Random'],['Tumour', 'Non-Tumour']], names=['Model:', 'Predicted:']))\n", + "idx = pd.Index([\"Tumour (Positive)\", \"Non-Tumour (Negative)\"], name=\"Actual Label:\")\n", + "cols = pd.MultiIndex.from_product(\n", + " [[\"Decision Tree\", \"Regression\", \"Random\"], [\"Tumour\", \"Non-Tumour\"]],\n", + " names=[\"Model:\", \"Predicted:\"],\n", + ")\n", + "df = pd.DataFrame(\n", + " [[38.0, 2.0, 18.0, 22.0, 21, np.nan], [19, 439, 6, 452, 226, 232]],\n", + " index=idx,\n", + " columns=cols,\n", + ")\n", "df.style" ] }, @@ -242,63 +235,68 @@ "outputs": [], "source": [ "# Hidden cell to just create the below example: code is covered throughout the guide.\n", - "s = df.style\\\n", - " .hide([('Random', 'Tumour'), ('Random', 'Non-Tumour')], axis='columns')\\\n", - " .format('{:.0f}')\\\n", - " .set_table_styles([{\n", - " 'selector': '',\n", - " 'props': 'border-collapse: separate;'\n", - " },{\n", - " 'selector': 'caption',\n", - " 'props': 'caption-side: bottom; font-size:1.3em;'\n", - " },{\n", - " 'selector': '.index_name',\n", - " 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'\n", - " },{\n", - " 'selector': 'th:not(.index_name)',\n", - " 'props': 'background-color: #000066; color: white;'\n", - " },{\n", - " 'selector': 'th.col_heading',\n", - " 'props': 'text-align: center;'\n", - " },{\n", - " 'selector': 'th.col_heading.level0',\n", - " 'props': 'font-size: 1.5em;'\n", - " },{\n", - " 'selector': 'th.col2',\n", - " 'props': 'border-left: 1px solid white;'\n", - " },{\n", - " 'selector': '.col2',\n", - " 'props': 'border-left: 1px solid #000066;'\n", - " },{\n", - " 'selector': 'td',\n", - " 'props': 'text-align: center; font-weight:bold;'\n", - " },{\n", - " 'selector': '.true',\n", - " 'props': 'background-color: #e6ffe6;'\n", - " },{\n", - " 'selector': '.false',\n", - " 'props': 'background-color: #ffe6e6;'\n", - " },{\n", - " 'selector': '.border-red',\n", - " 'props': 'border: 2px dashed red;'\n", - " },{\n", - " 'selector': '.border-green',\n", - " 'props': 'border: 2px dashed green;'\n", - " },{\n", - " 'selector': 'td:hover',\n", - " 'props': 'background-color: #ffffb3;'\n", - " }])\\\n", - " .set_td_classes(pd.DataFrame([['true border-green', 'false', 'true', 'false border-red', '', ''],\n", - " ['false', 'true', 'false', 'true', '', '']], \n", - " index=df.index, columns=df.columns))\\\n", - " .set_caption(\"Confusion matrix for multiple cancer prediction models.\")\\\n", - " .set_tooltips(pd.DataFrame([['This model has a very strong true positive rate', '', '', \"This model's total number of false negatives is too high\", '', ''],\n", - " ['', '', '', '', '', '']], \n", - " index=df.index, columns=df.columns),\n", - " css_class='pd-tt', props=\n", - " 'visibility: hidden; position: absolute; z-index: 1; border: 1px solid #000066;'\n", - " 'background-color: white; color: #000066; font-size: 0.8em;' \n", - " 'transform: translate(0px, -24px); padding: 0.6em; border-radius: 0.5em;')\n" + "s = (\n", + " df.style.hide([(\"Random\", \"Tumour\"), (\"Random\", \"Non-Tumour\")], axis=\"columns\")\n", + " .format(\"{:.0f}\")\n", + " .set_table_styles(\n", + " [\n", + " {\"selector\": \"\", \"props\": \"border-collapse: separate;\"},\n", + " {\"selector\": \"caption\", \"props\": \"caption-side: bottom; font-size:1.3em;\"},\n", + " {\n", + " \"selector\": \".index_name\",\n", + " \"props\": \"font-style: italic; color: darkgrey; font-weight:normal;\",\n", + " },\n", + " {\n", + " \"selector\": \"th:not(.index_name)\",\n", + " \"props\": \"background-color: #000066; color: white;\",\n", + " },\n", + " {\"selector\": \"th.col_heading\", \"props\": \"text-align: center;\"},\n", + " {\"selector\": \"th.col_heading.level0\", \"props\": \"font-size: 1.5em;\"},\n", + " {\"selector\": \"th.col2\", \"props\": \"border-left: 1px solid white;\"},\n", + " {\"selector\": \".col2\", \"props\": \"border-left: 1px solid #000066;\"},\n", + " {\"selector\": \"td\", \"props\": \"text-align: center; font-weight:bold;\"},\n", + " {\"selector\": \".true\", \"props\": \"background-color: #e6ffe6;\"},\n", + " {\"selector\": \".false\", \"props\": \"background-color: #ffe6e6;\"},\n", + " {\"selector\": \".border-red\", \"props\": \"border: 2px dashed red;\"},\n", + " {\"selector\": \".border-green\", \"props\": \"border: 2px dashed green;\"},\n", + " {\"selector\": \"td:hover\", \"props\": \"background-color: #ffffb3;\"},\n", + " ]\n", + " )\n", + " .set_td_classes(\n", + " pd.DataFrame(\n", + " [\n", + " [\"true border-green\", \"false\", \"true\", \"false border-red\", \"\", \"\"],\n", + " [\"false\", \"true\", \"false\", \"true\", \"\", \"\"],\n", + " ],\n", + " index=df.index,\n", + " columns=df.columns,\n", + " )\n", + " )\n", + " .set_caption(\"Confusion matrix for multiple cancer prediction models.\")\n", + " .set_tooltips(\n", + " pd.DataFrame(\n", + " [\n", + " [\n", + " \"This model has a very strong true positive rate\",\n", + " \"\",\n", + " \"\",\n", + " \"This model's total number of false negatives is too high\",\n", + " \"\",\n", + " \"\",\n", + " ],\n", + " [\"\", \"\", \"\", \"\", \"\", \"\"],\n", + " ],\n", + " index=df.index,\n", + " columns=df.columns,\n", + " ),\n", + " css_class=\"pd-tt\",\n", + " props=\"visibility: hidden; \"\n", + " \"position: absolute; z-index: 1; \"\n", + " \"border: 1px solid #000066;\"\n", + " \"background-color: white; color: #000066; font-size: 0.8em;\"\n", + " \"transform: translate(0px, -24px); padding: 0.6em; border-radius: 0.5em;\",\n", + " )\n", + ")" ] }, { @@ -325,7 +323,9 @@ "metadata": {}, "outputs": [], "source": [ - "s = df.style.format('{:.0f}').hide([('Random', 'Tumour'), ('Random', 'Non-Tumour')], axis=\"columns\")\n", + "s = df.style.format(\"{:.0f}\").hide(\n", + " [(\"Random\", \"Tumour\"), (\"Random\", \"Non-Tumour\")], axis=\"columns\"\n", + ")\n", "s" ] }, @@ -337,8 +337,8 @@ }, "outputs": [], "source": [ - "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", - "s.set_uuid('after_hide')" + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting\n", + "s.set_uuid(\"after_hide\")" ] }, { @@ -395,16 +395,16 @@ "outputs": [], "source": [ "cell_hover = { # for row hover use instead of \n", - " 'selector': 'td:hover',\n", - " 'props': [('background-color', '#ffffb3')]\n", + " \"selector\": \"td:hover\",\n", + " \"props\": [(\"background-color\", \"#ffffb3\")],\n", "}\n", "index_names = {\n", - " 'selector': '.index_name',\n", - " 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'\n", + " \"selector\": \".index_name\",\n", + " \"props\": \"font-style: italic; color: darkgrey; font-weight:normal;\",\n", "}\n", "headers = {\n", - " 'selector': 'th:not(.index_name)',\n", - " 'props': 'background-color: #000066; color: white;'\n", + " \"selector\": \"th:not(.index_name)\",\n", + " \"props\": \"background-color: #000066; color: white;\",\n", "}\n", "s.set_table_styles([cell_hover, index_names, headers])" ] @@ -417,8 +417,8 @@ }, "outputs": [], "source": [ - "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", - "s.set_uuid('after_tab_styles1')" + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting\n", + "s.set_uuid(\"after_tab_styles1\")" ] }, { @@ -434,11 +434,14 @@ "metadata": {}, "outputs": [], "source": [ - "s.set_table_styles([\n", - " {'selector': 'th.col_heading', 'props': 'text-align: center;'},\n", - " {'selector': 'th.col_heading.level0', 'props': 'font-size: 1.5em;'},\n", - " {'selector': 'td', 'props': 'text-align: center; font-weight: bold;'},\n", - "], overwrite=False)" + "s.set_table_styles(\n", + " [\n", + " {\"selector\": \"th.col_heading\", \"props\": \"text-align: center;\"},\n", + " {\"selector\": \"th.col_heading.level0\", \"props\": \"font-size: 1.5em;\"},\n", + " {\"selector\": \"td\", \"props\": \"text-align: center; font-weight: bold;\"},\n", + " ],\n", + " overwrite=False,\n", + ")" ] }, { @@ -449,8 +452,8 @@ }, "outputs": [], "source": [ - "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", - "s.set_uuid('after_tab_styles2')" + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting\n", + "s.set_uuid(\"after_tab_styles2\")" ] }, { @@ -468,10 +471,16 @@ "metadata": {}, "outputs": [], "source": [ - "s.set_table_styles({\n", - " ('Regression', 'Tumour'): [{'selector': 'th', 'props': 'border-left: 1px solid white'},\n", - " {'selector': 'td', 'props': 'border-left: 1px solid #000066'}]\n", - "}, overwrite=False, axis=0)" + "s.set_table_styles(\n", + " {\n", + " (\"Regression\", \"Tumour\"): [\n", + " {\"selector\": \"th\", \"props\": \"border-left: 1px solid white\"},\n", + " {\"selector\": \"td\", \"props\": \"border-left: 1px solid #000066\"},\n", + " ]\n", + " },\n", + " overwrite=False,\n", + " axis=0,\n", + ")" ] }, { @@ -482,8 +491,8 @@ }, "outputs": [], "source": [ - "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting \n", - "s.set_uuid('xyz01')" + "# Hidden cell to avoid CSS clashes and latter code upcoding previous formatting\n", + "s.set_uuid(\"xyz01\")" ] }, { @@ -508,7 +517,7 @@ "outputs": [], "source": [ "out = s.set_table_attributes('class=\"my-table-cls\"').to_html()\n", - "print(out[out.find('