Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into Solution-for-issue-#…
Browse files Browse the repository at this point in the history
…60044-by-ZKaoChi
  • Loading branch information
ZKaoChi committed Oct 16, 2024
2 parents aeb73af + 2a10e04 commit 5910018
Show file tree
Hide file tree
Showing 58 changed files with 410 additions and 258 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ jobs:
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"

- name: Build wheels
uses: pypa/cibuildwheel@v2.20.0
uses: pypa/cibuildwheel@v2.21.0
with:
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
env:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ repos:
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v3.16.0
rev: v3.17.0
hooks:
- id: pyupgrade
args: [--py310-plus]
Expand Down Expand Up @@ -112,7 +112,7 @@ repos:
types: [python]
stages: [manual]
additional_dependencies: &pyright_dependencies
- [email protected].352
- [email protected].383
- id: pyright
# note: assumes python env is setup and activated
name: pyright reportGeneralTypeIssues
Expand Down
12 changes: 6 additions & 6 deletions doc/source/whatsnew/v0.12.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,9 @@ API changes
to be inserted if ``True``, default is ``False`` (same as prior to 0.12) (:issue:`3679`)
- Implement ``__nonzero__`` for ``NDFrame`` objects (:issue:`3691`, :issue:`3696`)

- IO api
- IO API

- added top-level function ``read_excel`` to replace the following,
- Added top-level function ``read_excel`` to replace the following,
The original API is deprecated and will be removed in a future version

.. code-block:: python
Expand All @@ -153,7 +153,7 @@ API changes
pd.read_excel("path_to_file.xls", "Sheet1", index_col=None, na_values=["NA"])
- added top-level function ``read_sql`` that is equivalent to the following
- Added top-level function ``read_sql`` that is equivalent to the following

.. code-block:: python
Expand Down Expand Up @@ -482,11 +482,11 @@ Bug fixes

- ``HDFStore``

- will retain index attributes (freq,tz,name) on recreation (:issue:`3499`)
- will warn with a ``AttributeConflictWarning`` if you are attempting to append
- Will retain index attributes (freq,tz,name) on recreation (:issue:`3499`)
- Will warn with a ``AttributeConflictWarning`` if you are attempting to append
an index with a different frequency than the existing, or attempting
to append an index with a different name than the existing
- support datelike columns with a timezone as data_columns (:issue:`2852`)
- Support datelike columns with a timezone as data_columns (:issue:`2852`)

- Non-unique index support clarified (:issue:`3468`).

Expand Down
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ Enhancements

.. _whatsnew_300.enhancements.enhancement1:

enhancement1
Enhancement1
^^^^^^^^^^^^

.. _whatsnew_300.enhancements.enhancement2:

enhancement2
Enhancement2
^^^^^^^^^^^^

.. _whatsnew_300.enhancements.other:
Expand Down
6 changes: 3 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ dependencies:
- cxx-compiler

# code checks
- flake8=6.1.0 # run in subprocess over docstring examples
- mypy=1.9.0 # pre-commit uses locally installed mypy
- flake8=7.1.0 # run in subprocess over docstring examples
- mypy=1.11.2 # pre-commit uses locally installed mypy
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
- pre-commit>=3.6.0
- pre-commit>=4.0.1

# documentation
- gitpython # obtain contributors from git for whatsnew
Expand Down
4 changes: 2 additions & 2 deletions pandas/_config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def __dir__(self) -> list[str]:


@contextmanager
def option_context(*args) -> Generator[None, None, None]:
def option_context(*args) -> Generator[None]:
"""
Context manager to temporarily set options in a ``with`` statement.
Expand Down Expand Up @@ -718,7 +718,7 @@ def _build_option_description(k: str) -> str:


@contextmanager
def config_prefix(prefix: str) -> Generator[None, None, None]:
def config_prefix(prefix: str) -> Generator[None]:
"""
contextmanager for multiple invocations of API with a common prefix
Expand Down
2 changes: 1 addition & 1 deletion pandas/_config/localization.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
@contextmanager
def set_locale(
new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL
) -> Generator[str | tuple[str, str], None, None]:
) -> Generator[str | tuple[str, str]]:
"""
Context manager for temporarily setting a locale.
Expand Down
2 changes: 1 addition & 1 deletion pandas/_testing/_warnings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def assert_produces_warning(
raise_on_extra_warnings: bool = True,
match: str | tuple[str | None, ...] | None = None,
must_find_all_warnings: bool = True,
) -> Generator[list[warnings.WarningMessage], None, None]:
) -> Generator[list[warnings.WarningMessage]]:
"""
Context manager for running code expected to either raise a specific warning,
multiple specific warnings, or not raise any warnings. Verifies that the code
Expand Down
8 changes: 4 additions & 4 deletions pandas/_testing/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
@contextmanager
def decompress_file(
path: FilePath | BaseBuffer, compression: CompressionOptions
) -> Generator[IO[bytes], None, None]:
) -> Generator[IO[bytes]]:
"""
Open a compressed file and return a file object.
Expand All @@ -50,7 +50,7 @@ def decompress_file(


@contextmanager
def set_timezone(tz: str) -> Generator[None, None, None]:
def set_timezone(tz: str) -> Generator[None]:
"""
Context manager for temporarily setting a timezone.
Expand Down Expand Up @@ -92,7 +92,7 @@ def setTZ(tz) -> None:


@contextmanager
def ensure_clean(filename=None) -> Generator[Any, None, None]:
def ensure_clean(filename=None) -> Generator[Any]:
"""
Gets a temporary path and agrees to remove on close.
Expand Down Expand Up @@ -124,7 +124,7 @@ def ensure_clean(filename=None) -> Generator[Any, None, None]:


@contextmanager
def with_csv_dialect(name: str, **kwargs) -> Generator[None, None, None]:
def with_csv_dialect(name: str, **kwargs) -> Generator[None]:
"""
Context manager to temporarily register a CSV dialect for parsing CSV.
Expand Down
2 changes: 1 addition & 1 deletion pandas/compat/pickle_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def loads(


@contextlib.contextmanager
def patch_pickle() -> Generator[None, None, None]:
def patch_pickle() -> Generator[None]:
"""
Temporarily patch pickle to use our unpickler.
"""
Expand Down
12 changes: 4 additions & 8 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,12 +246,8 @@ def transform(self) -> DataFrame | Series:
and not obj.empty
):
raise ValueError("Transform function failed")
# error: Argument 1 to "__get__" of "AxisProperty" has incompatible type
# "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy,
# DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
# Series]"
if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
obj.index # type: ignore[arg-type]
obj.index
):
raise ValueError("Function did not transform")

Expand Down Expand Up @@ -803,7 +799,7 @@ def result_columns(self) -> Index:

@property
@abc.abstractmethod
def series_generator(self) -> Generator[Series, None, None]:
def series_generator(self) -> Generator[Series]:
pass

@staticmethod
Expand Down Expand Up @@ -1128,7 +1124,7 @@ class FrameRowApply(FrameApply):
axis: AxisInt = 0

@property
def series_generator(self) -> Generator[Series, None, None]:
def series_generator(self) -> Generator[Series]:
return (self.obj._ixs(i, axis=1) for i in range(len(self.columns)))

@staticmethod
Expand Down Expand Up @@ -1235,7 +1231,7 @@ def apply_broadcast(self, target: DataFrame) -> DataFrame:
return result.T

@property
def series_generator(self) -> Generator[Series, None, None]:
def series_generator(self) -> Generator[Series]:
values = self.values
values = ensure_wrapped_if_datetimelike(values)
assert len(values) > 0
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,12 +403,12 @@ def _reconstruct(result):
# for np.<ufunc>(..) calls
# kwargs cannot necessarily be handled block-by-block, so only
# take this path if there are no kwargs
mgr = inputs[0]._mgr
mgr = inputs[0]._mgr # pyright: ignore[reportGeneralTypeIssues]
result = mgr.apply(getattr(ufunc, method))
else:
# otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
# Those can have an axis keyword and thus can't be called block-by-block
result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs) # pyright: ignore[reportGeneralTypeIssues]
# e.g. np.negative (only one reached), with "where" and "out" in kwargs

result = reconstruct(result)
Expand Down
44 changes: 27 additions & 17 deletions pandas/core/arrays/_arrow_string_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@

import numpy as np

from pandas._libs import lib
from pandas.compat import (
pa_version_under10p1,
pa_version_under11p0,
pa_version_under13p0,
pa_version_under17p0,
)

from pandas.core.dtypes.missing import isna

if not pa_version_under10p1:
import pyarrow as pa
import pyarrow.compute as pc
Expand All @@ -38,7 +37,7 @@ class ArrowStringArrayMixin:
def __init__(self, *args, **kwargs) -> None:
raise NotImplementedError

def _convert_bool_result(self, result):
def _convert_bool_result(self, result, na=lib.no_default, method_name=None):
# Convert a bool-dtype result to the appropriate result type
raise NotImplementedError

Expand Down Expand Up @@ -212,7 +211,9 @@ def _str_removesuffix(self, suffix: str):
result = pc.if_else(ends_with, removed, self._pa_array)
return type(self)(result)

def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
def _str_startswith(
self, pat: str | tuple[str, ...], na: Scalar | lib.NoDefault = lib.no_default
):
if isinstance(pat, str):
result = pc.starts_with(self._pa_array, pattern=pat)
else:
Expand All @@ -225,11 +226,11 @@ def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):

for p in pat[1:]:
result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
result = result.fill_null(na)
return self._convert_bool_result(result)
return self._convert_bool_result(result, na=na, method_name="startswith")

def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
def _str_endswith(
self, pat: str | tuple[str, ...], na: Scalar | lib.NoDefault = lib.no_default
):
if isinstance(pat, str):
result = pc.ends_with(self._pa_array, pattern=pat)
else:
Expand All @@ -242,9 +243,7 @@ def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):

for p in pat[1:]:
result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
result = result.fill_null(na)
return self._convert_bool_result(result)
return self._convert_bool_result(result, na=na, method_name="endswith")

def _str_isalnum(self):
result = pc.utf8_is_alnum(self._pa_array)
Expand Down Expand Up @@ -283,7 +282,12 @@ def _str_isupper(self):
return self._convert_bool_result(result)

def _str_contains(
self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
self,
pat,
case: bool = True,
flags: int = 0,
na: Scalar | lib.NoDefault = lib.no_default,
regex: bool = True,
):
if flags:
raise NotImplementedError(f"contains not implemented with {flags=}")
Expand All @@ -293,19 +297,25 @@ def _str_contains(
else:
pa_contains = pc.match_substring
result = pa_contains(self._pa_array, pat, ignore_case=not case)
if not isna(na): # pyright: ignore [reportGeneralTypeIssues]
result = result.fill_null(na)
return self._convert_bool_result(result)
return self._convert_bool_result(result, na=na, method_name="contains")

def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
self,
pat: str,
case: bool = True,
flags: int = 0,
na: Scalar | lib.NoDefault = lib.no_default,
):
if not pat.startswith("^"):
pat = f"^{pat}"
return self._str_contains(pat, case, flags, na, regex=True)

def _str_fullmatch(
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
self,
pat,
case: bool = True,
flags: int = 0,
na: Scalar | lib.NoDefault = lib.no_default,
):
if not pat.endswith("$") or pat.endswith("\\$"):
pat = f"{pat}$"
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2318,7 +2318,9 @@ def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
for chunk in self._pa_array.iterchunks()
]

def _convert_bool_result(self, result):
def _convert_bool_result(self, result, na=lib.no_default, method_name=None):
if na is not lib.no_default and not isna(na): # pyright: ignore [reportGeneralTypeIssues]
result = result.fill_null(na)
return type(self)(result)

def _convert_int_result(self, result):
Expand Down Expand Up @@ -2426,7 +2428,7 @@ def _str_rindex(self, sub: str, start: int = 0, end: int | None = None) -> Self:
result = self._apply_elementwise(predicate)
return type(self)(pa.chunked_array(result))

def _str_normalize(self, form: str) -> Self:
def _str_normalize(self, form: Literal["NFC", "NFD", "NFKC", "NFKD"]) -> Self:
predicate = lambda val: unicodedata.normalize(form, val)
result = self._apply_elementwise(predicate)
return type(self)(pa.chunked_array(result))
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ def _coerce_to_array(
assert dtype == "boolean"
return coerce_to_array(value, copy=copy)

def _logical_method(self, other, op):
def _logical_method(self, other, op): # type: ignore[override]
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
other_is_scalar = lib.is_scalar(other)
mask = None
Expand Down
20 changes: 16 additions & 4 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2679,16 +2679,28 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
# ------------------------------------------------------------------------
# String methods interface
def _str_map(
self, f, na_value=np.nan, dtype=np.dtype("object"), convert: bool = True
self, f, na_value=lib.no_default, dtype=np.dtype("object"), convert: bool = True
):
# Optimization to apply the callable `f` to the categories once
# and rebuild the result by `take`ing from the result with the codes.
# Returns the same type as the object-dtype implementation though.
from pandas.core.arrays import NumpyExtensionArray

categories = self.categories
codes = self.codes
result = NumpyExtensionArray(categories.to_numpy())._str_map(f, na_value, dtype)
if categories.dtype == "string":
result = categories.array._str_map(f, na_value, dtype) # type: ignore[attr-defined]
if (
categories.dtype.na_value is np.nan # type: ignore[union-attr]
and is_bool_dtype(dtype)
and (na_value is lib.no_default or isna(na_value))
):
# NaN propagates as False for functions with boolean return type
na_value = False
else:
from pandas.core.arrays import NumpyExtensionArray

result = NumpyExtensionArray(categories.to_numpy())._str_map(
f, na_value, dtype
)
return take_nd(result, codes, fill_value=na_value)

def _str_get_dummies(self, sep: str = "|", dtype: NpDtype | None = None):
Expand Down
Loading

0 comments on commit 5910018

Please sign in to comment.