Skip to content

Commit

Permalink
REF (string): Move StringArrayNumpySemantics methods to base class (pandas-dev#59514)
Browse files Browse the repository at this point in the history

* REF (string): Move StringArrayNumpySemantics methods to base class

* mypy fixup
  • Loading branch information
jbrockmendel authored Aug 14, 2024
1 parent d36c589 commit 0851ac3
Showing 1 changed file with 23 additions and 33 deletions.
56 changes: 23 additions & 33 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,12 @@ def _reduce(
axis: AxisInt | None = 0,
**kwargs,
):
if self.dtype.na_value is np.nan and name in ["any", "all"]:
if name == "any":
return nanops.nanany(self._ndarray, skipna=skipna)
else:
return nanops.nanall(self._ndarray, skipna=skipna)

if name in ["min", "max"]:
result = getattr(self, name)(skipna=skipna, axis=axis)
if keepdims:
Expand All @@ -754,6 +760,12 @@ def _reduce(

raise TypeError(f"Cannot perform reduction '{name}' with string dtype")

def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
if self.dtype.na_value is np.nan and result is libmissing.NA:
# the masked_reductions use pd.NA -> convert to np.nan
return np.nan
return super()._wrap_reduction_result(axis, result)

def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
nv.validate_min((), kwargs)
result = masked_reductions.min(
Expand All @@ -771,8 +783,11 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
def value_counts(self, dropna: bool = True) -> Series:
from pandas.core.algorithms import value_counts_internal as value_counts

result = value_counts(self._ndarray, sort=False, dropna=dropna).astype("Int64")
result = value_counts(self._ndarray, sort=False, dropna=dropna)
result.index = result.index.astype(self.dtype)

if self.dtype.na_value is libmissing.NA:
result = result.astype("Int64")
return result

def memory_usage(self, deep: bool = False) -> int:
Expand Down Expand Up @@ -823,7 +838,13 @@ def _cmp_method(self, other, op):
# logical
result = np.zeros(len(self._ndarray), dtype="bool")
result[valid] = op(self._ndarray[valid], other)
return BooleanArray(result, mask)
res_arr = BooleanArray(result, mask)
if self.dtype.na_value is np.nan:
if op == operator.ne:
return res_arr.to_numpy(np.bool_, na_value=True)
else:
return res_arr.to_numpy(np.bool_, na_value=False)
return res_arr

_arith_method = _cmp_method

Expand Down Expand Up @@ -864,37 +885,6 @@ def _from_backing_data(self, arr: np.ndarray) -> StringArrayNumpySemantics:
# we always preserve the dtype
return NDArrayBacked._from_backing_data(self, arr)

def _reduce(
self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
):
if name in ["any", "all"]:
if name == "any":
return nanops.nanany(self._ndarray, skipna=skipna)
else:
return nanops.nanall(self._ndarray, skipna=skipna)
else:
return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)

def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
# the masked_reductions use pd.NA
if result is libmissing.NA:
return np.nan
return super()._wrap_reduction_result(axis, result)

def _cmp_method(self, other, op):
result = super()._cmp_method(other, op)
if op == operator.ne:
return result.to_numpy(np.bool_, na_value=True)
else:
return result.to_numpy(np.bool_, na_value=False)

def value_counts(self, dropna: bool = True) -> Series:
from pandas.core.algorithms import value_counts_internal as value_counts

result = value_counts(self._ndarray, sort=False, dropna=dropna)
result.index = result.index.astype(self.dtype)
return result

# ------------------------------------------------------------------------
# String methods interface
_str_na_value = np.nan

0 comments on commit 0851ac3

Please sign in to comment.