Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix copy semantics in __array__ #60046

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,15 @@ def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
return self.to_numpy(dtype=dtype)
if copy is False:
# TODO: By using `zero_copy_only` it may be possible to implement this
raise ValueError(
"Unable to avoid copy while creating an array as requested."
)
elif copy is None:
copy = False # The NumPy copy=False meaning is different here.

return self.to_numpy(dtype=dtype, copy=copy)

def __invert__(self) -> Self:
# This is a bit wise op for integer types
Expand Down
26 changes: 17 additions & 9 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,11 +579,12 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
raise ValueError("Cannot convert float NaN to integer")

elif len(self.codes) == 0 or len(self.categories) == 0:
result = np.array(
self,
dtype=dtype,
copy=copy,
)
# For NumPy 1.x compatibility we cannot use copy=None. And
# `copy=False` has the meaning of `copy=None` here:
if not copy:
result = np.asarray(self, dtype=dtype)
else:
result = np.array(self, dtype=dtype)

else:
# GH8628 (PERF): astype category codes instead of astyping array
Expand Down Expand Up @@ -1663,7 +1664,7 @@ def __array__(
Specifies the dtype for the array.

copy : bool or None, optional
Unused.
See :func:`numpy.asarray`.

Returns
-------
Expand All @@ -1686,13 +1687,20 @@ def __array__(
>>> np.asarray(cat)
array(['a', 'b'], dtype=object)
"""
if copy is False:
raise ValueError(
"Unable to avoid copy while creating an array as requested."
)

ret = take_nd(self.categories._values, self._codes)
if dtype and np.dtype(dtype) != self.categories.dtype:
return np.asarray(ret, dtype)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not understand why this is needed. If dtypes match, NumPy should make it a no-op? If dtype is None, it is the same as not passing.

# When we're a Categorical[ExtensionArray], like Interval,
# we need to ensure __array__ gets all the way to an
# ndarray.
return np.asarray(ret)

if copy is None:
# Branch required since copy=None is not defined on 1.x
return np.asarray(ret, dtype=dtype)
return np.array(ret, dtype=dtype)

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# for binary ops, use our custom dunder methods
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,14 @@ def __array__(
) -> np.ndarray:
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
if is_object_dtype(dtype):
if copy is False:
raise ValueError(
"Unable to avoid copy while creating an array as requested."
)
return np.array(list(self), dtype=object)

if copy is True:
return np.array(self._ndarray, dtype=dtype)
return self._ndarray

@overload
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1606,6 +1606,11 @@ def __array__(
Return the IntervalArray's data as a numpy array of Interval
objects (with dtype='object')
"""
if copy is False:
raise ValueError(
"Unable to avoid copy while creating an array as requested."
)

left = self._left
right = self._right
mask = self.isna()
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,14 @@ def __array__(
the array interface, return my values
We return an object array here to preserve our scalar values
"""
return self.to_numpy(dtype=dtype)
if copy is False:
raise ValueError(
"Unable to avoid copy while creating an array as requested."
)

if copy is None:
copy = False # The NumPy copy=False meaning is different here.
return self.to_numpy(dtype=dtype, copy=copy)

_HANDLED_TYPES: tuple[type, ...]

Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ def dtype(self) -> NumpyEADtype:
def __array__(
    self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
    """
    Expose the wrapped ndarray through NumPy's array interface.

    Parameters
    ----------
    dtype : NpDtype or None, optional
        Requested dtype, forwarded to NumPy unchanged.
    copy : bool or None, optional
        ``None`` goes through ``np.asarray``; an explicit ``True`` or
        ``False`` is forwarded to ``np.array``.

    Returns
    -------
    np.ndarray
    """
    if copy is None:
        # `copy=None` is never handed to `np.array`, which keeps this
        # working on NumPy 1.x.
        return np.asarray(self._ndarray, dtype=dtype)
    return np.array(self._ndarray, dtype=dtype, copy=copy)

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,11 @@ def freqstr(self) -> str:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
if copy is False:
raise ValueError(
"Unable to avoid copy while creating an array as requested."
)

if dtype == "i8":
return self.asi8
elif dtype == bool:
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,11 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
if copy is False:
raise ValueError(
"Unable to avoid copy while creating an array as requested."
)

fill_value = self.fill_value

if self.sp_index.ngaps == 0:
Expand Down
13 changes: 11 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2015,8 +2015,17 @@ def __array__(
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
) -> np.ndarray:
values = self._values
arr = np.asarray(values, dtype=dtype)
if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block:
if copy is None:
# Note: branch avoids `copy=None` for NumPy 1.x support
arr = np.asarray(values, dtype=dtype)
else:
arr = np.array(values, dtype=dtype, copy=copy)

if (
copy is not False
and astype_is_view(values.dtype, arr.dtype)
and self._mgr.is_single_block
):
# Check if both conversions can be done without a copy
if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view(
values.dtype, arr.dtype
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,11 @@ def __array__(self, dtype=None, copy=None) -> np.ndarray:
"""
The array interface, return my values.
"""
return np.asarray(self._data, dtype=dtype)
if copy is None:
# Note: this branch avoids `copy=None` for NumPy 1.x support
return np.asarray(self._data, dtype=dtype)

return np.array(self._data, dtype=dtype, copy=copy)

def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1391,6 +1391,9 @@ def copy( # type: ignore[override]

def __array__(self, dtype=None, copy=None) -> np.ndarray:
    """
    The array interface: return my values.

    Only an explicit ``copy=True`` goes through ``np.array``; every other
    value of ``copy`` returns ``self.values`` as-is.
    """
    if copy is not True:
        # NOTE(review): `dtype` is not applied on this path -- confirm
        # that is intended.
        return self.values
    # `np.array` only ever sees `copy=True` here, never `copy=None`
    # (NumPy 1.x support).
    return np.array(self.values, dtype=dtype, copy=True)

def view(self, cls=None) -> Self:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def ndarray_to_mgr(
# and a subsequent `astype` will not already result in a copy
values = np.array(values, copy=True, order="F")
else:
values = np.array(values, copy=False)
values = np.asarray(values)
values = _ensure_2d(values)

else:
Expand Down
13 changes: 10 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ def __array__(
the dtype is inferred from the data.

copy : bool or None, optional
Unused.
See :func:`numpy.asarray`.

Returns
-------
Expand Down Expand Up @@ -879,8 +879,15 @@ def __array__(
dtype='datetime64[ns]')
"""
values = self._values
arr = np.asarray(values, dtype=dtype)
if astype_is_view(values.dtype, arr.dtype):
if copy is None:
# Note: branch avoids `copy=None` for NumPy 1.x support
arr = np.asarray(values, dtype=dtype)
else:
arr = np.array(values, dtype=dtype, copy=copy)

if copy is True:
return arr
if copy is False or astype_is_view(values.dtype, arr.dtype):
arr = arr.view()
arr.flags.writeable = False
return arr
Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,20 @@ def __ne__(self, other):
return NotImplemented

def __array__(self, dtype=None, copy=None):
    """
    Convert the stored data to a NumPy array.

    Parameters
    ----------
    dtype : dtype-like, optional
        Target dtype; ``None`` is treated as ``object``.
    copy : bool or None, optional
        ``False`` is rejected, ``None`` converts via ``np.asarray`` and
        ``True`` converts via ``np.array(..., copy=True)``.

    Returns
    -------
    np.ndarray

    Raises
    ------
    ValueError
        If ``copy=False`` is requested.
    """
    if copy is False:
        raise ValueError(
            "Unable to avoid copy while creating an array as requested."
        )

    if dtype is None:
        dtype = object
    if dtype == object:
        # on py38 builds it looks like numpy is inferring to a non-1D array
        return construct_1d_object_array_from_listlike(list(self))
    if copy is None:
        # Note: branch avoids `copy=None` for NumPy 1.x support
        return np.asarray(self.data, dtype=dtype)
    # `np.asarray` only accepts a `copy` keyword from NumPy 2.0 on, so the
    # explicit-copy case uses `np.array`, which takes `copy=True` on both
    # major versions.
    return np.array(self.data, dtype=dtype, copy=copy)

@property
def nbytes(self) -> int:
Expand Down