Skip to content

Commit

Permalink
MAINT: Adjust the codebase to the new np.array's copy keyword meaning (pandas-dev#57172)
Browse files Browse the repository at this point in the history

* MAINT: Adjust the codebase to the new np.array copy keyword meaning

* Add copy in docstring

* Use asarray where possible

---------

Co-authored-by: Matthew Roeschke <[email protected]>
  • Loading branch information
2 people authored and pmhatre1 committed May 7, 2024
1 parent b778905 commit 4c01d71
Show file tree
Hide file tree
Showing 33 changed files with 128 additions and 58 deletions.
6 changes: 3 additions & 3 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def quantile_with_mask(
interpolation=interpolation,
)

result = np.array(result, copy=False)
result = np.asarray(result)
result = result.T

return result
Expand Down Expand Up @@ -201,9 +201,9 @@ def _nanpercentile(
]
if values.dtype.kind == "f":
# preserve itemsize
result = np.array(result, dtype=values.dtype, copy=False).T
result = np.asarray(result, dtype=values.dtype).T
else:
result = np.array(result, copy=False).T
result = np.asarray(result).T
if (
result.dtype != values.dtype
and not mask.all()
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,9 @@ def __arrow_array__(self, type=None):
"""Convert myself to a pyarrow ChunkedArray."""
return self._pa_array

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
return self.to_numpy(dtype=dtype)

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,10 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:

return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)

return np.array(self, dtype=dtype, copy=copy)
if not copy:
return np.asarray(self, dtype=dtype)
else:
return np.array(self, dtype=dtype, copy=copy)

def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
"""
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1659,7 +1659,9 @@ def _validate_codes_for_dtype(cls, codes, *, dtype: CategoricalDtype) -> np.ndar
# -------------------------------------------------------------

@ravel_compat
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""
The numpy array interface.
Expand All @@ -1668,6 +1670,9 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
dtype : np.dtype or None
Specifies the dtype for the array.
copy : bool or None, optional
Unused.
Returns
-------
numpy.array
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,9 @@ def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
if is_object_dtype(dtype):
return np.array(list(self), dtype=object)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,12 +649,12 @@ def _resolution_obj(self) -> Resolution:
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype=None, copy=None) -> np.ndarray:
if dtype is None and self.tz:
# The default for tz-aware is object, to preserve tz info
dtype = object

return super().__array__(dtype=dtype)
return super().__array__(dtype=dtype, copy=copy)

def __iter__(self) -> Iterator:
"""
Expand Down Expand Up @@ -2421,7 +2421,7 @@ def objects_to_datetime64(
assert errors in ["raise", "coerce"]

# if str-dtype, convert
data = np.array(data, copy=False, dtype=np.object_)
data = np.asarray(data, dtype=np.object_)

result, tz_parsed = tslib.array_to_datetime(
data,
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1564,7 +1564,9 @@ def is_non_overlapping_monotonic(self) -> bool:
# ---------------------------------------------------------------------
# Conversion

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""
Return the IntervalArray's data as a numpy array of Interval
objects (with dtype='object')
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:

__array_priority__ = 1000 # higher than ndarray so ops dispatch to us

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""
the array interface, return my values
We return an object array here to preserve our scalar values
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,10 @@ def _coerce_to_data_and_mask(
return values, mask, dtype, inferred_type

original = values
values = np.array(values, copy=copy)
if not copy:
values = np.asarray(values)
else:
values = np.array(values, copy=copy)
inferred_type = None
if values.dtype == object or is_string_dtype(values.dtype):
inferred_type = lib.infer_dtype(values, skipna=True)
Expand All @@ -169,7 +172,10 @@ def _coerce_to_data_and_mask(
raise TypeError(f"{values.dtype} cannot be converted to {name}")

elif values.dtype.kind == "b" and checker(dtype):
values = np.array(values, dtype=default_dtype, copy=copy)
if not copy:
values = np.asarray(values, dtype=default_dtype)
else:
values = np.array(values, dtype=default_dtype, copy=copy)

elif values.dtype.kind not in "iuf":
name = dtype_cls.__name__.strip("_")
Expand Down Expand Up @@ -208,9 +214,9 @@ def _coerce_to_data_and_mask(
inferred_type not in ["floating", "mixed-integer-float"]
and not mask.any()
):
values = np.array(original, dtype=dtype, copy=False)
values = np.asarray(original, dtype=dtype)
else:
values = np.array(original, dtype="object", copy=False)
values = np.asarray(original, dtype="object")

# we copy as need to coerce here
if mask.any():
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ def dtype(self) -> NumpyEADtype:
# ------------------------------------------------------------------------
# NumPy Array Interface

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
return np.asarray(self._ndarray, dtype=dtype)

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,10 @@ def __init__(
raise raise_on_incompatible(values, dtype.freq)
values, dtype = values._ndarray, values.dtype

values = np.array(values, dtype="int64", copy=copy)
if not copy:
values = np.asarray(values, dtype="int64")
else:
values = np.array(values, dtype="int64", copy=copy)
if dtype is None:
raise ValueError("dtype is not specified and cannot be inferred")
dtype = cast(PeriodDtype, dtype)
Expand Down Expand Up @@ -400,7 +403,9 @@ def freq(self) -> BaseOffset:
def freqstr(self) -> str:
return PeriodDtype(self.freq)._freqstr

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
if dtype == "i8":
return self.asi8
elif dtype == bool:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,9 @@ def from_spmatrix(cls, data: spmatrix) -> Self:

return cls._simple_new(arr, index, dtype)

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
fill_value = self.fill_value

if self.sp_index.ngaps == 0:
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1072,7 +1072,10 @@ def sequence_to_td64ns(
# This includes datetime64-dtype, see GH#23539, GH#29794
raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]")

data = np.array(data, copy=copy)
if not copy:
data = np.asarray(data)
else:
data = np.array(data, copy=copy)

assert data.dtype.kind == "m"
assert data.dtype != "m8" # i.e. not unit-less
Expand Down Expand Up @@ -1152,7 +1155,7 @@ def _objects_to_td64ns(
higher level.
"""
# coerce Index to np.ndarray, converting string-dtype if necessary
values = np.array(data, dtype=np.object_, copy=False)
values = np.asarray(data, dtype=np.object_)

result = array_to_timedelta64(values, unit=unit, errors=errors)
return result.view("timedelta64[ns]")
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,10 @@ def sanitize_array(

elif hasattr(data, "__array__"):
# e.g. dask array GH#38645
data = np.array(data, copy=copy)
if not copy:
data = np.asarray(data)
else:
data = np.array(data, copy=copy)
return sanitize_array(
data,
index=index,
Expand Down Expand Up @@ -744,8 +747,11 @@ def _sanitize_str_dtypes(
# GH#19853: If data is a scalar, result has already the result
if not lib.is_scalar(data):
if not np.all(isna(data)):
data = np.array(data, dtype=dtype, copy=False)
result = np.array(data, dtype=object, copy=copy)
data = np.asarray(data, dtype=dtype)
if not copy:
result = np.asarray(data, dtype=object)
else:
result = np.array(data, dtype=object, copy=copy)
return result


Expand Down Expand Up @@ -810,6 +816,8 @@ def _try_cast(
# this will raise if we have e.g. floats

subarr = maybe_cast_to_integer_array(arr, dtype)
elif not copy:
subarr = np.asarray(arr, dtype=dtype)
else:
subarr = np.array(arr, dtype=dtype, copy=copy)

Expand Down
7 changes: 5 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1503,7 +1503,10 @@ def construct_2d_arraylike_from_scalar(

# Attempt to coerce to a numpy array
try:
arr = np.array(value, dtype=dtype, copy=copy)
if not copy:
arr = np.asarray(value, dtype=dtype)
else:
arr = np.array(value, dtype=dtype, copy=copy)
except (ValueError, TypeError) as err:
raise TypeError(
f"DataFrame constructor called with incompatible data and dtype: {err}"
Expand Down Expand Up @@ -1652,7 +1655,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
"out-of-bound Python int",
DeprecationWarning,
)
casted = np.array(arr, dtype=dtype, copy=False)
casted = np.asarray(arr, dtype=dtype)
else:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ def infer_fill_value(val):
"""
if not is_list_like(val):
val = [val]
val = np.array(val, copy=False)
val = np.asarray(val)
if val.dtype.kind in "mM":
return np.array("NaT", dtype=val.dtype)
elif val.dtype == object:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1919,7 +1919,7 @@ def to_numpy(
dtype = np.dtype(dtype)
result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value)
if result.dtype is not dtype:
result = np.array(result, dtype=dtype, copy=False)
result = np.asarray(result, dtype=dtype)

return result

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1989,7 +1989,9 @@ def empty(self) -> bool:
# GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
__array_priority__: int = 1000

def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
def __array__(
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
) -> np.ndarray:
values = self._values
arr = np.asarray(values, dtype=dtype)
if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,7 @@ def __len__(self) -> int:
"""
return len(self._data)

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype=None, copy=None) -> np.ndarray:
"""
The array interface, return my values.
"""
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ def _values(self) -> np.ndarray:
):
vals = vals.astype(object)

array_vals = np.array(vals, copy=False)
array_vals = np.asarray(vals)
array_vals = algos.take_nd(array_vals, codes, fill_value=index._na_value)
values.append(array_vals)

Expand Down Expand Up @@ -1330,7 +1330,7 @@ def copy( # type: ignore[override]
new_index._id = self._id
return new_index

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype=None, copy=None) -> np.ndarray:
"""the array interface, return my values"""
return self.values

Expand Down Expand Up @@ -3357,7 +3357,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
locs = (level_codes >= idx.start) & (level_codes < idx.stop)
return locs

locs = np.array(level_codes == idx, dtype=bool, copy=False)
locs = np.asarray(level_codes == idx, dtype=bool)

if not locs.any():
# The label is present in self.levels[level] but unused:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1824,6 +1824,8 @@ def as_array(
na_value=na_value,
copy=copy,
).reshape(blk.shape)
elif not copy:
arr = np.asarray(blk.values, dtype=dtype)
else:
arr = np.array(blk.values, dtype=dtype, copy=copy)

Expand Down
7 changes: 6 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,7 +789,9 @@ def __len__(self) -> int:

# ----------------------------------------------------------------------
# NDArray Compat
def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
def __array__(
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
) -> np.ndarray:
"""
Return the values as a NumPy array.
Expand All @@ -802,6 +804,9 @@ def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
The dtype to use for the resulting NumPy array. By default,
the dtype is inferred from the data.
copy : bool or None, optional
Unused.
Returns
-------
numpy.ndarray
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4043,7 +4043,7 @@ def _create_axes(
if isinstance(data_converted.dtype, CategoricalDtype):
ordered = data_converted.ordered
meta = "category"
metadata = np.array(data_converted.categories, copy=False).ravel()
metadata = np.asarray(data_converted.categories).ravel()

data, dtype_name = _get_data_and_dtype_name(data_converted)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/arrays/integer/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
"Addition/subtraction of integers and integer-arrays with Timestamp",
"has no kernel",
"not implemented",
"The 'out' kwarg is necessary. Use numpy.strings.multiply without it.",
]
)
with pytest.raises(errs, match=msg):
Expand Down
Loading

0 comments on commit 4c01d71

Please sign in to comment.