MAINT: Adjust the codebase to the new np.array's copy keyword meaning (pandas-dev#57172)

* MAINT: Adjust the codebase to the new np.array copy keyword meaning
* Add copy is docstring
* Use asarray where possible

---------

Co-authored-by: Matthew Roeschke <[email protected]>
pmhatre1 · May 7, 2024 · 4c01d71 · 4c01d71
1 parent b778905
commit 4c01d71
Show file tree

Hide file tree

Showing 33 changed files with 128 additions and 58 deletions.
diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py
@@ -102,7 +102,7 @@ def quantile_with_mask(
             interpolation=interpolation,
         )
 
-        result = np.array(result, copy=False)
+        result = np.asarray(result)
         result = result.T
 
     return result
@@ -201,9 +201,9 @@ def _nanpercentile(
         ]
         if values.dtype.kind == "f":
             # preserve itemsize
-            result = np.array(result, dtype=values.dtype, copy=False).T
+            result = np.asarray(result, dtype=values.dtype).T
         else:
-            result = np.array(result, copy=False).T
+            result = np.asarray(result).T
             if (
                 result.dtype != values.dtype
                 and not mask.all()

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -659,7 +659,9 @@ def __arrow_array__(self, type=None):
         """Convert myself to a pyarrow ChunkedArray."""
         return self._pa_array
 
-    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: NpDtype | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         """Correctly construct numpy arrays when passed to `np.asarray()`."""
         return self.to_numpy(dtype=dtype)
 

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -725,7 +725,10 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
 
             return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
 
-        return np.array(self, dtype=dtype, copy=copy)
+        if not copy:
+            return np.asarray(self, dtype=dtype)
+        else:
+            return np.array(self, dtype=dtype, copy=copy)
 
     def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
         """

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1659,7 +1659,9 @@ def _validate_codes_for_dtype(cls, codes, *, dtype: CategoricalDtype) -> np.ndar
     # -------------------------------------------------------------
 
     @ravel_compat
-    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: NpDtype | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         """
         The numpy array interface.
 
@@ -1668,6 +1670,9 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
         dtype : np.dtype or None
             Specifies the the dtype for the array.
 
+        copy : bool or None, optional
+            Unused.
+
         Returns
         -------
         numpy.array

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -353,7 +353,9 @@ def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
     # ----------------------------------------------------------------
     # Array-Like / EA-Interface Methods
 
-    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: NpDtype | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         # used for Timedelta/DatetimeArray, overwritten by PeriodArray
         if is_object_dtype(dtype):
             return np.array(list(self), dtype=object)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -649,12 +649,12 @@ def _resolution_obj(self) -> Resolution:
     # ----------------------------------------------------------------
     # Array-Like / EA-Interface Methods
 
-    def __array__(self, dtype=None) -> np.ndarray:
+    def __array__(self, dtype=None, copy=None) -> np.ndarray:
         if dtype is None and self.tz:
             # The default for tz-aware is object, to preserve tz info
             dtype = object
 
-        return super().__array__(dtype=dtype)
+        return super().__array__(dtype=dtype, copy=copy)
 
     def __iter__(self) -> Iterator:
         """
@@ -2421,7 +2421,7 @@ def objects_to_datetime64(
     assert errors in ["raise", "coerce"]
 
     # if str-dtype, convert
-    data = np.array(data, copy=False, dtype=np.object_)
+    data = np.asarray(data, dtype=np.object_)
 
     result, tz_parsed = tslib.array_to_datetime(
         data,

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -1564,7 +1564,9 @@ def is_non_overlapping_monotonic(self) -> bool:
     # ---------------------------------------------------------------------
     # Conversion
 
-    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: NpDtype | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         """
         Return the IntervalArray's data as a numpy array of Interval
         objects (with dtype='object')

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -594,7 +594,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
 
     __array_priority__ = 1000  # higher than ndarray so ops dispatch to us
 
-    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: NpDtype | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         """
         the array interface, return my values
         We return an object array here to preserve our scalar values

diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
@@ -160,7 +160,10 @@ def _coerce_to_data_and_mask(
         return values, mask, dtype, inferred_type
 
     original = values
-    values = np.array(values, copy=copy)
+    if not copy:
+        values = np.asarray(values)
+    else:
+        values = np.array(values, copy=copy)
     inferred_type = None
     if values.dtype == object or is_string_dtype(values.dtype):
         inferred_type = lib.infer_dtype(values, skipna=True)
@@ -169,7 +172,10 @@ def _coerce_to_data_and_mask(
             raise TypeError(f"{values.dtype} cannot be converted to {name}")
 
     elif values.dtype.kind == "b" and checker(dtype):
-        values = np.array(values, dtype=default_dtype, copy=copy)
+        if not copy:
+            values = np.asarray(values, dtype=default_dtype)
+        else:
+            values = np.array(values, dtype=default_dtype, copy=copy)
 
     elif values.dtype.kind not in "iuf":
         name = dtype_cls.__name__.strip("_")
@@ -208,9 +214,9 @@ def _coerce_to_data_and_mask(
                     inferred_type not in ["floating", "mixed-integer-float"]
                     and not mask.any()
                 ):
-                    values = np.array(original, dtype=dtype, copy=False)
+                    values = np.asarray(original, dtype=dtype)
                 else:
-                    values = np.array(original, dtype="object", copy=False)
+                    values = np.asarray(original, dtype="object")
 
     # we copy as need to coerce here
     if mask.any():

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -150,7 +150,9 @@ def dtype(self) -> NumpyEADtype:
     # ------------------------------------------------------------------------
     # NumPy Array Interface
 
-    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: NpDtype | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         return np.asarray(self._ndarray, dtype=dtype)
 
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -256,7 +256,10 @@ def __init__(
                 raise raise_on_incompatible(values, dtype.freq)
             values, dtype = values._ndarray, values.dtype
 
-        values = np.array(values, dtype="int64", copy=copy)
+        if not copy:
+            values = np.asarray(values, dtype="int64")
+        else:
+            values = np.array(values, dtype="int64", copy=copy)
         if dtype is None:
             raise ValueError("dtype is not specified and cannot be inferred")
         dtype = cast(PeriodDtype, dtype)
@@ -400,7 +403,9 @@ def freq(self) -> BaseOffset:
     def freqstr(self) -> str:
         return PeriodDtype(self.freq)._freqstr
 
-    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: NpDtype | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         if dtype == "i8":
             return self.asi8
         elif dtype == bool:

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -554,7 +554,9 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
 
         return cls._simple_new(arr, index, dtype)
 
-    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: NpDtype | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         fill_value = self.fill_value
 
         if self.sp_index.ngaps == 0:

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -1072,7 +1072,10 @@ def sequence_to_td64ns(
         # This includes datetime64-dtype, see GH#23539, GH#29794
         raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]")
 
-    data = np.array(data, copy=copy)
+    if not copy:
+        data = np.asarray(data)
+    else:
+        data = np.array(data, copy=copy)
 
     assert data.dtype.kind == "m"
     assert data.dtype != "m8"  # i.e. not unit-less
@@ -1152,7 +1155,7 @@ def _objects_to_td64ns(
     higher level.
     """
     # coerce Index to np.ndarray, converting string-dtype if necessary
-    values = np.array(data, dtype=np.object_, copy=False)
+    values = np.asarray(data, dtype=np.object_)
 
     result = array_to_timedelta64(values, unit=unit, errors=errors)
     return result.view("timedelta64[ns]")

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -626,7 +626,10 @@ def sanitize_array(
 
     elif hasattr(data, "__array__"):
         # e.g. dask array GH#38645
-        data = np.array(data, copy=copy)
+        if not copy:
+            data = np.asarray(data)
+        else:
+            data = np.array(data, copy=copy)
         return sanitize_array(
             data,
             index=index,
@@ -744,8 +747,11 @@ def _sanitize_str_dtypes(
         # GH#19853: If data is a scalar, result has already the result
         if not lib.is_scalar(data):
             if not np.all(isna(data)):
-                data = np.array(data, dtype=dtype, copy=False)
-            result = np.array(data, dtype=object, copy=copy)
+                data = np.asarray(data, dtype=dtype)
+            if not copy:
+                result = np.asarray(data, dtype=object)
+            else:
+                result = np.array(data, dtype=object, copy=copy)
     return result
 
 
@@ -810,6 +816,8 @@ def _try_cast(
         # this will raise if we have e.g. floats
 
         subarr = maybe_cast_to_integer_array(arr, dtype)
+    elif not copy:
+        subarr = np.asarray(arr, dtype=dtype)
     else:
         subarr = np.array(arr, dtype=dtype, copy=copy)
 

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1503,7 +1503,10 @@ def construct_2d_arraylike_from_scalar(
 
     # Attempt to coerce to a numpy array
     try:
-        arr = np.array(value, dtype=dtype, copy=copy)
+        if not copy:
+            arr = np.asarray(value, dtype=dtype)
+        else:
+            arr = np.array(value, dtype=dtype, copy=copy)
     except (ValueError, TypeError) as err:
         raise TypeError(
             f"DataFrame constructor called with incompatible data and dtype: {err}"
@@ -1652,7 +1655,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
                         "out-of-bound Python int",
                         DeprecationWarning,
                     )
-                casted = np.array(arr, dtype=dtype, copy=False)
+                casted = np.asarray(arr, dtype=dtype)
         else:
             with warnings.catch_warnings():
                 warnings.filterwarnings("ignore", category=RuntimeWarning)

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -564,7 +564,7 @@ def infer_fill_value(val):
     """
     if not is_list_like(val):
         val = [val]
-    val = np.array(val, copy=False)
+    val = np.asarray(val)
     if val.dtype.kind in "mM":
         return np.array("NaT", dtype=val.dtype)
     elif val.dtype == object:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1919,7 +1919,7 @@ def to_numpy(
             dtype = np.dtype(dtype)
         result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value)
         if result.dtype is not dtype:
-            result = np.array(result, dtype=dtype, copy=False)
+            result = np.asarray(result, dtype=dtype)
 
         return result
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1989,7 +1989,9 @@ def empty(self) -> bool:
     # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
     __array_priority__: int = 1000
 
-    def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         values = self._values
         arr = np.asarray(values, dtype=dtype)
         if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block:

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -912,7 +912,7 @@ def __len__(self) -> int:
         """
         return len(self._data)
 
-    def __array__(self, dtype=None) -> np.ndarray:
+    def __array__(self, dtype=None, copy=None) -> np.ndarray:
         """
         The array interface, return my values.
         """

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -770,7 +770,7 @@ def _values(self) -> np.ndarray:
             ):
                 vals = vals.astype(object)
 
-            array_vals = np.array(vals, copy=False)
+            array_vals = np.asarray(vals)
             array_vals = algos.take_nd(array_vals, codes, fill_value=index._na_value)
             values.append(array_vals)
 
@@ -1330,7 +1330,7 @@ def copy(  # type: ignore[override]
             new_index._id = self._id
         return new_index
 
-    def __array__(self, dtype=None) -> np.ndarray:
+    def __array__(self, dtype=None, copy=None) -> np.ndarray:
         """the array interface, return my values"""
         return self.values
 
@@ -3357,7 +3357,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
                     locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                     return locs
 
-                locs = np.array(level_codes == idx, dtype=bool, copy=False)
+                locs = np.asarray(level_codes == idx, dtype=bool)
 
                 if not locs.any():
                     # The label is present in self.levels[level] but unused:

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1824,6 +1824,8 @@ def as_array(
                     na_value=na_value,
                     copy=copy,
                 ).reshape(blk.shape)
+            elif not copy:
+                arr = np.asarray(blk.values, dtype=dtype)
             else:
                 arr = np.array(blk.values, dtype=dtype, copy=copy)
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -789,7 +789,9 @@ def __len__(self) -> int:
 
     # ----------------------------------------------------------------------
     # NDArray Compat
-    def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
+    def __array__(
+        self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
+    ) -> np.ndarray:
         """
         Return the values as a NumPy array.
 
@@ -802,6 +804,9 @@ def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
             The dtype to use for the resulting NumPy array. By default,
             the dtype is inferred from the data.
 
+        copy : bool or None, optional
+            Unused.
+
         Returns
         -------
         numpy.ndarray

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -4043,7 +4043,7 @@ def _create_axes(
             if isinstance(data_converted.dtype, CategoricalDtype):
                 ordered = data_converted.ordered
                 meta = "category"
-                metadata = np.array(data_converted.categories, copy=False).ravel()
+                metadata = np.asarray(data_converted.categories).ravel()
 
             data, dtype_name = _get_data_and_dtype_name(data_converted)
 

diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py
@@ -197,6 +197,7 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
             "Addition/subtraction of integers and integer-arrays with Timestamp",
             "has no kernel",
             "not implemented",
+            "The 'out' kwarg is necessary. Use numpy.strings.multiply without it.",
         ]
     )
     with pytest.raises(errs, match=msg):