Skip to content

Commit

Permalink
Fix loc/iloc.__setitem__[:, loc] with non cupy types (#16677)
Browse files Browse the repository at this point in the history
Discovered in #16652: `DataFrame.iloc/loc.__setitem__` with a non-cupy type (e.g. `"category"`) failed because the indexing path unconditionally called `cupy.asarray` on the value being set, and `cupy.asarray` only accepts types recognized by cupy.

We can skip this `asarray` call when the value is already a numpy/pandas/cudf object.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16677
  • Loading branch information
mroeschke authored Aug 28, 2024
1 parent 60f30d8 commit 1a96e4c
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 4 deletions.
10 changes: 6 additions & 4 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,9 @@ def _setitem_tuple_arg(self, key, value):
)

else:
value = cupy.asarray(value)
if value.ndim == 2:
if not is_column_like(value):
value = cupy.asarray(value)
if getattr(value, "ndim", 1) == 2:
# If the inner dimension is 1, it's broadcastable to
# all columns of the dataframe.
indexed_shape = columns_df.loc[key[0]].shape
Expand Down Expand Up @@ -558,8 +559,9 @@ def _setitem_tuple_arg(self, key, value):
else:
# TODO: consolidate code path with identical counterpart
# in `_DataFrameLocIndexer._setitem_tuple_arg`
value = cupy.asarray(value)
if value.ndim == 2:
if not is_column_like(value):
value = cupy.asarray(value)
if getattr(value, "ndim", 1) == 2:
indexed_shape = columns_df.iloc[key[0]].shape
if value.shape[1] == 1:
if value.shape[0] != indexed_shape[0]:
Expand Down
10 changes: 10 additions & 0 deletions python/cudf/cudf/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2369,3 +2369,13 @@ def test_duplicate_labels_raises():
df[["a", "a"]]
with pytest.raises(ValueError):
df.loc[:, ["a", "a"]]


@pytest.mark.parametrize("indexer", ["iloc", "loc"])
@pytest.mark.parametrize("dtype", ["category", "timedelta64[ns]"])
def test_loc_iloc_setitem_col_slice_non_cupy_types(indexer, dtype):
    """Check ``[:, 0]`` self-assignment via ``iloc``/``loc`` against pandas.

    A two-row, single-column frame of the parametrized dtype is built in
    pandas and mirrored into cudf. Column 0 is then assigned back onto
    itself through the parametrized indexer on each frame, and the two
    frames are compared with ``assert_eq``.
    """
    expected = pd.DataFrame(range(2), dtype=dtype)
    actual = cudf.DataFrame.from_pandas(expected)

    # Apply the same self-assignment to the pandas frame first, then the
    # cudf frame, so both go through the indexer under test.
    for frame in (expected, actual):
        accessor = getattr(frame, indexer)
        accessor[:, 0] = accessor[:, 0]

    assert_eq(expected, actual)

0 comments on commit 1a96e4c

Please sign in to comment.