From 1a96e4cca188f4e0500a87c391ef105b49a42288 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 27 Aug 2024 18:57:48 -1000 Subject: [PATCH] Fix loc/iloc.__setitem__[:, loc] with non cupy types (#16677) Discovered in https://github.com/rapidsai/cudf/pull/16652, `DataFrame.iloc/loc.__setitem__` with a non-cupy type, e.g. `"category"`, failed because the indexing path unconditionally calls `cupy.asarray` on the value to be set, and `cupy.asarray` only accepts types recognized by cupy. We can skip this `asarray` call if we have a numpy/pandas/cudf object. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/16677 --- python/cudf/cudf/core/dataframe.py | 10 ++++++---- python/cudf/cudf/tests/test_indexing.py | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 14b63c2b0d7..d54a800aedf 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -414,8 +414,9 @@ def _setitem_tuple_arg(self, key, value): ) else: - value = cupy.asarray(value) - if value.ndim == 2: + if not is_column_like(value): + value = cupy.asarray(value) + if getattr(value, "ndim", 1) == 2: # If the inner dimension is 1, it's broadcastable to # all columns of the dataframe. 
indexed_shape = columns_df.loc[key[0]].shape @@ -558,8 +559,9 @@ def _setitem_tuple_arg(self, key, value): else: # TODO: consolidate code path with identical counterpart # in `_DataFrameLocIndexer._setitem_tuple_arg` - value = cupy.asarray(value) - if value.ndim == 2: + if not is_column_like(value): + value = cupy.asarray(value) + if getattr(value, "ndim", 1) == 2: indexed_shape = columns_df.iloc[key[0]].shape if value.shape[1] == 1: if value.shape[0] != indexed_shape[0]: diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 716b4dc6acd..9df2852dde8 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -2369,3 +2369,13 @@ def test_duplicate_labels_raises(): df[["a", "a"]] with pytest.raises(ValueError): df.loc[:, ["a", "a"]] + + +@pytest.mark.parametrize("indexer", ["iloc", "loc"]) +@pytest.mark.parametrize("dtype", ["category", "timedelta64[ns]"]) +def test_loc_iloc_setitem_col_slice_non_cupy_types(indexer, dtype): + df_pd = pd.DataFrame(range(2), dtype=dtype) + df_cudf = cudf.DataFrame.from_pandas(df_pd) + getattr(df_pd, indexer)[:, 0] = getattr(df_pd, indexer)[:, 0] + getattr(df_cudf, indexer)[:, 0] = getattr(df_cudf, indexer)[:, 0] + assert_eq(df_pd, df_cudf)