Skip to content

Commit

Permalink
PERF: Return RangeIndex from RangeIndex.reindex when possible (pandas…
Browse files Browse the repository at this point in the history
…-dev#57647)

* PERF: Return RangeIndex from RangeIndex.reindex when possible

* Add whatsnew number

* Only if index

* add name

* Skip for type self, undo test

* Use intp

* merge

* Add test for Index return
  • Loading branch information
mroeschke authored and pmhatre1 committed May 7, 2024
1 parent 4c01d71 commit d45b8ef
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ Performance improvements
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57588`)
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57651`)
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57647`)
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57445`)
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
doc,
)

from pandas.core.dtypes import missing
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.common import (
ensure_platform_int,
Expand Down Expand Up @@ -475,7 +476,7 @@ def _shallow_copy(self, values, name: Hashable = no_default):
# GH 46675 & 43885: If values is equally spaced, return a
# more memory-compact RangeIndex instead of Index with 64-bit dtype
diff = values[1] - values[0]
if diff != 0:
if not missing.isna(diff) and diff != 0:
maybe_range_indexer, remainder = np.divmod(values - values[0], diff)
if (
lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer))
Expand All @@ -490,6 +491,11 @@ def _view(self) -> Self:
result._cache = self._cache
return result

def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
    """
    Post-process the target of a reindex before the parent class wraps it.

    A target that is not already an instance of this class and whose dtype
    kind is signed integer (``"i"``) is first rebuilt through
    ``self._shallow_copy`` with the target's own name; every other target is
    handed to the parent implementation untouched.

    Parameters
    ----------
    target : Index
        The index produced by the reindex operation.
    indexer
        Forwarded unchanged to the parent implementation.
    preserve_names : bool
        Forwarded unchanged to the parent implementation.

    Returns
    -------
    Whatever the parent ``_wrap_reindex_result`` returns for the
    (possibly rebuilt) target.
    """
    if not isinstance(target, type(self)):
        # The dtype is only inspected for targets of a different type.
        if target.dtype.kind == "i":
            # NOTE(review): _shallow_copy presumably yields a RangeIndex for
            # equally spaced values (see its GH 46675 comment) -- confirm.
            target = self._shallow_copy(target._values, name=target.name)
    return super()._wrap_reindex_result(target, indexer, preserve_names)

@doc(Index.copy)
def copy(self, name: Hashable | None = None, deep: bool = False) -> Self:
name = self._validate_names(name=name, deep=deep)[0]
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/indexes/ranges/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,26 @@ def test_range_index_rsub_by_const(self):
tm.assert_index_equal(result, expected)


def test_reindex_returns_rangeindex():
    """Reindexing onto equally spaced integer labels gives back a RangeIndex."""
    source = RangeIndex(2, name="foo")
    new_index, positions = source.reindex([1, 2, 3])

    # exact=True makes the comparison sensitive to the index class as well.
    tm.assert_index_equal(new_index, RangeIndex(1, 4, name="foo"), exact=True)

    # Label 1 sits at position 1 of the source; labels 2 and 3 are absent (-1).
    tm.assert_numpy_array_equal(
        positions,
        np.array([1, -1, -1], dtype=np.intp),
    )


def test_reindex_returns_index():
    """Reindexing onto irregularly spaced labels keeps a plain Index."""
    source = RangeIndex(4, name="foo")
    new_index, positions = source.reindex([0, 1, 3])

    # 0, 1, 3 is not an arithmetic progression, so no RangeIndex is expected.
    tm.assert_index_equal(new_index, Index([0, 1, 3], name="foo"), exact=True)

    # Every requested label exists in the source at the position of its value.
    tm.assert_numpy_array_equal(
        positions,
        np.array([0, 1, 3], dtype=np.intp),
    )


def test_take_return_rangeindex():
ri = RangeIndex(5, name="foo")
result = ri.take([])
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1204,10 +1204,7 @@ def test_loc_setitem_empty_append_raises(self):
data = [1, 2]
df = DataFrame(columns=["x", "y"])
df.index = df.index.astype(np.int64)
msg = (
rf"None of \[Index\(\[0, 1\], dtype='{np.dtype(int)}'\)\] "
r"are in the \[index\]"
)
msg = r"None of .*Index.* are in the \[index\]"
with pytest.raises(KeyError, match=msg):
df.loc[[0, 1], "x"] = data

Expand Down

0 comments on commit d45b8ef

Please sign in to comment.