TST: Clean tests that construct Index equivalent to RangeIndexes (#57441)

* API: Check index and column classes exactly by default * Add a todo * Change test for expected behavior * add ignore index check * ignore column checking for some test * Ignore index checking for test_concat_all_na_block * Ignore adjust some tests * Fix another test * Adjust more tests * Fix more tests * Adjust more tests * adjust another test * Adjust more tests * Adjust test * Adjust test * Adjust test * Fix more tests * Fix more tests * Fix more tests * Fix tests * Adjust more tests * Adjust more tests * Fix some tests * Adjust tests * Fix test * Fix more test * Adjust more tests * Undo some strictness checking * update tests * Adjust more tests * Another test * Adjust more tests * fix another test * Fix test * Fix another test * fix more test * More indexes * Undo assert_ functions for strict checking * Fix tests
pandas-dev · Jul 24, 2024 · f732749 · f732749
1 parent 1afc7a3
commit f732749
Show file tree

Hide file tree

Showing 47 changed files with 235 additions and 189 deletions.
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -368,18 +368,18 @@ def test_apply_mixed_dtype_corner():
     result = df[:0].apply(np.mean, axis=1)
     # the result here is actually kind of ambiguous, should it be a Series
     # or a DataFrame?
-    expected = Series(np.nan, index=pd.Index([], dtype="int64"))
+    expected = Series(dtype=np.float64)
     tm.assert_series_equal(result, expected)
 
 
 def test_apply_mixed_dtype_corner_indexing():
     df = DataFrame({"A": ["foo"], "B": [1.0]})
     result = df.apply(lambda x: x["A"], axis=1)
-    expected = Series(["foo"], index=[0])
+    expected = Series(["foo"], index=range(1))
     tm.assert_series_equal(result, expected)
 
     result = df.apply(lambda x: x["B"], axis=1)
-    expected = Series([1.0], index=[0])
+    expected = Series([1.0], index=range(1))
     tm.assert_series_equal(result, expected)
 
 
@@ -1037,7 +1037,7 @@ def test_result_type(int_frame_const_col):
 
     result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand")
     expected = df.copy()
-    expected.columns = [0, 1, 2]
+    expected.columns = range(3)
     tm.assert_frame_equal(result, expected)
 
 
@@ -1047,7 +1047,7 @@ def test_result_type_shorter_list(int_frame_const_col):
     df = int_frame_const_col
     result = df.apply(lambda x: [1, 2], axis=1, result_type="expand")
     expected = df[["A", "B"]].copy()
-    expected.columns = [0, 1]
+    expected.columns = range(2)
     tm.assert_frame_equal(result, expected)
 
 

diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -1451,7 +1451,7 @@ def test_fill_value_inf_masking():
     expected = pd.DataFrame(
         {"A": [np.inf, 1.0, 0.0, 1.0], "B": [0.0, np.nan, 0.0, np.nan]}
     )
-    tm.assert_frame_equal(result, expected)
+    tm.assert_frame_equal(result, expected, check_index_type=False)
 
 
 def test_dataframe_div_silenced():

diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
@@ -1800,7 +1800,7 @@ def test_numexpr_option_incompatible_op():
             {"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]}
         )
         result = df.query("A.isnull()")
-        expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5])
+        expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=range(4, 6))
         tm.assert_frame_equal(result, expected)
 
 

diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
@@ -408,7 +408,7 @@ def test_take_series(self, data):
         result = s.take([0, -1])
         expected = pd.Series(
             data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype),
-            index=[0, len(data) - 1],
+            index=range(0, 198, 99),
         )
         tm.assert_series_equal(result, expected)
 
@@ -428,7 +428,8 @@ def test_reindex(self, data, na_value):
 
         result = s.reindex([n, n + 1])
         expected = pd.Series(
-            data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1]
+            data._from_sequence([na_value, na_value], dtype=s.dtype),
+            index=range(n, n + 2, 1),
         )
         tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
@@ -33,8 +33,8 @@ def test_concat(self, data, in_frame):
 
     @pytest.mark.parametrize("in_frame", [True, False])
     def test_concat_all_na_block(self, data_missing, in_frame):
-        valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1])
-        na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3])
+        valid_block = pd.Series(data_missing.take([1, 1]), index=range(2))
+        na_block = pd.Series(data_missing.take([0, 0]), index=range(2, 4))
         if in_frame:
             valid_block = pd.DataFrame({"a": valid_block})
             na_block = pd.DataFrame({"a": na_block})

diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
@@ -374,7 +374,7 @@ def test_setitem_preserves_views(self, data):
 
     def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
         # https://github.com/pandas-dev/pandas/issues/32395
-        df = expected = pd.DataFrame({0: pd.Series(data)})
+        df = expected = pd.DataFrame(pd.Series(data))
         result = pd.DataFrame(index=df.index)
 
         key = full_indexer(df)

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -991,7 +991,7 @@ def test_single_element_ix_dont_upcast(self, float_frame):
         result = df.loc[0, "b"]
         assert is_integer(result)
 
-        expected = Series([666], [0], name="b")
+        expected = Series([666], index=range(1), name="b")
         result = df.loc[[0], "b"]
         tm.assert_series_equal(result, expected)
 
@@ -1193,7 +1193,7 @@ def test_type_error_multiindex(self):
         # See gh-12218
         mi = MultiIndex.from_product([["x", "y"], [0, 1]], names=[None, "c"])
         dg = DataFrame(
-            [[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index([0, 1], name="i")
+            [[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index(range(2), name="i")
         )
         with pytest.raises(InvalidIndexError, match="slice"):
             dg[:, 0]
@@ -1452,7 +1452,7 @@ def test_iloc_ea_series_indexer(self):
         indexer = Series([0, 1], dtype="Int64")
         row_indexer = Series([1], dtype="Int64")
         result = df.iloc[row_indexer, indexer]
-        expected = DataFrame([[5, 6]], index=[1])
+        expected = DataFrame([[5, 6]], index=range(1, 2))
         tm.assert_frame_equal(result, expected)
 
         result = df.iloc[row_indexer.values, indexer.values]

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -165,7 +165,7 @@ def test_setitem_timestamp_empty_columns(self):
         df["now"] = Timestamp("20130101", tz="UTC")
 
         expected = DataFrame(
-            [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"]
+            [[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"]
         )
         tm.assert_frame_equal(df, expected)
 

diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py
@@ -21,15 +21,15 @@ def test_compare_axis(align_axis):
     result = df.compare(df2, align_axis=align_axis)
 
     if align_axis in (1, "columns"):
-        indices = pd.Index([0, 2])
+        indices = pd.RangeIndex(0, 4, 2)
         columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
         expected = pd.DataFrame(
             [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]],
             index=indices,
             columns=columns,
         )
     else:
-        indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
+        indices = pd.MultiIndex.from_product([range(0, 4, 2), ["self", "other"]])
         columns = pd.Index(["col1", "col3"])
         expected = pd.DataFrame(
             [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]],
@@ -60,7 +60,7 @@ def test_compare_various_formats(keep_shape, keep_equal):
     result = df.compare(df2, keep_shape=keep_shape, keep_equal=keep_equal)
 
     if keep_shape:
-        indices = pd.Index([0, 1, 2])
+        indices = pd.RangeIndex(3)
         columns = pd.MultiIndex.from_product(
             [["col1", "col2", "col3"], ["self", "other"]]
         )
@@ -85,7 +85,7 @@ def test_compare_various_formats(keep_shape, keep_equal):
                 columns=columns,
             )
     else:
-        indices = pd.Index([0, 2])
+        indices = pd.RangeIndex(0, 4, 2)
         columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
         expected = pd.DataFrame(
             [["a", "c", 1.0, 1.0], ["c", "c", 3.0, 4.0]], index=indices, columns=columns
@@ -203,6 +203,7 @@ def test_compare_result_names():
         },
     )
     result = df1.compare(df2, result_names=("left", "right"))
+    result.index = pd.Index([0, 2])
     expected = pd.DataFrame(
         {
             ("col1", "left"): {0: "a", 2: np.nan},

diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py
@@ -411,10 +411,15 @@ def test_drop_duplicates_inplace():
 @pytest.mark.parametrize(
     "origin_dict, output_dict, ignore_index, output_index",
     [
-        ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, [0, 1]),
-        ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, [0, 2]),
-        ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, [0, 1]),
-        ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, False, [0, 2]),
+        ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, range(2)),
+        ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, range(0, 4, 2)),
+        ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, range(2)),
+        (
+            {"A": [2, 2, 3], "B": [2, 2, 4]},
+            {"A": [2, 3], "B": [2, 4]},
+            False,
+            range(0, 4, 2),
+        ),
     ],
 )
 def test_drop_duplicates_ignore_index(

diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py
@@ -195,7 +195,7 @@ def test_dropna_tz_aware_datetime(self):
         # Ex2
         df = DataFrame({"Time": [dt1, None, np.nan, dt2]})
         result = df.dropna(axis=0)
-        expected = DataFrame([dt1, dt2], columns=["Time"], index=[0, 3])
+        expected = DataFrame([dt1, dt2], columns=["Time"], index=range(0, 6, 3))
         tm.assert_frame_equal(result, expected)
 
     def test_dropna_categorical_interval_index(self):
@@ -233,7 +233,7 @@ def test_set_single_column_subset(self):
         # GH 41021
         df = DataFrame({"A": [1, 2, 3], "B": list("abc"), "C": [4, np.nan, 5]})
         expected = DataFrame(
-            {"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=[0, 2]
+            {"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=range(0, 4, 2)
         )
         result = df.dropna(subset="C")
         tm.assert_frame_equal(result, expected)

diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py
@@ -210,7 +210,7 @@ def test_ignore_index():
     df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]})
     result = df.explode("values", ignore_index=True)
     expected = pd.DataFrame(
-        {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
+        {"id": [0, 0, 10, 10], "values": list("abcd")}, index=range(4)
     )
     tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
@@ -82,6 +82,7 @@ def test_nlargest_n(self, nselect_method, n, order):
         else:
             ascending = nselect_method == "nsmallest"
             result = getattr(df, nselect_method)(n, order)
+            result.index = pd.Index(list(result.index))
             expected = df.sort_values(order, ascending=ascending).head(n)
             tm.assert_frame_equal(result, expected)
 
@@ -132,7 +133,7 @@ def test_nlargest_n_identical_values(self):
         df = pd.DataFrame({"a": [1] * 5, "b": [1, 2, 3, 4, 5]})
 
         result = df.nlargest(3, "a")
-        expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=[0, 1, 2])
+        expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=range(3))
         tm.assert_frame_equal(result, expected)
 
         result = df.nsmallest(3, "a")
@@ -179,18 +180,20 @@ def test_nlargest_duplicate_keep_all_ties(self):
         result = df.nlargest(4, "a", keep="all")
         expected = pd.DataFrame(
             {
-                "a": {0: 5, 1: 4, 2: 4, 4: 3, 5: 3, 6: 3, 7: 3},
-                "b": {0: 10, 1: 9, 2: 8, 4: 5, 5: 50, 6: 10, 7: 20},
-            }
+                "a": [5, 4, 4, 3, 3, 3, 3],
+                "b": [10, 9, 8, 5, 50, 10, 20],
+            },
+            index=[0, 1, 2, 4, 5, 6, 7],
         )
         tm.assert_frame_equal(result, expected)
 
         result = df.nsmallest(2, "a", keep="all")
         expected = pd.DataFrame(
             {
-                "a": {3: 2, 4: 3, 5: 3, 6: 3, 7: 3},
-                "b": {3: 7, 4: 5, 5: 50, 6: 10, 7: 20},
-            }
+                "a": [2, 3, 3, 3, 3],
+                "b": [7, 5, 50, 10, 20],
+            },
+            index=range(3, 8),
         )
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
@@ -127,7 +127,7 @@ def test_axis_numeric_only_true(self, interp_method):
         result = df.quantile(
             0.5, axis=1, numeric_only=True, interpolation=interpolation, method=method
         )
-        expected = Series([3.0, 4.0], index=[0, 1], name=0.5)
+        expected = Series([3.0, 4.0], index=range(2), name=0.5)
         if interpolation == "nearest":
             expected = expected.astype(np.int64)
         tm.assert_series_equal(result, expected)

diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
@@ -170,7 +170,7 @@ def test_sort_values_multicolumn_uint64(self):
                 "a": pd.Series([18446637057563306014, 1162265347240853609]),
                 "b": pd.Series([1, 2]),
             },
-            index=pd.Index([1, 0]),
+            index=range(1, -1, -1),
         )
 
         tm.assert_frame_equal(result, expected)
@@ -360,7 +360,7 @@ def test_sort_values_nat_values_in_int_column(self):
         df_reversed = DataFrame(
             {"int": int_values[::-1], "float": float_values[::-1]},
             columns=["int", "float"],
-            index=[1, 0],
+            index=range(1, -1, -1),
         )
 
         # NaT is not a "na" for int64 columns, so na_position must not
@@ -385,7 +385,7 @@ def test_sort_values_nat_values_in_int_column(self):
         df_reversed = DataFrame(
             {"datetime": [NaT, Timestamp("2016-01-01")], "float": float_values[::-1]},
             columns=["datetime", "float"],
-            index=[1, 0],
+            index=range(1, -1, -1),
         )
 
         df_sorted = df.sort_values(["datetime", "float"], na_position="first")
@@ -540,19 +540,19 @@ def test_sort_values_na_position_with_categories_raises(self):
     @pytest.mark.parametrize(
         "original_dict, sorted_dict, ignore_index, output_index",
         [
-            ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]),
-            ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]),
+            ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, range(3)),
+            ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, range(2, -1, -1)),
             (
                 {"A": [1, 2, 3], "B": [2, 3, 4]},
                 {"A": [3, 2, 1], "B": [4, 3, 2]},
                 True,
-                [0, 1, 2],
+                range(3),
             ),
             (
                 {"A": [1, 2, 3], "B": [2, 3, 4]},
                 {"A": [3, 2, 1], "B": [4, 3, 2]},
                 False,
-                [2, 1, 0],
+                range(2, -1, -1),
             ),
         ],
     )

diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py
@@ -25,6 +25,7 @@ def test_transpose_td64_intervals(self):
         df = DataFrame(ii)
 
         result = df.T
+        result.columns = Index(list(range(len(ii))))
         expected = DataFrame({i: ii[i : i + 1] for i in range(len(ii))})
         tm.assert_frame_equal(result, expected)
 
@@ -153,7 +154,6 @@ def test_transpose_not_inferring_dt(self):
         result = df.T
         expected = DataFrame(
             [[Timestamp("2019-12-31"), Timestamp("2019-12-31")]],
-            columns=[0, 1],
             index=["a"],
             dtype=object,
         )
@@ -175,7 +175,6 @@ def test_transpose_not_inferring_dt_mixed_blocks(self):
                 [Timestamp("2019-12-31"), Timestamp("2019-12-31")],
                 [Timestamp("2019-12-31"), Timestamp("2019-12-31")],
             ],
-            columns=[0, 1],
             index=["a", "b"],
             dtype=object,
         )