Skip to content

Commit

Permalink
TST: Clean tests that construct Index equivalent to RangeIndexes (#57441)
Browse files Browse the repository at this point in the history
* API: Check index and column classes exactly by default

* Add a todo

* Change test for expected behavior

* add ignore index check

* ignore column checking for some test

* Ignore index checking for test_concat_all_na_block

* Ignore adjust some tests

* Fix another test

* Adjust more tests

* Fix more tests

* Adjust more tests

* adjust another test

* Adjust more tests

* Adjust test

* Adjust test

* Adjust test

* Fix more tests

* Fix more tests

* Fix more tests

* Fix tests

* Adjust more tests

* Adjust more tests

* Fix some tests

* Adjust tests

* Fix test

* Fix more tests

* Adjust more tests

* Undo some strictness checking

* update tests

* Adjust more tests

* Another test

* Adjust more tests

* fix another test

* Fix test

* Fix another test

* fix more tests

* More indexes

* Undo assert_ functions for strict checking

* Fix tests
  • Loading branch information
mroeschke authored Jul 24, 2024
1 parent 1afc7a3 commit f732749
Show file tree
Hide file tree
Showing 47 changed files with 235 additions and 189 deletions.
10 changes: 5 additions & 5 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,18 +368,18 @@ def test_apply_mixed_dtype_corner():
result = df[:0].apply(np.mean, axis=1)
# the result here is actually kind of ambiguous, should it be a Series
# or a DataFrame?
expected = Series(np.nan, index=pd.Index([], dtype="int64"))
expected = Series(dtype=np.float64)
tm.assert_series_equal(result, expected)


def test_apply_mixed_dtype_corner_indexing():
df = DataFrame({"A": ["foo"], "B": [1.0]})
result = df.apply(lambda x: x["A"], axis=1)
expected = Series(["foo"], index=[0])
expected = Series(["foo"], index=range(1))
tm.assert_series_equal(result, expected)

result = df.apply(lambda x: x["B"], axis=1)
expected = Series([1.0], index=[0])
expected = Series([1.0], index=range(1))
tm.assert_series_equal(result, expected)


Expand Down Expand Up @@ -1037,7 +1037,7 @@ def test_result_type(int_frame_const_col):

result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand")
expected = df.copy()
expected.columns = [0, 1, 2]
expected.columns = range(3)
tm.assert_frame_equal(result, expected)


Expand All @@ -1047,7 +1047,7 @@ def test_result_type_shorter_list(int_frame_const_col):
df = int_frame_const_col
result = df.apply(lambda x: [1, 2], axis=1, result_type="expand")
expected = df[["A", "B"]].copy()
expected.columns = [0, 1]
expected.columns = range(2)
tm.assert_frame_equal(result, expected)


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1451,7 +1451,7 @@ def test_fill_value_inf_masking():
expected = pd.DataFrame(
{"A": [np.inf, 1.0, 0.0, 1.0], "B": [0.0, np.nan, 0.0, np.nan]}
)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected, check_index_type=False)


def test_dataframe_div_silenced():
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/computation/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1800,7 +1800,7 @@ def test_numexpr_option_incompatible_op():
{"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]}
)
result = df.query("A.isnull()")
expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5])
expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=range(4, 6))
tm.assert_frame_equal(result, expected)


Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ def test_take_series(self, data):
result = s.take([0, -1])
expected = pd.Series(
data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype),
index=[0, len(data) - 1],
index=range(0, 198, 99),
)
tm.assert_series_equal(result, expected)

Expand All @@ -428,7 +428,8 @@ def test_reindex(self, data, na_value):

result = s.reindex([n, n + 1])
expected = pd.Series(
data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1]
data._from_sequence([na_value, na_value], dtype=s.dtype),
index=range(n, n + 2, 1),
)
tm.assert_series_equal(result, expected)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/reshaping.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def test_concat(self, data, in_frame):

@pytest.mark.parametrize("in_frame", [True, False])
def test_concat_all_na_block(self, data_missing, in_frame):
valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1])
na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3])
valid_block = pd.Series(data_missing.take([1, 1]), index=range(2))
na_block = pd.Series(data_missing.take([0, 0]), index=range(2, 4))
if in_frame:
valid_block = pd.DataFrame({"a": valid_block})
na_block = pd.DataFrame({"a": na_block})
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ def test_setitem_preserves_views(self, data):

def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
# https://github.com/pandas-dev/pandas/issues/32395
df = expected = pd.DataFrame({0: pd.Series(data)})
df = expected = pd.DataFrame(pd.Series(data))
result = pd.DataFrame(index=df.index)

key = full_indexer(df)
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,7 +991,7 @@ def test_single_element_ix_dont_upcast(self, float_frame):
result = df.loc[0, "b"]
assert is_integer(result)

expected = Series([666], [0], name="b")
expected = Series([666], index=range(1), name="b")
result = df.loc[[0], "b"]
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -1193,7 +1193,7 @@ def test_type_error_multiindex(self):
# See gh-12218
mi = MultiIndex.from_product([["x", "y"], [0, 1]], names=[None, "c"])
dg = DataFrame(
[[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index([0, 1], name="i")
[[1, 1, 2, 2], [3, 3, 4, 4]], columns=mi, index=Index(range(2), name="i")
)
with pytest.raises(InvalidIndexError, match="slice"):
dg[:, 0]
Expand Down Expand Up @@ -1452,7 +1452,7 @@ def test_iloc_ea_series_indexer(self):
indexer = Series([0, 1], dtype="Int64")
row_indexer = Series([1], dtype="Int64")
result = df.iloc[row_indexer, indexer]
expected = DataFrame([[5, 6]], index=[1])
expected = DataFrame([[5, 6]], index=range(1, 2))
tm.assert_frame_equal(result, expected)

result = df.iloc[row_indexer.values, indexer.values]
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def test_setitem_timestamp_empty_columns(self):
df["now"] = Timestamp("20130101", tz="UTC")

expected = DataFrame(
[[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"]
[[Timestamp("20130101", tz="UTC")]] * 3, index=range(3), columns=["now"]
)
tm.assert_frame_equal(df, expected)

Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/frame/methods/test_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ def test_compare_axis(align_axis):
result = df.compare(df2, align_axis=align_axis)

if align_axis in (1, "columns"):
indices = pd.Index([0, 2])
indices = pd.RangeIndex(0, 4, 2)
columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
expected = pd.DataFrame(
[["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]],
index=indices,
columns=columns,
)
else:
indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
indices = pd.MultiIndex.from_product([range(0, 4, 2), ["self", "other"]])
columns = pd.Index(["col1", "col3"])
expected = pd.DataFrame(
[["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]],
Expand Down Expand Up @@ -60,7 +60,7 @@ def test_compare_various_formats(keep_shape, keep_equal):
result = df.compare(df2, keep_shape=keep_shape, keep_equal=keep_equal)

if keep_shape:
indices = pd.Index([0, 1, 2])
indices = pd.RangeIndex(3)
columns = pd.MultiIndex.from_product(
[["col1", "col2", "col3"], ["self", "other"]]
)
Expand All @@ -85,7 +85,7 @@ def test_compare_various_formats(keep_shape, keep_equal):
columns=columns,
)
else:
indices = pd.Index([0, 2])
indices = pd.RangeIndex(0, 4, 2)
columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
expected = pd.DataFrame(
[["a", "c", 1.0, 1.0], ["c", "c", 3.0, 4.0]], index=indices, columns=columns
Expand Down Expand Up @@ -203,6 +203,7 @@ def test_compare_result_names():
},
)
result = df1.compare(df2, result_names=("left", "right"))
result.index = pd.Index([0, 2])
expected = pd.DataFrame(
{
("col1", "left"): {0: "a", 2: np.nan},
Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/frame/methods/test_drop_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,15 @@ def test_drop_duplicates_inplace():
@pytest.mark.parametrize(
"origin_dict, output_dict, ignore_index, output_index",
[
({"A": [2, 2, 3]}, {"A": [2, 3]}, True, [0, 1]),
({"A": [2, 2, 3]}, {"A": [2, 3]}, False, [0, 2]),
({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, [0, 1]),
({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, False, [0, 2]),
({"A": [2, 2, 3]}, {"A": [2, 3]}, True, range(2)),
({"A": [2, 2, 3]}, {"A": [2, 3]}, False, range(0, 4, 2)),
({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, range(2)),
(
{"A": [2, 2, 3], "B": [2, 2, 4]},
{"A": [2, 3], "B": [2, 4]},
False,
range(0, 4, 2),
),
],
)
def test_drop_duplicates_ignore_index(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/methods/test_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def test_dropna_tz_aware_datetime(self):
# Ex2
df = DataFrame({"Time": [dt1, None, np.nan, dt2]})
result = df.dropna(axis=0)
expected = DataFrame([dt1, dt2], columns=["Time"], index=[0, 3])
expected = DataFrame([dt1, dt2], columns=["Time"], index=range(0, 6, 3))
tm.assert_frame_equal(result, expected)

def test_dropna_categorical_interval_index(self):
Expand Down Expand Up @@ -233,7 +233,7 @@ def test_set_single_column_subset(self):
# GH 41021
df = DataFrame({"A": [1, 2, 3], "B": list("abc"), "C": [4, np.nan, 5]})
expected = DataFrame(
{"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=[0, 2]
{"A": [1, 3], "B": list("ac"), "C": [4.0, 5.0]}, index=range(0, 4, 2)
)
result = df.dropna(subset="C")
tm.assert_frame_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def test_ignore_index():
df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]})
result = df.explode("values", ignore_index=True)
expected = pd.DataFrame(
{"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
{"id": [0, 0, 10, 10], "values": list("abcd")}, index=range(4)
)
tm.assert_frame_equal(result, expected)

Expand Down
17 changes: 10 additions & 7 deletions pandas/tests/frame/methods/test_nlargest.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def test_nlargest_n(self, nselect_method, n, order):
else:
ascending = nselect_method == "nsmallest"
result = getattr(df, nselect_method)(n, order)
result.index = pd.Index(list(result.index))
expected = df.sort_values(order, ascending=ascending).head(n)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -132,7 +133,7 @@ def test_nlargest_n_identical_values(self):
df = pd.DataFrame({"a": [1] * 5, "b": [1, 2, 3, 4, 5]})

result = df.nlargest(3, "a")
expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=[0, 1, 2])
expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=range(3))
tm.assert_frame_equal(result, expected)

result = df.nsmallest(3, "a")
Expand Down Expand Up @@ -179,18 +180,20 @@ def test_nlargest_duplicate_keep_all_ties(self):
result = df.nlargest(4, "a", keep="all")
expected = pd.DataFrame(
{
"a": {0: 5, 1: 4, 2: 4, 4: 3, 5: 3, 6: 3, 7: 3},
"b": {0: 10, 1: 9, 2: 8, 4: 5, 5: 50, 6: 10, 7: 20},
}
"a": [5, 4, 4, 3, 3, 3, 3],
"b": [10, 9, 8, 5, 50, 10, 20],
},
index=[0, 1, 2, 4, 5, 6, 7],
)
tm.assert_frame_equal(result, expected)

result = df.nsmallest(2, "a", keep="all")
expected = pd.DataFrame(
{
"a": {3: 2, 4: 3, 5: 3, 6: 3, 7: 3},
"b": {3: 7, 4: 5, 5: 50, 6: 10, 7: 20},
}
"a": [2, 3, 3, 3, 3],
"b": [7, 5, 50, 10, 20],
},
index=range(3, 8),
)
tm.assert_frame_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def test_axis_numeric_only_true(self, interp_method):
result = df.quantile(
0.5, axis=1, numeric_only=True, interpolation=interpolation, method=method
)
expected = Series([3.0, 4.0], index=[0, 1], name=0.5)
expected = Series([3.0, 4.0], index=range(2), name=0.5)
if interpolation == "nearest":
expected = expected.astype(np.int64)
tm.assert_series_equal(result, expected)
Expand Down
14 changes: 7 additions & 7 deletions pandas/tests/frame/methods/test_sort_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def test_sort_values_multicolumn_uint64(self):
"a": pd.Series([18446637057563306014, 1162265347240853609]),
"b": pd.Series([1, 2]),
},
index=pd.Index([1, 0]),
index=range(1, -1, -1),
)

tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -360,7 +360,7 @@ def test_sort_values_nat_values_in_int_column(self):
df_reversed = DataFrame(
{"int": int_values[::-1], "float": float_values[::-1]},
columns=["int", "float"],
index=[1, 0],
index=range(1, -1, -1),
)

# NaT is not a "na" for int64 columns, so na_position must not
Expand All @@ -385,7 +385,7 @@ def test_sort_values_nat_values_in_int_column(self):
df_reversed = DataFrame(
{"datetime": [NaT, Timestamp("2016-01-01")], "float": float_values[::-1]},
columns=["datetime", "float"],
index=[1, 0],
index=range(1, -1, -1),
)

df_sorted = df.sort_values(["datetime", "float"], na_position="first")
Expand Down Expand Up @@ -540,19 +540,19 @@ def test_sort_values_na_position_with_categories_raises(self):
@pytest.mark.parametrize(
"original_dict, sorted_dict, ignore_index, output_index",
[
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]),
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]),
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, range(3)),
({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, range(2, -1, -1)),
(
{"A": [1, 2, 3], "B": [2, 3, 4]},
{"A": [3, 2, 1], "B": [4, 3, 2]},
True,
[0, 1, 2],
range(3),
),
(
{"A": [1, 2, 3], "B": [2, 3, 4]},
{"A": [3, 2, 1], "B": [4, 3, 2]},
False,
[2, 1, 0],
range(2, -1, -1),
),
],
)
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/frame/methods/test_transpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def test_transpose_td64_intervals(self):
df = DataFrame(ii)

result = df.T
result.columns = Index(list(range(len(ii))))
expected = DataFrame({i: ii[i : i + 1] for i in range(len(ii))})
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -153,7 +154,6 @@ def test_transpose_not_inferring_dt(self):
result = df.T
expected = DataFrame(
[[Timestamp("2019-12-31"), Timestamp("2019-12-31")]],
columns=[0, 1],
index=["a"],
dtype=object,
)
Expand All @@ -175,7 +175,6 @@ def test_transpose_not_inferring_dt_mixed_blocks(self):
[Timestamp("2019-12-31"), Timestamp("2019-12-31")],
[Timestamp("2019-12-31"), Timestamp("2019-12-31")],
],
columns=[0, 1],
index=["a", "b"],
dtype=object,
)
Expand Down
Loading

0 comments on commit f732749

Please sign in to comment.