Skip to content

Commit

Permalink
REF: Move methods in core/reshape/util.py to where they are used (#59172)
Browse files Browse the repository at this point in the history

* Move methods in core/reshape/util.py to where they are used

* Remove unit tests
  • Loading branch information
mroeschke authored Jul 9, 2024
1 parent 9eaa4bc commit 3a34e07
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 112 deletions.
58 changes: 57 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,6 @@ def from_product(
(2, 'purple')],
names=['number', 'color'])
"""
from pandas.core.reshape.util import cartesian_product

if not is_list_like(iterables):
raise TypeError("Input must be a list / sequence of iterables.")
Expand Down Expand Up @@ -4105,3 +4104,60 @@ def _require_listlike(level, arr, arrname: str):
if not is_list_like(arr) or not is_list_like(arr[0]):
raise TypeError(f"{arrname} must be list of lists-like")
return level, arr


def cartesian_product(X: list[np.ndarray]) -> list[np.ndarray]:
    """
    Numpy version of itertools.product.

    Sometimes faster (for large inputs)...

    Parameters
    ----------
    X : list-like of list-likes
        The factors of the product.

    Returns
    -------
    product : list of ndarrays
        One array per factor in ``X``. Position ``k`` across all returned
        arrays gives the ``k``-th combination, with the last factor varying
        fastest. An empty ``X`` returns an empty list.

    Raises
    ------
    TypeError
        If ``X`` or any of its elements is not list-like.
    ValueError
        If the cumulative product of the factor lengths becomes negative,
        i.e. the product space overflowed the platform integer.

    See Also
    --------
    itertools.product : Cartesian product of input iterables. Equivalent to
        nested for-loops.

    Examples
    --------
    >>> cartesian_product([list("ABC"), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='<U1'), array([1, 2, 1, 2, 1, 2])]
    """
    msg = "Input must be a list-like of list-likes"
    # Short-circuit so a non-list-like X is never iterated.
    if not is_list_like(X) or not all(is_list_like(x) for x in X):
        raise TypeError(msg)

    if len(X) == 0:
        return []

    lengths = np.fromiter((len(x) for x in X), dtype=np.intp)
    cumprod = np.cumprod(lengths)

    # A negative cumulative product can only come from integer wrap-around.
    if np.any(cumprod < 0):
        raise ValueError("Product space too large to allocate arrays!")

    # tiles[i] is the product of the lengths of all factors before i.
    tiles = np.roll(cumprod, 1)
    tiles[0] = 1

    total = cumprod[-1]
    if total != 0:
        # repeats[i] is the product of the lengths of all factors after i.
        # Each cumulative product divides the total exactly, so floor
        # division keeps the counts as exact integers instead of floats.
        repeats = total // cumprod
    else:
        # if any factor is empty, the cartesian product is empty
        repeats = np.zeros_like(cumprod)

    return [
        np.tile(np.repeat(x, repeats[i]), tiles[i])
        for i, x in enumerate(X)
    ]
4 changes: 2 additions & 2 deletions pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import pandas.core.algorithms as algos
from pandas.core.indexes.api import MultiIndex
from pandas.core.reshape.concat import concat
from pandas.core.reshape.util import tile_compat
from pandas.core.tools.numeric import to_numeric

if TYPE_CHECKING:
Expand Down Expand Up @@ -266,7 +265,8 @@ def melt(
result = frame._constructor(mdata, columns=mcolumns)

if not ignore_index:
result.index = tile_compat(frame.index, num_cols_adjusted)
taker = np.tile(np.arange(len(frame)), num_cols_adjusted)
result.index = frame.index.take(taker)

return result

Expand Down
9 changes: 2 additions & 7 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
get_objs_combined_axis,
)
from pandas.core.reshape.concat import concat
from pandas.core.reshape.util import cartesian_product
from pandas.core.series import Series

if TYPE_CHECKING:
Expand Down Expand Up @@ -358,15 +357,11 @@ def __internal_pivot_table(

if not dropna:
if isinstance(table.index, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.index.levels), names=table.index.names
)
m = MultiIndex.from_product(table.index.levels, names=table.index.names)
table = table.reindex(m, axis=0, fill_value=fill_value)

if isinstance(table.columns, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.columns.levels), names=table.columns.names
)
m = MultiIndex.from_product(table.columns.levels, names=table.columns.names)
table = table.reindex(m, axis=1, fill_value=fill_value)

if sort is True and isinstance(table, ABCDataFrame):
Expand Down
85 changes: 0 additions & 85 deletions pandas/core/reshape/util.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
date_range,
)
import pandas._testing as tm
from pandas.core.reshape.util import cartesian_product
from pandas.core.indexes.multi import cartesian_product


class TestCartesianProduct:
Expand All @@ -28,22 +28,6 @@ def test_datetimeindex(self):
tm.assert_index_equal(result1, expected1)
tm.assert_index_equal(result2, expected2)

def test_tzaware_retained(self):
x = date_range("2000-01-01", periods=2, tz="US/Pacific")
y = np.array([3, 4])
result1, result2 = cartesian_product([x, y])

expected = x.repeat(2)
tm.assert_index_equal(result1, expected)

def test_tzaware_retained_categorical(self):
x = date_range("2000-01-01", periods=2, tz="US/Pacific").astype("category")
y = np.array([3, 4])
result1, result2 = cartesian_product([x, y])

expected = x.repeat(2)
tm.assert_index_equal(result1, expected)

@pytest.mark.parametrize("x, y", [[[], []], [[0, 1], []], [[], ["a", "b", "c"]]])
def test_empty(self, x, y):
# product of empty factors
Expand Down

0 comments on commit 3a34e07

Please sign in to comment.