Skip to content

Commit

Permalink
fix: pandas and double nested vectors issue 885 (#912)
Browse files Browse the repository at this point in the history
* fix: pandas and double nested vectors issue 885

* style: pre-commit fixes

* Rename test file to match PR number.

* Fix ROOT import.

* Fix order of operations.

* Fix order of operations.2

* style: pre-commit fixes

* Change the logic in pandas finalize.

* style: pre-commit fixes

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ioanaif and pre-commit-ci[bot] authored Jul 7, 2023
1 parent b96ae44 commit a8644df
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 10 deletions.
14 changes: 8 additions & 6 deletions src/uproot/interpretation/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,14 +841,16 @@ def finalize(self, array, branch, interpretation, entry_start, entry_stop, optio
):
return pandas.Series(array, index=index)
else:
awkward_pandas = uproot.extras.awkward_pandas()
ak_lib = _libraries[Awkward.name]
ak_arr = ak_lib.finalize(
array = _libraries[Awkward.name].finalize(
array, branch, interpretation, entry_start, entry_stop, options
)
return pandas.Series(
awkward_pandas.AwkwardExtensionArray(ak_arr), index=index
)
if isinstance(
array.type.content, uproot.extras.awkward().types.NumpyType
) and array.layout.minmax_depth == (1, 1):
array = array.to_numpy()
else:
array = uproot.extras.awkward_pandas().AwkwardExtensionArray(array)
return pandas.Series(array, index=index)

def group(self, arrays, expression_context, how):
pandas = self.imported
Expand Down
19 changes: 16 additions & 3 deletions src/uproot/interpretation/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,15 @@ def basket_array(
)
assert basket.byte_offsets is not None

if self._forth and isinstance(library, uproot.interpretation.library.Awkward):
if self._forth and (
isinstance(
library,
(
uproot.interpretation.library.Awkward,
uproot.interpretation.library.Pandas,
),
)
):
output = self.basket_array_forth(
data,
byte_offsets,
Expand Down Expand Up @@ -403,9 +411,14 @@ def final_array(
output = numpy.array([], dtype=self.numpy_dtype)
elif all(
uproot._util.from_module(x, "awkward") for x in basket_arrays.values()
) and isinstance(
library,
(
uproot.interpretation.library.Awkward,
uproot.interpretation.library.Pandas,
),
):
assert isinstance(library, uproot.interpretation.library.Awkward)
awkward = library.imported
awkward = uproot.extras.awkward()
output = awkward.concatenate(trimmed, mergebool=False, highlevel=False)
else:
output = numpy.concatenate(trimmed)
Expand Down
3 changes: 2 additions & 1 deletion tests/test_0910-fix_906_members_non_numerical_branches.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import uproot
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE

import uproot
from skhep_testdata import data_path


Expand Down
54 changes: 54 additions & 0 deletions tests/test_0912-fix-pandas-and-double-nested-vectors-issue-885.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE

import uproot
import pytest
import skhep_testdata, os
import numpy as np
import awkward as ak

ROOT = pytest.importorskip("ROOT")


def test_pandas_and_double_nested_vectors_issue_885(tmp_path):
    """Regression test for issue 885: vector branches read with ``library="pd"``.

    Writes a small TTree with PyROOT containing one doubly nested
    ``std::vector<std::vector<double>>`` branch and two flat
    ``std::vector<unsigned int>`` branches, reads the three branches back
    through uproot as pandas objects, and checks the type and contents of the
    first entry of each column.

    Args:
        tmp_path: directory in which the temporary ROOT file is created
            (presumably pytest's ``tmp_path`` fixture).
    """
    filename = os.path.join(
        tmp_path, "uproot_test_pandas_and_double_nested_vectors.root"
    )
    f = ROOT.TFile(filename, "recreate")
    t = ROOT.TTree("mytree", "example tree")

    vec1 = ROOT.std.vector("double")()
    vec2 = ROOT.std.vector("double")()
    vec_vec = ROOT.std.vector(ROOT.std.vector("double"))()

    # Inner vectors of different lengths make the nested branch jagged.
    for i in range(3):
        vec1.push_back(i)
    for i in range(5):
        vec2.push_back(i)

    vec_vec.push_back(vec1)
    vec_vec.push_back(vec2)

    a = np.array([1, 2, 3, 4], dtype=np.uint32)
    avec = ROOT.std.vector("unsigned int")(a)

    # `b` has shape (1, 3, 3); the final assertion expects the branch to hold
    # its nine values as one flat sequence.
    b = np.array([[[0, 1, 3], [4, 5, 6], [7, 8, 9]]], dtype=np.uint32)
    bvec = ROOT.std.vector("unsigned int")(b)

    t.Branch("2Dvector", vec_vec)
    t.Branch("1Dvector", avec)
    t.Branch("othervector", bvec)

    # Every entry stores the same vector contents.
    nentries = 25
    for _ in range(nentries):
        t.Fill()

    f.Write()
    # Release ROOT's handle on the file before uproot reopens the same path.
    f.Close()

    with uproot.open(filename)["mytree"] as fs:
        u = fs.arrays(["2Dvector", "1Dvector", "othervector"], library="pd")
        # Each row of every column is expected to come back as an Awkward Array.
        assert isinstance(u["2Dvector"][0], ak.highlevel.Array)
        assert isinstance(u["1Dvector"][0], ak.highlevel.Array)
        assert isinstance(u["othervector"][0], ak.highlevel.Array)
        assert ak.to_list(u["2Dvector"][0]) == [[0, 1, 2], [0, 1, 2, 3, 4]]
        assert ak.to_list(u["1Dvector"][0]) == [1, 2, 3, 4]
        assert ak.to_list(u["othervector"][0]) == [0, 1, 3, 4, 5, 6, 7, 8, 9]

0 comments on commit a8644df

Please sign in to comment.