diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
index 01c2ed3821d7a..2880ecec81f66 100644
--- a/doc/source/whatsnew/v2.3.0.rst
+++ b/doc/source/whatsnew/v2.3.0.rst
@@ -130,7 +130,7 @@ MultiIndex
 
 I/O
 ^^^
--
+- Bug in :func:`read_sql` raising ``UnicodeDecodeError`` when a column contained byte data that could not be converted to a string while using ``dtype_backend="pyarrow"`` (:issue:`59242`)
 -
 
 Period
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 959e572b2b35b..1900ed282e876 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -970,7 +970,14 @@ def convert(arr):
                     if dtype_backend != "numpy" and arr.dtype == np.dtype("O"):
                         new_dtype = StringDtype()
                         arr_cls = new_dtype.construct_array_type()
-                        arr = arr_cls._from_sequence(arr, dtype=new_dtype)
+                        try:
+                            # GH#59242: greedily try to convert to string.
+                            # Byte data that cannot be decoded into a string
+                            # raises UnicodeDecodeError; in that case we
+                            # deliberately keep the original object array.
+                            arr = arr_cls._from_sequence(arr, dtype=new_dtype)
+                        except UnicodeDecodeError:
+                            pass
                 elif dtype_backend != "numpy" and isinstance(arr, np.ndarray):
                     if arr.dtype.kind in "iufb":
                         arr = pd_array(arr, copy=False)
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index c28a33069d23f..73f9ff42287fc 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -4352,3 +4352,18 @@ def test_xsqlite_if_exists(sqlite_buildin):
         (5, "E"),
     ]
     drop_table(table_name, sqlite_buildin)
+
+
+def test_bytes_column(sqlite_buildin):
+    """
+    Regression test for (#59242)
+    Bytes being returned in a column that could not be converted
+    to a string would raise a UnicodeDecodeError
+    when using dtype_backend='pyarrow'
+    """
+    pytest.importorskip("pyarrow")
+    query = """
+    select cast(x'0123456789abcdef0123456789abcdef' as blob) a
+    """
+    df = pd.read_sql(query, sqlite_buildin, dtype_backend="pyarrow")
+    assert df.a.values[0] == b"\x01#Eg\x89\xab\xcd\xef\x01#Eg\x89\xab\xcd\xef"