diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 17894914b44d1..0615f7aca5798 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -213,7 +213,7 @@ Other enhancements
 - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`)
 - Reductions :meth:`Series.argmax`, :meth:`Series.argmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin`, :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin` are now supported for object-dtype objects (:issue:`4279`, :issue:`18021`, :issue:`40685`, :issue:`43697`)
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`51722`)
--
+- Column names are extracted from DB-API 2.0 cursor objects passed to the :class:`DataFrame` constructor if columns are not specified explicitly
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes:
diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index ee190ad8db2d9..41ef009180e82 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -42,6 +42,7 @@ def infer_dtype(value: object, skipna: bool = ...) -> str: ...
 def is_iterator(obj: object) -> bool: ...
 def is_scalar(val: object) -> bool: ...
 def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
+def is_cursor(obj: object) -> bool: ...
 def is_pyarrow_array(obj: object) -> bool: ...
 def is_period(val: object) -> TypeGuard[Period]: ...
 def is_interval(val: object) -> TypeGuard[Interval]: ...
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c3fbd3ee4853e..19d19511f24c4 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1219,6 +1219,41 @@ cdef bint c_is_list_like(object obj, bint allow_sets) except -1:
     )
 
 
+def is_cursor(obj: object) -> bool:
+    """
+    Check if the object is a DB-API cursor.
+
+    Parameters
+    ----------
+    obj : object
+        Object to check.
+
+    Returns
+    -------
+    bool
+        Whether `obj` appears to be a DB-API cursor object.
+
+    Notes
+    -----
+    An object qualifies when it exposes ``fetchall``, ``execute`` and ``close``
+    and its ``description`` attribute is list-like. A cursor whose
+    ``description`` is ``None`` is therefore not recognized.
+    """
+    return c_is_cursor(obj)
+
+
+cdef bint c_is_cursor(object obj) except -1:
+    # DB-API 2.0 requires these methods on every cursor object.
+    if not (
+        hasattr(obj, "fetchall") and hasattr(obj, "execute") and hasattr(obj, "close")
+    ):
+        return False
+    # ``description`` may be computed on access, so read it exactly once. A
+    # separate None check is unnecessary: is_list_like(None) is False, which
+    # also covers objects that have no ``description`` attribute at all.
+    return is_list_like(getattr(obj, "description", None))
+
+
 def is_pyarrow_array(obj):
     """
     Return True if given object is a pyarrow Array or ChunkedArray.
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index a39dafc64c42b..c762edf846c5e 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -35,6 +35,7 @@
     is_array_like,
     is_bool,
     is_complex,
+    is_cursor,
     is_dataclass,
     is_decimal,
     is_dict_like,
@@ -1680,6 +1681,7 @@ def is_all_strings(value: ArrayLike) -> bool:
     "is_categorical_dtype",
     "is_complex",
     "is_complex_dtype",
+    "is_cursor",
     "is_dataclass",
     "is_datetime64_any_dtype",
     "is_datetime64_dtype",
diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
index 9c04e57be36fc..6732f42f826d1 100644
--- a/pandas/core/dtypes/inference.py
+++ b/pandas/core/dtypes/inference.py
@@ -35,6 +35,8 @@
 
 is_iterator = lib.is_iterator
 
+is_cursor = lib.is_cursor
+
 
 def is_number(obj) -> TypeGuard[Number | np.number]:
     """
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3b2fe1699e996..c082586caace7 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -89,6 +89,7 @@
     is_1d_only_ea_dtype,
     is_array_like,
     is_bool_dtype,
+    is_cursor,
     is_dataclass,
     is_dict_like,
     is_float,
@@ -798,6 +799,9 @@ def __init__(
                     # GH#44616 big perf improvement for e.g. pytorch tensor
                     data = np.asarray(data)
                 else:
+                    if columns is None and is_cursor(data):
+                        # DB-API 2.0: item 0 of each description entry is the name
+                        columns = [x[0] for x in data.description]
                     data = list(data)
             if len(data) > 0:
                 if is_dataclass(data[0]):
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 78f960f4d46d5..0390139e01a57 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -131,6 +131,42 @@ def shape(self):
         return self._values.shape
 
 
+class MockDBCursor:
+    """
+    A class which is cursor-like but not actually a database cursor
+
+    This mock class includes an iterator interface. This technically is not
+    required by the DB-API 2.0, but many database interfaces include
+    this feature. This cursor object is intended to mock that behavior.
+
+    """
+
+    def __iter__(self) -> Iterator:
+        return iter(self.fetchall())
+
+    @property
+    def description(self):
+        return [
+            ("First", 0, None, None, None, None, False),
+            ("Second", 0, None, None, None, None, False),
+            ("Third", 1, None, None, None, None, True),
+            ("Fourth", 2, None, None, None, None, True),
+        ]
+
+    @property
+    def rowcount(self):
+        return 1
+
+    def execute(self, *args):
+        return
+
+    def fetchall(self):
+        return [("a", "b", 1.2, 3)]
+
+    def close(self):
+        return
+
+
 # collect all objects to be tested for list-like-ness; use tuples of objects,
 # whether they are list-like or not (special casing for sets), and their ID
 ll_params = [
@@ -184,6 +220,7 @@ def shape(self):
     (object(), False, "object"),
     (np.nan, False, "NaN"),
     (None, False, "None"),
+    (MockDBCursor(), True, "duck-db-cursor"),
 ]
 
 objs, expected, ids = zip(*ll_params)
@@ -1985,3 +2022,25 @@ def test_ensure_int32():
     values = np.arange(10, dtype=np.int64)
     result = ensure_int32(values)
     assert result.dtype == np.int32
+
+
+def test_is_cursor():
+    is_cursor = inference.is_cursor
+
+    cur = MockDBCursor()
+
+    assert inference.is_list_like(cur)
+    assert is_cursor(cur)
+
+    class UnexecutedCursor(MockDBCursor):
+        # DB-API 2.0: ``description`` is None until a statement returns rows
+        description = None
+
+    assert not is_cursor(UnexecutedCursor())
+
+    arr = MockNumpyLikeArray([[0, 1]])
+
+    assert not is_cursor(arr)
+    assert not is_cursor("")
+    assert not is_cursor(1)
+    assert not is_cursor(1.23)