pandas-dev · kesmit13 · Aug 2, 2023 · Aug 2, 2023 · Aug 2, 2023 · WillAyd
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -213,7 +213,7 @@ Other enhancements
 - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`)
 - Reductions :meth:`Series.argmax`, :meth:`Series.argmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin`, :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin` are now supported for object-dtype objects (:issue:`4279`, :issue:`18021`, :issue:`40685`, :issue:`43697`)
 - Performance improvement in :meth:`GroupBy.quantile` (:issue:`51722`)
--
+- Column names are now inferred from the ``description`` of DB-API 2.0 cursor objects passed to the :class:`DataFrame` constructor when ``columns`` is not specified explicitly
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes:

@@ -42,6 +42,7 @@ def infer_dtype(value: object, skipna: bool = ...) -> str: ...
 def is_iterator(obj: object) -> bool: ...
 def is_scalar(val: object) -> bool: ...
 def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
+def is_cursor(obj: object) -> bool: ...
 def is_pyarrow_array(obj: object) -> bool: ...
 def is_period(val: object) -> TypeGuard[Period]: ...
 def is_interval(val: object) -> TypeGuard[Interval]: ...

@@ -1219,6 +1219,35 @@ cdef bint c_is_list_like(object obj, bint allow_sets) except -1:
     )
 
 
+def is_cursor(obj: object) -> bool:
+    """
+    Check if the object is a DB-API cursor (duck-typed on its attributes).
+
+    Parameters
+    ----------
+    obj : object
+        Object to check.
+
+    Returns
+    -------
+    bool
+        Whether `obj` has the cursor methods and a list-like ``description``.
+    """
+    return c_is_cursor(obj)
+
+
+cdef bint c_is_cursor(object obj) except -1:
+    return (
+        # duck-typing: check for the required cursor methods
+        hasattr(obj, "fetchall")
+        and hasattr(obj, "execute")
+        and hasattr(obj, "close")
+        # check for a populated column descriptions field; None is not
+        # list-like, so a single lookup also rejects a missing/unset one
+        and is_list_like(getattr(obj, "description", None))
+    )
+
+
 def is_pyarrow_array(obj):
     """
     Return True if given object is a pyarrow Array or ChunkedArray.

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -35,6 +35,7 @@
     is_array_like,
     is_bool,
     is_complex,
+    is_cursor,
     is_dataclass,
     is_decimal,
     is_dict_like,
@@ -1680,6 +1681,7 @@ def is_all_strings(value: ArrayLike) -> bool:
     "is_categorical_dtype",
     "is_complex",
     "is_complex_dtype",
+    "is_cursor",
     "is_dataclass",
     "is_datetime64_any_dtype",
     "is_datetime64_dtype",

diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
@@ -35,6 +35,8 @@
 
 is_iterator = lib.is_iterator
 
+is_cursor = lib.is_cursor
+
 
 def is_number(obj) -> TypeGuard[Number | np.number]:
     """

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -89,6 +89,7 @@
     is_1d_only_ea_dtype,
     is_array_like,
     is_bool_dtype,
+    is_cursor,
     is_dataclass,
     is_dict_like,
     is_float,
@@ -798,6 +799,8 @@ def __init__(
                     # GH#44616 big perf improvement for e.g. pytorch tensor
                     data = np.asarray(data)
                 else:
+                    if columns is None and is_cursor(data):
+                        columns = [x[0] for x in data.description]
                     data = list(data)
             if len(data) > 0:
                 if is_dataclass(data[0]):

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -131,6 +131,42 @@ def shape(self):
         return self._values.shape
 
 
+class MockDBCursor:
+    """
+    A cursor-like stand-in that is not backed by a real database.
+
+    It describes four columns and ``fetchall`` returns one matching row.
+    An iterator interface is technically not required by the DB-API 2.0,
+    but many database interfaces include one, so ``__iter__`` is mocked
+    here as well by iterating over the rows from ``fetchall``.
+    """
+
+    def __iter__(self) -> Iterator:
+        return iter(self.fetchall())
+
+    @property
+    def description(self):
+        return [
+            ("First", 0, None, None, None, None, False),
+            ("Second", 0, None, None, None, None, False),
+            ("Third", 1, None, None, None, None, True),
+            ("Fourth", 2, None, None, None, None, True),
+        ]
+
+    @property
+    def rowcount(self):
+        return 1
+
+    def execute(self, *args):
+        return
+
+    def fetchall(self):
+        return [("a", "b", 1.2, 3)]
+
+    def close(self):
+        return
+
+
 # collect all objects to be tested for list-like-ness; use tuples of objects,
 # whether they are list-like or not (special casing for sets), and their ID
 ll_params = [
@@ -184,6 +220,7 @@ def shape(self):
     (object(), False, "object"),
     (np.nan, False, "NaN"),
     (None, False, "None"),
+    (MockDBCursor(), True, "duck-db-cursor"),
 ]
 objs, expected, ids = zip(*ll_params)
 
@@ -1985,3 +2022,19 @@ def test_ensure_int32():
     values = np.arange(10, dtype=np.int64)
     result = ensure_int32(values)
     assert result.dtype == np.int32
+
+
+def test_is_cursor():
+    is_cursor = inference.is_cursor
+
+    cur = MockDBCursor()
+    # the iterable mock cursor must be list-like as well as cursor-like
+    assert inference.is_list_like(cur)
+    assert is_cursor(cur)
+
+    arr = MockNumpyLikeArray([[0, 1]])
+    # array-like and scalar objects must not be detected as cursors
+    assert not is_cursor(arr)
+    assert not is_cursor("")
+    assert not is_cursor(1)
+    assert not is_cursor(1.23)