From ac52dfaa1f6fb0a6994abdd9c14375bfd26b4874 Mon Sep 17 00:00:00 2001
From: Kevin Smith
Date: Wed, 2 Aug 2023 13:09:32 -0500
Subject: [PATCH 1/3] Add DB-API 2.0 cursor support to pandas DataFrame constructor

---
Review note: the pandas/_libs/lib.pyx hunk below was revised by hand
(c_is_cursor reads ``description`` once and calls c_is_list_like directly;
is_cursor gained a Notes section). Its hunk header and the diffstat were
updated to match, but the post-image blob id on the "index" line was not
regenerated.

 pandas/_libs/lib.pyi                  |  1 +
 pandas/_libs/lib.pyx                  | 41 ++++++++++++++++++++
 pandas/core/dtypes/common.py          |  1 +
 pandas/core/dtypes/inference.py       |  2 +
 pandas/core/frame.py                  |  3 ++
 pandas/tests/dtypes/test_inference.py | 54 +++++++++++++++++++++++++++
 6 files changed, 102 insertions(+)

diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index ee190ad8db2d9..41ef009180e82 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -42,6 +42,7 @@ def infer_dtype(value: object, skipna: bool = ...) -> str: ...
 def is_iterator(obj: object) -> bool: ...
 def is_scalar(val: object) -> bool: ...
 def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
+def is_cursor(obj: object) -> bool: ...
 def is_pyarrow_array(obj: object) -> bool: ...
 def is_period(val: object) -> TypeGuard[Period]: ...
 def is_interval(val: object) -> TypeGuard[Interval]: ...
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index c3fbd3ee4853e..19d19511f24c4 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1219,6 +1219,47 @@ cdef bint c_is_list_like(object obj, bint allow_sets) except -1:
     )
 
 
+def is_cursor(obj: object) -> bool:
+    """
+    Check if the object is a DB-API cursor.
+
+    Parameters
+    ----------
+    obj : object
+        Object to check.
+
+    Returns
+    -------
+    bool
+        Whether `obj` appears to be a DB-API cursor object.
+
+    Notes
+    -----
+    This is a duck-typing check: `obj` must expose ``fetchall``, ``execute``
+    and ``close`` attributes and a list-like, non-None ``description``.
+    A cursor whose ``description`` is still None (e.g. no row-returning
+    statement has been executed yet) is therefore not recognized.
+    """
+    return c_is_cursor(obj)
+
+
+cdef bint c_is_cursor(object obj) except -1:
+    cdef object description
+
+    # check for required methods
+    if not (
+        hasattr(obj, "fetchall")
+        and hasattr(obj, "execute")
+        and hasattr(obj, "close")
+    ):
+        return False
+
+    # check for column descriptions field; read it only once because it may
+    # be a computed property on the cursor
+    description = getattr(obj, "description", None)
+    return description is not None and c_is_list_like(description, True)
+
+
 def is_pyarrow_array(obj):
     """
     Return True if given object is a pyarrow Array or ChunkedArray.
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index a39dafc64c42b..d07dfa6a985f1 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -35,6 +35,7 @@ is_array_like, is_bool, is_complex, + is_cursor, is_dataclass, is_decimal, is_dict_like, diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 9c04e57be36fc..6732f42f826d1 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -35,6 +35,8 @@ is_iterator = lib.is_iterator +is_cursor = lib.is_cursor + def is_number(obj) -> TypeGuard[Number | np.number]: """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3b2fe1699e996..c082586caace7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -89,6 +89,7 @@ is_1d_only_ea_dtype, is_array_like, is_bool_dtype, + is_cursor, is_dataclass, is_dict_like, is_float, @@ -798,6 +799,8 @@ def __init__( # GH#44616 big perf improvement for e.g. pytorch tensor data = np.asarray(data) else: + if columns is None and is_cursor(data): + columns = [x[0] for x in data.description] data = list(data) if len(data) > 0: if is_dataclass(data[0]): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 78f960f4d46d5..09921fb29d9ac 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -40,6 +40,7 @@ ensure_int32, is_bool, is_complex, + is_cursor, is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, @@ -131,6 +132,42 @@ def shape(self): return self._values.shape +class MockDBCursor: + """ + A class which is cursor-like but not actually a database cursor + + This mock class includes an iterator interface. This technically is not + required by the DB-API 2.0, but many database interfaces include + this feature. This cursor object is intended to mock that behavior. 
+ + """ + + def __iter__(self) -> Iterator: + return iter(self.fetchall()) + + @property + def description(self): + return [ + ('First', 0, None, None, None, None, False), + ('Second', 0, None, None, None, None, False), + ('Third', 1, None, None, None, None, True), + ('Fourth', 2, None, None, None, None, True), + ] + + @property + def rowcount(self): + return 1 + + def execute(self, *args): + return + + def fetchall(self): + return [('a', 'b', 1.2, 3)] + + def close(self): + return + + # collect all objects to be tested for list-like-ness; use tuples of objects, # whether they are list-like or not (special casing for sets), and their ID ll_params = [ @@ -184,6 +221,7 @@ def shape(self): (object(), False, "object"), (np.nan, False, "NaN"), (None, False, "None"), + (MockDBCursor(), True, "duck-db-cursor"), ] objs, expected, ids = zip(*ll_params) @@ -1985,3 +2023,19 @@ def test_ensure_int32(): values = np.arange(10, dtype=np.int64) result = ensure_int32(values) assert result.dtype == np.int32 + + +def test_is_cursor(): + is_cursor = inference.is_cursor + + cur = MockDBCursor() + + assert inference.is_list_like(cur) + assert is_cursor(cur) + + arr = MockNumpyLikeArray([[0, 1]]) + + assert not is_cursor(arr) + assert not is_cursor('') + assert not is_cursor(1) + assert not is_cursor(1.23) From a9bbbb2f83a3d185d467e996975bdba207779681 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 2 Aug 2023 14:57:50 -0500 Subject: [PATCH 2/3] pre-commit fixes --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/dtypes/common.py | 1 - pandas/tests/dtypes/test_inference.py | 13 ++++++------- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 17894914b44d1..0615f7aca5798 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -213,7 +213,7 @@ Other enhancements - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding 
compression arguments to lzma.LZMAFile (:issue:`52979`) - Reductions :meth:`Series.argmax`, :meth:`Series.argmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin`, :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin` are now supported for object-dtype objects (:issue:`4279`, :issue:`18021`, :issue:`40685`, :issue:`43697`) - Performance improvement in :meth:`GroupBy.quantile` (:issue:`51722`) -- +- Column names are extracted from DB-API 2.0 cursor objects passed to the :class:`DataFrame` constructor if columns are not specified explicitly .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index d07dfa6a985f1..a39dafc64c42b 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -35,7 +35,6 @@ is_array_like, is_bool, is_complex, - is_cursor, is_dataclass, is_decimal, is_dict_like, diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 09921fb29d9ac..0390139e01a57 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -40,7 +40,6 @@ ensure_int32, is_bool, is_complex, - is_cursor, is_datetime64_any_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, @@ -148,10 +147,10 @@ def __iter__(self) -> Iterator: @property def description(self): return [ - ('First', 0, None, None, None, None, False), - ('Second', 0, None, None, None, None, False), - ('Third', 1, None, None, None, None, True), - ('Fourth', 2, None, None, None, None, True), + ("First", 0, None, None, None, None, False), + ("Second", 0, None, None, None, None, False), + ("Third", 1, None, None, None, None, True), + ("Fourth", 2, None, None, None, None, True), ] @property @@ -162,7 +161,7 @@ def execute(self, *args): return def fetchall(self): - return [('a', 'b', 1.2, 3)] + return [("a", "b", 1.2, 3)] def close(self): return @@ -2036,6 
+2035,6 @@ def test_is_cursor(): arr = MockNumpyLikeArray([[0, 1]]) assert not is_cursor(arr) - assert not is_cursor('') + assert not is_cursor("") assert not is_cursor(1) assert not is_cursor(1.23) From 4c62babba285a59fede0d1d2e4c522f3daab5f63 Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 2 Aug 2023 15:29:13 -0500 Subject: [PATCH 3/3] Export is_cursor --- pandas/core/dtypes/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index a39dafc64c42b..c762edf846c5e 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -35,6 +35,7 @@ is_array_like, is_bool, is_complex, + is_cursor, is_dataclass, is_decimal, is_dict_like, @@ -1680,6 +1681,7 @@ def is_all_strings(value: ArrayLike) -> bool: "is_categorical_dtype", "is_complex", "is_complex_dtype", + "is_cursor", "is_dataclass", "is_datetime64_any_dtype", "is_datetime64_dtype",