From db570ffe078c6bdb2df141c14e0335a90454a762 Mon Sep 17 00:00:00 2001 From: Ammar Qazi Date: Sun, 8 Sep 2024 05:04:43 +0200 Subject: [PATCH 1/6] Update docstring of ExtensionArray.interpolate --- pandas/core/arrays/base.py | 77 ++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 536c7303a2f92..755b4cf1db0bd 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -999,24 +999,79 @@ def interpolate( **kwargs, ) -> Self: """ - See DataFrame.interpolate.__doc__. + Fill NaN values using an interpolation method. + Parameters + ---------- + method : str, default 'linear' + Interpolation technique to use. One of: + * 'linear': Ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. + * 'time': Works on daily and higher resolution data to interpolate given length of interval. + * 'index', 'values': use the actual numerical values of the index. + * 'pad': Fill in NaNs using existing values. + * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial': + Passed to scipy.interpolate.interp1d, whereas 'spline' is passed to + scipy.interpolate.UnivariateSpline. These methods use the numerical values of the index. + Both 'polynomial' and 'spline' require that you also specify an order (int), + e.g. arr.interpolate(method='polynomial', order=5). + * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', 'cubicspline': + Wrappers around the SciPy interpolation methods of similar names. See Notes. + * 'from_derivatives': Refers to scipy.interpolate.BPoly.from_derivatives. + axis : int + Axis to interpolate along. For 1D NumpyExtensionArray, use 0. + index : Index + Index to use for interpolation. + limit : int or None + Maximum number of consecutive NaNs to fill. Must be greater than 0.
+ limit_direction : {'forward', 'backward', 'both'} + Consecutive NaNs will be filled in this direction. + * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. + * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be 'backward'. + Raises ValueError if limit_direction is 'forward' or 'both' and method is 'backfill' or 'bfill'. + Raises ValueError if limit_direction is 'backward' or 'both' and method is 'pad' or 'ffill'. + limit_area : {'inside', 'outside'} or None + If limit is specified, consecutive NaNs will be filled with this restriction. + * None: No fill restriction. + * 'inside': Only fill NaNs surrounded by valid values (interpolate). + * 'outside': Only fill NaNs outside valid values (extrapolate). + copy : bool + If True, a copy of the object is returned with interpolated values. + **kwargs : optional + Keyword arguments to pass on to the interpolating function. + + Returns + ------- + NumpyExtensionArray + A new NumpyExtensionArray with interpolated values. + + See Also + -------- + Series.interpolate : Interpolate values in a Series. + DataFrame.interpolate : Interpolate values in a DataFrame. + + Notes + ----- + - All parameters must be specified as keyword arguments. + - The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' + methods are wrappers around the respective SciPy implementations of + similar names. These use the actual numerical values of the index. + - For 1D NumpyExtensionArray, use 0 for the `axis` parameter. + Examples -------- - >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, 1, np.nan, 3])) + >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, np.nan, 2, np.nan, 4])) >>> arr.interpolate( - ... method="linear", - ... limit=3, - ... limit_direction="forward", - ... index=pd.Index([1, 2, 3, 4]), - ... fill_value=1, - ... copy=False, + ... method='linear', ... axis=0, - ... limit_area="inside", + ... index=pd.Index(range(len(arr))), + ... limit=None, + ... limit_direction='forward', + ... 
limit_area=None, + ... copy=True ... ) - [0.0, 1.0, 2.0, 3.0] - Length: 4, dtype: float64 + [0.0, 1.0, 2.0, 3.0, 4.0] + Length: 5, dtype: float64 """ # NB: we return type(self) even if copy=False raise NotImplementedError( From 538a25cbbc5c0425950dac1edd5adc76748c7590 Mon Sep 17 00:00:00 2001 From: Ammar Qazi Date: Sun, 8 Sep 2024 05:11:32 +0200 Subject: [PATCH 2/6] Remove ExtensionArray.interpolate from code_checks.sh --- ci/code_checks.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 06078d8958492..01a5a95b4569b 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -116,7 +116,13 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Timestamp.resolution PR02" \ -i "pandas.Timestamp.tzinfo GL08" \ -i "pandas.Timestamp.year GL08" \ - -i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \ + -i "pandas.api.types.is_bool PR01,SA01" \ + -i "pandas.api.types.is_categorical_dtype SA01" \ + -i "pandas.api.types.is_complex PR01,SA01" \ + -i "pandas.api.types.is_complex_dtype SA01" \ + -i "pandas.api.types.is_datetime64_dtype SA01" \ + -i "pandas.api.types.is_datetime64_ns_dtype SA01" \ + -i "pandas.api.types.is_datetime64tz_dtype SA01" \ -i "pandas.api.types.is_dict_like PR07,SA01" \ -i "pandas.api.types.is_extension_array_dtype SA01" \ -i "pandas.api.types.is_file_like PR07,SA01" \ From d23b5ed45e0185c183b661362613f28579519c3b Mon Sep 17 00:00:00 2001 From: Ammar Qazi Date: Sun, 8 Sep 2024 15:41:51 +0200 Subject: [PATCH 3/6] Resolving pre-commit errors --- pandas/core/arrays/base.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 755b4cf1db0bd..98c2450c4840f 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1005,17 +1005,21 @@ def interpolate( ---------- method : str, default 'linear' Interpolation technique to use.
One of: - * 'linear': Ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. - * 'time': Works on daily and higher resolution data to interpolate given length of interval. + * 'linear': Ignore the index and treat the values as equally spaced. + This is the only method supported on MultiIndexes. + * 'time': Works on daily and higher resolution data to interpolate + given length of interval. * 'index', 'values': use the actual numerical values of the index. * 'pad': Fill in NaNs using existing values. - * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial': - Passed to scipy.interpolate.interp1d, whereas 'spline' is passed to - scipy.interpolate.UnivariateSpline. These methods use the numerical values of the index. - Both 'polynomial' and 'spline' require that you also specify an order (int), - e.g. arr.interpolate(method='polynomial', order=5). - * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', 'cubicspline': - Wrappers around the SciPy interpolation methods of similar names. See Notes. + * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', + 'polynomial': Passed to scipy.interpolate.interp1d, whereas 'spline' + is passed to scipy.interpolate.UnivariateSpline. These methods use + the numerical values of the index. + Both 'polynomial' and 'spline' require that you also specify an + order (int), e.g. arr.interpolate(method='polynomial', order=5). + * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', + 'cubicspline': Wrappers around the SciPy interpolation methods + of similar names. See Notes. * 'from_derivatives': Refers to scipy.interpolate.BPoly.from_derivatives. axis : int Axis to interpolate along. For 1D NumpyExtensionArray, use 0. @@ -1026,11 +1030,15 @@ def interpolate( limit_direction : {'forward', 'backward', 'both'} Consecutive NaNs will be filled in this direction. * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. 
- * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be 'backward'. - Raises ValueError if limit_direction is 'forward' or 'both' and method is 'backfill' or 'bfill'. - Raises ValueError if limit_direction is 'backward' or 'both' and method is 'pad' or 'ffill'. + * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be + 'backward'. + Raises ValueError if limit_direction is 'forward' or 'both' and method + is 'backfill' or 'bfill'. + Raises ValueError if limit_direction is 'backward' or 'both' and method + is 'pad' or 'ffill'. limit_area : {'inside', 'outside'} or None - If limit is specified, consecutive NaNs will be filled with this restriction. + If limit is specified, consecutive NaNs will be filled with this + restriction. * None: No fill restriction. * 'inside': Only fill NaNs surrounded by valid values (interpolate). * 'outside': Only fill NaNs outside valid values (extrapolate). From 99a5dddf2210bcc528e2e35f294937c076ba5c49 Mon Sep 17 00:00:00 2001 From: Ammar Qazi Date: Sun, 8 Sep 2024 16:44:37 +0200 Subject: [PATCH 4/6] Resolving pre-commit errors 2 --- pandas/core/arrays/base.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 98c2450c4840f..b07376e260ad1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1005,20 +1005,20 @@ def interpolate( ---------- method : str, default 'linear' Interpolation technique to use. One of: - * 'linear': Ignore the index and treat the values as equally spaced. + * 'linear': Ignore the index and treat the values as equally spaced. This is the only method supported on MultiIndexes. - * 'time': Works on daily and higher resolution data to interpolate + * 'time': Works on daily and higher resolution data to interpolate given length of interval. * 'index', 'values': use the actual numerical values of the index. * 'pad': Fill in NaNs using existing values. 
- * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', - 'polynomial': Passed to scipy.interpolate.interp1d, whereas 'spline' - is passed to scipy.interpolate.UnivariateSpline. These methods use + * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', + 'polynomial': Passed to scipy.interpolate.interp1d, whereas 'spline' + is passed to scipy.interpolate.UnivariateSpline. These methods use the numerical values of the index. - Both 'polynomial' and 'spline' require that you also specify an + Both 'polynomial' and 'spline' require that you also specify an order (int), e.g. arr.interpolate(method='polynomial', order=5). - * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', - 'cubicspline': Wrappers around the SciPy interpolation methods + * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', + 'cubicspline': Wrappers around the SciPy interpolation methods of similar names. See Notes. * 'from_derivatives': Refers to scipy.interpolate.BPoly.from_derivatives. axis : int @@ -1030,14 +1030,14 @@ def interpolate( limit_direction : {'forward', 'backward', 'both'} Consecutive NaNs will be filled in this direction. * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. - * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be + * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be 'backward'. - Raises ValueError if limit_direction is 'forward' or 'both' and method + Raises ValueError if limit_direction is 'forward' or 'both' and method is 'backfill' or 'bfill'. - Raises ValueError if limit_direction is 'backward' or 'both' and method + Raises ValueError if limit_direction is 'backward' or 'both' and method is 'pad' or 'ffill'. limit_area : {'inside', 'outside'} or None - If limit is specified, consecutive NaNs will be filled with this + If limit is specified, consecutive NaNs will be filled with this restriction. * None: No fill restriction. 
* 'inside': Only fill NaNs surrounded by valid values (interpolate). @@ -1060,11 +1060,11 @@ def interpolate( Notes ----- - All parameters must be specified as keyword arguments. - - The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' - methods are wrappers around the respective SciPy implementations of + - The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' + methods are wrappers around the respective SciPy implementations of similar names. These use the actual numerical values of the index. - For 1D NumpyExtensionArray, use 0 for the `axis` parameter. - + Examples -------- >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, np.nan, 2, np.nan, 4])) From 287ca5e5fa0e4994ee4544603c4098924f48e923 Mon Sep 17 00:00:00 2001 From: Ammar Qazi Date: Sun, 8 Sep 2024 19:15:44 +0200 Subject: [PATCH 5/6] Resolved ruff formatting error --- pandas/core/arrays/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b07376e260ad1..5067cae63b598 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1069,13 +1069,13 @@ def interpolate( -------- >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, np.nan, 2, np.nan, 4])) >>> arr.interpolate( - ... method='linear', + ... method="linear", ... axis=0, ... index=pd.Index(range(len(arr))), ... limit=None, - ... limit_direction='forward', + ... limit_direction="forward", ... limit_area=None, - ... copy=True + ... copy=True, ... 
) [0.0, 1.0, 2.0, 3.0, 4.0] From 23d67f684b94ec647f6cfcfacfc34cf3a94f1213 Mon Sep 17 00:00:00 2001 From: Ammar Qazi Date: Mon, 9 Sep 2024 23:48:04 +0200 Subject: [PATCH 6/6] Fix issues after review --- ci/code_checks.sh | 7 ------- pandas/core/arrays/base.py | 43 ++++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 01a5a95b4569b..2aa256b65a493 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -116,13 +116,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Timestamp.resolution PR02" \ -i "pandas.Timestamp.tzinfo GL08" \ -i "pandas.Timestamp.year GL08" \ - -i "pandas.api.types.is_bool PR01,SA01" \ - -i "pandas.api.types.is_categorical_dtype SA01" \ - -i "pandas.api.types.is_complex PR01,SA01" \ - -i "pandas.api.types.is_complex_dtype SA01" \ - -i "pandas.api.types.is_datetime64_dtype SA01" \ - -i "pandas.api.types.is_datetime64_ns_dtype SA01" \ - -i "pandas.api.types.is_datetime64tz_dtype SA01" \ -i "pandas.api.types.is_dict_like PR07,SA01" \ -i "pandas.api.types.is_extension_array_dtype SA01" \ -i "pandas.api.types.is_file_like PR07,SA01" \ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 5067cae63b598..a933a9ce11646 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1022,20 +1022,13 @@ def interpolate( of similar names. See Notes. * 'from_derivatives': Refers to scipy.interpolate.BPoly.from_derivatives. axis : int - Axis to interpolate along. For 1D NumpyExtensionArray, use 0. + Axis to interpolate along. For 1-dimensional data, use 0. index : Index Index to use for interpolation. limit : int or None Maximum number of consecutive NaNs to fill. Must be greater than 0. limit_direction : {'forward', 'backward', 'both'} Consecutive NaNs will be filled in this direction. - * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. 
- * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be - 'backward'. - Raises ValueError if limit_direction is 'forward' or 'both' and method - is 'backfill' or 'bfill'. - Raises ValueError if limit_direction is 'backward' or 'both' and method - is 'pad' or 'ffill'. limit_area : {'inside', 'outside'} or None If limit is specified, consecutive NaNs will be filled with this restriction. @@ -1049,8 +1042,8 @@ def interpolate( Returns ------- - NumpyExtensionArray - A new NumpyExtensionArray with interpolated values. + ExtensionArray + An ExtensionArray with interpolated values. See Also -------- @@ -1063,23 +1056,41 @@ def interpolate( - The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' methods are wrappers around the respective SciPy implementations of similar names. These use the actual numerical values of the index. - - For 1D NumpyExtensionArray, use 0 for the `axis` parameter. Examples -------- - >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, np.nan, 2, np.nan, 4])) + Interpolating values in a NumPy array: + + >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, 1, np.nan, 3])) + >>> arr.interpolate( + ... method="linear", + ... limit=3, + ... limit_direction="forward", + ... index=pd.Index(range(len(arr))), + ... fill_value=1, + ... copy=False, + ... axis=0, + ... limit_area="inside", + ... ) + + [0.0, 1.0, 2.0, 3.0] + Length: 4, dtype: float64 + + Interpolating values in a FloatingArray: + + >>> arr = pd.array([1.0, pd.NA, 3.0, 4.0, pd.NA, 6.0], dtype="Float64") >>> arr.interpolate( ... method="linear", ... axis=0, ... index=pd.Index(range(len(arr))), ... limit=None, - ... limit_direction="forward", + ... limit_direction="both", ... limit_area=None, ... copy=True, ... ) - - [0.0, 1.0, 2.0, 3.0, 4.0] - Length: 5, dtype: float64 + + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + Length: 6, dtype: Float64 """ # NB: we return type(self) even if copy=False raise NotImplementedError(