Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: Remove inf_as_na #57428

Merged
merged 1 commit into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ Removal of prior version deprecations/changes
- Removed ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`)
- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
- Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`)
- Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` beforehand instead (:issue:`51684`)
- Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records` (:issue:`51697`)
- Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`)
- Removed support for ``slice`` in :meth:`DataFrame.take` (:issue:`51539`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/missing.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ from numpy cimport (
cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*)
cpdef bint check_na_tuples_nonequal(object left, object right)

cpdef bint checknull(object val, bint inf_as_na=*)
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=*)
cpdef bint checknull(object val)
cpdef ndarray[uint8_t] isnaobj(ndarray arr)

cdef bint is_null_datetime64(v)
cdef bint is_null_timedelta64(v)
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/missing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ def is_matching_na(
) -> bool: ...
def isposinf_scalar(val: object) -> bool: ...
def isneginf_scalar(val: object) -> bool: ...
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def checknull(val: object) -> bool: ...
def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
10 changes: 3 additions & 7 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False
return False


cpdef bint checknull(object val, bint inf_as_na=False):
cpdef bint checknull(object val):
"""
Return boolean describing if the input is NA-like, defined here as any
of:
Expand All @@ -152,8 +152,6 @@ cpdef bint checknull(object val, bint inf_as_na=False):
Parameters
----------
val : object
inf_as_na : bool, default False
Whether to treat INF and -INF as NA values.

Returns
-------
Expand All @@ -164,8 +162,6 @@ cpdef bint checknull(object val, bint inf_as_na=False):
elif util.is_float_object(val) or util.is_complex_object(val):
if val != val:
return True
elif inf_as_na:
return val == INF or val == NEGINF
return False
elif cnp.is_timedelta64_object(val):
return cnp.get_timedelta64_value(val) == NPY_NAT
Expand All @@ -184,7 +180,7 @@ cdef bint is_decimal_na(object val):

@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
cpdef ndarray[uint8_t] isnaobj(ndarray arr):
"""
Return boolean mask denoting which elements of a 1-D array are na-like,
according to the criteria defined in `checknull`:
Expand Down Expand Up @@ -217,7 +213,7 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
# equivalents to `val = values[i]`
val = cnp.PyArray_GETITEM(arr, cnp.PyArray_ITER_DATA(it))
cnp.PyArray_ITER_NEXT(it)
is_null = checknull(val, inf_as_na=inf_as_na)
is_null = checknull(val)
# Dereference pointer (set value)
(<uint8_t *>(cnp.PyArray_ITER_DATA(it2)))[0] = <uint8_t>is_null
cnp.PyArray_ITER_NEXT(it2)
Expand Down
29 changes: 0 additions & 29 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,35 +406,6 @@ def is_terminal() -> bool:
with cf.config_prefix("mode"):
cf.register_option("sim_interactive", False, tc_sim_interactive_doc)

use_inf_as_na_doc = """
: boolean
True means treat None, NaN, INF, -INF as NA (old way),
False means None and NaN are null, but INF, -INF are not NA
(new way).

This option is deprecated in pandas 2.1.0 and will be removed in 3.0.
"""

# We don't want to start importing everything at the global context level
# or we'll hit circular deps.


def use_inf_as_na_cb(key) -> None:
# TODO(3.0): enforcing this deprecation will close GH#52501
from pandas.core.dtypes.missing import _use_inf_as_na

_use_inf_as_na(key)


with cf.config_prefix("mode"):
cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb)

cf.deprecate_option(
# GH#51684
"mode.use_inf_as_na",
"use_inf_as_na option is deprecated and will be removed in a future "
"version. Convert inf values to NaN before operating instead.",
)

# TODO better name?
copy_on_write_doc = """
Expand Down
117 changes: 23 additions & 94 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from __future__ import annotations

from decimal import Decimal
from functools import partial
from typing import (
TYPE_CHECKING,
overload,
Expand All @@ -13,8 +12,6 @@

import numpy as np

from pandas._config import get_option

from pandas._libs import lib
import pandas._libs.missing as libmissing
from pandas._libs.tslibs import (
Expand Down Expand Up @@ -64,8 +61,6 @@
isposinf_scalar = libmissing.isposinf_scalar
isneginf_scalar = libmissing.isneginf_scalar

nan_checker = np.isnan
INF_AS_NA = False
_dtype_object = np.dtype("object")
_dtype_str = np.dtype(str)

Expand Down Expand Up @@ -180,95 +175,57 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame:
isnull = isna


def _isna(obj, inf_as_na: bool = False):
def _isna(obj):
"""
Detect missing values, treating None, NaN or NA as null. Infinite
values will also be treated as null if inf_as_na is True.
Detect missing values, treating None, NaN or NA as null.

Parameters
----------
obj: ndarray or object value
Input array or scalar value.
inf_as_na: bool
Whether to treat infinity as null.

Returns
-------
boolean ndarray or boolean
"""
if is_scalar(obj):
return libmissing.checknull(obj, inf_as_na=inf_as_na)
return libmissing.checknull(obj)
elif isinstance(obj, ABCMultiIndex):
raise NotImplementedError("isna is not defined for MultiIndex")
elif isinstance(obj, type):
return False
elif isinstance(obj, (np.ndarray, ABCExtensionArray)):
return _isna_array(obj, inf_as_na=inf_as_na)
return _isna_array(obj)
elif isinstance(obj, ABCIndex):
# Try to use cached isna, which also short-circuits for integer dtypes
# and avoids materializing RangeIndex._values
if not obj._can_hold_na:
return obj.isna()
return _isna_array(obj._values, inf_as_na=inf_as_na)
return _isna_array(obj._values)

elif isinstance(obj, ABCSeries):
result = _isna_array(obj._values, inf_as_na=inf_as_na)
result = _isna_array(obj._values)
# box
result = obj._constructor(result, index=obj.index, name=obj.name, copy=False)
return result
elif isinstance(obj, ABCDataFrame):
return obj.isna()
elif isinstance(obj, list):
return _isna_array(np.asarray(obj, dtype=object), inf_as_na=inf_as_na)
return _isna_array(np.asarray(obj, dtype=object))
elif hasattr(obj, "__array__"):
return _isna_array(np.asarray(obj), inf_as_na=inf_as_na)
return _isna_array(np.asarray(obj))
else:
return False


def _use_inf_as_na(key) -> None:
"""
Option change callback for na/inf behaviour.

Choose which replacement for numpy.isnan / -numpy.isfinite is used.

Parameters
----------
flag: bool
True means treat None, NaN, INF, -INF as null (old way),
False means None and NaN are null, but INF, -INF are not null
(new way).

Notes
-----
This approach to setting global module values is discussed and
approved here:

* https://stackoverflow.com/questions/4859217/
programmatically-creating-variables-in-python/4859312#4859312
"""
inf_as_na = get_option(key)
globals()["_isna"] = partial(_isna, inf_as_na=inf_as_na)
if inf_as_na:
globals()["nan_checker"] = lambda x: ~np.isfinite(x)
globals()["INF_AS_NA"] = True
else:
globals()["nan_checker"] = np.isnan
globals()["INF_AS_NA"] = False


def _isna_array(
values: ArrayLike, inf_as_na: bool = False
) -> npt.NDArray[np.bool_] | NDFrame:
def _isna_array(values: ArrayLike) -> npt.NDArray[np.bool_] | NDFrame:
"""
Return an array indicating which values of the input array are NaN / NA.

Parameters
----------
values: ndarray or ExtensionArray
The input array whose elements are to be checked.
inf_as_na: bool
Whether or not to treat infinite values as NA.

Returns
-------
Expand All @@ -280,73 +237,47 @@ def _isna_array(

if not isinstance(values, np.ndarray):
# i.e. ExtensionArray
if inf_as_na and isinstance(dtype, CategoricalDtype):
result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
else:
# error: Incompatible types in assignment (expression has type
# "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
# type "ndarray[Any, dtype[bool_]]")
result = values.isna() # type: ignore[assignment]
# error: Incompatible types in assignment (expression has type
# "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
# type "ndarray[Any, dtype[bool_]]")
result = values.isna() # type: ignore[assignment]
elif isinstance(values, np.rec.recarray):
# GH 48526
result = _isna_recarray_dtype(values, inf_as_na=inf_as_na)
result = _isna_recarray_dtype(values)
elif is_string_or_object_np_dtype(values.dtype):
result = _isna_string_dtype(values, inf_as_na=inf_as_na)
result = _isna_string_dtype(values)
elif dtype.kind in "mM":
# this is the NaT pattern
result = values.view("i8") == iNaT
else:
if inf_as_na:
result = ~np.isfinite(values)
else:
result = np.isnan(values)
result = np.isnan(values)

return result


def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> npt.NDArray[np.bool_]:
def _isna_string_dtype(values: np.ndarray) -> npt.NDArray[np.bool_]:
# Working around NumPy ticket 1542
dtype = values.dtype

if dtype.kind in ("S", "U"):
result = np.zeros(values.shape, dtype=bool)
else:
if values.ndim in {1, 2}:
result = libmissing.isnaobj(values, inf_as_na=inf_as_na)
result = libmissing.isnaobj(values)
else:
# 0-D, reached via e.g. mask_missing
result = libmissing.isnaobj(values.ravel(), inf_as_na=inf_as_na)
result = libmissing.isnaobj(values.ravel())
result = result.reshape(values.shape)

return result


def _has_record_inf_value(record_as_array: np.ndarray) -> np.bool_:
is_inf_in_record = np.zeros(len(record_as_array), dtype=bool)
for i, value in enumerate(record_as_array):
is_element_inf = False
try:
is_element_inf = np.isinf(value)
except TypeError:
is_element_inf = False
is_inf_in_record[i] = is_element_inf

return np.any(is_inf_in_record)


def _isna_recarray_dtype(
values: np.rec.recarray, inf_as_na: bool
) -> npt.NDArray[np.bool_]:
def _isna_recarray_dtype(values: np.rec.recarray) -> npt.NDArray[np.bool_]:
result = np.zeros(values.shape, dtype=bool)
for i, record in enumerate(values):
record_as_array = np.array(record.tolist())
does_record_contain_nan = isna_all(record_as_array)
does_record_contain_inf = False
if inf_as_na:
does_record_contain_inf = bool(_has_record_inf_value(record_as_array))
result[i] = np.any(
np.logical_or(does_record_contain_nan, does_record_contain_inf)
)
result[i] = np.any(does_record_contain_nan)

return result

Expand Down Expand Up @@ -774,7 +705,7 @@ def isna_all(arr: ArrayLike) -> bool:

dtype = arr.dtype
if lib.is_np_dtype(dtype, "f"):
checker = nan_checker
checker = np.isnan

elif (lib.is_np_dtype(dtype, "mM")) or isinstance(
dtype, (DatetimeTZDtype, PeriodDtype)
Expand All @@ -786,9 +717,7 @@ def isna_all(arr: ArrayLike) -> bool:
else:
# error: Incompatible types in assignment (expression has type "Callable[[Any],
# Any]", variable has type "ufunc")
checker = lambda x: _isna_array( # type: ignore[assignment]
x, inf_as_na=INF_AS_NA
)
checker = _isna_array # type: ignore[assignment]

return all(
checker(arr[i : i + chunk_len]).all() for i in range(0, total_len, chunk_len)
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8026,8 +8026,7 @@ def isna(self) -> Self:
NA values, such as None or :attr:`numpy.NaN`, get mapped to True
values.
Everything else gets mapped to False values. Characters such as empty
strings ``''`` or :attr:`numpy.inf` are not considered NA values
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
strings ``''`` or :attr:`numpy.inf` are not considered NA values.

Returns
-------
Expand Down Expand Up @@ -8098,8 +8097,7 @@ def notna(self) -> Self:

Return a boolean same-sized object indicating if the values are not NA.
Non-missing values get mapped to True. Characters such as empty
strings ``''`` or :attr:`numpy.inf` are not considered NA values
(unless you set ``pandas.options.mode.use_inf_as_na = True``).
strings ``''`` or :attr:`numpy.inf` are not considered NA values.
NA values, such as None or :attr:`numpy.NaN`, get mapped to False
values.

Expand Down
4 changes: 0 additions & 4 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1205,10 +1205,6 @@ def _format(x):
return "None"
elif x is NA:
return str(NA)
elif lib.is_float(x) and np.isinf(x):
# TODO(3.0): this will be unreachable when use_inf_as_na
# deprecation is enforced
return str(x)
elif x is NaT or isinstance(x, (np.datetime64, np.timedelta64)):
return "NaT"
return self.na_rep
Expand Down
Loading