Skip to content

Commit

Permalink
Merge branch 'main' into issue#57111_10
Browse files Browse the repository at this point in the history
  • Loading branch information
jordan-d-murphy authored Feb 13, 2024
2 parents fe8d088 + 1d7aedc commit 84657e5
Show file tree
Hide file tree
Showing 96 changed files with 1,416 additions and 2,384 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
if: ${{ steps.build.outcome == 'success' && always() }}

- name: Typing + pylint
uses: pre-commit/[email protected].0
uses: pre-commit/[email protected].1
with:
extra_args: --verbose --hook-stage manual --all-files
if: ${{ steps.build.outcome == 'success' && always() }}
Expand Down
6 changes: 0 additions & 6 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,6 @@ def setup(self):

def time_items(self):
    """Iterate over every column of ``self.df`` starting from a cold cache.

    Clearing ``_item_cache`` first (when the attribute exists) ensures the
    benchmark monitors the no-copying behaviour of a fresh iteration.
    """
    if hasattr(self.df, "_item_cache"):
        self.df._item_cache.clear()
    for _label, _column in self.df.items():
        pass

def time_items_cached(self):
    """Iterate over every column of ``self.df`` with a warm item cache."""
    for _ in self.df.items():
        pass

Expand Down
38 changes: 11 additions & 27 deletions asv_bench/benchmarks/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,20 @@


class ToNumeric:
params = ["ignore", "coerce"]
param_names = ["errors"]

def setup(self, errors):
def setup(self):
N = 10000
self.float = Series(np.random.randn(N))
self.numstr = self.float.astype("str")
self.str = Series(Index([f"i-{i}" for i in range(N)], dtype=object))

def time_from_float(self, errors):
to_numeric(self.float, errors=errors)
def time_from_float(self):
to_numeric(self.float, errors="coerce")

def time_from_numeric_str(self, errors):
to_numeric(self.numstr, errors=errors)
def time_from_numeric_str(self):
to_numeric(self.numstr, errors="coerce")

def time_from_str(self, errors):
to_numeric(self.str, errors=errors)
def time_from_str(self):
to_numeric(self.str, errors="coerce")


class ToNumericDowncast:
Expand Down Expand Up @@ -187,7 +184,7 @@ def time_iso8601_tz_spaceformat(self):

def time_iso8601_infer_zero_tz_fromat(self):
# GH 41047
to_datetime(self.strings_zero_tz, infer_datetime_format=True)
to_datetime(self.strings_zero_tz)


class ToDatetimeNONISO8601:
Expand Down Expand Up @@ -271,16 +268,6 @@ def time_dup_string_tzoffset_dates(self, cache):
to_datetime(self.dup_string_with_tz, cache=cache)


# GH 43901
class ToDatetimeInferDatetimeFormat:
    """Benchmark ``to_datetime`` format inference on uniformly formatted strings."""

    def setup(self):
        # 100k hourly timestamps rendered with one consistent format so the
        # format-inference fast path is exercised on every element.
        stamps = date_range(start="1/1/2000", periods=100000, freq="h")
        self.strings = stamps.strftime("%Y-%m-%d %H:%M:%S").tolist()

    def time_infer_datetime_format(self):
        to_datetime(self.strings, infer_datetime_format=True)


class ToTimedelta:
def setup(self):
self.ints = np.random.randint(0, 60, size=10000)
Expand All @@ -301,16 +288,13 @@ def time_convert_string_seconds(self):


class ToTimedeltaErrors:
params = ["coerce", "ignore"]
param_names = ["errors"]

def setup(self, errors):
def setup(self):
ints = np.random.randint(0, 60, size=10000)
self.arr = [f"{i} days" for i in ints]
self.arr[-1] = "apple"

def time_convert(self, errors):
to_timedelta(self.arr, errors=errors)
def time_convert(self):
to_timedelta(self.arr, errors="coerce")


from .pandas_vb_common import setup # noqa: F401 isort:skip
65 changes: 63 additions & 2 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,74 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.Interval\
pandas.Grouper\
pandas.core.groupby.DataFrameGroupBy.nth\
pandas.core.groupby.DataFrameGroupBy.rolling\
pandas.core.groupby.SeriesGroupBy.nth\
pandas.core.groupby.SeriesGroupBy.rolling\
pandas.core.groupby.DataFrameGroupBy.plot\
pandas.core.groupby.SeriesGroupBy.plot # There should be no backslash in the final line, please keep this comment in the last ignored function
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Partially validate docstrings (SA05)' ; echo $MSG
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=SA05 --ignore_functions \
pandas.DataFrame.agg\
pandas.DataFrame.aggregate\
pandas.DataFrame.boxplot\
pandas.PeriodIndex.asfreq\
pandas.arrays.ArrowStringArray\
pandas.arrays.StringArray\
pandas.core.groupby.DataFrameGroupBy.first\
pandas.core.groupby.DataFrameGroupBy.last\
pandas.core.groupby.SeriesGroupBy.first\
pandas.core.groupby.SeriesGroupBy.last\
pandas.core.resample.Resampler.first\
pandas.core.resample.Resampler.last\
pandas.core.window.ewm.ExponentialMovingWindow.corr\
pandas.core.window.ewm.ExponentialMovingWindow.cov\
pandas.core.window.ewm.ExponentialMovingWindow.mean\
pandas.core.window.ewm.ExponentialMovingWindow.std\
pandas.core.window.ewm.ExponentialMovingWindow.sum\
pandas.core.window.ewm.ExponentialMovingWindow.var\
pandas.core.window.expanding.Expanding.aggregate\
pandas.core.window.expanding.Expanding.apply\
pandas.core.window.expanding.Expanding.corr\
pandas.core.window.expanding.Expanding.count\
pandas.core.window.expanding.Expanding.cov\
pandas.core.window.expanding.Expanding.kurt\
pandas.core.window.expanding.Expanding.max\
pandas.core.window.expanding.Expanding.mean\
pandas.core.window.expanding.Expanding.median\
pandas.core.window.expanding.Expanding.min\
pandas.core.window.expanding.Expanding.quantile\
pandas.core.window.expanding.Expanding.rank\
pandas.core.window.expanding.Expanding.sem\
pandas.core.window.expanding.Expanding.skew\
pandas.core.window.expanding.Expanding.std\
pandas.core.window.expanding.Expanding.sum\
pandas.core.window.expanding.Expanding.var\
pandas.core.window.rolling.Rolling.aggregate\
pandas.core.window.rolling.Rolling.apply\
pandas.core.window.rolling.Rolling.corr\
pandas.core.window.rolling.Rolling.count\
pandas.core.window.rolling.Rolling.cov\
pandas.core.window.rolling.Rolling.kurt\
pandas.core.window.rolling.Rolling.max\
pandas.core.window.rolling.Rolling.mean\
pandas.core.window.rolling.Rolling.median\
pandas.core.window.rolling.Rolling.min\
pandas.core.window.rolling.Rolling.quantile\
pandas.core.window.rolling.Rolling.rank\
pandas.core.window.rolling.Rolling.sem\
pandas.core.window.rolling.Rolling.skew\
pandas.core.window.rolling.Rolling.std\
pandas.core.window.rolling.Rolling.sum\
pandas.core.window.rolling.Rolling.var\
pandas.core.window.rolling.Window.mean\
pandas.core.window.rolling.Window.std\
pandas.core.window.rolling.Window.sum\
pandas.core.window.rolling.Window.var\
pandas.plotting.bootstrap_plot\
pandas.plotting.boxplot\
pandas.plotting.radviz # There should be no backslash in the final line, please keep this comment in the last ignored function
RET=$(($RET + $?)) ; echo $MSG "DONE"

fi

### DOCUMENTATION NOTEBOOKS ###
Expand Down
3 changes: 0 additions & 3 deletions doc/redirects.csv
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ generated/pandas.core.resample.Resampler.backfill,../reference/api/pandas.core.r
generated/pandas.core.resample.Resampler.bfill,../reference/api/pandas.core.resample.Resampler.bfill
generated/pandas.core.resample.Resampler.count,../reference/api/pandas.core.resample.Resampler.count
generated/pandas.core.resample.Resampler.ffill,../reference/api/pandas.core.resample.Resampler.ffill
generated/pandas.core.resample.Resampler.fillna,../reference/api/pandas.core.resample.Resampler.fillna
generated/pandas.core.resample.Resampler.first,../reference/api/pandas.core.resample.Resampler.first
generated/pandas.core.resample.Resampler.get_group,../reference/api/pandas.core.resample.Resampler.get_group
generated/pandas.core.resample.Resampler.groups,../reference/api/pandas.core.resample.Resampler.groups
Expand Down Expand Up @@ -481,7 +480,6 @@ generated/pandas.DataFrame.style,../reference/api/pandas.DataFrame.style
generated/pandas.DataFrame.sub,../reference/api/pandas.DataFrame.sub
generated/pandas.DataFrame.subtract,../reference/api/pandas.DataFrame.subtract
generated/pandas.DataFrame.sum,../reference/api/pandas.DataFrame.sum
generated/pandas.DataFrame.swapaxes,../reference/api/pandas.DataFrame.swapaxes
generated/pandas.DataFrame.swaplevel,../reference/api/pandas.DataFrame.swaplevel
generated/pandas.DataFrame.tail,../reference/api/pandas.DataFrame.tail
generated/pandas.DataFrame.take,../reference/api/pandas.DataFrame.take
Expand Down Expand Up @@ -1206,7 +1204,6 @@ generated/pandas.Series.str.zfill,../reference/api/pandas.Series.str.zfill
generated/pandas.Series.sub,../reference/api/pandas.Series.sub
generated/pandas.Series.subtract,../reference/api/pandas.Series.subtract
generated/pandas.Series.sum,../reference/api/pandas.Series.sum
generated/pandas.Series.swapaxes,../reference/api/pandas.Series.swapaxes
generated/pandas.Series.swaplevel,../reference/api/pandas.Series.swaplevel
generated/pandas.Series.tail,../reference/api/pandas.Series.tail
generated/pandas.Series.take,../reference/api/pandas.Series.take
Expand Down
1 change: 0 additions & 1 deletion doc/source/reference/frame.rst
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,6 @@ Reshaping, sorting, transposing
DataFrame.swaplevel
DataFrame.stack
DataFrame.unstack
DataFrame.swapaxes
DataFrame.melt
DataFrame.explode
DataFrame.squeeze
Expand Down
1 change: 0 additions & 1 deletion doc/source/reference/resampling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ Upsampling
Resampler.ffill
Resampler.bfill
Resampler.nearest
Resampler.fillna
Resampler.asfreq
Resampler.interpolate

Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@ Fixed regressions
- Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
- Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`)
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
- Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`)
- Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
- Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
- Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
- Fixed regression in :meth:`CategoricalIndex.difference` raising ``KeyError`` when other contains null values other than NaN (:issue:`57318`)
- Fixed regression in :meth:`DataFrame.groupby` raising ``ValueError`` when grouping by a :class:`Series` in some cases (:issue:`57276`)
- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
- Fixed regression in :meth:`DataFrame.merge` raising ``ValueError`` for certain types of 3rd-party extension arrays (:issue:`57316`)
- Fixed regression in :meth:`DataFrame.shift` raising ``AssertionError`` for ``axis=1`` and empty :class:`DataFrame` (:issue:`57301`)
- Fixed regression in :meth:`DataFrame.sort_index` not producing a stable sort for an index with duplicates (:issue:`57151`)
- Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`)
- Fixed regression in :meth:`DataFrame.to_json` converting nullable integers to floats (:issue:`57224`)
Expand Down
10 changes: 9 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Other enhancements
^^^^^^^^^^^^^^^^^^
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
-

.. ---------------------------------------------------------------------------
Expand Down Expand Up @@ -106,29 +107,36 @@ Removal of prior version deprecations/changes
- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
- Removed :meth:`DataFrame.applymap`, :meth:`Styler.applymap` and :meth:`Styler.applymap_index` (:issue:`52364`)
- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`)
- Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`)
- Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`)
- Removed ``DataFrame.swapaxes`` and ``Series.swapaxes`` (:issue:`51946`)
- Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`)
- Removed ``DataFrameGroupBy.fillna`` and ``SeriesGroupBy.fillna`` (:issue:`55719`)
- Removed ``Index.format``, use :meth:`Index.astype` with ``str`` or :meth:`Index.map` with a ``formatter`` function instead (:issue:`55439`)
- Removed ``Resample.fillna`` (:issue:`55719`)
- Removed ``Series.__int__`` and ``Series.__float__``. Call ``int(Series.iloc[0])`` or ``float(Series.iloc[0])`` instead. (:issue:`51131`)
- Removed ``Series.ravel`` (:issue:`56053`)
- Removed ``Series.view`` (:issue:`56054`)
- Removed ``StataReader.close`` (:issue:`49228`)
- Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`)
- Removed ``axis`` argument from all groupby operations (:issue:`50405`)
- Removed ``convert_dtype`` from :meth:`Series.apply` (:issue:`52257`)
- Removed ``pandas.api.types.is_interval`` and ``pandas.api.types.is_period``, use ``isinstance(obj, pd.Interval)`` and ``isinstance(obj, pd.Period)`` instead (:issue:`55264`)
- Removed ``pandas.io.sql.execute`` (:issue:`50185`)
- Removed ``pandas.value_counts``, use :meth:`Series.value_counts` instead (:issue:`53493`)
- Removed ``read_gbq`` and ``DataFrame.to_gbq``. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`)
- Removed ``use_nullable_dtypes`` from :func:`read_parquet` (:issue:`51853`)
- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
- Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`)
- Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`)
- Removed the ``ArrayManager`` (:issue:`55043`)
- Removed the ``fastpath`` argument from the :class:`Series` constructor (:issue:`55466`)
- Removed the ``is_boolean``, ``is_integer``, ``is_floating``, ``holds_integer``, ``is_numeric``, ``is_categorical``, ``is_object``, and ``is_interval`` attributes of :class:`Index` (:issue:`50042`)
- Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`)
- Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`)


.. ---------------------------------------------------------------------------
.. _whatsnew_300.performance:

Expand Down
44 changes: 36 additions & 8 deletions pandas/_libs/src/vendored/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,15 @@ static void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) {
npyarr->curdim--;
npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim];
npyarr->stridedim -= npyarr->inc;
npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim);
npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim);

if (!PyArray_Check(npyarr->array)) {
PyErr_SetString(PyExc_TypeError,
"NpyArrayPassThru_iterEnd received a non-array object");
return;
}
const PyArrayObject *arrayobj = (const PyArrayObject *)npyarr->array;
npyarr->dim = PyArray_DIM(arrayobj, npyarr->stridedim);
npyarr->stride = PyArray_STRIDE(arrayobj, npyarr->stridedim);
npyarr->dataptr += npyarr->stride;

NpyArr_freeItemValue(obj, tc);
Expand All @@ -467,12 +474,19 @@ static int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {

NpyArr_freeItemValue(obj, tc);

if (PyArray_ISDATETIME(npyarr->array)) {
if (!PyArray_Check(npyarr->array)) {
PyErr_SetString(PyExc_TypeError,
"NpyArr_iterNextItem received a non-array object");
return 0;
}
PyArrayObject *arrayobj = (PyArrayObject *)npyarr->array;

if (PyArray_ISDATETIME(arrayobj)) {
GET_TC(tc)->itemValue = obj;
Py_INCREF(obj);
((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(npyarr->array);
((PyObjectEncoder *)tc->encoder)->npyType = PyArray_TYPE(arrayobj);
// Also write the resolution (unit) of the ndarray
PyArray_Descr *dtype = PyArray_DESCR(npyarr->array);
PyArray_Descr *dtype = PyArray_DESCR(arrayobj);
((PyObjectEncoder *)tc->encoder)->valueUnit =
get_datetime_metadata_from_dtype(dtype).base;
((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr;
Expand Down Expand Up @@ -505,8 +519,15 @@ static int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) {

npyarr->curdim++;
npyarr->stridedim += npyarr->inc;
npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim);
npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim);
if (!PyArray_Check(npyarr->array)) {
PyErr_SetString(PyExc_TypeError,
"NpyArr_iterNext received a non-array object");
return 0;
}
const PyArrayObject *arrayobj = (const PyArrayObject *)npyarr->array;

npyarr->dim = PyArray_DIM(arrayobj, npyarr->stridedim);
npyarr->stride = PyArray_STRIDE(arrayobj, npyarr->stridedim);
npyarr->index[npyarr->stridedim] = 0;

((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
Expand Down Expand Up @@ -1610,7 +1631,14 @@ static void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
if (!values) {
goto INVALID;
}
pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0);

if (!PyArray_Check(pc->newObj)) {
PyErr_SetString(PyExc_TypeError,
"Object_beginTypeContext received a non-array object");
goto INVALID;
}
const PyArrayObject *arrayobj = (const PyArrayObject *)pc->newObj;
pc->columnLabelsLen = PyArray_DIM(arrayobj, 0);
pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc,
pc->columnLabelsLen);
if (!pc->columnLabels) {
Expand Down
Loading

0 comments on commit 84657e5

Please sign in to comment.