Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/2.3.x' into remove-read_json-d…
Browse files Browse the repository at this point in the history
…atetime-deprecation-warning
  • Loading branch information
KevsterAmp committed Oct 22, 2024
2 parents 51ad07d + a24a653 commit c781b59
Show file tree
Hide file tree
Showing 302 changed files with 4,521 additions and 2,373 deletions.
6 changes: 6 additions & 0 deletions .github/actions/setup-conda/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,9 @@ runs:
condarc-file: ci/.condarc
cache-environment: true
cache-downloads: true

- name: Uninstall pyarrow
if: ${{ env.REMOVE_PYARROW == '1' }}
run: |
micromamba remove -y pyarrow
shell: bash -el {0}
15 changes: 13 additions & 2 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
# Prevent the include jobs from overriding other jobs
pattern: [""]
pandas_future_infer_string: ["0"]
include:
- name: "Downstream Compat"
env_file: actions-311-downstream_compat.yaml
Expand Down Expand Up @@ -85,6 +86,14 @@ jobs:
env_file: actions-39.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "warn"
- name: "Future infer strings"
env_file: actions-312.yaml
pandas_future_infer_string: "1"
pandas_copy_on_write: "1"
- name: "Future infer strings (without pyarrow)"
env_file: actions-311.yaml
pandas_future_infer_string: "1"
pandas_copy_on_write: "1"
- name: "Pypy"
env_file: actions-pypy-39.yaml
pattern: "not slow and not network and not single_cpu"
Expand All @@ -103,16 +112,18 @@ jobs:
LANG: ${{ matrix.lang || 'C.UTF-8' }}
LC_ALL: ${{ matrix.lc_all || '' }}
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
PANDAS_CI: '1'
PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '0' }}
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }}
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
# Clipboard tests
QT_QPA_PLATFORM: offscreen
REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}-${{ matrix.pandas_future_infer_string }}
cancel-in-progress: true

services:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ jobs:
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"

- name: Build wheels
uses: pypa/cibuildwheel@v2.20.0
uses: pypa/cibuildwheel@v2.21.0
with:
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
env:
Expand Down
7 changes: 0 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -274,13 +274,6 @@ repos:
language: python
types: [rst]
files: ^doc/source/(development|reference)/
- id: unwanted-patterns-bare-pytest-raises
name: Check for use of bare pytest raises
language: python
entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
types: [python]
files: ^pandas/tests/
exclude: ^pandas/tests/extension/
- id: unwanted-patterns-private-function-across-module
name: Check for use of private functions across modules
language: python
Expand Down
8 changes: 8 additions & 0 deletions doc/source/whatsnew/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ This is the list of changes to pandas between each release. For full details,
see the `commit logs <https://github.com/pandas-dev/pandas/commits/>`_. For install and
upgrade instructions, see :ref:`install`.

Version 2.3
-----------

.. toctree::
:maxdepth: 2

v2.3.0

Version 2.2
-----------

Expand Down
180 changes: 180 additions & 0 deletions doc/source/whatsnew/v2.3.0.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
.. _whatsnew_230:

What's new in 2.3.0 (Month XX, 2024)
------------------------------------

These are the changes in pandas 2.3.0. See :ref:`release` for a full changelog
including other versions of pandas.

{{ header }}

.. ---------------------------------------------------------------------------
.. _whatsnew_230.upcoming_changes:

Upcoming changes in pandas 3.0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


.. _whatsnew_230.enhancements:

Enhancements
~~~~~~~~~~~~

.. _whatsnew_230.enhancements.enhancement1:

enhancement1
^^^^^^^^^^^^


.. _whatsnew_230.enhancements.other:

Other enhancements
^^^^^^^^^^^^^^^^^^

-
-

.. ---------------------------------------------------------------------------
.. _whatsnew_230.notable_bug_fixes:

Notable bug fixes
~~~~~~~~~~~~~~~~~

These are bug fixes that might have notable behavior changes.

.. _whatsnew_230.notable_bug_fixes.notable_bug_fix1:

notable_bug_fix1
^^^^^^^^^^^^^^^^

.. ---------------------------------------------------------------------------
.. _whatsnew_230.deprecations:

Deprecations
~~~~~~~~~~~~
- Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_230.performance:

Performance improvements
~~~~~~~~~~~~~~~~~~~~~~~~
-
-

.. ---------------------------------------------------------------------------
.. _whatsnew_230.bug_fixes:

Bug fixes
~~~~~~~~~

Categorical
^^^^^^^^^^^
-
-

Datetimelike
^^^^^^^^^^^^
-
-

Timedelta
^^^^^^^^^
-
-

Timezones
^^^^^^^^^
-
-

Numeric
^^^^^^^
-
-

Conversion
^^^^^^^^^^
-
-

Strings
^^^^^^^
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
-

Interval
^^^^^^^^
-
-

Indexing
^^^^^^^^
-
-

Missing
^^^^^^^
-
-

MultiIndex
^^^^^^^^^^
-
-

I/O
^^^
-
-

Period
^^^^^^
-
-

Plotting
^^^^^^^^
-
-

Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^
-
-

Reshaping
^^^^^^^^^
-
-

Sparse
^^^^^^
-
-

ExtensionArray
^^^^^^^^^^^^^^
-
-

Styler
^^^^^^
-
-

Other
^^^^^
-
-

.. ---------------------------------------------------------------------------
.. _whatsnew_230.contributors:

Contributors
~~~~~~~~~~~~
2 changes: 1 addition & 1 deletion pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,6 @@ def using_nullable_dtypes() -> bool:
return _mode_options["nullable_dtypes"]


def using_pyarrow_string_dtype() -> bool:
def using_string_dtype() -> bool:
_mode_options = _global_config["future"]
return _mode_options["infer_string"]
4 changes: 4 additions & 0 deletions pandas/_libs/arrays.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ cdef class NDArrayBacked:
"""
Construct a new ExtensionArray `new_array` with `arr` as its _ndarray.
The returned array has the same dtype as self.
Caller is responsible for ensuring `values.dtype == self._ndarray.dtype`.
This should round-trip:
self == self._from_backing_data(self._ndarray)
"""
Expand Down
5 changes: 4 additions & 1 deletion pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ from pandas._libs.khash cimport (
kh_python_hash_func,
khiter_t,
)
from pandas._libs.missing cimport checknull
from pandas._libs.missing cimport (
checknull,
is_matching_na,
)


def get_hashtable_trace_domain():
Expand Down
18 changes: 15 additions & 3 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -1121,11 +1121,13 @@ cdef class StringHashTable(HashTable):
const char **vecs
khiter_t k
bint use_na_value
bint non_null_na_value

if return_inverse:
labels = np.zeros(n, dtype=np.intp)
uindexer = np.empty(n, dtype=np.int64)
use_na_value = na_value is not None
non_null_na_value = not checknull(na_value)

# assign pointers and pre-filter out missing (if ignore_na)
vecs = <const char **>malloc(n * sizeof(char *))
Expand All @@ -1134,7 +1136,12 @@ cdef class StringHashTable(HashTable):

if (ignore_na
and (not isinstance(val, str)
or (use_na_value and val == na_value))):
or (use_na_value and (
(non_null_na_value and val == na_value) or
(not non_null_na_value and is_matching_na(val, na_value)))
)
)
):
# if missing values do not count as unique values (i.e. if
# ignore_na is True), we can skip the actual value, and
# replace the label with na_sentinel directly
Expand Down Expand Up @@ -1400,18 +1407,23 @@ cdef class PyObjectHashTable(HashTable):
object val
khiter_t k
bint use_na_value

bint non_null_na_value
if return_inverse:
labels = np.empty(n, dtype=np.intp)
use_na_value = na_value is not None
non_null_na_value = not checknull(na_value)

for i in range(n):
val = values[i]
hash(val)

if ignore_na and (
checknull(val)
or (use_na_value and val == na_value)
or (use_na_value and (
(non_null_na_value and val == na_value) or
(not non_null_na_value and is_matching_na(val, na_value))
)
)
):
# if missing values do not count as unique values (i.e. if
# ignore_na is True), skip the hashtable entry for them, and
Expand Down
Loading

0 comments on commit c781b59

Please sign in to comment.