From 4f2b4b0a8b0ef7249219ef5804da332900223bd6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 8 Mar 2025 15:52:37 -0500 Subject: [PATCH 1/7] DEPS: Update NumpyExtensionArray repr for NEP51 --- pandas/core/arrays/numpy_.py | 10 +++++++++ pandas/tests/arrays/numpy_/test_numpy.py | 27 ++++++++++++++++++++++++ pandas/tests/arrays/test_period.py | 1 + 3 files changed, 38 insertions(+) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index ac0823ed903b3..de5d3eae932fc 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -2,6 +2,7 @@ from typing import ( TYPE_CHECKING, + Any, Literal, ) @@ -29,6 +30,8 @@ from pandas.core.strings.object_array import ObjectStringArrayMixin if TYPE_CHECKING: + from collections.abc import Callable + from pandas._typing import ( AxisInt, Dtype, @@ -565,3 +568,10 @@ def _wrap_ndarray_result(self, result: np.ndarray): return TimedeltaArray._simple_new(result, dtype=result.dtype) return type(self)(result) + + def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: + # NEP 51: https://github.com/numpy/numpy/pull/22449 + if self.dtype == "object": + return repr + else: + return str diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index e86eb014465e1..afdd55cc5e958 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -323,3 +323,30 @@ def test_factorize_unsigned(): tm.assert_numpy_array_equal(res_codes, exp_codes) tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique)) + + +# ---------------------------------------------------------------------------- +# Output formatting + + +def test_array_repr(any_numpy_array): + # GH#??? 
+ nparray = any_numpy_array + arr = NumpyExtensionArray(nparray) + if nparray.dtype == "object": + values = "['a', 'b']" + elif nparray.dtype == "float64": + values = "[0.0, 1.0]" + elif nparray.dtype == "int64": + values = "[0, 1]" + elif nparray.dtype == "complex128": + values = "[0j, (1+2j)]" + elif nparray.dtype == "bool": + values = "[True, False]" + elif nparray.dtype == "datetime64[ns]": + values = "[1970-01-01T00:00:00.000000000, 1970-01-01T00:00:00.000000001]" + elif nparray.dtype == "timedelta64[ns]": + values = "[0 nanoseconds, 1 nanoseconds]" + expected = f"\n{values}\nLength: 2, dtype: {nparray.dtype}" + result = repr(arr) + assert result == expected diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 48453ba19e9a1..963c86e15daab 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -181,4 +181,5 @@ def test_repr_large(): "'2001-01-01']\n" "Length: 1000, dtype: period[D]" ) + print(result) assert result == expected From 2d58b80d86e8ea9769939859d8fcb733eade1259 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 8 Mar 2025 15:56:21 -0500 Subject: [PATCH 2/7] Cleanup --- pandas/tests/arrays/test_period.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 963c86e15daab..48453ba19e9a1 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -181,5 +181,4 @@ def test_repr_large(): "'2001-01-01']\n" "Length: 1000, dtype: period[D]" ) - print(result) assert result == expected From 425b4bbbcd2d543d4ef77915bc83261897a901e9 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 8 Mar 2025 15:56:56 -0500 Subject: [PATCH 3/7] GH# --- pandas/tests/arrays/numpy_/test_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index afdd55cc5e958..ac412189b29d1 100644 --- 
a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -330,7 +330,7 @@ def test_factorize_unsigned(): def test_array_repr(any_numpy_array): - # GH#??? + # GH#61085 nparray = any_numpy_array arr = NumpyExtensionArray(nparray) if nparray.dtype == "object": From 63069b834013b50104406be1e4e8b7565e97d98f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 8 Mar 2025 16:11:17 -0500 Subject: [PATCH 4/7] Debug --- pandas/tests/arrays/numpy_/test_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index ac412189b29d1..8f77e4a52af24 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -349,4 +349,4 @@ def test_array_repr(any_numpy_array): values = "[0 nanoseconds, 1 nanoseconds]" expected = f"\n{values}\nLength: 2, dtype: {nparray.dtype}" result = repr(arr) - assert result == expected + assert result == expected, f"{result} vs {expected}" From 34e8f03a2997e45cdc224262819e74f6b2a9e7f7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 8 Mar 2025 17:03:42 -0500 Subject: [PATCH 5/7] Fix fixture --- pandas/tests/arrays/numpy_/test_numpy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 8f77e4a52af24..cf1baefcdcdb3 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -22,7 +22,7 @@ np.array([True, False], dtype=bool), np.array([0, 1], dtype="datetime64[ns]"), np.array([0, 1], dtype="timedelta64[ns]"), - ] + ], ) def any_numpy_array(request): """ @@ -30,7 +30,7 @@ def any_numpy_array(request): This excludes string and bytes. 
""" - return request.param + return request.param.copy() # ---------------------------------------------------------------------------- From d10706f9eb3d39cee91ccebd1a6298cee1672feb Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 9 Mar 2025 08:54:04 -0400 Subject: [PATCH 6/7] Fixups --- pandas/core/arrays/numpy_.py | 4 +++- pandas/tests/arrays/numpy_/test_numpy.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index de5d3eae932fc..fd2c8c9d63362 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -571,7 +571,9 @@ def _wrap_ndarray_result(self, result: np.ndarray): def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: # NEP 51: https://github.com/numpy/numpy/pull/22449 - if self.dtype == "object": + if self.dtype.kind in "SU": + return "'{}'".format + elif self.dtype == "object": return repr else: return str diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index cf1baefcdcdb3..620a553d5a731 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -337,7 +337,7 @@ def test_array_repr(any_numpy_array): values = "['a', 'b']" elif nparray.dtype == "float64": values = "[0.0, 1.0]" - elif nparray.dtype == "int64": + elif str(nparray.dtype).startswith("int"): values = "[0, 1]" elif nparray.dtype == "complex128": values = "[0j, (1+2j)]" From 35e4e2128f12259cae6491d5bd211fa5037a6246 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 11 Mar 2025 16:16:40 -0400 Subject: [PATCH 7/7] whatsnew --- doc/source/whatsnew/v2.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 09134763977c3..ac54d5e895f2f 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -37,6 +37,7 @@ Other enhancements updated to work correctly with NumPy >= 2 
(:issue:`57739`) - :meth:`Series.str.decode` result now has ``StringDtype`` when ``future.infer_string`` is True (:issue:`60709`) - :meth:`~Series.to_hdf` and :meth:`~DataFrame.to_hdf` now round-trip with ``StringDtype`` (:issue:`60663`) +- Improved ``repr`` of :class:`.NumpyExtensionArray` to account for NEP 51 (:issue:`61085`) - The :meth:`Series.str.decode` has gained the argument ``dtype`` to control the dtype of the result (:issue:`60940`) - The :meth:`~Series.cumsum`, :meth:`~Series.cummin`, and :meth:`~Series.cummax` reductions are now implemented for ``StringDtype`` columns (:issue:`60633`) - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)