From 8175010f720acb96efbffa348ebad642564c8eb7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 10 Jun 2025 07:12:42 -0400 Subject: [PATCH 1/3] BUG: DataFrame.explode fails with str dtype --- doc/source/whatsnew/v2.3.1.rst | 44 +++++++++++++++++++++ pandas/core/arrays/arrow/array.py | 6 +-- pandas/tests/frame/methods/test_explode.py | 7 ++++ pandas/tests/series/methods/test_explode.py | 7 ++++ 4 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 doc/source/whatsnew/v2.3.1.rst diff --git a/doc/source/whatsnew/v2.3.1.rst b/doc/source/whatsnew/v2.3.1.rst new file mode 100644 index 0000000000000..41e6ff3d1cd6f --- /dev/null +++ b/doc/source/whatsnew/v2.3.1.rst @@ -0,0 +1,44 @@ +.. _whatsnew_231: + +What's new in 2.3.1 (Month XX, 2025) +--------------------------------------- + +These are the changes in pandas 2.3.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_231.enhancements: + +Enhancements +~~~~~~~~~~~~ +- + +.. _whatsnew_231.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- + +.. --------------------------------------------------------------------------- +.. _whatsnew_231.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`???`) + +.. --------------------------------------------------------------------------- +.. _whatsnew_231.other: + +Other +~~~~~ +- + +.. --------------------------------------------------------------------------- +.. _whatsnew_231.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v2.3.0..v2.3.1 diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0b90bcea35100..c18f06c3a126d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1935,9 +1935,9 @@ def _explode(self): """ # child class explode method supports only list types; return # default implementation for non list types. - if not ( - pa.types.is_list(self.dtype.pyarrow_dtype) - or pa.types.is_large_list(self.dtype.pyarrow_dtype) + if not hasattr(self.dtype, "pyarrow_dtype") or ( + not pa.types.is_list(self.dtype.pyarrow_dtype) + and not pa.types.is_large_list(self.dtype.pyarrow_dtype) ): return super()._explode() values = self diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 876ad5539d603..dbcd1c684a029 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -297,3 +297,10 @@ def test_multi_columns_nan_empty(): index=[0, 0, 1, 2, 3, 3], ) tm.assert_frame_equal(result, expected) + + +def test_str_dtype(): + df = pd.DataFrame({"a": ["x", "y"]}, dtype="str") + result = df.explode(column="a") + assert result is not df + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py index e4ad2493f9bb9..6d052b3c63fbc 100644 --- a/pandas/tests/series/methods/test_explode.py +++ b/pandas/tests/series/methods/test_explode.py @@ -175,3 +175,10 @@ def test_explode_pyarrow_non_list_type(ignore_index): result = ser.explode(ignore_index=ignore_index) expected = pd.Series([1, 2, 3], dtype="int64[pyarrow]", index=[0, 1, 2]) tm.assert_series_equal(result, expected) + + +def test_str_dtype(): + ser = pd.Series(["x", "y"], dtype="str") + result = ser.explode() + assert result is not ser + tm.assert_series_equal(result, ser) From 703de84a21350f2d60a2208c2bddff761ee7c254 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 10 Jun 2025 07:13:45 -0400 Subject: [PATCH 2/3] GH# --- doc/source/whatsnew/v2.3.1.rst | 2 +- pandas/tests/frame/methods/test_explode.py | 1 + pandas/tests/series/methods/test_explode.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.3.1.rst b/doc/source/whatsnew/v2.3.1.rst index 41e6ff3d1cd6f..0edb8bc3181c2 100644 --- a/doc/source/whatsnew/v2.3.1.rst +++ b/doc/source/whatsnew/v2.3.1.rst @@ -26,7 +26,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`???`) +- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`61623`) .. --------------------------------------------------------------------------- .. _whatsnew_231.other: diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index dbcd1c684a029..cfb85d261d07a 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -300,6 +300,7 @@ def test_multi_columns_nan_empty(): def test_str_dtype(): + # https://github.com/pandas-dev/pandas/pull/61623 df = pd.DataFrame({"a": ["x", "y"]}, dtype="str") result = df.explode(column="a") assert result is not df diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py index 6d052b3c63fbc..9c08f47c0d678 100644 --- a/pandas/tests/series/methods/test_explode.py +++ b/pandas/tests/series/methods/test_explode.py @@ -178,6 +178,7 @@ def test_explode_pyarrow_non_list_type(ignore_index): def test_str_dtype(): + # https://github.com/pandas-dev/pandas/pull/61623 ser = pd.Series(["x", "y"], dtype="str") result = ser.explode() assert result is not ser From 37a07e208941e05354cd3f883748b4f9dda6bc1f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 10 Jun 2025 07:34:18 -0400 Subject: [PATCH 3/3] Add 2.3.1 to the index of the whatsnew --- doc/source/whatsnew/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index 1dd6c5fabef04..9da73c8fd76d4 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -24,6 +24,7 @@ Version 2.3 .. toctree:: :maxdepth: 2 + v2.3.1 v2.3.0 Version 2.2