From a26823ff5fe3252c432d937cdbb9dd668de1ebce Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Sun, 4 Oct 2020 18:47:21 +0000 Subject: [PATCH 01/17] BUG: df.replace over pd.Period columns (#34871) --- pandas/core/internals/blocks.py | 17 ++++++++++++++++- pandas/core/internals/managers.py | 9 +++++++++ pandas/tests/frame/methods/test_replace.py | 3 --- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 09f276be7d64a..c14b61c8b621d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -10,6 +10,7 @@ import pandas._libs.internals as libinternals from pandas._libs.internals import BlockPlacement from pandas._libs.tslibs import conversion +from pandas._libs.tslibs.period import Period from pandas._libs.tslibs.timezones import tz_compare from pandas._typing import ArrayLike, Scalar from pandas.util._validators import validate_bool_kwarg @@ -1975,6 +1976,18 @@ def external_values(self): return self.values.astype(object) +class PeriodExtensionBlock(ObjectValuesExtensionBlock): + """ + Used by PeriodArray to ensure proper type conversions + """ + + def _can_hold_element(self, element: Any) -> bool: + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return issubclass(tipo.type, Period) + return isinstance(element, Period) + + class NumericBlock(Block): __slots__ = () is_numeric = True @@ -2747,8 +2760,10 @@ def get_block_type(values, dtype=None): cls = DatetimeBlock elif is_datetime64tz_dtype(values.dtype): cls = DatetimeTZBlock - elif is_interval_dtype(dtype) or is_period_dtype(dtype): + elif is_interval_dtype(dtype): cls = ObjectValuesExtensionBlock + elif is_period_dtype(dtype): + cls = PeriodExtensionBlock elif is_extension_array_dtype(values.dtype): cls = ExtensionBlock elif issubclass(vtype, np.floating): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f2480adce89b4..acfddf3f784b4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -47,6 +47,7 @@ DatetimeTZBlock, ExtensionBlock, ObjectValuesExtensionBlock, + PeriodExtensionBlock, extend_blocks, get_block_type, make_block, @@ -1743,6 +1744,14 @@ def form_blocks(arrays, names: Index, axes) -> List[Block]: blocks.extend(external_blocks) + if len(items_dict["PeriodExtensionBlock"]): + external_blocks = [ + make_block(array, klass=PeriodExtensionBlock, placement=i) + for i, _, array in items_dict["PeriodExtensionBlock"] + ] + + blocks.extend(external_blocks) + if len(extra_locs): shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:]) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index a9cf840470ae0..5345d08c1edf0 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1517,9 +1517,6 @@ def test_replace_with_duplicate_columns(self, replacement): tm.assert_frame_equal(result, expected) - @pytest.mark.xfail( - reason="replace() changes dtype from period to object, see GH34871", strict=True - ) def test_replace_period_ignore_float(self): """ Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df From 7b40ecef957be30651d92cb2be64e30aa87c9e66 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Mon, 5 Oct 2020 01:08:05 +0000 Subject: [PATCH 02/17] Remove PeriodExtensionBlock, update docs/tests --- doc/source/whatsnew/v1.2.0.rst | 7 ++++++- pandas/core/internals/blocks.py | 15 +++------------ pandas/core/internals/managers.py | 9 --------- pandas/tests/frame/methods/test_replace.py | 9 +++++---- pandas/tests/series/methods/test_replace.py | 11 +++++++++++ 5 files changed, 25 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index cb0858fd678f8..d4266c53fe87a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -348,7 +348,7 @@ Strings Interval ^^^^^^^^ -- +- Bug in :meth:`DataFrame.replace` and :method:`Series.replace` where :class:`Interval` dtypes would be converted to object dytpes (:issue:34871) - Indexing @@ -387,6 +387,11 @@ I/O - Removed ``private_key`` and ``verbose`` from :func:`read_gbq` as they are no longer supported in ``pandas-gbq`` (:issue:`34654`, :issue:`30200`) - Bumped minimum pytables version to 3.5.1 to avoid a ``ValueError`` in :meth:`read_hdf` (:issue:`24839`) +Period +^^^^^^ + +- Bug in :meth:`DataFrame.replace` and :method:`Series.replace` where :class:`Period` dtypes would be converted to object dytpes (:issue:34871) + Plotting ^^^^^^^^ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c14b61c8b621d..b1ed7465e4cb3 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -10,7 +10,6 @@ import pandas._libs.internals as libinternals from pandas._libs.internals import BlockPlacement from pandas._libs.tslibs import conversion -from pandas._libs.tslibs.period import Period from pandas._libs.tslibs.timezones import tz_compare from pandas._typing import ArrayLike, Scalar from pandas.util._validators import validate_bool_kwarg @@ -1975,17 +1974,11 @@ class ObjectValuesExtensionBlock(ExtensionBlock): def external_values(self): return self.values.astype(object) - -class PeriodExtensionBlock(ObjectValuesExtensionBlock): - """ - Used by PeriodArray to ensure proper type conversions - """ - def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if tipo is not None: - return issubclass(tipo.type, Period) - return isinstance(element, Period) + return issubclass(tipo.type, self.dtype.type) + return isinstance(element, self.dtype.type) class NumericBlock(Block): @@ -2760,10 +2753,8 @@ def get_block_type(values, dtype=None): cls = DatetimeBlock elif is_datetime64tz_dtype(values.dtype): cls = DatetimeTZBlock - elif is_interval_dtype(dtype): + elif is_interval_dtype(dtype) or is_period_dtype(dtype): cls = ObjectValuesExtensionBlock - elif is_period_dtype(dtype): - cls = PeriodExtensionBlock elif is_extension_array_dtype(values.dtype): cls = ExtensionBlock elif issubclass(vtype, np.floating): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index acfddf3f784b4..f2480adce89b4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -47,7 +47,6 @@ DatetimeTZBlock, ExtensionBlock, ObjectValuesExtensionBlock, - PeriodExtensionBlock, extend_blocks, get_block_type, make_block, @@ -1744,14 +1743,6 @@ def form_blocks(arrays, names: Index, axes) -> List[Block]: blocks.extend(external_blocks) - if len(items_dict["PeriodExtensionBlock"]): - external_blocks = [ - make_block(array, klass=PeriodExtensionBlock, placement=i) - for i, _, array in items_dict["PeriodExtensionBlock"] - ] - - blocks.extend(external_blocks) - if len(extra_locs): shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:]) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 5345d08c1edf0..be89fc6edea45 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1517,14 +1517,15 @@ def test_replace_with_duplicate_columns(self, replacement): tm.assert_frame_equal(result, expected) - def test_replace_period_ignore_float(self): + @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) + def test_replace_period_ignore_float(self, value): """ Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df - with a Period column the old, faulty behavior is to raise TypeError. + with a Period/Interval column the old, faulty behavior is to raise TypeError. """ - df = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) + df = pd.DataFrame({"Per": [value] * 3}) result = df.replace(1.0, 0.0) - expected = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) + expected = pd.DataFrame({"Per": [value] * 3}) tm.assert_frame_equal(expected, result) def test_replace_value_category_type(self): diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index e255d46e81851..f1da9bbf26319 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -449,3 +449,14 @@ def test_replace_with_compiled_regex(self): result = s.replace({regex: "z"}, regex=True) expected = pd.Series(["z", "b", "c"]) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) + def test_replace_period_ignore_float(self, value): + """ + Regression test for corrolary to GH#34871: if series.replace(1.0, 0.0) is called on a + Period/Interval Series, the old, faulty behavior is to raise TypeError. + """ + series = pd.Series([value] * 3) + result = series.replace(1.0, 0.0) + expected = pd.Series([value] * 3) + tm.assert_series_equal(expected, result) \ No newline at end of file From fa20da9bf1c1dc3d3ae1fcb97ce4f19ea390f551 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Mon, 5 Oct 2020 01:11:09 +0000 Subject: [PATCH 03/17] Fix PEP violations --- pandas/tests/series/methods/test_replace.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index f1da9bbf26319..cb164420b93a7 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -453,10 +453,11 @@ def test_replace_with_compiled_regex(self): @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) def test_replace_period_ignore_float(self, value): """ - Regression test for corrolary to GH#34871: if series.replace(1.0, 0.0) is called on a - Period/Interval Series, the old, faulty behavior is to raise TypeError. + Regression test for corrolary to GH#34871: if series.replace(1.0, 0.0) + is called on a Period/Interval Series, the old, faulty behavior + is to raise TypeError. """ series = pd.Series([value] * 3) result = series.replace(1.0, 0.0) expected = pd.Series([value] * 3) - tm.assert_series_equal(expected, result) \ No newline at end of file + tm.assert_series_equal(expected, result) From c378787430b5b85895bb1239aa1b0151aca2aa15 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Mon, 5 Oct 2020 01:51:59 +0000 Subject: [PATCH 04/17] Fix docs: method to meth --- doc/source/whatsnew/v1.2.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d4266c53fe87a..9b9fe5ad6fb0e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -348,7 +348,7 @@ Strings Interval ^^^^^^^^ -- Bug in :meth:`DataFrame.replace` and :method:`Series.replace` where :class:`Interval` dtypes would be converted to object dytpes (:issue:34871) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Interval` dtypes would be converted to object dytpes (:issue:34871) - Indexing @@ -390,7 +390,7 @@ I/O Period ^^^^^^ -- Bug in :meth:`DataFrame.replace` and :method:`Series.replace` where :class:`Period` dtypes would be converted to object dytpes (:issue:34871) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Period` dtypes would be converted to object dytpes (:issue:34871) Plotting ^^^^^^^^ From 0b2de8ff1585c1cba8f374c6437f6b04cb64f961 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Wed, 21 Oct 2020 22:58:19 +0000 Subject: [PATCH 05/17] Address test and release notes comments --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/frame/methods/test_replace.py | 2 +- pandas/tests/series/methods/test_replace.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 45983dd8f6cb4..bb4ddd43b158e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -417,7 +417,7 @@ Strings Interval ^^^^^^^^ -- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Interval` dtypes would be converted to object dytpes (:issue:34871) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Interval` dtypes would be converted to object dtypes (:issue:34871) - Indexing diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 9e1af89216261..718b7131d7321 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1518,7 +1518,7 @@ def test_replace_with_duplicate_columns(self, replacement): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) - def test_replace_period_ignore_float(self, value): + def test_replace_ea_ignore_float(self, value): """ Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df with a Period/Interval column the old, faulty behavior is to raise TypeError. diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 1cb774a1e1272..5c0b32c09b9d4 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -451,7 +451,7 @@ def test_replace_with_compiled_regex(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) - def test_replace_period_ignore_float(self, value): + def test_replace_ea_ignore_float(self, value): """ Regression test for corrolary to GH#34871: if series.replace(1.0, 0.0) is called on a Period/Interval Series, the old, faulty behavior From ac8309b2207b99de29190e469efd6a04140c8d84 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Thu, 22 Oct 2020 00:10:59 +0000 Subject: [PATCH 06/17] Fix unit tests --- pandas/core/internals/blocks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6371b089250ce..2ce9534cb3f46 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2003,6 +2003,8 @@ def external_values(self): return self.values.astype(object) def _can_hold_element(self, element: Any) -> bool: + if is_valid_nat_for_dtype(element, self.dtype): + return True tipo = maybe_infer_dtype_type(element) if tipo is not None: return issubclass(tipo.type, self.dtype.type) From 5df2c695bdb45a59c7e1a4613323beefae447d95 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Sun, 25 Oct 2020 00:40:58 +0000 Subject: [PATCH 07/17] More build fixes --- pandas/core/internals/blocks.py | 4 ++++ pandas/tests/frame/methods/test_replace.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2ce9534cb3f46..4956c1526bc06 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2005,6 +2005,10 @@ def external_values(self): def _can_hold_element(self, element: Any) -> bool: if is_valid_nat_for_dtype(element, self.dtype): return True + if element is NaT: + return True + if isinstance(element, list) and len(element) == 0: + return True tipo = maybe_infer_dtype_type(element) if tipo is not None: return issubclass(tipo.type, self.dtype.type) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 718b7131d7321..c900d070fa8e5 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1523,9 +1523,9 @@ def test_replace_ea_ignore_float(self, value): Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df with a Period/Interval column the old, faulty behavior is to raise TypeError. """ - df = pd.DataFrame({"Per": [value] * 3}) + df = DataFrame({"Per": [value] * 3}) result = df.replace(1.0, 0.0) - expected = pd.DataFrame({"Per": [value] * 3}) + expected = DataFrame({"Per": [value] * 3}) tm.assert_frame_equal(expected, result) def test_replace_value_category_type(self): From 1d798bc55dc8037d3fb3dbe1d59ef8b1f0b20b07 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Sun, 1 Nov 2020 17:13:07 +0000 Subject: [PATCH 08/17] Remove excess NaT check --- pandas/core/internals/blocks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4956c1526bc06..929657423b614 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2005,8 +2005,6 @@ def external_values(self): def _can_hold_element(self, element: Any) -> bool: if is_valid_nat_for_dtype(element, self.dtype): return True - if element is NaT: - return True if isinstance(element, list) and len(element) == 0: return True tipo = maybe_infer_dtype_type(element) From 5921fb743d1a76b7c2c58942f0b9ff3a64628e86 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Tue, 10 Nov 2020 03:07:19 +0000 Subject: [PATCH 09/17] Address review comments --- doc/source/whatsnew/v1.2.0.rst | 4 ++-- pandas/tests/frame/methods/test_replace.py | 5 +---- pandas/tests/series/methods/test_replace.py | 6 +----- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 653e7d77c705e..834dfd1c219b3 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -435,7 +435,7 @@ Strings Interval ^^^^^^^^ -- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Interval` dtypes would be converted to object dtypes (:issue:34871) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Interval` dtypes would be converted to object dtypes (:issue:`34871`) - Bug in :meth:`IntervalIndex.take` with negative indices and ``fill_value=None`` (:issue:`37330`) - - @@ -493,7 +493,7 @@ I/O Period ^^^^^^ -- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Period` dtypes would be converted to object dytpes (:issue:34871) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` where :class:`Period` dtypes would be converted to object dtypes (:issue:`34871`) Plotting ^^^^^^^^ diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 369176475cdad..3fb3165a56a72 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1519,10 +1519,7 @@ def test_replace_with_duplicate_columns(self, replacement): @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) def test_replace_ea_ignore_float(self, value): - """ - Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df - with a Period/Interval column the old, faulty behavior is to raise TypeError. - """ + # GH#34871 df = DataFrame({"Per": [value] * 3}) result = df.replace(1.0, 0.0) expected = DataFrame({"Per": [value] * 3}) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 5c0b32c09b9d4..430259874afb8 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -452,11 +452,7 @@ def test_replace_with_compiled_regex(self): @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) def test_replace_ea_ignore_float(self, value): - """ - Regression test for corrolary to GH#34871: if series.replace(1.0, 0.0) - is called on a Period/Interval Series, the old, faulty behavior - is to raise TypeError. - """ + # GH#34871 series = pd.Series([value] * 3) result = series.replace(1.0, 0.0) expected = pd.Series([value] * 3) From 91ad6ddd45a47f516e89f3c50e55a811d6a769b6 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Tue, 10 Nov 2020 13:45:54 +0000 Subject: [PATCH 10/17] Combine test_replace Series and DataFrame tests --- pandas/tests/frame/methods/test_replace.py | 17 +--------------- pandas/tests/generic/methods/test_replace.py | 21 ++++++++++++++++++++ pandas/tests/series/methods/test_replace.py | 16 --------------- 3 files changed, 22 insertions(+), 32 deletions(-) create mode 100644 pandas/tests/generic/methods/test_replace.py diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 3fb3165a56a72..063a8f8c3b341 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1,5 +1,6 @@ from datetime import datetime from io import StringIO +from pandas.conftest import frame_or_series import re from typing import Dict, List, Union @@ -1517,14 +1518,6 @@ def test_replace_with_duplicate_columns(self, replacement): tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) - def test_replace_ea_ignore_float(self, value): - # GH#34871 - df = DataFrame({"Per": [value] * 3}) - result = df.replace(1.0, 0.0) - expected = DataFrame({"Per": [value] * 3}) - tm.assert_frame_equal(expected, result) - def test_replace_value_category_type(self): """ Test for #23305: to ensure category dtypes are maintained @@ -1605,14 +1598,6 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d tm.assert_frame_equal(result, expected) - def test_replace_with_compiled_regex(self): - # https://github.com/pandas-dev/pandas/issues/35680 - df = DataFrame(["a", "b", "c"]) - regex = re.compile("^a$") - result = df.replace({regex: "z"}, regex=True) - expected = DataFrame(["z", "b", "c"]) - tm.assert_frame_equal(result, expected) - def test_replace_intervals(self): # https://github.com/pandas-dev/pandas/issues/35931 df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]}) diff --git a/pandas/tests/generic/methods/test_replace.py b/pandas/tests/generic/methods/test_replace.py new file mode 100644 index 0000000000000..f1effaeced5e0 --- /dev/null +++ b/pandas/tests/generic/methods/test_replace.py @@ -0,0 +1,21 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + +class SharedReplaceTests: + @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) + def test_replace_ea_ignore_float(self, frame_or_series, value): + # GH#34871 + df = frame_or_series([value] * 3) + result = df.replace(1.0, 0.0) + expected = frame_or_series([value] * 3) + tm.assert_equal(expected, result) + + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + s = pd.Series(["a", "b", "c"]) + regex = re.compile("^a$") + result = s.replace({regex: "z"}, regex=True) + expected = pd.Series(["z", "b", "c"]) + tm.assert_series_equal(result, expected) \ No newline at end of file diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 430259874afb8..fafcd891b1a30 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -442,22 +442,6 @@ def test_replace_extension_other(self): ser = pd.Series(pd.array([1, 2, 3], dtype="Int64")) ser.replace("", "") # no exception - def test_replace_with_compiled_regex(self): - # https://github.com/pandas-dev/pandas/issues/35680 - s = pd.Series(["a", "b", "c"]) - regex = re.compile("^a$") - result = s.replace({regex: "z"}, regex=True) - expected = pd.Series(["z", "b", "c"]) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) - def test_replace_ea_ignore_float(self, value): - # GH#34871 - series = pd.Series([value] * 3) - result = series.replace(1.0, 0.0) - expected = pd.Series([value] * 3) - tm.assert_series_equal(expected, result) - @pytest.mark.parametrize("pattern", ["^.$", "."]) def test_str_replace_regex_default_raises_warning(self, pattern): # https://github.com/pandas-dev/pandas/pull/24809 From 90ab53e552a4845ef1cfe995133a09e19deb44ef Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Tue, 10 Nov 2020 13:46:32 +0000 Subject: [PATCH 11/17] Add missing file --- pandas/tests/generic/methods/test_replace.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/generic/methods/test_replace.py b/pandas/tests/generic/methods/test_replace.py index f1effaeced5e0..47648b8cc83d3 100644 --- a/pandas/tests/generic/methods/test_replace.py +++ b/pandas/tests/generic/methods/test_replace.py @@ -1,9 +1,12 @@ +import re + import pytest import pandas as pd import pandas._testing as tm -class SharedReplaceTests: + +class TestReplace: @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) def test_replace_ea_ignore_float(self, frame_or_series, value): # GH#34871 From f7bd1e411f382260553b12e7975990f0e98c78c3 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Tue, 10 Nov 2020 13:48:59 +0000 Subject: [PATCH 12/17] Remove unused import --- pandas/tests/frame/methods/test_replace.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 063a8f8c3b341..e50e32f4c1797 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1,6 +1,5 @@ from datetime import datetime from io import StringIO -from pandas.conftest import frame_or_series import re from typing import Dict, List, Union From bd1fea80ac07306d393cfd6611ec072856ff8395 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Tue, 10 Nov 2020 13:49:23 +0000 Subject: [PATCH 13/17] Newline at end of file --- pandas/tests/generic/methods/test_replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/generic/methods/test_replace.py b/pandas/tests/generic/methods/test_replace.py index 47648b8cc83d3..da52713ff5276 100644 --- a/pandas/tests/generic/methods/test_replace.py +++ b/pandas/tests/generic/methods/test_replace.py @@ -21,4 +21,4 @@ def test_replace_with_compiled_regex(self): regex = re.compile("^a$") result = s.replace({regex: "z"}, regex=True) expected = pd.Series(["z", "b", "c"]) - tm.assert_series_equal(result, expected) \ No newline at end of file + tm.assert_series_equal(result, expected) From 0fa2a25d6b21aa25fcf3b83966cc0485bc74468c Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Thu, 12 Nov 2020 01:17:19 +0000 Subject: [PATCH 14/17] Remove generic unit test --- pandas/tests/frame/methods/test_replace.py | 16 +++++++++++++ pandas/tests/generic/methods/test_replace.py | 24 -------------------- 2 files changed, 16 insertions(+), 24 deletions(-) delete mode 100644 pandas/tests/generic/methods/test_replace.py diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index e50e32f4c1797..af8383be8c8a1 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1517,6 +1517,14 @@ def test_replace_with_duplicate_columns(self, replacement): tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) + def test_replace_ea_ignore_float(self, frame_or_series, value): + # GH#34871 + df = frame_or_series([value] * 3) + result = df.replace(1.0, 0.0) + expected = frame_or_series([value] * 3) + tm.assert_equal(expected, result) + def test_replace_value_category_type(self): """ Test for #23305: to ensure category dtypes are maintained @@ -1597,6 +1605,14 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d tm.assert_frame_equal(result, expected) + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + df = DataFrame(["a", "b", "c"]) + regex = re.compile("^a$") + result = df.replace({regex: "z"}, regex=True) + expected = DataFrame(["z", "b", "c"]) + tm.assert_frame_equal(result, expected) + def test_replace_intervals(self): # https://github.com/pandas-dev/pandas/issues/35931 df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]}) diff --git a/pandas/tests/generic/methods/test_replace.py b/pandas/tests/generic/methods/test_replace.py deleted file mode 100644 index da52713ff5276..0000000000000 --- a/pandas/tests/generic/methods/test_replace.py +++ /dev/null @@ -1,24 +0,0 @@ -import re - -import pytest - -import pandas as pd -import pandas._testing as tm - - -class TestReplace: - @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) - def test_replace_ea_ignore_float(self, frame_or_series, value): - # GH#34871 - df = frame_or_series([value] * 3) - result = df.replace(1.0, 0.0) - expected = frame_or_series([value] * 3) - tm.assert_equal(expected, result) - - def test_replace_with_compiled_regex(self): - # https://github.com/pandas-dev/pandas/issues/35680 - s = pd.Series(["a", "b", "c"]) - regex = re.compile("^a$") - result = s.replace({regex: "z"}, regex=True) - expected = pd.Series(["z", "b", "c"]) - tm.assert_series_equal(result, expected) From bcf61e6e62aae95ea790b31f2124e6dc63a50686 Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Thu, 12 Nov 2020 01:18:55 +0000 Subject: [PATCH 15/17] Add back series regex test --- pandas/tests/series/methods/test_replace.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index fafcd891b1a30..79d6fc22aba97 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -442,6 +442,14 @@ def test_replace_extension_other(self): ser = pd.Series(pd.array([1, 2, 3], dtype="Int64")) ser.replace("", "") # no exception + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + s = pd.Series(["a", "b", "c"]) + regex = re.compile("^a$") + result = s.replace({regex: "z"}, regex=True) + expected = pd.Series(["z", "b", "c"]) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("pattern", ["^.$", "."]) def test_str_replace_regex_default_raises_warning(self, pattern): # https://github.com/pandas-dev/pandas/pull/24809 From 42e8c7453db87c5fff6ccd4813becda511b0f3ff Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Thu, 12 Nov 2020 01:26:37 +0000 Subject: [PATCH 16/17] Nitpick df to obj --- pandas/tests/frame/methods/test_replace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index af8383be8c8a1..f038fcdb80204 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1520,8 +1520,8 @@ def test_replace_with_duplicate_columns(self, replacement): @pytest.mark.parametrize("value", [pd.Period("2020-01"), pd.Interval(0, 5)]) def test_replace_ea_ignore_float(self, frame_or_series, value): # GH#34871 - df = frame_or_series([value] * 3) - result = df.replace(1.0, 0.0) + obj = frame_or_series([value] * 3) + result = obj.replace(1.0, 0.0) expected = frame_or_series([value] * 3) tm.assert_equal(expected, result) From 5cc38c8b3bb880a48795875c2560234524ad870c Mon Sep 17 00:00:00 2001 From: Sam Cohen Date: Sun, 15 Nov 2020 22:22:41 -0500 Subject: [PATCH 17/17] Fix conflict --- pandas/tests/frame/methods/test_replace.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index c5ed5f1da1e53..8e59dd959ab57 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1531,6 +1531,7 @@ def test_replace_ea_ignore_float(self, frame_or_series, value): obj = obj["Per"] expected = obj.copy() + result = obj.replace(1.0, 0.0) tm.assert_equal(expected, result) def test_replace_value_category_type(self):