From 276c0298fe6f9baf4469f4bf3e8e3912a1e8bdb5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 17 Feb 2023 20:37:37 +0100 Subject: [PATCH 1/8] CoW: Ignore copy=True when copy_on_write is enabled --- pandas/core/frame.py | 4 +- pandas/core/generic.py | 23 +++++--- pandas/core/internals/managers.py | 4 ++ pandas/core/series.py | 6 +-- pandas/tests/copy_view/test_methods.py | 74 +++++++++++++++++++++++--- 5 files changed, 92 insertions(+), 19 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cd9e975b3d578..3d6f2855bc17f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11011,7 +11011,7 @@ def to_timestamp( DataFrame The DataFrame has a DatetimeIndex. """ - new_obj = self.copy(deep=copy) + new_obj = self.copy(deep=copy and not using_copy_on_write()) axis_name = self._get_axis_name(axis) old_ax = getattr(self, axis_name) @@ -11068,7 +11068,7 @@ def to_period( >>> idx.to_period("Y") PeriodIndex(['2001', '2002', '2003'], dtype='period[A-DEC]') """ - new_obj = self.copy(deep=copy) + new_obj = self.copy(deep=copy and not using_copy_on_write()) axis_name = self._get_axis_name(axis) old_ax = getattr(self, axis_name) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0369de8db1339..531214571b7d6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -442,7 +442,7 @@ def set_flags( >>> df2.flags.allows_duplicate_labels False """ - df = self.copy(deep=copy) + df = self.copy(deep=copy and not using_copy_on_write()) if allows_duplicate_labels is not None: df.flags["allows_duplicate_labels"] = allows_duplicate_labels return df @@ -711,6 +711,8 @@ def _set_axis_nocheck( if inplace: setattr(self, self._get_axis_name(axis), labels) else: + if copy and using_copy_on_write(): + copy = False # With copy=False, we create a new object but don't copy the # underlying data. obj = self.copy(deep=copy) @@ -742,7 +744,7 @@ def swapaxes( j = self._get_axis_number(axis2) if i == j: - return self.copy(deep=copy) + return self.copy(deep=copy and not using_copy_on_write()) mapping = {i: j, j: i} @@ -999,6 +1001,8 @@ def _rename( index = mapper self._check_inplace_and_allows_duplicate_labels(inplace) + if copy and using_copy_on_write(): + copy = False result = self if inplace else self.copy(deep=copy) for axis_no, replacements in enumerate((index, columns)): @@ -1215,6 +1219,9 @@ class name inplace = validate_bool_kwarg(inplace, "inplace") + if copy and using_copy_on_write(): + copy = False + if mapper is not lib.no_default: # Use v0.23 behavior if a scalar or list non_mapper = is_scalar(mapper) or ( @@ -5333,6 +5340,8 @@ def reindex( # if all axes that are requested to reindex are equal, then only copy # if indicated must have index names equal here as well as values + if copy and using_copy_on_write(): + copy = False if all( self._get_axis(axis_name).identical(ax) for axis_name, ax in axes.items() @@ -6250,6 +6259,9 @@ def astype( 2 2020-01-03 dtype: datetime64[ns] """ + if copy and using_copy_on_write(): + copy = False + if is_dict_like(dtype): if self.ndim == 1: # i.e. Series if len(dtype) > 1 or self.name not in dtype: @@ -10270,8 +10282,7 @@ def truncate( if isinstance(ax, MultiIndex): setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) - if copy or (copy is None and not using_copy_on_write()): - result = result.copy(deep=copy) + result = result.copy(deep=copy and not using_copy_on_write()) return result @@ -10352,7 +10363,7 @@ def _tz_convert(ax, tz): raise ValueError(f"The level {level} is not valid") ax = _tz_convert(ax, tz) - result = self.copy(deep=copy) + result = self.copy(deep=copy and not using_copy_on_write()) result = result.set_axis(ax, axis=axis, copy=False) return result.__finalize__(self, method="tz_convert") @@ -10534,7 +10545,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent): raise ValueError(f"The level {level} is not valid") ax = _tz_localize(ax, tz, ambiguous, nonexistent) - result = self.copy(deep=copy) + result = self.copy(deep=copy and not using_copy_on_write()) result = result.set_axis(ax, axis=axis, copy=False) return result.__finalize__(self, method="tz_localize") diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 664a122015ba5..d902cce7244f2 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -423,6 +423,8 @@ def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> copy = False else: copy = True + elif using_copy_on_write(): + copy = False return self.apply( "astype", @@ -438,6 +440,8 @@ def convert(self: T, copy: bool | None) -> T: copy = False else: copy = True + elif using_copy_on_write(): + copy = False return self.apply("convert", copy=copy, using_cow=using_copy_on_write()) diff --git a/pandas/core/series.py b/pandas/core/series.py index d69c057c85783..0e369982fea11 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4129,7 +4129,7 @@ def swaplevel( {examples} """ assert isinstance(self.index, MultiIndex) - result = self.copy(deep=copy) + result = self.copy(deep=copy and not using_copy_on_write()) result.index = self.index.swaplevel(i, j) return result @@ -5714,7 +5714,7 @@ def to_timestamp( if not isinstance(self.index, PeriodIndex): raise TypeError(f"unsupported Type {type(self.index).__name__}") - new_obj = self.copy(deep=copy) + new_obj = self.copy(deep=copy and not using_copy_on_write()) new_index = self.index.to_timestamp(freq=freq, how=how) setattr(new_obj, "index", new_index) return new_obj @@ -5754,7 +5754,7 @@ def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series if not isinstance(self.index, DatetimeIndex): raise TypeError(f"unsupported Type {type(self.index).__name__}") - new_obj = self.copy(deep=copy) + new_obj = self.copy(deep=copy and not using_copy_on_write()) new_index = self.index.to_period(freq=freq) setattr(new_obj, "index", new_index) return new_obj diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 16033bfa750b3..5fdd198c832e7 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -115,15 +115,12 @@ def test_methods_copy_keyword( df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index) df2 = method(df, copy=copy) - share_memory = (using_copy_on_write and copy is not True) or copy is False + share_memory = using_copy_on_write or copy is False if request.node.callspec.id.startswith("reindex-"): # TODO copy=False without CoW still returns a copy in this case if not using_copy_on_write and not using_array_manager and copy is False: share_memory = False - # TODO copy=True with CoW still returns a view - if using_copy_on_write: - share_memory = True if share_memory: assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) @@ -131,6 +128,68 @@ def test_methods_copy_keyword( assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) +@pytest.mark.parametrize("copy", [True, None, False]) +@pytest.mark.parametrize( + "method", + [ + lambda ser, copy: ser.rename(index={0: 100}, copy=copy), + lambda ser, copy: ser.reindex(index=ser.index, copy=copy), + lambda ser, copy: ser.reindex_like(ser, copy=copy), + lambda ser, copy: ser.set_axis(["a", "b", "c"], axis="index", copy=copy), + lambda ser, copy: ser.rename_axis(index="test", copy=copy), + lambda ser, copy: ser.astype("int64", copy=copy), + lambda ser, copy: ser.swaplevel(0, 1, copy=copy), + lambda ser, copy: ser.swapaxes(0, 0, copy=copy), + lambda ser, copy: ser.truncate(0, 5, copy=copy), + lambda ser, copy: ser.infer_objects(copy=copy), + lambda ser, copy: ser.to_timestamp(copy=copy), + lambda ser, copy: ser.to_period(freq="D", copy=copy), + lambda ser, copy: ser.tz_localize("US/Central", copy=copy), + lambda ser, copy: ser.tz_convert("US/Central", copy=copy), + lambda ser, copy: ser.set_flags(allows_duplicate_labels=False, copy=copy), + ], + ids=[ + "rename", + "reindex", + "reindex_like", + "set_axis", + "rename_axis0", + "astype", + "swaplevel", + "swapaxes", + "truncate", + "infer_objects", + "to_timestamp", + "to_period", + "tz_localize", + "tz_convert", + "set_flags", + ], +) +def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write): + index = None + if "to_timestamp" in request.node.callspec.id: + index = period_range("2012-01-01", freq="D", periods=3) + elif "to_period" in request.node.callspec.id: + index = date_range("2012-01-01", freq="D", periods=3) + elif "tz_localize" in request.node.callspec.id: + index = date_range("2012-01-01", freq="D", periods=3) + elif "tz_convert" in request.node.callspec.id: + index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels") + elif "swaplevel" in request.node.callspec.id: + index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]]) + + ser = Series([1, 2, 3], index=index) + ser2 = method(ser, copy=copy) + + share_memory = using_copy_on_write or copy is False + + if share_memory: + assert np.shares_memory(get_array(ser2), get_array(ser)) + else: + assert not np.shares_memory(get_array(ser2), get_array(ser)) + + # ----------------------------------------------------------------------------- # DataFrame methods returning new DataFrame using shallow copy @@ -1111,14 +1170,13 @@ def test_set_flags(using_copy_on_write): tm.assert_series_equal(ser, expected) -@pytest.mark.parametrize("copy_kwargs", [{"copy": True}, {}]) @pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}]) -def test_rename_axis(using_copy_on_write, kwargs, copy_kwargs): +def test_rename_axis(using_copy_on_write, kwargs): df = DataFrame({"a": [1, 2, 3, 4]}, index=Index([1, 2, 3, 4], name="a")) df_orig = df.copy() - df2 = df.rename_axis(**kwargs, **copy_kwargs) + df2 = df.rename_axis(**kwargs) - if using_copy_on_write and not copy_kwargs: + if using_copy_on_write: assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) From b873d62e7ed420d6cf85758e5f64ed806fd71d3e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 17 Feb 2023 20:40:28 +0100 Subject: [PATCH 2/8] Update --- pandas/core/generic.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 531214571b7d6..61e2aeb7aaaac 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -711,11 +711,9 @@ def _set_axis_nocheck( if inplace: setattr(self, self._get_axis_name(axis), labels) else: - if copy and using_copy_on_write(): - copy = False # With copy=False, we create a new object but don't copy the # underlying data. - obj = self.copy(deep=copy) + obj = self.copy(deep=copy and not using_copy_on_write()) setattr(obj, obj._get_axis_name(axis), labels) return obj @@ -1001,9 +999,7 @@ def _rename( index = mapper self._check_inplace_and_allows_duplicate_labels(inplace) - if copy and using_copy_on_write(): - copy = False - result = self if inplace else self.copy(deep=copy) + result = self if inplace else self.copy(deep=copy and not using_copy_on_write()) for axis_no, replacements in enumerate((index, columns)): if replacements is None: From 7d036e06c41784ffcd7327a492ac5c89ee764f2f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 17 Feb 2023 21:33:57 +0100 Subject: [PATCH 3/8] Add concat and merge --- pandas/core/reshape/concat.py | 2 ++ pandas/tests/copy_view/test_functions.py | 30 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index a3068e5c9e4b8..78fd5083f13f8 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -370,6 +370,8 @@ def concat( copy = False else: copy = True + elif copy and using_copy_on_write(): + copy = False op = _Concatenator( objs, diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index b6f2f0543cb2b..53d72baf7da4e 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -181,6 +181,21 @@ def test_concat_mixed_series_frame(using_copy_on_write): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("copy", [True, None, False]) +def test_concat_copy_keyword(using_copy_on_write, copy): + df = DataFrame({"a": [1, 2]}) + df2 = DataFrame({"b": [1.5, 2.5]}) + + result = concat([df, df2], axis=1, copy=copy) + + if using_copy_on_write or copy is False: + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) + else: + assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b")) + + @pytest.mark.parametrize( "func", [ @@ -280,3 +295,18 @@ def test_merge_on_key_enlarging_one(using_copy_on_write, func, how): assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a")) tm.assert_frame_equal(df1, df1_orig) tm.assert_frame_equal(df2, df2_orig) + + +@pytest.mark.parametrize("copy", [True, None, False]) +def test_merge_copy_keyword(using_copy_on_write, copy): + df = DataFrame({"a": [1, 2]}) + df2 = DataFrame({"b": [3, 4.5]}) + + result = df.merge(df2, copy=copy, left_index=True, right_index=True) + + if using_copy_on_write or copy is False: + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) + else: + assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) + assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b")) From 40e5c42a5216f85475625f45d8e7ddd094b97c1e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 17 Feb 2023 21:43:12 +0100 Subject: [PATCH 4/8] Add align --- pandas/core/generic.py | 10 ++++++++-- pandas/tests/copy_view/test_methods.py | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 61e2aeb7aaaac..0756bcc6063dc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5432,10 +5432,14 @@ def _reindex_with_indexers( # If we've made a copy once, no need to make another one copy = False - if (copy or copy is None) and new_data is self._mgr: + if ( + (copy or copy is None) + and new_data is self._mgr + and not using_copy_on_write() + ): new_data = new_data.copy(deep=copy) elif using_copy_on_write() and new_data is self._mgr: - new_data = new_data.copy(deep=copy) + new_data = new_data.copy(deep=False) return self._constructor(new_data).__finalize__(self) @@ -9516,6 +9520,8 @@ def _align_series( fill_axis: Axis = 0, ): is_series = isinstance(self, ABCSeries) + if copy and using_copy_on_write(): + copy = False if (not is_series and axis is None) or axis not in [None, 0, 1]: raise ValueError("Must specify axis=0 or 1") diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 5fdd198c832e7..5de2ef54d6842 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -66,6 +66,7 @@ def test_copy_shallow(using_copy_on_write): lambda df, copy: df.rename(columns=str.lower, copy=copy), lambda df, copy: df.reindex(columns=["a", "c"], copy=copy), lambda df, copy: df.reindex_like(df, copy=copy), + lambda df, copy: df.align(df, copy=copy)[0], lambda df, copy: df.set_axis(["a", "b", "c"], axis="index", copy=copy), lambda df, copy: df.rename_axis(index="test", copy=copy), lambda df, copy: df.rename_axis(columns="test", copy=copy), @@ -84,6 +85,7 @@ def test_copy_shallow(using_copy_on_write): "rename", "reindex", "reindex_like", + "align", "set_axis", "rename_axis0", "rename_axis1", @@ -135,6 +137,7 @@ def test_methods_copy_keyword( lambda ser, copy: ser.rename(index={0: 100}, copy=copy), lambda ser, copy: ser.reindex(index=ser.index, copy=copy), lambda ser, copy: ser.reindex_like(ser, copy=copy), + lambda ser, copy: ser.align(ser, copy=copy)[0], lambda ser, copy: ser.set_axis(["a", "b", "c"], axis="index", copy=copy), lambda ser, copy: ser.rename_axis(index="test", copy=copy), lambda ser, copy: ser.astype("int64", copy=copy), @@ -152,6 +155,7 @@ def test_methods_copy_keyword( "rename", "reindex", "reindex_like", + "align", "set_axis", "rename_axis0", "astype", From 6df0f0fdd3087907043b7513a87a36a7f4f91526 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 18 Feb 2023 00:49:08 +0100 Subject: [PATCH 5/8] Fix tests --- pandas/tests/frame/methods/test_set_axis.py | 15 +++++++------- pandas/tests/reshape/concat/test_concat.py | 8 ++++++-- pandas/tests/reshape/concat/test_dataframe.py | 9 +++++---- pandas/tests/reshape/concat/test_index.py | 20 ++++++++++++++----- pandas/tests/series/methods/test_align.py | 10 +++++++--- 5 files changed, 41 insertions(+), 21 deletions(-) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index fd140e0098f2a..2fc629b14a50e 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -33,13 +33,14 @@ def test_set_axis_copy(self, obj, using_copy_on_write): tm.assert_equal(expected, result) assert result is not obj # check we DID make a copy - if obj.ndim == 1: - assert not tm.shares_memory(result, obj) - else: - assert not any( - tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) - for i in range(obj.shape[1]) - ) + if not using_copy_on_write: + if obj.ndim == 1: + assert not tm.shares_memory(result, obj) + else: + assert not any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) result = obj.set_axis(new_index, axis=0, copy=False) tm.assert_equal(expected, result) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index b08d0a33d08c6..44b02310eb8a7 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -59,8 +59,12 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write): # These are actual copies. result = concat([df, df2, df3], axis=1, copy=True) - for arr in result._mgr.arrays: - assert arr.base is None + if not using_copy_on_write: + for arr in result._mgr.arrays: + assert arr.base is None + else: + for arr in result._mgr.arrays: + assert arr.base is not None # These are the same. result = concat([df, df2, df3], axis=1, copy=False) diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py index 23a49c33099cb..105ffe84a0703 100644 --- a/pandas/tests/reshape/concat/test_dataframe.py +++ b/pandas/tests/reshape/concat/test_dataframe.py @@ -195,15 +195,16 @@ def test_concat_duplicates_in_index_with_keys(self): @pytest.mark.parametrize("ignore_index", [True, False]) @pytest.mark.parametrize("order", ["C", "F"]) @pytest.mark.parametrize("axis", [0, 1]) - def test_concat_copies(self, axis, order, ignore_index): + def test_concat_copies(self, axis, order, ignore_index, using_copy_on_write): # based on asv ConcatDataFrames df = DataFrame(np.zeros((10000, 200), dtype=np.float32, order=order)) res = concat([df] * 5, axis=axis, ignore_index=ignore_index, copy=True) - for arr in res._iter_column_arrays(): - for arr2 in df._iter_column_arrays(): - assert not np.shares_memory(arr, arr2) + if not using_copy_on_write: + for arr in res._iter_column_arrays(): + for arr2 in df._iter_column_arrays(): + assert not np.shares_memory(arr, arr2) def test_outer_sort_columns(self): # GH#47127 diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index e0ea09138ef3c..ce06e74de91b9 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -100,18 +100,28 @@ def test_concat_rename_index(self): tm.assert_frame_equal(result, exp) assert result.index.names == exp.index.names - def test_concat_copy_index_series(self, axis): + def test_concat_copy_index_series(self, axis, using_copy_on_write): # GH 29879 ser = Series([1, 2]) comb = concat([ser, ser], axis=axis, copy=True) - assert comb.index is not ser.index + if not using_copy_on_write or axis in [0, "index"]: + assert comb.index is not ser.index + else: + assert comb.index is ser.index - def test_concat_copy_index_frame(self, axis): + def test_concat_copy_index_frame(self, axis, using_copy_on_write): # GH 29879 df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) comb = concat([df, df], axis=axis, copy=True) - assert comb.index is not df.index - assert comb.columns is not df.columns + if not using_copy_on_write: + assert comb.index is not df.index + assert comb.columns is not df.columns + elif axis in [0, "index"]: + assert comb.index is not df.index + assert comb.columns is df.columns + elif axis in [1, "columns"]: + assert comb.index is df.index + assert comb.columns is not df.columns def test_default_index(self): # is_series and ignore_index diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index b2e03684bc902..7f34f4046d33c 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -118,14 +118,18 @@ def test_align_nocopy(datetime_series, using_copy_on_write): assert (b[:2] == 5).all() -def test_align_same_index(datetime_series): +def test_align_same_index(datetime_series, using_copy_on_write): a, b = datetime_series.align(datetime_series, copy=False) assert a.index is datetime_series.index assert b.index is datetime_series.index a, b = datetime_series.align(datetime_series, copy=True) - assert a.index is not datetime_series.index - assert b.index is not datetime_series.index + if not using_copy_on_write: + assert a.index is not datetime_series.index + assert b.index is not datetime_series.index + else: + assert a.index is datetime_series.index + assert b.index is datetime_series.index def test_align_multiindex(): From 5da7368140819ad0acb21c0753079488d26d70c2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 21 Feb 2023 00:23:12 +0000 Subject: [PATCH 6/8] Fix ci --- pandas/tests/frame/methods/test_reindex.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index ceea53e3dd8bf..52e841a8c569a 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -149,7 +149,10 @@ def test_reindex_copies_ea(self, using_copy_on_write): # pass both columns and index result2 = df.reindex(columns=cols, index=df.index, copy=True) - assert not np.shares_memory(result2[0].array._data, df[0].array._data) + if using_copy_on_write: + assert np.shares_memory(result2[0].array._data, df[0].array._data) + else: + assert not np.shares_memory(result2[0].array._data, df[0].array._data) @td.skip_array_manager_not_yet_implemented def test_reindex_date_fill_value(self): From 9b02111a88cde2d1089fbc9b5d661dec8a54d5a1 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 3 Mar 2023 16:56:51 +0100 Subject: [PATCH 7/8] Add transpose test --- pandas/tests/copy_view/test_methods.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 5d0c37610bd03..13d5853145766 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -194,6 +194,18 @@ def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write) assert not np.shares_memory(get_array(ser2), get_array(ser)) +@pytest.mark.parametrize("copy", [True, None, False]) +def test_transpose_copy_keyword(using_copy_on_write, copy): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df.transpose(copy=copy) + share_memory = using_copy_on_write or copy is False or copy is None + + if share_memory: + assert np.shares_memory(get_array(df, "a"), get_array(result, 0)) + else: + assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) + + # ----------------------------------------------------------------------------- # DataFrame methods returning new DataFrame using shallow copy From eef5234ba8353560371353cc3104b954401cf7e1 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 4 Mar 2023 00:47:32 +0100 Subject: [PATCH 8/8] Fix array manager --- pandas/tests/copy_view/test_methods.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 13d5853145766..7429a73717470 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -195,10 +195,11 @@ def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write) @pytest.mark.parametrize("copy", [True, None, False]) -def test_transpose_copy_keyword(using_copy_on_write, copy): +def test_transpose_copy_keyword(using_copy_on_write, copy, using_array_manager): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) result = df.transpose(copy=copy) share_memory = using_copy_on_write or copy is False or copy is None + share_memory = share_memory and not using_array_manager if share_memory: assert np.shares_memory(get_array(df, "a"), get_array(result, 0))