diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 64df847aa74f0..8890eb01a3727 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -20,6 +20,9 @@ Copy-on-Write improvements ^^^^^^^^^^^^^^^^^^^^^^^^^^ - Setting a :class:`Series` into a :class:`DataFrame` now creates a lazy instead of a deep copy (:issue:`53142`) +- The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary + of Index objects and specifying ``copy=False``, will now use a lazy copy + of those Index objects for the columns of the DataFrame (:issue:`52947`) .. _whatsnew_210.enhancements.enhancement2: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2c6c6dd0e6ed1..30bc6a42a2db5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -22,7 +22,10 @@ import numpy as np -from pandas._config import get_option +from pandas._config import ( + get_option, + using_copy_on_write, +) from pandas._libs import ( NaT, @@ -1635,7 +1638,7 @@ def to_frame( if name is lib.no_default: name = self._get_level_names() - result = DataFrame({name: self._values.copy()}) + result = DataFrame({name: self}, copy=not using_copy_on_write()) if index: result.index = self diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index f080683d76df7..dc9c47a4a5e34 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -460,13 +460,19 @@ def dict_to_mgr( keys = list(data.keys()) columns = Index(keys) if keys else default_index(0) arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] - arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays] if copy: if typ == "block": # We only need to copy arrays that will not get consolidated, i.e. # only EA arrays - arrays = [x.copy() if isinstance(x, ExtensionArray) else x for x in arrays] + arrays = [ + x.copy() + if isinstance(x, ExtensionArray) + else x.copy(deep=True) + if isinstance(x, Index) + else x + for x in arrays + ] else: # dtype check to exclude e.g. range objects, scalars arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays] @@ -573,10 +579,10 @@ def _homogenize( refs: list[Any] = [] for val in data: - if isinstance(val, ABCSeries): + if isinstance(val, (ABCSeries, Index)): if dtype is not None: val = val.astype(dtype, copy=False) - if val.index is not index: + if isinstance(val, ABCSeries) and val.index is not index: # Forces alignment. No need to copy data since we # are putting it into an ndarray later val = val.reindex(index, copy=False) diff --git a/pandas/tests/copy_view/index/test_index.py b/pandas/tests/copy_view/index/test_index.py index 5e9c04c0adfc3..826505cbaf03f 100644 --- a/pandas/tests/copy_view/index/test_index.py +++ b/pandas/tests/copy_view/index/test_index.py @@ -153,3 +153,17 @@ def test_infer_objects(using_copy_on_write): view_.iloc[0, 0] = "aaaa" if using_copy_on_write: tm.assert_index_equal(idx, expected, check_names=False) + + +def test_index_to_frame(using_copy_on_write): + idx = Index([1, 2, 3], name="a") + expected = idx.copy(deep=True) + df = idx.to_frame() + if using_copy_on_write: + assert np.shares_memory(get_array(df, "a"), idx._values) + assert not df._mgr._has_no_reference(0) + else: + assert not np.shares_memory(get_array(df, "a"), idx._values) + + df.iloc[0, 0] = 100 + tm.assert_index_equal(idx, expected) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index ad7812778afd8..af7e759902f9f 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -340,3 +340,15 @@ def test_dataframe_from_records_with_dataframe(using_copy_on_write): tm.assert_frame_equal(df, df_orig) else: tm.assert_frame_equal(df, df2) + + +def test_frame_from_dict_of_index(using_copy_on_write): + idx = Index([1, 2, 3]) + expected = idx.copy(deep=True) + df = DataFrame({"a": idx}, copy=False) + assert np.shares_memory(get_array(df, "a"), idx._values) + if using_copy_on_write: + assert not df._mgr._has_no_reference(0) + + df.iloc[0, 0] = 100 + tm.assert_index_equal(idx, expected) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 83b32bb1230c2..b73bd7c78f009 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -31,7 +31,7 @@ class TestCommon: @pytest.mark.parametrize("name", [None, "new_name"]) - def test_to_frame(self, name, index_flat): + def test_to_frame(self, name, index_flat, using_copy_on_write): # see GH#15230, GH#22580 idx = index_flat @@ -45,7 +45,8 @@ def test_to_frame(self, name, index_flat): assert df.index is idx assert len(df.columns) == 1 assert df.columns[0] == idx_name - assert df[idx_name].values is not idx.values + if not using_copy_on_write: + assert df[idx_name].values is not idx.values df = idx.to_frame(index=False, name=idx_name) assert df.index is not idx