From 2756370379e7a3f1cb27ea3abe379d881330db89 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 23 Aug 2021 15:42:06 -0700 Subject: [PATCH 1/2] BUG: pickle Index[object] preserve dtype --- pandas/core/indexes/base.py | 4 ++++ pandas/tests/indexes/base_class/test_pickle.py | 11 +++++++++++ 2 files changed, 15 insertions(+) create mode 100644 pandas/tests/indexes/base_class/test_pickle.py diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8c32ccc1fa74c..b6d7b703995f7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -242,6 +242,10 @@ def _new_Index(cls, d): # GH#23752 "labels" kwarg has been replaced with "codes" d["codes"] = d.pop("labels") + elif "dtype" not in d and "data" in d: + # Prevent Index.__new__ from conducting inference; + # "data" key not in RangeIndex + d["dtype"] = d["data"].dtype return cls.__new__(cls, **d) diff --git a/pandas/tests/indexes/base_class/test_pickle.py b/pandas/tests/indexes/base_class/test_pickle.py new file mode 100644 index 0000000000000..e413913b86023 --- /dev/null +++ b/pandas/tests/indexes/base_class/test_pickle.py @@ -0,0 +1,11 @@ +from pandas import Index +import pandas._testing as tm + + +def test_pickle_preserves_object_dtype(): + # GH#43155 don't infer numeric dtype + index = Index([1, 2, 3], dtype=object) + + result = tm.round_trip_pickle(index) + assert result.dtype == object + tm.assert_index_equal(index, result) From b581bfe906714d66441f449d729fae824606563b Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 23 Aug 2021 15:44:11 -0700 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/tests/indexes/base_class/test_pickle.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 205a49e7786a7..53ed329518687 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -319,7 +319,7 @@ I/O - Bug in :func:`json_normalize` where ``errors=ignore`` 
could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`) - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`) - Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`) -- +- Bug in unpickling an :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`) Period ^^^^^^ diff --git a/pandas/tests/indexes/base_class/test_pickle.py b/pandas/tests/indexes/base_class/test_pickle.py index e413913b86023..c670921decb78 100644 --- a/pandas/tests/indexes/base_class/test_pickle.py +++ b/pandas/tests/indexes/base_class/test_pickle.py @@ -3,7 +3,7 @@ def test_pickle_preserves_object_dtype(): - # GH#43155 don't infer numeric dtype + # GH#43188, GH#43155 don't infer numeric dtype index = Index([1, 2, 3], dtype=object) result = tm.round_trip_pickle(index)