From 0311f19500f88499dac0dc27ce05a0b26945991f Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Mon, 24 Jan 2022 20:58:24 -0500 Subject: [PATCH 1/2] diff to upcast int8/int16 to float64 to be consistent with other methods --- pandas/core/algorithms.py | 6 +++--- pandas/tests/frame/methods/test_diff.py | 4 +--- pandas/tests/groupby/test_groupby_shift_diff.py | 2 +- pandas/tests/test_algos.py | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 157404305c5d9..c29fc329824fa 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1608,9 +1608,9 @@ def diff(arr, n: int, axis: int = 0): # int8, int16 are incompatible with float64, # see https://github.com/cython/cython/issues/2646 if arr.dtype.name in ["int8", "int16"]: - dtype = np.float32 - else: - dtype = np.float64 + arr = arr.astype("int32") + + dtype = np.float64 orig_ndim = arr.ndim if orig_ndim == 1: diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index f61529659e9d5..1a8571067c9bf 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -296,8 +296,6 @@ def test_diff_all_int_dtype(self, any_int_numpy_dtype): df = DataFrame(range(5)) df = df.astype(any_int_numpy_dtype) result = df.diff() - expected_dtype = ( - "float32" if any_int_numpy_dtype in ("int8", "int16") else "float64" - ) + expected_dtype = "float64" expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype=expected_dtype) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py index c989c0e0c94cd..3c049aee78a12 100644 --- a/pandas/tests/groupby/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -76,7 +76,7 @@ def test_group_diff_real(any_real_numpy_dtype): ) result = df.groupby("a")["b"].diff() exp_dtype = "float" - if any_real_numpy_dtype in 
["int8", "int16", "float32"]: + if any_real_numpy_dtype == "float32": exp_dtype = "float32" expected = Series([np.nan, np.nan, np.nan, 1.0, 3.0], dtype=exp_dtype, name="b") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 94a20901b2f7a..81e26c1beef9f 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -2441,7 +2441,7 @@ def test_diff_ea_axis(self): def test_diff_low_precision_int(self, dtype): arr = np.array([0, 1, 1, 0, 0], dtype=dtype) result = algos.diff(arr, 1) - expected = np.array([np.nan, 1, 0, -1, 0], dtype="float32") + expected = np.array([np.nan, 1, 0, -1, 0], dtype="float64") tm.assert_numpy_array_equal(result, expected) From 6e0ccceb35212d9d1eb7308a090ca502f21babf3 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Thu, 27 Jan 2022 21:34:20 -0500 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index af8580b67165b..9780b0ebde43a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -244,6 +244,7 @@ Conversion - Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) +- Bug in :meth:`DataFrame.diff` upcasting ``np.int8`` and ``np.int16`` to ``np.float32`` which is inconsistent with other methods like :meth:`DataFrame.shift` (:issue:`45562`) - Strings