From 8d310d6d2f3f3d4581018d0f6a1c6be0c54576af Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Thu, 19 Nov 2020 19:08:48 +0100 Subject: [PATCH 01/10] Special handling of string -> float. --- pandas/core/arrays/string_.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 3b297e7c2b13b..c2cf4caabf87b 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -298,6 +298,13 @@ def astype(self, dtype, copy=True): arr[mask] = 0 values = arr.astype(dtype.numpy_dtype) return IntegerArray(values, mask, copy=False) + elif np.issubdtype(dtype, np.floating): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = 0 + values = arr.astype(dtype) + values[mask] = np.nan + return values return super().astype(dtype, copy) From 4ec629598151c4b1c443129581fa5b6e9345565c Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Thu, 19 Nov 2020 20:39:15 +0100 Subject: [PATCH 02/10] Add tests. --- pandas/tests/arrays/string_/test_string.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 089bbcf4e0e3f..6f10902eb4d45 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -226,6 +226,15 @@ def test_astype_int(): tm.assert_extension_array_equal(result, expected) +def test_astype_float(): + arr = pd.array(["1.1", pd.NA, "3.3"], dtype="string") + + result = arr.astype("float") + expected = pd.array([1.1, pd.NA, 3.3], dtype="float") + tm.assert_extension_array_equal(result, expected) + + + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.xfail(reason="Not implemented StringArray.sum") def test_reduce(skipna): From 95e8605827ccb1a2799d414ca4fdc093688646c2 Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Fri, 20 Nov 2020 08:18:30 +0100 Subject: [PATCH 03/10] Use pd.Series in test. 
--- pandas/tests/arrays/string_/test_string.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 6f10902eb4d45..a870e1c1328a1 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -227,12 +227,11 @@ def test_astype_int(): def test_astype_float(): - arr = pd.array(["1.1", pd.NA, "3.3"], dtype="string") - - result = arr.astype("float") - expected = pd.array([1.1, pd.NA, 3.3], dtype="float") - tm.assert_extension_array_equal(result, expected) + s = pd.Series(["1.1", pd.NA, "3.3"], dtype="string") + result = s.astype("float") + expected = pd.Series([1.1, np.nan, 3.3], dtype="float64") + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("skipna", [True, False]) From cce3fdebbf05877fbe24b89543402fa7ca155716 Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Fri, 20 Nov 2020 08:31:03 +0100 Subject: [PATCH 04/10] Add changelog entry. --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a3b5ba616b258..467dd35103a6c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -574,6 +574,7 @@ Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetimelike columns (:issue:`21256`) +- Bug in :meth:`Series.astype` conversion from ``string`` to ``float`` raised in presence of ``pd.NA`` values. 
- Strings From 3fe70c90c7db146b75b87af23fb410ffbe04874d Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Sun, 22 Nov 2020 13:33:11 +0100 Subject: [PATCH 05/10] Some love for FloatingDtype --- pandas/core/arrays/string_.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index c2cf4caabf87b..e0bb788d665eb 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -18,7 +18,8 @@ from pandas.core import ops from pandas.core.array_algos import masked_reductions -from pandas.core.arrays import IntegerArray, PandasArray +from pandas.core.arrays import FloatingArray, IntegerArray, PandasArray +from pandas.core.arrays.floating import FloatingDtype from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer @@ -298,6 +299,12 @@ def astype(self, dtype, copy=True): arr[mask] = 0 values = arr.astype(dtype.numpy_dtype) return IntegerArray(values, mask, copy=False) + elif isinstance(dtype, FloatingDtype): + arr = self.copy() + mask = self.isna() + arr[mask] = "0" + values = arr.astype(dtype.numpy_dtype) + return FloatingArray(values, mask, copy=False) elif np.issubdtype(dtype, np.floating): arr = self._ndarray.copy() mask = self.isna() From 7fe9f17d22a268a3607a669a556d5349585f11f7 Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Sun, 22 Nov 2020 13:35:33 +0100 Subject: [PATCH 06/10] Parametrize dtypes. 
--- pandas/tests/arrays/string_/test_string.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index a870e1c1328a1..991f485ae7a7d 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -226,11 +226,13 @@ def test_astype_int(): tm.assert_extension_array_equal(result, expected) -def test_astype_float(): +@pytest.mark.parametrize("dtype", ["float", "float32", "Float32", "Float64"]) +def test_astype_float(dtype): + # Don't compare arrays (37974) s = pd.Series(["1.1", pd.NA, "3.3"], dtype="string") - result = s.astype("float") - expected = pd.Series([1.1, np.nan, 3.3], dtype="float64") + result = s.astype(dtype) + expected = pd.Series([1.1, np.nan, 3.3], dtype=dtype) tm.assert_series_equal(result, expected) From db9e7081151061f480f4a73446e19ea453ca220c Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Sun, 22 Nov 2020 20:16:50 +0100 Subject: [PATCH 07/10] Use ser and np dtypes. 
--- pandas/tests/arrays/string_/test_string.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 991f485ae7a7d..81dcd55297bdd 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -226,12 +226,14 @@ def test_astype_int(): tm.assert_extension_array_equal(result, expected) -@pytest.mark.parametrize("dtype", ["float", "float32", "Float32", "Float64"]) +@pytest.mark.parametrize( + "dtype", ["float", "float32", "Float32", "Float64", np.single, np.double] +) def test_astype_float(dtype): # Don't compare arrays (37974) - s = pd.Series(["1.1", pd.NA, "3.3"], dtype="string") + ser = pd.Series(["1.1", pd.NA, "3.3"], dtype="string") - result = s.astype(dtype) + result = ser.astype(dtype) expected = pd.Series([1.1, np.nan, 3.3], dtype=dtype) tm.assert_series_equal(result, expected) From 7d7fd60b2e50e23a80aa45561688ed3722f66c77 Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Sun, 22 Nov 2020 20:29:11 +0100 Subject: [PATCH 08/10] Try again. From 02b0fa4cfc09206e94925badc61a5fd8d35d70c0 Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Mon, 23 Nov 2020 17:17:45 +0100 Subject: [PATCH 09/10] Implement suggestion. 
--- pandas/conftest.py | 15 ++++++++++++++- pandas/tests/arrays/string_/test_string.py | 9 +++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 77e9af67590a6..a2c137a1e1aed 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -286,7 +286,6 @@ def unique_nulls_fixture(request): # Generate cartesian product of unique_nulls_fixture: unique_nulls_fixture2 = unique_nulls_fixture - # ---------------------------------------------------------------- # Classes # ---------------------------------------------------------------- @@ -1069,6 +1068,20 @@ def float_ea_dtype(request): return request.param +@pytest.fixture(params=tm.FLOAT_DTYPES + tm.FLOAT_EA_DTYPES) +def any_float_allowed_nullable_dtype(request): + """ + Parameterized fixture for float dtypes. + + * float + * 'float32' + * 'float64' + * 'Float32' + * 'Float64' + """ + return request.param + + @pytest.fixture(params=tm.COMPLEX_DTYPES) def complex_dtype(request): """ diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 81dcd55297bdd..19aae1313714c 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -226,15 +226,12 @@ def test_astype_int(): tm.assert_extension_array_equal(result, expected) -@pytest.mark.parametrize( - "dtype", ["float", "float32", "Float32", "Float64", np.single, np.double] -) -def test_astype_float(dtype): +def test_astype_float(any_float_allowed_nullable_dtype): # Don't compare arrays (37974) ser = pd.Series(["1.1", pd.NA, "3.3"], dtype="string") - result = ser.astype(dtype) - expected = pd.Series([1.1, np.nan, 3.3], dtype=dtype) + result = ser.astype(any_float_allowed_nullable_dtype) + expected = pd.Series([1.1, np.nan, 3.3], dtype=any_float_allowed_nullable_dtype) tm.assert_series_equal(result, expected) From 3db01b8888c162a226e35634ea1b0fe75c466196 Mon Sep 17 00:00:00 2001 From: Malte Londschien Date: Tue, 24 Nov 
2020 08:03:10 +0100 Subject: [PATCH 10/10] Reference issue in changelog. --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0a5f5e1fa388c..ffd20df85ed1f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -580,7 +580,7 @@ Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetimelike columns (:issue:`21256`) -- Bug in :meth:`Series.astype` conversion from ``string`` to ``float`` raised in presence of ``pd.NA`` values. +- Bug in :meth:`Series.astype` where conversion from ``string`` to ``float`` raised in the presence of ``pd.NA`` values (:issue:`37626`) - Strings