Fix float-to-integer casting in make_sparse

jreback · jreback · commit df053085d361 · 2019-06-09T19:12:28.000-04:00
diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
@@ -24,7 +24,8 @@
     infer_dtype_from_scalar)
 from pandas.core.dtypes.common import (
     is_array_like, is_bool_dtype, is_datetime64_any_dtype, is_dtype_equal,
-    is_integer, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype)
+    is_float_dtype, is_integer, is_integer_dtype, is_object_dtype, is_scalar,
+    is_string_dtype, pandas_dtype)
 from pandas.core.dtypes.dtypes import register_extension_dtype
 from pandas.core.dtypes.generic import (
     ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries)
@@ -1907,15 +1908,24 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
     index = _make_index(length, indices, kind)
     sparsified_values = arr[mask]
 
-    # careful about casting here as we could easily specify a type that
-    # cannot hold the resulting values, e.g. integer when we have floats
-    # if we don't have an object specified then use this as the cast
     if dtype is not None:
 
-        ok_to_cast = all(not (is_object_dtype(t) or is_bool_dtype(t))
-                         for t in (dtype, sparsified_values.dtype))
-        if ok_to_cast:
+        # Be careful about casting here: the requested dtype may be unable to
+        # hold the resulting values, e.g. an integer dtype when we have floats.
+        # If the cast is not safe, convert the dtype instead. Note that if
+        # there are NaNs in the source array this will raise.
+
+        # TODO: ideally this would be done by 'safe' casting in astype_nansafe,
+        # but too many cases rely on the current behavior, and casting='safe'
+        # does not really work properly in numpy.
+        if is_integer_dtype(dtype) and is_float_dtype(sparsified_values.dtype):
+            result = astype_nansafe(
+                sparsified_values, dtype=dtype)
+            if np.allclose(result, sparsified_values, rtol=0):
+                return result, index, fill_value
+
             dtype = find_common_type([dtype, sparsified_values.dtype])
+
         sparsified_values = astype_nansafe(
             sparsified_values, dtype=dtype)
 
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
@@ -473,6 +473,7 @@ def test_astype(self):
         # float -> float
         arr = SparseArray([None, None, 0, 2])
         result = arr.astype("Sparse[float32]")
+
         expected = SparseArray([None, None, 0, 2], dtype=np.dtype('float32'))
         tm.assert_sp_array_equal(result, expected)
 
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
@@ -154,32 +154,6 @@ def test_reindex(self, data, na_value):
         self._check_unsupported(data)
         super().test_reindex(data, na_value)
 
-    def test_getitem_mask(self, data):
-        # Empty mask, raw array
-        mask = np.zeros(len(data), dtype=bool)
-        result = data[mask]
-        assert len(result) == 0
-        assert isinstance(result, type(data))
-
-        # Empty mask, in series
-        mask = np.zeros(len(data), dtype=bool)
-        result = pd.Series(data)[mask]
-        assert len(result) == 0
-
-        # we change int -> float because of the masking
-        assert result.dtype == SparseDtype('float64', data.dtype.fill_value)
-
-        # non-empty mask, raw array
-        mask[0] = True
-        result = data[mask]
-        assert len(result) == 1
-        assert isinstance(result, type(data))
-
-        # non-empty mask, in series
-        result = pd.Series(data)[mask]
-        assert len(result) == 1
-        assert result.dtype == data.dtype
-
 
 # Skipping TestSetitem, since we don't implement it.