Skip to content

Commit df05308

Browse files
committed
fix float casting
1 parent 083eff6 commit df05308

File tree

3 files changed

+18
-33
lines changed

3 files changed

+18
-33
lines changed

pandas/core/arrays/sparse.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
infer_dtype_from_scalar)
2525
from pandas.core.dtypes.common import (
2626
is_array_like, is_bool_dtype, is_datetime64_any_dtype, is_dtype_equal,
27-
is_integer, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype)
27+
is_float_dtype, is_integer, is_integer_dtype, is_object_dtype, is_scalar,
28+
is_string_dtype, pandas_dtype)
2829
from pandas.core.dtypes.dtypes import register_extension_dtype
2930
from pandas.core.dtypes.generic import (
3031
ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries)
@@ -1907,15 +1908,24 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
19071908
index = _make_index(length, indices, kind)
19081909
sparsified_values = arr[mask]
19091910

1910-
# careful about casting here as we could easily specify a type that
1911-
# cannot hold the resulting values, e.g. integer when we have floats
1912-
# if we don't have an object specified then use this as the cast
19131911
if dtype is not None:
19141912

1915-
ok_to_cast = all(not (is_object_dtype(t) or is_bool_dtype(t))
1916-
for t in (dtype, sparsified_values.dtype))
1917-
if ok_to_cast:
1913+
# careful about casting here as we could easily specify a type that
1914+
# cannot hold the resulting values, e.g. integer when we have floats
1915+
# if this is not safe then convert the dtype; note that if there are
1916+
# nan's in the source array this will raise
1917+
1918+
# TODO: ideally this would be done by 'safe' casting in astype_nansafe
1919+
# but alas too many cases rely upon this working in the current way
1920+
# and casting='safe' doesn't really work in numpy properly
1921+
if is_integer_dtype(dtype) and is_float_dtype(sparsified_values.dtype):
1922+
result = astype_nansafe(
1923+
sparsified_values, dtype=dtype)
1924+
if np.allclose(result, sparsified_values, rtol=0):
1925+
return result, index, fill_value
1926+
19181927
dtype = find_common_type([dtype, sparsified_values.dtype])
1928+
19191929
sparsified_values = astype_nansafe(
19201930
sparsified_values, dtype=dtype)
19211931

pandas/tests/arrays/sparse/test_array.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,7 @@ def test_astype(self):
473473
# float -> float
474474
arr = SparseArray([None, None, 0, 2])
475475
result = arr.astype("Sparse[float32]")
476+
476477
expected = SparseArray([None, None, 0, 2], dtype=np.dtype('float32'))
477478
tm.assert_sp_array_equal(result, expected)
478479

pandas/tests/extension/test_sparse.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -154,32 +154,6 @@ def test_reindex(self, data, na_value):
154154
self._check_unsupported(data)
155155
super().test_reindex(data, na_value)
156156

157-
def test_getitem_mask(self, data):
158-
# Empty mask, raw array
159-
mask = np.zeros(len(data), dtype=bool)
160-
result = data[mask]
161-
assert len(result) == 0
162-
assert isinstance(result, type(data))
163-
164-
# Empty mask, in series
165-
mask = np.zeros(len(data), dtype=bool)
166-
result = pd.Series(data)[mask]
167-
assert len(result) == 0
168-
169-
# we change int -> float because of the masking
170-
assert result.dtype == SparseDtype('float64', data.dtype.fill_value)
171-
172-
# non-empty mask, raw array
173-
mask[0] = True
174-
result = data[mask]
175-
assert len(result) == 1
176-
assert isinstance(result, type(data))
177-
178-
# non-empty mask, in series
179-
result = pd.Series(data)[mask]
180-
assert len(result) == 1
181-
assert result.dtype == data.dtype
182-
183157

184158
# Skipping TestSetitem, since we don't implement it.
185159

0 commit comments

Comments
 (0)