diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 9b71ab656920d..606f29ef75ba7 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -199,6 +199,7 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- A newly constructed empty :class:`DataFrame` with an integer ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`) .. _whatsnew_0240.api_breaking.interval_values: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 410e061c895db..a95a45d5f9ae4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1220,7 +1220,9 @@ def construct_1d_arraylike_from_scalar(value, length, dtype): dtype = dtype.dtype # coerce if we have nan for an integer dtype - if is_integer_dtype(dtype) and isna(value): + # GH 22858: only cast to float if an index + # (passed here as length) is specified + if length and is_integer_dtype(dtype) and isna(value): dtype = np.float64 subarr = np.empty(length, dtype=dtype) subarr.fill(value) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2f1c9e05a01b0..e2be410d51b88 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -798,25 +798,20 @@ def test_constructor_mrecarray(self): result = DataFrame(mrecs, index=[1, 2]) assert_fr_equal(result, expected) - def test_constructor_corner(self): + def test_constructor_corner_shape(self): df = DataFrame(index=[]) assert df.values.shape == (0, 0) - # empty but with specified dtype - df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=object) - assert df.values.dtype == np.object_ - - # does not error but ends up float - df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=int) - assert df.values.dtype == np.dtype('float64') - - # #1783 empty dtype object - df = DataFrame({}, columns=['foo', 'bar']) - assert 
df.values.dtype == np.object_ - - df = DataFrame({'b': 1}, index=lrange(10), columns=list('abc'), - dtype=int) - assert df.values.dtype == np.dtype('float64') + @pytest.mark.parametrize("data, index, columns, dtype, expected", [ + (None, lrange(10), ['a', 'b'], object, np.object_), + (None, None, ['a', 'b'], 'int64', np.dtype('int64')), + (None, lrange(10), ['a', 'b'], int, np.dtype('float64')), + ({}, None, ['foo', 'bar'], None, np.object_), + ({'b': 1}, lrange(10), list('abc'), int, np.dtype('float64')) + ]) + def test_constructor_dtype(self, data, index, columns, dtype, expected): + df = DataFrame(data, index, columns, dtype) + assert df.values.dtype == expected def test_constructor_scalar_inference(self): data = {'int': 1, 'bool': True,