diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index de9e3ace4f0ca..aa9d1c8152019 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -891,6 +891,8 @@ def value_counts( else: values = _ensure_arraylike(values) keys, counts = value_counts_arraylike(values, dropna) + if keys.dtype == np.float16: + keys = keys.astype(np.float32) # For backwards compatibility, we let Index do its normal type # inference, _except_ for if if infers from object to bool. diff --git a/pandas/core/base.py b/pandas/core/base.py index 22a4790b32506..e5e0ac4e121ae 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1140,6 +1140,8 @@ def factorize( codes, uniques = algorithms.factorize( self._values, sort=sort, use_na_sentinel=use_na_sentinel ) + if uniques.dtype == np.float16: + uniques = uniques.astype(np.float32) if isinstance(self, ABCIndex): # preserve e.g. NumericIndex, preserve MultiIndex diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1938dc6d5c7b4..3dc6aed56fa24 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -881,6 +881,9 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): # i.e. np.divmod, np.modf, np.frexp return tuple(self.__array_wrap__(x) for x in result) + if result.dtype == np.float16: + result = result.astype(np.float32) + return self.__array_wrap__(result) def __array_wrap__(self, result, context=None): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index df353e98fde4a..7fec60babea00 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -75,8 +75,8 @@ class NumericIndex(Index): Notes ----- An NumericIndex instance can **only** contain numpy int64/32/16/8, uint64/32/16/8 or - float64/32/16 dtype. In particular, ``NumericIndex`` *can not* hold Pandas numeric - dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.). + float64/32 dtype. 
In particular, ``NumericIndex`` *can not* hold numpy float16 + dtype or Pandas numeric dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.). """ _typ = "numericindex" @@ -133,6 +133,10 @@ def _ensure_array(cls, data, dtype, copy: bool): Ensure we have a valid array to pass to _simple_new. """ cls._validate_dtype(dtype) + if dtype == np.float16: + + # float16 not supported (no indexing engine) + raise NotImplementedError("float16 indexes are not supported") if not isinstance(data, (np.ndarray, Index)): # Coerce to ndarray if not already ndarray or Index @@ -176,6 +180,10 @@ def _ensure_array(cls, data, dtype, copy: bool): raise ValueError("Index data must be 1-dimensional") subarr = np.asarray(subarr) + if subarr.dtype == np.float16: + # float16 not supported (no indexing engine) + raise NotImplementedError("float16 indexes are not supported") + return subarr @classmethod @@ -202,6 +210,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: dtype = pandas_dtype(dtype) if not isinstance(dtype, np.dtype): raise TypeError(f"{dtype} not a numpy type") + elif dtype == np.float16: + # float16 not supported (no indexing engine) + raise NotImplementedError("float16 indexes are not supported") if cls._is_backward_compat_public_numeric_index: # dtype for NumericIndex diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 529dd6baa70c0..5c6cbf73d5bfc 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -7,7 +7,6 @@ from datetime import timedelta from decimal import Decimal import operator -from typing import Any import numpy as np import pytest @@ -72,15 +71,10 @@ def compare_op(series, other, op): # TODO: remove this kludge once mypy stops giving false positives here # List comprehension has incompatible type List[PandasObject]; expected List[RangeIndex] # See GH#29725 -ser_or_index: list[Any] = [Series, Index] -lefts: list[Any] = [RangeIndex(10, 40, 10)] 
-lefts.extend( - [ - cls([10, 20, 30], dtype=dtype) - for dtype in ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"] - for cls in ser_or_index - ] -) +_ldtypes = ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"] +lefts: list[Index | Series] = [RangeIndex(10, 40, 10)] +lefts.extend([Series([10, 20, 30], dtype=dtype) for dtype in _ldtypes]) +lefts.extend([Index([10, 20, 30], dtype=dtype) for dtype in _ldtypes if dtype != "f2"]) # ------------------------------------------------------------------ # Comparisons diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index f244b348c6763..a910b20c476ff 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -62,6 +62,10 @@ def test_iterable(self, index_or_series, method, dtype, rdtype): # gh-13258 # coerce iteration to underlying python / pandas types typ = index_or_series + if dtype == "float16" and issubclass(typ, pd.Index): + with pytest.raises(NotImplementedError, match="float16 indexes are not "): + typ([1], dtype=dtype) + return s = typ([1], dtype=dtype) result = method(s)[0] assert isinstance(result, rdtype) @@ -115,6 +119,10 @@ def test_iterable_map(self, index_or_series, dtype, rdtype): # gh-13236 # coerce iteration to underlying python / pandas types typ = index_or_series + if dtype == "float16" and issubclass(typ, pd.Index): + with pytest.raises(NotImplementedError, match="float16 indexes are not "): + typ([1], dtype=dtype) + return s = typ([1], dtype=dtype) result = s.map(type)[0] if not isinstance(rdtype, tuple): diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index dafbd9fee1b8e..3aa0827b22a78 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -28,7 +28,13 @@ def test_value_counts(index_or_series_obj): counter = collections.Counter(obj) expected = Series(dict(counter.most_common()), dtype=np.int64, 
name=obj.name) - expected.index = expected.index.astype(obj.dtype) + + if obj.dtype != np.float16: + expected.index = expected.index.astype(obj.dtype) + else: + with pytest.raises(NotImplementedError, match="float16 indexes are not "): + expected.index.astype(obj.dtype) + return if not isinstance(result.dtype, np.dtype): # i.e IntegerDtype @@ -73,7 +79,13 @@ def test_value_counts_null(null_obj, index_or_series_obj): # np.nan would be duplicated, whereas None wouldn't counter = collections.Counter(obj.dropna()) expected = Series(dict(counter.most_common()), dtype=np.int64) - expected.index = expected.index.astype(obj.dtype) + + if obj.dtype != np.float16: + expected.index = expected.index.astype(obj.dtype) + else: + with pytest.raises(NotImplementedError, match="float16 indexes are not "): + expected.index.astype(obj.dtype) + return result = obj.value_counts() if obj.duplicated().any(): diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index c06fce4811f12..e8e6eed31db74 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -471,6 +471,38 @@ def test_coerce_list(self): assert type(arr) is Index +class TestFloat16Index: + # float 16 indexes not supported + # GH 49535 + _index_cls = NumericIndex + + def test_constructor(self): + index_cls = self._index_cls + dtype = np.float16 + + msg = "float16 indexes are not supported" + + # explicit construction + with pytest.raises(NotImplementedError, match=msg): + index_cls([1, 2, 3, 4, 5], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([1, 2, 3, 4, 5]), dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls([1.0, 2, 3, 4, 5], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) + + 
# nan handling + with pytest.raises(NotImplementedError, match=msg): + index_cls([np.nan, np.nan], dtype=dtype) + + with pytest.raises(NotImplementedError, match=msg): + index_cls(np.array([np.nan]), dtype=dtype) + + class TestUIntNumericIndex(NumericInt): _index_cls = NumericIndex diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 453ece35a68e7..40fa4877d2e83 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -77,7 +77,10 @@ def test_numpy_ufuncs_basic(index, func): # coerces to float (e.g. np.sin) with np.errstate(all="ignore"): result = func(index) - exp = Index(func(index.values), name=index.name) + arr_result = func(index.values) + if arr_result.dtype == np.float16: + arr_result = arr_result.astype(np.float32) + exp = Index(arr_result, name=index.name) tm.assert_index_equal(result, exp) if type(index) is not Index or index.dtype == bool: diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 818211cf0fa2a..767203838728b 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -66,7 +66,10 @@ def test_factorize(self, index_or_series_obj, sort): constructor = Index if isinstance(obj, MultiIndex): constructor = MultiIndex.from_tuples - expected_uniques = constructor(obj.unique()) + expected_arr = obj.unique() + if expected_arr.dtype == np.float16: + expected_arr = expected_arr.astype(np.float32) + expected_uniques = constructor(expected_arr) if ( isinstance(obj, Index) and expected_uniques.dtype == bool