DEPR: Remove NumericIndex.__new__ + related methods #51050

Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
@@ -619,6 +619,7 @@ Other API changes
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
- Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
- Instantiating an :class:`Index` with a numeric numpy dtype with data containing :class:`NA` and/or :class:`NaT` now raises a ``ValueError``. Previously a ``TypeError`` was raised (:issue:`51050`)
Member:
does this match what we raise with Series or DataFrame?

@topper-123 (Contributor Author), Jan 29, 2023:

Yes, and this also unifies the error messages users get when they make this kind of mistake.

- Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`)
- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)
- :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
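As an illustration of the whatsnew entry and the review question above, here is a minimal sketch (not part of the PR) that feeds the same data to both `Index` and `Series` and prints whichever exception each constructor raises; it assumes pandas >= 2.0 and NumPy are installed.

```python
# Illustrative sketch only; not taken from the PR. Assumes pandas >= 2.0.
import numpy as np
import pandas as pd

data = [1.0, pd.NA]  # NA inside otherwise-numeric data

for ctor in (pd.Index, pd.Series):
    try:
        print(ctor.__name__, ctor(data, dtype=np.float64))
    except (TypeError, ValueError) as exc:
        # Per the whatsnew entry above, Index is expected to raise ValueError here.
        print(f"{ctor.__name__}: {type(exc).__name__}: {exc}")
```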
4 changes: 4 additions & 0 deletions pandas/core/indexes/base.py
@@ -538,6 +538,10 @@ def _ensure_array(cls, data, dtype, copy: bool):
if data.ndim > 1:
# GH#13601, GH#20285, GH#27125
raise ValueError("Index data must be 1-dimensional")
elif dtype == np.float16:
# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not supported")

if copy:
# asarray_tuplesafe does not always copy underlying data,
# so need to make sure that this happens
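The new guard can be exercised directly from the `Index` constructor; a minimal sketch, assuming pandas >= 2.0:

```python
# Minimal sketch of the float16 guard added above; assumes pandas >= 2.0.
import numpy as np
import pandas as pd

try:
    pd.Index([1.0, 2.0], dtype=np.float16)
except NotImplementedError as exc:
    print(exc)  # expected: "float16 indexes are not supported"
```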
129 changes: 9 additions & 120 deletions pandas/core/indexes/numeric.py
@@ -1,7 +1,5 @@
from __future__ import annotations

from typing import Callable

import numpy as np

from pandas._typing import Dtype
@@ -10,20 +8,7 @@
doc,
)

from pandas.core.dtypes.common import (
is_dtype_equal,
is_integer_dtype,
is_numeric_dtype,
is_scalar,
pandas_dtype,
)
from pandas.core.dtypes.generic import ABCSeries

from pandas.core.construction import sanitize_array
from pandas.core.indexes.base import (
Index,
maybe_extract_name,
)
from pandas.core.indexes.base import Index


class NumericIndex(Index):
@@ -64,102 +49,20 @@ class NumericIndex(Index):
"""

_typ = "numericindex"
_values: np.ndarray
_default_dtype: np.dtype | None = None
_dtype_validation_metadata: tuple[Callable[..., bool], str] = (
is_numeric_dtype,
"numeric type",
)
_can_hold_strings = False

def __new__(
cls, data=None, dtype: Dtype | None = None, copy: bool = False, name=None
) -> NumericIndex:
name = maybe_extract_name(name, data, cls)

subarr = cls._ensure_array(data, dtype, copy)
return cls._simple_new(subarr, name=name)

@classmethod
def _ensure_array(cls, data, dtype, copy: bool):
"""
Ensure we have a valid array to pass to _simple_new.
"""
cls._validate_dtype(dtype)
if dtype == np.float16:

# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not supported")

if not isinstance(data, (np.ndarray, Index)):
# Coerce to ndarray if not already ndarray or Index
if is_scalar(data):
cls._raise_scalar_data_error(data)

# other iterable of some kind
if not isinstance(data, (ABCSeries, list, tuple)):
data = list(data)

if isinstance(data, (list, tuple)):
if len(data):
data = sanitize_array(data, index=None)
else:
data = np.array([], dtype=np.int64)

dtype = cls._ensure_dtype(dtype)

if copy or not is_dtype_equal(data.dtype, dtype):
# TODO: the try/except below is because it's difficult to predict the error
# and/or error message from different combinations of data and dtype.
# Efforts to avoid this try/except welcome.
# See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222
try:
subarr = np.array(data, dtype=dtype, copy=copy)
cls._validate_dtype(subarr.dtype)
except (TypeError, ValueError):
raise ValueError(f"data is not compatible with {cls.__name__}")
cls._assert_safe_casting(data, subarr)
else:
subarr = data

if subarr.ndim > 1:
# GH#13601, GH#20285, GH#27125
raise ValueError("Index data must be 1-dimensional")

subarr = np.asarray(subarr)
if subarr.dtype == "float16":
# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not implemented")

return subarr

@classmethod
def _validate_dtype(cls, dtype: Dtype | None) -> None:
if dtype is None:
return

validation_func, expected = cls._dtype_validation_metadata
if not validation_func(dtype):
raise ValueError(
f"Incorrect `dtype` passed: expected {expected}, received {dtype}"
)

@classmethod
def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
"""
Assumes dtype has already been validated.
"""
if dtype is None:
return cls._default_dtype

dtype = pandas_dtype(dtype)
if not isinstance(dtype, np.dtype):
raise TypeError(f"{dtype} not a numpy type")
elif dtype == np.float16:
# float16 not supported (no indexing engine)
raise NotImplementedError("float16 indexes are not supported")

return dtype
# temporary scaffolding, will be removed soon.
if isinstance(data, list) and len(data) == 0:
data = np.array([], dtype=np.int64)
elif isinstance(data, range):
data = np.arange(data.start, data.stop, data.step, dtype=np.int64)
return super().__new__(
cls, data=data, dtype=dtype, copy=copy, name=name
) # type: ignore[return-value]

# ----------------------------------------------------------------
# Indexing Methods
@@ -168,17 +71,3 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
@doc(Index._should_fallback_to_positional)
def _should_fallback_to_positional(self) -> bool:
return False

# ----------------------------------------------------------------

@classmethod
def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None:
"""
Ensure incoming data can be represented with matching signed-ness.

Needed if the process of casting data from some accepted dtype to the internal
dtype(s) bears the risk of truncation (e.g. float to int).
"""
if is_integer_dtype(subarr.dtype):
if not np.array_equal(data, subarr):
raise TypeError("Unsafe NumPy casting, you must explicitly cast")
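For reference, a standalone sketch of what the temporary scaffolding in `NumericIndex.__new__` does before delegating to `Index.__new__`; the helper name below is hypothetical and not pandas API.

```python
# Hypothetical helper mirroring the scaffolding above; not part of pandas.
import numpy as np

def coerce_like_scaffolding(data):
    # An empty list becomes an empty int64 array ...
    if isinstance(data, list) and len(data) == 0:
        return np.array([], dtype=np.int64)
    # ... and a range becomes an int64 arange; everything else passes through.
    if isinstance(data, range):
        return np.arange(data.start, data.stop, data.step, dtype=np.int64)
    return data

print(coerce_like_scaffolding([]).dtype)         # int64
print(coerce_like_scaffolding(range(0, 10, 2)))  # [0 2 4 6 8]
```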
12 changes: 12 additions & 0 deletions pandas/core/indexes/range.py
@@ -98,6 +98,7 @@ class RangeIndex(NumericIndex):
_typ = "rangeindex"
_dtype_validation_metadata = (is_signed_integer_dtype, "signed integer")
_range: range
_values: np.ndarray

@property
def _engine_type(self) -> type[libindex.Int64Engine]:
@@ -178,6 +179,17 @@ def _simple_new( # type: ignore[override]
result._reset_identity()
return result

@classmethod
def _validate_dtype(cls, dtype: Dtype | None) -> None:
if dtype is None:
return

validation_func, expected = cls._dtype_validation_metadata
if not validation_func(dtype):
raise ValueError(
f"Incorrect `dtype` passed: expected {expected}, received {dtype}"
)

# --------------------------------------------------------------------

# error: Return type "Type[NumericIndex]" of "_constructor" incompatible with return
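A minimal sketch of the validation hook moved onto `RangeIndex` above, assuming pandas >= 2.0: per the `_dtype_validation_metadata` shown earlier, an unsigned dtype should fail the signed-integer check.

```python
# Illustrative sketch; assumes pandas >= 2.0.
import pandas as pd

try:
    pd.RangeIndex(5, dtype="uint64")
except ValueError as exc:
    # e.g. "Incorrect `dtype` passed: expected signed integer, received uint64"
    print(exc)
```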
4 changes: 2 additions & 2 deletions pandas/tests/indexes/interval/test_constructors.py
@@ -39,8 +39,8 @@ class ConstructorTests:
params=[
([3, 14, 15, 92, 653], np.int64),
(np.arange(10, dtype="int64"), np.int64),
(NumericIndex(range(-10, 11), dtype=np.int64), np.int64),
(NumericIndex(range(10, 31), dtype=np.uint64), np.uint64),
(NumericIndex(np.arange(-10, 11, dtype=np.int64)), np.int64),
(NumericIndex(np.arange(10, 31, dtype=np.uint64)), np.uint64),
(NumericIndex(np.arange(20, 30, 0.5), dtype=np.float64), np.float64),
(date_range("20180101", periods=10), "<M8[ns]"),
(
8 changes: 5 additions & 3 deletions pandas/tests/indexes/test_base.py
@@ -309,7 +309,6 @@ def test_constructor_dtypes_timedelta(self, attr, klass):
"klass",
[
Index,
NumericIndex,
CategoricalIndex,
DatetimeIndex,
TimedeltaIndex,
@@ -873,8 +872,11 @@ def test_isin_nan_common_float64(self, nulls_fixture):
if nulls_fixture is pd.NaT or nulls_fixture is pd.NA:
# Check 1) that we cannot construct a float64 Index with this value
# and 2) that with an NaN we do not have .isin(nulls_fixture)
msg = "data is not compatible with NumericIndex"
with pytest.raises(ValueError, match=msg):
msg = (
r"float\(\) argument must be a string or a (real )?number, "
f"not {repr(type(nulls_fixture).__name__)}"
)
with pytest.raises(TypeError, match=msg):
NumericIndex([1.0, nulls_fixture], dtype=np.float64)

idx = NumericIndex([1.0, np.nan], dtype=np.float64)
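The new message the updated test matches on comes from NumPy's element-wise coercion of NaT/NA scalars to float; a minimal sketch, assuming pandas and NumPy are installed:

```python
# Illustrative sketch of the coercion failure the updated test expects.
import numpy as np
import pandas as pd

try:
    np.array([1.0, pd.NaT], dtype=np.float64)
except TypeError as exc:
    # e.g. "float() argument must be a string or a real number, not 'NaTType'"
    print(exc)
```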