-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: Back IntervalArray by array instead of Index #36310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
8987a0e
153c87a
8164099
d545dac
6050ec8
bd6231c
548efe6
124938e
c479e0a
c4a2229
97a0bed
bfa13bb
b45ed46
e6d4bd9
266512f
f16be73
4efdc08
1ed9623
ed6a932
fee70d8
1a22095
490a8f5
865b3fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,6 @@ | |
from pandas.core.dtypes.dtypes import IntervalDtype | ||
from pandas.core.dtypes.generic import ( | ||
ABCDatetimeIndex, | ||
ABCIndexClass, | ||
ABCIntervalIndex, | ||
ABCPeriodIndex, | ||
ABCSeries, | ||
|
@@ -43,7 +42,7 @@ | |
from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs | ||
from pandas.core.arrays.categorical import Categorical | ||
import pandas.core.common as com | ||
from pandas.core.construction import array | ||
from pandas.core.construction import array, extract_array | ||
from pandas.core.indexers import check_array_indexer | ||
from pandas.core.indexes.base import ensure_index | ||
|
||
|
@@ -161,10 +160,12 @@ def __new__( | |
verify_integrity: bool = True, | ||
): | ||
|
||
if isinstance(data, ABCSeries) and is_interval_dtype(data.dtype): | ||
data = data._values | ||
if isinstance(data, (ABCSeries, ABCIntervalIndex)) and is_interval_dtype( | ||
data.dtype | ||
): | ||
data = data._values # TODO: extract_array? | ||
|
||
if isinstance(data, (cls, ABCIntervalIndex)): | ||
if isinstance(data, cls): | ||
left = data.left | ||
right = data.right | ||
closed = closed or data.closed | ||
|
@@ -243,8 +244,12 @@ def _simple_new( | |
) | ||
raise ValueError(msg) | ||
|
||
result._left = left | ||
result._right = right | ||
from pandas.core.ops.array_ops import maybe_upcast_datetimelike_array | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
left = maybe_upcast_datetimelike_array(left) | ||
right = maybe_upcast_datetimelike_array(right) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
result._left = extract_array(left, extract_numpy=True) | ||
result._right = extract_array(right, extract_numpy=True) | ||
result._closed = closed | ||
if verify_integrity: | ||
result._validate() | ||
|
@@ -511,15 +516,14 @@ def __getitem__(self, value): | |
left = self.left[value] | ||
right = self.right[value] | ||
|
||
# scalar | ||
if not isinstance(left, ABCIndexClass): | ||
if not isinstance(left, (np.ndarray, ExtensionArray)): | ||
# scalar | ||
if is_scalar(left) and isna(left): | ||
return self._fill_value | ||
if np.ndim(left) > 1: | ||
# GH#30588 multi-dimensional indexer disallowed | ||
raise ValueError("multi-dimensional indexing not allowed") | ||
return Interval(left, right, self.closed) | ||
|
||
if np.ndim(left) > 1: | ||
# GH#30588 multi-dimensional indexer disallowed | ||
raise ValueError("multi-dimensional indexing not allowed") | ||
return self._shallow_copy(left, right) | ||
|
||
def __setitem__(self, key, value): | ||
|
@@ -557,15 +561,8 @@ def __setitem__(self, key, value): | |
|
||
key = check_array_indexer(self, key) | ||
|
||
# Need to ensure that left and right are updated atomically, so we're | ||
# forced to copy, update the copy, and swap in the new values. | ||
left = self.left.copy(deep=True) | ||
left._values[key] = value_left | ||
self._left = left | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
right = self.right.copy(deep=True) | ||
right._values[key] = value_right | ||
self._right = right | ||
self._left[key] = value_left | ||
self._right[key] = value_right # TODO: needs tests for not breaking views | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't the un-xfail-ed test doing that? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. good catch, will remove comment |
||
|
||
def __eq__(self, other): | ||
# ensure pandas array for list-like and eliminate non-interval scalars | ||
|
@@ -657,8 +654,10 @@ def fillna(self, value=None, method=None, limit=None): | |
|
||
self._check_closed_matches(value, name="value") | ||
|
||
left = self.left.fillna(value=value.left) | ||
right = self.right.fillna(value=value.right) | ||
from pandas import Index | ||
|
||
left = Index(self.left).fillna(value=value.left) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. umm why do you need to coerce to .fillna? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ATM self.left is an ndarray which doesn't have fillna There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ahh |
||
right = Index(self.right).fillna(value=value.right) | ||
return self._shallow_copy(left, right) | ||
|
||
@property | ||
|
@@ -684,6 +683,7 @@ def astype(self, dtype, copy=True): | |
array : ExtensionArray or ndarray | ||
ExtensionArray or NumPy ndarray with 'dtype' for its dtype. | ||
""" | ||
from pandas import Index | ||
from pandas.core.arrays.string_ import StringDtype | ||
|
||
if dtype is not None: | ||
|
@@ -695,8 +695,10 @@ def astype(self, dtype, copy=True): | |
|
||
# need to cast to different subtype | ||
try: | ||
new_left = self.left.astype(dtype.subtype) | ||
new_right = self.right.astype(dtype.subtype) | ||
# We need to use Index rules for astype to prevent casting | ||
# np.nan entries to int subtypes | ||
new_left = Index(self.left).astype(dtype.subtype) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
new_right = Index(self.right).astype(dtype.subtype) | ||
except TypeError as err: | ||
msg = ( | ||
f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" | ||
|
@@ -758,13 +760,13 @@ def copy(self): | |
------- | ||
IntervalArray | ||
""" | ||
left = self.left.copy(deep=True) | ||
right = self.right.copy(deep=True) | ||
left = self.left.copy() | ||
right = self.right.copy() | ||
closed = self.closed | ||
# TODO: Could skip verify_integrity here. | ||
return type(self).from_arrays(left, right, closed=closed) | ||
|
||
def isna(self): | ||
def isna(self) -> np.ndarray: | ||
return isna(self.left) | ||
|
||
@property | ||
|
@@ -790,7 +792,9 @@ def shift(self, periods: int = 1, fill_value: object = None) -> "IntervalArray": | |
|
||
empty_len = min(abs(periods), len(self)) | ||
if isna(fill_value): | ||
fill_value = self.left._na_value | ||
from pandas import Index | ||
|
||
fill_value = Index(self.left)._na_value | ||
empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) | ||
else: | ||
empty = self._from_sequence([fill_value] * empty_len) | ||
|
@@ -854,7 +858,9 @@ def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): | |
fill_left = fill_right = fill_value | ||
if allow_fill: | ||
if fill_value is None: | ||
fill_left = fill_right = self.left._na_value | ||
from pandas import Index | ||
|
||
fill_left = fill_right = Index(self.left)._na_value | ||
elif is_interval(fill_value): | ||
self._check_closed_matches(fill_value, name="fill_value") | ||
fill_left, fill_right = fill_value.left, fill_value.right | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -183,12 +183,8 @@ def func(intvidx_self, other, sort=False): | |
) | ||
) | ||
@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True) | ||
@inherit_names( | ||
["__array__", "overlaps", "contains", "left", "right", "length"], IntervalArray | ||
) | ||
@inherit_names( | ||
["is_non_overlapping_monotonic", "mid", "closed"], IntervalArray, cache=True | ||
) | ||
@inherit_names(["__array__", "overlaps", "contains"], IntervalArray) | ||
@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True) | ||
class IntervalIndex(IntervalMixin, ExtensionIndex): | ||
_typ = "intervalindex" | ||
_comparables = ["name"] | ||
|
@@ -201,6 +197,8 @@ class IntervalIndex(IntervalMixin, ExtensionIndex): | |
_mask = None | ||
|
||
_data: IntervalArray | ||
_values: IntervalArray | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. umm, now i am confused, what is different about these? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. without this, mypy thinks _values is ExtensionArray and has a bunch of new complaints since we access […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. kk, should try to remove this at some point |
||
|
||
# -------------------------------------------------------------------- | ||
# Constructors | ||
|
||
|
@@ -407,7 +405,7 @@ def __reduce__(self): | |
return _new_IntervalIndex, (type(self), d), None | ||
|
||
@Appender(Index.astype.__doc__) | ||
def astype(self, dtype, copy=True): | ||
def astype(self, dtype, copy: bool = True): | ||
with rewrite_exception("IntervalArray", type(self).__name__): | ||
new_values = self._values.astype(dtype, copy=copy) | ||
if is_interval_dtype(new_values.dtype): | ||
|
@@ -436,7 +434,7 @@ def is_monotonic_decreasing(self) -> bool: | |
return self[::-1].is_monotonic_increasing | ||
|
||
@cache_readonly | ||
def is_unique(self): | ||
def is_unique(self) -> bool: | ||
""" | ||
Return True if the IntervalIndex contains unique elements, else False. | ||
""" | ||
|
@@ -891,6 +889,22 @@ def _convert_list_indexer(self, keyarr): | |
|
||
# -------------------------------------------------------------------- | ||
|
||
@cache_readonly | ||
def left(self) -> Index: | ||
return Index(self._values.left) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. copy=False on these? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you add this? |
||
|
||
@cache_readonly | ||
def right(self) -> Index: | ||
return Index(self._values.right) | ||
|
||
@cache_readonly | ||
def mid(self): | ||
return Index(self._data.mid) | ||
|
||
@property | ||
def length(self): | ||
return Index(self._data.length) | ||
|
||
@Appender(Index.where.__doc__) | ||
def where(self, cond, other=None): | ||
if other is None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe better to