-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
WIP: DatetimeArray+TimedeltaArray #23415
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e7ecbf8
e309ab9
423a357
47689be
dc4a264
f92843b
92923b2
77e9a48
07d4d97
cd63892
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,7 +39,7 @@ | |
from pandas.core.dtypes.missing import isna | ||
|
||
import pandas.core.common as com | ||
from pandas.core.algorithms import checked_add_with_arr | ||
from pandas.core.algorithms import checked_add_with_arr, take | ||
|
||
from .base import ExtensionOpsMixin | ||
from pandas.util._decorators import deprecate_kwarg | ||
|
@@ -127,6 +127,10 @@ def asi8(self): | |
# ------------------------------------------------------------------ | ||
# Array-like Methods | ||
|
||
@property | ||
def nbytes(self): | ||
return self.asi8.nbytes | ||
|
||
@property | ||
def shape(self): | ||
return (len(self),) | ||
|
@@ -192,6 +196,107 @@ def astype(self, dtype, copy=True): | |
return self._box_values(self.asi8) | ||
return super(DatetimeLikeArrayMixin, self).astype(dtype, copy) | ||
|
||
# ------------------------------------------------------------------ | ||
# ExtensionArray Interface | ||
# isna | ||
# __getitem__ | ||
# __len__ | ||
# nbytes | ||
# take | ||
# _concat_same_type | ||
# copy | ||
# _from_factorized | ||
# factorize / _values_for_factorize | ||
# _from_sequence | ||
# unique | ||
# | ||
# dtype | ||
# | ||
# dropna | ||
# | ||
#* _formatting_values | ||
#* fillna | ||
#* argsort / _values_for_argsort | ||
#* _reduce | ||
|
||
def unique(self): | ||
from pandas.core.algorithms import unique1d | ||
result = unique1d(self.asi8) | ||
return self._shallow_copy(result) | ||
|
||
def _validate_fill_value(self, fill_value): | ||
""" | ||
If a fill_value is passed to `take` convert it to an i8 representation, | ||
raising ValueError if this is not possible. | ||
|
||
Parameters | ||
---------- | ||
fill_value : object | ||
|
||
Returns | ||
------- | ||
fill_value : np.int64 | ||
|
||
Raises | ||
------ | ||
ValueError | ||
""" | ||
raise AbstractMethodError(self) | ||
|
||
def take(self, indices, allow_fill=False, fill_value=None): | ||
if allow_fill: | ||
fill_value = self._validate_fill_value(fill_value) | ||
|
||
new_values = take(self.asi8, | ||
indices, | ||
allow_fill=allow_fill, | ||
fill_value=fill_value) | ||
|
||
# TODO: use "infer"? Why does not passing freq cause | ||
# failures in py37 but not py27? | ||
freq = self.freq if is_period_dtype(self) else None | ||
return self._shallow_copy(new_values, freq=freq) | ||
|
||
@classmethod | ||
def _concat_same_type(cls, to_concat): | ||
# for TimedeltaArray and PeriodArray; DatetimeArray overrides | ||
freqs = {x.freq for x in to_concat} | ||
assert len(freqs) == 1 | ||
freq = list(freqs)[0] | ||
values = np.concatenate([x.asi8 for x in to_concat]) | ||
return cls._simple_new(values, freq=freq) | ||
|
||
def copy(self, deep=False): | ||
# TODO: should `deep` determine whether we copy self.asi8? | ||
Review comment: yes, like [remainder of the comment was lost in extraction]
|
||
if is_datetime64tz_dtype(self): | ||
return type(self)(self.asi8.copy(), tz=self.tz, freq=self.freq) | ||
return type(self)(self.asi8.copy(), freq=self.freq) | ||
|
||
# Following how PeriodArray does this | ||
# TODO: ignoring `type`? | ||
def view(self, dtype=None, type=None): | ||
Review thread: (1) Why is this so complicated? Do we really need this method? (2) I implemented this because tests were raising AttributeErrors asking for it. (3) Yeah, we really can't support [remainder of the comment was lost in extraction]. (4) Really, this PR is still sufficiently early in the "WIP" phase that it isn't worth spending much time on at the moment. (5) OK, sure. |
||
if dtype is None or dtype is __builtins__['type'](self): | ||
return self | ||
return self._ndarray_values.view(dtype=dtype) | ||
|
||
def _values_for_factorize(self): | ||
return self.asi8, iNaT | ||
|
||
@classmethod | ||
def _from_factorized(cls, values, original): | ||
if is_datetime64tz_dtype(original): | ||
return cls(values, tz=original.tz, freq=original.freq) | ||
return cls(values, freq=original.freq) | ||
|
||
@classmethod | ||
def _from_sequence(cls, scalars, dtype=None, copy=False): | ||
arr = np.asarray(scalars, dtype=object) | ||
if copy: | ||
arr = arr.copy() | ||
|
||
# If necessary this will infer tz from dtype | ||
return cls(arr, dtype=dtype) | ||
|
||
# ------------------------------------------------------------------ | ||
# Null Handling | ||
|
||
|
@@ -736,8 +841,8 @@ def __rsub__(self, other): | |
# we need to wrap in DatetimeArray/Index and flip the operation | ||
if not isinstance(other, DatetimeLikeArrayMixin): | ||
# Avoid down-casting DatetimeIndex | ||
from pandas.core.arrays import DatetimeArrayMixin | ||
other = DatetimeArrayMixin(other) | ||
from pandas.core.arrays import DatetimeArray | ||
other = DatetimeArray(other) | ||
return other - self | ||
elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and | ||
not is_datetime64_any_dtype(other)): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ | |
conversion, fields, timezones, | ||
resolution as libresolution) | ||
|
||
from pandas.util._decorators import cache_readonly | ||
from pandas.util._decorators import cache_readonly, Appender | ||
from pandas.errors import PerformanceWarning | ||
from pandas import compat | ||
|
||
|
@@ -34,6 +34,7 @@ | |
from pandas.tseries.offsets import Tick, generate_range | ||
|
||
from pandas.core.arrays import datetimelike as dtl | ||
from pandas.core.arrays.base import ExtensionArray | ||
|
||
|
||
_midnight = time(0, 0) | ||
|
@@ -122,7 +123,7 @@ def wrapper(self, other): | |
except ValueError: | ||
other = np.array(other, dtype=np.object_) | ||
elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, | ||
DatetimeArrayMixin)): | ||
DatetimeArray)): | ||
# Following Timestamp convention, __eq__ is all-False | ||
# and __ne__ is all True, others raise TypeError. | ||
return ops.invalid_comparison(self, other, op) | ||
|
@@ -158,7 +159,7 @@ def wrapper(self, other): | |
return compat.set_function_name(wrapper, opname, cls) | ||
|
||
|
||
class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): | ||
class DatetimeArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): | ||
""" | ||
Assumes that subclass __new__/__init__ defines: | ||
tz | ||
|
@@ -221,7 +222,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None): | |
# if dtype has an embedded tz, capture it | ||
tz = dtl.validate_tz_from_dtype(dtype, tz) | ||
|
||
if isinstance(values, DatetimeArrayMixin): | ||
if isinstance(values, DatetimeArray): | ||
# extract nanosecond unix timestamps | ||
values = values.asi8 | ||
if values.dtype == 'i8': | ||
|
@@ -295,7 +296,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, | |
|
||
if tz is not None and index.tz is None: | ||
arr = conversion.tz_localize_to_utc( | ||
ensure_int64(index.values), | ||
ensure_int64(index.asi8), | ||
tz, ambiguous=ambiguous) | ||
|
||
index = cls(arr) | ||
|
@@ -318,7 +319,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, | |
if not right_closed and len(index) and index[-1] == end: | ||
index = index[:-1] | ||
|
||
return cls._simple_new(index.values, freq=freq, tz=tz) | ||
return cls._simple_new(index.asi8, freq=freq, tz=tz) | ||
|
||
# ----------------------------------------------------------------- | ||
# Descriptive Properties | ||
|
@@ -411,6 +412,38 @@ def __iter__(self): | |
for v in converted: | ||
yield v | ||
|
||
# ---------------------------------------------------------------- | ||
# ExtensionArray Interface | ||
|
||
@property | ||
def _ndarray_values(self): | ||
return self._data | ||
|
||
@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) | ||
def _validate_fill_value(self, fill_value): | ||
if isna(fill_value): | ||
fill_value = iNaT | ||
elif isinstance(fill_value, (datetime, np.datetime64)): | ||
self._assert_tzawareness_compat(fill_value) | ||
fill_value = Timestamp(fill_value).value | ||
else: | ||
raise ValueError("'fill_value' should be a Timestamp. " | ||
"Got '{got}'.".format(got=fill_value)) | ||
return fill_value | ||
|
||
@classmethod | ||
def _concat_same_type(cls, to_concat): | ||
freqs = {x.freq for x in to_concat} | ||
Review thread: (1) You do this freq dance a few times; maybe a helper function? (2) I'll take a look. |
||
assert len(freqs) == 1 | ||
freq = list(freqs)[0] | ||
|
||
tzs = {x.tz for x in to_concat} | ||
assert len(tzs) == 1 | ||
tz = list(tzs)[0] | ||
|
||
values = np.concatenate([x.asi8 for x in to_concat]) | ||
return cls._simple_new(values, freq=freq, tz=tz) | ||
|
||
# ----------------------------------------------------------------- | ||
# Comparison Methods | ||
|
||
|
@@ -1378,8 +1411,8 @@ def to_julian_date(self): | |
) / 24.0) | ||
|
||
|
||
DatetimeArrayMixin._add_comparison_ops() | ||
DatetimeArrayMixin._add_datetimelike_methods() | ||
DatetimeArray._add_comparison_ops() | ||
DatetimeArray._add_datetimelike_methods() | ||
|
||
|
||
def _generate_regular_range(cls, start, end, periods, freq): | ||
|
Uh oh!
There was an error while loading. Please reload this page.