Skip to content

delegate (most) datetimelike Series arithmetic ops to DatetimeIndex #18817

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ Other API Changes
- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`)
- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`)
- :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to 0.23.0

- Subtraction of :class:`DatetimeIndex` with mis-matched timezones will now raise a ``TypeError`` instead of a ``ValueError`` (:issue:`18817`)

.. _whatsnew_0220.deprecations:

Expand Down
15 changes: 14 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import numpy as np
from pandas.core.dtypes.common import (
is_integer, is_float,
is_integer, is_float, is_integer_dtype,
is_bool_dtype, _ensure_int64,
is_scalar, is_dtype_equal,
is_list_like, is_timedelta64_dtype)
Expand Down Expand Up @@ -650,6 +650,7 @@ def _add_datetimelike_methods(cls):
def __add__(self, other):
from pandas.core.index import Index
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.tseries.offsets import DateOffset
if is_timedelta64_dtype(other):
return self._add_delta(other)
Expand All @@ -664,6 +665,12 @@ def __add__(self, other):
return self.shift(other)
elif isinstance(other, (Index, datetime, np.datetime64)):
return self._add_datelike(other)
elif (isinstance(self, DatetimeIndex) and
isinstance(other, np.ndarray) and other.size == 1 and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh? I believe we already check is_integer_dtype(other). This check is much too specific.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Under the status quo, adding np.array(1, dtype=np.int64) to a DatetimeIndex behaves like adding Timedelta(nanoseconds=1). Series raises (and a Series test fails unless this is caught here).

I'm open to ideas for how to make this less hacky.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An alternative would be to -- for the time being -- check for this case in the Series op and avoid dispatching to DatetimeIndex.

is_integer_dtype(other)):
# TODO: Should this be allowed if self.freq is not None?
raise TypeError("cannot add {cls} and {typ}"
.format(cls=type(cls), typ=type(other)))
else: # pragma: no cover
return NotImplemented
cls.__add__ = __add__
Expand Down Expand Up @@ -695,6 +702,12 @@ def __sub__(self, other):
return self._sub_datelike(other)
elif isinstance(other, Period):
return self._sub_period(other)
elif (isinstance(self, DatetimeIndex) and
isinstance(other, np.ndarray) and other.size == 1 and
is_integer_dtype(other)):
# TODO: Should this be allowed if self.freq is not None?
raise TypeError("cannot add {cls} and {typ}"
.format(cls=type(cls), typ=type(other)))
else: # pragma: no cover
return NotImplemented
cls.__sub__ = __sub__
Expand Down
40 changes: 40 additions & 0 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ def _convert_to_array(self, values, name=None, other=None):
# datetime with tz
elif (isinstance(ovalues, datetime.datetime) and
hasattr(ovalues, 'tzinfo')):
# TODO: does this mean to say `ovalues.tzinfo is not None`?
values = pd.DatetimeIndex(values)
# datetime array with tz
elif is_datetimetz(values):
Expand Down Expand Up @@ -655,6 +656,15 @@ def _construct_divmod_result(left, result, index, name, dtype):
)


def _get_series_result_name(left, rvalues):
    """
    Choose the name to attach to the result of a Series arithmetic op.

    Parameters
    ----------
    left : object exposing ``.name``
        Left-hand operand.
    rvalues : object
        Right-hand operand.

    Returns
    -------
    name : object
        ``_maybe_match_name(left, rvalues)`` when ``rvalues`` is a Series,
        otherwise ``left.name``.
    """
    # TODO: Can we just use right instead of rvalues?
    if not isinstance(rvalues, ABCSeries):
        # Non-Series operands carry no competing name; keep the left one.
        return left.name
    return _maybe_match_name(left, rvalues)


def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None,
construct_result=_construct_result, **eval_kwargs):
"""
Expand Down Expand Up @@ -707,6 +717,36 @@ def wrapper(left, right, name=name, na_op=na_op):
if isinstance(right, ABCDataFrame):
return NotImplemented

elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
# Dispatch to DatetimeIndex method; there are a handful of cases
# that DatetimeIndex handles differently from Series so we avoid
# dispatching.
if right is pd.NaT:
# DatetimeIndex and Series handle this differently, so
# until that is resolved we need to special-case here
return construct_result(left, pd.NaT, index=left.index,
name=left.name, dtype=left.dtype)
# TODO: double-check that the tz part of the dtype
# is supposed to be retained
elif is_offsetlike(right):
# special handling for alignment
pass
elif isinstance(right, pd.PeriodIndex):
# not supported for DatetimeIndex
pass
elif (isinstance(right, np.ndarray) and right.size == 1 and
is_integer_dtype(right)):
# DatetimeIndex adds this as nanoseconds, needs fixing
pass
else:
left, right = _align_method_SERIES(left, right)
name = _get_series_result_name(left, right)
result = op(pd.DatetimeIndex(left), right)
result.name = name # Needs to be overriden if name is None
return construct_result(left, result,
index=left.index, name=name,
dtype=result.dtype)

left, right = _align_method_SERIES(left, right)

converted = _Op.get_op(left, right, name, na_op)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/indexes/datetimes/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,20 @@ def test_datetimeindex_sub_timestamp_overflow(self):
with pytest.raises(OverflowError):
dtimin - variant

def test_dti_add_intarray(self, tz):
    # Adding a zero-dimensional int64 ndarray to a DatetimeIndex must raise.
    int_arr = np.array(1, dtype=np.int64)
    dti = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz)
    with pytest.raises(TypeError):
        dti + int_arr

def test_dti_sub_intarray(self, tz):
    # Subtracting a zero-dimensional int64 ndarray from a DatetimeIndex
    # must raise.
    int_arr = np.array(1, dtype=np.int64)
    dti = pd.date_range('2000-01-01 09:00', freq='H', periods=10, tz=tz)
    with pytest.raises(TypeError):
        dti - int_arr


# GH 10699
@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex],
Expand Down
17 changes: 16 additions & 1 deletion pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,7 @@ def run_ops(ops, get_ser, test_ser):
# defined
for op_str in ops:
op = getattr(get_ser, op_str, None)
with tm.assert_raises_regex(TypeError, 'operate'):
with tm.assert_raises_regex(TypeError, 'operate|cannot'):
op(test_ser)

# ## timedelta64 ###
Expand Down Expand Up @@ -1203,6 +1203,21 @@ def test_datetime64_ops_nat(self):
with pytest.raises(TypeError):
nat_series_dtype_timestamp / 1

def test_datetime_sub_datetime_overflow(self):
    # GH#12534
    # The same overflow is expected for the DatetimeIndex and for a Series
    # built from it, with the Timestamp on either side of the subtraction.
    stamp = pd.Timestamp('1700-01-31')
    index = pd.date_range('1999-09-30', freq='M', periods=10)

    for box in (index, pd.Series(index)):
        with pytest.raises(OverflowError):
            box - stamp
        with pytest.raises(OverflowError):
            stamp - box


class TestSeriesOperators(TestData):
def test_op_method(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def test_shift(self):
# incompat tz
s2 = Series(date_range('2000-01-01 09:00:00', periods=5,
tz='CET'), name='foo')
pytest.raises(ValueError, lambda: s - s2)
pytest.raises(TypeError, lambda: s - s2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is an API change

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As in, it needs to be noted in whatsnew, or it needs to be avoided?


def test_shift2(self):
ts = Series(np.random.randn(5),
Expand Down