diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 0456033dbb731..40d970c7b20f2 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -30,6 +30,7 @@ from pandas._libs import tslib from pandas._libs.tslib import Timestamp, iNaT, NaT from tslibs.timezones cimport ( is_utc, is_tzlocal, get_utcoffset, get_dst_info, maybe_get_tz) +from tslibs.timedeltas cimport delta_to_nanoseconds from tslibs.parsing import parse_time_string, NAT_SENTINEL from tslibs.frequencies cimport get_freq_code @@ -716,8 +717,8 @@ cdef class _Period(object): if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): - nanos = tslib._delta_to_nanoseconds(other) - offset_nanos = tslib._delta_to_nanoseconds(offset) + nanos = delta_to_nanoseconds(other) + offset_nanos = delta_to_nanoseconds(offset) if nanos % offset_nanos == 0: ordinal = self.ordinal + (nanos // offset_nanos) diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index ec060335c220e..f2edf48a6b829 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -3,7 +3,8 @@ from decimal import Decimal cimport util cimport cython from tslibs.nattype import NaT -from tslib cimport convert_to_tsobject, convert_to_timedelta64 +from tslib cimport convert_to_tsobject +from tslibs.timedeltas cimport convert_to_timedelta64 from tslibs.timezones cimport get_timezone from datetime import datetime, timedelta iNaT = util.get_nat() diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index 443b3867eb2b5..1c2c679904868 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -2,7 +2,6 @@ from numpy cimport ndarray, int64_t from tslibs.conversion cimport convert_to_tsobject -cpdef convert_to_timedelta64(object, object) cdef bint _check_all_nulls(obj) cdef _to_i8(object val) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 08a0ed713d936..6d793b6770113 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -71,8 +71,6 @@ from .tslibs.parsing import parse_datetime_string cimport cython -from pandas.compat import iteritems - import warnings import pytz @@ -85,7 +83,8 @@ import_array() cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT -from tslibs.timedeltas cimport parse_timedelta_string, cast_from_unit +from tslibs.timedeltas cimport cast_from_unit, delta_to_nanoseconds +from tslibs.timedeltas import Timedelta from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, @@ -1069,7 +1068,7 @@ cdef class _Timestamp(datetime): return Timestamp((self.freq * other).apply(self), freq=self.freq) elif PyDelta_Check(other) or hasattr(other, 'delta'): - nanos = _delta_to_nanoseconds(other) + nanos = delta_to_nanoseconds(other) result = Timestamp(self.value + nanos, tz=self.tzinfo, freq=self.freq) if getattr(other, 'normalize', False): @@ -1789,366 +1788,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return oresult -from tslibs.timedeltas cimport _Timedelta as __Timedelta - -# Similar to Timestamp/datetime, this is a construction requirement for -# timedeltas that we need to do object instantiation in python. This will -# serve as a C extension type that shadows the Python class, where we do any -# heavy lifting. -cdef class _Timedelta(__Timedelta): - - def __hash__(_Timedelta self): - if self._has_ns(): - return hash(self.value) - else: - return timedelta.__hash__(self) - - def __richcmp__(_Timedelta self, object other, int op): - cdef: - _Timedelta ots - int ndim - - if isinstance(other, _Timedelta): - ots = other - elif PyDelta_Check(other): - ots = Timedelta(other) - else: - ndim = getattr(other, _NDIM_STRING, -1) - - if ndim != -1: - if ndim == 0: - if is_timedelta64_object(other): - other = Timedelta(other) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - - # only allow ==, != ops - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, - type(other).__name__)) - if util.is_array(other): - return PyObject_RichCompare(np.array([self]), other, op) - return PyObject_RichCompare(other, self, reverse_ops[op]) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) - - return cmp_scalar(self.value, ots.value, op) - - -def _binary_op_method_timedeltalike(op, name): - # define a binary operation that only works if the other argument is - # timedelta like or an array of timedeltalike - def f(self, other): - # an offset - if hasattr(other, 'delta') and not isinstance(other, Timedelta): - return op(self, other.delta) - - # a datetimelike - if (isinstance(other, (datetime, np.datetime64)) - and not (isinstance(other, Timestamp) or other is NaT)): - return op(self, Timestamp(other)) - - # nd-array like - if hasattr(other, 'dtype'): - if other.dtype.kind not in ['m', 'M']: - # raise rathering than letting numpy return wrong answer - return NotImplemented - return op(self.to_timedelta64(), other) - - if not _validate_ops_compat(other): - return NotImplemented - - if other is NaT: - return NaT - - try: - other = Timedelta(other) - except ValueError: - # failed to parse as timedelta - return NotImplemented - - return Timedelta(op(self.value, other.value), unit='ns') - - f.__name__ = name - return f - - -def _op_unary_method(func, name): - - def f(self): - return Timedelta(func(self.value), unit='ns') - f.__name__ = name - return f - - -cdef bint _validate_ops_compat(other): - # return True if we are compat with operating - if _checknull_with_nat(other): - return True - elif PyDelta_Check(other) or is_timedelta64_object(other): - return True - elif util.is_string_object(other): - return True - elif hasattr(other, 'delta'): - return True - return False - - -# Python front end to C extension type _Timedelta -# This serves as the box for timedelta64 - - -class Timedelta(_Timedelta): - """ - Represents a duration, the difference between two dates or times. - - Timedelta is the pandas equivalent of python's ``datetime.timedelta`` - and is interchangable with it in most cases. - - Parameters - ---------- - value : Timedelta, timedelta, np.timedelta64, string, or integer - unit : string, [D,h,m,s,ms,us,ns] - Denote the unit of the input, if input is an integer. Default 'ns'. - days, seconds, microseconds, - milliseconds, minutes, hours, weeks : numeric, optional - Values for construction in compat with datetime.timedelta. - np ints and floats will be coereced to python ints and floats. - - Notes - ----- - The ``.value`` attribute is always in ns. - - """ - - def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta td_base - - if value is _no_input: - if not len(kwargs): - raise ValueError("cannot construct a Timedelta without a " - "value/unit or descriptive keywords " - "(days,seconds....)") - - def _to_py_int_float(v): - if is_integer_object(v): - return int(v) - elif is_float_object(v): - return float(v) - raise TypeError("Invalid type {0}. Must be int or " - "float.".format(type(v))) - - kwargs = dict([(k, _to_py_int_float(v)) - for k, v in iteritems(kwargs)]) - - try: - nano = kwargs.pop('nanoseconds', 0) - value = convert_to_timedelta64( - timedelta(**kwargs), 'ns') + nano - except TypeError as e: - raise ValueError("cannot construct a Timedelta from the " - "passed arguments, allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]") - - if isinstance(value, Timedelta): - value = value.value - elif is_string_object(value): - value = np.timedelta64(parse_timedelta_string(value)) - elif PyDelta_Check(value): - value = convert_to_timedelta64(value, 'ns') - elif is_timedelta64_object(value): - if unit is not None: - value = value.astype('timedelta64[{0}]'.format(unit)) - value = value.astype('timedelta64[ns]') - elif hasattr(value, 'delta'): - value = np.timedelta64(_delta_to_nanoseconds(value.delta), 'ns') - elif is_integer_object(value) or is_float_object(value): - # unit=None is de-facto 'ns' - value = convert_to_timedelta64(value, unit) - elif _checknull_with_nat(value): - return NaT - else: - raise ValueError("Value must be Timedelta, string, integer, " - "float, timedelta or convertible") - - if is_timedelta64_object(value): - value = value.view('i8') - - # nat - if value == NPY_NAT: - return NaT - - # make timedelta happy - td_base = _Timedelta.__new__(cls, microseconds=int(value) / 1000) - td_base.value = value - td_base.is_populated = 0 - return td_base - - def _round(self, freq, rounder): - - cdef int64_t result, unit - - from pandas.tseries.frequencies import to_offset - unit = to_offset(freq).nanos - result = unit * rounder(self.value / float(unit)) - return Timedelta(result, unit='ns') - - def round(self, freq): - """ - Round the Timedelta to the specified resolution - - Returns - ------- - a new Timedelta rounded to the given resolution of `freq` - - Parameters - ---------- - freq : a freq string indicating the rounding resolution - - Raises - ------ - ValueError if the freq cannot be converted - """ - return self._round(freq, np.round) - - def floor(self, freq): - """ - return a new Timedelta floored to this resolution - - Parameters - ---------- - freq : a freq string indicating the flooring resolution - """ - return self._round(freq, np.floor) - - def ceil(self, freq): - """ - return a new Timedelta ceiled to this resolution - - Parameters - ---------- - freq : a freq string indicating the ceiling resolution - """ - return self._round(freq, np.ceil) - - def __setstate__(self, state): - (value) = state - self.value = value - - def __reduce__(self): - object_state = self.value, - return (Timedelta, object_state) - - __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') - __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') - __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') - __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') - - def __mul__(self, other): - - # nd-array like - if hasattr(other, 'dtype'): - return other * self.to_timedelta64() - - if other is NaT: - return NaT - - # only integers and floats allowed - if not (is_integer_object(other) or is_float_object(other)): - return NotImplemented - - return Timedelta(other * self.value, unit='ns') - - __rmul__ = __mul__ - - def __truediv__(self, other): - - if hasattr(other, 'dtype'): - return self.to_timedelta64() / other - - # integers or floats - if is_integer_object(other) or is_float_object(other): - return Timedelta(self.value /other, unit='ns') - - if not _validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return np.nan - return self.value /float(other.value) - - def __rtruediv__(self, other): - if hasattr(other, 'dtype'): - return other / self.to_timedelta64() - - if not _validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return NaT - return float(other.value) / self.value - - if not PY3: - __div__ = __truediv__ - __rdiv__ = __rtruediv__ - - def __floordiv__(self, other): - - if hasattr(other, 'dtype'): - - # work with i8 - other = other.astype('m8[ns]').astype('i8') - - return self.value // other - - # integers only - if is_integer_object(other): - return Timedelta(self.value // other, unit='ns') - - if not _validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return np.nan - return self.value // other.value - - def __rfloordiv__(self, other): - if hasattr(other, 'dtype'): - - # work with i8 - other = other.astype('m8[ns]').astype('i8') - return other // self.value - - if not _validate_ops_compat(other): - return NotImplemented - - other = Timedelta(other) - if other is NaT: - return NaT - return other.value // self.value - - __inv__ = _op_unary_method(lambda x: -x, '__inv__') - __neg__ = _op_unary_method(lambda x: -x, '__neg__') - __pos__ = _op_unary_method(lambda x: x, '__pos__') - __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') - - -# resolution in ns -Timedelta.min = Timedelta(np.iinfo(np.int64).min +1) -Timedelta.max = Timedelta(np.iinfo(np.int64).max) - cdef PyTypeObject* td_type = Timedelta @@ -2156,122 +1795,9 @@ cdef inline bint is_timedelta(object o): return Py_TYPE(o) == td_type # isinstance(o, Timedelta) -cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): - """ - Convert an ndarray to an array of timedeltas. If errors == 'coerce', - coerce non-convertible objects to NaT. Otherwise, raise. - """ - - cdef: - Py_ssize_t i, n - ndarray[int64_t] iresult - - if errors not in ('ignore', 'raise', 'coerce'): - raise ValueError("errors must be one of 'ignore', " - "'raise', or 'coerce'}") - - n = values.shape[0] - result = np.empty(n, dtype='m8[ns]') - iresult = result.view('i8') - - # Usually, we have all strings. If so, we hit the fast path. - # If this path fails, we try conversion a different way, and - # this is where all of the error handling will take place. - try: - for i in range(n): - result[i] = parse_timedelta_string(values[i]) - except: - for i in range(n): - try: - result[i] = convert_to_timedelta64(values[i], unit) - except ValueError: - if errors == 'coerce': - result[i] = NPY_NAT - else: - raise - - return iresult - - -cpdef convert_to_timedelta64(object ts, object unit): - """ - Convert an incoming object to a timedelta64 if possible - - Handle these types of objects: - - timedelta/Timedelta - - timedelta64 - - an offset - - np.int64 (with unit providing a possible modifier) - - None/NaT - - Return an ns based int64 - - # kludgy here until we have a timedelta scalar - # handle the numpy < 1.7 case - """ - if _checknull_with_nat(ts): - return np.timedelta64(NPY_NAT) - elif isinstance(ts, Timedelta): - # already in the proper format - ts = np.timedelta64(ts.value) - elif is_datetime64_object(ts): - # only accept a NaT here - if ts.astype('int64') == NPY_NAT: - return np.timedelta64(NPY_NAT) - elif is_timedelta64_object(ts): - ts = ts.astype("m8[{0}]".format(unit.lower())) - elif is_integer_object(ts): - if ts == NPY_NAT: - return np.timedelta64(NPY_NAT) - else: - if util.is_array(ts): - ts = ts.astype('int64').item() - if unit in ['Y', 'M', 'W']: - ts = np.timedelta64(ts, unit) - else: - ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts) - elif is_float_object(ts): - if util.is_array(ts): - ts = ts.astype('int64').item() - if unit in ['Y', 'M', 'W']: - ts = np.timedelta64(int(ts), unit) - else: - ts = cast_from_unit(ts, unit) - ts = np.timedelta64(ts) - elif is_string_object(ts): - ts = np.timedelta64(parse_timedelta_string(ts)) - elif hasattr(ts, 'delta'): - ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') - - if PyDelta_Check(ts): - ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') - elif not is_timedelta64_object(ts): - raise ValueError("Invalid type for timedelta " - "scalar: %s" % type(ts)) - return ts.astype('timedelta64[ns]') - - # ---------------------------------------------------------------------- # Conversion routines -cpdef int64_t _delta_to_nanoseconds(delta) except? -1: - if util.is_array(delta): - return delta.astype('m8[ns]').astype('int64') - if hasattr(delta, 'nanos'): - return delta.nanos - if hasattr(delta, 'delta'): - delta = delta.delta - if is_timedelta64_object(delta): - return delta.astype("timedelta64[ns]").item() - if is_integer_object(delta): - return delta - - return (delta.days * 24 * 60 * 60 * 1000000 + - delta.seconds * 1000000 + - delta.microseconds) * 1000 - - def cast_to_nanoseconds(ndarray arr): cdef: Py_ssize_t i, n = arr.size diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 4dfd3f3e9eca5..3e7b88b208e89 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,19 +3,11 @@ from cpython.datetime cimport timedelta -from numpy cimport int64_t +from numpy cimport int64_t, ndarray # Exposed for tslib, not intended for outside use. cdef parse_timedelta_string(object ts) cpdef int64_t cast_from_unit(object ts, object unit) except? -1 - - -cdef class _Timedelta(timedelta): - cdef readonly: - int64_t value # nanoseconds - object freq # frequency reference - bint is_populated # are my components populated - int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns - - cpdef timedelta to_pytimedelta(_Timedelta self) - cpdef bint _has_ns(self) +cpdef int64_t delta_to_nanoseconds(delta) except? -1 +cpdef convert_to_timedelta64(object ts, object unit) +cpdef array_to_timedelta64(ndarray[object] values, unit=*, errors=*) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 2f177868a6947..623babe5422a8 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -5,22 +5,31 @@ import collections import sys cdef bint PY3 = (sys.version_info[0] >= 3) -from cpython cimport PyUnicode_Check +from cython cimport Py_ssize_t + +from cpython cimport PyUnicode_Check, Py_NE, Py_EQ, PyObject_RichCompare import numpy as np cimport numpy as np -from numpy cimport int64_t +from numpy cimport int64_t, ndarray np.import_array() from cpython.datetime cimport (datetime, timedelta, - PyDelta_Check, PyDateTime_IMPORT) + PyDateTime_CheckExact, + PyDateTime_Check, PyDelta_Check, + PyDateTime_IMPORT) PyDateTime_IMPORT cimport util -from util cimport is_timedelta64_object +from util cimport (is_timedelta64_object, is_datetime64_object, + is_integer_object, is_float_object, + is_string_object) + +from np_datetime cimport cmp_scalar, reverse_ops -from nattype import nat_strings +from nattype import nat_strings, NaT +from nattype cimport _checknull_with_nat # ---------------------------------------------------------------------- # Constants @@ -66,8 +75,122 @@ cdef dict timedelta_abbrevs = { 'D': 'd', 'nanos': 'ns', 'nanosecond': 'ns'} +_no_input = object() + # ---------------------------------------------------------------------- +cpdef int64_t delta_to_nanoseconds(delta) except? -1: + if util.is_array(delta): + return delta.astype('m8[ns]').astype('int64') + if hasattr(delta, 'nanos'): + return delta.nanos + if hasattr(delta, 'delta'): + delta = delta.delta + if is_timedelta64_object(delta): + return delta.astype("timedelta64[ns]").item() + if is_integer_object(delta): + return delta + + return (delta.days * 24 * 60 * 60 * 1000000 + + delta.seconds * 1000000 + + delta.microseconds) * 1000 + + +cpdef convert_to_timedelta64(object ts, object unit): + """ + Convert an incoming object to a timedelta64 if possible + + Handle these types of objects: + - timedelta/Timedelta + - timedelta64 + - an offset + - np.int64 (with unit providing a possible modifier) + - None/NaT + + Return an ns based int64 + + # kludgy here until we have a timedelta scalar + # handle the numpy < 1.7 case + """ + if _checknull_with_nat(ts): + return np.timedelta64(NPY_NAT) + elif isinstance(ts, Timedelta): + # already in the proper format + ts = np.timedelta64(ts.value) + elif is_datetime64_object(ts): + # only accept a NaT here + if ts.astype('int64') == NPY_NAT: + return np.timedelta64(NPY_NAT) + elif is_timedelta64_object(ts): + ts = ts.astype("m8[{0}]".format(unit.lower())) + elif is_integer_object(ts): + if ts == NPY_NAT: + return np.timedelta64(NPY_NAT) + else: + if util.is_array(ts): + ts = ts.astype('int64').item() + if unit in ['Y', 'M', 'W']: + ts = np.timedelta64(ts, unit) + else: + ts = cast_from_unit(ts, unit) + ts = np.timedelta64(ts) + elif is_float_object(ts): + if util.is_array(ts): + ts = ts.astype('int64').item() + if unit in ['Y', 'M', 'W']: + ts = np.timedelta64(int(ts), unit) + else: + ts = cast_from_unit(ts, unit) + ts = np.timedelta64(ts) + elif is_string_object(ts): + ts = np.timedelta64(parse_timedelta_string(ts)) + elif hasattr(ts, 'delta'): + ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') + + if PyDelta_Check(ts): + ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') + elif not is_timedelta64_object(ts): + raise ValueError("Invalid type for timedelta " + "scalar: %s" % type(ts)) + return ts.astype('timedelta64[ns]') + + +cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): + """ + Convert an ndarray to an array of timedeltas. If errors == 'coerce', + coerce non-convertible objects to NaT. Otherwise, raise. + """ + + cdef: + Py_ssize_t i, n + ndarray[int64_t] iresult + + if errors not in ('ignore', 'raise', 'coerce'): + raise ValueError("errors must be one of 'ignore', " + "'raise', or 'coerce'}") + + n = values.shape[0] + result = np.empty(n, dtype='m8[ns]') + iresult = result.view('i8') + + # Usually, we have all strings. If so, we hit the fast path. + # If this path fails, we try conversion a different way, and + # this is where all of the error handling will take place. + try: + for i in range(n): + result[i] = parse_timedelta_string(values[i]) + except: + for i in range(n): + try: + result[i] = convert_to_timedelta64(values[i], unit) + except ValueError: + if errors == 'coerce': + result[i] = NPY_NAT + else: + raise + + return iresult + cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: """ return a casting of the unit represented to nanoseconds @@ -315,23 +438,145 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): n = ''.join(number) + '.' + ''.join(frac) return cast_from_unit(float(n), unit) + +# ---------------------------------------------------------------------- +# Timedelta ops utilities + +cdef bint _validate_ops_compat(other): + # return True if we are compat with operating + if _checknull_with_nat(other): + return True + elif PyDelta_Check(other) or is_timedelta64_object(other): + return True + elif is_string_object(other): + return True + elif hasattr(other, 'delta'): + return True + return False + + +def _op_unary_method(func, name): + def f(self): + return Timedelta(func(self.value), unit='ns') + f.__name__ = name + return f + + +def _binary_op_method_timedeltalike(op, name): + # define a binary operation that only works if the other argument is + # timedelta like or an array of timedeltalike + def f(self, other): + if hasattr(other, 'delta') and not PyDelta_Check(other): + # offsets.Tick + return op(self, other.delta) + + elif other is NaT: + return NaT + + elif is_datetime64_object(other) or PyDateTime_CheckExact(other): + # the PyDateTime_CheckExact case is for a datetime object that + # is specifically *not* a Timestamp, as the Timestamp case will be + # handled after `_validate_ops_compat` returns False below + from ..tslib import Timestamp + return op(self, Timestamp(other)) + # We are implicitly requiring the canonical behavior to be + # defined by Timestamp methods. + + elif hasattr(other, 'dtype'): + # nd-array like + if other.dtype.kind not in ['m', 'M']: + # raise rathering than letting numpy return wrong answer + return NotImplemented + return op(self.to_timedelta64(), other) + + elif not _validate_ops_compat(other): + return NotImplemented + + try: + other = Timedelta(other) + except ValueError: + # failed to parse as timedelta + return NotImplemented + + return Timedelta(op(self.value, other.value), unit='ns') + + f.__name__ = name + return f + + # ---------------------------------------------------------------------- # Timedelta Construction +cdef _to_py_int_float(v): + # Note: This used to be defined inside Timedelta.__new__ + # but cython will not allow `cdef` functions to be defined dynamically. + if is_integer_object(v): + return int(v) + elif is_float_object(v): + return float(v) + raise TypeError("Invalid type {0}. Must be int or " + "float.".format(type(v))) + + # Similar to Timestamp/datetime, this is a construction requirement for # timedeltas that we need to do object instantiation in python. This will # serve as a C extension type that shadows the Python class, where we do any # heavy lifting. cdef class _Timedelta(timedelta): - # cdef readonly: - # int64_t value # nanoseconds - # object freq # frequency reference - # bint is_populated # are my components populated - # int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns + cdef readonly: + int64_t value # nanoseconds + object freq # frequency reference + bint is_populated # are my components populated + int64_t _sign, _d, _h, _m, _s, _ms, _us, _ns # higher than np.ndarray and np.matrix __array_priority__ = 100 + def __hash__(_Timedelta self): + if self._has_ns(): + return hash(self.value) + else: + return timedelta.__hash__(self) + + def __richcmp__(_Timedelta self, object other, int op): + cdef: + _Timedelta ots + int ndim + + if isinstance(other, _Timedelta): + ots = other + elif PyDelta_Check(other): + ots = Timedelta(other) + else: + ndim = getattr(other, "ndim", -1) + + if ndim != -1: + if ndim == 0: + if is_timedelta64_object(other): + other = Timedelta(other) + else: + if op == Py_EQ: + return False + elif op == Py_NE: + return True + + # only allow ==, != ops + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, + type(other).__name__)) + if util.is_array(other): + return PyObject_RichCompare(np.array([self]), other, op) + return PyObject_RichCompare(other, self, reverse_ops[op]) + else: + if op == Py_EQ: + return False + elif op == Py_NE: + return True + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, type(other).__name__)) + + return cmp_scalar(self.value, ots.value, op) + cpdef bint _has_ns(self): return self.value % 1000 != 0 @@ -621,3 +866,239 @@ cdef class _Timedelta(timedelta): tpl = 'P{td.days}DT{td.hours}H{td.minutes}M{seconds}S'.format( td=components, seconds=seconds) return tpl + + +# Python front end to C extension type _Timedelta +# This serves as the box for timedelta64 + +class Timedelta(_Timedelta): + """ + Represents a duration, the difference between two dates or times. + + Timedelta is the pandas equivalent of python's ``datetime.timedelta`` + and is interchangable with it in most cases. + + Parameters + ---------- + value : Timedelta, timedelta, np.timedelta64, string, or integer + unit : string, [D,h,m,s,ms,us,ns] + Denote the unit of the input, if input is an integer. Default 'ns'. + days, seconds, microseconds, + milliseconds, minutes, hours, weeks : numeric, optional + Values for construction in compat with datetime.timedelta. + np ints and floats will be coereced to python ints and floats. + + Notes + ----- + The ``.value`` attribute is always in ns. + + """ + def __new__(cls, object value=_no_input, unit=None, **kwargs): + cdef _Timedelta td_base + + if value is _no_input: + if not len(kwargs): + raise ValueError("cannot construct a Timedelta without a " + "value/unit or descriptive keywords " + "(days,seconds....)") + + kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} + + nano = kwargs.pop('nanoseconds', 0) + try: + value = nano + convert_to_timedelta64(timedelta(**kwargs), + 'ns') + except TypeError as e: + raise ValueError("cannot construct a Timedelta from the " + "passed arguments, allowed keywords are " + "[weeks, days, hours, minutes, seconds, " + "milliseconds, microseconds, nanoseconds]") + + if isinstance(value, Timedelta): + value = value.value + elif util.is_string_object(value): + value = np.timedelta64(parse_timedelta_string(value)) + elif PyDelta_Check(value): + value = convert_to_timedelta64(value, 'ns') + elif is_timedelta64_object(value): + if unit is not None: + value = value.astype('timedelta64[{0}]'.format(unit)) + value = value.astype('timedelta64[ns]') + elif hasattr(value, 'delta'): + value = np.timedelta64(delta_to_nanoseconds(value.delta), 'ns') + elif is_integer_object(value) or util.is_float_object(value): + # unit=None is de-facto 'ns' + value = convert_to_timedelta64(value, unit) + elif _checknull_with_nat(value): + return NaT + else: + raise ValueError( + "Value must be Timedelta, string, integer, " + "float, timedelta or convertible") + + if is_timedelta64_object(value): + value = value.view('i8') + + # nat + if value == NPY_NAT: + return NaT + + # make timedelta happy + td_base = _Timedelta.__new__(cls, microseconds=int(value) / 1000) + td_base.value = value + td_base.is_populated = 0 + return td_base + + def __setstate__(self, state): + (value) = state + self.value = value + + def __reduce__(self): + object_state = self.value, + return (Timedelta, object_state) + + def _round(self, freq, rounder): + cdef: + int64_t result, unit + + from pandas.tseries.frequencies import to_offset + unit = to_offset(freq).nanos + result = unit * rounder(self.value / float(unit)) + return Timedelta(result, unit='ns') + + def round(self, freq): + """ + Round the Timedelta to the specified resolution + + Returns + ------- + a new Timedelta rounded to the given resolution of `freq` + + Parameters + ---------- + freq : a freq string indicating the rounding resolution + + Raises + ------ + ValueError if the freq cannot be converted + """ + return self._round(freq, np.round) + + def floor(self, freq): + """ + return a new Timedelta floored to this resolution + + Parameters + ---------- + freq : a freq string indicating the flooring resolution + """ + return self._round(freq, np.floor) + + def ceil(self, freq): + """ + return a new Timedelta ceiled to this resolution + + Parameters + ---------- + freq : a freq string indicating the ceiling resolution + """ + return self._round(freq, np.ceil) + + # ---------------------------------------------------------------- + # Arithmetic Methods + # TODO: Can some of these be defined in the cython class? + + __inv__ = _op_unary_method(lambda x: -x, '__inv__') + __neg__ = _op_unary_method(lambda x: -x, '__neg__') + __pos__ = _op_unary_method(lambda x: x, '__pos__') + __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + + __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') + __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') + __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') + __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') + + def __mul__(self, other): + if hasattr(other, 'dtype'): + # ndarray-like + return other * self.to_timedelta64() + + elif other is NaT: + return NaT + + elif not (is_integer_object(other) or is_float_object(other)): + # only integers and floats allowed + return NotImplemented + + return Timedelta(other * self.value, unit='ns') + + __rmul__ = __mul__ + + def __truediv__(self, other): + if hasattr(other, 'dtype'): + return self.to_timedelta64() / other + + elif is_integer_object(other) or is_float_object(other): + # integers or floats + return Timedelta(self.value / other, unit='ns') + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value / float(other.value) + + def __rtruediv__(self, other): + if hasattr(other, 'dtype'): + return other / self.to_timedelta64() + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return NaT + return float(other.value) / self.value + + if not PY3: + __div__ = __truediv__ + __rdiv__ = __rtruediv__ + + def __floordiv__(self, other): + if hasattr(other, 'dtype'): + # work with i8 + other = other.astype('m8[ns]').astype('i8') + return self.value // other + + elif is_integer_object(other): + # integers only + return Timedelta(self.value // other, unit='ns') + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value // other.value + + def __rfloordiv__(self, other): + if hasattr(other, 'dtype'): + # work with i8 + other = other.astype('m8[ns]').astype('i8') + return other // self.value + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return NaT + return other.value // self.value + + +# resolution in ns +Timedelta.min = Timedelta(np.iinfo(np.int64).min +1) +Timedelta.max = Timedelta(np.iinfo(np.int64).max) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index ebc0d50d8ba05..4934ccb49b844 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -24,8 +24,9 @@ from pandas.core.common import AbstractMethodError import pandas.io.formats.printing as printing -from pandas._libs import (tslib as libts, lib, iNaT, NaT) +from pandas._libs import lib, iNaT, NaT from pandas._libs.period import Period +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly @@ -701,7 +702,7 @@ def _add_delta_td(self, other): # add a delta of a timedeltalike # return the i8 result view - inc = libts._delta_to_nanoseconds(other) + inc = delta_to_nanoseconds(other) new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan).view('i8') if self.hasnans: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c4938b556c8dd..bd069c1d22403 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -36,6 +36,7 @@ get_period_field_arr, _validate_end_alias, _quarter_to_myear) from pandas._libs.tslibs.fields import isleapyear_arr +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, _ensure_index @@ -652,10 +653,10 @@ def _maybe_convert_timedelta(self, other): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): if isinstance(other, np.ndarray): - nanos = np.vectorize(tslib._delta_to_nanoseconds)(other) + nanos = np.vectorize(delta_to_nanoseconds)(other) else: - nanos = tslib._delta_to_nanoseconds(other) - offset_nanos = tslib._delta_to_nanoseconds(offset) + nanos = delta_to_nanoseconds(other) + offset_nanos = delta_to_nanoseconds(offset) check = np.all(nanos % offset_nanos == 0) if check: return nanos // offset_nanos @@ -672,8 +673,8 @@ def _maybe_convert_timedelta(self, other): elif is_timedelta64_dtype(other): offset = frequencies.to_offset(self.freq) if isinstance(offset, offsets.Tick): - nanos = tslib._delta_to_nanoseconds(other) - offset_nanos = tslib._delta_to_nanoseconds(offset) + nanos = delta_to_nanoseconds(other) + offset_nanos = delta_to_nanoseconds(offset) if (nanos % offset_nanos).all() == 0: return nanos // offset_nanos elif is_integer(other): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 729edc81bb642..c9701d0d8dae8 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -34,6 +34,7 @@ from pandas.tseries.offsets import Tick, DateOffset from pandas._libs import (lib, index as libindex, tslib as libts, join as libjoin, Timedelta, NaT, iNaT) +from pandas._libs.tslibs.timedeltas import array_to_timedelta64 def _td_index_cmp(opname, nat_result=False): @@ -286,7 +287,7 @@ def _box_func(self): def _simple_new(cls, values, name=None, freq=None, **kwargs): values = np.array(values, copy=False) if values.dtype == np.object_: - values = libts.array_to_timedelta64(values) + values = array_to_timedelta64(values) if values.dtype != _TD_DTYPE: values = _ensure_int64(values).view(_TD_DTYPE) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 5a571f9077999..eeb6faf20ffce 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -13,7 +13,7 @@ from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.core.indexes.datetimes import DatetimeIndex, date_range from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.tseries.offsets import DateOffset, Tick, Day, _delta_to_nanoseconds +from pandas.tseries.offsets import DateOffset, Tick, Day, delta_to_nanoseconds from pandas.core.indexes.period import PeriodIndex import pandas.core.common as com import pandas.core.algorithms as algos @@ -1186,7 +1186,7 @@ def _adjust_bin_edges(self, binner, ax_values): bin_edges = binner.asi8 if self.freq != 'D' and is_superperiod(self.freq, 'D'): - day_nanos = _delta_to_nanoseconds(timedelta(1)) + day_nanos = delta_to_nanoseconds(timedelta(1)) if self.closed == 'right': bin_edges = bin_edges + day_nanos - 1 @@ -1312,7 +1312,7 @@ def _get_range_edges(first, last, offset, closed='left', base=0): if isinstance(offset, Tick): is_day = isinstance(offset, Day) - day_nanos = _delta_to_nanoseconds(timedelta(1)) + day_nanos = delta_to_nanoseconds(timedelta(1)) # #1165 if (is_day and day_nanos % offset.nanos == 0) or not is_day: diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index f61d9f90d6ca2..94e2f2342bd51 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -5,6 +5,8 @@ import numpy as np import pandas as pd import pandas._libs.tslib as tslib +from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64, + array_to_timedelta64) from pandas.core.dtypes.common import ( _ensure_object, @@ -140,7 +142,7 @@ def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'): """Convert string 'r' to a timedelta object.""" try: - result = tslib.convert_to_timedelta64(r, unit) + result = convert_to_timedelta64(r, unit) except ValueError: if errors == 'raise': raise @@ -169,8 +171,8 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): 'timedelta64[ns]', copy=False) else: try: - value = tslib.array_to_timedelta64(_ensure_object(arg), - unit=unit, errors=errors) + value = array_to_timedelta64(_ensure_object(arg), + unit=unit, errors=errors) value = value.astype('timedelta64[ns]', copy=False) except ValueError: if errors == 'ignore': diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 862f289d81954..5843aaa23be57 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -14,7 +14,7 @@ from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta from pandas.util._decorators import cache_readonly -from pandas._libs.tslib import _delta_to_nanoseconds +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas._libs.tslibs.offsets import ( ApplyTypeError, as_datetime, _is_normalized, @@ -2569,7 +2569,7 @@ def delta(self): @property def nanos(self): - return _delta_to_nanoseconds(self.delta) + return delta_to_nanoseconds(self.delta) def apply(self, other): # Timestamp can handle tz and nano sec, thus no need to use apply_wraps @@ -2612,7 +2612,7 @@ def _delta_to_tick(delta): else: return Second(seconds) else: - nanos = _delta_to_nanoseconds(delta) + nanos = delta_to_nanoseconds(delta) if nanos % 1000000 == 0: return Milli(nanos // 1000000) elif nanos % 1000 == 0: diff --git a/setup.py b/setup.py index f5c27eb3498c5..572c426f26ae3 100755 --- a/setup.py +++ b/setup.py @@ -525,6 +525,7 @@ def pxd(name): 'pyxfile': '_libs/period', 'pxdfiles': ['_libs/src/util', '_libs/lib', + '_libs/tslibs/timedeltas', '_libs/tslibs/timezones', '_libs/tslibs/nattype'], 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], @@ -587,7 +588,8 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.tslibs.timedeltas': { 'pyxfile': '_libs/tslibs/timedeltas', - 'pxdfiles': ['_libs/src/util'], + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/nattype'], 'depends': np_datetime_headers, 'sources': np_datetime_sources}, '_libs.tslibs.timezones': {