From 13336d9de972b029a066800c3953136a17644f08 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 29 Nov 2018 14:26:08 -0800 Subject: [PATCH 1/6] WIP: fix array_to_datetime --- pandas/_libs/tslib.pyx | 159 +++++++++++-------- pandas/_libs/tslibs/conversion.pyx | 3 + pandas/_libs/tslibs/timezones.pxd | 2 + pandas/_libs/tslibs/timezones.pyx | 2 +- pandas/core/dtypes/cast.py | 14 +- pandas/core/tools/datetimes.py | 139 ++++++++-------- pandas/tests/indexes/datetimes/test_tools.py | 4 +- 7 files changed, 182 insertions(+), 141 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ddeaffbfb3cc0..6b5b927f92015 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -32,7 +32,9 @@ from tslibs.np_datetime import OutOfBoundsDatetime from tslibs.parsing import parse_datetime_string from tslibs.timedeltas cimport cast_from_unit -from tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info +from tslibs.timezones cimport ( + is_utc, is_tzlocal, get_dst_info, tz_cache_key, get_utcoffset, + is_fixed_offset, tz_compare, get_timezone) from tslibs.timezones import UTC from tslibs.conversion cimport (tz_convert_single, _TSObject, convert_datetime_to_tsobject, @@ -459,12 +461,60 @@ def array_with_unit_to_datetime(ndarray values, object unit, return oresult +cdef get_key(tz): + if tz is None: + return None + if is_fixed_offset(tz): + # TODO: these should all be mapped together + try: + # pytz + return str(tz._minutes) # pytz specific? + except AttributeError: + try: + # dateutil.tz.tzoffset + return str(tz._offset.total_seconds()) + except AttributeError: + return str(tz) + return tz_cache_key(tz) + + +cdef fixed_offset_to_pytz(tz): + """ + If we have a FixedOffset, ensure it is a pytz fixed offset + """ + if is_fixed_offset(tz): + # tests expect pytz, not dateutil... + if tz is pytz.utc: + pass + elif hasattr(tz, '_minutes'): + # i.e. pytz + pass # TODO: use the treat_as_pytz method? + elif hasattr(tz, '_offset'): + # i.e. dateutil # TODO: use the treat_as_dateutil method? + secs = tz._offset.total_seconds() + assert secs % 60 == 0, secs + tz = pytz.FixedOffset(secs / 60) + else: + # e.g. 
custom FixedOffset implemented in tests + pass + # TODO: using the below breaks some tests and fixes others + # off = get_utcoffset(tz, Timestamp.now()) + # secs = off.total_seconds() + # assert secs % 60 == 0, secs + # tz = pytz.FixedOffset(secs / 60) + + elif is_utc(tz): + # if we have a dateutil UTC (or stdlib), change to pytz to make + # tests happy + tz = pytz.utc + return tz + + @cython.wraparound(False) @cython.boundscheck(False) cpdef array_to_datetime(ndarray[object] values, str errors='raise', bint dayfirst=False, bint yearfirst=False, - object format=None, object utc=None, - bint require_iso8601=False): + object utc=None, bint require_iso8601=False): """ Converts a 1D array of date-like values to a numpy array of either: 1) datetime64[ns] data @@ -488,8 +538,6 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', dayfirst parsing behavior when encountering datetime strings yearfirst : bool, default False yearfirst parsing behavior when encountering datetime strings - format : str, default None - format of the string to parse utc : bool, default None indicator whether the dates should be UTC require_iso8601 : bool, default False @@ -507,9 +555,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', npy_datetimestruct dts bint utc_convert = bool(utc) bint seen_integer = 0 - bint seen_string = 0 bint seen_datetime = 0 - bint seen_datetime_offset = 0 bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' @@ -517,15 +563,14 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', _TSObject _ts int64_t value int out_local=0, out_tzoffset=0 - float offset_seconds, tz_offset - set out_tzoffset_vals = set() + dict out_tzinfos = {} # specify error conditions assert is_raise or is_ignore or is_coerce + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') try: - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') for i in range(n): val = values[i] @@ -534,34 +579,18 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', elif PyDateTime_Check(val): seen_datetime = 1 - if val.tzinfo is not None: - if utc_convert: - try: - _ts = convert_datetime_to_tsobject(val, None) - iresult[i] = _ts.value - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - else: - raise ValueError('Tz-aware datetime.datetime cannot ' - 'be converted to datetime64 unless ' - 'utc=True') - else: - iresult[i] = pydatetime_to_dt64(val, &dts) - if not PyDateTime_CheckExact(val): - # i.e. 
a Timestamp object - iresult[i] += val.nanosecond - try: - check_dts_bounds(&dts) - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise + out_tzinfos[get_key(val.tzinfo)] = val.tzinfo + try: + _ts = convert_datetime_to_tsobject(val, None) + iresult[i] = _ts.value + except OutOfBoundsDatetime: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise elif PyDate_Check(val): + # Treating as either naive or UTC seen_datetime = 1 iresult[i] = pydate_to_dt64(val, &dts) try: @@ -573,17 +602,15 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', raise elif is_datetime64_object(val): + # Treating as either naive or UTC seen_datetime = 1 - if get_datetime64_value(val) == NPY_NAT: - iresult[i] = NPY_NAT - else: - try: - iresult[i] = get_datetime64_nanos(val) - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise + try: + iresult[i] = get_datetime64_nanos(val) + except OutOfBoundsDatetime: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise elif is_integer_object(val) or is_float_object(val): # these must be ns unit by-definition @@ -606,11 +633,11 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', elif is_string_object(val): # string - seen_string = 1 if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue + if isinstance(val, unicode) and PY2: val = val.encode('utf-8') @@ -620,6 +647,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', # A ValueError at this point is a _parsing_ error # specifically _not_ OutOfBoundsDatetime if _parse_today_now(val, &iresult[i]): + # TODO: Do we treat this as local? + # "now" is UTC, "today" is local continue elif require_iso8601: # if requiring iso8601 strings, skip trying @@ -645,15 +674,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', # If the dateutil parser returned tzinfo, capture it # to check if all arguments have the same tzinfo tz = py_dt.utcoffset() - if tz is not None: - seen_datetime_offset = 1 - # dateutil timezone objects cannot be hashed, so store - # the UTC offsets in seconds instead - out_tzoffset_vals.add(tz.total_seconds()) - else: - # Add a marker for naive string, to track if we are - # parsing mixed naive and aware strings - out_tzoffset_vals.add('naive') + out_tzinfos[get_key(py_dt.tzinfo)] = py_dt.tzinfo + try: _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value @@ -673,17 +695,17 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', # where we left off value = dtstruct_to_dt64(&dts) if out_local == 1: - seen_datetime_offset = 1 # Store the out_tzoffset in seconds # since we store the total_seconds of # dateutil.tz.tzoffset objects - out_tzoffset_vals.add(out_tzoffset * 60.) tz = pytz.FixedOffset(out_tzoffset) + out_tzinfos[get_key(tz)] = tz value = tz_convert_single(value, tz, UTC) else: # Add a marker for naive string, to track if we are # parsing mixed naive and aware strings - out_tzoffset_vals.add('naive') + out_tzinfos[None] = None + iresult[i] = value try: check_dts_bounds(&dts) @@ -725,21 +747,28 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', else: raise TypeError - if seen_datetime_offset and not utc_convert: + # TODO: File bug report with cython. 
it raises + # Closures Not Supported + # error when I tried to use + # `if any(key is not None for key in out_tzinfos)` + keys = out_tzinfos.keys() + nnkeys = [x for x in keys if x is not None] + if len(nnkeys) and not utc_convert: # GH 17697 # 1) If all the offsets are equal, return one offset for # the parsed dates to (maybe) pass to DatetimeIndex # 2) If the offsets are different, then force the parsing down the # object path where an array of datetimes # (with individual dateutil.tzoffsets) are returned - is_same_offsets = len(out_tzoffset_vals) == 1 + is_same_offsets = len(out_tzinfos) == 1 if not is_same_offsets: return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) else: - tz_offset = out_tzoffset_vals.pop() - tz_out = pytz.FixedOffset(tz_offset / 60.) + tz_out = list(out_tzinfos.values())[0] + tz_out = fixed_offset_to_pytz(tz_out) return result, tz_out + except OutOfBoundsDatetime: if is_raise: raise @@ -819,6 +848,8 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise, if is_raise: raise return values, None + elif PyDateTime_Check(val): + oresult[i] = val # TODO: check_dts_bounds? else: if is_raise: raise diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index bf5429c39e8fe..2ddc55826f562 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -65,6 +65,9 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1: unit = get_datetime64_unit(val) ival = get_datetime64_value(val) + if ival == NPY_NAT: + return ival + if unit != NPY_FR_ns: pandas_datetime_to_datetimestruct(ival, unit, &dts) check_dts_bounds(&dts) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 50c4a41f97a82..20623b5329336 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -14,3 +14,5 @@ cdef get_utcoffset(tzinfo, obj) cdef bint is_fixed_offset(object tz) cdef object get_dst_info(object tz) + +cpdef object tz_cache_key(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 5fa8a45af3083..62633af0974a2 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -112,7 +112,7 @@ def _p_tz_cache_key(tz): dst_cache = {} -cdef inline object tz_cache_key(object tz): +cpdef object tz_cache_key(object tz): """ Return the key in the cache for the timezone info object or None if unknown. diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index eae9eb97f35fe..39f95b9cd1685 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -877,7 +877,7 @@ def maybe_infer_to_datetimelike(value, convert_dates=False): return value shape = v.shape - if not v.ndim == 1: + if v.ndim != 1: v = v.ravel() if not len(v): @@ -887,9 +887,9 @@ def try_datetime(v): # safe coerce to datetime64 try: # GH19671 - v = tslib.array_to_datetime(v, - require_iso8601=True, - errors='raise')[0] + v, inferred_tz = tslib.array_to_datetime(v, + require_iso8601=True, + errors='raise') except ValueError: # we might have a sequence of the same-datetimes with tz's @@ -902,11 +902,17 @@ def try_datetime(v): values, tz = conversion.datetime_to_datetime64(v) return DatetimeIndex(values).tz_localize( 'UTC').tz_convert(tz=tz) + # TODO: possibly reshape? 
except (ValueError, TypeError): pass except Exception: pass + else: + if inferred_tz is not None: # TODO: de-duplicate with to_datetime + from pandas import DatetimeIndex + return DatetimeIndex(v).tz_localize('UTC').tz_convert(tz=inferred_tz) + # TODO: possibly reshape? return v.reshape(shape) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index ee44a64514f4f..4afecd143d568 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs import tslib, tslibs -from pandas._libs.tslibs import Timestamp, conversion, parsing +from pandas._libs.tslibs import Timestamp, parsing from pandas._libs.tslibs.parsing import ( # noqa DateParseError, _format_is_iso, _guess_datetime_format, parse_time_string) from pandas._libs.tslibs.strptime import array_strptime @@ -134,7 +134,7 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name): def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, unit=None, errors=None, infer_datetime_format=None, dayfirst=None, - yearfirst=None, exact=None): + yearfirst=None, exact=None, allow_object=True): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates @@ -231,80 +231,77 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, require_iso8601 = not infer_datetime_format format = None - try: - result = None + tz_parsed = None + result = None - if format is not None: - # shortcut formatting here - if format == '%Y%m%d': - try: - # pass orig_arg as float-dtype may have been converted to - # datetime64[ns] - orig_arg = ensure_object(orig_arg) - result = _attempt_YYYYMMDD(orig_arg, errors=errors) - except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): - raise ValueError("cannot convert the input to " - "'%Y%m%d' date format") - - # fallback - if result is None: - try: - result, timezones = array_strptime( - arg, format, exact=exact, errors=errors) - if '%Z' in format or '%z' in format: - return _return_parsed_timezone_results( - result, timezones, box, tz, name) - except tslibs.OutOfBoundsDatetime: + if format is not None: + # shortcut formatting here + if format == '%Y%m%d': + try: + # pass orig_arg as float-dtype may have been converted to + # datetime64[ns] + orig_arg = ensure_object(orig_arg) + result = _attempt_YYYYMMDD(orig_arg, errors=errors) + except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): + raise ValueError("cannot convert the input to " + "'%Y%m%d' date format") + + # fallback + if result is None: + try: + result, timezones = array_strptime( + arg, format, exact=exact, errors=errors) + if '%Z' in format or '%z' in format: + return _return_parsed_timezone_results( + result, timezones, box, tz, name) + except tslibs.OutOfBoundsDatetime: + if errors == 'raise': + raise + result = arg + except ValueError: + # if format was inferred, try falling back + # to array_to_datetime - terminate here + # for specified formats + if not infer_datetime_format: if errors == 'raise': raise result = arg - except ValueError: - # if format was inferred, try falling back - # to array_to_datetime - terminate here - # for specified formats - if not infer_datetime_format: - if errors == 'raise': - raise - result = arg - - if result is None and (format is None or infer_datetime_format): - result, tz_parsed = tslib.array_to_datetime( - arg, - errors=errors, - utc=tz == 'utc', - dayfirst=dayfirst, - yearfirst=yearfirst, - require_iso8601=require_iso8601 - ) - if tz_parsed is not None: - if box: - # 
We can take a shortcut since the datetime64 numpy array - # is in UTC - return DatetimeIndex._simple_new(result, name=name, - tz=tz_parsed) - else: - # Convert the datetime64 numpy array to an numpy array - # of datetime objects - result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() - for ts in result] - return np.array(result, dtype=object) + if result is None: + assert format is None or infer_datetime_format + result, tz_parsed = tslib.array_to_datetime( + arg, + errors=errors, + utc=tz == 'utc', + dayfirst=dayfirst, + yearfirst=yearfirst, + require_iso8601=require_iso8601 + ) + + if tz_parsed is not None: if box: - # Ensure we return an Index in all cases where box=True - if is_datetime64_dtype(result): - return DatetimeIndex(result, tz=tz, name=name) - elif is_object_dtype(result): - # e.g. an Index of datetime objects - from pandas import Index - return Index(result, name=name) - return result + # We can take a shortcut since the datetime64 numpy array + # is in UTC + return DatetimeIndex._simple_new(result, name=name, + tz=tz_parsed) + else: + # Convert the datetime64 numpy array to an numpy array + # of datetime objects + result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() + for ts in result] + return np.array(result, dtype=object) - except ValueError as e: - try: - values, tz = conversion.datetime_to_datetime64(arg) - return DatetimeIndex._simple_new(values, name=name, tz=tz) - except (ValueError, TypeError): - raise e + if box: + # Ensure we return an Index in all cases where box=True + if is_datetime64_dtype(result): + return DatetimeIndex(result, tz=tz, name=name) + elif is_object_dtype(result): + # e.g. an Index of datetime objects + if not allow_object: + raise ValueError("Mixed timezones (including naive)") + from pandas import Index + return Index(result, name=name) + return result def _adjust_to_origin(arg, origin, unit): @@ -383,7 +380,7 @@ def _adjust_to_origin(arg, origin, unit): def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, unit=None, infer_datetime_format=False, origin='unix', - cache=False): + cache=False, allow_object=True): """ Convert argument to datetime. 
@@ -556,7 +553,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, convert_listlike = partial(_convert_listlike_datetimes, tz=tz, unit=unit, dayfirst=dayfirst, yearfirst=yearfirst, errors=errors, exact=exact, - infer_datetime_format=infer_datetime_format) + infer_datetime_format=infer_datetime_format, allow_object=allow_object) if isinstance(arg, Timestamp): result = arg diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index c24c1025ea63c..c032c7a0fa25f 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -397,7 +397,9 @@ def test_to_datetime_tz(self, cache): # mixed tzs will raise arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - pytest.raises(ValueError, lambda: pd.to_datetime(arr, cache=cache)) + + with pytest.raises(ValueError): + pd.to_datetime(arr, cache=cache, allow_object=False) @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_tz_pytz(self, cache): From f0dccc7e9be68d29deeb883a20ff58b83e42c076 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 29 Nov 2018 15:23:29 -0800 Subject: [PATCH 2/6] remove no-longer-needed --- pandas/_libs/tslibs/conversion.pyx | 54 +----------------------------- pandas/core/dtypes/cast.py | 22 +++--------- 2 files changed, 5 insertions(+), 71 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 31ea50a894c20..354bc9d7297a7 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -40,7 +40,7 @@ from timezones import UTC from parsing import parse_datetime_string from nattype import nat_strings, NaT -from nattype cimport NPY_NAT, checknull_with_nat +from nattype cimport NPY_NAT # ---------------------------------------------------------------------- # Constants @@ -146,58 +146,6 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True): # TODO: check for overflows when going from a lower-resolution to nanos -@cython.boundscheck(False) -@cython.wraparound(False) -def datetime_to_datetime64(values: object[:]): - """ - Convert ndarray of datetime-like objects to int64 array representing - nanosecond timestamps. - - Parameters - ---------- - values : ndarray[object] - - Returns - ------- - result : ndarray[int64_t] - inferred_tz : tzinfo or None - """ - cdef: - Py_ssize_t i, n = len(values) - object val, inferred_tz = None - int64_t[:] iresult - npy_datetimestruct dts - _TSObject _ts - - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') - for i in range(n): - val = values[i] - if checknull_with_nat(val): - iresult[i] = NPY_NAT - elif PyDateTime_Check(val): - if val.tzinfo is not None: - if inferred_tz is not None: - if not tz_compare(val.tzinfo, inferred_tz): - raise ValueError('Array must be all same time zone') - else: - inferred_tz = get_timezone(val.tzinfo) - - _ts = convert_datetime_to_tsobject(val, None) - iresult[i] = _ts.value - check_dts_bounds(&_ts.dts) - else: - if inferred_tz is not None: - raise ValueError('Cannot mix tz-aware with ' - 'tz-naive values') - iresult[i] = pydatetime_to_dt64(val, &dts) - check_dts_bounds(&dts) - else: - raise TypeError('Unrecognized value type: %s' % type(val)) - - return result, inferred_tz - - cdef inline maybe_datetimelike_to_i8(object val): """ Try to convert to a nanosecond timestamp. 
Fall back to returning the diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 39f95b9cd1685..5660ff872da7d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -890,28 +890,14 @@ def try_datetime(v): v, inferred_tz = tslib.array_to_datetime(v, require_iso8601=True, errors='raise') - except ValueError: - - # we might have a sequence of the same-datetimes with tz's - # if so coerce to a DatetimeIndex; if they are not the same, - # then these stay as object dtype, xref GH19671 - try: - from pandas._libs.tslibs import conversion - from pandas import DatetimeIndex - - values, tz = conversion.datetime_to_datetime64(v) - return DatetimeIndex(values).tz_localize( - 'UTC').tz_convert(tz=tz) - # TODO: possibly reshape? - except (ValueError, TypeError): - pass - except Exception: pass else: - if inferred_tz is not None: # TODO: de-duplicate with to_datetime + if inferred_tz is not None: + # TODO: de-duplicate with to_datetime from pandas import DatetimeIndex - return DatetimeIndex(v).tz_localize('UTC').tz_convert(tz=inferred_tz) + dti = DatetimeIndex(v).tz_localize('UTC') + return dti.tz_convert(tz=inferred_tz) # TODO: possibly reshape? return v.reshape(shape) From 1b36e6f76aeec06989e3d91d2edee68924dc101d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 29 Nov 2018 20:38:46 -0800 Subject: [PATCH 3/6] fix tests, make timezone funcs stricter --- pandas/_libs/tslib.pyx | 35 ++----------------- pandas/_libs/tslibs/timezones.pxd | 2 +- pandas/_libs/tslibs/timezones.pyx | 28 +++++++++++++-- pandas/core/series.py | 4 ++- .../indexes/datetimes/test_construction.py | 28 ++++++++------- pandas/tests/tslibs/test_array_to_datetime.py | 8 +++-- pandas/util/testing.py | 18 ++++++++-- 7 files changed, 67 insertions(+), 56 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 6b5b927f92015..9e1e126bee677 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -475,41 +475,11 @@ cdef get_key(tz): return str(tz._offset.total_seconds()) except AttributeError: return str(tz) + if is_utc(tz): + return 'UTC' return tz_cache_key(tz) -cdef fixed_offset_to_pytz(tz): - """ - If we have a FixedOffset, ensure it is a pytz fixed offset - """ - if is_fixed_offset(tz): - # tests expect pytz, not dateutil... - if tz is pytz.utc: - pass - elif hasattr(tz, '_minutes'): - # i.e. pytz - pass # TODO: use the treat_as_pytz method? - elif hasattr(tz, '_offset'): - # i.e. dateutil # TODO: use the treat_as_dateutil method? - secs = tz._offset.total_seconds() - assert secs % 60 == 0, secs - tz = pytz.FixedOffset(secs / 60) - else: - # e.g. 
custom FixedOffset implemented in tests - pass - # TODO: using the below breaks some tests and fixes others - # off = get_utcoffset(tz, Timestamp.now()) - # secs = off.total_seconds() - # assert secs % 60 == 0, secs - # tz = pytz.FixedOffset(secs / 60) - - elif is_utc(tz): - # if we have a dateutil UTC (or stdlib), change to pytz to make - # tests happy - tz = pytz.utc - return tz - - @cython.wraparound(False) @cython.boundscheck(False) cpdef array_to_datetime(ndarray[object] values, str errors='raise', @@ -766,7 +736,6 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', dayfirst, yearfirst) else: tz_out = list(out_tzinfos.values())[0] - tz_out = fixed_offset_to_pytz(tz_out) return result, tz_out except OutOfBoundsDatetime: diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 20623b5329336..4d023232f9275 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -6,7 +6,7 @@ cdef bint is_tzlocal(object tz) cdef bint treat_tz_as_pytz(object tz) cdef bint treat_tz_as_dateutil(object tz) -cpdef bint tz_compare(object start, object end) +cpdef bint tz_compare(object start, object end) except? -1 cpdef object get_timezone(object tz) cpdef object maybe_get_tz(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 62633af0974a2..17d44835bf4c0 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -2,6 +2,8 @@ from cython import Py_ssize_t +from cpython.datetime cimport tzinfo + # dateutil compat from dateutil.tz import ( tzutc as _dateutil_tzutc, @@ -156,8 +158,22 @@ cdef get_utcoffset(tzinfo, obj): return tzinfo.utcoffset(obj) +cdef get_fixed_offset_total_seconds(tzinfo tz): + """ + For compat between pytz.FixedOffset, dateutil.tz.tzoffset + """ + if hasattr(tz, "_offset"): + # dateutil, pytz + return tz._offset.total_seconds() + else: + # TODO: Will it ever want an actual datetime? + return tz.utcoffset(None) + + cdef inline bint is_fixed_offset(object tz): - if treat_tz_as_dateutil(tz): + if tz is None: + return 0 + elif treat_tz_as_dateutil(tz): if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: return 1 else: @@ -168,7 +184,9 @@ cdef inline bint is_fixed_offset(object tz): return 1 else: return 0 - return 1 + if not isinstance(tz, tzinfo): + return 0 + return 1 # TODO: No! cdef object get_utc_trans_times_from_dateutil_tz(object tz): @@ -293,7 +311,7 @@ def infer_tzinfo(start, end): return tz -cpdef bint tz_compare(object start, object end): +cpdef bint tz_compare(object start, object end) except? 
-1: """ Compare string representations of timezones @@ -319,6 +337,10 @@ cpdef bint tz_compare(object start, object end): """ # GH 18523 + if is_fixed_offset(start) and is_fixed_offset(end): + start_seconds = get_fixed_offset_total_seconds(start) + end_seconds = get_fixed_offset_total_seconds(end) + return start_seconds == end_seconds return get_timezone(start) == get_timezone(end) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0d6c9f4d845da..c4b283c919a44 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -366,9 +366,11 @@ def _set_axis(self, axis, labels, fastpath=False): # need to set here because we changed the index if fastpath: self._data.set_axis(axis, labels) - except (tslibs.OutOfBoundsDatetime, ValueError): + except (tslibs.OutOfBoundsDatetime, ValueError, TypeError): # labels may exceeds datetime bounds, # or not be a DatetimeIndex + # GH#24006 TypeError can occur when all entries are + # datetimes but they do not have matching timezones pass self._set_subtyp(is_all_dates) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 5de79044bc239..60ea595db6d2d 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -277,34 +277,36 @@ def test_construction_dti_with_mixed_timezones(self): tm.assert_index_equal(result, exp, exact=True) assert isinstance(result, DatetimeIndex) - # different tz coerces tz-naive to tz-awareIndex(dtype=object) - result = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', - tz='US/Eastern')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), - Timestamp('2011-01-02 10:00')], - tz='US/Eastern', name='idx') - tm.assert_index_equal(result, exp, exact=True) - assert isinstance(result, DatetimeIndex) + # tzaware/tznaive mismatch raises + with pytest.raises(TypeError): + # TODO: The exception message is not so useful + # "Cannot cast Index to dtype " + DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') # tz mismatch affecting to tz-aware raises TypeError/ValueError - with pytest.raises(ValueError): + with pytest.raises(TypeError): DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') - with pytest.raises(TypeError, match='data is already tz-aware'): + # FIXME: The exception message here and below used to have + # match='data is already tz-aware' + # but it now has an un-helpful message because it is raising + # in an unintentional place + with pytest.raises(TypeError): DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='Asia/Tokyo', name='idx') - with pytest.raises(ValueError): + with pytest.raises(TypeError): DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='US/Eastern', name='idx') - with pytest.raises(TypeError, match='data is already tz-aware'): + with pytest.raises(TypeError): # passing tz should results in DatetimeIndex, then mismatch raises # TypeError Index([pd.NaT, Timestamp('2011-01-01 10:00'), diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index ff8880257b225..fffb72c31f15e 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -7,6 +7,7 @@ import pytz from pandas._libs import iNaT, 
tslib +from pandas._libs.tslibs.timezones import is_utc, tz_compare from pandas.compat.numpy import np_array_datetime64_compat import pandas.util.testing as tm @@ -84,15 +85,18 @@ def test_parsing_timezone_offsets(self, dt_string, expected_tz): arr = np.array([dt_string], dtype=object) result, result_tz = tslib.array_to_datetime(arr) tm.assert_numpy_array_equal(result, expected) - assert result_tz is expected_tz + + # in some cases result_tz ends up as a dateutil fixed offset + assert tz_compare(result_tz, expected_tz) def test_parsing_non_iso_timezone_offset(self): + # ends up parsed by dateutil, result has dateutil's tzutc() tzinfo dt_string = '01-01-2013T00:00:00.000000000+0000' arr = np.array([dt_string], dtype=object) result, result_tz = tslib.array_to_datetime(arr) expected = np.array([np.datetime64('2013-01-01 00:00:00.000000000')]) tm.assert_numpy_array_equal(result, expected) - assert result_tz is pytz.FixedOffset(0) + assert is_utc(result_tz) def test_parsing_different_timezone_offsets(self): # GH 17697 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 24aff12e64192..f029092edcbf3 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1,7 +1,7 @@ from __future__ import division from contextlib import contextmanager -from datetime import datetime +from datetime import datetime, tzinfo from functools import wraps import locale import os @@ -18,6 +18,7 @@ from numpy.random import rand, randn from pandas._libs import testing as _testing +from pandas._libs.tslibs.timezones import tz_compare import pandas.compat as compat from pandas.compat import ( PY2, PY3, Counter, StringIO, callable, filter, httplib, lmap, lrange, lzip, @@ -878,8 +879,13 @@ def _get_ilevel_values(index, level): # get_level_values may change dtype _check_types(left.levels[level], right.levels[level], obj=obj) - # skip exact index checking when `check_categorical` is False - if check_exact and check_categorical: + if isinstance(left, DatetimeIndex): + # by now we know right is also a DatetimeIndex + assert_numpy_array_equal(left.asi8, right.asi8) + assert tz_compare(left.tz, right.tz) + + elif check_exact and check_categorical: + # skip exact index checking when `check_categorical` is False if not left.equals(right): diff = np.sum((left.values != right.values) .astype(int)) * 100.0 / len(left) @@ -960,6 +966,12 @@ def assert_attr_equal(attr, left, right, obj='Attributes'): is_number(right_attr) and np.isnan(right_attr)): # np.nan return True + elif is_datetime64tz_dtype(left_attr) and is_datetime64tz_dtype(right_attr): + left_attr = getattr(left_attr, 'dtype', left_attr) + right_attr = getattr(right_attr, 'dtype', right_attr) + return tz_compare(left_attr.tz, right_attr.tz) + elif isinstance(left_attr, tzinfo) and isinstance(right_attr, tzinfo): + return tz_compare(left_attr, right_attr) try: result = left_attr == right_attr From 9d42d97b2044d430e922f6c7cad9f003c959bfa1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 30 Nov 2018 16:27:36 -0800 Subject: [PATCH 4/6] kludge the kludge --- pandas/tests/tslibs/test_array_to_datetime.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index fffb72c31f15e..7483f7f6e613c 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -13,6 +13,17 @@ import pandas.util.testing as tm +def tzlocal_is_utc(): + # kludge because (apparently) when dateutil parses a UTC 
timezone + # and tzlocal happens to be UTC, it returns tzlocal() instead of tzutc() + now = datetime.now() + from dateutil.tz import tzlocal + tz = tzlocal() + now = now.replace(tzinfo=tz) + offset = tz.utcoffset(now) + return offset.total_seconds() == 0 + + class TestParseISO8601(object): @pytest.mark.parametrize('date_str, exp', [ ('2011-01-02', datetime(2011, 1, 2)), @@ -96,7 +107,8 @@ def test_parsing_non_iso_timezone_offset(self): result, result_tz = tslib.array_to_datetime(arr) expected = np.array([np.datetime64('2013-01-01 00:00:00.000000000')]) tm.assert_numpy_array_equal(result, expected) - assert is_utc(result_tz) + assert is_utc(result_tz) or (type(result_tz).__name__ == 'tzlocal') and + tzlocal_is_utc()) def test_parsing_different_timezone_offsets(self): # GH 17697 From edc177d4f83444466e23c310df90af15f44e9a99 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 30 Nov 2018 18:17:26 -0800 Subject: [PATCH 5/6] typo fixup --- pandas/tests/tslibs/test_array_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 7483f7f6e613c..b5d6cf5ec43e9 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -107,7 +107,7 @@ def test_parsing_non_iso_timezone_offset(self): result, result_tz = tslib.array_to_datetime(arr) expected = np.array([np.datetime64('2013-01-01 00:00:00.000000000')]) tm.assert_numpy_array_equal(result, expected) - assert is_utc(result_tz) or (type(result_tz).__name__ == 'tzlocal') and + assert is_utc(result_tz) or (type(result_tz).__name__ == 'tzlocal' and tzlocal_is_utc()) def test_parsing_different_timezone_offsets(self): From 1c6a8ee024cdd67b148a28c065bd830a06e9f303 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 2 Dec 2018 13:59:33 -0800 Subject: [PATCH 6/6] manual rebase --- pandas/_libs/tslib.pyx | 368 +++++++++++++++++++++-------------------- 1 file changed, 186 insertions(+), 182 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9e1e126bee677..44c1a0b1dea09 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -480,6 +480,7 @@ cdef get_key(tz): return tz_cache_key(tz) + @cython.wraparound(False) @cython.boundscheck(False) cpdef array_to_datetime(ndarray[object] values, str errors='raise', @@ -525,7 +526,9 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', npy_datetimestruct dts bint utc_convert = bool(utc) bint seen_integer = 0 + bint seen_string = 0 bint seen_datetime = 0 + bint seen_datetime_offset = 0 bint is_raise = errors=='raise' bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' @@ -540,229 +543,232 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') + try: for i in range(n): val = values[i] - if checknull_with_nat(val): - iresult[i] = NPY_NAT + try: + if checknull_with_nat(val): + iresult[i] = NPY_NAT - elif PyDateTime_Check(val): - seen_datetime = 1 - out_tzinfos[get_key(val.tzinfo)] = val.tzinfo - try: + elif PyDateTime_Check(val): + seen_datetime = 1 + out_tzinfos[get_key(val.tzinfo)] = val.tzinfo _ts = convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - elif PyDate_Check(val): - # Treating as either naive or UTC - seen_datetime = 1 - iresult[i] = pydate_to_dt64(val, &dts) - try: + elif 
PyDate_Check(val): + # Treating as either naive or UTC + seen_datetime = 1 + iresult[i] = pydate_to_dt64(val, &dts) check_dts_bounds(&dts) - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - elif is_datetime64_object(val): - # Treating as either naive or UTC - seen_datetime = 1 - try: + elif is_datetime64_object(val): + # Treating as either naive or UTC + seen_datetime = 1 iresult[i] = get_datetime64_nanos(val) - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - elif is_integer_object(val) or is_float_object(val): - # these must be ns unit by-definition - seen_integer = 1 + elif is_integer_object(val) or is_float_object(val): + # these must be ns unit by-definition + seen_integer = 1 - if val != val or val == NPY_NAT: - iresult[i] = NPY_NAT - elif is_raise or is_ignore: - iresult[i] = val - else: - # coerce - # we now need to parse this as if unit='ns' - # we can ONLY accept integers at this point - # if we have previously (or in future accept - # datetimes/strings, then we must coerce) - try: - iresult[i] = cast_from_unit(val, 'ns') - except: + if val != val or val == NPY_NAT: iresult[i] = NPY_NAT + elif is_raise or is_ignore: + iresult[i] = val + else: + # coerce + # we now need to parse this as if unit='ns' + # we can ONLY accept integers at this point + # if we have previously (or in future accept + # datetimes/strings, then we must coerce) + try: + iresult[i] = cast_from_unit(val, 'ns') + except: + iresult[i] = NPY_NAT - elif is_string_object(val): - # string - - if len(val) == 0 or val in nat_strings: - iresult[i] = NPY_NAT - continue - - if isinstance(val, unicode) and PY2: - val = val.encode('utf-8') + elif is_string_object(val): + # string + seen_string = 1 - try: - _string_to_dts(val, &dts, &out_local, &out_tzoffset) - except ValueError: - # A ValueError at this point is a _parsing_ error - # specifically _not_ OutOfBoundsDatetime - if _parse_today_now(val, &iresult[i]): - # TODO: Do we treat this as local? - # "now" is UTC, "today" is local + if len(val) == 0 or val in nat_strings: + iresult[i] = NPY_NAT continue - elif require_iso8601: - # if requiring iso8601 strings, skip trying - # other formats - if is_coerce: - iresult[i] = NPY_NAT - continue - elif is_raise: - raise ValueError("time data {val} doesn't match " - "format specified" - .format(val=val)) - return values, tz_out + if isinstance(val, unicode) and PY2: + val = val.encode('utf-8') try: - py_dt = parse_datetime_string(val, dayfirst=dayfirst, - yearfirst=yearfirst) - except Exception: - if is_coerce: - iresult[i] = NPY_NAT + _string_to_dts(val, &dts, &out_local, &out_tzoffset) + except ValueError: + # A ValueError at this point is a _parsing_ error + # specifically _not_ OutOfBoundsDatetime + if _parse_today_now(val, &iresult[i]): + # TODO: Do we treat this as local? 
+ # "now" is UTC, "today" is local continue - raise TypeError("invalid string coercion to datetime") + elif require_iso8601: + # if requiring iso8601 strings, skip trying + # other formats + if is_coerce: + iresult[i] = NPY_NAT + continue + elif is_raise: + raise ValueError("time data {val} doesn't " + "match format specified" + .format(val=val)) + return values, tz_out - # If the dateutil parser returned tzinfo, capture it - # to check if all arguments have the same tzinfo - tz = py_dt.utcoffset() - out_tzinfos[get_key(py_dt.tzinfo)] = py_dt.tzinfo + try: + py_dt = parse_datetime_string(val, + dayfirst=dayfirst, + yearfirst=yearfirst) + except Exception: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise TypeError("invalid string coercion to " + "datetime") + + # If the dateutil parser returned tzinfo, capture it + # to check if all arguments have the same tzinfo + tz = py_dt.utcoffset() + out_tzinfos[get_key(py_dt.tzinfo)] = py_dt.tzinfo - try: _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value - except OutOfBoundsDatetime: + except: + # TODO: What exception are we concerned with here? if is_coerce: iresult[i] = NPY_NAT continue raise - except: - # TODO: What exception are we concerned with here? + else: + # No error raised by string_to_dts, pick back up + # where we left off + value = dtstruct_to_dt64(&dts) + if out_local == 1: + seen_datetime_offset = 1 + # Store the out_tzoffset in seconds + # since we store the total_seconds of + # dateutil.tz.tzoffset objects + tz = pytz.FixedOffset(out_tzoffset) + out_tzinfos[get_key(tz)] = tz + value = tz_convert_single(value, tz, UTC) + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzinfos[None] = None + iresult[i] = value + check_dts_bounds(&dts) + + else: if is_coerce: iresult[i] = NPY_NAT - continue - raise - else: - # No error raised by string_to_dts, pick back up - # where we left off - value = dtstruct_to_dt64(&dts) - if out_local == 1: - # Store the out_tzoffset in seconds - # since we store the total_seconds of - # dateutil.tz.tzoffset objects - tz = pytz.FixedOffset(out_tzoffset) - out_tzinfos[get_key(tz)] = tz - value = tz_convert_single(value, tz, UTC) else: - # Add a marker for naive string, to track if we are - # parsing mixed naive and aware strings - out_tzinfos[None] = None + raise TypeError("{typ} is not convertible to datetime" + .format(typ=type(val))) - iresult[i] = value - try: - check_dts_bounds(&dts) - except OutOfBoundsDatetime: - # GH#19382 for just-barely-OutOfBounds falling back to - # dateutil parser will return incorrect result because - # it will ignore nanoseconds - if is_coerce: - iresult[i] = NPY_NAT - continue - elif require_iso8601: - if is_raise: - raise ValueError("time data {val} doesn't " - "match format specified" - .format(val=val)) - return values, tz_out - raise - - else: + except OutOfBoundsDatetime: if is_coerce: iresult[i] = NPY_NAT - else: - raise TypeError("{typ} is not convertible to datetime" - .format(typ=type(val))) - - if seen_datetime and seen_integer: - # we have mixed datetimes & integers - - if is_coerce: - # coerce all of the integers/floats to NaT, preserve - # the datetimes and other convertibles - for i in range(n): - val = values[i] - if is_integer_object(val) or is_float_object(val): - result[i] = NPY_NAT - elif is_raise: - raise ValueError( - "mixed datetimes and integers in passed array") - else: - raise TypeError - - # TODO: File bug report with cython. 
it raises - # Closures Not Supported - # error when I tried to use - # `if any(key is not None for key in out_tzinfos)` - keys = out_tzinfos.keys() - nnkeys = [x for x in keys if x is not None] - if len(nnkeys) and not utc_convert: - # GH 17697 - # 1) If all the offsets are equal, return one offset for - # the parsed dates to (maybe) pass to DatetimeIndex - # 2) If the offsets are different, then force the parsing down the - # object path where an array of datetimes - # (with individual dateutil.tzoffsets) are returned - is_same_offsets = len(out_tzinfos) == 1 - if not is_same_offsets: - return array_to_datetime_object(values, is_raise, - dayfirst, yearfirst) - else: - tz_out = list(out_tzinfos.values())[0] - return result, tz_out + continue + elif require_iso8601 and is_string_object(val): + # GH#19382 for just-barely-OutOfBounds falling back to + # dateutil parser will return incorrect result because + # it will ignore nanoseconds + if is_raise: + raise ValueError("time data {val} doesn't " + "match format specified" + .format(val=val)) + assert is_ignore + return values, tz_out + raise except OutOfBoundsDatetime: if is_raise: raise - oresult = np.empty(n, dtype=object) - for i in range(n): - val = values[i] + return ignore_errors_out_of_bounds_fallback(values), tz_out - # set as nan except if its a NaT - if checknull_with_nat(val): - if isinstance(val, float): - oresult[i] = np.nan - else: - oresult[i] = NaT - elif is_datetime64_object(val): - if get_datetime64_value(val) == NPY_NAT: - oresult[i] = NaT - else: - oresult[i] = val.item() - else: - oresult[i] = val - return oresult, tz_out except TypeError: return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) + if seen_datetime and seen_integer: + # we have mixed datetimes & integers + + if is_coerce: + # coerce all of the integers/floats to NaT, preserve + # the datetimes and other convertibles + for i in range(n): + val = values[i] + if is_integer_object(val) or is_float_object(val): + result[i] = NPY_NAT + elif is_raise: + raise ValueError("mixed datetimes and integers in passed array") + else: + return array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) + + # TODO: File bug report with cython. 
it raises + # Closures Not Supported + # error when I tried to use + # `if any(key is not None for key in out_tzinfos)` + keys = out_tzinfos.keys() + nnkeys = [x for x in keys if x is not None] + if len(nnkeys) and not utc_convert: + # GH#17697 + # 1) If all the offsets are equal, return one offset for + # the parsed dates to (maybe) pass to DatetimeIndex + # 2) If the offsets are different, then force the parsing down the + # object path where an array of datetimes + # (with individual dateutil.tzoffsets) are returned + is_same_offsets = len(out_tzinfos) == 1 + if not is_same_offsets: + return array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) + else: + tz_out = list(out_tzinfos.values())[0] + return result, tz_out + + +cdef inline ignore_errors_out_of_bounds_fallback(ndarray[object] values): + """ + Fallback for array_to_datetime if an OutOfBoundsDatetime is raised + and errors == "ignore" + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + object val + + oresult = np.empty(n, dtype=object) + + for i in range(n): + val = values[i] + + # set as nan except if its a NaT + if checknull_with_nat(val): + if isinstance(val, float): + oresult[i] = np.nan + else: + oresult[i] = NaT + elif is_datetime64_object(val): + if get_datetime64_value(val) == NPY_NAT: + oresult[i] = NaT + else: + oresult[i] = val.item() + else: + oresult[i] = val + return oresult + @cython.wraparound(False) @cython.boundscheck(False) @@ -817,8 +823,6 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise, if is_raise: raise return values, None - elif PyDateTime_Check(val): - oresult[i] = val # TODO: check_dts_bounds? else: if is_raise: raise
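
Note on the fixed-offset comparison introduced above: PATCH 3/6 teaches tz_compare to treat two fixed-offset tzinfo objects as equal whenever their UTC offsets agree, via the new get_fixed_offset_total_seconds helper. The snippet below is a minimal plain-Python sketch of that rule, not the Cython code in the patch; it assumes pytz and dateutil are installed, and the helper names fixed_offset_total_seconds and fixed_offsets_equal are illustrative only, though the `_offset` attribute access mirrors the patched helper (pytz.FixedOffset and dateutil.tz.tzoffset both carry that private timedelta, as the patch notes).

    # Sketch of the fixed-offset equality rule added to tz_compare.
    from dateutil.tz import tzoffset
    import pytz


    def fixed_offset_total_seconds(tz):
        # pytz.FixedOffset and dateutil.tz.tzoffset both store a private
        # `_offset` timedelta; fall back to the public tzinfo API for other
        # fixed-offset implementations.
        if hasattr(tz, "_offset"):
            return tz._offset.total_seconds()
        return tz.utcoffset(None).total_seconds()


    def fixed_offsets_equal(left, right):
        # Two fixed offsets compare equal when their UTC offsets match,
        # regardless of which library produced them.
        return fixed_offset_total_seconds(left) == fixed_offset_total_seconds(right)


    # pytz measures fixed offsets in minutes, dateutil in seconds, but both
    # objects below describe UTC-05:00 and therefore compare equal.
    assert fixed_offsets_equal(pytz.FixedOffset(-300), tzoffset(None, -300 * 60))

This is the behavior the updated test relies on: test_parsing_timezone_offsets switches from `result_tz is expected_tz` to `tz_compare(result_tz, expected_tz)` because array_to_datetime may now hand back a dateutil fixed offset rather than the pytz object the test constructs, and the two are equivalent only up to their total offset in seconds.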