diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 10bcf6c9eabbf..36001248d664b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -51,7 +51,7 @@ from pandas._libs.tslibs.parsing import parse_datetime_string from pandas._libs.tslibs.conversion cimport ( _TSObject, cast_from_unit, - convert_datetime_to_tsobject, + convert_str_to_tsobject, convert_timezone, get_datetime64_nanos, parse_pydatetime, @@ -482,7 +482,6 @@ cpdef array_to_datetime( object val, tz ndarray[int64_t] iresult npy_datetimestruct dts - NPY_DATETIMEUNIT out_bestunit bint utc_convert = bool(utc) bint seen_datetime_offset = False bint is_raise = errors=="raise" @@ -490,12 +489,8 @@ cpdef array_to_datetime( bint is_coerce = errors=="coerce" bint is_same_offsets _TSObject _ts - int64_t value - int out_local = 0, out_tzoffset = 0 float tz_offset set out_tzoffset_vals = set() - bint string_to_dts_failed - datetime py_dt tzinfo tz_out = None bint found_tz = False, found_naive = False cnp.broadcast mi @@ -557,61 +552,40 @@ cpdef array_to_datetime( # GH#32264 np.str_ object val = str(val) - if len(val) == 0 or val in nat_strings: - iresult[i] = NPY_NAT + if parse_today_now(val, &iresult[i], utc): + # We can't _quite_ dispatch this to convert_str_to_tsobject + # bc there isn't a nice way to pass "utc" cnp.PyArray_MultiIter_NEXT(mi) continue - string_to_dts_failed = string_to_dts( - val, &dts, &out_bestunit, &out_local, - &out_tzoffset, False, None, False + _ts = convert_str_to_tsobject( + val, None, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst ) - if string_to_dts_failed: - # An error at this point is a _parsing_ error - # specifically _not_ OutOfBoundsDatetime - if parse_today_now(val, &iresult[i], utc): - cnp.PyArray_MultiIter_NEXT(mi) - continue - - py_dt = parse_datetime_string(val, - dayfirst=dayfirst, - yearfirst=yearfirst) - # If the dateutil parser returned tzinfo, capture it - # to check if all arguments have the same tzinfo - tz = py_dt.utcoffset() - - if tz is not None: - seen_datetime_offset = True - # dateutil timezone objects cannot be hashed, so - # store the UTC offsets in seconds instead - out_tzoffset_vals.add(tz.total_seconds()) - else: - # Add a marker for naive string, to track if we are - # parsing mixed naive and aware strings - out_tzoffset_vals.add("naive") - - _ts = convert_datetime_to_tsobject(py_dt, None) - iresult[i] = _ts.value + try: + _ts.ensure_reso(NPY_FR_ns) + except OutOfBoundsDatetime as err: + # re-raise with better exception message + raise OutOfBoundsDatetime( + f"Out of bounds nanosecond timestamp: {val}" + ) from err + + iresult[i] = _ts.value + + tz = _ts.tzinfo + if tz is not None: + # dateutil timezone objects cannot be hashed, so + # store the UTC offsets in seconds instead + nsecs = tz.utcoffset(None).total_seconds() + out_tzoffset_vals.add(nsecs) + # need to set seen_datetime_offset *after* the + # potentially-raising timezone(timedelta(...)) call, + # otherwise we can go down the is_same_offsets path + # bc len(out_tzoffset_vals) == 0 + seen_datetime_offset = True else: - # No error reported by string_to_dts, pick back up - # where we left off - value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) - if out_local == 1: - seen_datetime_offset = True - # Store the out_tzoffset in seconds - # since we store the total_seconds of - # dateutil.tz.tzoffset objects - out_tzoffset_vals.add(out_tzoffset * 60.) - tz = timezone(timedelta(minutes=out_tzoffset)) - value = tz_localize_to_utc_single(value, tz) - out_local = 0 - out_tzoffset = 0 - else: - # Add a marker for naive string, to track if we are - # parsing mixed naive and aware strings - out_tzoffset_vals.add("naive") - iresult[i] = value - check_dts_bounds(&dts) + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add("naive") else: raise TypeError(f"{type(val)} is not convertible to datetime") diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 332ff1522ccf5..756ab67aa7084 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -35,6 +35,10 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, int32_t nanos=*, NPY_DATETIMEUNIT reso=*) +cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, + bint dayfirst=*, + bint yearfirst=*) + cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1 cpdef datetime localize_pydatetime(datetime dt, tzinfo tz) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7cff269d2191e..aacb06fe36037 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -246,7 +246,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, obj = _TSObject() if isinstance(ts, str): - return _convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) + return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) if ts is None or ts is NaT: obj.value = NPY_NAT @@ -463,9 +463,9 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, return obj -cdef _TSObject _convert_str_to_tsobject(str ts, tzinfo tz, str unit, - bint dayfirst=False, - bint yearfirst=False): +cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, + bint dayfirst=False, + bint yearfirst=False): """ Convert a string input `ts`, along with optional timezone object`tz` to a _TSObject. diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index be05a649ec0b6..622f41236edb9 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -38,7 +38,10 @@ def test_dti_date(self): @pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]]) def test_dti_date_out_of_range(self, data): # GH#1475 - msg = "^Out of bounds nanosecond timestamp: 1400-01-01 00:00:00, at position 0$" + msg = ( + "^Out of bounds nanosecond timestamp: " + "1400-01-01( 00:00:00)?, at position 0$" + ) with pytest.raises(OutOfBoundsDatetime, match=msg): DatetimeIndex(data) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a6e40c30d5b82..bf0db0da1c3e3 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2783,7 +2783,7 @@ def test_day_not_in_month_coerce(self, cache, arg, format, warning): assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache)) def test_day_not_in_month_raise(self, cache): - msg = "day is out of range for month" + msg = "could not convert string to Timestamp" with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning( UserWarning, match="Could not infer format"