Skip to content

Commit 13f5395

Browse files
authored
REF: share string parsing code (#50736)
* REF: re-use convert_str_to_tsobj
* revert utcoffset checks
1 parent c128ffa commit 13f5395

File tree

5 files changed

+43
-62
lines changed

5 files changed

+43
-62
lines changed

pandas/_libs/tslib.pyx

Lines changed: 30 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ from pandas._libs.tslibs.parsing import parse_datetime_string
5151
from pandas._libs.tslibs.conversion cimport (
5252
_TSObject,
5353
cast_from_unit,
54-
convert_datetime_to_tsobject,
54+
convert_str_to_tsobject,
5555
convert_timezone,
5656
get_datetime64_nanos,
5757
parse_pydatetime,
@@ -482,20 +482,15 @@ cpdef array_to_datetime(
482482
object val, tz
483483
ndarray[int64_t] iresult
484484
npy_datetimestruct dts
485-
NPY_DATETIMEUNIT out_bestunit
486485
bint utc_convert = bool(utc)
487486
bint seen_datetime_offset = False
488487
bint is_raise = errors=="raise"
489488
bint is_ignore = errors=="ignore"
490489
bint is_coerce = errors=="coerce"
491490
bint is_same_offsets
492491
_TSObject _ts
493-
int64_t value
494-
int out_local = 0, out_tzoffset = 0
495492
float tz_offset
496493
set out_tzoffset_vals = set()
497-
bint string_to_dts_failed
498-
datetime py_dt
499494
tzinfo tz_out = None
500495
bint found_tz = False, found_naive = False
501496
cnp.broadcast mi
@@ -557,61 +552,40 @@ cpdef array_to_datetime(
557552
# GH#32264 np.str_ object
558553
val = str(val)
559554

560-
if len(val) == 0 or val in nat_strings:
561-
iresult[i] = NPY_NAT
555+
if parse_today_now(val, &iresult[i], utc):
556+
# We can't _quite_ dispatch this to convert_str_to_tsobject
557+
# because there isn't a nice way to pass "utc"
562558
cnp.PyArray_MultiIter_NEXT(mi)
563559
continue
564560

565-
string_to_dts_failed = string_to_dts(
566-
val, &dts, &out_bestunit, &out_local,
567-
&out_tzoffset, False, None, False
561+
_ts = convert_str_to_tsobject(
562+
val, None, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst
568563
)
569-
if string_to_dts_failed:
570-
# An error at this point is a _parsing_ error
571-
# specifically _not_ OutOfBoundsDatetime
572-
if parse_today_now(val, &iresult[i], utc):
573-
cnp.PyArray_MultiIter_NEXT(mi)
574-
continue
575-
576-
py_dt = parse_datetime_string(val,
577-
dayfirst=dayfirst,
578-
yearfirst=yearfirst)
579-
# If the dateutil parser returned tzinfo, capture it
580-
# to check if all arguments have the same tzinfo
581-
tz = py_dt.utcoffset()
582-
583-
if tz is not None:
584-
seen_datetime_offset = True
585-
# dateutil timezone objects cannot be hashed, so
586-
# store the UTC offsets in seconds instead
587-
out_tzoffset_vals.add(tz.total_seconds())
588-
else:
589-
# Add a marker for naive string, to track if we are
590-
# parsing mixed naive and aware strings
591-
out_tzoffset_vals.add("naive")
592-
593-
_ts = convert_datetime_to_tsobject(py_dt, None)
594-
iresult[i] = _ts.value
564+
try:
565+
_ts.ensure_reso(NPY_FR_ns)
566+
except OutOfBoundsDatetime as err:
567+
# re-raise with better exception message
568+
raise OutOfBoundsDatetime(
569+
f"Out of bounds nanosecond timestamp: {val}"
570+
) from err
571+
572+
iresult[i] = _ts.value
573+
574+
tz = _ts.tzinfo
575+
if tz is not None:
576+
# dateutil timezone objects cannot be hashed, so
577+
# store the UTC offsets in seconds instead
578+
nsecs = tz.utcoffset(None).total_seconds()
579+
out_tzoffset_vals.add(nsecs)
580+
# need to set seen_datetime_offset *after* the
581+
# potentially-raising timezone(timedelta(...)) call,
582+
# otherwise we can go down the is_same_offsets path
583+
# because len(out_tzoffset_vals) == 0
584+
seen_datetime_offset = True
595585
else:
596-
# No error reported by string_to_dts, pick back up
597-
# where we left off
598-
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
599-
if out_local == 1:
600-
seen_datetime_offset = True
601-
# Store the out_tzoffset in seconds
602-
# since we store the total_seconds of
603-
# dateutil.tz.tzoffset objects
604-
out_tzoffset_vals.add(out_tzoffset * 60.)
605-
tz = timezone(timedelta(minutes=out_tzoffset))
606-
value = tz_localize_to_utc_single(value, tz)
607-
out_local = 0
608-
out_tzoffset = 0
609-
else:
610-
# Add a marker for naive string, to track if we are
611-
# parsing mixed naive and aware strings
612-
out_tzoffset_vals.add("naive")
613-
iresult[i] = value
614-
check_dts_bounds(&dts)
586+
# Add a marker for naive string, to track if we are
587+
# parsing mixed naive and aware strings
588+
out_tzoffset_vals.add("naive")
615589

616590
else:
617591
raise TypeError(f"{type(val)} is not convertible to datetime")

pandas/_libs/tslibs/conversion.pxd

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz,
3535
int32_t nanos=*,
3636
NPY_DATETIMEUNIT reso=*)
3737

38+
cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit,
39+
bint dayfirst=*,
40+
bint yearfirst=*)
41+
3842
cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1
3943

4044
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)

pandas/_libs/tslibs/conversion.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
246246
obj = _TSObject()
247247

248248
if isinstance(ts, str):
249-
return _convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst)
249+
return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst)
250250

251251
if ts is None or ts is NaT:
252252
obj.value = NPY_NAT
@@ -463,9 +463,9 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
463463
return obj
464464

465465

466-
cdef _TSObject _convert_str_to_tsobject(str ts, tzinfo tz, str unit,
467-
bint dayfirst=False,
468-
bint yearfirst=False):
466+
cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit,
467+
bint dayfirst=False,
468+
bint yearfirst=False):
469469
"""
470470
Convert a string input `ts`, along with optional timezone object `tz`
471471
to a _TSObject.

pandas/tests/indexes/datetimes/test_scalar_compat.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ def test_dti_date(self):
3838
@pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]])
3939
def test_dti_date_out_of_range(self, data):
4040
# GH#1475
41-
msg = "^Out of bounds nanosecond timestamp: 1400-01-01 00:00:00, at position 0$"
41+
msg = (
42+
"^Out of bounds nanosecond timestamp: "
43+
"1400-01-01( 00:00:00)?, at position 0$"
44+
)
4245
with pytest.raises(OutOfBoundsDatetime, match=msg):
4346
DatetimeIndex(data)
4447

pandas/tests/tools/test_to_datetime.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2791,7 +2791,7 @@ def test_day_not_in_month_coerce(self, cache, arg, format, warning):
27912791
assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
27922792

27932793
def test_day_not_in_month_raise(self, cache):
2794-
msg = "day is out of range for month"
2794+
msg = "could not convert string to Timestamp"
27952795
with pytest.raises(ValueError, match=msg):
27962796
with tm.assert_produces_warning(
27972797
UserWarning, match="Could not infer format"

0 commit comments

Comments
 (0)