diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index cfa16df367bce..c98c6850d10c5 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -892,6 +892,8 @@ def guess_datetime_format( if not isinstance(dt_str, str): return None + valid_am_pm = ['am', 'pm', 'a', 'p'] + day_attribute_and_format = (('day',), '%d', 2) # attr name, format, padding (if any) @@ -947,6 +949,11 @@ def guess_datetime_format( found_attrs.update(attrs) break + # Final formatting is required if the dt_string contains AM/PM. + if tokens[i].lower() in valid_am_pm: + format_guess = ["%I" if guess == "%H" else guess for guess in format_guess] + format_guess[i] = "%p" + # Only consider it a valid guess if we have a year, month and day if len({'year', 'month', 'day'} & found_attrs) != 3: return None diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 469a5caf7d694..7e80c166efc7e 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1965,6 +1965,41 @@ def test_to_datetime_infer_datetime_format_consistent_format(self, cache): tm.assert_series_equal(with_format, no_infer) tm.assert_series_equal(no_infer, yes_infer) + @pytest.mark.parametrize("cache", [True, False]) + @pytest.mark.parametrize("errors", ["ignore", "coerce", "raise"]) + def test_to_datetime_infer_datetime_consistent_formats_error_types( + self, cache, errors + ): + # GH#43414 + s = Series(date_range("20000101", periods=50, freq="H")) + begin_timerange_condition = s.dt.hour >= 7 + end_timerange_condition = s.dt.hour <= 13 + s = s.loc[begin_timerange_condition & end_timerange_condition] + + test_formats = [ + "%m-%d-%Y", + "%m/%d/%Y %H:%M:%S.%f", + "%m/%d/%Y %I:%M:%S%p", + "%m/%d/%Y %I:%M:%S %p", + "%Y-%m-%dT%H:%M:%S.%f", + "%Y-%m-%dT%I:%M:%S%p", + "%Y-%m-%dT%I:%M:%S %p", + "%Y-%m-%d %H:%M:%S.%f", + "%Y-%m-%d %I:%M:%S%p", + "%Y-%m-%d %I:%M:%S %p", + ] + + for test_format in test_formats: + s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) + + explicit_format = to_datetime( + s_as_dt_strings, errors=errors, format=test_format, cache=cache + ) + inferred_format = to_datetime( + s_as_dt_strings, errors=errors, infer_datetime_format=True, cache=cache + ) + tm.assert_series_equal(explicit_format, inferred_format) + @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_infer_datetime_format_inconsistent_format(self, cache): s = Series(