Skip to content

bpo-34454: datetime: Fix crash on PyUnicode_AsUTF8AndSize() failure #8850

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 44 additions & 10 deletions Lib/test/datetimetester.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,8 @@ def test_tzname(self):
self.assertEqual('UTC+09:30', timezone(9.5 * HOUR).tzname(None))
self.assertEqual('UTC-00:01', timezone(timedelta(minutes=-1)).tzname(None))
self.assertEqual('XYZ', timezone(-5 * HOUR, 'XYZ').tzname(None))
# bpo-34454: Check that surrogates don't cause a crash.
self.assertEqual('\ud800', timezone(ZERO, '\ud800').tzname(None))

# Sub-minute offsets:
self.assertEqual('UTC+01:06:40', timezone(timedelta(0, 4000)).tzname(None))
Expand Down Expand Up @@ -1301,7 +1303,8 @@ def test_strftime(self):
#oh well, some systems just ignore those invalid ones.
#at least, exercise them to make sure that no crashes
#are generated
for f in ["%e", "%", "%#"]:
# bpo-34454: Check that surrogates don't cause a crash.
for f in ["%e", "%", "%#", "\ud800"]:
try:
t.strftime(f)
except ValueError:
Expand Down Expand Up @@ -1667,6 +1670,7 @@ def test_fromisoformat_fails(self):
# Test that fromisoformat() fails on invalid values
bad_strs = [
'', # Empty string
'\ud800', # bpo-34454: Surrogate code point
'009-03-04', # Not 10 characters
'123456789', # Not a date
'200a-12-04', # Invalid character in year
Expand Down Expand Up @@ -1744,6 +1748,9 @@ def test_isoformat(self):
self.assertEqual(t.isoformat('T'), "0001-02-03T04:05:01.000123")
self.assertEqual(t.isoformat(' '), "0001-02-03 04:05:01.000123")
self.assertEqual(t.isoformat('\x00'), "0001-02-03\x0004:05:01.000123")
# bpo-34454: Check that surrogates don't cause a crash.
self.assertEqual(t.isoformat('\ud800'),
"0001-02-03\ud80004:05:01.000123")
self.assertEqual(t.isoformat(timespec='hours'), "0001-02-03T04")
self.assertEqual(t.isoformat(timespec='minutes'), "0001-02-03T04:05")
self.assertEqual(t.isoformat(timespec='seconds'), "0001-02-03T04:05:01")
Expand All @@ -1752,6 +1759,8 @@ def test_isoformat(self):
self.assertEqual(t.isoformat(timespec='auto'), "0001-02-03T04:05:01.000123")
self.assertEqual(t.isoformat(sep=' ', timespec='minutes'), "0001-02-03 04:05")
self.assertRaises(ValueError, t.isoformat, timespec='foo')
# bpo-34454: Check that surrogates don't cause a crash.
self.assertRaises(ValueError, t.isoformat, timespec='\ud800')
# str is ISO format with the separator forced to a blank.
self.assertEqual(str(t), "0001-02-03 04:05:01.000123")

Expand Down Expand Up @@ -2275,13 +2284,19 @@ def test_utcnow(self):
self.assertLessEqual(abs(from_timestamp - from_now), tolerance)

def test_strptime(self):
string = '2004-12-01 13:02:47.197'
format = '%Y-%m-%d %H:%M:%S.%f'
expected = _strptime._strptime_datetime(self.theclass, string, format)
got = self.theclass.strptime(string, format)
self.assertEqual(expected, got)
self.assertIs(type(expected), self.theclass)
self.assertIs(type(got), self.theclass)
inputs = [
('2004-12-01 13:02:47.197', '%Y-%m-%d %H:%M:%S.%f'),
# bpo-34454: Check that surrogates don't cause a crash.
('2004-12-01\ud80013:02:47.197', '%Y-%m-%d\ud800%H:%M:%S.%f'),
]
for string, format in inputs:
with self.subTest(string=string, format=format):
expected = _strptime._strptime_datetime(self.theclass, string,
format)
got = self.theclass.strptime(string, format)
self.assertEqual(expected, got)
self.assertIs(type(expected), self.theclass)
self.assertIs(type(got), self.theclass)

strptime = self.theclass.strptime
self.assertEqual(strptime("+0002", "%z").utcoffset(), 2 * MINUTE)
Expand Down Expand Up @@ -2587,15 +2602,22 @@ def test_fromisoformat_separators(self):
' ', 'T', '\u007f', # 1-bit widths
'\u0080', 'ʁ', # 2-bit widths
'ᛇ', '時', # 3-bit widths
'🐍' # 4-bit widths
'🐍', # 4-bit widths
'\ud800', # bpo-34454
]

for sep in separators:
dt = self.theclass(2018, 1, 31, 23, 59, 47, 124789)
dtstr = dt.isoformat(sep=sep)

with self.subTest(dtstr=dtstr):
dt_rt = self.theclass.fromisoformat(dtstr)
try:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this try/except is the right solution here. We should fix the actual problem.

dt_rt = self.theclass.fromisoformat(dtstr)
except UnicodeEncodeError:
# FIXME: The C datetime implementation raises an exception
# while the pure-Python one succeeds.
if sep != '\ud800':
raise
self.assertEqual(dt, dt_rt)

def test_fromisoformat_ambiguous(self):
Expand Down Expand Up @@ -2639,6 +2661,7 @@ def test_fromisoformat_fails_datetime(self):
# Test that fromisoformat() fails on invalid values
bad_strs = [
'', # Empty string
'\ud800', # bpo-34454: Surrogate code point
'2009.04-19T03', # Wrong first separator
'2009-04.19T03', # Wrong second separator
'2009-04-19T0a', # Invalid hours
Expand Down Expand Up @@ -2863,6 +2886,8 @@ def test_isoformat(self):
self.assertEqual(t.isoformat(timespec='microseconds'), "12:34:56.123456")
self.assertEqual(t.isoformat(timespec='auto'), "12:34:56.123456")
self.assertRaises(ValueError, t.isoformat, timespec='monkey')
# bpo-34454: Check that surrogates don't cause a crash.
self.assertRaises(ValueError, t.isoformat, timespec='\ud800')

t = self.theclass(hour=12, minute=34, second=56, microsecond=999500)
self.assertEqual(t.isoformat(timespec='milliseconds'), "12:34:56.999")
Expand Down Expand Up @@ -2913,6 +2938,14 @@ def test_strftime(self):
# A naive object replaces %z and %Z with empty strings.
self.assertEqual(t.strftime("'%z' '%Z'"), "'' ''")

# bpo-34454: Check that surrogates don't cause a crash.
# FIXME: The C datetime implementation raises an exception
# while the pure-Python one succeeds.
try:
t.strftime('\ud800')
except UnicodeEncodeError:
pass

def test_format(self):
t = self.theclass(1, 2, 3, 4)
self.assertEqual(t.__format__(''), str(t))
Expand Down Expand Up @@ -3521,6 +3554,7 @@ def test_fromisoformat_timespecs(self):
def test_fromisoformat_fails(self):
bad_strs = [
'', # Empty string
'\ud800', # bpo-34454: Surrogate code point
'12:', # Ends on a separator
'12:30:', # Ends on a separator
'12:30:15.', # Ends on a separator
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix a crash in the ``fromisoformat`` methods of :mod:`datetime` classes when
:c:func:`PyUnicode_AsUTF8AndSize` fails. Patch by Alexey Izbyshev.
9 changes: 9 additions & 0 deletions Modules/_datetimemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2883,6 +2883,9 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) {
Py_ssize_t len;

const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
if (dt_ptr == NULL) {
return NULL;
}

int year = 0, month = 0, day = 0;

Expand Down Expand Up @@ -4257,6 +4260,9 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) {

Py_ssize_t len;
const char *p = PyUnicode_AsUTF8AndSize(tstr, &len);
if (p == NULL) {
return NULL;
}

int hour = 0, minute = 0, second = 0, microsecond = 0;
int tzoffset, tzimicrosecond = 0;
Expand Down Expand Up @@ -4850,6 +4856,9 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) {

Py_ssize_t len;
const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len);
if (dt_ptr == NULL) {
return NULL;
}
const char * p = dt_ptr;

int year = 0, month = 0, day = 0;
Expand Down