From e2dad4a4f7b94cd5a227752739007d4c3193f42b Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Tue, 21 Aug 2018 23:43:28 +0300 Subject: [PATCH] bpo-34454: datetime: Fix crash on PyUnicode_AsUTF8AndSize() failure The missing NULL check was reported by Svace static analyzer. --- Lib/test/datetimetester.py | 54 +++++++++++++++---- .../2018-08-22-00-29-35.bpo-34454.dmIlq5.rst | 2 + Modules/_datetimemodule.c | 9 ++++ 3 files changed, 55 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-08-22-00-29-35.bpo-34454.dmIlq5.rst diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index f647a232f40442..97e5b5ba176900 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -302,6 +302,8 @@ def test_tzname(self): self.assertEqual('UTC+09:30', timezone(9.5 * HOUR).tzname(None)) self.assertEqual('UTC-00:01', timezone(timedelta(minutes=-1)).tzname(None)) self.assertEqual('XYZ', timezone(-5 * HOUR, 'XYZ').tzname(None)) + # bpo-34454: Check that surrogates don't cause a crash. + self.assertEqual('\ud800', timezone(ZERO, '\ud800').tzname(None)) # Sub-minute offsets: self.assertEqual('UTC+01:06:40', timezone(timedelta(0, 4000)).tzname(None)) @@ -1301,7 +1303,8 @@ def test_strftime(self): #oh well, some systems just ignore those invalid ones. #at least, exercise them to make sure that no crashes #are generated - for f in ["%e", "%", "%#"]: + # bpo-34454: Check that surrogates don't cause a crash. + for f in ["%e", "%", "%#", "\ud800"]: try: t.strftime(f) except ValueError: @@ -1667,6 +1670,7 @@ def test_fromisoformat_fails(self): # Test that fromisoformat() fails on invalid values bad_strs = [ '', # Empty string + '\ud800', # bpo-34454: Surrogate code point '009-03-04', # Not 10 characters '123456789', # Not a date '200a-12-04', # Invalid character in year @@ -1744,6 +1748,9 @@ def test_isoformat(self): self.assertEqual(t.isoformat('T'), "0001-02-03T04:05:01.000123") self.assertEqual(t.isoformat(' '), "0001-02-03 04:05:01.000123") self.assertEqual(t.isoformat('\x00'), "0001-02-03\x0004:05:01.000123") + # bpo-34454: Check that surrogates don't cause a crash. + self.assertEqual(t.isoformat('\ud800'), + "0001-02-03\ud80004:05:01.000123") self.assertEqual(t.isoformat(timespec='hours'), "0001-02-03T04") self.assertEqual(t.isoformat(timespec='minutes'), "0001-02-03T04:05") self.assertEqual(t.isoformat(timespec='seconds'), "0001-02-03T04:05:01") @@ -1752,6 +1759,8 @@ def test_isoformat(self): self.assertEqual(t.isoformat(timespec='auto'), "0001-02-03T04:05:01.000123") self.assertEqual(t.isoformat(sep=' ', timespec='minutes'), "0001-02-03 04:05") self.assertRaises(ValueError, t.isoformat, timespec='foo') + # bpo-34454: Check that surrogates don't cause a crash. + self.assertRaises(ValueError, t.isoformat, timespec='\ud800') # str is ISO format with the separator forced to a blank. self.assertEqual(str(t), "0001-02-03 04:05:01.000123") @@ -2275,13 +2284,19 @@ def test_utcnow(self): self.assertLessEqual(abs(from_timestamp - from_now), tolerance) def test_strptime(self): - string = '2004-12-01 13:02:47.197' - format = '%Y-%m-%d %H:%M:%S.%f' - expected = _strptime._strptime_datetime(self.theclass, string, format) - got = self.theclass.strptime(string, format) - self.assertEqual(expected, got) - self.assertIs(type(expected), self.theclass) - self.assertIs(type(got), self.theclass) + inputs = [ + ('2004-12-01 13:02:47.197', '%Y-%m-%d %H:%M:%S.%f'), + # bpo-34454: Check that surrogates don't cause a crash. + ('2004-12-01\ud80013:02:47.197', '%Y-%m-%d\ud800%H:%M:%S.%f'), + ] + for string, format in inputs: + with self.subTest(string=string, format=format): + expected = _strptime._strptime_datetime(self.theclass, string, + format) + got = self.theclass.strptime(string, format) + self.assertEqual(expected, got) + self.assertIs(type(expected), self.theclass) + self.assertIs(type(got), self.theclass) strptime = self.theclass.strptime self.assertEqual(strptime("+0002", "%z").utcoffset(), 2 * MINUTE) @@ -2587,7 +2602,8 @@ def test_fromisoformat_separators(self): ' ', 'T', '\u007f', # 1-bit widths '\u0080', 'ʁ', # 2-bit widths 'ᛇ', '時', # 3-bit widths - '🐍' # 4-bit widths + '🐍', # 4-bit widths + '\ud800', # bpo-34454 ] for sep in separators: @@ -2595,7 +2611,13 @@ def test_fromisoformat_separators(self): dtstr = dt.isoformat(sep=sep) with self.subTest(dtstr=dtstr): - dt_rt = self.theclass.fromisoformat(dtstr) + try: + dt_rt = self.theclass.fromisoformat(dtstr) + except UnicodeEncodeError: + # FIXME: The C datetime implementation raises an exception + # while the pure-Python one succeeds. + if sep != '\ud800': + raise self.assertEqual(dt, dt_rt) def test_fromisoformat_ambiguous(self): @@ -2639,6 +2661,7 @@ def test_fromisoformat_fails_datetime(self): # Test that fromisoformat() fails on invalid values bad_strs = [ '', # Empty string + '\ud800', # bpo-34454: Surrogate code point '2009.04-19T03', # Wrong first separator '2009-04.19T03', # Wrong second separator '2009-04-19T0a', # Invalid hours @@ -2863,6 +2886,8 @@ def test_isoformat(self): self.assertEqual(t.isoformat(timespec='microseconds'), "12:34:56.123456") self.assertEqual(t.isoformat(timespec='auto'), "12:34:56.123456") self.assertRaises(ValueError, t.isoformat, timespec='monkey') + # bpo-34454: Check that surrogates don't cause a crash. + self.assertRaises(ValueError, t.isoformat, timespec='\ud800') t = self.theclass(hour=12, minute=34, second=56, microsecond=999500) self.assertEqual(t.isoformat(timespec='milliseconds'), "12:34:56.999") @@ -2913,6 +2938,14 @@ def test_strftime(self): # A naive object replaces %z and %Z with empty strings. self.assertEqual(t.strftime("'%z' '%Z'"), "'' ''") + # bpo-34454: Check that surrogates don't cause a crash. + # FIXME: The C datetime implementation raises an exception + # while the pure-Python one succeeds. + try: + t.strftime('\ud800') + except UnicodeEncodeError: + pass + def test_format(self): t = self.theclass(1, 2, 3, 4) self.assertEqual(t.__format__(''), str(t)) @@ -3521,6 +3554,7 @@ def test_fromisoformat_timespecs(self): def test_fromisoformat_fails(self): bad_strs = [ '', # Empty string + '\ud800', # bpo-34454: Surrogate code point '12:', # Ends on a separator '12:30:', # Ends on a separator '12:30:15.', # Ends on a separator diff --git a/Misc/NEWS.d/next/Library/2018-08-22-00-29-35.bpo-34454.dmIlq5.rst b/Misc/NEWS.d/next/Library/2018-08-22-00-29-35.bpo-34454.dmIlq5.rst new file mode 100644 index 00000000000000..367ab44ab7c845 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-08-22-00-29-35.bpo-34454.dmIlq5.rst @@ -0,0 +1,2 @@ +Fix crash in ``fromisoformat`` method of classes from :mod:`datetime` on +:c:func:`PyUnicode_AsUTF8AndSize()` failure. Patch by Alexey Izbyshev. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 076912d58f4af8..a06cdf1a6cd5a8 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -2883,6 +2883,9 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) { Py_ssize_t len; const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); + if (dt_ptr == NULL) { + return NULL; + } int year = 0, month = 0, day = 0; @@ -4257,6 +4260,9 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { Py_ssize_t len; const char *p = PyUnicode_AsUTF8AndSize(tstr, &len); + if (p == NULL) { + return NULL; + } int hour = 0, minute = 0, second = 0, microsecond = 0; int tzoffset, tzimicrosecond = 0; @@ -4850,6 +4856,9 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { Py_ssize_t len; const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); + if (dt_ptr == NULL) { + return NULL; + } const char * p = dt_ptr; int year = 0, month = 0, day = 0;