From dfd63aad3ab75c07b3bb1e62377936c600e4198c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 13 Jul 2018 22:03:27 +0900 Subject: [PATCH 1/4] Fix int(s) and similar functions that may break memory _PyUnicode_TransformDecimalAndSpaceToASCII() omitted the trailing NUL character, which caused a buffer overflow in _Py_string_to_number_with_underscores(). This bug was introduced in bpo-31979 (9b6c60cb). --- Objects/unicodeobject.c | 1 + Python/pystrtod.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 80d1bba1e9b407..936d35c86b271c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9072,6 +9072,7 @@ _PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode) int decimal = Py_UNICODE_TODECIMAL(ch); if (decimal < 0) { out[i] = '?'; + out[i+1] = '\0'; _PyUnicode_LENGTH(result) = i + 1; break; } diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 3546d44c84248b..461e8dcb5e0c4a 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -391,6 +391,8 @@ _Py_string_to_number_with_underscores( char *dup, *end; PyObject *result; + assert(s[orig_len] == '\0'); + if (strchr(s, '_') == NULL) { return innerfunc(s, orig_len, arg); } From 46d4225785823577aecf5025fb2987bc98af3f3b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 13 Jul 2018 22:09:58 +0900 Subject: [PATCH 2/4] Add NEWS --- .../Core and Builtins/2018-07-13-22-09-55.bpo-34087.I1Bxfc.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-07-13-22-09-55.bpo-34087.I1Bxfc.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-07-13-22-09-55.bpo-34087.I1Bxfc.rst b/Misc/NEWS.d/next/Core and Builtins/2018-07-13-22-09-55.bpo-34087.I1Bxfc.rst new file mode 100644 index 00000000000000..5147395fa21722 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-07-13-22-09-55.bpo-34087.I1Bxfc.rst @@ -0,0 +1 @@ +Fix buffer overflow while converting unicode to numeric values. 
From 938eb5e55486d3eaad298fa580047a1f0f3ca174 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 14 Jul 2018 00:22:47 +0900 Subject: [PATCH 3/4] Add one more assert --- Objects/unicodeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 936d35c86b271c..2b06f15f6c65e9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9080,6 +9080,7 @@ _PyUnicode_TransformDecimalAndSpaceToASCII(PyObject *unicode) } } + assert(_PyUnicode_CheckConsistency(result, 1)); return result; } From 85d976fc4dea92f7ef78de8d8b47aa3cad947e11 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 14 Jul 2018 00:34:10 +0900 Subject: [PATCH 4/4] Add tests causing assertion failure --- Lib/test/test_complex.py | 3 +++ Lib/test/test_float.py | 3 +++ Lib/test/test_long.py | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/Lib/test/test_complex.py b/Lib/test/test_complex.py index 2d883c5348ea6f..21c6eaed60540c 100644 --- a/Lib/test/test_complex.py +++ b/Lib/test/test_complex.py @@ -345,6 +345,9 @@ def split_zeros(x): self.assertEqual(type(complex("1"*500)), complex) # check whitespace processing self.assertEqual(complex('\N{EM SPACE}(\N{EN SPACE}1+1j ) '), 1+1j) + # Invalid unicode string + # See bpo-34087 + self.assertRaises(ValueError, complex, '\u3053\u3093\u306b\u3061\u306f') class EvilExc(Exception): pass diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 17174dd295dfcc..06ea90c207f56c 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -60,6 +60,9 @@ def test_float(self): # extra long strings should not be a problem float(b'.' + b'1'*1000) float('.' 
+ '1'*1000) + # Invalid unicode string + # See bpo-34087 + self.assertRaises(ValueError, float, '\u3053\u3093\u306b\u3061\u306f') def test_underscores(self): for lit in VALID_UNDERSCORE_LITERALS: diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index cc48259e35fb2c..8472889d48bade 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -373,6 +373,10 @@ def test_long(self): for base in invalid_bases: self.assertRaises(ValueError, int, '42', base) + # Invalid unicode string + # See bpo-34087 + self.assertRaises(ValueError, int, '\u3053\u3093\u306b\u3061\u306f') + def test_conversion(self):