From f256afee0d31456efbb0a3e41e0ad99fc551a0ac Mon Sep 17 00:00:00 2001 From: Ionite Date: Thu, 9 Feb 2023 20:16:33 -0500 Subject: [PATCH 01/24] Fix undefined behavior in listiter_reduce from _PyEval_GetBuiltin side effects --- Objects/listobject.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index ca6b712311340a..cdf11174e9043e 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3443,19 +3443,26 @@ static PyObject * listiter_reduce_general(void *_it, int forward) { PyObject *list; + PyObject *iter; + PyObject *reversed; + + /* _PyEval_GetBuiltin can invoke arbitrary `__eq__` code. + * calls must be *before* access of _it pointers, + * since C/C++ parameter eval order is undefined. + * see issue #101765 */ /* the objects are not the same, index is of different types! */ if (forward) { + iter = _PyEval_GetBuiltin(&_Py_ID(iter)); _PyListIterObject *it = (_PyListIterObject *)_it; if (it->it_seq) { - return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } } else { + reversed = _PyEval_GetBuiltin(&_Py_ID(reversed)); listreviterobject *it = (listreviterobject *)_it; if (it->it_seq) { - return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(reversed)), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", reversed, it->it_seq, it->it_index); } } /* empty iterator, create an empty list */ From 6b4faad87e981730cde7bdbefe4d8d68f0a0ac31 Mon Sep 17 00:00:00 2001 From: Ionite Date: Thu, 9 Feb 2023 20:34:47 -0500 Subject: [PATCH 02/24] Update comment to not mention `__eq__` --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index cdf11174e9043e..7353d15b23d653 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3446,7 +3446,7 @@ listiter_reduce_general(void *_it, int forward) PyObject *iter; PyObject *reversed; - /* _PyEval_GetBuiltin can invoke arbitrary `__eq__` code. + /* _PyEval_GetBuiltin can invoke arbitrary code. * calls must be *before* access of _it pointers, * since C/C++ parameter eval order is undefined. * see issue #101765 */ From a6d6211ec7ee7f35ce693232ba64cfbf8685a4e5 Mon Sep 17 00:00:00 2001 From: Ionite Date: Thu, 9 Feb 2023 21:13:15 -0500 Subject: [PATCH 03/24] Fix undefined behavior in iter_reduce and calliter_reduce --- Objects/iterobject.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/Objects/iterobject.c b/Objects/iterobject.c index cfd6d0a7c959c9..82c138d1cf6829 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -102,11 +102,17 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list( static PyObject * iter_reduce(seqiterobject *it, PyObject *Py_UNUSED(ignored)) { + PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); + + /* _PyEval_GetBuiltin can invoke arbitrary code. + * calls must be *before* access of `it` pointers, + * since C/C++ parameter eval order is undefined. + * see issue #101765 */ + if (it->it_seq != NULL) - return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); else - return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter))); + return Py_BuildValue("N(())", iter); } PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); @@ -239,11 +245,17 @@ calliter_iternext(calliterobject *it) static PyObject * calliter_reduce(calliterobject *it, PyObject *Py_UNUSED(ignored)) { + PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); + + /* _PyEval_GetBuiltin can invoke arbitrary code. + * calls must be *before* access of `it` pointers, + * since C/C++ parameter eval order is undefined. + * see issue #101765 */ + if (it->it_callable != NULL && it->it_sentinel != NULL) - return Py_BuildValue("N(OO)", _PyEval_GetBuiltin(&_Py_ID(iter)), - it->it_callable, it->it_sentinel); + return Py_BuildValue("N(OO)", iter, it->it_callable, it->it_sentinel); else - return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter))); + return Py_BuildValue("N(())", iter); } static PyMethodDef calliter_methods[] = { From 4ccf427e9730249f9c9e79cd5a42f205b86ba914 Mon Sep 17 00:00:00 2001 From: Ionite Date: Thu, 9 Feb 2023 21:14:37 -0500 Subject: [PATCH 04/24] Update listiter_reduce_general comment --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 7353d15b23d653..48483f95ba86e9 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3447,7 +3447,7 @@ listiter_reduce_general(void *_it, int forward) PyObject *reversed; /* _PyEval_GetBuiltin can invoke arbitrary code. - * calls must be *before* access of _it pointers, + * calls must be *before* access of `_it` pointers, * since C/C++ parameter eval order is undefined. * see issue #101765 */ From c2c9cfb66dd2eca788e63b07e65d29652e5d9877 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 00:13:49 -0500 Subject: [PATCH 05/24] Fix undefined behavior in bytearrayiter_reduce from _PyEval_GetBuiltin side effects --- Objects/bytearrayobject.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 072089e39aa207..36601f7ccb64b9 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2391,11 +2391,17 @@ PyDoc_STRVAR(length_hint_doc, static PyObject * bytearrayiter_reduce(bytesiterobject *it, PyObject *Py_UNUSED(ignored)) { + PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); + + /* _PyEval_GetBuiltin can invoke arbitrary code. + * calls must be *before* access of `it` pointers, + * since C/C++ parameter eval order is undefined. + * see issue #101765 */ + if (it->it_seq != NULL) { - return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } else { - return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter))); + return Py_BuildValue("N(())", iter); } } From e2989d938497ef4c1f4b9b9f68ac064e65c8c31a Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 00:26:13 -0500 Subject: [PATCH 06/24] Fix undefined behavior in striter_reduce from _PyEval_GetBuiltin side effects --- Objects/bytesobject.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index ba2c2e978c6e42..867004f89b11b3 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3169,11 +3169,17 @@ PyDoc_STRVAR(length_hint_doc, static PyObject * striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored)) { + PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); + + /* _PyEval_GetBuiltin can invoke arbitrary code. + * calls must be *before* access of `it` pointers, + * since C/C++ parameter eval order is undefined. + * see issue #101765 */ + if (it->it_seq != NULL) { - return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } else { - return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter))); + return Py_BuildValue("N(())", iter); } } From 71960a887a9c0c6b6ac0fb18d0abb7633e327d06 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 00:32:14 -0500 Subject: [PATCH 07/24] Fix undefined behavior in tupleiter_reduce from _PyEval_GetBuiltin side effects --- Objects/tupleobject.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index e1b9953226c0d7..615ffae6c9556e 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -1048,11 +1048,17 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list( static PyObject * tupleiter_reduce(_PyTupleIterObject *it, PyObject *Py_UNUSED(ignored)) { + PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); + + /* _PyEval_GetBuiltin can invoke arbitrary code. + * calls must be *before* access of `it` pointers, + * since C/C++ parameter eval order is undefined. + * see issue #101765 */ + if (it->it_seq) - return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); else - return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter))); + return Py_BuildValue("N(())", iter); } static PyObject * From efa0540d36084024276261da7e4535008a519582 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 01:04:39 -0500 Subject: [PATCH 08/24] Move iter call in unicodeiter_reduce before `it` pointer access due to potential side effects --- Objects/unicodeobject.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f0c7aa7707fdb5..d9e95f0cc0173f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14784,14 +14784,15 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list( static PyObject * unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) { + PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); + if (it->it_seq != NULL) { - return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } else { PyObject *u = unicode_new_empty(); if (u == NULL) return NULL; - return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), u); + return Py_BuildValue("N(N)", iter, u); } } From c5abb14eb2750bd316abb5e9ed8e286c3f5aca8c Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 01:38:07 -0500 Subject: [PATCH 09/24] Add iter reduce tests for issue #101765 --- Lib/test/test_iter.py | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index acbdcb5f302060..965be159b477f4 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -91,6 +91,12 @@ def __call__(self): raise IndexError # Emergency stop return i +class EmptyIterClass: + def __len__(self): + return 0 + def __getitem__(self, i): + raise StopIteration + # Main test suite class TestCase(unittest.TestCase): @@ -238,6 +244,52 @@ def test_mutating_seq_class_exhausted_iter(self): self.assertEqual(list(empit), [5, 6]) self.assertEqual(list(a), [0, 1, 2, 3, 4, 5, 6]) + def test_reduce_mutating_builtins_iter(self): + # This is a reproducer of issue #101765 + # where iter `__reduce__` calls could lead to a segfault or SystemError + # depending on the order of C argument evaluation, which is undefined + + # Backup builtins.iter + builtins = __builtins__.__dict__ if hasattr(__builtins__, "__dict__") else __builtins__ + orig_iter = builtins["iter"] + + def run(item): + (fn, *flag), *initializer = item + it = iter(fn(*initializer), *flag) + + class CustomStr: + def __hash__(self): + return hash("iter") + def __eq__(self, other): + # Here we exhaust `it`, possibly changing our `it_seq` pointer to NULL + # The `__reduce__` call should correctly get the pointers after this call + list(it) + return other == "iter" + + _iter = builtins["iter"] + del builtins["iter"] + builtins[CustomStr()] = _iter + + return it.__reduce__() + + types = [ + ([EmptyIterClass],), + ([bytes], 8), + ([bytearray], 8), + ([tuple], range(8)), + ([lambda: (lambda: 0), 0],) + ] + + try: + self.assertEqual(run(([str], "xyz")), (orig_iter, ("xyz",), 0)) + self.assertEqual(run(([list], range(8))), (orig_iter, ([],))) + for case in types: + self.assertEqual(run(case), (orig_iter, ((),))) + finally: + # Restore original iter + del builtins["iter"] + builtins["iter"] = orig_iter + # Test a new_style class with __iter__ but no next() method def test_new_style_iter_class(self): class IterClass(object): From 45522c6dd8f6d39b3aeb05015371e31a88168d29 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 01:41:21 -0500 Subject: [PATCH 10/24] Remove C++ reference in comments --- Objects/bytearrayobject.c | 2 +- Objects/bytesobject.c | 2 +- Objects/iterobject.c | 2 +- Objects/listobject.c | 2 +- Objects/tupleobject.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 36601f7ccb64b9..707f1f8a4a3a7d 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2395,7 +2395,7 @@ bytearrayiter_reduce(bytesiterobject *it, PyObject *Py_UNUSED(ignored)) /* _PyEval_GetBuiltin can invoke arbitrary code. * calls must be *before* access of `it` pointers, - * since C/C++ parameter eval order is undefined. + * since C parameter eval order is undefined. * see issue #101765 */ if (it->it_seq != NULL) { diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 867004f89b11b3..75f3a5d44cf705 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3173,7 +3173,7 @@ striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored)) /* _PyEval_GetBuiltin can invoke arbitrary code. * calls must be *before* access of `it` pointers, - * since C/C++ parameter eval order is undefined. + * since C parameter eval order is undefined. * see issue #101765 */ if (it->it_seq != NULL) { diff --git a/Objects/iterobject.c b/Objects/iterobject.c index 82c138d1cf6829..3159e0786ebd3f 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -106,7 +106,7 @@ iter_reduce(seqiterobject *it, PyObject *Py_UNUSED(ignored)) /* _PyEval_GetBuiltin can invoke arbitrary code. * calls must be *before* access of `it` pointers, - * since C/C++ parameter eval order is undefined. + * since C parameter eval order is undefined. * see issue #101765 */ if (it->it_seq != NULL) diff --git a/Objects/listobject.c b/Objects/listobject.c index 48483f95ba86e9..80df09ebb11661 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3448,7 +3448,7 @@ listiter_reduce_general(void *_it, int forward) /* _PyEval_GetBuiltin can invoke arbitrary code. * calls must be *before* access of `_it` pointers, - * since C/C++ parameter eval order is undefined. + * since C parameter eval order is undefined. * see issue #101765 */ /* the objects are not the same, index is of different types! */ diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 615ffae6c9556e..87e207205348bd 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -1052,7 +1052,7 @@ tupleiter_reduce(_PyTupleIterObject *it, PyObject *Py_UNUSED(ignored)) /* _PyEval_GetBuiltin can invoke arbitrary code. * calls must be *before* access of `it` pointers, - * since C/C++ parameter eval order is undefined. + * since C parameter eval order is undefined. * see issue #101765 */ if (it->it_seq) From 4f5fc19c1e42721e4b70f788c571b75e836512a7 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 01:42:46 -0500 Subject: [PATCH 11/24] Remove C++ reference in comments --- Objects/iterobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/iterobject.c b/Objects/iterobject.c index 3159e0786ebd3f..b50efc21aa2f8c 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -249,7 +249,7 @@ calliter_reduce(calliterobject *it, PyObject *Py_UNUSED(ignored)) /* _PyEval_GetBuiltin can invoke arbitrary code. * calls must be *before* access of `it` pointers, - * since C/C++ parameter eval order is undefined. + * since C parameter eval order is undefined. * see issue #101765 */ if (it->it_callable != NULL && it->it_sentinel != NULL) From 049a8dda075a4b44cc7bd6997b2c42b70fead05e Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 01:44:43 -0500 Subject: [PATCH 12/24] Move builtin declarations inside if blocks --- Objects/listobject.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 80df09ebb11661..579c30ba9570e7 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3443,8 +3443,6 @@ static PyObject * listiter_reduce_general(void *_it, int forward) { PyObject *list; - PyObject *iter; - PyObject *reversed; /* _PyEval_GetBuiltin can invoke arbitrary code. * calls must be *before* access of `_it` pointers, @@ -3453,13 +3451,13 @@ listiter_reduce_general(void *_it, int forward) /* the objects are not the same, index is of different types! */ if (forward) { - iter = _PyEval_GetBuiltin(&_Py_ID(iter)); + PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); _PyListIterObject *it = (_PyListIterObject *)_it; if (it->it_seq) { return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } } else { - reversed = _PyEval_GetBuiltin(&_Py_ID(reversed)); + PyObject *reversed = _PyEval_GetBuiltin(&_Py_ID(reversed)); listreviterobject *it = (listreviterobject *)_it; if (it->it_seq) { return Py_BuildValue("N(O)n", reversed, it->it_seq, it->it_index); From ef4f955946cee3d4e3d9520877cd90ebb9b7e705 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 02:07:41 -0500 Subject: [PATCH 13/24] Move _PyEval_GetBuiltin before gi checks, add gi NULL check in ga_iter_reduce --- Objects/genericaliasobject.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index 675fd496eefdaf..8971746d4a845a 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -877,8 +877,18 @@ ga_iter_clear(PyObject *self) { static PyObject * ga_iter_reduce(PyObject *self, PyObject *Py_UNUSED(ignored)) { + PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); gaiterobject *gi = (gaiterobject *)self; - return Py_BuildValue("N(O)", _PyEval_GetBuiltin(&_Py_ID(iter)), gi->obj); + + /* _PyEval_GetBuiltin can invoke arbitrary code. + * call must be *before* access of `gi` pointers, + * since C parameter eval order is undefined. + * see issue #101765 */ + + if (gi->obj) + return Py_BuildValue("N(O)", iter, gi->obj); + else + return Py_BuildValue("N(())", iter); } static PyMethodDef ga_iter_methods[] = { From 7d4afb0999233e8a54f2cb215c4f53656b861667 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 02:14:25 -0500 Subject: [PATCH 14/24] Update iter reduce mutating tests for generic alias --- Lib/test/test_iter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index 965be159b477f4..e73cd42dc8d4f0 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -266,9 +266,8 @@ def __eq__(self, other): list(it) return other == "iter" - _iter = builtins["iter"] del builtins["iter"] - builtins[CustomStr()] = _iter + builtins[CustomStr()] = orig_iter return it.__reduce__() @@ -277,11 +276,12 @@ def __eq__(self, other): ([bytes], 8), ([bytearray], 8), ([tuple], range(8)), - ([lambda: (lambda: 0), 0],) + ([lambda: (lambda: 0), 0],), + ([tuple[int]],) # GenericAlias ] try: - self.assertEqual(run(([str], "xyz")), (orig_iter, ("xyz",), 0)) + self.assertEqual(run(([str], "xyz")), (orig_iter, ("",))) self.assertEqual(run(([list], range(8))), (orig_iter, ([],))) for case in types: self.assertEqual(run(case), (orig_iter, ((),))) From 8e4418daaf0763b98d6290b06410affa9a51e26c Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 10 Feb 2023 07:21:48 +0000 Subject: [PATCH 15/24] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst new file mode 100644 index 00000000000000..d8254dfa10acb4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst @@ -0,0 +1 @@ +Fix SystemError / segmentation fault in iter `__reduce__` when calling `__builtins__.__dict__["iter"]` mutates the iter object. From d8ced8eba34990775637a4fa390122db24529c5b Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 02:25:47 -0500 Subject: [PATCH 16/24] Fix backticks format for news --- .../2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst index d8254dfa10acb4..8eac13d92770c6 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst @@ -1 +1 @@ -Fix SystemError / segmentation fault in iter `__reduce__` when calling `__builtins__.__dict__["iter"]` mutates the iter object. +Fix SystemError / segmentation fault in iter ``__reduce__`` when calling ``__builtins__.__dict__["iter"]`` mutates the iter object. From 178b8ea41bb045eed63d06674e2f337d4de97bef Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 12:46:37 -0500 Subject: [PATCH 17/24] Refactor iter reduce builtins mutation tests - Added some comments for dict del usages - Switched to `__builtin__` instead of conditional `__dict__` access - Use kwargs for improved readability --- Lib/test/test_iter.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index e73cd42dc8d4f0..3ff2d39083ef13 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -250,12 +250,14 @@ def test_reduce_mutating_builtins_iter(self): # depending on the order of C argument evaluation, which is undefined # Backup builtins.iter - builtins = __builtins__.__dict__ if hasattr(__builtins__, "__dict__") else __builtins__ + builtins = __builtins__ orig_iter = builtins["iter"] - def run(item): - (fn, *flag), *initializer = item - it = iter(fn(*initializer), *flag) + def run(item, init=None, sentinel=None): + if init is not None: + args = init if isinstance(init, tuple) else (init,) + item = item(*args) + it = iter(item) if sentinel is None else iter(item, sentinel) class CustomStr: def __hash__(self): @@ -266,28 +268,35 @@ def __eq__(self, other): list(it) return other == "iter" + # del is required here + # to avoid calling the last test case's custom __eq__ del builtins["iter"] builtins[CustomStr()] = orig_iter return it.__reduce__() types = [ - ([EmptyIterClass],), - ([bytes], 8), - ([bytearray], 8), - ([tuple], range(8)), - ([lambda: (lambda: 0), 0],), - ([tuple[int]],) # GenericAlias + (EmptyIterClass, ()), + (bytes, 8), + (bytearray, 8), + (tuple, range(8)), + (lambda: 0, None, 0), + (tuple[int],) # GenericAlias ] try: - self.assertEqual(run(([str], "xyz")), (orig_iter, ("",))) - self.assertEqual(run(([list], range(8))), (orig_iter, ([],))) + # The returned value of `__reduce__` should not only be valid + # but also *empty*, as `it` was exhausted during `__eq__` + # i.e "xyz" returns (iter, ("",)) + self.assertEqual(run(str, "xyz"), (orig_iter, ("",))) + self.assertEqual(run(list, range(8)), (orig_iter, ([],))) for case in types: - self.assertEqual(run(case), (orig_iter, ((),))) + self.assertEqual(run(*case), (orig_iter, ((),))) finally: - # Restore original iter + # del is required here + # to avoid calling our custom __eq__ del builtins["iter"] + # Restore original iter builtins["iter"] = orig_iter # Test a new_style class with __iter__ but no next() method From 49ba8c3bfc09e026970bc4fa842c466dcee7d0f5 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 13:28:46 -0500 Subject: [PATCH 18/24] Update iter mutating builtins test to include reversed iterator for list conditional branch --- Lib/test/test_iter.py | 54 ++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index 3ff2d39083ef13..a3861cfbf2f5e2 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -7,6 +7,8 @@ from test.support import check_free_after_iterating, ALWAYS_EQ, NEVER_EQ import pickle import collections.abc +import functools +import contextlib # Test result of triple loop (too big to inline) TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2), @@ -249,29 +251,34 @@ def test_reduce_mutating_builtins_iter(self): # where iter `__reduce__` calls could lead to a segfault or SystemError # depending on the order of C argument evaluation, which is undefined - # Backup builtins.iter + # Backup builtins builtins = __builtins__ - orig_iter = builtins["iter"] + orig = {"iter": iter, "reversed": reversed} - def run(item, init=None, sentinel=None): + def run(builtin_name, item, init=None, sentinel=None): if init is not None: args = init if isinstance(init, tuple) else (init,) item = item(*args) it = iter(item) if sentinel is None else iter(item, sentinel) class CustomStr: + def __init__(self, name, iterator): + self.name = name + self.iterator = iterator def __hash__(self): - return hash("iter") + return hash(self.name) def __eq__(self, other): - # Here we exhaust `it`, possibly changing our `it_seq` pointer to NULL - # The `__reduce__` call should correctly get the pointers after this call - list(it) - return other == "iter" + # Here we exhaust our iterator, possibly changing + # its `it_seq` pointer to NULL + # The `__reduce__` call should correctly get + # the pointers after this call + list(self.iterator) + return other == self.name # del is required here # to avoid calling the last test case's custom __eq__ - del builtins["iter"] - builtins[CustomStr()] = orig_iter + del builtins[builtin_name] + builtins[CustomStr(builtin_name, it)] = orig[builtin_name] return it.__reduce__() @@ -285,19 +292,34 @@ def __eq__(self, other): ] try: + run_iter = functools.partial(run, "iter") # The returned value of `__reduce__` should not only be valid # but also *empty*, as `it` was exhausted during `__eq__` # i.e "xyz" returns (iter, ("",)) - self.assertEqual(run(str, "xyz"), (orig_iter, ("",))) - self.assertEqual(run(list, range(8)), (orig_iter, ([],))) + self.assertEqual(run_iter(str, "xyz"), (orig["iter"], ("",))) + self.assertEqual(run_iter(list, range(8)), (orig["iter"], ([],))) + + # _PyEval_GetBuiltin is also called for `reversed` in a branch of + # listiter_reduce_general + self.assertEqual( + run("reversed", orig["reversed"], list(range(8))), + (iter, ([],)) + ) + for case in types: - self.assertEqual(run(*case), (orig_iter, ((),))) + self.assertEqual(run_iter(*case), (orig["iter"], ((),))) finally: # del is required here # to avoid calling our custom __eq__ - del builtins["iter"] - # Restore original iter - builtins["iter"] = orig_iter + # we also need to supress KeyErrors in case + # a failed test deletes the key without setting anything + with contextlib.suppress(KeyError): + del builtins["iter"] + with contextlib.suppress(KeyError): + del builtins["reversed"] + # Restore original builtins + for key, func in orig.items(): + builtins[key] = func # Test a new_style class with __iter__ but no next() method def test_new_style_iter_class(self): From 93854e172e0b4c7cb3b999bdec6c97da74853a90 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 13:32:46 -0500 Subject: [PATCH 19/24] Add comment in unicodeiter_reduce for moving iter call before it pointers --- Objects/unicodeobject.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d9e95f0cc0173f..168b9172489325 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14785,6 +14785,11 @@ static PyObject * unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) { PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); + + /* _PyEval_GetBuiltin can invoke arbitrary code. + * calls must be *before* access of `it` pointers, + * since C parameter eval order is undefined. + * see issue #101765 */ if (it->it_seq != NULL) { return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); From 98ec3c63a973d06e784f8a04bcb67d8c324808ec Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 13:42:31 -0500 Subject: [PATCH 20/24] Change test `__builtins__` to builtins import --- Lib/test/test_iter.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index a3861cfbf2f5e2..abef7329407b9e 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -9,6 +9,7 @@ import collections.abc import functools import contextlib +import builtins # Test result of triple loop (too big to inline) TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2), @@ -252,7 +253,7 @@ def test_reduce_mutating_builtins_iter(self): # depending on the order of C argument evaluation, which is undefined # Backup builtins - builtins = __builtins__ + builtins_dict = builtins.__dict__ orig = {"iter": iter, "reversed": reversed} def run(builtin_name, item, init=None, sentinel=None): @@ -277,8 +278,8 @@ def __eq__(self, other): # del is required here # to avoid calling the last test case's custom __eq__ - del builtins[builtin_name] - builtins[CustomStr(builtin_name, it)] = orig[builtin_name] + del builtins_dict[builtin_name] + builtins_dict[CustomStr(builtin_name, it)] = orig[builtin_name] return it.__reduce__() @@ -314,12 +315,12 @@ def __eq__(self, other): # we also need to supress KeyErrors in case # a failed test deletes the key without setting anything with contextlib.suppress(KeyError): - del builtins["iter"] + del builtins_dict["iter"] with contextlib.suppress(KeyError): - del builtins["reversed"] + del builtins_dict["reversed"] # Restore original builtins for key, func in orig.items(): - builtins[key] = func + builtins_dict[key] = func # Test a new_style class with __iter__ but no next() method def test_new_style_iter_class(self): From e6614951574c699a926be5eea833c0703d96205a Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 15:22:06 -0500 Subject: [PATCH 21/24] Change NEWS blurb phrasing --- .../2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst index 8eac13d92770c6..cc99779a944ec6 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst @@ -1 +1 @@ -Fix SystemError / segmentation fault in iter ``__reduce__`` when calling ``__builtins__.__dict__["iter"]`` mutates the iter object. +Fix SystemError / segmentation fault in iter ``__reduce__`` when internal access of ``builtins.__dict__`` keys mutates the iter object. From 19ab9c63b275eaa3b6e10e04d5eec642083f233f Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 10 Feb 2023 15:34:55 -0500 Subject: [PATCH 22/24] Update iter reduce mutating builtins test comments and simplify logic --- Lib/test/test_iter.py | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index abef7329407b9e..ea059aa541c2c5 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -256,10 +256,7 @@ def test_reduce_mutating_builtins_iter(self): builtins_dict = builtins.__dict__ orig = {"iter": iter, "reversed": reversed} - def run(builtin_name, item, init=None, sentinel=None): - if init is not None: - args = init if isinstance(init, tuple) else (init,) - item = item(*args) + def run(builtin_name, item, sentinel=None): it = iter(item) if sentinel is None else iter(item, sentinel) class CustomStr: @@ -277,18 +274,19 @@ def __eq__(self, other): return other == self.name # del is required here - # to avoid calling the last test case's custom __eq__ + # since only setting will result in + # both keys existing with a hash collision del builtins_dict[builtin_name] builtins_dict[CustomStr(builtin_name, it)] = orig[builtin_name] return it.__reduce__() types = [ - (EmptyIterClass, ()), - (bytes, 8), - (bytearray, 8), - (tuple, range(8)), - (lambda: 0, None, 0), + (EmptyIterClass(),), + (bytes(8),), + (bytearray(8),), + ((1, 2, 3),), + (lambda: 0, 0), (tuple[int],) # GenericAlias ] @@ -297,29 +295,25 @@ def __eq__(self, other): # The returned value of `__reduce__` should not only be valid # but also *empty*, as `it` was exhausted during `__eq__` # i.e "xyz" returns (iter, ("",)) - self.assertEqual(run_iter(str, "xyz"), (orig["iter"], ("",))) - self.assertEqual(run_iter(list, range(8)), (orig["iter"], ([],))) + self.assertEqual(run_iter("xyz"), (orig["iter"], ("",))) + self.assertEqual(run_iter([1, 2, 3]), (orig["iter"], ([],))) # _PyEval_GetBuiltin is also called for `reversed` in a branch of # listiter_reduce_general self.assertEqual( - run("reversed", orig["reversed"], list(range(8))), + run("reversed", orig["reversed"](list(range(8)))), (iter, ([],)) ) for case in types: self.assertEqual(run_iter(*case), (orig["iter"], ((),))) finally: - # del is required here - # to avoid calling our custom __eq__ - # we also need to supress KeyErrors in case - # a failed test deletes the key without setting anything - with contextlib.suppress(KeyError): - del builtins_dict["iter"] - with contextlib.suppress(KeyError): - del builtins_dict["reversed"] # Restore original builtins + # need to suppress KeyErrors in case + # a failed test deletes the key without setting anything for key, func in orig.items(): + with contextlib.suppress(KeyError): + del builtins_dict[key] builtins_dict[key] = func # Test a new_style class with __iter__ but no next() method From 9b664c225c44e14870b83c59a003d7a879a99831 Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 24 Feb 2023 14:42:53 -0500 Subject: [PATCH 23/24] Update comments to better reflect issue --- Objects/bytearrayobject.c | 5 ++--- Objects/bytesobject.c | 5 ++--- Objects/genericaliasobject.c | 5 ++--- Objects/iterobject.c | 10 ++++------ Objects/listobject.c | 5 ++--- Objects/tupleobject.c | 5 ++--- Objects/unicodeobject.c | 7 +++---- 7 files changed, 17 insertions(+), 25 deletions(-) diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 707f1f8a4a3a7d..49d4dd524696a5 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2393,9 +2393,8 @@ bytearrayiter_reduce(bytesiterobject *it, PyObject *Py_UNUSED(ignored)) { PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); - /* _PyEval_GetBuiltin can invoke arbitrary code. - * calls must be *before* access of `it` pointers, - * since C parameter eval order is undefined. + /* _PyEval_GetBuiltin can invoke arbitrary code, + * call must be before access of iterator pointers. * see issue #101765 */ if (it->it_seq != NULL) { diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 75f3a5d44cf705..657443f31fa709 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3171,9 +3171,8 @@ striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored)) { PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); - /* _PyEval_GetBuiltin can invoke arbitrary code. - * calls must be *before* access of `it` pointers, - * since C parameter eval order is undefined. + /* _PyEval_GetBuiltin can invoke arbitrary code, + * call must be before access of iterator pointers. * see issue #101765 */ if (it->it_seq != NULL) { diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index 8971746d4a845a..888cb16edd1b46 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -880,9 +880,8 @@ ga_iter_reduce(PyObject *self, PyObject *Py_UNUSED(ignored)) PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); gaiterobject *gi = (gaiterobject *)self; - /* _PyEval_GetBuiltin can invoke arbitrary code. - * call must be *before* access of `gi` pointers, - * since C parameter eval order is undefined. + /* _PyEval_GetBuiltin can invoke arbitrary code, + * call must be before access of iterator pointers. * see issue #101765 */ if (gi->obj) diff --git a/Objects/iterobject.c b/Objects/iterobject.c index b50efc21aa2f8c..149b701b68e91a 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -104,9 +104,8 @@ iter_reduce(seqiterobject *it, PyObject *Py_UNUSED(ignored)) { PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); - /* _PyEval_GetBuiltin can invoke arbitrary code. - * calls must be *before* access of `it` pointers, - * since C parameter eval order is undefined. + /* _PyEval_GetBuiltin can invoke arbitrary code, + * call must be before access of iterator pointers. * see issue #101765 */ if (it->it_seq != NULL) @@ -247,9 +246,8 @@ calliter_reduce(calliterobject *it, PyObject *Py_UNUSED(ignored)) { PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); - /* _PyEval_GetBuiltin can invoke arbitrary code. - * calls must be *before* access of `it` pointers, - * since C parameter eval order is undefined. + /* _PyEval_GetBuiltin can invoke arbitrary code, + * call must be before access of iterator pointers. * see issue #101765 */ if (it->it_callable != NULL && it->it_sentinel != NULL) diff --git a/Objects/listobject.c b/Objects/listobject.c index 579c30ba9570e7..494b40178f9f27 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3444,9 +3444,8 @@ listiter_reduce_general(void *_it, int forward) { PyObject *list; - /* _PyEval_GetBuiltin can invoke arbitrary code. - * calls must be *before* access of `_it` pointers, - * since C parameter eval order is undefined. + /* _PyEval_GetBuiltin can invoke arbitrary code, + * call must be before access of iterator pointers. * see issue #101765 */ /* the objects are not the same, index is of different types! */ diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 87e207205348bd..f5a0e66c549bf9 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -1050,9 +1050,8 @@ tupleiter_reduce(_PyTupleIterObject *it, PyObject *Py_UNUSED(ignored)) { PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); - /* _PyEval_GetBuiltin can invoke arbitrary code. - * calls must be *before* access of `it` pointers, - * since C parameter eval order is undefined. + /* _PyEval_GetBuiltin can invoke arbitrary code, + * call must be before access of iterator pointers. * see issue #101765 */ if (it->it_seq) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 168b9172489325..6403e359c70714 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14785,10 +14785,9 @@ static PyObject * unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) { PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter)); - - /* _PyEval_GetBuiltin can invoke arbitrary code. - * calls must be *before* access of `it` pointers, - * since C parameter eval order is undefined. + + /* _PyEval_GetBuiltin can invoke arbitrary code, + * call must be before access of iterator pointers. * see issue #101765 */ if (it->it_seq != NULL) { From c67b11ac520047e63e0ca9fcd5d13cba96491f3f Mon Sep 17 00:00:00 2001 From: Ionite Date: Fri, 24 Feb 2023 14:50:32 -0500 Subject: [PATCH 24/24] Clarify test comments --- Lib/test/test_iter.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index ea059aa541c2c5..6ab9b7a7230309 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -274,8 +274,8 @@ def __eq__(self, other): return other == self.name # del is required here - # since only setting will result in - # both keys existing with a hash collision + # to not prematurely call __eq__ from + # the hash collision with the old key del builtins_dict[builtin_name] builtins_dict[CustomStr(builtin_name, it)] = orig[builtin_name] @@ -309,10 +309,13 @@ def __eq__(self, other): self.assertEqual(run_iter(*case), (orig["iter"], ((),))) finally: # Restore original builtins - # need to suppress KeyErrors in case - # a failed test deletes the key without setting anything for key, func in orig.items(): + # need to suppress KeyErrors in case + # a failed test deletes the key without setting anything with contextlib.suppress(KeyError): + # del is required here + # to not invoke our custom __eq__ from + # the hash collision with the old key del builtins_dict[key] builtins_dict[key] = func