From 3c7050e9a72ff1848e3d1c5bbc8fe751cc91c206 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 17 Jul 2024 11:46:37 +0200 Subject: [PATCH 01/18] Immortalize implicit arguments; use static sting for ".0" --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 ++++ Python/symtable.c | 17 ++++++++++++++--- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index d9b46df507dfd7..bf750050dfa74b 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -556,6 +556,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dbl_percent)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(defaults)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot_locals)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot_zero)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(empty)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(format)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(generic_base)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 10773d7a6c7e3f..9e287ca1cb5ed6 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -42,6 +42,7 @@ struct _Py_global_strings { STRUCT_FOR_STR(dbl_percent, "%%") STRUCT_FOR_STR(defaults, ".defaults") STRUCT_FOR_STR(dot_locals, ".") + STRUCT_FOR_STR(dot_zero, ".0") STRUCT_FOR_STR(empty, "") STRUCT_FOR_STR(format, ".format") STRUCT_FOR_STR(generic_base, ".generic_base") diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 618f8d0a36b6c3..400737150eff94 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -551,6 +551,7 @@ extern "C" { INIT_STR(dbl_percent, "%%"), \ INIT_STR(defaults, ".defaults"), \ INIT_STR(dot_locals, "."), \ + INIT_STR(dot_zero, ".0"), \ INIT_STR(empty, ""), \ INIT_STR(format, ".format"), \ INIT_STR(generic_base, ".generic_base"), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index f848a002c3b5d1..f7b144e7e8178b 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2896,6 +2896,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(dot_zero); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_STR(dot_locals); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Python/symtable.c b/Python/symtable.c index c4508cac7f5928..e75606e01afda8 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -2505,9 +2505,20 @@ symtable_visit_pattern(struct symtable *st, pattern_ty p) static int symtable_implicit_arg(struct symtable *st, int pos) { - PyObject *id = PyUnicode_FromFormat(".%d", pos); - if (id == NULL) - return 0; + PyObject *id; + if (pos == 0) { + // In most cases there is only one implicit arg, `.0` + _Py_DECLARE_STR(dot_zero, ".0"); + id = &_Py_STR(dot_zero); + } + else { + PyObject *id = PyUnicode_FromFormat(".%d", pos); + if (id == NULL) { + return 0; + } + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternImmortal(interp, &id); + } if (!symtable_add_def(st, id, DEF_PARAM, st->st_cur->ste_loc)) { Py_DECREF(id); return 0; From 2ebe868e6c0dcd84769be4842ac3568c54a705e3 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 17 Jul 2024 11:49:26 +0200 Subject: [PATCH 02/18] Intern mangled identifiers after creation --- Python/symtable.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/symtable.c b/Python/symtable.c index e75606e01afda8..f00858b57a8581 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -3040,5 +3040,7 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident) return NULL; } assert(_PyUnicode_CheckConsistency(result, 1)); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &result); return result; } From 7ba01d59a7f64450d950a210e62278d69497595d Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 17 Jul 2024 15:33:59 +0200 Subject: [PATCH 03/18] Create new tuples with _PyCode_New --- Objects/codeobject.c | 258 ++++++++++++++++++++++++++----------------- 1 file changed, 158 insertions(+), 100 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index d45ba5ed4a9c06..8ba70ff36ab4de 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -134,74 +134,137 @@ should_intern_string(PyObject *o) static PyObject *intern_one_constant(PyObject *op); #endif -static int -intern_strings(PyObject *tuple) +/* Helper for intern_names and intern_constants, which create a new tuple + * as a copy of `orig_tuple`, with some elements possibly changed. + * But we optimize for cases where no change, and no copying, is necessary. + * + * This helper sets the i'th element to `new_item`. + * `new_item` is stolen. + * Return 0 on success; -1 with exception set on error. + * + * Error pass-through: If `new_item` is NULL, an exception should be set; + * return -1 with that exception still set. + * + * Until a copy is necessary, `*new_tuple_p` should be NULL. + * This function will initialize it to a new reference when needed. + * + * If `*new_tuple_p` remains NULL after all calls to _set_newtuple_item, + * the final result should be a new reference to `orig_tuple`. + */ +static inline int +_set_newtuple_item( + PyObject *orig_tuple, + PyObject **new_tuple_p /* NULL or owned by caller */, + Py_ssize_t i, + PyObject *new_item /* Stolen. If NULL, pass error through. */) +{ + if (new_item == NULL) { + return -1; + } + if (new_item == PyTuple_GET_ITEM(orig_tuple, i)) { + Py_DECREF(new_item); + return 0; + } + PyObject *new_tuple = *new_tuple_p; + if (!new_tuple) { + Py_ssize_t size = PyTuple_GET_SIZE(orig_tuple); + new_tuple = PyTuple_New(size); + if (!new_tuple) { + Py_DECREF(new_item); + return -1; + } + for (Py_ssize_t i = size; --i >= 0; ) { + _PyTuple_ITEMS(new_tuple)[i] = Py_NewRef( + _PyTuple_ITEMS(orig_tuple)[i]); + } + *new_tuple_p = new_tuple; + } + Py_SETREF(_PyTuple_ITEMS(new_tuple)[i], new_item); + return 0; +} + +/* Intern, and immortalize, a tuple of strings. + Conceptually: return a new tuple. + (If no changes are needed, return a new ref to the argument.) + */ +static PyObject* +intern_names(PyObject *tuple) { PyInterpreterState *interp = _PyInterpreterState_GET(); Py_ssize_t i; + PyObject *new_tuple = NULL; for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); if (v == NULL || !PyUnicode_CheckExact(v)) { PyErr_SetString(PyExc_SystemError, "non-string found in code slot"); - return -1; + return NULL; + } + if (PyUnicode_CHECK_INTERNED(v) && _Py_IsImmortal(v)) { + continue; + } + PyObject *str = Py_NewRef(v); + _PyUnicode_InternImmortal(interp, &str); + if (_set_newtuple_item(tuple, &new_tuple, i, str) < 0) { + return NULL; } - _PyUnicode_InternImmortal(interp, &_PyTuple_ITEMS(tuple)[i]); } - return 0; + if (new_tuple) { + return new_tuple; + } + return Py_NewRef(tuple); } /* Intern constants. In the default build, this interns selected string constants. In the free-threaded build, this also interns non-string - constants. */ -static int -intern_constants(PyObject *tuple, int *modified) + constants. + + Conceptually: return a new tuple. + (If no changes are needed, return a new ref to the argument.) + */ +static PyObject * +intern_constants(PyObject *tuple) { + PyObject *new_tuple = NULL; + PyInterpreterState *interp = _PyInterpreterState_GET(); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); if (PyUnicode_CheckExact(v)) { - if (should_intern_string(v)) { - PyObject *w = v; - _PyUnicode_InternMortal(interp, &v); - if (w != v) { - PyTuple_SET_ITEM(tuple, i, v); - if (modified) { - *modified = 1; - } + if (!PyUnicode_CHECK_INTERNED(v) && should_intern_string(v)) { + PyObject *w = Py_NewRef(v); + _PyUnicode_InternMortal(interp, &w); + if (_set_newtuple_item(tuple, &new_tuple, i, w) < 0) { + goto error; } } } else if (PyTuple_CheckExact(v)) { - if (intern_constants(v, NULL) < 0) { - return -1; + if (_set_newtuple_item(tuple, &new_tuple, i, + intern_constants(v)) < 0) { + goto error; } } else if (PyFrozenSet_CheckExact(v)) { - PyObject *w = v; PyObject *tmp = PySequence_Tuple(v); if (tmp == NULL) { - return -1; + goto error; } - int tmp_modified = 0; - if (intern_constants(tmp, &tmp_modified) < 0) { + PyObject *interned = intern_constants(tmp); + if (!interned) { Py_DECREF(tmp); - return -1; + goto error; } - if (tmp_modified) { - v = PyFrozenSet_New(tmp); - if (v == NULL) { + if (interned != tmp) { + PyObject *new = PyFrozenSet_New(interned); + if (_set_newtuple_item(tuple, &new_tuple, i, new) < 0) { + Py_DECREF(interned); Py_DECREF(tmp); - return -1; - } - - PyTuple_SET_ITEM(tuple, i, v); - Py_DECREF(w); - if (modified) { - *modified = 1; + goto error; } } + Py_DECREF(interned); Py_DECREF(tmp); } #ifdef Py_GIL_DISABLED @@ -209,30 +272,27 @@ intern_constants(PyObject *tuple, int *modified) PySliceObject *slice = (PySliceObject *)v; PyObject *tmp = PyTuple_New(3); if (tmp == NULL) { - return -1; + goto error; } PyTuple_SET_ITEM(tmp, 0, Py_NewRef(slice->start)); PyTuple_SET_ITEM(tmp, 1, Py_NewRef(slice->stop)); PyTuple_SET_ITEM(tmp, 2, Py_NewRef(slice->step)); - int tmp_modified = 0; - if (intern_constants(tmp, &tmp_modified) < 0) { + PyObject *interned = intern_constants(tmp); + if (!interned) { Py_DECREF(tmp); - return -1; + goto error; } - if (tmp_modified) { - v = PySlice_New(PyTuple_GET_ITEM(tmp, 0), - PyTuple_GET_ITEM(tmp, 1), - PyTuple_GET_ITEM(tmp, 2)); - if (v == NULL) { + if (interned != tmp) { + PyObject *new = PySlice_New(PyTuple_GET_ITEM(interned, 0), + PyTuple_GET_ITEM(interned, 1), + PyTuple_GET_ITEM(interned, 2)); + if (_set_newtuple_item(tuple, &new_tuple, i, new) < 0) { + Py_DECREF(interned); Py_DECREF(tmp); - return -1; - } - PyTuple_SET_ITEM(tuple, i, v); - Py_DECREF(slice); - if (modified) { - *modified = 1; + goto error; } } + Py_DECREF(interned); Py_DECREF(tmp); } @@ -244,21 +304,20 @@ intern_constants(PyObject *tuple, int *modified) !PyUnicode_CheckExact(v) && _Py_atomic_load_int(&tstate->interp->gc.immortalize) >= 0) { - PyObject *interned = intern_one_constant(v); - if (interned == NULL) { - return -1; - } - else if (interned != v) { - PyTuple_SET_ITEM(tuple, i, interned); - Py_SETREF(v, interned); - if (modified) { - *modified = 1; - } + if (_set_newtuple_item(tuple, &new_tuple, i, + intern_one_constant(v)) < 0) { + goto error; } } #endif } - return 0; + if (new_tuple) { + return new_tuple; + } + return Py_NewRef(tuple); +error: + Py_XDECREF(new_tuple); + return NULL; } /* Return a shallow copy of a tuple that is @@ -450,7 +509,11 @@ _PyCode_Validate(struct _PyCodeConstructor *con) extern void _PyCode_Quicken(PyCodeObject *code); static void -init_code(PyCodeObject *co, struct _PyCodeConstructor *con) +init_code(PyCodeObject *co, + struct _PyCodeConstructor *con, + PyObject *names_interned, + PyObject *consts_interned, + PyObject *localsplusnames_interned) { int nlocalsplus = (int)PyTuple_GET_SIZE(con->localsplusnames); int nlocals, ncellvars, nfreevars; @@ -472,10 +535,11 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) co->co_firstlineno = con->firstlineno; co->co_linetable = Py_NewRef(con->linetable); - co->co_consts = Py_NewRef(con->consts); - co->co_names = Py_NewRef(con->names); - co->co_localsplusnames = Py_NewRef(con->localsplusnames); + co->co_consts = Py_NewRef(consts_interned); + co->co_names = Py_NewRef(names_interned); + + co->co_localsplusnames = Py_NewRef(localsplusnames_interned); co->co_localspluskinds = Py_NewRef(con->localspluskinds); co->co_argcount = con->argcount; @@ -613,45 +677,17 @@ remove_column_info(PyObject *locations) return res; } -static int -intern_code_constants(struct _PyCodeConstructor *con) -{ -#ifdef Py_GIL_DISABLED - PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _py_code_state *state = &interp->code_state; - PyMutex_Lock(&state->mutex); -#endif - if (intern_strings(con->names) < 0) { - goto error; - } - if (intern_constants(con->consts, NULL) < 0) { - goto error; - } - if (intern_strings(con->localsplusnames) < 0) { - goto error; - } -#ifdef Py_GIL_DISABLED - PyMutex_Unlock(&state->mutex); -#endif - return 0; - -error: -#ifdef Py_GIL_DISABLED - PyMutex_Unlock(&state->mutex); -#endif - return -1; -} - /* The caller is responsible for ensuring that the given data is valid. */ PyCodeObject * _PyCode_New(struct _PyCodeConstructor *con) { - if (intern_code_constants(con) < 0) { - return NULL; - } - + PyCodeObject *result = NULL; PyObject *replacement_locations = NULL; + PyObject *names_interned = NULL; + PyObject *consts_interned = NULL; + PyObject *localsplusnames_interned = NULL; + // Compact the linetable if we are opted out of debug // ranges. if (!_Py_GetConfig()->code_debug_ranges) { @@ -662,6 +698,20 @@ _PyCode_New(struct _PyCodeConstructor *con) con->linetable = replacement_locations; } + // Intern constants; inern and immortalize names + names_interned = intern_names(con->names); + if (!names_interned) { + goto finally; + } + consts_interned = intern_constants(con->consts); + if (!consts_interned) { + goto finally; + } + localsplusnames_interned = intern_constants(con->localsplusnames); + if (!localsplusnames_interned) { + goto finally; + } + Py_ssize_t size = PyBytes_GET_SIZE(con->code) / sizeof(_Py_CODEUNIT); PyCodeObject *co; #ifdef Py_GIL_DISABLED @@ -669,18 +719,26 @@ _PyCode_New(struct _PyCodeConstructor *con) #else co = PyObject_NewVar(PyCodeObject, &PyCode_Type, size); #endif + if (co == NULL) { - Py_XDECREF(replacement_locations); PyErr_NoMemory(); - return NULL; + goto finally; } - init_code(co, con); + init_code(co, con, + names_interned, consts_interned, localsplusnames_interned); + #ifdef Py_GIL_DISABLED _PyObject_SetDeferredRefcount((PyObject *)co); _PyObject_GC_TRACK(co); #endif + + result = co; +finally: Py_XDECREF(replacement_locations); - return co; + Py_XDECREF(names_interned); + Py_XDECREF(consts_interned); + Py_XDECREF(localsplusnames_interned); + return result; } From ff7f4e6695153c81b9955761832244ed18229c43 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jul 2024 14:21:40 +0200 Subject: [PATCH 04/18] Intern strings in _PyCode_ConstantKey --- Objects/codeobject.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 8ba70ff36ab4de..08bacbcb31ba4a 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -2431,7 +2431,6 @@ _PyCode_ConstantKey(PyObject *op) /* Py_None and Py_Ellipsis are singletons. */ if (op == Py_None || op == Py_Ellipsis || PyLong_CheckExact(op) - || PyUnicode_CheckExact(op) /* code_richcompare() uses _PyCode_ConstantKey() internally */ || PyCode_Check(op)) { @@ -2439,6 +2438,11 @@ _PyCode_ConstantKey(PyObject *op) * type and from tuples. */ key = Py_NewRef(op); } + else if (PyUnicode_CheckExact(op)) { + key = Py_NewRef(op); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &key); + } else if (PyBool_Check(op) || PyBytes_CheckExact(op)) { /* Make booleans different from integers 0 and 1. * Avoid BytesWarning from comparing bytes with strings. */ From c2db0f0aa83bd3016381f335ce362106538ce070 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jul 2024 14:55:19 +0200 Subject: [PATCH 05/18] Merge validate_and_copy_tuple into intern_names --- Objects/codeobject.c | 87 ++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 52 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 08bacbcb31ba4a..12d067e0f118bd 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -184,6 +184,10 @@ _set_newtuple_item( } /* Intern, and immortalize, a tuple of strings. + + Convert string subclasses to exact strings and error out if + a non-string is found (that is, handle arbitrary user input). + Conceptually: return a new tuple. (If no changes are needed, return a new ref to the argument.) */ @@ -196,15 +200,33 @@ intern_names(PyObject *tuple) for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); - if (v == NULL || !PyUnicode_CheckExact(v)) { + if (v == NULL) { PyErr_SetString(PyExc_SystemError, - "non-string found in code slot"); - return NULL; + "NULL found in code name tuple"); + goto error; } - if (PyUnicode_CHECK_INTERNED(v) && _Py_IsImmortal(v)) { - continue; + PyObject *str; + if (PyUnicode_CheckExact(v)) { + if (PyUnicode_CHECK_INTERNED(v) && _Py_IsImmortal(v)) { + continue; + } + str = Py_NewRef(v); + } + else { + if (PyUnicode_Check(v)) { + str = _PyUnicode_Copy(v); + if (str == NULL) { + goto error; + } + } + else { + PyErr_Format( + PyExc_TypeError, + "name tuples must contain only strings, not '%T'", + v); + goto error; + } } - PyObject *str = Py_NewRef(v); _PyUnicode_InternImmortal(interp, &str); if (_set_newtuple_item(tuple, &new_tuple, i, str) < 0) { return NULL; @@ -214,6 +236,9 @@ intern_names(PyObject *tuple) return new_tuple; } return Py_NewRef(tuple); +error: + Py_XDECREF(new_tuple); + return NULL; } /* Intern constants. In the default build, this interns selected string @@ -320,48 +345,6 @@ intern_constants(PyObject *tuple) return NULL; } -/* Return a shallow copy of a tuple that is - guaranteed to contain exact strings, by converting string subclasses - to exact strings and complaining if a non-string is found. */ -static PyObject* -validate_and_copy_tuple(PyObject *tup) -{ - PyObject *newtuple; - PyObject *item; - Py_ssize_t i, len; - - len = PyTuple_GET_SIZE(tup); - newtuple = PyTuple_New(len); - if (newtuple == NULL) - return NULL; - - for (i = 0; i < len; i++) { - item = PyTuple_GET_ITEM(tup, i); - if (PyUnicode_CheckExact(item)) { - Py_INCREF(item); - } - else if (!PyUnicode_Check(item)) { - PyErr_Format( - PyExc_TypeError, - "name tuples must contain only " - "strings, not '%.500s'", - Py_TYPE(item)->tp_name); - Py_DECREF(newtuple); - return NULL; - } - else { - item = _PyUnicode_Copy(item); - if (item == NULL) { - Py_DECREF(newtuple); - return NULL; - } - } - PyTuple_SET_ITEM(newtuple, i, item); - } - - return newtuple; -} - static int init_co_cached(PyCodeObject *self) { if (self->_co_cached == NULL) { @@ -1815,20 +1798,20 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, goto cleanup; } - ournames = validate_and_copy_tuple(names); + ournames = intern_names(names); if (ournames == NULL) goto cleanup; - ourvarnames = validate_and_copy_tuple(varnames); + ourvarnames = intern_names(varnames); if (ourvarnames == NULL) goto cleanup; if (freevars) - ourfreevars = validate_and_copy_tuple(freevars); + ourfreevars = intern_names(freevars); else ourfreevars = PyTuple_New(0); if (ourfreevars == NULL) goto cleanup; if (cellvars) - ourcellvars = validate_and_copy_tuple(cellvars); + ourcellvars = intern_names(cellvars); else ourcellvars = PyTuple_New(0); if (ourcellvars == NULL) From 259d2caab39c1c4e19c1cf1ea8263af298a8cccc Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jul 2024 14:56:29 +0200 Subject: [PATCH 06/18] Remove test that asserted a constant is *not* interned --- Lib/test/test_code.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index ba77e1c5341db8..964fb7cbe293a2 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -570,13 +570,6 @@ def f(a='str_value'): return a self.assertIsInterned(f()) - @cpython_only - @unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants") - def test_interned_string_with_null(self): - co = compile(r'res = "str\0value!"', '?', 'exec') - v = self.find_const(co.co_consts, 'str\0value!') - self.assertIsNotInterned(v) - @cpython_only @unittest.skipUnless(Py_GIL_DISABLED, "does not intern all constants") @skip_if_suppress_immortalization() From 891304ddab50e6b44509fc2a2bea1889cad96bae Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jul 2024 15:03:41 +0200 Subject: [PATCH 07/18] Fix ref handling in error case --- Objects/codeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 12d067e0f118bd..1bbce637dcd785 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -229,7 +229,7 @@ intern_names(PyObject *tuple) } _PyUnicode_InternImmortal(interp, &str); if (_set_newtuple_item(tuple, &new_tuple, i, str) < 0) { - return NULL; + goto error; } } if (new_tuple) { From 1e655b2dbed03989ea931807cf2960b13354abbd Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jul 2024 15:54:01 +0200 Subject: [PATCH 08/18] Skip interning in _Py_Mangle (_PyCode_ConstantKey is enough) --- .../2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst | 3 +++ Python/symtable.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst b/Misc/NEWS.d/next/Core and Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst new file mode 100644 index 00000000000000..4a47916e4739f4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst @@ -0,0 +1,3 @@ +Fixed issue where creating a :class:`types.CodeType ` object would +sometimes mutate argument tuples. This changes details of how code constants +and names are interned, immortalized and de-duplicated. diff --git a/Python/symtable.c b/Python/symtable.c index f00858b57a8581..e75606e01afda8 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -3040,7 +3040,5 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident) return NULL; } assert(_PyUnicode_CheckConsistency(result, 1)); - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyUnicode_InternMortal(interp, &result); return result; } From a75078fc845313c331a278f5e72ba58971614db1 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jul 2024 16:27:14 +0200 Subject: [PATCH 09/18] Add blurb --- .../2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename Misc/NEWS.d/next/{Core and Builtins => Core_and_Builtins}/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst (68%) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst similarity index 68% rename from Misc/NEWS.d/next/Core and Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst rename to Misc/NEWS.d/next/Core_and_Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst index 4a47916e4739f4..273281428361a0 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-07-23-16-20-02.gh-issue-121954.ydF8sg.rst @@ -1,3 +1,3 @@ -Fixed issue where creating a :class:`types.CodeType ` object would +Fixed issue where creating a :class:`code ` object would sometimes mutate argument tuples. This changes details of how code constants and names are interned, immortalized and de-duplicated. From 2557b5b3550b74e161b0a37eb543b78bcd6dbc46 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 24 Jul 2024 12:42:24 +0200 Subject: [PATCH 10/18] Immortalize localsplusnames --- Objects/codeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 1bbce637dcd785..dbce6b10ee29a7 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -691,6 +691,7 @@ _PyCode_New(struct _PyCodeConstructor *con) goto finally; } localsplusnames_interned = intern_constants(con->localsplusnames); + localsplusnames_interned = intern_names(con->localsplusnames); if (!localsplusnames_interned) { goto finally; } From 9bca4c69d133ada11fa6c963fa3f5a8dc5d1e04e Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 24 Jul 2024 12:42:58 +0200 Subject: [PATCH 11/18] Free-threading: Re-add lock for intern_constants --- Objects/codeobject.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index dbce6b10ee29a7..91e18aa7ec328f 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -247,6 +247,8 @@ intern_names(PyObject *tuple) Conceptually: return a new tuple. (If no changes are needed, return a new ref to the argument.) + + In the free-threaded build this needs a lock. */ static PyObject * intern_constants(PyObject *tuple) @@ -686,11 +688,18 @@ _PyCode_New(struct _PyCodeConstructor *con) if (!names_interned) { goto finally; } +#ifdef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _py_code_state *state = &interp->code_state; + PyMutex_Lock(&state->mutex); +#endif consts_interned = intern_constants(con->consts); if (!consts_interned) { goto finally; } - localsplusnames_interned = intern_constants(con->localsplusnames); +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&state->mutex); +#endif localsplusnames_interned = intern_names(con->localsplusnames); if (!localsplusnames_interned) { goto finally; From 25dbda6f388037c833f99dfaa98c02b011000732 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Sat, 3 Aug 2024 15:35:24 +0200 Subject: [PATCH 12/18] Intern tuples in free-threaded build --- Objects/codeobject.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index fa46fb1b942537..86eaedc38e826e 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -339,6 +339,9 @@ intern_constants(PyObject *tuple) #endif } if (new_tuple) { +#ifdef Py_GIL_DISABLED + return intern_one_constant(new_tuple); +#endif return new_tuple; } return Py_NewRef(tuple); From b33c1eb3c687b450eb119776a7892a48d27e292a Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Sat, 3 Aug 2024 17:52:58 +0200 Subject: [PATCH 13/18] Intern the outer tuples like any other object --- Objects/codeobject.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 86eaedc38e826e..355c9996235e37 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -340,7 +340,12 @@ intern_constants(PyObject *tuple) } if (new_tuple) { #ifdef Py_GIL_DISABLED - return intern_one_constant(new_tuple); + PyThreadState *tstate = PyThreadState_GET(); + if (!_Py_IsImmortal(new_tuple) && + _Py_atomic_load_int(&tstate->interp->gc.immortalize) >= 0 + ) { + return intern_one_constant(new_tuple); + } #endif return new_tuple; } From cd6086f85fc0df9fadf8a0d99ac9a753280ef1f8 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Sat, 3 Aug 2024 18:01:03 +0200 Subject: [PATCH 14/18] Regenerate test_frozenmain.h --- Programs/test_frozenmain.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 22354c9bbf8a35..5dc99486ea43f5 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -12,26 +12,26 @@ unsigned char M_test_frozenmain[] = { 0,0,112,6,89,2,31,0,80,6,89,6,12,0,80,7, 89,5,89,6,2,0,0,0,12,0,47,4,49,1,0,0, 0,0,0,0,29,0,73,22,0,0,9,0,29,0,101,1, - 41,8,233,0,0,0,0,78,122,18,70,114,111,122,101,110, - 32,72,101,108,108,111,32,87,111,114,108,100,122,8,115,121, + 41,8,233,0,0,0,0,78,218,18,70,114,111,122,101,110, + 32,72,101,108,108,111,32,87,111,114,108,100,218,8,115,121, 115,46,97,114,103,118,218,6,99,111,110,102,105,103,41,5, 218,12,112,114,111,103,114,97,109,95,110,97,109,101,218,10, 101,120,101,99,117,116,97,98,108,101,218,15,117,115,101,95, 101,110,118,105,114,111,110,109,101,110,116,218,17,99,111,110, 102,105,103,117,114,101,95,99,95,115,116,100,105,111,218,14, - 98,117,102,102,101,114,101,100,95,115,116,100,105,111,122,7, - 99,111,110,102,105,103,32,122,2,58,32,41,7,218,3,115, + 98,117,102,102,101,114,101,100,95,115,116,100,105,111,218,7, + 99,111,110,102,105,103,32,218,2,58,32,41,7,218,3,115, 121,115,218,17,95,116,101,115,116,105,110,116,101,114,110,97, 108,99,97,112,105,218,5,112,114,105,110,116,218,4,97,114, 103,118,218,11,103,101,116,95,99,111,110,102,105,103,115,114, - 3,0,0,0,218,3,107,101,121,169,0,243,0,0,0,0, + 5,0,0,0,218,3,107,101,121,169,0,243,0,0,0,0, 218,18,116,101,115,116,95,102,114,111,122,101,110,109,97,105, - 110,46,112,121,218,8,60,109,111,100,117,108,101,62,114,18, + 110,46,112,121,218,8,60,109,111,100,117,108,101,62,114,22, 0,0,0,1,0,0,0,115,94,0,0,0,240,3,1,1, 1,243,8,0,1,11,219,0,24,225,0,5,208,6,26,212, 0,27,217,0,5,128,106,144,35,151,40,145,40,212,0,27, 216,9,26,215,9,38,210,9,38,211,9,40,168,24,209,9, 50,128,6,243,2,6,12,2,128,67,241,14,0,5,10,136, 71,144,67,144,53,152,2,152,54,160,35,153,59,152,45,208, - 10,40,214,4,41,242,15,6,12,2,114,16,0,0,0, + 10,40,214,4,41,242,15,6,12,2,114,20,0,0,0, }; From ecd7ae75a5f545033725eb118fa14361a31baa9c Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 12 Aug 2024 17:23:07 +0200 Subject: [PATCH 15/18] Adjust comment for _PyCode_Fini --- Python/pylifecycle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 6b641c0775f533..71487f47ca2026 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1833,6 +1833,8 @@ finalize_interp_types(PyInterpreterState *interp) _PyTypes_Fini(interp); + // Call _PyCode_Fini after any code that uses tuples, + // since it sets contents of "interned" tuples to NULL. _PyCode_Fini(interp); // Call _PyUnicode_ClearInterned() before _PyDict_Fini() since it uses From aa1a5ca0dfa5c7f6ec393286e2f95c5213de2530 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 12 Aug 2024 17:56:51 +0200 Subject: [PATCH 16/18] Add tests for converting to strict unicode --- Lib/test/test_code.py | 47 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 964fb7cbe293a2..14dd5fdea1da01 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -525,6 +525,53 @@ def test_code_equal_with_instrumentation(self): self.assertNotEqual(code1, code2) sys.settrace(None) + @cpython_only + def test_names_strict_string(self): + class StrSubclass(str): pass + + def f(): + return name1 + name2 + + code1 = f.__code__ + code2 = code1.replace(co_names=tuple( + StrSubclass(name) for name in code1.co_names + )) + self.assertEqual(code1.co_names, ('name1' ,'name2')) + self.assertEqual(code2.co_names, ('name1' ,'name2')) + for name in code2.co_names: + with self.subTest(name=name): + self.assertIs(type(name), str) + + @cpython_only + def test_varnames_strict_string(self): + class StrSubclass(str): pass + + def f(): + name1 = name2 = call() + return name1 + name2 + + code1 = f.__code__ + code2 = code1.replace(co_varnames=tuple( + StrSubclass(name) for name in code1.co_varnames + )) + self.assertEqual(code1.co_varnames, ('name1' ,'name2')) + self.assertEqual(code2.co_varnames, ('name1' ,'name2')) + for name in code2.co_varnames: + with self.subTest(name=name): + self.assertIs(type(name), str) + + @cpython_only + def test_names_nonstring(self): + def f(): + var1, var2 = name1, name2 + return var1 + var2 + + code = f.__code__ + + with self.assertRaises(TypeError): + code.replace(co_names=(1, 2)) + with self.assertRaises(TypeError): + code.replace(co_varnames=(1, 2)) def isinterned(s): return s is sys.intern(('_' + s + '_')[1:-1]) From e705379ae5413a46c74c305a28807173b6188a0e Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 12 Aug 2024 18:01:57 +0200 Subject: [PATCH 17/18] Unlock mutex in error path --- Objects/codeobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 355c9996235e37..e97bc6f34c0be2 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -702,12 +702,12 @@ _PyCode_New(struct _PyCodeConstructor *con) PyMutex_Lock(&state->mutex); #endif consts_interned = intern_constants(con->consts); - if (!consts_interned) { - goto finally; - } #ifdef Py_GIL_DISABLED PyMutex_Unlock(&state->mutex); #endif + if (!consts_interned) { + goto finally; + } localsplusnames_interned = intern_names(con->localsplusnames); if (!localsplusnames_interned) { goto finally; From ed6115fad5f5cf9149ade8edd86d67862492949b Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 12 Aug 2024 18:05:00 +0200 Subject: [PATCH 18/18] Add braces to if bodies --- Objects/codeobject.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index e97bc6f34c0be2..d1b66181b0b103 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1817,23 +1817,31 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, } ournames = intern_names(names); - if (ournames == NULL) + if (ournames == NULL) { goto cleanup; + } ourvarnames = intern_names(varnames); - if (ourvarnames == NULL) + if (ourvarnames == NULL) { goto cleanup; - if (freevars) + } + if (freevars) { ourfreevars = intern_names(freevars); - else + } + else { ourfreevars = PyTuple_New(0); - if (ourfreevars == NULL) + } + if (ourfreevars == NULL) { goto cleanup; - if (cellvars) + } + if (cellvars) { ourcellvars = intern_names(cellvars); - else + } + else { ourcellvars = PyTuple_New(0); - if (ourcellvars == NULL) + } + if (ourcellvars == NULL) { goto cleanup; + } co = (PyObject *)PyCode_NewWithPosOnlyArgs(argcount, posonlyargcount, kwonlyargcount,