python · ericsnowcurrently · Dec 16, 2021 · Dec 16, 2021 · Dec 17, 2021 · Dec 17, 2021
@@ -45,6 +45,8 @@ extern "C" {
     _PyRuntime.global_objects.NAME
 #define _Py_SINGLETON(NAME) \
     _Py_GLOBAL_OBJECT(singletons.NAME)
+#define _Py_CACHED_OBJECT(NAME) \
+    _Py_GLOBAL_OBJECT(cached.NAME)
 
 struct _Py_global_objects {
     struct {
@@ -54,7 +56,29 @@ struct _Py_global_objects {
          * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive).
          */
         PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];
+
+        /* Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() */
+        struct _Py_unicode_ids {
+            PyThread_type_lock lock;
+            // next_index value must be preserved when Py_Initialize()/Py_Finalize()
+            // is called multiple times: see _PyUnicode_FromId() implementation.
+            Py_ssize_t next_index;
+
+            Py_ssize_t size;
+            PyObject **array;
+        } unicode_ids;
     } singletons;
+    struct {
+        /* This dictionary holds all interned unicode strings.  Note that references
+           to strings in this dictionary are *not* counted in the string's ob_refcnt.
+           When the interned string reaches a refcnt of 0 the string deallocation
+           function will delete the reference from this dictionary.
+
+           Another way to look at this is to say that the actual reference
+           count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
+        */
+        PyObject *unicode_interned;
+    } cached;
 };
 
 #define _Py_global_objects_INIT { \

@@ -116,8 +116,6 @@ typedef struct pyruntimestate {
     void *open_code_userdata;
     _Py_AuditHookEntry *audit_hook_head;
 
-    struct _Py_unicode_runtime_ids unicode_ids;
-
     struct _Py_global_objects global_objects;
     // If anything gets added after global_objects then
     // _PyRuntimeState_reset() needs to get updated to clear it.

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
@@ -19,13 +19,6 @@ extern void _PyUnicode_Fini(PyInterpreterState *);
 
 /* other API */
 
-struct _Py_unicode_runtime_ids {
-    PyThread_type_lock lock;
-    // next_index value must be preserved when Py_Initialize()/Py_Finalize()
-    // is called multiple times: see _PyUnicode_FromId() implementation.
-    Py_ssize_t next_index;
-};
-
 /* fs_codec.encoding is initialized to NULL.
    Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
 struct _Py_unicode_fs_codec {
@@ -35,31 +28,13 @@ struct _Py_unicode_fs_codec {
     _Py_error_handler error_handler;
 };
 
-struct _Py_unicode_ids {
-    Py_ssize_t size;
-    PyObject **array;
-};
-
 struct _Py_unicode_state {
     // The empty Unicode object is a singleton to improve performance.
     PyObject *empty_string;
     /* Single character Unicode strings in the Latin-1 range are being
        shared as well. */
     PyObject *latin1[256];
     struct _Py_unicode_fs_codec fs_codec;
-
-    /* This dictionary holds all interned unicode strings.  Note that references
-       to strings in this dictionary are *not* counted in the string's ob_refcnt.
-       When the interned string reaches a refcnt of 0 the string deallocation
-       function will delete the reference from this dictionary.
-
-       Another way to look at this is that to say that the actual reference
-       count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
-    */
-    PyObject *interned;
-
-    // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
-    struct _Py_unicode_ids ids;
 };
 
 extern void _PyUnicode_ClearInterned(PyInterpreterState *);

diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst
@@ -0,0 +1,3 @@
+Move the interned strings and Py_IDENTIFIER strings back to the
+process-global runtime state instead of the per-interpreter state (at least
+for now).
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
@@ -233,6 +233,9 @@ static int unicode_is_singleton(PyObject *unicode);
 #endif
 
 
+#define IDENTIFIERS _Py_SINGLETON(unicode_ids)
+#define INTERNED _Py_CACHED_OBJECT(unicode_interned)
+
 static struct _Py_unicode_state*
 get_unicode_state(void)
 {
@@ -1950,15 +1953,14 @@ unicode_dealloc(PyObject *unicode)
 
     case SSTATE_INTERNED_MORTAL:
     {
-        struct _Py_unicode_state *state = get_unicode_state();
         /* Revive the dead object temporarily. PyDict_DelItem() removes two
            references (key and value) which were ignored by
            PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
            to prevent calling unicode_dealloc() again. Adjust refcnt after
            PyDict_DelItem(). */
         assert(Py_REFCNT(unicode) == 0);
         Py_SET_REFCNT(unicode, 3);
-        if (PyDict_DelItem(state->interned, unicode) != 0) {
+        if (PyDict_DelItem(INTERNED, unicode) != 0) {
             _PyErr_WriteUnraisableMsg("deletion of interned string failed",
                                       NULL);
         }
@@ -2331,30 +2333,25 @@ PyUnicode_FromString(const char *u)
 PyObject *
 _PyUnicode_FromId(_Py_Identifier *id)
 {
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    struct _Py_unicode_ids *ids = &interp->unicode.ids;
-
     Py_ssize_t index = _Py_atomic_size_get(&id->index);
     if (index < 0) {
-        struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids;
-
-        PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK);
+        PyThread_acquire_lock(IDENTIFIERS.lock, WAIT_LOCK);
         // Check again to detect concurrent access. Another thread can have
         // initialized the index while this thread waited for the lock.
         index = _Py_atomic_size_get(&id->index);
         if (index < 0) {
-            assert(rt_ids->next_index < PY_SSIZE_T_MAX);
-            index = rt_ids->next_index;
-            rt_ids->next_index++;
+            assert(IDENTIFIERS.next_index < PY_SSIZE_T_MAX);
+            index = IDENTIFIERS.next_index;
+            IDENTIFIERS.next_index++;
             _Py_atomic_size_set(&id->index, index);
         }
-        PyThread_release_lock(rt_ids->lock);
+        PyThread_release_lock(IDENTIFIERS.lock);
     }
     assert(index >= 0);
 
     PyObject *obj;
-    if (index < ids->size) {
-        obj = ids->array[index];
+    if (index < IDENTIFIERS.size) {
+        obj = IDENTIFIERS.array[index];
         if (obj) {
             // Return a borrowed reference
             return obj;
@@ -2368,38 +2365,40 @@ _PyUnicode_FromId(_Py_Identifier *id)
     }
     PyUnicode_InternInPlace(&obj);
 
-    if (index >= ids->size) {
+    if (index >= IDENTIFIERS.size) {
         // Overallocate to reduce the number of realloc
         Py_ssize_t new_size = Py_MAX(index * 2, 16);
-        Py_ssize_t item_size = sizeof(ids->array[0]);
-        PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size);
+        Py_ssize_t item_size = sizeof(IDENTIFIERS.array[0]);
+        PyObject **new_array = PyMem_Realloc(IDENTIFIERS.array, new_size * item_size);
         if (new_array == NULL) {
             PyErr_NoMemory();
             return NULL;
         }
-        memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size);
-        ids->array = new_array;
-        ids->size = new_size;
+        memset(&new_array[IDENTIFIERS.size], 0, (new_size - IDENTIFIERS.size) * item_size);
+        IDENTIFIERS.array = new_array;
+        IDENTIFIERS.size = new_size;
     }
 
     // The array stores a strong reference
-    ids->array[index] = obj;
+    IDENTIFIERS.array[index] = obj;
 
     // Return a borrowed reference
     return obj;
 }
 
 
 static void
-unicode_clear_identifiers(struct _Py_unicode_state *state)
+unicode_clear_identifiers(PyInterpreterState *interp)
 {
-    struct _Py_unicode_ids *ids = &state->ids;
-    for (Py_ssize_t i=0; i < ids->size; i++) {
-        Py_XDECREF(ids->array[i]);
+    if (!_Py_IsMainInterpreter(interp)) {
+        return;
+    }
+    for (Py_ssize_t i=0; i < IDENTIFIERS.size; i++) {
+        Py_XDECREF(IDENTIFIERS.array[i]);
     }
-    ids->size = 0;
-    PyMem_Free(ids->array);
-    ids->array = NULL;
+    IDENTIFIERS.size = 0;
+    PyMem_Free(IDENTIFIERS.array);
+    IDENTIFIERS.array = NULL;
     // Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid
     // after Py_Finalize().
 }
@@ -15596,16 +15595,15 @@ PyUnicode_InternInPlace(PyObject **p)
         return;
     }
 
-    struct _Py_unicode_state *state = get_unicode_state();
-    if (state->interned == NULL) {
-        state->interned = PyDict_New();
-        if (state->interned == NULL) {
+    if (INTERNED == NULL) {
+        INTERNED = PyDict_New();
+        if (INTERNED == NULL) {
             PyErr_Clear(); /* Don't leave an exception */
             return;
         }
     }
 
-    PyObject *t = PyDict_SetDefault(state->interned, s, s);
+    PyObject *t = PyDict_SetDefault(INTERNED, s, s);
     if (t == NULL) {
         PyErr_Clear();
         return;
@@ -15658,25 +15656,27 @@ PyUnicode_InternFromString(const char *cp)
 void
 _PyUnicode_ClearInterned(PyInterpreterState *interp)
 {
-    struct _Py_unicode_state *state = &interp->unicode;
-    if (state->interned == NULL) {
+    if (!_Py_IsMainInterpreter(interp)) {
         return;
     }
-    assert(PyDict_CheckExact(state->interned));
+    if (INTERNED == NULL) {
+        return;
+    }
+    assert(PyDict_CheckExact(INTERNED));
 
     /* Interned unicode strings are not forcibly deallocated; rather, we give
        them their stolen references back, and then clear and DECREF the
        interned dict. */
 
 #ifdef INTERNED_STATS
     fprintf(stderr, "releasing %zd interned strings\n",
-            PyDict_GET_SIZE(state->interned));
+            PyDict_GET_SIZE(INTERNED));
 
     Py_ssize_t immortal_size = 0, mortal_size = 0;
 #endif
     Py_ssize_t pos = 0;
     PyObject *s, *ignored_value;
-    while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
+    while (PyDict_Next(INTERNED, &pos, &s, &ignored_value)) {
         assert(PyUnicode_IS_READY(s));
 
         switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15707,8 +15707,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
             mortal_size, immortal_size);
 #endif
 
-    PyDict_Clear(state->interned);
-    Py_CLEAR(state->interned);
+    PyDict_Clear(INTERNED);
+    Py_CLEAR(INTERNED);
 }
 
 
@@ -16079,8 +16079,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
 static inline int
 unicode_is_finalizing(void)
 {
-    struct _Py_unicode_state *state = get_unicode_state();
-    return (state->interned == NULL);
+    return (INTERNED == NULL);
 }
 #endif
 
@@ -16091,11 +16090,11 @@ _PyUnicode_Fini(PyInterpreterState *interp)
     struct _Py_unicode_state *state = &interp->unicode;
 
     // _PyUnicode_ClearInterned() must be called before
-    assert(state->interned == NULL);
+    assert(INTERNED == NULL || !_Py_IsMainInterpreter(interp));
 
     _PyUnicode_FiniEncodings(&state->fs_codec);
 
-    unicode_clear_identifiers(state);
+    unicode_clear_identifiers(interp);
 
     for (Py_ssize_t i = 0; i < 256; i++) {
         Py_CLEAR(state->latin1[i]);

@@ -120,8 +120,9 @@ init_runtime(_PyRuntimeState *runtime,
     // Set it to the ID of the main thread of the main interpreter.
     runtime->main_thread = PyThread_get_thread_ident();
 
-    runtime->unicode_ids.next_index = unicode_next_index;
-    runtime->unicode_ids.lock = unicode_ids_mutex;
+    struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids;
+    ids->next_index = unicode_next_index;
+    ids->lock = unicode_ids_mutex;
 
     runtime->_initialized = 1;
 }
@@ -137,7 +138,8 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
     _Py_AuditHookEntry *audit_hook_head = runtime->audit_hook_head;
     // bpo-42882: Preserve next_index value if Py_Initialize()/Py_Finalize()
     // is called multiple times.
-    Py_ssize_t unicode_next_index = runtime->unicode_ids.next_index;
+    struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids;
+    Py_ssize_t unicode_next_index = ids->next_index;
 
     PyThread_type_lock lock1, lock2, lock3;
     if (alloc_for_runtime(&lock1, &lock2, &lock3) != 0) {
@@ -164,7 +166,8 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime)
 
     FREE_LOCK(runtime->interpreters.mutex);
     FREE_LOCK(runtime->xidregistry.mutex);
-    FREE_LOCK(runtime->unicode_ids.lock);
+    struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids;
+    FREE_LOCK(ids->lock);
 
 #undef FREE_LOCK
     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
@@ -186,7 +189,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
 
     int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex);
     int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex);
-    int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_ids.lock);
+    struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids;
+    int reinit_unicode_ids = _PyThread_at_fork_reinit(&ids->lock);
 
     PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);