-
-
Notifications
You must be signed in to change notification settings - Fork 32.1k
bpo-46006: Move the interned strings and identifiers to _PyRuntimeState. #30131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e79afd9
7c6c441
58d9c0b
01f19dc
b60bd33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,6 +45,8 @@ extern "C" { | |
_PyRuntime.global_objects.NAME | ||
#define _Py_SINGLETON(NAME) \ | ||
_Py_GLOBAL_OBJECT(singletons.NAME) | ||
#define _Py_CACHED_OBJECT(NAME) \ | ||
_Py_GLOBAL_OBJECT(cached.NAME) | ||
|
||
struct _Py_global_objects { | ||
struct { | ||
|
@@ -54,7 +56,29 @@ struct _Py_global_objects { | |
* -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive). | ||
*/ | ||
PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; | ||
|
||
/* Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() */ | ||
struct _Py_unicode_ids { | ||
PyThread_type_lock lock; | ||
// next_index value must be preserved when Py_Initialize()/Py_Finalize() | ||
// is called multiple times: see _PyUnicode_FromId() implementation. | ||
Py_ssize_t next_index; | ||
|
||
Py_ssize_t size; | ||
PyObject **array; | ||
} unicode_ids; | ||
} singletons; | ||
struct { | ||
/* This dictionary holds all interned unicode strings. Note that references | ||
to strings in this dictionary are *not* counted in the string's ob_refcnt. | ||
When the interned string reaches a refcnt of 0 the string deallocation | ||
function will delete the reference from this dictionary. | ||
|
||
Another way to look at this is to say that the actual reference | ||
count of a string is: s->ob_refcnt + (s->state ? 2 : 0) | ||
*/ | ||
PyObject *unicode_interned; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you use "string" or "str" rather than "unicode"? Python 2 is history 🙂 |
||
} cached; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please drop this struct. |
||
}; | ||
|
||
#define _Py_global_objects_INIT { \ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Move the interned strings and Py_IDENTIFIER strings back to the | ||
process-global runtime state instead of the per-interpreter state (at least | ||
for now). |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -233,6 +233,9 @@ static int unicode_is_singleton(PyObject *unicode); | |
#endif | ||
|
||
|
||
#define IDENTIFIERS _Py_SINGLETON(unicode_ids) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you drop the |
||
#define INTERNED _Py_CACHED_OBJECT(unicode_interned) | ||
|
||
static struct _Py_unicode_state* | ||
get_unicode_state(void) | ||
{ | ||
|
@@ -1950,15 +1953,14 @@ unicode_dealloc(PyObject *unicode) | |
|
||
case SSTATE_INTERNED_MORTAL: | ||
{ | ||
struct _Py_unicode_state *state = get_unicode_state(); | ||
/* Revive the dead object temporarily. PyDict_DelItem() removes two | ||
references (key and value) which were ignored by | ||
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 | ||
to prevent calling unicode_dealloc() again. Adjust refcnt after | ||
PyDict_DelItem(). */ | ||
assert(Py_REFCNT(unicode) == 0); | ||
Py_SET_REFCNT(unicode, 3); | ||
if (PyDict_DelItem(state->interned, unicode) != 0) { | ||
if (PyDict_DelItem(INTERNED, unicode) != 0) { | ||
_PyErr_WriteUnraisableMsg("deletion of interned string failed", | ||
NULL); | ||
} | ||
|
@@ -2331,30 +2333,25 @@ PyUnicode_FromString(const char *u) | |
PyObject * | ||
_PyUnicode_FromId(_Py_Identifier *id) | ||
{ | ||
PyInterpreterState *interp = _PyInterpreterState_GET(); | ||
struct _Py_unicode_ids *ids = &interp->unicode.ids; | ||
|
||
Py_ssize_t index = _Py_atomic_size_get(&id->index); | ||
if (index < 0) { | ||
struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids; | ||
|
||
PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK); | ||
PyThread_acquire_lock(IDENTIFIERS.lock, WAIT_LOCK); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Drop this, and assert that the GIL is held? |
||
// Check again to detect concurrent access. Another thread can have | ||
// initialized the index while this thread waited for the lock. | ||
index = _Py_atomic_size_get(&id->index); | ||
if (index < 0) { | ||
assert(rt_ids->next_index < PY_SSIZE_T_MAX); | ||
index = rt_ids->next_index; | ||
rt_ids->next_index++; | ||
assert(IDENTIFIERS.next_index < PY_SSIZE_T_MAX); | ||
index = IDENTIFIERS.next_index; | ||
IDENTIFIERS.next_index++; | ||
_Py_atomic_size_set(&id->index, index); | ||
} | ||
PyThread_release_lock(rt_ids->lock); | ||
PyThread_release_lock(IDENTIFIERS.lock); | ||
} | ||
assert(index >= 0); | ||
|
||
PyObject *obj; | ||
if (index < ids->size) { | ||
obj = ids->array[index]; | ||
if (index < IDENTIFIERS.size) { | ||
obj = IDENTIFIERS.array[index]; | ||
if (obj) { | ||
// Return a borrowed reference | ||
return obj; | ||
|
@@ -2368,38 +2365,40 @@ _PyUnicode_FromId(_Py_Identifier *id) | |
} | ||
PyUnicode_InternInPlace(&obj); | ||
|
||
if (index >= ids->size) { | ||
if (index >= IDENTIFIERS.size) { | ||
// Overallocate to reduce the number of realloc | ||
Py_ssize_t new_size = Py_MAX(index * 2, 16); | ||
Py_ssize_t item_size = sizeof(ids->array[0]); | ||
PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size); | ||
Py_ssize_t item_size = sizeof(IDENTIFIERS.array[0]); | ||
PyObject **new_array = PyMem_Realloc(IDENTIFIERS.array, new_size * item_size); | ||
if (new_array == NULL) { | ||
PyErr_NoMemory(); | ||
return NULL; | ||
} | ||
memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size); | ||
ids->array = new_array; | ||
ids->size = new_size; | ||
memset(&new_array[IDENTIFIERS.size], 0, (new_size - IDENTIFIERS.size) * item_size); | ||
IDENTIFIERS.array = new_array; | ||
IDENTIFIERS.size = new_size; | ||
} | ||
|
||
// The array stores a strong reference | ||
ids->array[index] = obj; | ||
IDENTIFIERS.array[index] = obj; | ||
|
||
// Return a borrowed reference | ||
return obj; | ||
} | ||
|
||
|
||
static void | ||
unicode_clear_identifiers(struct _Py_unicode_state *state) | ||
unicode_clear_identifiers(PyInterpreterState *interp) | ||
{ | ||
ericsnowcurrently marked this conversation as resolved.
Show resolved
Hide resolved
|
||
struct _Py_unicode_ids *ids = &state->ids; | ||
for (Py_ssize_t i=0; i < ids->size; i++) { | ||
Py_XDECREF(ids->array[i]); | ||
if (!_Py_IsMainInterpreter(interp)) { | ||
return; | ||
} | ||
for (Py_ssize_t i=0; i < IDENTIFIERS.size; i++) { | ||
Py_XDECREF(IDENTIFIERS.array[i]); | ||
} | ||
ids->size = 0; | ||
PyMem_Free(ids->array); | ||
ids->array = NULL; | ||
IDENTIFIERS.size = 0; | ||
PyMem_Free(IDENTIFIERS.array); | ||
IDENTIFIERS.array = NULL; | ||
// Don't reset _PyRuntime next_index: _Py_Identifier.index remains valid | ||
// after Py_Finalize(). | ||
} | ||
|
@@ -15596,16 +15595,15 @@ PyUnicode_InternInPlace(PyObject **p) | |
return; | ||
} | ||
|
||
struct _Py_unicode_state *state = get_unicode_state(); | ||
if (state->interned == NULL) { | ||
state->interned = PyDict_New(); | ||
if (state->interned == NULL) { | ||
if (INTERNED == NULL) { | ||
INTERNED = PyDict_New(); | ||
if (INTERNED == NULL) { | ||
PyErr_Clear(); /* Don't leave an exception */ | ||
return; | ||
} | ||
} | ||
|
||
PyObject *t = PyDict_SetDefault(state->interned, s, s); | ||
PyObject *t = PyDict_SetDefault(INTERNED, s, s); | ||
if (t == NULL) { | ||
PyErr_Clear(); | ||
return; | ||
|
@@ -15658,25 +15656,27 @@ PyUnicode_InternFromString(const char *cp) | |
void | ||
_PyUnicode_ClearInterned(PyInterpreterState *interp) | ||
{ | ||
ericsnowcurrently marked this conversation as resolved.
Show resolved
Hide resolved
|
||
struct _Py_unicode_state *state = &interp->unicode; | ||
if (state->interned == NULL) { | ||
if (!_Py_IsMainInterpreter(interp)) { | ||
return; | ||
} | ||
assert(PyDict_CheckExact(state->interned)); | ||
if (INTERNED == NULL) { | ||
return; | ||
} | ||
assert(PyDict_CheckExact(INTERNED)); | ||
|
||
/* Interned unicode strings are not forcibly deallocated; rather, we give | ||
them their stolen references back, and then clear and DECREF the | ||
interned dict. */ | ||
|
||
#ifdef INTERNED_STATS | ||
fprintf(stderr, "releasing %zd interned strings\n", | ||
PyDict_GET_SIZE(state->interned)); | ||
PyDict_GET_SIZE(INTERNED)); | ||
|
||
Py_ssize_t immortal_size = 0, mortal_size = 0; | ||
#endif | ||
Py_ssize_t pos = 0; | ||
PyObject *s, *ignored_value; | ||
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) { | ||
while (PyDict_Next(INTERNED, &pos, &s, &ignored_value)) { | ||
assert(PyUnicode_IS_READY(s)); | ||
|
||
switch (PyUnicode_CHECK_INTERNED(s)) { | ||
|
@@ -15707,8 +15707,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) | |
mortal_size, immortal_size); | ||
#endif | ||
|
||
PyDict_Clear(state->interned); | ||
Py_CLEAR(state->interned); | ||
PyDict_Clear(INTERNED); | ||
Py_CLEAR(INTERNED); | ||
} | ||
|
||
|
||
|
@@ -16079,8 +16079,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) | |
static inline int | ||
unicode_is_finalizing(void) | ||
{ | ||
struct _Py_unicode_state *state = get_unicode_state(); | ||
return (state->interned == NULL); | ||
return (INTERNED == NULL); | ||
} | ||
#endif | ||
|
||
|
@@ -16091,11 +16090,11 @@ _PyUnicode_Fini(PyInterpreterState *interp) | |
struct _Py_unicode_state *state = &interp->unicode; | ||
|
||
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() | ||
assert(state->interned == NULL); | ||
assert(INTERNED == NULL || !_Py_IsMainInterpreter(interp)); | ||
|
||
_PyUnicode_FiniEncodings(&state->fs_codec); | ||
|
||
unicode_clear_identifiers(state); | ||
unicode_clear_identifiers(interp); | ||
|
||
for (Py_ssize_t i = 0; i < 256; i++) { | ||
Py_CLEAR(state->latin1[i]); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't this lock unnecessary? The GIL is held whenever an identifier is used, isn't it?