From 415902a053241e1b61d813a3fc0602c092a4a74b Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Thu, 18 Aug 2022 15:39:31 +0000 Subject: [PATCH 1/3] move interned dict under runtime state --- Include/internal/pycore_global_objects.h | 9 +++++++++ Objects/unicodeobject.c | 23 +++++++++-------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 98673d4efcedcc..64965178f62a21 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -45,6 +45,15 @@ struct _Py_global_objects { _PyGC_Head_UNUSED _tuple_empty_gc_not_used; PyTupleObject tuple_empty; } singletons; + /* This dictionary holds all interned unicode strings. Note that references + to strings in this dictionary are *not* counted in the string's ob_refcnt. + When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. + + Another way to look at this is that to say that the actual reference + count of a string is: s->ob_refcnt + (s->state ? 2 : 0) + */ + PyObject *interned; }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b1d14a32f70f3e..4b8eba4e9dcedb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -69,6 +69,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. in _PyUnicode_ClearInterned(). */ /* #define INTERNED_STATS 1 */ +#define INTERNED_DICT() \ + _PyRuntime.global_objects.interned /*[clinic input] class str "PyObject *" "&PyUnicode_Type" @@ -191,16 +193,6 @@ extern "C" { # define OVERALLOCATE_FACTOR 4 #endif -/* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. 
- When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. - - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->state ? 2 : 0) -*/ -static PyObject *interned = NULL; - /* Forward declaration */ static inline int _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch); @@ -1523,7 +1515,7 @@ unicode_dealloc(PyObject *unicode) _Py_FatalRefcountError("deallocating an Unicode singleton"); } #endif - + PyObject *interned = INTERNED_DICT(); if (PyUnicode_CHECK_INTERNED(unicode)) { /* Revive the dead object temporarily. PyDict_DelItem() removes two references (key and value) which were ignored by @@ -14657,12 +14649,14 @@ PyUnicode_InternInPlace(PyObject **p) return; } + PyObject *interned = INTERNED_DICT(); if (interned == NULL) { interned = PyDict_New(); if (interned == NULL) { PyErr_Clear(); /* Don't leave an exception */ return; } + INTERNED_DICT() = interned; } PyObject *t = PyDict_SetDefault(interned, s, s); @@ -14713,6 +14707,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) return; } + PyObject *interned = INTERNED_DICT(); if (interned == NULL) { return; } @@ -14748,7 +14743,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) #endif PyDict_Clear(interned); - Py_CLEAR(interned); + Py_CLEAR(INTERNED_DICT()); } @@ -15155,7 +15150,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) static inline int unicode_is_finalizing(void) { - return (interned == NULL); + return (INTERNED_DICT() == NULL); } #endif @@ -15197,7 +15192,7 @@ _PyUnicode_Fini(PyInterpreterState *interp) if (_Py_IsMainInterpreter(interp)) { // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(interned == NULL); + assert(INTERNED_DICT() == NULL); // bpo-47182: force a unicodedata CAPI capsule re-import on // subsequent initialization of main interpreter. 
ucnhash_capi = NULL; From 53ac9e9b8fe31afffea2e5c27d0727de46021f2b Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Sat, 20 Aug 2022 17:18:52 +0000 Subject: [PATCH 2/3] use static inline functions --- Include/internal/pycore_global_objects.h | 9 +----- Objects/unicodeobject.c | 35 +++++++++++++++++------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 64965178f62a21..82e89db7b1b750 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -45,14 +45,7 @@ struct _Py_global_objects { _PyGC_Head_UNUSED _tuple_empty_gc_not_used; PyTupleObject tuple_empty; } singletons; - /* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. - - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->state ? 2 : 0) - */ + PyObject *interned; }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4b8eba4e9dcedb..9661fd7ce6f9f7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -69,9 +69,6 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. in _PyUnicode_ClearInterned(). */ /* #define INTERNED_STATS 1 */ -#define INTERNED_DICT() \ - _PyRuntime.global_objects.interned - /*[clinic input] class str "PyObject *" "&PyUnicode_Type" [clinic start generated code]*/ @@ -227,6 +224,23 @@ static inline PyObject* unicode_new_empty(void) return empty; } +/* This dictionary holds all interned unicode strings. Note that references + to strings in this dictionary are *not* counted in the string's ob_refcnt. 
+ When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. + Another way to look at this is to say that the actual reference + count of a string is: s->ob_refcnt + (s->state ? 2 : 0) +*/ +static inline PyObject *get_interned_dict(void) +{ + return _PyRuntime.global_objects.interned; +} + +static inline void set_interned_dict(PyObject *dict) +{ + _PyRuntime.global_objects.interned = dict; +} + #define _Py_RETURN_UNICODE_EMPTY() \ do { \ return unicode_new_empty(); \ @@ -1515,7 +1529,7 @@ unicode_dealloc(PyObject *unicode) _Py_FatalRefcountError("deallocating an Unicode singleton"); } #endif - PyObject *interned = INTERNED_DICT(); + PyObject *interned = get_interned_dict(); if (PyUnicode_CHECK_INTERNED(unicode)) { /* Revive the dead object temporarily. PyDict_DelItem() removes two references (key and value) which were ignored by @@ -14649,14 +14663,14 @@ PyUnicode_InternInPlace(PyObject **p) return; } - PyObject *interned = INTERNED_DICT(); + PyObject *interned = get_interned_dict(); if (interned == NULL) { interned = PyDict_New(); if (interned == NULL) { PyErr_Clear(); /* Don't leave an exception */ return; } - INTERNED_DICT() = interned; + set_interned_dict(interned); } PyObject *t = PyDict_SetDefault(interned, s, s); @@ -14707,7 +14721,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) return; } - PyObject *interned = INTERNED_DICT(); + PyObject *interned = get_interned_dict(); if (interned == NULL) { return; } @@ -14743,7 +14757,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) #endif PyDict_Clear(interned); - Py_CLEAR(INTERNED_DICT()); + Py_DECREF(interned); + set_interned_dict(NULL); } @@ -15150,7 +15165,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) static inline int unicode_is_finalizing(void) { - return (INTERNED_DICT() == NULL); + return (get_interned_dict() == NULL); } #endif @@ -15192,7 +15207,7 @@ _PyUnicode_Fini(PyInterpreterState *interp) if 
(_Py_IsMainInterpreter(interp)) { // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(INTERNED_DICT() == NULL); + assert(get_interned_dict() == NULL); // bpo-47182: force a unicodedata CAPI capsule re-import on // subsequent initialization of main interpreter. ucnhash_capi = NULL; From c6758d8d6123f247877580c18f87f249bec9aa1b Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Sat, 20 Aug 2022 17:20:31 +0000 Subject: [PATCH 3/3] whitespace --- Objects/unicodeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9661fd7ce6f9f7..13f2c5b49bd73a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -69,6 +69,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. in _PyUnicode_ClearInterned(). */ /* #define INTERNED_STATS 1 */ + /*[clinic input] class str "PyObject *" "&PyUnicode_Type" [clinic start generated code]*/