From 01c332251186c0f2abb4a136010078b9e4626845 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Sat, 17 Aug 2024 20:32:25 +0900 Subject: [PATCH 1/4] add Py_ALWAYS_INLINE --- Objects/typeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 0d7009ac57bd5f..ea5f7f8abed4f6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5176,7 +5176,7 @@ PyType_GetModuleState(PyTypeObject *type) /* Get the module of the first superclass where the module has the * given PyModuleDef. */ -static inline PyObject * +static inline Py_ALWAYS_INLINE PyObject * get_module_by_def(PyTypeObject *type, PyModuleDef *def) { assert(PyType_Check(type)); From f740a5d2526de59fadf8aa4d0bc0089f7008ceeb Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Tue, 20 Aug 2024 19:18:54 +0900 Subject: [PATCH 2/4] benchmark setup: /Ob3, __declspec(noinline) --- Objects/typeobject.c | 51 +++++++++++++++++++++++++++++++++++++++++ PCbuild/pyproject.props | 3 +++ 2 files changed, 54 insertions(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index ea5f7f8abed4f6..69e5aeebd074ea 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5226,6 +5226,57 @@ get_module_by_def(PyTypeObject *type, PyModuleDef *def) return res; } +// copied from the above +Py_NO_INLINE static PyObject * +get_module_by_def_NoInline(PyTypeObject *type, PyModuleDef *def) +{ + assert(PyType_Check(type)); + + if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + // type_ready_mro() ensures that no heap type is + // contained in a static type MRO. + return NULL; + } + else { + PyHeapTypeObject *ht = (PyHeapTypeObject*)type; + PyObject *module = ht->ht_module; + if (module && _PyModule_GetDef(module) == def) { + return module; + } + } + + PyObject *res = NULL; + BEGIN_TYPE_LOCK(); + + PyObject *mro = lookup_tp_mro(type); + // The type must be ready + assert(mro != NULL); + assert(PyTuple_Check(mro)); + // mro_invoke() ensures that the type MRO cannot be empty. + assert(PyTuple_GET_SIZE(mro) >= 1); + // Also, the first item in the MRO is the type itself, which + // we already checked above. We skip it in the loop. + assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type); + + Py_ssize_t n = PyTuple_GET_SIZE(mro); + for (Py_ssize_t i = 1; i < n; i++) { + PyObject *super = PyTuple_GET_ITEM(mro, i); + if(!_PyType_HasFeature((PyTypeObject *)super, Py_TPFLAGS_HEAPTYPE)) { + // Static types in the MRO need to be skipped + continue; + } + + PyHeapTypeObject *ht = (PyHeapTypeObject*)super; + PyObject *module = ht->ht_module; + if (module && _PyModule_GetDef(module) == def) { + res = module; + break; + } + } + END_TYPE_LOCK(); + return res; +} + PyObject * PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) { diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index 9c85e5efa4af4a..a17a6e5b3fae1a 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -73,6 +73,9 @@ -d2ssa-patterns-all- %(AdditionalOptions) /sourceDependencies "$(IntDir.Trim(`\`))" %(AdditionalOptions) + + /Ob3 %(AdditionalOptions) + OnlyExplicitInline Disabled From 6e62c38ce334b1a04778097ff232bad08ae4a5e9 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Tue, 20 Aug 2024 19:20:56 +0900 Subject: [PATCH 3/4] unsafe experiment: do not respect TLS access --- Include/internal/pycore_pystate.h | 2 +- Include/internal/pycore_runtime.h | 1 + Python/pystate.c | 18 +++++++++++++----- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index fade55945b7dbf..ef9d6b28f2d4c6 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -135,7 +135,7 @@ static inline PyThreadState* _PyThreadState_GET(void) { #if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE) - return _Py_tss_tstate; + return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&_PyRuntime.tstate_current); #else return _PyThreadState_GetCurrent(); #endif diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index d4291b87261ae0..d183251a39fbe7 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -285,6 +285,7 @@ typedef struct pyruntimestate { struct _pythread_runtime_state threads; struct _signals_runtime_state signals; + PyThreadState *tstate_current; /* Used for the thread state bound to the current thread. */ Py_tss_t autoTSSkey; diff --git a/Python/pystate.c b/Python/pystate.c index 4d7bec65ff5c49..7b63a6b5277259 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -79,12 +79,19 @@ current_fast_get(void) #endif } +static inline PyThreadState * +current_fast_get2(void) +{ + return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&_PyRuntime.tstate_current); +} + static inline void current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate) { assert(tstate != NULL); #ifdef HAVE_THREAD_LOCAL _Py_tss_tstate = tstate; + _Py_atomic_store_ptr_relaxed(&_PyRuntime.tstate_current, tstate); #else // XXX Fall back to the PyThread_tss_*() API. # error "no supported thread-local variable storage classifier" @@ -92,10 +99,11 @@ current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate) } static inline void -current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime)) +current_fast_clear(_PyRuntimeState *runtime) { #ifdef HAVE_THREAD_LOCAL _Py_tss_tstate = NULL; + _Py_atomic_store_ptr_relaxed(&runtime->tstate_current, NULL); #else // XXX Fall back to the PyThread_tss_*() API. # error "no supported thread-local variable storage classifier" @@ -110,7 +118,7 @@ current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime)) PyThreadState * _PyThreadState_GetCurrent(void) { - return current_fast_get(); + return current_fast_get2(); } @@ -1331,7 +1339,7 @@ _PyInterpreterState_RequireIDRef(PyInterpreterState *interp, int required) PyInterpreterState* PyInterpreterState_Get(void) { - PyThreadState *tstate = current_fast_get(); + PyThreadState *tstate = current_fast_get2(); _Py_EnsureTstateNotNULL(tstate); PyInterpreterState *interp = tstate->interp; if (interp == NULL) { @@ -2412,14 +2420,14 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) PyThreadState * PyThreadState_GetUnchecked(void) { - return current_fast_get(); + return current_fast_get2(); } PyThreadState * PyThreadState_Get(void) { - PyThreadState *tstate = current_fast_get(); + PyThreadState *tstate = current_fast_get2(); _Py_EnsureTstateNotNULL(tstate); return tstate; } From 89754d7ff47259fc886c101f27033fd234dd3202 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Tue, 20 Aug 2024 19:26:28 +0900 Subject: [PATCH 4/4] revert benchmark stuff --- Include/internal/pycore_pystate.h | 2 +- Include/internal/pycore_runtime.h | 1 - Objects/typeobject.c | 51 ------------------------------- PCbuild/pyproject.props | 3 -- Python/pystate.c | 18 +++-------- 5 files changed, 6 insertions(+), 69 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index ef9d6b28f2d4c6..fade55945b7dbf 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -135,7 +135,7 @@ static inline PyThreadState* _PyThreadState_GET(void) { #if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE) - return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&_PyRuntime.tstate_current); + return _Py_tss_tstate; #else return _PyThreadState_GetCurrent(); #endif diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index d183251a39fbe7..d4291b87261ae0 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -285,7 +285,6 @@ typedef struct pyruntimestate { struct _pythread_runtime_state threads; struct _signals_runtime_state signals; - PyThreadState *tstate_current; /* Used for the thread state bound to the current thread. */ Py_tss_t autoTSSkey; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 69e5aeebd074ea..ea5f7f8abed4f6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5226,57 +5226,6 @@ get_module_by_def(PyTypeObject *type, PyModuleDef *def) return res; } -// copied from the above -Py_NO_INLINE static PyObject * -get_module_by_def_NoInline(PyTypeObject *type, PyModuleDef *def) -{ - assert(PyType_Check(type)); - - if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - // type_ready_mro() ensures that no heap type is - // contained in a static type MRO. - return NULL; - } - else { - PyHeapTypeObject *ht = (PyHeapTypeObject*)type; - PyObject *module = ht->ht_module; - if (module && _PyModule_GetDef(module) == def) { - return module; - } - } - - PyObject *res = NULL; - BEGIN_TYPE_LOCK(); - - PyObject *mro = lookup_tp_mro(type); - // The type must be ready - assert(mro != NULL); - assert(PyTuple_Check(mro)); - // mro_invoke() ensures that the type MRO cannot be empty. - assert(PyTuple_GET_SIZE(mro) >= 1); - // Also, the first item in the MRO is the type itself, which - // we already checked above. We skip it in the loop. - assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type); - - Py_ssize_t n = PyTuple_GET_SIZE(mro); - for (Py_ssize_t i = 1; i < n; i++) { - PyObject *super = PyTuple_GET_ITEM(mro, i); - if(!_PyType_HasFeature((PyTypeObject *)super, Py_TPFLAGS_HEAPTYPE)) { - // Static types in the MRO need to be skipped - continue; - } - - PyHeapTypeObject *ht = (PyHeapTypeObject*)super; - PyObject *module = ht->ht_module; - if (module && _PyModule_GetDef(module) == def) { - res = module; - break; - } - } - END_TYPE_LOCK(); - return res; -} - PyObject * PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) { diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index a17a6e5b3fae1a..9c85e5efa4af4a 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -73,9 +73,6 @@ -d2ssa-patterns-all- %(AdditionalOptions) /sourceDependencies "$(IntDir.Trim(`\`))" %(AdditionalOptions) - - /Ob3 %(AdditionalOptions) - OnlyExplicitInline Disabled diff --git a/Python/pystate.c b/Python/pystate.c index 7b63a6b5277259..4d7bec65ff5c49 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -79,19 +79,12 @@ current_fast_get(void) #endif } -static inline PyThreadState * -current_fast_get2(void) -{ - return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&_PyRuntime.tstate_current); -} - static inline void current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate) { assert(tstate != NULL); #ifdef HAVE_THREAD_LOCAL _Py_tss_tstate = tstate; - _Py_atomic_store_ptr_relaxed(&_PyRuntime.tstate_current, tstate); #else // XXX Fall back to the PyThread_tss_*() API. # error "no supported thread-local variable storage classifier" @@ -99,11 +92,10 @@ current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate) } static inline void -current_fast_clear(_PyRuntimeState *runtime) +current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime)) { #ifdef HAVE_THREAD_LOCAL _Py_tss_tstate = NULL; - _Py_atomic_store_ptr_relaxed(&runtime->tstate_current, NULL); #else // XXX Fall back to the PyThread_tss_*() API. # error "no supported thread-local variable storage classifier" @@ -118,7 +110,7 @@ current_fast_clear(_PyRuntimeState *runtime) PyThreadState * _PyThreadState_GetCurrent(void) { - return current_fast_get2(); + return current_fast_get(); } @@ -1339,7 +1331,7 @@ _PyInterpreterState_RequireIDRef(PyInterpreterState *interp, int required) PyInterpreterState* PyInterpreterState_Get(void) { - PyThreadState *tstate = current_fast_get2(); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); PyInterpreterState *interp = tstate->interp; if (interp == NULL) { @@ -2420,14 +2412,14 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) PyThreadState * PyThreadState_GetUnchecked(void) { - return current_fast_get2(); + return current_fast_get(); } PyThreadState * PyThreadState_Get(void) { - PyThreadState *tstate = current_fast_get2(); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); return tstate; }