From 01c332251186c0f2abb4a136010078b9e4626845 Mon Sep 17 00:00:00 2001
From: neonene <53406459+neonene@users.noreply.github.com>
Date: Sat, 17 Aug 2024 20:32:25 +0900
Subject: [PATCH 1/4] add Py_ALWAYS_INLINE
---
Objects/typeobject.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 0d7009ac57bd5f..ea5f7f8abed4f6 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -5176,7 +5176,7 @@ PyType_GetModuleState(PyTypeObject *type)
/* Get the module of the first superclass where the module has the
* given PyModuleDef.
*/
-static inline PyObject *
+static inline Py_ALWAYS_INLINE PyObject *
get_module_by_def(PyTypeObject *type, PyModuleDef *def)
{
assert(PyType_Check(type));
From f740a5d2526de59fadf8aa4d0bc0089f7008ceeb Mon Sep 17 00:00:00 2001
From: neonene <53406459+neonene@users.noreply.github.com>
Date: Tue, 20 Aug 2024 19:18:54 +0900
Subject: [PATCH 2/4] benchmark setup: /Ob3, __declspec(noinline)
---
Objects/typeobject.c | 51 +++++++++++++++++++++++++++++++++++++++++
PCbuild/pyproject.props | 3 +++
2 files changed, 54 insertions(+)
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index ea5f7f8abed4f6..69e5aeebd074ea 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -5226,6 +5226,57 @@ get_module_by_def(PyTypeObject *type, PyModuleDef *def)
return res;
}
+// copied from the above
+Py_NO_INLINE static PyObject *
+get_module_by_def_NoInline(PyTypeObject *type, PyModuleDef *def)
+{
+ assert(PyType_Check(type));
+
+ if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
+ // type_ready_mro() ensures that no heap type is
+ // contained in a static type MRO.
+ return NULL;
+ }
+ else {
+ PyHeapTypeObject *ht = (PyHeapTypeObject*)type;
+ PyObject *module = ht->ht_module;
+ if (module && _PyModule_GetDef(module) == def) {
+ return module;
+ }
+ }
+
+ PyObject *res = NULL;
+ BEGIN_TYPE_LOCK();
+
+ PyObject *mro = lookup_tp_mro(type);
+ // The type must be ready
+ assert(mro != NULL);
+ assert(PyTuple_Check(mro));
+ // mro_invoke() ensures that the type MRO cannot be empty.
+ assert(PyTuple_GET_SIZE(mro) >= 1);
+ // Also, the first item in the MRO is the type itself, which
+ // we already checked above. We skip it in the loop.
+ assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type);
+
+ Py_ssize_t n = PyTuple_GET_SIZE(mro);
+ for (Py_ssize_t i = 1; i < n; i++) {
+ PyObject *super = PyTuple_GET_ITEM(mro, i);
+ if(!_PyType_HasFeature((PyTypeObject *)super, Py_TPFLAGS_HEAPTYPE)) {
+ // Static types in the MRO need to be skipped
+ continue;
+ }
+
+ PyHeapTypeObject *ht = (PyHeapTypeObject*)super;
+ PyObject *module = ht->ht_module;
+ if (module && _PyModule_GetDef(module) == def) {
+ res = module;
+ break;
+ }
+ }
+ END_TYPE_LOCK();
+ return res;
+}
+
PyObject *
PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def)
{
diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props
index 9c85e5efa4af4a..a17a6e5b3fae1a 100644
--- a/PCbuild/pyproject.props
+++ b/PCbuild/pyproject.props
@@ -73,6 +73,9 @@
-d2ssa-patterns-all- %(AdditionalOptions)
/sourceDependencies "$(IntDir.Trim(`\`))" %(AdditionalOptions)
+
+ /Ob3 %(AdditionalOptions)
+
OnlyExplicitInline
Disabled
From 6e62c38ce334b1a04778097ff232bad08ae4a5e9 Mon Sep 17 00:00:00 2001
From: neonene <53406459+neonene@users.noreply.github.com>
Date: Tue, 20 Aug 2024 19:20:56 +0900
Subject: [PATCH 3/4] unsafe experiment: do not respect TLS access
---
Include/internal/pycore_pystate.h | 2 +-
Include/internal/pycore_runtime.h | 1 +
Python/pystate.c | 18 +++++++++++++-----
3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index fade55945b7dbf..ef9d6b28f2d4c6 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -135,7 +135,7 @@ static inline PyThreadState*
_PyThreadState_GET(void)
{
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
- return _Py_tss_tstate;
+ return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&_PyRuntime.tstate_current);
#else
return _PyThreadState_GetCurrent();
#endif
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
index d4291b87261ae0..d183251a39fbe7 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -285,6 +285,7 @@ typedef struct pyruntimestate {
struct _pythread_runtime_state threads;
struct _signals_runtime_state signals;
+ PyThreadState *tstate_current;
/* Used for the thread state bound to the current thread. */
Py_tss_t autoTSSkey;
diff --git a/Python/pystate.c b/Python/pystate.c
index 4d7bec65ff5c49..7b63a6b5277259 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -79,12 +79,19 @@ current_fast_get(void)
#endif
}
+static inline PyThreadState *
+current_fast_get2(void)
+{
+ return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&_PyRuntime.tstate_current);
+}
+
static inline void
current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate)
{
assert(tstate != NULL);
#ifdef HAVE_THREAD_LOCAL
_Py_tss_tstate = tstate;
+ _Py_atomic_store_ptr_relaxed(&_PyRuntime.tstate_current, tstate);
#else
// XXX Fall back to the PyThread_tss_*() API.
# error "no supported thread-local variable storage classifier"
@@ -92,10 +99,11 @@ current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate)
}
static inline void
-current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime))
+current_fast_clear(_PyRuntimeState *runtime)
{
#ifdef HAVE_THREAD_LOCAL
_Py_tss_tstate = NULL;
+ _Py_atomic_store_ptr_relaxed(&runtime->tstate_current, NULL);
#else
// XXX Fall back to the PyThread_tss_*() API.
# error "no supported thread-local variable storage classifier"
@@ -110,7 +118,7 @@ current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime))
PyThreadState *
_PyThreadState_GetCurrent(void)
{
- return current_fast_get();
+ return current_fast_get2();
}
@@ -1331,7 +1339,7 @@ _PyInterpreterState_RequireIDRef(PyInterpreterState *interp, int required)
PyInterpreterState*
PyInterpreterState_Get(void)
{
- PyThreadState *tstate = current_fast_get();
+ PyThreadState *tstate = current_fast_get2();
_Py_EnsureTstateNotNULL(tstate);
PyInterpreterState *interp = tstate->interp;
if (interp == NULL) {
@@ -2412,14 +2420,14 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
PyThreadState *
PyThreadState_GetUnchecked(void)
{
- return current_fast_get();
+ return current_fast_get2();
}
PyThreadState *
PyThreadState_Get(void)
{
- PyThreadState *tstate = current_fast_get();
+ PyThreadState *tstate = current_fast_get2();
_Py_EnsureTstateNotNULL(tstate);
return tstate;
}
From 89754d7ff47259fc886c101f27033fd234dd3202 Mon Sep 17 00:00:00 2001
From: neonene <53406459+neonene@users.noreply.github.com>
Date: Tue, 20 Aug 2024 19:26:28 +0900
Subject: [PATCH 4/4] revert benchmark stuff
---
Include/internal/pycore_pystate.h | 2 +-
Include/internal/pycore_runtime.h | 1 -
Objects/typeobject.c | 51 -------------------------------
PCbuild/pyproject.props | 3 --
Python/pystate.c | 18 +++--------
5 files changed, 6 insertions(+), 69 deletions(-)
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index ef9d6b28f2d4c6..fade55945b7dbf 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -135,7 +135,7 @@ static inline PyThreadState*
_PyThreadState_GET(void)
{
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
- return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&_PyRuntime.tstate_current);
+ return _Py_tss_tstate;
#else
return _PyThreadState_GetCurrent();
#endif
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
index d183251a39fbe7..d4291b87261ae0 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -285,7 +285,6 @@ typedef struct pyruntimestate {
struct _pythread_runtime_state threads;
struct _signals_runtime_state signals;
- PyThreadState *tstate_current;
/* Used for the thread state bound to the current thread. */
Py_tss_t autoTSSkey;
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 69e5aeebd074ea..ea5f7f8abed4f6 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -5226,57 +5226,6 @@ get_module_by_def(PyTypeObject *type, PyModuleDef *def)
return res;
}
-// copied from the above
-Py_NO_INLINE static PyObject *
-get_module_by_def_NoInline(PyTypeObject *type, PyModuleDef *def)
-{
- assert(PyType_Check(type));
-
- if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
- // type_ready_mro() ensures that no heap type is
- // contained in a static type MRO.
- return NULL;
- }
- else {
- PyHeapTypeObject *ht = (PyHeapTypeObject*)type;
- PyObject *module = ht->ht_module;
- if (module && _PyModule_GetDef(module) == def) {
- return module;
- }
- }
-
- PyObject *res = NULL;
- BEGIN_TYPE_LOCK();
-
- PyObject *mro = lookup_tp_mro(type);
- // The type must be ready
- assert(mro != NULL);
- assert(PyTuple_Check(mro));
- // mro_invoke() ensures that the type MRO cannot be empty.
- assert(PyTuple_GET_SIZE(mro) >= 1);
- // Also, the first item in the MRO is the type itself, which
- // we already checked above. We skip it in the loop.
- assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type);
-
- Py_ssize_t n = PyTuple_GET_SIZE(mro);
- for (Py_ssize_t i = 1; i < n; i++) {
- PyObject *super = PyTuple_GET_ITEM(mro, i);
- if(!_PyType_HasFeature((PyTypeObject *)super, Py_TPFLAGS_HEAPTYPE)) {
- // Static types in the MRO need to be skipped
- continue;
- }
-
- PyHeapTypeObject *ht = (PyHeapTypeObject*)super;
- PyObject *module = ht->ht_module;
- if (module && _PyModule_GetDef(module) == def) {
- res = module;
- break;
- }
- }
- END_TYPE_LOCK();
- return res;
-}
-
PyObject *
PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def)
{
diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props
index a17a6e5b3fae1a..9c85e5efa4af4a 100644
--- a/PCbuild/pyproject.props
+++ b/PCbuild/pyproject.props
@@ -73,9 +73,6 @@
-d2ssa-patterns-all- %(AdditionalOptions)
/sourceDependencies "$(IntDir.Trim(`\`))" %(AdditionalOptions)
-
- /Ob3 %(AdditionalOptions)
-
OnlyExplicitInline
Disabled
diff --git a/Python/pystate.c b/Python/pystate.c
index 7b63a6b5277259..4d7bec65ff5c49 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -79,19 +79,12 @@ current_fast_get(void)
#endif
}
-static inline PyThreadState *
-current_fast_get2(void)
-{
- return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&_PyRuntime.tstate_current);
-}
-
static inline void
current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate)
{
assert(tstate != NULL);
#ifdef HAVE_THREAD_LOCAL
_Py_tss_tstate = tstate;
- _Py_atomic_store_ptr_relaxed(&_PyRuntime.tstate_current, tstate);
#else
// XXX Fall back to the PyThread_tss_*() API.
# error "no supported thread-local variable storage classifier"
@@ -99,11 +92,10 @@ current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate)
}
static inline void
-current_fast_clear(_PyRuntimeState *runtime)
+current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime))
{
#ifdef HAVE_THREAD_LOCAL
_Py_tss_tstate = NULL;
- _Py_atomic_store_ptr_relaxed(&runtime->tstate_current, NULL);
#else
// XXX Fall back to the PyThread_tss_*() API.
# error "no supported thread-local variable storage classifier"
@@ -118,7 +110,7 @@ current_fast_clear(_PyRuntimeState *runtime)
PyThreadState *
_PyThreadState_GetCurrent(void)
{
- return current_fast_get2();
+ return current_fast_get();
}
@@ -1339,7 +1331,7 @@ _PyInterpreterState_RequireIDRef(PyInterpreterState *interp, int required)
PyInterpreterState*
PyInterpreterState_Get(void)
{
- PyThreadState *tstate = current_fast_get2();
+ PyThreadState *tstate = current_fast_get();
_Py_EnsureTstateNotNULL(tstate);
PyInterpreterState *interp = tstate->interp;
if (interp == NULL) {
@@ -2420,14 +2412,14 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
PyThreadState *
PyThreadState_GetUnchecked(void)
{
- return current_fast_get2();
+ return current_fast_get();
}
PyThreadState *
PyThreadState_Get(void)
{
- PyThreadState *tstate = current_fast_get2();
+ PyThreadState *tstate = current_fast_get();
_Py_EnsureTstateNotNULL(tstate);
return tstate;
}