Skip to content

bpo-36616: optimize handling of thread state in function call code #12839

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 109 additions & 94 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@ extern int _PyObject_GetMethod(PyObject *, PyObject *, PyObject **);
typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);

/* Forward declarations */
Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t,
PyObject *);
static PyObject * do_call_core(PyObject *, PyObject *, PyObject *);
static PyObject * profile_call(PyThreadState *, PyObject *,
PyObject *, PyObject *);
Py_LOCAL_INLINE(PyObject *) call_function(PyThreadState *,
PyObject ***, Py_ssize_t, PyObject *);

#ifdef LLTRACE
static int lltrace;
Expand Down Expand Up @@ -3241,9 +3242,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)

case TARGET(CALL_METHOD): {
/* Designed to work in tandem with LOAD_METHOD. */
PyObject **sp, *res, *meth;

sp = stack_pointer;
PyObject *res, *meth;

meth = PEEK(oparg + 2);
if (meth == NULL) {
Expand All @@ -3261,8 +3260,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
`callable` will be POPed by call_function.
NULL will be POPed manually later.
*/
res = call_function(&sp, oparg, NULL);
stack_pointer = sp;
res = call_function(tstate, &stack_pointer, oparg, NULL);
(void)POP(); /* POP the NULL. */
}
else {
Expand All @@ -3278,8 +3276,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
We'll be passing `oparg + 1` to call_function, to
make it accept the `self` as a first argument.
*/
res = call_function(&sp, oparg + 1, NULL);
stack_pointer = sp;
res = call_function(tstate, &stack_pointer, oparg + 1, NULL);
}

PUSH(res);
Expand All @@ -3290,10 +3287,8 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)

case TARGET(CALL_FUNCTION): {
PREDICTED(CALL_FUNCTION);
PyObject **sp, *res;
sp = stack_pointer;
res = call_function(&sp, oparg, NULL);
stack_pointer = sp;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These stack pointer assignments look a bit like someone left them for documentation purposes, trying to make it clear what is allowed/supposed to happen with the pointer that is passed down.
I'm not saying that it's wrong to remove these assignments, just that Chesterton's Fence might apply.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those strange redirections were added because somebody wanted to use

register PyObject **stack_pointer;

and it's illegal to take the address of a register variable. But the register keyword is obsolete, compilers are now much more clever than they were before. Also, call_function() should be inlined and then the pointer isn't even a real pointer.

PyObject *res;
res = call_function(tstate, &stack_pointer, oparg, NULL);
PUSH(res);
if (res == NULL) {
goto error;
Expand All @@ -3302,13 +3297,11 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}

case TARGET(CALL_FUNCTION_KW): {
PyObject **sp, *res, *names;
PyObject *res, *names;

names = POP();
assert(PyTuple_CheckExact(names) && PyTuple_GET_SIZE(names) <= oparg);
sp = stack_pointer;
res = call_function(&sp, oparg, names);
stack_pointer = sp;
res = call_function(tstate, &stack_pointer, oparg, names);
PUSH(res);
Py_DECREF(names);

Expand Down Expand Up @@ -3351,7 +3344,18 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
assert(PyTuple_CheckExact(callargs));

result = do_call_core(func, callargs, kwargs);
if (tstate->use_tracing) {
result = profile_call(tstate, func, callargs, kwargs);
}
else if (PyCFunction_Check(func)) {
result = _PyCFunction_FastCallDict(func,
_PyTuple_ITEMS(callargs),
PyTuple_GET_SIZE(callargs),
kwargs);
}
else {
result = PyObject_Call(func, callargs, kwargs);
}
Py_DECREF(func);
Py_DECREF(callargs);
Py_XDECREF(kwargs);
Expand Down Expand Up @@ -4624,7 +4628,7 @@ PyEval_GetFuncDesc(PyObject *func)
}

#define C_TRACE(x, call) \
if (tstate->use_tracing && tstate->c_profilefunc) { \
if (tstate->c_profilefunc) { \
if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
tstate, tstate->frame, \
PyTrace_C_CALL, func)) { \
Expand Down Expand Up @@ -4652,55 +4656,103 @@ if (tstate->use_tracing && tstate->c_profilefunc) { \
} \
} else { \
x = call; \
}


/* Call `func` while profiling is enabled (vectorcall-style arguments).

   tstate  -- thread state; read by the C_TRACE macro.
   func    -- the callable.  C_TRACE also refers to it by name.
   argv    -- array of arguments, `nargs` positional ones first.
   nargs   -- number of positional arguments in `argv`.
   kwnames -- keyword names, forwarded unchanged to the callee.

   Returns the result of the call, or NULL if binding a method
   descriptor to its `self` failed.  Kept out of line so that this slow
   path does not get inlined into its caller. */
_Py_NO_INLINE static PyObject *
profile_fastcall(PyThreadState *tstate, PyObject *func,
                 PyObject *const *argv, Py_ssize_t nargs,
                 PyObject *kwnames)
{
    PyObject *res;

    /* Builtin function: wrap the call in C_TRACE so the profile hook
       can observe it. */
    if (PyCFunction_Check(func)) {
        C_TRACE(res, _PyCFunction_FastCallKeywords(func, argv, nargs, kwnames));
        return res;
    }

    /* Anything that is not a method descriptor with at least one
       argument is called directly, without profiling.  For a method
       descriptor with nargs == 0 there is no "self" to bind; the call
       itself would raise TypeError (foo needs an argument), so
       profiling is simply skipped. */
    if (Py_TYPE(func) != &PyMethodDescr_Type || nargs <= 0) {
        return _PyObject_FastCallKeywords(func, argv, nargs, kwnames);
    }

    /* Method descriptor: create a temporary bound method to hand to
       the profiler.  The first argument becomes `self` and is dropped
       from the argument array. */
    PyObject *self = argv[0];
    func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject *)Py_TYPE(self));
    if (func == NULL) {
        return NULL;
    }
    C_TRACE(res, _PyCFunction_FastCallKeywords(func,
                                               argv + 1, nargs - 1,
                                               kwnames));
    /* Release the temporary bound method. */
    Py_DECREF(func);
    return res;
}

/* Call `func` while profiling is enabled (tuple/dict arguments).

   tstate -- thread state; read by the C_TRACE macro.
   func   -- the callable.  C_TRACE also refers to it by name.
   args   -- tuple of positional arguments.
   kwdict -- keyword arguments, forwarded unchanged to the callee.

   Returns the result of the call, or NULL if binding a method
   descriptor to its `self` failed.  Kept out of line so that this slow
   path does not get inlined into its caller. */
_Py_NO_INLINE static PyObject *
profile_call(PyThreadState *tstate, PyObject *func,
             PyObject *args, PyObject *kwdict)
{
    PyObject *res;
    PyObject *const *items = _PyTuple_ITEMS(args);
    Py_ssize_t count = PyTuple_GET_SIZE(args);

    /* Builtin function: wrap the call in C_TRACE so the profile hook
       can observe it. */
    if (PyCFunction_Check(func)) {
        C_TRACE(res, _PyCFunction_FastCallDict(func, items, count, kwdict));
        return res;
    }

    /* Anything that is not a method descriptor with at least one
       argument is called directly, without profiling.  For a method
       descriptor with no arguments there is no "self" to bind; the
       call itself would raise TypeError (foo needs an argument), so
       profiling is simply skipped. */
    if (Py_TYPE(func) != &PyMethodDescr_Type || count <= 0) {
        return PyObject_Call(func, args, kwdict);
    }

    /* Method descriptor: create a temporary bound method to hand to
       the profiler.  The first tuple item becomes `self` and is
       dropped from the argument array. */
    PyObject *self = items[0];
    func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject *)Py_TYPE(self));
    if (func == NULL) {
        return NULL;
    }
    C_TRACE(res, _PyCFunction_FastCallDict(func,
                                           items + 1, count - 1,
                                           kwdict));
    /* Release the temporary bound method. */
    Py_DECREF(func);
    return res;
}


/* Issue #29227: Inline call_function() into _PyEval_EvalFrameDefault()
to reduce the stack consumption. */
Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION
call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
call_function(PyThreadState *tstate, PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
{
PyObject **pfunc = (*pp_stack) - oparg - 1;
PyObject **argv = (*pp_stack) - oparg;
PyObject **pfunc = argv - 1;
PyObject *func = *pfunc;
PyObject *x, *w;
Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
Py_ssize_t nargs = oparg - nkwargs;
PyObject **stack = (*pp_stack) - nargs - nkwargs;

/* Always dispatch PyCFunction first, because these are
presumed to be the most frequent callable object.
*/
if (PyCFunction_Check(func)) {
PyThreadState *tstate = _PyThreadState_GET();
C_TRACE(x, _PyCFunction_FastCallKeywords(func, stack, nargs, kwnames));
if (tstate->use_tracing) {
x = profile_fastcall(tstate, func, argv, nargs, kwnames);
}
else if (PyCFunction_Check(func)) {
x = _PyCFunction_FastCallKeywords(func, argv, nargs, kwnames);
}
else if (Py_TYPE(func) == &PyMethodDescr_Type) {
PyThreadState *tstate = _PyThreadState_GET();
if (nargs > 0 && tstate->use_tracing) {
/* We need to create a temporary bound method as argument
for profiling.

If nargs == 0, then this cannot work because we have no
"self". In any case, the call itself would raise
TypeError (foo needs an argument), so we just skip
profiling. */
PyObject *self = stack[0];
func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject*)Py_TYPE(self));
if (func != NULL) {
C_TRACE(x, _PyCFunction_FastCallKeywords(func,
stack+1, nargs-1,
kwnames));
Py_DECREF(func);
}
else {
x = NULL;
}
}
else {
x = _PyMethodDescr_FastCallKeywords(func, stack, nargs, kwnames);
}
x = _PyMethodDescr_FastCallKeywords(func, argv, nargs, kwnames);
}
else {
if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
if (PyMethod_Check(func)) {
/* Optimize access to bound methods. Reuse the Python stack
to pass 'self' as the first argument, replace 'func'
with 'self'. It avoids the creation of a new temporary tuple
Expand All @@ -4712,17 +4764,17 @@ call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
Py_INCREF(func);
Py_SETREF(*pfunc, self);
nargs++;
stack--;
argv--;
}
else {
Py_INCREF(func);
}

if (PyFunction_Check(func)) {
x = _PyFunction_FastCallKeywords(func, stack, nargs, kwnames);
x = _PyFunction_FastCallKeywords(func, argv, nargs, kwnames);
}
else {
x = _PyObject_FastCallKeywords(func, stack, nargs, kwnames);
x = _PyObject_FastCallKeywords(func, argv, nargs, kwnames);
}
Py_DECREF(func);
}
Expand All @@ -4738,43 +4790,6 @@ call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
return x;
}

/* Call `func` with a tuple of positional arguments and keyword
   arguments, routing builtin functions and method descriptors through
   C_TRACE.

   func     -- the callable.  C_TRACE also refers to it by name.
   callargs -- tuple of positional arguments.
   kwdict   -- keyword arguments, forwarded unchanged to the callee.

   Returns the result of the call, or NULL if binding a method
   descriptor to its `self` failed. */
static PyObject *
do_call_core(PyObject *func, PyObject *callargs, PyObject *kwdict)
{
    PyObject *res;

    if (PyCFunction_Check(func)) {
        /* C_TRACE reads the thread state through the name `tstate`. */
        PyThreadState *tstate = _PyThreadState_GET();
        C_TRACE(res, PyCFunction_Call(func, callargs, kwdict));
        return res;
    }

    if (Py_TYPE(func) != &PyMethodDescr_Type) {
        return PyObject_Call(func, callargs, kwdict);
    }

    PyThreadState *tstate = _PyThreadState_GET();
    Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
    if (nargs <= 0 || !tstate->use_tracing) {
        /* Either tracing is off, or there is no "self" to bind.  In the
           latter case the call itself would raise TypeError (foo needs
           an argument), so profiling is simply skipped. */
        return PyObject_Call(func, callargs, kwdict);
    }

    /* Create a temporary bound method to hand to the profiler.  The
       first tuple item becomes `self` and is dropped from the
       argument array. */
    PyObject *self = PyTuple_GET_ITEM(callargs, 0);
    func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject *)Py_TYPE(self));
    if (func == NULL) {
        return NULL;
    }

    C_TRACE(res, _PyCFunction_FastCallDict(func,
                                           _PyTuple_ITEMS(callargs) + 1,
                                           nargs - 1,
                                           kwdict));
    /* Release the temporary bound method. */
    Py_DECREF(func);
    return res;
}

/* Extract a slice index from a PyLong or an object with the
nb_index slot defined, and store in *pi.
Expand Down