Skip to content

GH-93657: More LOAD_ATTR specializations #93892

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Include/descrobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ PyAPI_FUNC(PyObject *) PyDescr_NewGetSet(PyTypeObject *, PyGetSetDef *);
PyAPI_FUNC(PyObject *) PyDictProxy_New(PyObject *);
PyAPI_FUNC(PyObject *) PyWrapper_New(PyObject *, PyObject *);

PyObject *_PyProperty_PropGet(PyObject *prop);

#ifndef Py_LIMITED_API
# define Py_CPYTHON_DESCROBJECT_H
# include "cpython/descrobject.h"
Expand Down
44 changes: 23 additions & 21 deletions Include/internal/pycore_opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 30 additions & 28 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,10 @@ def jabs_op(name, op):
"LOAD_ATTR_ADAPTIVE",
# These potentially push [NULL, bound method] onto the stack.
"LOAD_ATTR_CLASS",
"LOAD_ATTR_CLASS_MUTABLE_DESCRIPTOR",
"LOAD_ATTR_INSTANCE_VALUE",
"LOAD_ATTR_MODULE",
"LOAD_ATTR_PROPERTY",
"LOAD_ATTR_SLOT",
"LOAD_ATTR_WITH_HINT",
# These will always push [unbound method, self] onto the stack.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Specialize `LOAD_ATTR` for mutable class attribute descriptors and ``property()`` attributes.
7 changes: 7 additions & 0 deletions Objects/descrobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1893,6 +1893,13 @@ property_clear(PyObject *self)
return 0;
}

PyObject *
_PyProperty_PropGet(PyObject *self)
{
propertyobject *pp = (propertyobject *)self;
return pp->prop_get;
}

#include "clinic/descrobject.c.h"

PyTypeObject PyDictProxy_Type = {
Expand Down
77 changes: 73 additions & 4 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -3627,7 +3627,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}

TARGET(LOAD_ATTR_CLASS) {
/* LOAD_METHOD, for class methods */
assert(cframe.use_tracing == 0);
_PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;

Expand All @@ -3650,6 +3649,76 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
NOTRACE_DISPATCH();
}

TARGET(LOAD_ATTR_CLASS_MUTABLE_DESCRIPTOR) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this common enough to justify its own specialization, especially as we need to do much of the work of general lookup?

Copy link
Member Author

@Fidget-Spinner Fidget-Spinner Jun 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the stats on the faster-cpython repo, this represents 5.7% of the remaining failures. In contrast, properties are 13.8%. Class method objects are quite close, at 6.2%.

Here's a microbenchmark, with no PGO nor LTO, but it's a Windows release build (similar to -O3).

python -m timeit -s "
from enum import Enum

class Colours(Enum):
 RED = 1
" "Colours.RED"

# on MAIN:
2000000 loops, best of 5: 121 nsec per loop
2000000 loops, best of 5: 120 nsec per loop
2000000 loops, best of 5: 120 nsec per loop

# on this PR:
5000000 loops, best of 5: 77.2 nsec per loop
5000000 loops, best of 5: 73.5 nsec per loop
5000000 loops, best of 5: 72.1 nsec per loop

With PGO and LTO, I expect the difference to be less dramatic. But I still expect this form to be ~=15% faster than the base version.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a non-enum example?

enums members should be simple class attributes.
#93910

Copy link
Member Author

@Fidget-Spinner Fidget-Spinner Jun 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't. This opcode was specifically designed with enum in mind.

assert(cframe.use_tracing == 0);
_PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;

PyObject *cls = TOP();
DEOPT_IF(!PyType_Check(cls), LOAD_ATTR);
uint32_t type_version = read_u32(cache->type_version);
DEOPT_IF(((PyTypeObject *)cls)->tp_version_tag != type_version,
LOAD_ATTR);
assert(type_version != 0);
PyObject *descr = read_obj(cache->descr);
descrgetfunc get = Py_TYPE(descr)->tp_descr_get;
DEOPT_IF(get == NULL, LOAD_ATTR);
STAT_INC(LOAD_ATTR, hit);
PyObject *res = get(descr, NULL, cls);
if (res == NULL) {
goto error;
}
assert(res != NULL);
SET_TOP(NULL);
STACK_GROW((oparg & 1));
SET_TOP(res);
Py_DECREF(cls);
JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR);
NOTRACE_DISPATCH();
}

TARGET(LOAD_ATTR_PROPERTY) {
assert(cframe.use_tracing == 0);
_PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;

PyObject *owner = TOP();
PyTypeObject *cls = Py_TYPE(owner);
uint32_t type_version = read_u32(cache->type_version);
DEOPT_IF(cls->tp_version_tag != type_version, LOAD_ATTR);
assert(type_version != 0);
PyObject *prop = read_obj(cache->descr);
assert(Py_TYPE(prop) == &PyProperty_Type);
PyObject *fget = _PyProperty_PropGet(prop);
DEOPT_IF(!PyFunction_Check(fget), LOAD_ATTR);
PyFunctionObject *f = (PyFunctionObject *)fget;
DEOPT_IF(f->func_version != cache->keys_version[0], LOAD_ATTR);
PyCodeObject *code = (PyCodeObject *)f->func_code;
size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
assert(code->co_argcount == 1);
STAT_INC(LOAD_ATTR, hit);

_PyInterpreterFrame *new_frame = _PyThreadState_BumpFramePointer(tstate, size);
if (new_frame == NULL) {
goto error;
}
CALL_STAT_INC(frames_pushed);
Py_INCREF(f);
_PyFrame_InitializeSpecials(new_frame, f,
NULL, code->co_nlocalsplus);
SET_TOP(NULL);
STACK_SHRINK(!(oparg & 1));
new_frame->localsplus[0] = owner;
for (int i = 1; i < code->co_nlocalsplus; i++) {
new_frame->localsplus[i] = NULL;
}
_PyFrame_SetStackPointer(frame, stack_pointer);
JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR);
frame->prev_instr = next_instr - 1;
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
CALL_STAT_INC(inlined_py_calls);
goto start_frame;
}

TARGET(STORE_ATTR_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
Expand Down Expand Up @@ -4549,7 +4618,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}

TARGET(LOAD_ATTR_METHOD_WITH_VALUES) {
/* LOAD_METHOD, with cached method object */
/* Cached method object */
assert(cframe.use_tracing == 0);
PyObject *self = TOP();
PyTypeObject *self_cls = Py_TYPE(self);
Expand All @@ -4575,8 +4644,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}

TARGET(LOAD_ATTR_METHOD_WITH_DICT) {
/* LOAD_METHOD, with a dict
Can be either a managed dict, or a tp_dictoffset offset.*/
/* Can be either a managed dict, or a tp_dictoffset offset.*/
assert(cframe.use_tracing == 0);
PyObject *self = TOP();
PyTypeObject *self_cls = Py_TYPE(self);
Expand Down Expand Up @@ -5527,6 +5595,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}

TARGET(CACHE) {
printf("cache\n");
Py_UNREACHABLE();
}

Expand Down
Loading