
[3.11] GH-93354: Use exponential backoff to avoid excessive specialization attempts (GH-93355) #93379


Merged (4 commits, Jun 30, 2022)
47 changes: 44 additions & 3 deletions Include/internal/pycore_code.h
@@ -233,9 +233,6 @@ extern void _PyLineTable_InitAddressRange(
extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);


- #define ADAPTIVE_CACHE_BACKOFF 64

/* Specialization functions */

extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
@@ -475,6 +472,50 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
}


+ /** Counters
+  * The first 16-bit value in each inline cache is a counter.
+  * When counting misses, the counter is treated as a simple unsigned value.
+  *
+  * When counting executions until the next specialization attempt,
+  * exponential backoff is used to reduce the number of specialization failures.
+  * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
+  * On a specialization failure, the backoff exponent is incremented and the
+  * counter set to (2**backoff - 1).
+  * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
+  */
+
+ /* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
+ #define ADAPTIVE_BACKOFF_BITS 4
+ /* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
+ #define ADAPTIVE_BACKOFF_START 5
+
+ #define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
+
+
+ static inline uint16_t
+ adaptive_counter_bits(int value, int backoff) {
+     return (value << ADAPTIVE_BACKOFF_BITS) |
+            (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
+ }
+
+ static inline uint16_t
+ adaptive_counter_start(void) {
+     unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
+     return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
+ }
+
+ static inline uint16_t
+ adaptive_counter_backoff(uint16_t counter) {
+     unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
+     backoff++;
+     if (backoff > MAX_BACKOFF_VALUE) {
+         backoff = MAX_BACKOFF_VALUE;
+     }
+     unsigned int value = (1 << backoff) - 1;
+     return adaptive_counter_bits(value, backoff);
+ }
+
+
/* Line array cache for tracing */

extern int _PyCode_CreateLineArray(PyCodeObject *co);
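The arithmetic in these helpers is easiest to see by running it. Below is a minimal standalone sketch, not part of the patch (the `pack` helper and `main` driver are illustrative only), that mirrors `adaptive_counter_start()` and `adaptive_counter_backoff()` and prints the retry schedule after repeated specialization failures:

```c
/* Minimal sketch, not part of the patch: replicates the packed
 * counter helpers above and prints the backoff schedule. */
#include <stdint.h>
#include <stdio.h>

#define ADAPTIVE_BACKOFF_BITS 4
#define ADAPTIVE_BACKOFF_START 5
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)

/* Pack a countdown value (high 12 bits) and exponent (low 4 bits). */
static uint16_t pack(unsigned value, unsigned backoff) {
    return (uint16_t)((value << ADAPTIVE_BACKOFF_BITS) |
                      (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
}

int main(void) {
    /* Initial state: countdown 31, exponent 5, as in adaptive_counter_start(). */
    uint16_t counter = pack((1 << ADAPTIVE_BACKOFF_START) - 1,
                            ADAPTIVE_BACKOFF_START);
    for (int failures = 0; failures < 10; failures++) {
        printf("after %d failures: wait %4u executions (exponent %2u)\n",
               failures,
               (unsigned)(counter >> ADAPTIVE_BACKOFF_BITS),
               (unsigned)(counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
        /* A failed specialization attempt: bump the exponent,
         * saturating at 12, and re-arm the countdown to 2**backoff - 1,
         * as in adaptive_counter_backoff(). */
        unsigned backoff = (counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)) + 1;
        if (backoff > MAX_BACKOFF_VALUE) {
            backoff = MAX_BACKOFF_VALUE;
        }
        counter = pack((1u << backoff) - 1, backoff);
    }
    return 0;
}
```

The waits go 31, 63, 127, 255, ..., saturating at 4095 (exponent 12), so an instruction that never specializes ends up paying for one attempt per ~4096 executions instead of one per 64.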
@@ -0,0 +1,3 @@
+ Use exponential backoff for specialization counters in the interpreter. Can
+ reduce the number of failed specializations significantly and avoid slowdown
+ for those parts of a program that are not suitable for specialization.
46 changes: 25 additions & 21 deletions Python/ceval.c
@@ -1559,7 +1559,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
dtrace_function_entry(frame); \
}

+ #define ADAPTIVE_COUNTER_IS_ZERO(cache) \
+     (cache)->counter < (1<<ADAPTIVE_BACKOFF_BITS)

+ #define DECREMENT_ADAPTIVE_COUNTER(cache) \
+     (cache)->counter -= (1<<ADAPTIVE_BACKOFF_BITS)

static int
trace_function_entry(PyThreadState *tstate, _PyInterpreterFrame *frame)
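Because the countdown value occupies the high 12 bits, the hot path never has to unpack it: `ADAPTIVE_COUNTER_IS_ZERO` is a single compare against `1<<4`, and `DECREMENT_ADAPTIVE_COUNTER` subtracts `1<<4`, leaving the backoff exponent in the low nibble untouched. A self-contained sketch of this behaviour (the `FakeCache` struct is a hypothetical stand-in for the `_Py*Cache` structs used below):

```c
#include <assert.h>
#include <stdint.h>

#define ADAPTIVE_BACKOFF_BITS 4

/* Hypothetical stand-in for the _Py*Cache structs in ceval.c. */
typedef struct { uint16_t counter; } FakeCache;

#define ADAPTIVE_COUNTER_IS_ZERO(cache) \
    ((cache)->counter < (1 << ADAPTIVE_BACKOFF_BITS))

#define DECREMENT_ADAPTIVE_COUNTER(cache) \
    ((cache)->counter -= (1 << ADAPTIVE_BACKOFF_BITS))

int main(void) {
    /* Countdown value 2, backoff exponent 5. */
    FakeCache cache = { .counter = (2 << ADAPTIVE_BACKOFF_BITS) | 5 };

    DECREMENT_ADAPTIVE_COUNTER(&cache);          /* countdown 2 -> 1 */
    assert(!ADAPTIVE_COUNTER_IS_ZERO(&cache));

    DECREMENT_ADAPTIVE_COUNTER(&cache);          /* countdown 1 -> 0 */
    assert(ADAPTIVE_COUNTER_IS_ZERO(&cache));

    /* The exponent survives the decrements. */
    assert((cache.counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)) == 5);
    return 0;
}
```

This is also why the zero test reads `counter < (1<<ADAPTIVE_BACKOFF_BITS)` rather than `counter == 0`: a "zero" counter still carries the exponent that the next call to `adaptive_counter_backoff()` will escalate.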
@@ -2154,7 +2158,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

TARGET(BINARY_SUBSCR_ADAPTIVE) {
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
@@ -2165,7 +2169,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(BINARY_SUBSCR, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
}
}
@@ -2319,7 +2323,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

TARGET(STORE_SUBSCR_ADAPTIVE) {
_PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
@@ -2330,7 +2334,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(STORE_SUBSCR, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(STORE_SUBSCR);
}
}
@@ -2812,15 +2816,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(UNPACK_SEQUENCE_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *seq = TOP();
next_instr--;
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
NOTRACE_DISPATCH_SAME_OPARG();
}
else {
STAT_INC(UNPACK_SEQUENCE, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(UNPACK_SEQUENCE);
}
}
@@ -3053,7 +3057,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(LOAD_GLOBAL_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *name = GETITEM(names, oparg>>1);
next_instr--;
if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name) < 0) {
Expand All @@ -3063,7 +3067,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(LOAD_GLOBAL, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
}
}
@@ -3477,7 +3481,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(LOAD_ATTR_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *owner = TOP();
PyObject *name = GETITEM(names, oparg);
next_instr--;
Expand All @@ -3488,7 +3492,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(LOAD_ATTR, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(LOAD_ATTR);
}
}
@@ -3586,7 +3590,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(STORE_ATTR_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *owner = TOP();
PyObject *name = GETITEM(names, oparg);
next_instr--;
Expand All @@ -3597,7 +3601,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(STORE_ATTR, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(STORE_ATTR);
}
}
@@ -3716,7 +3720,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(COMPARE_OP_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *right = TOP();
PyObject *left = SECOND();
next_instr--;
Expand All @@ -3725,7 +3729,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(COMPARE_OP, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(COMPARE_OP);
}
}
@@ -4523,7 +4527,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(LOAD_METHOD_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *owner = TOP();
PyObject *name = GETITEM(names, oparg);
next_instr--;
Expand All @@ -4534,7 +4538,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(LOAD_METHOD, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(LOAD_METHOD);
}
}
@@ -4815,7 +4819,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int

TARGET(CALL_ADAPTIVE) {
_PyCallCache *cache = (_PyCallCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
next_instr--;
int is_meth = is_method(stack_pointer, oparg);
int nargs = oparg + is_meth;
@@ -4829,7 +4833,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(CALL, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
goto call_function;
}
}
@@ -5560,7 +5564,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
TARGET(BINARY_OP_ADAPTIVE) {
assert(cframe.use_tracing == 0);
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
- if (cache->counter == 0) {
+ if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
PyObject *lhs = SECOND();
PyObject *rhs = TOP();
next_instr--;
@@ -5569,7 +5573,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
else {
STAT_INC(BINARY_OP, deferred);
- cache->counter--;
+ DECREMENT_ADAPTIVE_COUNTER(cache);
JUMP_TO_INSTRUCTION(BINARY_OP);
}
}
@@ -5690,7 +5694,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
assert(adaptive_opcode);
_Py_SET_OPCODE(next_instr[-1], adaptive_opcode);
STAT_INC(opcode, deopt);
- *counter = ADAPTIVE_CACHE_BACKOFF;
+ *counter = adaptive_counter_start();
}
next_instr--;
DISPATCH_GOTO();
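Worked example of what this last hunk changes: previously every deopt re-armed the counter to the flat ADAPTIVE_CACHE_BACKOFF of 64 that this PR deletes from pycore_code.h, so an instruction that could never be specialized kept paying for an attempt every 64 executions indefinitely. After this change a deopt re-arms the counter with adaptive_counter_start() (31 executions, exponent 5), and each further failure escalates the wait via adaptive_counter_backoff() to 63, 127, 255, and so on, capping at 4095. (The failure path that calls adaptive_counter_backoff() lives in Python/specialize.c, which is not part of this excerpt.)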