diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 551b9c01e6a98b..9528c8996a443c 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -233,9 +233,6 @@ extern void _PyLineTable_InitAddressRange(
 extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
 extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
 
-
-#define ADAPTIVE_CACHE_BACKOFF 64
-
 /* Specialization functions */
 
 extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
@@ -475,6 +472,50 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 }
 
 
+/** Counters
+ * The first 16-bit value in each inline cache is a counter.
+ * When counting misses, the counter is treated as a simple unsigned value.
+ *
+ * When counting executions until the next specialization attempt,
+ * exponential backoff is used to reduce the number of specialization failures.
+ * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
+ * On a specialization failure, the backoff exponent is incremented and the
+ * counter set to (2**backoff - 1).
+ * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
+ */
+
+/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
+#define ADAPTIVE_BACKOFF_BITS 4
+/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
+#define ADAPTIVE_BACKOFF_START 5
+
+#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
+
+
+static inline uint16_t
+adaptive_counter_bits(int value, int backoff) {
+    return (value << ADAPTIVE_BACKOFF_BITS) |
+           (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
+}
+
+static inline uint16_t
+adaptive_counter_start(void) {
+    unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
+    return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
+}
+
+static inline uint16_t
+adaptive_counter_backoff(uint16_t counter) {
+    unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
+    backoff++;
+    if (backoff > MAX_BACKOFF_VALUE) {
+        backoff = MAX_BACKOFF_VALUE;
+    }
+    unsigned int value = (1 << backoff) - 1;
+    return adaptive_counter_bits(value, backoff);
+}
+
+
 /* Line array cache for tracing */
 
 extern int _PyCode_CreateLineArray(PyCodeObject *co);
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-05-30-15-35-42.gh-issue-93354.RZk8gs.rst b/Misc/NEWS.d/next/Core and Builtins/2022-05-30-15-35-42.gh-issue-93354.RZk8gs.rst
new file mode 100644
index 00000000000000..dcfe6a9b6ba3a5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-05-30-15-35-42.gh-issue-93354.RZk8gs.rst
@@ -0,0 +1,3 @@
+Use exponential backoff for specialization counters in the interpreter. Can
+reduce the number of failed specializations significantly and avoid slowdown
+for those parts of a program that are not suitable for specialization.
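The header above packs two fields into each inline cache's 16-bit counter: the high 12 bits count remaining executions, the low 4 bits hold the backoff exponent. The standalone sketch below (plain C, not part of the patch; it reuses only the constants above) prints the sequence of starting counters an instruction sees as specialization failures accumulate:

```c
#include <stdint.h>
#include <stdio.h>

#define ADAPTIVE_BACKOFF_BITS 4
#define ADAPTIVE_BACKOFF_START 5
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)

/* Pack a 12-bit value and a 4-bit backoff exponent into one uint16_t,
   the same layout as adaptive_counter_bits() above. */
static uint16_t
counter_bits(unsigned int value, unsigned int backoff)
{
    return (uint16_t)((value << ADAPTIVE_BACKOFF_BITS) |
                      (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
}

int
main(void)
{
    /* Initial state, as in adaptive_counter_start(): value 31, exponent 5. */
    uint16_t c = counter_bits((1 << ADAPTIVE_BACKOFF_START) - 1,
                              ADAPTIVE_BACKOFF_START);
    printf("start:   value=%u\n", (unsigned)(c >> ADAPTIVE_BACKOFF_BITS));

    for (int fail = 1; fail <= 10; fail++) {
        /* As in adaptive_counter_backoff(): bump the exponent (capped),
           reload the value field with 2**backoff - 1. */
        unsigned int backoff = (c & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)) + 1;
        if (backoff > MAX_BACKOFF_VALUE) {
            backoff = MAX_BACKOFF_VALUE;
        }
        c = counter_bits((1u << backoff) - 1, backoff);
        printf("fail %2d: value=%u\n",
               fail, (unsigned)(c >> ADAPTIVE_BACKOFF_BITS));
    }
    return 0;
}
```

This prints 31, then 63, 127, 255, 511, 1023, 2047, 4095, 4095, ...: after seven failures the counter stops growing, so a stubbornly unspecializable instruction is retried at a fixed low rate instead of every 64 executions as with the old flat ADAPTIVE_CACHE_BACKOFF.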
diff --git a/Python/ceval.c b/Python/ceval.c
index b8681617c8c22c..a2c1d0bc43985c 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -1559,7 +1559,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
         dtrace_function_entry(frame); \
     }
 
+#define ADAPTIVE_COUNTER_IS_ZERO(cache) \
+    (cache)->counter < (1<<ADAPTIVE_BACKOFF_BITS)
+
+#define DECREMENT_ADAPTIVE_COUNTER(cache) \
+    (cache)->counter -= (1<<ADAPTIVE_BACKOFF_BITS)
@@ -2154,7 +2158,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
 
         TARGET(BINARY_SUBSCR_ADAPTIVE) {
             _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *sub = TOP();
                 PyObject *container = SECOND();
                 next_instr--;
@@ -2165,7 +2169,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(BINARY_SUBSCR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
             }
         }
@@ -2319,7 +2323,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
 
         TARGET(STORE_SUBSCR_ADAPTIVE) {
             _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *sub = TOP();
                 PyObject *container = SECOND();
                 next_instr--;
@@ -2330,7 +2334,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(STORE_SUBSCR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(STORE_SUBSCR);
             }
         }
@@ -2812,7 +2816,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(UNPACK_SEQUENCE_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *seq = TOP();
                 next_instr--;
                 _Py_Specialize_UnpackSequence(seq, next_instr, oparg);
@@ -2820,7 +2824,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(UNPACK_SEQUENCE, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(UNPACK_SEQUENCE);
             }
         }
@@ -3053,7 +3057,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(LOAD_GLOBAL_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *name = GETITEM(names, oparg>>1);
                 next_instr--;
                 if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name) < 0) {
@@ -3063,7 +3067,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(LOAD_GLOBAL, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
             }
         }
@@ -3477,7 +3481,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(LOAD_ATTR_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyAttrCache *cache = (_PyAttrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *owner = TOP();
                 PyObject *name = GETITEM(names, oparg);
                 next_instr--;
@@ -3488,7 +3492,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(LOAD_ATTR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(LOAD_ATTR);
             }
         }
@@ -3586,7 +3590,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(STORE_ATTR_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyAttrCache *cache = (_PyAttrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *owner = TOP();
                 PyObject *name = GETITEM(names, oparg);
                 next_instr--;
@@ -3597,7 +3601,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(STORE_ATTR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(STORE_ATTR);
             }
         }
@@ -3716,7 +3720,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(COMPARE_OP_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *right = TOP();
                 PyObject *left = SECOND();
                 next_instr--;
@@ -3725,7 +3729,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(COMPARE_OP, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(COMPARE_OP);
             }
         }
@@ -4523,7 +4527,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(LOAD_METHOD_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *owner = TOP();
                 PyObject *name = GETITEM(names, oparg);
                 next_instr--;
@@ -4534,7 +4538,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(LOAD_METHOD, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(LOAD_METHOD);
             }
         }
@@ -4815,7 +4819,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
 
         TARGET(CALL_ADAPTIVE) {
             _PyCallCache *cache = (_PyCallCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 next_instr--;
                 int is_meth = is_method(stack_pointer, oparg);
                 int nargs = oparg + is_meth;
@@ -4829,7 +4833,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(CALL, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 goto call_function;
             }
         }
@@ -5560,7 +5564,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(BINARY_OP_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *lhs = SECOND();
                 PyObject *rhs = TOP();
                 next_instr--;
@@ -5569,7 +5573,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(BINARY_OP, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(BINARY_OP);
             }
         }
@@ -5690,7 +5694,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         assert(adaptive_opcode);
         _Py_SET_OPCODE(next_instr[-1], adaptive_opcode);
         STAT_INC(opcode, deopt);
-        *counter = ADAPTIVE_CACHE_BACKOFF;
+        *counter = adaptive_counter_start();
     }
     next_instr--;
     DISPATCH_GOTO();
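Note how the interpreter hunks above never unpack the two fields on the hot path: ADAPTIVE_COUNTER_IS_ZERO tests whether the 12-bit value field is zero by comparing the whole 16-bit word against 1<<ADAPTIVE_BACKOFF_BITS (i.e. 16), and DECREMENT_ADAPTIVE_COUNTER subtracts 16, which decrements the value field while leaving the backoff nibble untouched for the eventual adaptive_counter_backoff() call. A minimal sketch of that invariant (plain C, independent of the patch; counter_is_zero and counter_decrement are stand-ins for the two macros):

```c
#include <assert.h>
#include <stdint.h>

#define ADAPTIVE_BACKOFF_BITS 4

/* The two ceval.c macros, written as functions over a bare uint16_t. */
static int
counter_is_zero(uint16_t counter)
{
    return counter < (1 << ADAPTIVE_BACKOFF_BITS);
}

static uint16_t
counter_decrement(uint16_t counter)
{
    return (uint16_t)(counter - (1 << ADAPTIVE_BACKOFF_BITS));
}

int
main(void)
{
    /* value = 2, backoff = 7: two deferred executions remain. */
    uint16_t c = (2 << ADAPTIVE_BACKOFF_BITS) | 7;

    assert(!counter_is_zero(c));
    c = counter_decrement(c);   /* value 1, backoff nibble still 7 */
    assert(!counter_is_zero(c) && (c & 0xF) == 7);
    c = counter_decrement(c);   /* value 0; backoff preserved for the retry */
    assert(counter_is_zero(c) && (c & 0xF) == 7);
    return 0;
}
```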
diff --git a/Python/specialize.c b/Python/specialize.c
index 80d952c90a813d..cf7bc32a205be0 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -316,7 +316,7 @@ _PyCode_Quicken(PyCodeObject *code)
 }
 
 static inline int
-initial_counter_value(void) {
+miss_counter_start(void) {
     /* Starting value for the counter.
      * This value needs to be not too low, otherwise
      * it would cause excessive de-optimization.
@@ -738,12 +738,12 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
 fail:
     STAT_INC(LOAD_ATTR, failure);
     assert(!PyErr_Occurred());
-    cache->counter = ADAPTIVE_CACHE_BACKOFF;
+    cache->counter = adaptive_counter_backoff(cache->counter);
     return 0;
 success:
     STAT_INC(LOAD_ATTR, success);
     assert(!PyErr_Occurred());
-    cache->counter = initial_counter_value();
+    cache->counter = miss_counter_start();
     return 0;
 }
@@ -820,12 +820,12 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
 fail:
     STAT_INC(STORE_ATTR, failure);
     assert(!PyErr_Occurred());
-    cache->counter = ADAPTIVE_CACHE_BACKOFF;
+    cache->counter = adaptive_counter_backoff(cache->counter);
    return 0;
 success:
     STAT_INC(STORE_ATTR, success);
     assert(!PyErr_Occurred());
-    cache->counter = initial_counter_value();
+    cache->counter = miss_counter_start();
     return 0;
 }
@@ -1027,14 +1027,13 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name)
 success:
     STAT_INC(LOAD_METHOD, success);
     assert(!PyErr_Occurred());
-    cache->counter = initial_counter_value();
+    cache->counter = miss_counter_start();
     return 0;
 fail:
     STAT_INC(LOAD_METHOD, failure);
     assert(!PyErr_Occurred());
-    cache->counter = ADAPTIVE_CACHE_BACKOFF;
+    cache->counter = adaptive_counter_backoff(cache->counter);
     return 0;
-
 }
 
 int
@@ -1110,12 +1109,12 @@ _Py_Specialize_LoadGlobal(
 fail:
     STAT_INC(LOAD_GLOBAL, failure);
     assert(!PyErr_Occurred());
-    cache->counter = ADAPTIVE_CACHE_BACKOFF;
+    cache->counter = adaptive_counter_backoff(cache->counter);
     return 0;
 success:
     STAT_INC(LOAD_GLOBAL, success);
     assert(!PyErr_Occurred());
-    cache->counter = initial_counter_value();
+    cache->counter = miss_counter_start();
     return 0;
 }
@@ -1239,12 +1238,12 @@ _Py_Specialize_BinarySubscr(
 fail:
     STAT_INC(BINARY_SUBSCR, failure);
     assert(!PyErr_Occurred());
-    cache->counter = ADAPTIVE_CACHE_BACKOFF;
+    cache->counter = adaptive_counter_backoff(cache->counter);
     return 0;
 success:
     STAT_INC(BINARY_SUBSCR, success);
     assert(!PyErr_Occurred());
-    cache->counter = initial_counter_value();
+    cache->counter = miss_counter_start();
     return 0;
 }
@@ -1343,12 +1342,12 @@ _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *ins
 fail:
     STAT_INC(STORE_SUBSCR, failure);
     assert(!PyErr_Occurred());
-    cache->counter = ADAPTIVE_CACHE_BACKOFF;
+    cache->counter = adaptive_counter_backoff(cache->counter);
     return 0;
 success:
     STAT_INC(STORE_SUBSCR, success);
     assert(!PyErr_Occurred());
-    cache->counter = initial_counter_value();
+    cache->counter = miss_counter_start();
     return 0;
 }
@@ -1646,12 +1645,12 @@ _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
     if (fail) {
         STAT_INC(PRECALL, failure);
         assert(!PyErr_Occurred());
-        cache->counter = ADAPTIVE_CACHE_BACKOFF;
+        cache->counter = adaptive_counter_backoff(cache->counter);
     }
     else {
         STAT_INC(PRECALL, success);
         assert(!PyErr_Occurred());
-        cache->counter = initial_counter_value();
+        cache->counter = miss_counter_start();
     }
     return 0;
 }
@@ -1678,12 +1677,12 @@ _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
     if (fail) {
         STAT_INC(CALL, failure);
         assert(!PyErr_Occurred());
-        cache->counter = ADAPTIVE_CACHE_BACKOFF;
+        cache->counter = adaptive_counter_backoff(cache->counter);
     }
     else {
         STAT_INC(CALL, success);
         assert(!PyErr_Occurred());
-        cache->counter = initial_counter_value();
+        cache->counter = miss_counter_start();
     }
     return 0;
 }
@@ -1831,11 +1830,11 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
     }
     SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs));
     STAT_INC(BINARY_OP, failure);
-    cache->counter = ADAPTIVE_CACHE_BACKOFF;
+    cache->counter = adaptive_counter_backoff(cache->counter);
     return;
 success:
     STAT_INC(BINARY_OP, success);
-    cache->counter = initial_counter_value();
+    cache->counter = miss_counter_start();
 }
@@ -1958,11 +1957,11 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
     SPECIALIZATION_FAIL(COMPARE_OP, compare_op_fail_kind(lhs, rhs));
 failure:
     STAT_INC(COMPARE_OP, failure);
-    cache->counter = ADAPTIVE_CACHE_BACKOFF;
+    cache->counter = adaptive_counter_backoff(cache->counter);
     return;
 success:
     STAT_INC(COMPARE_OP, success);
-    cache->counter = initial_counter_value();
+    cache->counter = miss_counter_start();
 }
 
 #ifdef Py_STATS
@@ -2008,11 +2007,11 @@ _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, int oparg)
     SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq));
 failure:
     STAT_INC(UNPACK_SEQUENCE, failure);
-    cache->counter = ADAPTIVE_CACHE_BACKOFF;
+    cache->counter = adaptive_counter_backoff(cache->counter);
     return;
 success:
     STAT_INC(UNPACK_SEQUENCE, success);
-    cache->counter = initial_counter_value();
+    cache->counter = miss_counter_start();
 }
 
 #ifdef Py_STATS
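The net effect of the two files together: previously, an instruction whose operands never become specializable paid a failed specialization attempt every 64 executions, forever. With this patch the attempts thin out geometrically. The rough simulation below (hypothetical driver code, not CPython itself, using the same constants as the patch) reproduces the attempt schedule for such an instruction:

```c
#include <stdint.h>
#include <stdio.h>

#define ADAPTIVE_BACKOFF_BITS 4
#define ADAPTIVE_BACKOFF_START 5
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)

int
main(void)
{
    /* Start as adaptive_counter_start() would: value 31, backoff 5. */
    uint16_t counter = (uint16_t)((((1u << ADAPTIVE_BACKOFF_START) - 1)
                                   << ADAPTIVE_BACKOFF_BITS)
                                  | ADAPTIVE_BACKOFF_START);
    long execs = 0;

    for (int attempt = 1; attempt <= 10; attempt++) {
        /* Deferred path: DECREMENT_ADAPTIVE_COUNTER until IS_ZERO. */
        while (counter >= (1 << ADAPTIVE_BACKOFF_BITS)) {
            counter -= 1 << ADAPTIVE_BACKOFF_BITS;
            execs++;
        }
        execs++;   /* this execution attempts (and fails) to specialize */
        printf("attempt %2d after %5ld executions\n", attempt, execs);

        /* As in adaptive_counter_backoff(): exponent +1 (capped),
           value field reloaded with 2**backoff - 1. */
        unsigned int backoff =
            (counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)) + 1;
        if (backoff > MAX_BACKOFF_VALUE) {
            backoff = MAX_BACKOFF_VALUE;
        }
        counter = (uint16_t)((((1u << backoff) - 1) << ADAPTIVE_BACKOFF_BITS)
                             | backoff);
    }
    return 0;
}
```

The first attempt lands after 32 executions, the second after 96, and once the exponent saturates the attempts settle 4096 executions apart (cumulative totals 32, 96, 224, 480, 992, 2016, 4064, 8160, 12256, 16352), whereas the old flat counter would have retried roughly every 64 executions throughout.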