
Commit 113b309

markshannon and ambv authored
[3.11] GH-93354: Use exponential backoff to avoid excessive specialization attempts (GH-93355) (GH-93379)
Co-authored-by: Mark Shannon <[email protected]>
Co-authored-by: Łukasz Langa <[email protected]>
1 parent 6c40538 commit 113b309

4 files changed: 95 additions, 48 deletions

Include/internal/pycore_code.h

Lines changed: 44 additions & 3 deletions
@@ -233,9 +233,6 @@ extern void _PyLineTable_InitAddressRange(
 extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
 extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
 
-
-#define ADAPTIVE_CACHE_BACKOFF 64
-
 /* Specialization functions */
 
 extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
@@ -475,6 +472,50 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 }
 
 
+/** Counters
+ * The first 16-bit value in each inline cache is a counter.
+ * When counting misses, the counter is treated as a simple unsigned value.
+ *
+ * When counting executions until the next specialization attempt,
+ * exponential backoff is used to reduce the number of specialization failures.
+ * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
+ * On a specialization failure, the backoff exponent is incremented and the
+ * counter set to (2**backoff - 1).
+ * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
+ */
+
+/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
+#define ADAPTIVE_BACKOFF_BITS 4
+/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
+#define ADAPTIVE_BACKOFF_START 5
+
+#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
+
+
+static inline uint16_t
+adaptive_counter_bits(int value, int backoff) {
+    return (value << ADAPTIVE_BACKOFF_BITS) |
+           (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
+}
+
+static inline uint16_t
+adaptive_counter_start(void) {
+    unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
+    return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
+}
+
+static inline uint16_t
+adaptive_counter_backoff(uint16_t counter) {
+    unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
+    backoff++;
+    if (backoff > MAX_BACKOFF_VALUE) {
+        backoff = MAX_BACKOFF_VALUE;
+    }
+    unsigned int value = (1 << backoff) - 1;
+    return adaptive_counter_bits(value, backoff);
+}
+
+
 /* Line array cache for tracing */
 
 extern int _PyCode_CreateLineArray(PyCodeObject *co);
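
Note (not part of the commit): to make the new encoding concrete, the following standalone sketch copies the three helpers from the hunk above and prints the counter/backoff pairs a cache cycles through when specialization keeps failing. Only the main() driver and its output labels are made up for illustration; everything else mirrors the diff.

/* sketch.c -- replicate the adaptive-counter helpers outside CPython. */
#include <stdint.h>
#include <stdio.h>

#define ADAPTIVE_BACKOFF_BITS 4
#define ADAPTIVE_BACKOFF_START 5
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)

static inline uint16_t
adaptive_counter_bits(int value, int backoff) {
    return (value << ADAPTIVE_BACKOFF_BITS) |
           (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
}

static inline uint16_t
adaptive_counter_start(void) {
    unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
    return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
}

static inline uint16_t
adaptive_counter_backoff(uint16_t counter) {
    unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
    backoff++;
    if (backoff > MAX_BACKOFF_VALUE) {
        backoff = MAX_BACKOFF_VALUE;
    }
    unsigned int value = (1 << backoff) - 1;
    return adaptive_counter_bits(value, backoff);
}

int main(void) {
    uint16_t c = adaptive_counter_start();
    /* Starting point: value == 31, backoff == 5. */
    printf("start: value=%u backoff=%u\n",
           (unsigned)(c >> ADAPTIVE_BACKOFF_BITS),
           (unsigned)(c & ((1<<ADAPTIVE_BACKOFF_BITS)-1)));
    /* Each failure roughly doubles the wait: 63, 127, 255, ...,
       capped at 4095 once the exponent reaches MAX_BACKOFF_VALUE (12). */
    for (int i = 1; i <= 10; i++) {
        c = adaptive_counter_backoff(c);
        printf("after failure %d: value=%u backoff=%u\n", i,
               (unsigned)(c >> ADAPTIVE_BACKOFF_BITS),
               (unsigned)(c & ((1<<ADAPTIVE_BACKOFF_BITS)-1)));
    }
    return 0;
}
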
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+Use exponential backoff for specialization counters in the interpreter. Can
+reduce the number of failed specializations significantly and avoid slowdown
+for those parts of a program that are not suitable for specialization.
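
Note (not part of the commit): a rough back-of-the-envelope sketch of the claim above. For an instruction that never becomes specializable, the old scheme retried every ADAPTIVE_CACHE_BACKOFF (64) executions forever, while the new scheme's waits grow as 31, 63, 127, ... up to 4095. The simulation below is illustrative only; the constants come from the diff.

/* attempts.c -- compare retry counts over 1,000,000 executions. */
#include <stdio.h>

int main(void) {
    const long executions = 1000000;

    /* Old scheme: one specialization attempt every 64 executions. */
    long old_attempts = executions / 64;

    /* New scheme: waits of 31, 63, 127, ..., capped at 4095. */
    long new_attempts = 0, waited = 0, wait = 31;
    while (waited + wait <= executions) {
        waited += wait;
        new_attempts++;
        wait = wait * 2 + 1;            /* (2**b - 1) -> (2**(b+1) - 1) */
        if (wait > 4095) {
            wait = 4095;                /* exponent capped at MAX_BACKOFF_VALUE */
        }
    }
    /* Prints roughly 15625 vs. 250. */
    printf("old: ~%ld attempts, new: ~%ld attempts\n", old_attempts, new_attempts);
    return 0;
}
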

Python/ceval.c

Lines changed: 25 additions & 21 deletions
@@ -1559,7 +1559,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
         dtrace_function_entry(frame); \
     }
 
+#define ADAPTIVE_COUNTER_IS_ZERO(cache) \
+    (cache)->counter < (1<<ADAPTIVE_BACKOFF_BITS)
 
+#define DECREMENT_ADAPTIVE_COUNTER(cache) \
+    (cache)->counter -= (1<<ADAPTIVE_BACKOFF_BITS)
 
 static int
 trace_function_entry(PyThreadState *tstate, _PyInterpreterFrame *frame)
@@ -2154,7 +2158,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
 
         TARGET(BINARY_SUBSCR_ADAPTIVE) {
             _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *sub = TOP();
                 PyObject *container = SECOND();
                 next_instr--;
@@ -2165,7 +2169,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(BINARY_SUBSCR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
             }
         }
@@ -2319,7 +2323,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
 
         TARGET(STORE_SUBSCR_ADAPTIVE) {
             _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *sub = TOP();
                 PyObject *container = SECOND();
                 next_instr--;
@@ -2330,7 +2334,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(STORE_SUBSCR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(STORE_SUBSCR);
             }
         }
@@ -2812,15 +2816,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(UNPACK_SEQUENCE_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *seq = TOP();
                 next_instr--;
                 _Py_Specialize_UnpackSequence(seq, next_instr, oparg);
                 NOTRACE_DISPATCH_SAME_OPARG();
             }
             else {
                 STAT_INC(UNPACK_SEQUENCE, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(UNPACK_SEQUENCE);
             }
         }
@@ -3053,7 +3057,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(LOAD_GLOBAL_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *name = GETITEM(names, oparg>>1);
                 next_instr--;
                 if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name) < 0) {
@@ -3063,7 +3067,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(LOAD_GLOBAL, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
             }
         }
@@ -3477,7 +3481,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(LOAD_ATTR_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyAttrCache *cache = (_PyAttrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *owner = TOP();
                 PyObject *name = GETITEM(names, oparg);
                 next_instr--;
@@ -3488,7 +3492,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(LOAD_ATTR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(LOAD_ATTR);
             }
         }
@@ -3586,7 +3590,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(STORE_ATTR_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyAttrCache *cache = (_PyAttrCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *owner = TOP();
                 PyObject *name = GETITEM(names, oparg);
                 next_instr--;
@@ -3597,7 +3601,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(STORE_ATTR, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(STORE_ATTR);
            }
         }
@@ -3716,7 +3720,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(COMPARE_OP_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *right = TOP();
                 PyObject *left = SECOND();
                 next_instr--;
@@ -3725,7 +3729,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(COMPARE_OP, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(COMPARE_OP);
             }
         }
@@ -4523,7 +4527,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(LOAD_METHOD_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *owner = TOP();
                 PyObject *name = GETITEM(names, oparg);
                 next_instr--;
@@ -4534,7 +4538,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(LOAD_METHOD, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(LOAD_METHOD);
             }
         }
@@ -4815,7 +4819,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
 
         TARGET(CALL_ADAPTIVE) {
             _PyCallCache *cache = (_PyCallCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 next_instr--;
                 int is_meth = is_method(stack_pointer, oparg);
                 int nargs = oparg + is_meth;
@@ -4829,7 +4833,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(CALL, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 goto call_function;
             }
         }
@@ -5560,7 +5564,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
         TARGET(BINARY_OP_ADAPTIVE) {
             assert(cframe.use_tracing == 0);
             _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
-            if (cache->counter == 0) {
+            if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
                 PyObject *lhs = SECOND();
                 PyObject *rhs = TOP();
                 next_instr--;
@@ -5569,7 +5573,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             }
             else {
                 STAT_INC(BINARY_OP, deferred);
-                cache->counter--;
+                DECREMENT_ADAPTIVE_COUNTER(cache);
                 JUMP_TO_INSTRUCTION(BINARY_OP);
             }
         }
@@ -5690,7 +5694,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
             assert(adaptive_opcode);
             _Py_SET_OPCODE(next_instr[-1], adaptive_opcode);
             STAT_INC(opcode, deopt);
-            *counter = ADAPTIVE_CACHE_BACKOFF;
+            *counter = adaptive_counter_start();
         }
         next_instr--;
        DISPATCH_GOTO();
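
Note (not part of the commit): the interpreter side only needs the two macros from the first ceval.c hunk. Because the live count sits in the high 12 bits, DECREMENT_ADAPTIVE_COUNTER subtracts 1<<ADAPTIVE_BACKOFF_BITS (16), which lowers that count by one while leaving the backoff exponent in the low 4 bits untouched, and ADAPTIVE_COUNTER_IS_ZERO simply checks whether anything remains above those 4 bits. The sketch below mirrors the macro bodies from the diff; the FakeCache struct and the main() driver are made-up stand-ins for the _Py*Cache structs and the dispatch loop.

/* macros.c -- why the decrements never disturb the backoff exponent. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ADAPTIVE_BACKOFF_BITS 4

typedef struct { uint16_t counter; } FakeCache;  /* stand-in for _Py*Cache */

#define ADAPTIVE_COUNTER_IS_ZERO(cache) \
    (cache)->counter < (1<<ADAPTIVE_BACKOFF_BITS)

#define DECREMENT_ADAPTIVE_COUNTER(cache) \
    (cache)->counter -= (1<<ADAPTIVE_BACKOFF_BITS)

int main(void) {
    /* Same bit layout as adaptive_counter_start(): value 31, backoff 5. */
    FakeCache cache = { .counter = (31 << ADAPTIVE_BACKOFF_BITS) | 5 };
    int executions = 0;
    for (;;) {
        if (ADAPTIVE_COUNTER_IS_ZERO(&cache)) {
            /* This is where the *_ADAPTIVE opcodes re-attempt specialization. */
            break;
        }
        DECREMENT_ADAPTIVE_COUNTER(&cache);  /* subtract 16: high bits drop by 1 */
        executions++;
    }
    /* The backoff exponent in the low 4 bits survived every decrement ... */
    assert((cache.counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1)) == 5);
    /* ... and the re-attempt happens after 31 deferred executions. */
    printf("re-specialize after %d executions\n", executions);
    return 0;
}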

0 commit comments
