Skip to content

Commit eb618d5

Browse files
authored
GH-93354: Use exponential backoff to avoid excessive specialization attempts. (GH-93355)
1 parent a565ab0 commit eb618d5

File tree

4 files changed

+93
-46
lines changed

4 files changed

+93
-46
lines changed

Include/internal/pycore_code.h

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,6 @@ extern void _PyLineTable_InitAddressRange(
227227
extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
228228
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
229229

230-
231-
#define ADAPTIVE_CACHE_BACKOFF 64
232-
233230
/* Specialization functions */
234231

235232
extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
@@ -423,6 +420,50 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
423420
}
424421

425422

423+
/** Counters
 * Each inline cache begins with a 16-bit counter.
 *
 * While misses are being counted, the counter is a plain unsigned value.
 *
 * While counting executions down to the next specialization attempt, the
 * counter uses exponential backoff to limit repeated specialization
 * failures: the top 12 bits hold the count and the bottom 4 bits hold
 * the backoff exponent.  Each specialization failure bumps the exponent
 * and reloads the count with (2**exponent - 1).
 * Exponent 6 gives a starting count of 63; exponent 10 gives 1023.
 */

/* Of the 16 counter bits, the low 4 hold the backoff exponent. */
#define ADAPTIVE_BACKOFF_BITS 4
/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
#define ADAPTIVE_BACKOFF_START 5

#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)


/* Pack a count and a backoff exponent into a single 16-bit cache word. */
static inline uint16_t
adaptive_counter_bits(int value, int backoff) {
    int exponent_mask = (1 << ADAPTIVE_BACKOFF_BITS) - 1;
    return (uint16_t)((value << ADAPTIVE_BACKOFF_BITS) | (backoff & exponent_mask));
}

/* Counter word installed when an instruction first becomes adaptive:
 * count = 2**ADAPTIVE_BACKOFF_START - 1, exponent = ADAPTIVE_BACKOFF_START. */
static inline uint16_t
adaptive_counter_start(void) {
    unsigned int initial = (1 << ADAPTIVE_BACKOFF_START) - 1;
    return adaptive_counter_bits(initial, ADAPTIVE_BACKOFF_START);
}

/* Counter word to install after a failed specialization attempt: the
 * backoff exponent grows by one (saturating at MAX_BACKOFF_VALUE) and
 * the count is reloaded with 2**exponent - 1. */
static inline uint16_t
adaptive_counter_backoff(uint16_t counter) {
    unsigned int exponent = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1);
    if (++exponent > MAX_BACKOFF_VALUE) {
        exponent = MAX_BACKOFF_VALUE;
    }
    return adaptive_counter_bits((1 << exponent) - 1, exponent);
}

465+
466+
426467
#ifdef __cplusplus
427468
}
428469
#endif
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Use exponential backoff for specialization counters in the interpreter. This
can significantly reduce the number of failed specializations and avoids
slowing down the parts of a program that are not suitable for specialization.

Python/ceval.c

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1561,7 +1561,11 @@ eval_frame_handle_pending(PyThreadState *tstate)
15611561
dtrace_function_entry(frame); \
15621562
}
15631563

1564+
/* True once the count in the high 12 bits of the adaptive counter has
 * reached zero; the low ADAPTIVE_BACKOFF_BITS bits hold the backoff
 * exponent and are ignored by the test.  The expansion is fully
 * parenthesized so the macro composes safely inside larger expressions
 * (e.g. `!ADAPTIVE_COUNTER_IS_ZERO(c)` or `... == 0`). */
#define ADAPTIVE_COUNTER_IS_ZERO(cache) \
    ((cache)->counter < (1 << ADAPTIVE_BACKOFF_BITS))

/* Decrement the count stored in the high bits by one, leaving the
 * backoff exponent in the low bits untouched. */
#define DECREMENT_ADAPTIVE_COUNTER(cache) \
    ((cache)->counter -= (1 << ADAPTIVE_BACKOFF_BITS))
15651569

15661570
static int
15671571
trace_function_entry(PyThreadState *tstate, _PyInterpreterFrame *frame)
@@ -2156,7 +2160,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
21562160

21572161
TARGET(BINARY_SUBSCR_ADAPTIVE) {
21582162
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
2159-
if (cache->counter == 0) {
2163+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
21602164
PyObject *sub = TOP();
21612165
PyObject *container = SECOND();
21622166
next_instr--;
@@ -2167,7 +2171,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
21672171
}
21682172
else {
21692173
STAT_INC(BINARY_SUBSCR, deferred);
2170-
cache->counter--;
2174+
DECREMENT_ADAPTIVE_COUNTER(cache);
21712175
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
21722176
}
21732177
}
@@ -2321,7 +2325,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
23212325

23222326
TARGET(STORE_SUBSCR_ADAPTIVE) {
23232327
_PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
2324-
if (cache->counter == 0) {
2328+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
23252329
PyObject *sub = TOP();
23262330
PyObject *container = SECOND();
23272331
next_instr--;
@@ -2332,7 +2336,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
23322336
}
23332337
else {
23342338
STAT_INC(STORE_SUBSCR, deferred);
2335-
cache->counter--;
2339+
DECREMENT_ADAPTIVE_COUNTER(cache);
23362340
JUMP_TO_INSTRUCTION(STORE_SUBSCR);
23372341
}
23382342
}
@@ -2815,15 +2819,15 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
28152819
TARGET(UNPACK_SEQUENCE_ADAPTIVE) {
28162820
assert(cframe.use_tracing == 0);
28172821
_PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr;
2818-
if (cache->counter == 0) {
2822+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
28192823
PyObject *seq = TOP();
28202824
next_instr--;
28212825
_Py_Specialize_UnpackSequence(seq, next_instr, oparg);
28222826
NOTRACE_DISPATCH_SAME_OPARG();
28232827
}
28242828
else {
28252829
STAT_INC(UNPACK_SEQUENCE, deferred);
2826-
cache->counter--;
2830+
DECREMENT_ADAPTIVE_COUNTER(cache);
28272831
JUMP_TO_INSTRUCTION(UNPACK_SEQUENCE);
28282832
}
28292833
}
@@ -3056,7 +3060,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
30563060
TARGET(LOAD_GLOBAL_ADAPTIVE) {
30573061
assert(cframe.use_tracing == 0);
30583062
_PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr;
3059-
if (cache->counter == 0) {
3063+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
30603064
PyObject *name = GETITEM(names, oparg>>1);
30613065
next_instr--;
30623066
if (_Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name) < 0) {
@@ -3066,7 +3070,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
30663070
}
30673071
else {
30683072
STAT_INC(LOAD_GLOBAL, deferred);
3069-
cache->counter--;
3073+
DECREMENT_ADAPTIVE_COUNTER(cache);
30703074
JUMP_TO_INSTRUCTION(LOAD_GLOBAL);
30713075
}
30723076
}
@@ -3480,7 +3484,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
34803484
TARGET(LOAD_ATTR_ADAPTIVE) {
34813485
assert(cframe.use_tracing == 0);
34823486
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
3483-
if (cache->counter == 0) {
3487+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
34843488
PyObject *owner = TOP();
34853489
PyObject *name = GETITEM(names, oparg);
34863490
next_instr--;
@@ -3491,7 +3495,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
34913495
}
34923496
else {
34933497
STAT_INC(LOAD_ATTR, deferred);
3494-
cache->counter--;
3498+
DECREMENT_ADAPTIVE_COUNTER(cache);
34953499
JUMP_TO_INSTRUCTION(LOAD_ATTR);
34963500
}
34973501
}
@@ -3589,7 +3593,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
35893593
TARGET(STORE_ATTR_ADAPTIVE) {
35903594
assert(cframe.use_tracing == 0);
35913595
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
3592-
if (cache->counter == 0) {
3596+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
35933597
PyObject *owner = TOP();
35943598
PyObject *name = GETITEM(names, oparg);
35953599
next_instr--;
@@ -3600,7 +3604,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
36003604
}
36013605
else {
36023606
STAT_INC(STORE_ATTR, deferred);
3603-
cache->counter--;
3607+
DECREMENT_ADAPTIVE_COUNTER(cache);
36043608
JUMP_TO_INSTRUCTION(STORE_ATTR);
36053609
}
36063610
}
@@ -3719,7 +3723,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
37193723
TARGET(COMPARE_OP_ADAPTIVE) {
37203724
assert(cframe.use_tracing == 0);
37213725
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
3722-
if (cache->counter == 0) {
3726+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
37233727
PyObject *right = TOP();
37243728
PyObject *left = SECOND();
37253729
next_instr--;
@@ -3728,7 +3732,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
37283732
}
37293733
else {
37303734
STAT_INC(COMPARE_OP, deferred);
3731-
cache->counter--;
3735+
DECREMENT_ADAPTIVE_COUNTER(cache);
37323736
JUMP_TO_INSTRUCTION(COMPARE_OP);
37333737
}
37343738
}
@@ -4526,7 +4530,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
45264530
TARGET(LOAD_METHOD_ADAPTIVE) {
45274531
assert(cframe.use_tracing == 0);
45284532
_PyLoadMethodCache *cache = (_PyLoadMethodCache *)next_instr;
4529-
if (cache->counter == 0) {
4533+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
45304534
PyObject *owner = TOP();
45314535
PyObject *name = GETITEM(names, oparg);
45324536
next_instr--;
@@ -4537,7 +4541,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
45374541
}
45384542
else {
45394543
STAT_INC(LOAD_METHOD, deferred);
4540-
cache->counter--;
4544+
DECREMENT_ADAPTIVE_COUNTER(cache);
45414545
JUMP_TO_INSTRUCTION(LOAD_METHOD);
45424546
}
45434547
}
@@ -4775,7 +4779,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
47754779

47764780
TARGET(CALL_ADAPTIVE) {
47774781
_PyCallCache *cache = (_PyCallCache *)next_instr;
4778-
if (cache->counter == 0) {
4782+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
47794783
next_instr--;
47804784
int is_meth = is_method(stack_pointer, oparg);
47814785
int nargs = oparg + is_meth;
@@ -4789,7 +4793,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
47894793
}
47904794
else {
47914795
STAT_INC(CALL, deferred);
4792-
cache->counter--;
4796+
DECREMENT_ADAPTIVE_COUNTER(cache);
47934797
goto call_function;
47944798
}
47954799
}
@@ -5521,7 +5525,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
55215525
TARGET(BINARY_OP_ADAPTIVE) {
55225526
assert(cframe.use_tracing == 0);
55235527
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
5524-
if (cache->counter == 0) {
5528+
if (ADAPTIVE_COUNTER_IS_ZERO(cache)) {
55255529
PyObject *lhs = SECOND();
55265530
PyObject *rhs = TOP();
55275531
next_instr--;
@@ -5530,7 +5534,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
55305534
}
55315535
else {
55325536
STAT_INC(BINARY_OP, deferred);
5533-
cache->counter--;
5537+
DECREMENT_ADAPTIVE_COUNTER(cache);
55345538
JUMP_TO_INSTRUCTION(BINARY_OP);
55355539
}
55365540
}
@@ -5658,7 +5662,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
56585662
assert(adaptive_opcode);
56595663
_Py_SET_OPCODE(next_instr[-1], adaptive_opcode);
56605664
STAT_INC(opcode, deopt);
5661-
*counter = ADAPTIVE_CACHE_BACKOFF;
5665+
*counter = adaptive_counter_start();
56625666
}
56635667
next_instr--;
56645668
DISPATCH_GOTO();

0 commit comments

Comments
 (0)