Skip to content

GH-98686: Quicken everything #98687

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ typedef struct {
PyObject *co_exceptiontable; /* Byte string encoding exception handling \
table */ \
int co_flags; /* CO_..., see below */ \
short co_warmup; /* Warmup counter for quickening */ \
short _co_linearray_entry_size; /* Size of each entry in _co_linearray */ \
\
/* The rest are not so impactful on performance. */ \
Expand Down
32 changes: 6 additions & 26 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,28 +91,8 @@ typedef struct {

#define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)

/* Number of single-step increments needed to take a warmup counter from
   QUICKENING_INITIAL_WARMUP_VALUE up to zero. _PyCode_Warmup() performs one
   such increment per call and quickens the code when zero is reached. */
#define QUICKENING_WARMUP_DELAY 8

/* We want to compare to zero for efficiency, so we offset values accordingly */
/* NOTE(review): presumably this is the value co_warmup is initialized with;
   the initialization site is not visible here -- confirm. */
#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY)

void _PyCode_Quicken(PyCodeObject *code);

/* Advance the warmup counter of `code` by one step.
 *
 * A counter that is already zero is left untouched and nothing else happens.
 * Otherwise the counter is incremented, and if that increment brings it to
 * exactly zero, _PyCode_Quicken() is called on the code object.
 */
static inline void
_PyCode_Warmup(PyCodeObject *code)
{
    /* Zero means there is nothing left to count: bail out early. */
    if (code->co_warmup == 0) {
        return;
    }
    /* Quicken only on the transition to zero, never again afterwards. */
    if (++code->co_warmup == 0) {
        _PyCode_Quicken(code);
    }
}

extern uint8_t _PyOpcode_Adaptive[256];

extern Py_ssize_t _Py_QuickenedCount;

// Borrowed references to common callables:
struct callable_cache {
PyObject *isinstance;
Expand Down Expand Up @@ -252,10 +232,10 @@ extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr,
int oparg);
extern void _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr);

/* Deallocator function for static codeobjects used in deepfreeze.py */
extern void _PyStaticCode_Dealloc(PyCodeObject *co);
/* Function to intern strings of codeobjects */
extern int _PyStaticCode_InternStrings(PyCodeObject *co);
/* Finalizer function for static codeobjects used in deepfreeze.py */
extern void _PyStaticCode_Fini(PyCodeObject *co);
/* Function to intern strings of codeobjects and quicken the bytecode */
extern int _PyStaticCode_Init(PyCodeObject *co);

#ifdef Py_STATS

Expand Down Expand Up @@ -397,8 +377,8 @@ write_location_entry_start(uint8_t *ptr, int code, int length)

/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
#define ADAPTIVE_BACKOFF_BITS 4
/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
#define ADAPTIVE_BACKOFF_START 5
/* The initial counter value is 1 == 2**ADAPTIVE_BACKOFF_START - 1 */
#define ADAPTIVE_BACKOFF_START 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for backoff, after failure. Do you want to change this, or just lower the initial counter value?


#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)

Expand Down
30 changes: 15 additions & 15 deletions Include/internal/pycore_opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 31 additions & 33 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,6 @@ def pseudo_op(name, op, real_ops):
"FOR_ITER_LIST",
"FOR_ITER_RANGE",
],
"JUMP_BACKWARD": [
"JUMP_BACKWARD_QUICK",
],
"LOAD_ATTR": [
"LOAD_ATTR_ADAPTIVE",
# These potentially push [NULL, bound method] onto the stack.
Expand Down Expand Up @@ -358,9 +355,6 @@ def pseudo_op(name, op, real_ops):
"LOAD_GLOBAL_BUILTIN",
"LOAD_GLOBAL_MODULE",
],
"RESUME": [
"RESUME_QUICK",
],
"STORE_ATTR": [
"STORE_ATTR_ADAPTIVE",
"STORE_ATTR_INSTANCE_VALUE",
Expand Down
3 changes: 1 addition & 2 deletions Lib/test/libregrtest/refleak.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def get_pooled_int(value):
fd_deltas = [0] * repcount
getallocatedblocks = sys.getallocatedblocks
gettotalrefcount = sys.gettotalrefcount
_getquickenedcount = sys._getquickenedcount
fd_count = os_helper.fd_count
# initialize variables to make pyflakes quiet
rc_before = alloc_before = fd_before = 0
Expand All @@ -93,7 +92,7 @@ def get_pooled_int(value):
support.gc_collect()

# Read memory statistics immediately after the garbage collection
alloc_after = getallocatedblocks() - _getquickenedcount()
alloc_after = getallocatedblocks()
rc_after = gettotalrefcount()
fd_after = fd_count()

Expand Down
10 changes: 5 additions & 5 deletions Lib/test/test_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ def testfunction_kw(self, *, kw):
return self


QUICKENING_WARMUP_DELAY = 8
ADAPTIVE_WARMUP_DELAY = 2


class TestPEP590(unittest.TestCase):
Expand Down Expand Up @@ -771,7 +771,7 @@ def f(num): return num + 1
assert_equal(11, f(num))
function_setvectorcall(f)
# make sure specializer is triggered by running > 50 times
for _ in range(10 * QUICKENING_WARMUP_DELAY):
for _ in range(10 * ADAPTIVE_WARMUP_DELAY):
assert_equal("overridden", f(num))

def test_setvectorcall_load_attr_specialization_skip(self):
Expand All @@ -787,7 +787,7 @@ def __getattribute__(self, attr):
function_setvectorcall(X.__getattribute__)
# make sure specialization doesn't trigger
# when vectorcall is overridden
for _ in range(QUICKENING_WARMUP_DELAY):
for _ in range(ADAPTIVE_WARMUP_DELAY):
assert_equal("overridden", x.a)

def test_setvectorcall_load_attr_specialization_deopt(self):
Expand All @@ -803,12 +803,12 @@ def get_a(x):
assert_equal = self.assertEqual
x = X()
# trigger LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN specialization
for _ in range(QUICKENING_WARMUP_DELAY):
for _ in range(ADAPTIVE_WARMUP_DELAY):
assert_equal("a", get_a(x))
function_setvectorcall(X.__getattribute__)
# make sure specialized LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
# gets deopted due to overridden vectorcall
for _ in range(QUICKENING_WARMUP_DELAY):
for _ in range(ADAPTIVE_WARMUP_DELAY):
assert_equal("overridden", get_a(x))

@requires_limited_api
Expand Down
Loading