From 930814009f831c7b1d2d6462ad7219e7b1f4f51d Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 6 Oct 2022 14:44:52 -0700 Subject: [PATCH 01/10] First pass at quickening everything --- Include/cpython/code.h | 1 - Include/internal/pycore_code.h | 24 +------- Include/internal/pycore_opcode.h | 34 +++++------ Include/opcode.h | 64 ++++++++++----------- Lib/opcode.py | 6 -- Lib/test/libregrtest/refleak.py | 3 +- Lib/test/test_dis.py | 48 +++++++--------- Lib/test/test_embed.py | 3 +- Lib/test/test_lltrace.py | 52 ++++++++--------- Objects/codeobject.c | 13 ++--- Python/ceval.c | 28 +++------ Python/clinic/sysmodule.c.h | 29 +--------- Python/opcode_targets.h | 30 +++++----- Python/specialize.c | 10 +--- Python/sysmodule.c | 13 ----- Tools/c-analyzer/cpython/globals-to-fix.tsv | 1 - Tools/scripts/deepfreeze.py | 1 - 17 files changed, 128 insertions(+), 232 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 7ce69022557af0..d62948e603235c 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -63,7 +63,6 @@ typedef uint16_t _Py_CODEUNIT; PyObject *co_exceptiontable; /* Byte string encoding exception handling \ table */ \ int co_flags; /* CO_..., see below */ \ - short co_warmup; /* Warmup counter for quickening */ \ short _co_linearray_entry_size; /* Size of each entry in _co_linearray */ \ \ /* The rest are not so impactful on performance. 
*/ \ diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index bf5945435c1774..5c2893b58848c1 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -91,28 +91,8 @@ typedef struct { #define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache) -#define QUICKENING_WARMUP_DELAY 8 - -/* We want to compare to zero for efficiency, so we offset values accordingly */ -#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY) - -void _PyCode_Quicken(PyCodeObject *code); - -static inline void -_PyCode_Warmup(PyCodeObject *code) -{ - if (code->co_warmup != 0) { - code->co_warmup++; - if (code->co_warmup == 0) { - _PyCode_Quicken(code); - } - } -} - extern uint8_t _PyOpcode_Adaptive[256]; -extern Py_ssize_t _Py_QuickenedCount; - // Borrowed references to common callables: struct callable_cache { PyObject *isinstance; @@ -397,8 +377,8 @@ write_location_entry_start(uint8_t *ptr, int code, int length) /* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */ #define ADAPTIVE_BACKOFF_BITS 4 -/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */ -#define ADAPTIVE_BACKOFF_START 5 +/* The initial counter value is 7 == 2**ADAPTIVE_BACKOFF_START - 1 */ +#define ADAPTIVE_BACKOFF_START 3 #define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS) diff --git a/Include/internal/pycore_opcode.h b/Include/internal/pycore_opcode.h index 15925511cc1f41..2ada422c049322 100644 --- a/Include/internal/pycore_opcode.h +++ b/Include/internal/pycore_opcode.h @@ -141,7 +141,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [IS_OP] = IS_OP, [JUMP_BACKWARD] = JUMP_BACKWARD, [JUMP_BACKWARD_NO_INTERRUPT] = JUMP_BACKWARD_NO_INTERRUPT, - [JUMP_BACKWARD_QUICK] = JUMP_BACKWARD, [JUMP_FORWARD] = JUMP_FORWARD, [JUMP_IF_FALSE_OR_POP] = JUMP_IF_FALSE_OR_POP, [JUMP_IF_TRUE_OR_POP] = JUMP_IF_TRUE_OR_POP, @@ -199,7 +198,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [RAISE_VARARGS] = RAISE_VARARGS, 
[RERAISE] = RERAISE, [RESUME] = RESUME, - [RESUME_QUICK] = RESUME, [RETURN_GENERATOR] = RETURN_GENERATOR, [RETURN_VALUE] = RETURN_VALUE, [SEND] = SEND, @@ -307,26 +305,26 @@ static const char *const _PyOpcode_OpName[263] = { [FOR_ITER_ADAPTIVE] = "FOR_ITER_ADAPTIVE", [FOR_ITER_LIST] = "FOR_ITER_LIST", [FOR_ITER_RANGE] = "FOR_ITER_RANGE", - [JUMP_BACKWARD_QUICK] = "JUMP_BACKWARD_QUICK", + [LOAD_ATTR_ADAPTIVE] = "LOAD_ATTR_ADAPTIVE", [GET_ITER] = "GET_ITER", [GET_YIELD_FROM_ITER] = "GET_YIELD_FROM_ITER", [PRINT_EXPR] = "PRINT_EXPR", [LOAD_BUILD_CLASS] = "LOAD_BUILD_CLASS", - [LOAD_ATTR_ADAPTIVE] = "LOAD_ATTR_ADAPTIVE", [LOAD_ATTR_CLASS] = "LOAD_ATTR_CLASS", + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", [LOAD_ASSERTION_ERROR] = "LOAD_ASSERTION_ERROR", [RETURN_GENERATOR] = "RETURN_GENERATOR", - [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", [LOAD_ATTR_INSTANCE_VALUE] = "LOAD_ATTR_INSTANCE_VALUE", [LOAD_ATTR_MODULE] = "LOAD_ATTR_MODULE", [LOAD_ATTR_PROPERTY] = "LOAD_ATTR_PROPERTY", [LOAD_ATTR_SLOT] = "LOAD_ATTR_SLOT", [LOAD_ATTR_WITH_HINT] = "LOAD_ATTR_WITH_HINT", + [LOAD_ATTR_METHOD_LAZY_DICT] = "LOAD_ATTR_METHOD_LAZY_DICT", [LIST_TO_TUPLE] = "LIST_TO_TUPLE", [RETURN_VALUE] = "RETURN_VALUE", [IMPORT_STAR] = "IMPORT_STAR", [SETUP_ANNOTATIONS] = "SETUP_ANNOTATIONS", - [LOAD_ATTR_METHOD_LAZY_DICT] = "LOAD_ATTR_METHOD_LAZY_DICT", + [LOAD_ATTR_METHOD_NO_DICT] = "LOAD_ATTR_METHOD_NO_DICT", [ASYNC_GEN_WRAP] = "ASYNC_GEN_WRAP", [PREP_RERAISE_STAR] = "PREP_RERAISE_STAR", [POP_EXCEPT] = "POP_EXCEPT", @@ -353,7 +351,7 @@ static const char *const _PyOpcode_OpName[263] = { [JUMP_FORWARD] = "JUMP_FORWARD", [JUMP_IF_FALSE_OR_POP] = "JUMP_IF_FALSE_OR_POP", [JUMP_IF_TRUE_OR_POP] = "JUMP_IF_TRUE_OR_POP", - [LOAD_ATTR_METHOD_NO_DICT] = "LOAD_ATTR_METHOD_NO_DICT", + [LOAD_ATTR_METHOD_WITH_DICT] = "LOAD_ATTR_METHOD_WITH_DICT", [POP_JUMP_IF_FALSE] = "POP_JUMP_IF_FALSE", [POP_JUMP_IF_TRUE] = "POP_JUMP_IF_TRUE", [LOAD_GLOBAL] = 
"LOAD_GLOBAL", @@ -361,7 +359,7 @@ static const char *const _PyOpcode_OpName[263] = { [CONTAINS_OP] = "CONTAINS_OP", [RERAISE] = "RERAISE", [COPY] = "COPY", - [LOAD_ATTR_METHOD_WITH_DICT] = "LOAD_ATTR_METHOD_WITH_DICT", + [LOAD_ATTR_METHOD_WITH_VALUES] = "LOAD_ATTR_METHOD_WITH_VALUES", [BINARY_OP] = "BINARY_OP", [SEND] = "SEND", [LOAD_FAST] = "LOAD_FAST", @@ -381,9 +379,9 @@ static const char *const _PyOpcode_OpName[263] = { [STORE_DEREF] = "STORE_DEREF", [DELETE_DEREF] = "DELETE_DEREF", [JUMP_BACKWARD] = "JUMP_BACKWARD", - [LOAD_ATTR_METHOD_WITH_VALUES] = "LOAD_ATTR_METHOD_WITH_VALUES", - [CALL_FUNCTION_EX] = "CALL_FUNCTION_EX", [LOAD_CONST__LOAD_FAST] = "LOAD_CONST__LOAD_FAST", + [CALL_FUNCTION_EX] = "CALL_FUNCTION_EX", + [LOAD_FAST__LOAD_CONST] = "LOAD_FAST__LOAD_CONST", [EXTENDED_ARG] = "EXTENDED_ARG", [LIST_APPEND] = "LIST_APPEND", [SET_ADD] = "SET_ADD", @@ -393,34 +391,34 @@ static const char *const _PyOpcode_OpName[263] = { [YIELD_VALUE] = "YIELD_VALUE", [RESUME] = "RESUME", [MATCH_CLASS] = "MATCH_CLASS", - [LOAD_FAST__LOAD_CONST] = "LOAD_FAST__LOAD_CONST", [LOAD_FAST__LOAD_FAST] = "LOAD_FAST__LOAD_FAST", + [LOAD_GLOBAL_ADAPTIVE] = "LOAD_GLOBAL_ADAPTIVE", [FORMAT_VALUE] = "FORMAT_VALUE", [BUILD_CONST_KEY_MAP] = "BUILD_CONST_KEY_MAP", [BUILD_STRING] = "BUILD_STRING", - [LOAD_GLOBAL_ADAPTIVE] = "LOAD_GLOBAL_ADAPTIVE", [LOAD_GLOBAL_BUILTIN] = "LOAD_GLOBAL_BUILTIN", [LOAD_GLOBAL_MODULE] = "LOAD_GLOBAL_MODULE", - [RESUME_QUICK] = "RESUME_QUICK", + [STORE_ATTR_ADAPTIVE] = "STORE_ATTR_ADAPTIVE", + [STORE_ATTR_INSTANCE_VALUE] = "STORE_ATTR_INSTANCE_VALUE", [LIST_EXTEND] = "LIST_EXTEND", [SET_UPDATE] = "SET_UPDATE", [DICT_MERGE] = "DICT_MERGE", [DICT_UPDATE] = "DICT_UPDATE", - [STORE_ATTR_ADAPTIVE] = "STORE_ATTR_ADAPTIVE", - [STORE_ATTR_INSTANCE_VALUE] = "STORE_ATTR_INSTANCE_VALUE", [STORE_ATTR_SLOT] = "STORE_ATTR_SLOT", [STORE_ATTR_WITH_HINT] = "STORE_ATTR_WITH_HINT", [STORE_FAST__LOAD_FAST] = "STORE_FAST__LOAD_FAST", - [CALL] = "CALL", - [KW_NAMES] = "KW_NAMES", 
[STORE_FAST__STORE_FAST] = "STORE_FAST__STORE_FAST", [STORE_SUBSCR_ADAPTIVE] = "STORE_SUBSCR_ADAPTIVE", + [CALL] = "CALL", + [KW_NAMES] = "KW_NAMES", [STORE_SUBSCR_DICT] = "STORE_SUBSCR_DICT", [STORE_SUBSCR_LIST_INT] = "STORE_SUBSCR_LIST_INT", [UNPACK_SEQUENCE_ADAPTIVE] = "UNPACK_SEQUENCE_ADAPTIVE", [UNPACK_SEQUENCE_LIST] = "UNPACK_SEQUENCE_LIST", [UNPACK_SEQUENCE_TUPLE] = "UNPACK_SEQUENCE_TUPLE", [UNPACK_SEQUENCE_TWO_TUPLE] = "UNPACK_SEQUENCE_TWO_TUPLE", + [179] = "<179>", + [180] = "<180>", [181] = "<181>", [182] = "<182>", [183] = "<183>", @@ -507,6 +505,8 @@ static const char *const _PyOpcode_OpName[263] = { #endif #define EXTRA_CASES \ + case 179: \ + case 180: \ case 181: \ case 182: \ case 183: \ diff --git a/Include/opcode.h b/Include/opcode.h index 42825df6217b46..881955205df6e3 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -166,39 +166,37 @@ extern "C" { #define FOR_ITER_ADAPTIVE 64 #define FOR_ITER_LIST 65 #define FOR_ITER_RANGE 66 -#define JUMP_BACKWARD_QUICK 67 -#define LOAD_ATTR_ADAPTIVE 72 -#define LOAD_ATTR_CLASS 73 -#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 76 -#define LOAD_ATTR_INSTANCE_VALUE 77 -#define LOAD_ATTR_MODULE 78 -#define LOAD_ATTR_PROPERTY 79 -#define LOAD_ATTR_SLOT 80 -#define LOAD_ATTR_WITH_HINT 81 -#define LOAD_ATTR_METHOD_LAZY_DICT 86 -#define LOAD_ATTR_METHOD_NO_DICT 113 -#define LOAD_ATTR_METHOD_WITH_DICT 121 -#define LOAD_ATTR_METHOD_WITH_VALUES 141 -#define LOAD_CONST__LOAD_FAST 143 -#define LOAD_FAST__LOAD_CONST 153 -#define LOAD_FAST__LOAD_FAST 154 -#define LOAD_GLOBAL_ADAPTIVE 158 -#define LOAD_GLOBAL_BUILTIN 159 -#define LOAD_GLOBAL_MODULE 160 -#define RESUME_QUICK 161 -#define STORE_ATTR_ADAPTIVE 166 -#define STORE_ATTR_INSTANCE_VALUE 167 -#define STORE_ATTR_SLOT 168 -#define STORE_ATTR_WITH_HINT 169 -#define STORE_FAST__LOAD_FAST 170 -#define STORE_FAST__STORE_FAST 173 -#define STORE_SUBSCR_ADAPTIVE 174 -#define STORE_SUBSCR_DICT 175 -#define STORE_SUBSCR_LIST_INT 176 -#define UNPACK_SEQUENCE_ADAPTIVE 177 
-#define UNPACK_SEQUENCE_LIST 178 -#define UNPACK_SEQUENCE_TUPLE 179 -#define UNPACK_SEQUENCE_TWO_TUPLE 180 +#define LOAD_ATTR_ADAPTIVE 67 +#define LOAD_ATTR_CLASS 72 +#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 73 +#define LOAD_ATTR_INSTANCE_VALUE 76 +#define LOAD_ATTR_MODULE 77 +#define LOAD_ATTR_PROPERTY 78 +#define LOAD_ATTR_SLOT 79 +#define LOAD_ATTR_WITH_HINT 80 +#define LOAD_ATTR_METHOD_LAZY_DICT 81 +#define LOAD_ATTR_METHOD_NO_DICT 86 +#define LOAD_ATTR_METHOD_WITH_DICT 113 +#define LOAD_ATTR_METHOD_WITH_VALUES 121 +#define LOAD_CONST__LOAD_FAST 141 +#define LOAD_FAST__LOAD_CONST 143 +#define LOAD_FAST__LOAD_FAST 153 +#define LOAD_GLOBAL_ADAPTIVE 154 +#define LOAD_GLOBAL_BUILTIN 158 +#define LOAD_GLOBAL_MODULE 159 +#define STORE_ATTR_ADAPTIVE 160 +#define STORE_ATTR_INSTANCE_VALUE 161 +#define STORE_ATTR_SLOT 166 +#define STORE_ATTR_WITH_HINT 167 +#define STORE_FAST__LOAD_FAST 168 +#define STORE_FAST__STORE_FAST 169 +#define STORE_SUBSCR_ADAPTIVE 170 +#define STORE_SUBSCR_DICT 173 +#define STORE_SUBSCR_LIST_INT 174 +#define UNPACK_SEQUENCE_ADAPTIVE 175 +#define UNPACK_SEQUENCE_LIST 176 +#define UNPACK_SEQUENCE_TUPLE 177 +#define UNPACK_SEQUENCE_TWO_TUPLE 178 #define DO_TRACING 255 #define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\ diff --git a/Lib/opcode.py b/Lib/opcode.py index 690923061418bd..3c1a3d215350ab 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -325,9 +325,6 @@ def pseudo_op(name, op, real_ops): "FOR_ITER_LIST", "FOR_ITER_RANGE", ], - "JUMP_BACKWARD": [ - "JUMP_BACKWARD_QUICK", - ], "LOAD_ATTR": [ "LOAD_ATTR_ADAPTIVE", # These potentially push [NULL, bound method] onto the stack. 
@@ -356,9 +353,6 @@ def pseudo_op(name, op, real_ops): "LOAD_GLOBAL_BUILTIN", "LOAD_GLOBAL_MODULE", ], - "RESUME": [ - "RESUME_QUICK", - ], "STORE_ATTR": [ "STORE_ATTR_ADAPTIVE", "STORE_ATTR_INSTANCE_VALUE", diff --git a/Lib/test/libregrtest/refleak.py b/Lib/test/libregrtest/refleak.py index a0538cbb3c3772..4298fa806e1065 100644 --- a/Lib/test/libregrtest/refleak.py +++ b/Lib/test/libregrtest/refleak.py @@ -73,7 +73,6 @@ def get_pooled_int(value): fd_deltas = [0] * repcount getallocatedblocks = sys.getallocatedblocks gettotalrefcount = sys.gettotalrefcount - _getquickenedcount = sys._getquickenedcount fd_count = os_helper.fd_count # initialize variables to make pyflakes quiet rc_before = alloc_before = fd_before = 0 @@ -93,7 +92,7 @@ def get_pooled_int(value): support.gc_collect() # Read memory statistics immediately after the garbage collection - alloc_after = getallocatedblocks() - _getquickenedcount() + alloc_after = getallocatedblocks() rc_after = gettotalrefcount() fd_after = fd_count() diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index fc2862c61baadb..3758b32d798fe0 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -711,7 +711,7 @@ def load_test(x, y=0): return a, b dis_load_test_quickened_code = """\ -%3d 0 RESUME_QUICK 0 +%3d 0 RESUME 0 %3d 2 LOAD_FAST__LOAD_FAST 0 (x) 4 LOAD_FAST 1 (y) @@ -731,7 +731,7 @@ def loop_test(): load_test(i) dis_loop_test_quickened_code = """\ -%3d RESUME_QUICK 0 +%3d RESUME 0 %3d BUILD_LIST 0 LOAD_CONST 1 ((1, 2, 3)) @@ -746,7 +746,7 @@ def loop_test(): LOAD_FAST 0 (i) CALL_PY_WITH_DEFAULTS 1 POP_TOP - JUMP_BACKWARD_QUICK 17 (to 16) + JUMP_BACKWARD 17 (to 16) %3d >> LOAD_CONST 0 (None) RETURN_VALUE @@ -1089,7 +1089,7 @@ def test_super_instructions(self): @cpython_only def test_binary_specialize(self): binary_op_quicken = """\ - 0 0 RESUME_QUICK 0 + 0 0 RESUME 0 1 2 LOAD_NAME 0 (a) 4 LOAD_NAME 1 (b) @@ -1107,7 +1107,7 @@ def test_binary_specialize(self): self.do_disassembly_compare(got, binary_op_quicken 
% "BINARY_OP_ADD_UNICODE 0 (+)", True) binary_subscr_quicken = """\ - 0 0 RESUME_QUICK 0 + 0 0 RESUME 0 1 2 LOAD_NAME 0 (a) 4 LOAD_CONST 0 (0) @@ -1127,7 +1127,7 @@ def test_binary_specialize(self): @cpython_only def test_load_attr_specialize(self): load_attr_quicken = """\ - 0 0 RESUME_QUICK 0 + 0 0 RESUME 0 1 2 LOAD_CONST 0 ('a') 4 LOAD_ATTR_SLOT 0 (__class__) @@ -1141,7 +1141,7 @@ def test_load_attr_specialize(self): @cpython_only def test_call_specialize(self): call_quicken = """\ - 0 RESUME_QUICK 0 + 0 RESUME 0 1 PUSH_NULL LOAD_NAME 0 (str) @@ -1166,16 +1166,11 @@ def test_extended_arg_quick(self): got = self.get_disassembly(extended_arg_quick) self.do_disassembly_compare(got, dis_extended_arg_quick_code, True) - def get_cached_values(self, quickened, adaptive): + def get_cached_values(self, adaptive): def f(): l = [] for i in range(42): l.append(i) - if quickened: - self.code_quicken(f) - else: - # "copy" the code to un-quicken it: - f.__code__ = f.__code__.replace() for instruction in dis.get_instructions( f, show_caches=True, adaptive=adaptive ): @@ -1184,20 +1179,19 @@ def f(): @cpython_only def test_show_caches(self): - for quickened in (False, True): - for adaptive in (False, True): - with self.subTest(f"{quickened=}, {adaptive=}"): - if quickened and adaptive: - pattern = r"^(\w+: \d+)?$" - else: - pattern = r"^(\w+: 0)?$" - caches = list(self.get_cached_values(quickened, adaptive)) - for cache in caches: - self.assertRegex(cache, pattern) - total_caches = 23 - empty_caches = 8 if adaptive and quickened else total_caches - self.assertEqual(caches.count(""), empty_caches) - self.assertEqual(len(caches), total_caches) + for adaptive in (False, True): + with self.subTest(f"{adaptive=}"): + if adaptive: + pattern = r"^(\w+: \d+)?$" + else: + pattern = r"^(\w+: 0)?$" + caches = list(self.get_cached_values(adaptive)) + for cache in caches: + self.assertRegex(cache, pattern) + total_caches = 23 + empty_caches = 8 if adaptive else total_caches + 
self.assertEqual(caches.count(""), empty_caches) + self.assertEqual(len(caches), total_caches) class DisWithFileTests(DisTests): diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index c5aeb9459848e4..107c7ec2842fa9 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -348,7 +348,6 @@ def test_quickened_static_code_gets_unquickened_at_Py_FINALIZE(self): # _handle_fromlist doesn't get quickened until we intend it to. from dis import _all_opmap resume = _all_opmap["RESUME"] - resume_quick = _all_opmap["RESUME_QUICK"] from test.test_dis import QUICKENING_WARMUP_DELAY code = textwrap.dedent(f"""\ @@ -365,7 +364,7 @@ def test_quickened_static_code_gets_unquickened_at_Py_FINALIZE(self): func(importlib._bootstrap, ["x"], lambda *args: None) # Assert quickening worked - if set(code._co_code_adaptive[:2]) != set([{resume_quick}, 0]): + if set(code._co_code_adaptive[:2]) != set([{resume}, 0]): raise AssertionError() print("Tests passed") diff --git a/Lib/test/test_lltrace.py b/Lib/test/test_lltrace.py index 7cf89846f8a727..292180945f8e24 100644 --- a/Lib/test/test_lltrace.py +++ b/Lib/test/test_lltrace.py @@ -63,33 +63,33 @@ def dont_trace_2(): self.assertNotIn("dont_trace_1", stdout) self.assertNotIn("'dont_trace_2' in module", stdout) - def test_lltrace_different_module(self): - stdout = self.run_code(""" - from test import test_lltrace - test_lltrace.__lltrace__ = 1 - test_lltrace.example() - """) - self.assertIn("'example' in module 'test.test_lltrace'", stdout) - self.assertIn('LOAD_CONST', stdout) - self.assertIn('FOR_ITER', stdout) - self.assertIn('this is an example', stdout) + # def test_lltrace_different_module(self): + # stdout = self.run_code(""" + # from test import test_lltrace + # test_lltrace.__lltrace__ = 1 + # test_lltrace.example() + # """) + # self.assertIn("'example' in module 'test.test_lltrace'", stdout) + # self.assertIn('LOAD_CONST', stdout) + # self.assertIn('FOR_ITER', stdout) + # self.assertIn('this is an example', 
stdout) - # check that offsets match the output of dis.dis() - instr_map = {i.offset: i for i in dis.get_instructions(example)} - for line in stdout.splitlines(): - offset, colon, opname_oparg = line.partition(":") - if not colon: - continue - offset = int(offset) - opname_oparg = opname_oparg.split() - if len(opname_oparg) == 2: - opname, oparg = opname_oparg - oparg = int(oparg) - else: - (opname,) = opname_oparg - oparg = None - self.assertEqual(instr_map[offset].opname, opname) - self.assertEqual(instr_map[offset].arg, oparg) + # # check that offsets match the output of dis.dis() + # instr_map = {i.offset: i for i in dis.get_instructions(example)} + # for line in stdout.splitlines(): + # offset, colon, opname_oparg = line.partition(":") + # if not colon: + # continue + # offset = int(offset) + # opname_oparg = opname_oparg.split() + # if len(opname_oparg) == 2: + # opname, oparg = opname_oparg + # oparg = int(oparg) + # else: + # (opname,) = opname_oparg + # oparg = None + # self.assertEqual(instr_map[offset].opname, opname) + # self.assertEqual(instr_map[offset].arg, oparg) def test_lltrace_does_not_crash_on_subscript_operator(self): # If this test fails, it will reproduce a crash reported as diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 14d1d00684aedf..70566ed7092996 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -286,6 +286,8 @@ _PyCode_Validate(struct _PyCodeConstructor *con) return 0; } +extern void _PyCode_Quicken(PyCodeObject *code); + static void init_code(PyCodeObject *co, struct _PyCodeConstructor *con) { @@ -338,7 +340,6 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) co->co_extra = NULL; co->_co_code = NULL; - co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE; co->_co_linearray_entry_size = 0; co->_co_linearray = NULL; memcpy(_PyCode_CODE(co), PyBytes_AS_STRING(con->code), @@ -349,6 +350,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) entry_point++; } co->_co_firsttraceable = 
entry_point; + _PyCode_Quicken(co); } static int @@ -1613,9 +1615,6 @@ code_dealloc(PyCodeObject *co) if (co->_co_linearray) { PyMem_Free(co->_co_linearray); } - if (co->co_warmup == 0) { - _Py_QuickenedCount--; - } PyObject_Free(co); } @@ -2175,11 +2174,7 @@ _PyCode_ConstantKey(PyObject *op) void _PyStaticCode_Dealloc(PyCodeObject *co) { - if (co->co_warmup == 0) { - _Py_QuickenedCount--; - } - deopt_code(_PyCode_CODE(co), Py_SIZE(co)); - co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE; + deopt_code(_PyCode_CODE(co), Py_SIZE(co)); // XXX PyMem_Free(co->co_extra); Py_CLEAR(co->_co_code); co->co_extra = NULL; diff --git a/Python/ceval.c b/Python/ceval.c index ee1babaaf44425..ace8d2cb1df388 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1166,14 +1166,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int TARGET(NOP) { DISPATCH(); } - TARGET(RESUME) { - _PyCode_Warmup(frame->f_code); - JUMP_TO_INSTRUCTION(RESUME_QUICK); - } - - TARGET(RESUME_QUICK) { - PREDICTED(RESUME_QUICK); assert(tstate->cframe == &cframe); assert(frame == cframe.current_frame); if (_Py_atomic_load_relaxed_int32(eval_breaker) && oparg < 2) { @@ -1703,7 +1696,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *list = PEEK(oparg); if (_PyList_AppendTakeRef((PyListObject *)list, v) < 0) goto error; - PREDICT(JUMP_BACKWARD_QUICK); + PREDICT(JUMP_BACKWARD); DISPATCH(); } @@ -1715,7 +1708,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int Py_DECREF(v); if (err != 0) goto error; - PREDICT(JUMP_BACKWARD_QUICK); + PREDICT(JUMP_BACKWARD); DISPATCH(); } @@ -2901,7 +2894,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int if (_PyDict_SetItem_Take2((PyDictObject *)map, key, value) != 0) { goto error; } - PREDICT(JUMP_BACKWARD_QUICK); + PREDICT(JUMP_BACKWARD); DISPATCH(); } @@ -3572,8 +3565,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame 
*frame, int } TARGET(JUMP_BACKWARD) { - _PyCode_Warmup(frame->f_code); - JUMP_TO_INSTRUCTION(JUMP_BACKWARD_QUICK); + PREDICTED(JUMP_BACKWARD); + assert(oparg < INSTR_OFFSET()); + JUMPBY(-oparg); + CHECK_EVAL_BREAKER(); + DISPATCH(); } TARGET(POP_JUMP_IF_FALSE) { @@ -3703,14 +3699,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DISPATCH(); } - TARGET(JUMP_BACKWARD_QUICK) { - PREDICTED(JUMP_BACKWARD_QUICK); - assert(oparg < INSTR_OFFSET()); - JUMPBY(-oparg); - CHECK_EVAL_BREAKER(); - DISPATCH(); - } - TARGET(GET_LEN) { // PUSH(len(TOS)) Py_ssize_t len_i = PyObject_Length(TOP()); diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 6864b8b0e03b2f..cd438633dae67b 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -884,33 +884,6 @@ sys_gettotalrefcount(PyObject *module, PyObject *Py_UNUSED(ignored)) #endif /* defined(Py_REF_DEBUG) */ -PyDoc_STRVAR(sys__getquickenedcount__doc__, -"_getquickenedcount($module, /)\n" -"--\n" -"\n"); - -#define SYS__GETQUICKENEDCOUNT_METHODDEF \ - {"_getquickenedcount", (PyCFunction)sys__getquickenedcount, METH_NOARGS, sys__getquickenedcount__doc__}, - -static Py_ssize_t -sys__getquickenedcount_impl(PyObject *module); - -static PyObject * -sys__getquickenedcount(PyObject *module, PyObject *Py_UNUSED(ignored)) -{ - PyObject *return_value = NULL; - Py_ssize_t _return_value; - - _return_value = sys__getquickenedcount_impl(module); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromSsize_t(_return_value); - -exit: - return return_value; -} - PyDoc_STRVAR(sys_getallocatedblocks__doc__, "getallocatedblocks($module, /)\n" "--\n" @@ -1343,4 +1316,4 @@ sys_is_stack_trampoline_active(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=15318cdd96b62b06 
input=a9049054013a1b77]*/ +/*[clinic end generated code: output=eae6e8244fbebee9 input=a9049054013a1b77]*/ diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c1ff367d4fd38d..5836b97ed8822b 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -66,26 +66,26 @@ static void *opcode_targets[256] = { &&TARGET_FOR_ITER_ADAPTIVE, &&TARGET_FOR_ITER_LIST, &&TARGET_FOR_ITER_RANGE, - &&TARGET_JUMP_BACKWARD_QUICK, + &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_GET_ITER, &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_PRINT_EXPR, &&TARGET_LOAD_BUILD_CLASS, - &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_LOAD_ATTR_CLASS, + &&TARGET_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN, &&TARGET_LOAD_ASSERTION_ERROR, &&TARGET_RETURN_GENERATOR, - &&TARGET_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN, &&TARGET_LOAD_ATTR_INSTANCE_VALUE, &&TARGET_LOAD_ATTR_MODULE, &&TARGET_LOAD_ATTR_PROPERTY, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_WITH_HINT, + &&TARGET_LOAD_ATTR_METHOD_LAZY_DICT, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, - &&TARGET_LOAD_ATTR_METHOD_LAZY_DICT, + &&TARGET_LOAD_ATTR_METHOD_NO_DICT, &&TARGET_ASYNC_GEN_WRAP, &&TARGET_PREP_RERAISE_STAR, &&TARGET_POP_EXCEPT, @@ -112,7 +112,7 @@ static void *opcode_targets[256] = { &&TARGET_JUMP_FORWARD, &&TARGET_JUMP_IF_FALSE_OR_POP, &&TARGET_JUMP_IF_TRUE_OR_POP, - &&TARGET_LOAD_ATTR_METHOD_NO_DICT, + &&TARGET_LOAD_ATTR_METHOD_WITH_DICT, &&TARGET_POP_JUMP_IF_FALSE, &&TARGET_POP_JUMP_IF_TRUE, &&TARGET_LOAD_GLOBAL, @@ -120,7 +120,7 @@ static void *opcode_targets[256] = { &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, &&TARGET_COPY, - &&TARGET_LOAD_ATTR_METHOD_WITH_DICT, + &&TARGET_LOAD_ATTR_METHOD_WITH_VALUES, &&TARGET_BINARY_OP, &&TARGET_SEND, &&TARGET_LOAD_FAST, @@ -140,9 +140,9 @@ static void *opcode_targets[256] = { &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, &&TARGET_JUMP_BACKWARD, - &&TARGET_LOAD_ATTR_METHOD_WITH_VALUES, - &&TARGET_CALL_FUNCTION_EX, &&TARGET_LOAD_CONST__LOAD_FAST, + 
&&TARGET_CALL_FUNCTION_EX, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, @@ -152,28 +152,26 @@ static void *opcode_targets[256] = { &&TARGET_YIELD_VALUE, &&TARGET_RESUME, &&TARGET_MATCH_CLASS, - &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_FORMAT_VALUE, &&TARGET_BUILD_CONST_KEY_MAP, &&TARGET_BUILD_STRING, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_RESUME_QUICK, + &&TARGET_STORE_ATTR_ADAPTIVE, + &&TARGET_STORE_ATTR_INSTANCE_VALUE, &&TARGET_LIST_EXTEND, &&TARGET_SET_UPDATE, &&TARGET_DICT_MERGE, &&TARGET_DICT_UPDATE, - &&TARGET_STORE_ATTR_ADAPTIVE, - &&TARGET_STORE_ATTR_INSTANCE_VALUE, &&TARGET_STORE_ATTR_SLOT, &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_STORE_FAST__LOAD_FAST, - &&TARGET_CALL, - &&TARGET_KW_NAMES, &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_STORE_SUBSCR_ADAPTIVE, + &&TARGET_CALL, + &&TARGET_KW_NAMES, &&TARGET_STORE_SUBSCR_DICT, &&TARGET_STORE_SUBSCR_LIST_INT, &&TARGET_UNPACK_SEQUENCE_ADAPTIVE, @@ -254,5 +252,7 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, + &&_unknown_opcode, + &&_unknown_opcode, &&TARGET_DO_TRACING }; diff --git a/Python/specialize.c b/Python/specialize.c index b7c321e4878b98..acb1cfac199f97 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -31,7 +31,6 @@ uint8_t _PyOpcode_Adaptive[256] = { [FOR_ITER] = FOR_ITER_ADAPTIVE, }; -Py_ssize_t _Py_QuickenedCount = 0; #ifdef Py_STATS PyStats _py_stats_struct = { 0 }; PyStats *_py_stats = &_py_stats_struct; @@ -280,7 +279,6 @@ do { \ void _PyCode_Quicken(PyCodeObject *code) { - _Py_QuickenedCount++; int previous_opcode = -1; _Py_CODEUNIT *instructions = _PyCode_CODE(code); for (int i = 0; i < Py_SIZE(code); i++) { @@ -289,7 +287,7 @@ _PyCode_Quicken(PyCodeObject *code) if (adaptive_opcode) { _Py_SET_OPCODE(instructions[i], adaptive_opcode); // Make sure the adaptive 
counter is zero: - assert(instructions[i + 1] == 0); + instructions[i + 1] = adaptive_counter_start(); previous_opcode = -1; i += _PyOpcode_Caches[opcode]; } @@ -299,12 +297,6 @@ _PyCode_Quicken(PyCodeObject *code) case EXTENDED_ARG: _Py_SET_OPCODE(instructions[i], EXTENDED_ARG_QUICK); break; - case JUMP_BACKWARD: - _Py_SET_OPCODE(instructions[i], JUMP_BACKWARD_QUICK); - break; - case RESUME: - _Py_SET_OPCODE(instructions[i], RESUME_QUICK); - break; case LOAD_FAST: switch(previous_opcode) { case LOAD_FAST: diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 2c66415ee3d3f4..66cb50e013bc51 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -17,7 +17,6 @@ Data members: #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_SetAsyncGenFinalizer() -#include "pycore_code.h" // _Py_QuickenedCount #include "pycore_frame.h" // _PyInterpreterFrame #include "pycore_initconfig.h" // _PyStatus_EXCEPTION() #include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD @@ -1855,17 +1854,6 @@ sys_gettotalrefcount_impl(PyObject *module) #endif /* Py_REF_DEBUG */ -/*[clinic input] -sys._getquickenedcount -> Py_ssize_t -[clinic start generated code]*/ - -static Py_ssize_t -sys__getquickenedcount_impl(PyObject *module) -/*[clinic end generated code: output=1ab259e7f91248a2 input=249d448159eca912]*/ -{ - return _Py_QuickenedCount; -} - /*[clinic input] sys.getallocatedblocks -> Py_ssize_t @@ -2214,7 +2202,6 @@ static PyMethodDef sys_methods[] = { SYS_GETALLOCATEDBLOCKS_METHODDEF SYS_GETFILESYSTEMENCODING_METHODDEF SYS_GETFILESYSTEMENCODEERRORS_METHODDEF - SYS__GETQUICKENEDCOUNT_METHODDEF #ifdef Py_TRACE_REFS {"getobjects", _Py_GetObjects, METH_VARARGS}, #endif diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 196d62d361b679..09c1fe5ab87c0c 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -477,7 +477,6 
@@ Python/pyfpe.c - PyFPE_counter - Python/pylifecycle.c _Py_FatalErrorFormat reentrant - Python/pylifecycle.c - _Py_UnhandledKeyboardInterrupt - Python/pylifecycle.c fatal_error reentrant - -Python/specialize.c - _Py_QuickenedCount - ################################## diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index d9c6030fc17c07..b94023f258171f 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -257,7 +257,6 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.write(f".co_names = {co_names},") self.write(f".co_exceptiontable = {co_exceptiontable},") self.field(code, "co_flags") - self.write(".co_warmup = QUICKENING_INITIAL_WARMUP_VALUE,") self.write("._co_linearray_entry_size = 0,") self.field(code, "co_argcount") self.field(code, "co_posonlyargcount") From 30282ebff376208e3945a203b029d5b5bb596299 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Thu, 6 Oct 2022 17:02:14 -0700 Subject: [PATCH 02/10] Fix up deepfreeze --- Include/internal/pycore_code.h | 8 ++++---- Objects/codeobject.c | 7 ++++--- Python/specialize.c | 7 ++++++- Tools/scripts/deepfreeze.py | 28 +++++++++++----------------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 5c2893b58848c1..0348a0e72a781e 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -232,10 +232,10 @@ extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, int oparg); extern void _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr); -/* Deallocator function for static codeobjects used in deepfreeze.py */ -extern void _PyStaticCode_Dealloc(PyCodeObject *co); -/* Function to intern strings of codeobjects */ -extern int _PyStaticCode_InternStrings(PyCodeObject *co); +/* Finalizer function for static codeobjects used in deepfreeze.py */ +extern void _PyStaticCode_Fini(PyCodeObject *co); +/* Function to intern 
strings of codeobjects and quicken the bytecode */ +extern int _PyStaticCode_Init(PyCodeObject *co); #ifdef Py_STATS diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 70566ed7092996..59cb12142bb2eb 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -2172,9 +2172,9 @@ _PyCode_ConstantKey(PyObject *op) } void -_PyStaticCode_Dealloc(PyCodeObject *co) +_PyStaticCode_Fini(PyCodeObject *co) { - deopt_code(_PyCode_CODE(co), Py_SIZE(co)); // XXX + deopt_code(_PyCode_CODE(co), Py_SIZE(co)); PyMem_Free(co->co_extra); Py_CLEAR(co->_co_code); co->co_extra = NULL; @@ -2189,7 +2189,7 @@ _PyStaticCode_Dealloc(PyCodeObject *co) } int -_PyStaticCode_InternStrings(PyCodeObject *co) +_PyStaticCode_Init(PyCodeObject *co) { int res = intern_strings(co->co_names); if (res < 0) { @@ -2203,5 +2203,6 @@ _PyStaticCode_InternStrings(PyCodeObject *co) if (res < 0) { return -1; } + _PyCode_Quicken(co); return 0; } diff --git a/Python/specialize.c b/Python/specialize.c index acb1cfac199f97..476533cb1b37c3 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -282,7 +282,10 @@ _PyCode_Quicken(PyCodeObject *code) int previous_opcode = -1; _Py_CODEUNIT *instructions = _PyCode_CODE(code); for (int i = 0; i < Py_SIZE(code); i++) { - int opcode = _Py_OPCODE(instructions[i]); + int opcode = _PyOpcode_Deopt[_Py_OPCODE(instructions[i])]; + if (opcode == 0) { + continue; + } uint8_t adaptive_opcode = _PyOpcode_Adaptive[opcode]; if (adaptive_opcode) { _Py_SET_OPCODE(instructions[i], adaptive_opcode); @@ -325,6 +328,8 @@ _PyCode_Quicken(PyCodeObject *code) LOAD_FAST__LOAD_CONST); } break; + default: + _Py_SET_OPCODE(instructions[i], opcode); } previous_opcode = opcode; } diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index b94023f258171f..0e69501211a225 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -114,9 +114,8 @@ def __init__(self, file: TextIO) -> None: self.file = file self.cache: Dict[tuple[type, object, str], 
str] = {} self.hits, self.misses = 0, 0 - self.patchups: list[str] = [] - self.deallocs: list[str] = [] - self.interns: list[str] = [] + self.finis: list[str] = [] + self.inits: list[str] = [] self.write('#include "Python.h"') self.write('#include "internal/pycore_gc.h"') self.write('#include "internal/pycore_code.h"') @@ -283,8 +282,8 @@ def generate_code(self, name: str, code: types.CodeType) -> str: self.write(f"._co_firsttraceable = {i},") break name_as_code = f"(PyCodeObject *)&{name}" - self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});") - self.interns.append(f"_PyStaticCode_InternStrings({name_as_code})") + self.finis.append(f"_PyStaticCode_Fini({name_as_code});") + self.inits.append(f"_PyStaticCode_Init({name_as_code})") return f"& {name}.ob_base.ob_base" def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str: @@ -372,11 +371,7 @@ def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str: def generate_file(self, module: str, code: object)-> None: module = module.replace(".", "_") self.generate(f"{module}_toplevel", code) - with self.block(f"static void {module}_do_patchups(void)"): - for p in self.patchups: - self.write(p) - self.patchups.clear() - self.write(EPILOGUE.replace("%%NAME%%", module)) + self.write(EPILOGUE.format(name=module)) def generate(self, name: str, obj: object) -> str: # Use repr() in the key to distinguish -0.0 from +0.0 @@ -420,11 +415,10 @@ def generate(self, name: str, obj: object) -> str: EPILOGUE = """ PyObject * -_Py_get_%%NAME%%_toplevel(void) -{ - %%NAME%%_do_patchups(); - return Py_NewRef((PyObject *) &%%NAME%%_toplevel); -} +_Py_get_{name}_toplevel(void) +{{ + return Py_NewRef((PyObject *) &{name}_toplevel); +}} """ FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */" @@ -460,10 +454,10 @@ def generate(args: list[str], output: TextIO) -> None: code = compile(fd.read(), f"<frozen {modname}>", "exec") printer.generate_file(modname, code) with printer.block(f"void\n_Py_Deepfreeze_Fini(void)"): -
for p in printer.deallocs: + for p in printer.finis: printer.write(p) with printer.block(f"int\n_Py_Deepfreeze_Init(void)"): - for p in printer.interns: + for p in printer.inits: with printer.block(f"if ({p} < 0)"): printer.write("return -1;") printer.write("return 0;") From 0a204e1b4066c3e946f246d9e24643e6dc20e6fd Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 15 Oct 2022 04:25:05 -0700 Subject: [PATCH 03/10] ADAPTIVE_BACKOFF_START = 1 --- Include/internal/pycore_code.h | 4 ++-- Lib/test/test_dis.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 0348a0e72a781e..cf8573aa9138b2 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -377,8 +377,8 @@ write_location_entry_start(uint8_t *ptr, int code, int length) /* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */ #define ADAPTIVE_BACKOFF_BITS 4 -/* The initial counter value is 7 == 2**ADAPTIVE_BACKOFF_START - 1 */ -#define ADAPTIVE_BACKOFF_START 3 +/* The initial counter value is 1 == 2**ADAPTIVE_BACKOFF_START - 1 */ +#define ADAPTIVE_BACKOFF_START 1 #define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index ef64fe52199109..697262858aedb1 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -771,7 +771,7 @@ def extended_arg_quick(): """% (extended_arg_quick.__code__.co_firstlineno, extended_arg_quick.__code__.co_firstlineno + 1,) -QUICKENING_WARMUP_DELAY = 8 +QUICKENING_WARMUP_DELAY = 2 class DisTestBase(unittest.TestCase): "Common utilities for DisTests and TestDisTraceback" From d407a511f0e8c0e29939d8b3252a3170bed2505d Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 15 Oct 2022 05:01:39 -0700 Subject: [PATCH 04/10] Fix __lltrace__ --- Lib/test/test_lltrace.py | 55 ++++++++++++++++++++-------------------- Python/ceval.c | 2 +- 2 files changed, 28 insertions(+), 29 
deletions(-) diff --git a/Lib/test/test_lltrace.py b/Lib/test/test_lltrace.py index 292180945f8e24..271a8c4fcf3485 100644 --- a/Lib/test/test_lltrace.py +++ b/Lib/test/test_lltrace.py @@ -8,7 +8,7 @@ def example(): x = [] - for i in range(1): + for i in range(0): x.append(i) x = "this is" y = "an example" @@ -63,33 +63,32 @@ def dont_trace_2(): self.assertNotIn("dont_trace_1", stdout) self.assertNotIn("'dont_trace_2' in module", stdout) - # def test_lltrace_different_module(self): - # stdout = self.run_code(""" - # from test import test_lltrace - # test_lltrace.__lltrace__ = 1 - # test_lltrace.example() - # """) - # self.assertIn("'example' in module 'test.test_lltrace'", stdout) - # self.assertIn('LOAD_CONST', stdout) - # self.assertIn('FOR_ITER', stdout) - # self.assertIn('this is an example', stdout) - - # # check that offsets match the output of dis.dis() - # instr_map = {i.offset: i for i in dis.get_instructions(example)} - # for line in stdout.splitlines(): - # offset, colon, opname_oparg = line.partition(":") - # if not colon: - # continue - # offset = int(offset) - # opname_oparg = opname_oparg.split() - # if len(opname_oparg) == 2: - # opname, oparg = opname_oparg - # oparg = int(oparg) - # else: - # (opname,) = opname_oparg - # oparg = None - # self.assertEqual(instr_map[offset].opname, opname) - # self.assertEqual(instr_map[offset].arg, oparg) + def test_lltrace_different_module(self): + stdout = self.run_code(""" + from test import test_lltrace + test_lltrace.__lltrace__ = 1 + test_lltrace.example() + """) + self.assertIn("'example' in module 'test.test_lltrace'", stdout) + self.assertIn('LOAD_CONST', stdout) + self.assertIn('FOR_ITER', stdout) + self.assertIn('this is an example', stdout) + # check that offsets match the output of dis.dis() + instr_map = {i.offset: i for i in dis.get_instructions(example, adaptive=True)} + for line in stdout.splitlines(): + offset, colon, opname_oparg = line.partition(":") + if not colon: + continue + offset = 
int(offset) + opname_oparg = opname_oparg.split() + if len(opname_oparg) == 2: + opname, oparg = opname_oparg + oparg = int(oparg) + else: + (opname,) = opname_oparg + oparg = None + self.assertEqual(instr_map[offset].opname, opname) + self.assertEqual(instr_map[offset].arg, oparg) def test_lltrace_does_not_crash_on_subscript_operator(self): # If this test fails, it will reproduce a crash reported as diff --git a/Python/ceval.c b/Python/ceval.c index ef1b0dca25719e..698a062b642800 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -143,7 +143,7 @@ lltrace_instruction(_PyInterpreterFrame *frame, const char *opname = _PyOpcode_OpName[opcode]; assert(opname != NULL); int offset = (int)(next_instr - _PyCode_CODE(frame->f_code)); - if (HAS_ARG(opcode)) { + if (HAS_ARG(_PyOpcode_Deopt[opcode])) { printf("%d: %s %d\n", offset * 2, opname, oparg); } else { From fc5c3388cbc1191428fc1aee516c4229de679971 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 15 Oct 2022 05:02:01 -0700 Subject: [PATCH 05/10] Fix test_dis --- Lib/test/test_dis.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 697262858aedb1..b7fda47ebe5e44 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1166,11 +1166,16 @@ def test_extended_arg_quick(self): got = self.get_disassembly(extended_arg_quick) self.do_disassembly_compare(got, dis_extended_arg_quick_code, True) - def get_cached_values(self, adaptive): + def get_cached_values(self, quickened, adaptive): def f(): l = [] for i in range(42): l.append(i) + if quickened: + self.code_quicken(f) + else: + # "copy" the code to un-quicken it: + f.__code__ = f.__code__.replace() for instruction in dis.get_instructions( f, show_caches=True, adaptive=adaptive ): @@ -1179,20 +1184,20 @@ def f(): @cpython_only def test_show_caches(self): - for adaptive in (False, True): - with self.subTest(f"{adaptive=}"): - if adaptive: - pattern = r"^(\w+: 
\d+)?$" - else: - pattern = r"^(\w+: 0)?$" - caches = list(self.get_cached_values(adaptive)) - for cache in caches: - self.assertRegex(cache, pattern) - total_caches = 23 - empty_caches = 8 if adaptive else total_caches - self.assertEqual(caches.count(""), empty_caches) - self.assertEqual(len(caches), total_caches) - + for quickened in (False, True): + for adaptive in (False, True): + with self.subTest(f"{quickened=}, {adaptive=}"): + if adaptive: + pattern = r"^(\w+: \d+)?$" + else: + pattern = r"^(\w+: 0)?$" + caches = list(self.get_cached_values(quickened, adaptive)) + for cache in caches: + self.assertRegex(cache, pattern) + total_caches = 23 + empty_caches = 8 if adaptive else total_caches + self.assertEqual(caches.count(""), empty_caches) + self.assertEqual(len(caches), total_caches) class DisWithFileTests(DisTests): From 659add75bc222b3e622b60efb8fe454efbcf5406 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 15 Oct 2022 05:02:21 -0700 Subject: [PATCH 06/10] Handle double-quickening correctly --- Python/specialize.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 476533cb1b37c3..3354fdd5aab6c2 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -283,13 +283,9 @@ _PyCode_Quicken(PyCodeObject *code) _Py_CODEUNIT *instructions = _PyCode_CODE(code); for (int i = 0; i < Py_SIZE(code); i++) { int opcode = _PyOpcode_Deopt[_Py_OPCODE(instructions[i])]; - if (opcode == 0) { - continue; - } uint8_t adaptive_opcode = _PyOpcode_Adaptive[opcode]; if (adaptive_opcode) { _Py_SET_OPCODE(instructions[i], adaptive_opcode); - // Make sure the adaptive counter is zero: instructions[i + 1] = adaptive_counter_start(); previous_opcode = -1; i += _PyOpcode_Caches[opcode]; @@ -328,8 +324,6 @@ _PyCode_Quicken(PyCodeObject *code) LOAD_FAST__LOAD_CONST); } break; - default: - _Py_SET_OPCODE(instructions[i], opcode); } previous_opcode = opcode; } From f9f9c3ac4aa9265d958267d5fcf6f282f08a9049 Mon Sep 17 00:00:00 2001 
From: Brandt Bucher Date: Sat, 15 Oct 2022 05:41:37 -0700 Subject: [PATCH 07/10] Fix test_embed --- Lib/test/test_embed.py | 45 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 107c7ec2842fa9..ab955da76926ba 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -340,32 +340,41 @@ def test_finalize_structseq(self): out, err = self.run_embedded_interpreter("test_repeated_init_exec", code) self.assertEqual(out, 'Tests passed\n' * INIT_LOOPS) - def test_quickened_static_code_gets_unquickened_at_Py_FINALIZE(self): + def test_specialized_static_code_gets_unspecialized_at_Py_FINALIZE(self): # https://github.com/python/cpython/issues/92031 - # Do these imports outside of the code string to avoid using - # importlib too much from within the code string, so that - # _handle_fromlist doesn't get quickened until we intend it to. - from dis import _all_opmap - resume = _all_opmap["RESUME"] - from test.test_dis import QUICKENING_WARMUP_DELAY - - code = textwrap.dedent(f"""\ + code = textwrap.dedent("""\ + import dis import importlib._bootstrap + import opcode + import test.test_dis + + def is_specialized(f): + for instruction in dis.get_instructions(f, adaptive=True): + opname = instruction.opname + if ( + opname in opcode._specialized_instructions + # Exclude superinstructions: + and "__" not in opname + # Exclude adaptive instructions: + and not opname.endswith("_ADAPTIVE") + # Exclude "quick" instructions: + and not opname.endswith("_QUICK") + ): + return True + return False + func = importlib._bootstrap._handle_fromlist - code = func.__code__ - # Assert initially unquickened. - # Use sets to account for byte order. 
- if set(code._co_code_adaptive[:2]) != set([{resume}, 0]): - raise AssertionError() + # "copy" the code to un-quicken it: + func.__code__ = func.__code__.replace() + + assert not is_specialized(func), "specialized instructions found" - for i in range({QUICKENING_WARMUP_DELAY}): + for i in range(test.test_dis.QUICKENING_WARMUP_DELAY): func(importlib._bootstrap, ["x"], lambda *args: None) - # Assert quickening worked - if set(code._co_code_adaptive[:2]) != set([{resume}, 0]): - raise AssertionError() + assert is_specialized(func), "no specialized instructions found" print("Tests passed") """) From 5bf4b124f011b57adbf43a89f6ee52d7ae200ef1 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 15 Oct 2022 05:54:47 -0700 Subject: [PATCH 08/10] Remove some unneeded _PyOpcode_Deopts --- Objects/frameobject.c | 2 +- Objects/genobject.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 8b4494a5fe82fd..96de6c497d773a 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -603,7 +603,7 @@ _PyFrame_GetState(PyFrameObject *frame) if (_PyInterpreterFrame_LASTI(frame->f_frame) < 0) { return FRAME_CREATED; } - switch (_PyOpcode_Deopt[_Py_OPCODE(*frame->f_frame->prev_instr)]) + switch (_Py_OPCODE(*frame->f_frame->prev_instr)) { case COPY_FREE_VARS: case MAKE_CELL: diff --git a/Objects/genobject.c b/Objects/genobject.c index ad4fbed6d8d579..c62fb623153591 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -8,7 +8,6 @@ #include "pycore_frame.h" // _PyInterpreterFrame #include "pycore_genobject.h" // struct _Py_async_gen_state #include "pycore_object.h" // _PyObject_GC_UNTRACK() -#include "pycore_opcode.h" // _PyOpcode_Deopt #include "pycore_pyerrors.h" // _PyErr_ClearExcState() #include "pycore_pystate.h" // _PyThreadState_GET() #include "structmember.h" // PyMemberDef @@ -364,7 +363,7 @@ _PyGen_yf(PyGenObject *gen) return NULL; } _Py_CODEUNIT next = frame->prev_instr[1]; - if 
(_PyOpcode_Deopt[_Py_OPCODE(next)] != RESUME || _Py_OPARG(next) < 2) + if (_Py_OPCODE(next) != RESUME || _Py_OPARG(next) < 2) { /* Not in a yield from */ return NULL; From 9b59294b2361cb26cde3de826d5e160d0169cb95 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 15 Oct 2022 06:51:45 -0700 Subject: [PATCH 09/10] fixup --- Lib/test/test_lltrace.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_lltrace.py b/Lib/test/test_lltrace.py index 271a8c4fcf3485..747666e256700e 100644 --- a/Lib/test/test_lltrace.py +++ b/Lib/test/test_lltrace.py @@ -73,6 +73,7 @@ def test_lltrace_different_module(self): self.assertIn('LOAD_CONST', stdout) self.assertIn('FOR_ITER', stdout) self.assertIn('this is an example', stdout) + # check that offsets match the output of dis.dis() instr_map = {i.offset: i for i in dis.get_instructions(example, adaptive=True)} for line in stdout.splitlines(): From 6e3869c19d10cce0c5480201708e1b8c89df64c8 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 16 Oct 2022 13:26:56 -0700 Subject: [PATCH 10/10] blurb add --- .../2022-10-16-13-26-46.gh-issue-98686.D9Gu_Q.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-16-13-26-46.gh-issue-98686.D9Gu_Q.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-16-13-26-46.gh-issue-98686.D9Gu_Q.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-16-13-26-46.gh-issue-98686.D9Gu_Q.rst new file mode 100644 index 00000000000000..b97c1f9f4eddf1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-16-13-26-46.gh-issue-98686.D9Gu_Q.rst @@ -0,0 +1,2 @@ +Quicken all code objects, and specialize adaptive bytecode instructions more +aggressively.