diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h
index 2bea3a55ec8735..87881a80f8e3ef 100644
--- a/Include/internal/pycore_long.h
+++ b/Include/internal/pycore_long.h
@@ -34,6 +34,9 @@ static inline PyObject* _PyLong_GetZero(void)
 static inline PyObject* _PyLong_GetOne(void)
 { return __PyLong_GetSmallInt_internal(1); }
 
+// Add a C int to a PyLong (for ADD_INT opcode)
+PyAPI_FUNC(PyObject *) _PyLong_AddInt(PyLongObject *, int);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/opcode.h b/Include/opcode.h
index ea484c5a68fc96..f6c681673de959 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -133,6 +133,7 @@ extern "C" {
 #define SET_UPDATE              163
 #define DICT_MERGE              164
 #define DICT_UPDATE             165
+#define ADD_INT                 166
 #ifdef NEED_OPCODE_JUMP_TABLES
 static uint32_t _PyOpcode_RelativeJump[8] = {
     0U,
diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py
index 358c650916b0bb..f54487557a3336 100644
--- a/Lib/importlib/_bootstrap_external.py
+++ b/Lib/importlib/_bootstrap_external.py
@@ -314,8 +314,8 @@ def _write_atomic(path, data, mode=0o666):
 #     Python 3.10a2 3432 (Function annotation for MAKE_FUNCTION is changed from dict to tuple bpo-42202)
 #     Python 3.10a2 3433 (RERAISE restores f_lasti if oparg != 0)
 #     Python 3.10a6 3434 (PEP 634: Structural Pattern Matching)
-#     Python 3.10a7 3435 Use instruction offsets (as opposed to byte offsets).
-
+#     Python 3.10a7 3435 (Use instruction offsets instead of byte offsets)
+#     Python 3.10a7 3435 (Add ADD_INT opcode)
 #
 # MAGIC must change whenever the bytecode emitted by the compiler may no
 # longer be understood by older implementations of the eval loop (usually
@@ -324,7 +324,7 @@ def _write_atomic(path, data, mode=0o666):
 # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
 # in PC/launcher.c must also be updated.
 
-MAGIC_NUMBER = (3434).to_bytes(2, 'little') + b'\r\n'
+MAGIC_NUMBER = (3435).to_bytes(2, 'little') + b'\r\n'
 _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little')  # For import.c
 
 _PYCACHE = '__pycache__'
diff --git a/Lib/opcode.py b/Lib/opcode.py
index b1197129571cd3..7df8b227b784cb 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -211,5 +211,6 @@ def jabs_op(name, op):
 def_op('SET_UPDATE', 163)
 def_op('DICT_MERGE', 164)
 def_op('DICT_UPDATE', 165)
+def_op('ADD_INT', 166)
 
 del def_op, name_op, jrel_op, jabs_op
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
index 19e5c0f6335020..0689b78306bb74 100644
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -205,20 +205,20 @@ def bug42562():
 """
 
 
-expr_str = "x + 1"
+expr_str = "x + 1000000"
 
 dis_expr_str = """\
   1           0 LOAD_NAME                0 (x)
-              2 LOAD_CONST               0 (1)
+              2 LOAD_CONST               0 (1000000)
               4 BINARY_ADD
               6 RETURN_VALUE
 """
 
-simple_stmt_str = "x = x + 1"
+simple_stmt_str = "x = x + 1000000"
 
 dis_simple_stmt_str = """\
   1           0 LOAD_NAME                0 (x)
-              2 LOAD_CONST               0 (1)
+              2 LOAD_CONST               0 (1000000)
               4 BINARY_ADD
               6 STORE_NAME               0 (x)
               8 LOAD_CONST               1 (None)
@@ -559,14 +559,14 @@ def func(count):
     def test_big_offsets(self):
         def func(count):
             namespace = {}
-            func = "def foo(x):\n " + ";".join(["x = x + 1"] * count) + "\n return x"
+            func = "def foo(x):\n " + ";".join(["x = x + 1000000"] * count) + "\n return x"
             exec(func, namespace)
             return namespace['foo']
 
         def expected(count, w):
             s = ['''\
            %*d LOAD_FAST                0 (x)
-           %*d LOAD_CONST               1 (1)
+           %*d LOAD_CONST               1 (1000000)
            %*d BINARY_ADD
            %*d STORE_FAST               0 (x)
 ''' % (w, 8*i, w, 8*i + 2, w, 8*i + 4, w, 8*i + 6)
@@ -783,7 +783,7 @@ def f(c=c):
 Stack size:        2
 Flags:             NOFREE
 Constants:
-   0: 1
+   0: 1000000
 Names:
    0: x"""
 
@@ -797,7 +797,7 @@ def f(c=c):
 Stack size:        2
 Flags:             NOFREE
 Constants:
-   0: 1
+   0: 1000000
    1: None
 Names:
    0: x"""
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-03-31-17-47-25.bpo-43684.hHmyyt.rst b/Misc/NEWS.d/next/Core and Builtins/2021-03-31-17-47-25.bpo-43684.hHmyyt.rst
new file mode 100644
index 00000000000000..5ac0118cb70bd6
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-03-31-17-47-25.bpo-43684.hHmyyt.rst
@@ -0,0 +1,3 @@
+Add an ``ADD_INT`` opcode. This combines ``LOAD_CONST`` for a small integer
+constant and ``BINARY_ADD`` into a single opcode. Combining this popular pair
+of opcodes into one saves a modest amount of bytecode size and dispatch time.
diff --git a/Objects/longobject.c b/Objects/longobject.c
index e1c1191e648dae..3c1180f735a52e 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -3094,6 +3094,30 @@ long_add(PyLongObject *a, PyLongObject *b)
     return (PyObject *)z;
 }
 
+/* Add the C int `iright` to the int object `left` (helper for the ADD_INT
+   opcode).  Return a new reference, or NULL if an allocation/addition fails. */
+PyObject *
+_PyLong_AddInt(PyLongObject *left, int iright)
+{
+    Py_ssize_t size = Py_SIZE(left);
+    /* Fast path: size is -1, 0 or 1, so the value of `left` is
+       size * ob_digit[0].  Do the arithmetic in long long so that
+       digit + int cannot overflow where Py_ssize_t is only 32 bits wide. */
+    if (((size_t)size + 1) < 3) {
+        long long lsum = (long long)iright +
+                         (long long)size * (long long)left->ob_digit[0];
+        return PyLong_FromLongLong(lsum);
+    }
+    PyLongObject *right = (PyLongObject *)PyLong_FromLongLong(iright);
+    if (right == NULL) {
+        return NULL;
+    }
+    PyObject *sum = long_add(left, right);
+    /* `right` is a temporary owned by this function: release it. */
+    Py_DECREF(right);
+    return sum;
+}
+
 static PyObject *
 long_sub(PyLongObject *a, PyLongObject *b)
 {
diff --git a/Python/ceval.c b/Python/ceval.c
index b9d784a6298400..c1ac867c4a64ca 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -16,6 +16,7 @@
 #include "pycore_ceval.h"         // _PyEval_SignalAsyncExc()
 #include "pycore_code.h"          // _PyCode_InitOpcache()
 #include "pycore_initconfig.h"    // _PyStatus_OK()
+#include "pycore_long.h"          // _PyLong_AddInt()
 #include "pycore_object.h"        // _PyObject_GC_TRACK()
 #include "pycore_pyerrors.h"      // _PyErr_Fetch()
 #include "pycore_pylifecycle.h"   // _PyErr_Print()
@@ -2064,16 +2065,37 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
             DISPATCH();
         }
 
+        case TARGET(ADD_INT): {
+            /* Fused LOAD_CONST(small int) + BINARY_ADD: TOP = TOP + oparg. */
+            PyObject *left = TOP();
+            if (PyLong_CheckExact(left)) {
+                PyObject *sum = _PyLong_AddInt((PyLongObject *)left, oparg);
+                Py_DECREF(left);
+                SET_TOP(sum);
+                if (sum == NULL) {
+                    goto error;
+                }
+                DISPATCH();
+            }
+            PyObject *right = PyLong_FromLongLong(oparg);
+            if (right == NULL) {
+                goto error;
+            }
+            // Don't optimize unicode here since we know right is a long
+            PyObject *sum = PyNumber_Add(left, right);
+            Py_DECREF(left);
+            Py_DECREF(right);
+            SET_TOP(sum);
+            if (sum == NULL) {
+                goto error;
+            }
+            DISPATCH();
+        }
+
         case TARGET(BINARY_ADD): {
             PyObject *right = POP();
             PyObject *left = TOP();
             PyObject *sum;
-            /* NOTE(vstinner): Please don't try to micro-optimize int+int on
-               CPython using bytecode, it is simply worthless.
-               See http://bugs.python.org/issue21955 and
-               http://bugs.python.org/issue10044 for the discussion. In short,
-               no patch shown any impact on a realistic benchmark, only a minor
-               speedup on microbenchmarks. */
             if (PyUnicode_CheckExact(left) &&
                      PyUnicode_CheckExact(right)) {
                 sum = unicode_concatenate(tstate, left, right, f, next_instr);
diff --git a/Python/compile.c b/Python/compile.c
index 308d6866c7dc4b..aa5b9ce732dbfd 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -978,6 +978,10 @@ stack_effect(int opcode, int oparg, int jump)
         case INPLACE_OR:
             return -1;
 
+        /* Combined instructions */
+        case ADD_INT:
+            return 0;
+
         case SETUP_WITH:
             /* 1 in the normal flow.
              * Restore the stack position and push 6 values before jumping to
@@ -6842,6 +6846,7 @@ optimize_basic_block(basicblock *bb, PyObject *consts)
         }
         switch (inst->i_opcode) {
             /* Remove LOAD_CONST const; conditional jump */
+            /* Also optimize LOAD_CONST(small_int) + BINARY_ADD */
             case LOAD_CONST:
             {
                 PyObject* cnt;
@@ -6882,6 +6887,22 @@ optimize_basic_block(basicblock *bb, PyObject *consts)
                             bb->b_instr[i+1].i_opcode = NOP;
                         }
                         break;
+                    case BINARY_ADD:
+                        cnt = PyList_GET_ITEM(consts, oparg);
+                        if (PyLong_CheckExact(cnt) && inst->i_lineno == bb->b_instr[i+1].i_lineno) {
+                            int ovf = 0;
+                            long val = PyLong_AsLongAndOverflow(cnt, &ovf);
+                            // TODO: What about larger values?
+                            // They would cause an EXTENDED_ARG to be generated,
+                            // which may defeat any potential cost savings.
+                            if (ovf == 0 && val >= 0 && val < 256) {
+                                inst->i_opcode = ADD_INT;
+                                inst->i_oparg = val;
+                                bb->b_instr[i+1].i_opcode = NOP;
+                                break;
+                            }
+                        }
+                        break;
                 }
                 break;
             }
diff --git a/Python/importlib_external.h b/Python/importlib_external.h
index 465ec0cd6dd0e9..89f4d6fc254a60 100644
--- a/Python/importlib_external.h
+++ b/Python/importlib_external.h
@@ -315,7 +315,7 @@ const unsigned char _Py_M__importlib_bootstrap_external[] = {
     0,115,38,0,0,0,16,5,6,1,22,1,4,255,2,2,
     14,3,24,1,16,128,18,1,12,1,2,1,12,1,2,3,
     12,254,2,1,2,1,2,254,2,253,255,128,114,84,0,0,
-    0,105,106,13,0,0,114,45,0,0,0,114,33,0,0,0,
+    0,105,107,13,0,0,114,45,0,0,0,114,33,0,0,0,
     115,2,0,0,0,13,10,90,11,95,95,112,121,99,97,99,
     104,101,95,95,122,4,111,112,116,45,122,3,46,112,121,122,
     4,46,112,121,119,122,4,46,112,121,99,41,1,218,12,111,
diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
index 692442ac8c1b82..b873d69b0fa3af 100644
--- a/Python/opcode_targets.h
+++ b/Python/opcode_targets.h
@@ -165,7 +165,7 @@ static void *opcode_targets[256] = {
     &&TARGET_SET_UPDATE,
     &&TARGET_DICT_MERGE,
     &&TARGET_DICT_UPDATE,
-    &&_unknown_opcode,
+    &&TARGET_ADD_INT,
     &&_unknown_opcode,
     &&_unknown_opcode,
     &&_unknown_opcode,
diff --git a/Tools/scripts/count_opcodes.py b/Tools/scripts/count_opcodes.py
new file mode 100644
index 00000000000000..abe7539e1a6bc5
--- /dev/null
+++ b/Tools/scripts/count_opcodes.py
@@ -0,0 +1,74 @@
+"""Report how often the ADD_INT opcode occurs in compiled Python sources.
+
+Usage: count_opcodes.py DIRECTORY
+"""
+
+import opcode
+import os
+import sys
+
+
+ADD_INT = opcode.opmap['ADD_INT']
+
+
+def all_code_objects(code):
+    """Yield *code* and every code object nested inside it, at any depth."""
+    yield code
+    for x in code.co_consts:
+        if hasattr(x, 'co_code'):
+            yield from all_code_objects(x)
+
+
+def report(code, filename):
+    """Count instructions in *code* and its nested code objects.
+
+    Print a summary line for *filename* when at least one ADD_INT was
+    found.  Return ``(add_int_count, total_count)`` where *total_count*
+    counts every instruction, ADD_INT included.
+    """
+    add_int_count = total_count = 0
+    for co in all_code_objects(code):
+        co_code = co.co_code
+        # Step by two: the opcode is read from every second byte.
+        for i in range(0, len(co_code), 2):
+            total_count += 1
+            if co_code[i] == ADD_INT:
+                add_int_count += 1
+    if add_int_count:
+        print(filename + ":", add_int_count, "/", total_count,
+              f"{add_int_count/total_count*100:.2f}%")
+    return add_int_count, total_count
+
+
+def main(dirname):
+    """Compile every ``.py`` file under *dirname* and print ADD_INT totals."""
+    add_int_count = total_count = 0
+    for root, _dirs, files in os.walk(dirname):
+        for file in files:
+            if not file.endswith(".py"):
+                continue
+            full = os.path.join(root, file)
+            try:
+                # Read bytes so compile() honours the PEP 263 coding cookie.
+                with open(full, "rb") as f:
+                    source = f.read()
+                code = compile(source, full, "exec")
+            except Exception as err:
+                # Best effort: report the failure and move on.
+                print(full + ":", err)
+                continue
+            a, b = report(code, filename=full)
+            add_int_count += a
+            total_count += b
+    if total_count:
+        print("TOTAL", add_int_count, "/", total_count,
+              f"{add_int_count/total_count*100:.2f}%")
+    else:
+        # Nothing was compiled; avoid dividing by zero.
+        print("TOTAL", add_int_count, "/", total_count)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        sys.exit(f"usage: {sys.argv[0]} DIRECTORY")
+    main(sys.argv[1])