From a301c922748368d0d70f7fbf72b10db7091fe428 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 23 Nov 2021 02:09:36 -0500 Subject: [PATCH 01/10] specialize compare_op --- Include/internal/pycore_code.h | 1 + Include/internal/pycore_long.h | 1 + Include/opcode.h | 77 ++++++++++++++++++---------------- Lib/opcode.py | 3 ++ Objects/longobject.c | 31 +++++++++++--- Python/ceval.c | 77 ++++++++++++++++++++++++++++++++++ Python/opcode_targets.h | 36 ++++++++-------- Python/specialize.c | 35 ++++++++++++++++ 8 files changed, 200 insertions(+), 61 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 194af46a3a2740..e5d410281ee02d 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -272,6 +272,7 @@ int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins); void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index b9f926996d8107..990aaa32ebf50b 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -24,6 +24,7 @@ static inline PyObject* _PyLong_GetOne(void) PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right); PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right); PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right); +PyObject *_PyLong_RichCompare(PyLongObject *left, PyLongObject *right, int op); /* Used by Python/mystrtoul.c, _PyBytes_FromHex(), _PyBytes_DecodeEscape(), etc. 
*/ diff --git a/Include/opcode.h b/Include/opcode.h index 3ec89bd4c0be60..2021ae6108fb6c 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -120,43 +120,46 @@ extern "C" { #define BINARY_OP_MULTIPLY_FLOAT 18 #define BINARY_OP_SUBTRACT_INT 19 #define BINARY_OP_SUBTRACT_FLOAT 20 -#define BINARY_SUBSCR_ADAPTIVE 21 -#define BINARY_SUBSCR_GETITEM 22 -#define BINARY_SUBSCR_LIST_INT 23 -#define BINARY_SUBSCR_TUPLE_INT 24 -#define BINARY_SUBSCR_DICT 26 -#define STORE_SUBSCR_ADAPTIVE 27 -#define STORE_SUBSCR_LIST_INT 28 -#define STORE_SUBSCR_DICT 29 -#define CALL_FUNCTION_ADAPTIVE 34 -#define CALL_FUNCTION_BUILTIN_O 36 -#define CALL_FUNCTION_BUILTIN_FAST 38 -#define CALL_FUNCTION_LEN 39 -#define CALL_FUNCTION_ISINSTANCE 40 -#define CALL_FUNCTION_PY_SIMPLE 41 -#define JUMP_ABSOLUTE_QUICK 42 -#define LOAD_ATTR_ADAPTIVE 43 -#define LOAD_ATTR_INSTANCE_VALUE 44 -#define LOAD_ATTR_WITH_HINT 45 -#define LOAD_ATTR_SLOT 46 -#define LOAD_ATTR_MODULE 47 -#define LOAD_GLOBAL_ADAPTIVE 48 -#define LOAD_GLOBAL_MODULE 55 -#define LOAD_GLOBAL_BUILTIN 56 -#define LOAD_METHOD_ADAPTIVE 57 -#define LOAD_METHOD_CACHED 58 -#define LOAD_METHOD_CLASS 59 -#define LOAD_METHOD_MODULE 62 -#define LOAD_METHOD_NO_DICT 63 -#define STORE_ATTR_ADAPTIVE 64 -#define STORE_ATTR_INSTANCE_VALUE 65 -#define STORE_ATTR_SLOT 66 -#define STORE_ATTR_WITH_HINT 67 -#define LOAD_FAST__LOAD_FAST 75 -#define STORE_FAST__LOAD_FAST 76 -#define LOAD_FAST__LOAD_CONST 77 -#define LOAD_CONST__LOAD_FAST 78 -#define STORE_FAST__STORE_FAST 79 +#define COMPARE_OP_ADAPTIVE 21 +#define COMPARE_OP_FLOAT 22 +#define COMPARE_OP_INT 23 +#define BINARY_SUBSCR_ADAPTIVE 24 +#define BINARY_SUBSCR_GETITEM 26 +#define BINARY_SUBSCR_LIST_INT 27 +#define BINARY_SUBSCR_TUPLE_INT 28 +#define BINARY_SUBSCR_DICT 29 +#define STORE_SUBSCR_ADAPTIVE 34 +#define STORE_SUBSCR_LIST_INT 36 +#define STORE_SUBSCR_DICT 38 +#define CALL_FUNCTION_ADAPTIVE 39 +#define CALL_FUNCTION_BUILTIN_O 40 +#define CALL_FUNCTION_BUILTIN_FAST 41 +#define 
CALL_FUNCTION_LEN 42 +#define CALL_FUNCTION_ISINSTANCE 43 +#define CALL_FUNCTION_PY_SIMPLE 44 +#define JUMP_ABSOLUTE_QUICK 45 +#define LOAD_ATTR_ADAPTIVE 46 +#define LOAD_ATTR_INSTANCE_VALUE 47 +#define LOAD_ATTR_WITH_HINT 48 +#define LOAD_ATTR_SLOT 55 +#define LOAD_ATTR_MODULE 56 +#define LOAD_GLOBAL_ADAPTIVE 57 +#define LOAD_GLOBAL_MODULE 58 +#define LOAD_GLOBAL_BUILTIN 59 +#define LOAD_METHOD_ADAPTIVE 62 +#define LOAD_METHOD_CACHED 63 +#define LOAD_METHOD_CLASS 64 +#define LOAD_METHOD_MODULE 65 +#define LOAD_METHOD_NO_DICT 66 +#define STORE_ATTR_ADAPTIVE 67 +#define STORE_ATTR_INSTANCE_VALUE 75 +#define STORE_ATTR_SLOT 76 +#define STORE_ATTR_WITH_HINT 77 +#define LOAD_FAST__LOAD_FAST 78 +#define STORE_FAST__LOAD_FAST 79 +#define LOAD_FAST__LOAD_CONST 80 +#define LOAD_CONST__LOAD_FAST 81 +#define STORE_FAST__STORE_FAST 87 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index 3603bb422b1504..40be5d82c00c23 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -233,6 +233,9 @@ def jabs_op(name, op): "BINARY_OP_MULTIPLY_FLOAT", "BINARY_OP_SUBTRACT_INT", "BINARY_OP_SUBTRACT_FLOAT", + "COMPARE_OP_ADAPTIVE", + "COMPARE_OP_FLOAT", + "COMPARE_OP_INT", "BINARY_SUBSCR_ADAPTIVE", "BINARY_SUBSCR_GETITEM", "BINARY_SUBSCR_LIST_INT", diff --git a/Objects/longobject.c b/Objects/longobject.c index ce4f0d72540d38..096bcdf0eb836b 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2956,16 +2956,35 @@ long_compare(PyLongObject *a, PyLongObject *b) return sign; } +PyObject * +_PyLong_RichCompare(PyLongObject *left, PyLongObject *right, int op) +{ + Py_ssize_t diff; + if (left == right) { + diff = 0; + } + else { + diff = long_compare(left, right); + } + int cmp; + switch (op) { + case Py_LT: cmp = (diff < 0); break; + case Py_LE: cmp = (diff <= 0); break; + case Py_EQ: cmp = (diff == 0); break; + case Py_NE: cmp = (diff != 0); break; + case Py_GT: cmp = (diff > 0); break; + case 
Py_GE: cmp = (diff >= 0); break; + default: Py_UNREACHABLE(); + } + return PyBool_FromLong(cmp); +} + static PyObject * long_richcompare(PyObject *self, PyObject *other, int op) { - Py_ssize_t result; CHECK_BINOP(self, other); - if (self == other) - result = 0; - else - result = long_compare((PyLongObject*)self, (PyLongObject*)other); - Py_RETURN_RICHCOMPARE(result, 0, op); + return _PyLong_RichCompare((PyLongObject *)self, + (PyLongObject *)other, op); } static Py_hash_t diff --git a/Python/ceval.c b/Python/ceval.c index 1d69708576fa47..c3e848f6babdb5 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3670,6 +3670,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } TARGET(COMPARE_OP) { + PREDICTED(COMPARE_OP); + STAT_INC(COMPARE_OP, unquickened); assert(oparg <= Py_GE); PyObject *right = POP(); PyObject *left = TOP(); @@ -3684,6 +3686,80 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } + TARGET(COMPARE_OP_ADAPTIVE) { + assert(cframe.use_tracing == 0); + SpecializedCacheEntry *cache = GET_CACHE(); + if (cache->adaptive.counter == 0) { + PyObject *right = TOP(); + PyObject *left = SECOND(); + next_instr--; + _Py_Specialize_CompareOp(left, right, next_instr, cache); + DISPATCH(); + } + else { + STAT_INC(COMPARE_OP, deferred); + cache->adaptive.counter--; + oparg = cache->adaptive.original_oparg; + STAT_DEC(COMPARE_OP, unquickened); + JUMP_TO_INSTRUCTION(COMPARE_OP); + } + } + + TARGET(COMPARE_OP_FLOAT) { + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + PyObject *right = TOP(); + PyObject *left = SECOND(); + DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + double dleft = PyFloat_AS_DOUBLE(left); + double dright = PyFloat_AS_DOUBLE(right); + int cmp; + switch (cache0->original_oparg) { + case Py_LT: cmp = (dleft < dright); break; + case Py_LE: cmp = 
(dleft <= dright); break; + case Py_EQ: cmp = (dleft == dright); break; + case Py_NE: cmp = (dleft != dright); break; + case Py_GT: cmp = (dleft > dright); break; + case Py_GE: cmp = (dleft >= dright); break; + default: Py_UNREACHABLE(); + } + // This cannot fail + PyObject *res = PyBool_FromLong(cmp); + assert(!PyErr_Occurred()); + SET_SECOND(res); + STACK_SHRINK(1); + Py_DECREF(left); + Py_DECREF(right); + PREDICT(POP_JUMP_IF_FALSE); + PREDICT(POP_JUMP_IF_TRUE); + DISPATCH(); + } + + TARGET(COMPARE_OP_INT) { + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + PyObject *right = TOP(); + PyObject *left = SECOND(); + DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + // This cannot fail. + PyObject *res = _PyLong_RichCompare( + (PyLongObject *)left, (PyLongObject *)right, + cache0->original_oparg); + assert(res != NULL); + assert(!PyErr_Occurred()); + SET_SECOND(res); + STACK_SHRINK(1); + Py_DECREF(left); + Py_DECREF(right); + PREDICT(POP_JUMP_IF_FALSE); + PREDICT(POP_JUMP_IF_TRUE); + DISPATCH(); + } + TARGET(IS_OP) { PyObject *right = POP(); PyObject *left = TOP(); @@ -4970,6 +5046,7 @@ MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_CACHE(LOAD_METHOD) MISS_WITH_CACHE(CALL_FUNCTION) MISS_WITH_CACHE(BINARY_OP) +MISS_WITH_CACHE(COMPARE_OP) MISS_WITH_CACHE(BINARY_SUBSCR) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 903b967c3a52ef..f7b47a840d7ea9 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -20,23 +20,26 @@ static void *opcode_targets[256] = { &&TARGET_BINARY_OP_MULTIPLY_FLOAT, &&TARGET_BINARY_OP_SUBTRACT_INT, &&TARGET_BINARY_OP_SUBTRACT_FLOAT, + &&TARGET_COMPARE_OP_ADAPTIVE, + &&TARGET_COMPARE_OP_FLOAT, + &&TARGET_COMPARE_OP_INT, &&TARGET_BINARY_SUBSCR_ADAPTIVE, + &&TARGET_BINARY_SUBSCR, &&TARGET_BINARY_SUBSCR_GETITEM, &&TARGET_BINARY_SUBSCR_LIST_INT, 
&&TARGET_BINARY_SUBSCR_TUPLE_INT, - &&TARGET_BINARY_SUBSCR, &&TARGET_BINARY_SUBSCR_DICT, - &&TARGET_STORE_SUBSCR_ADAPTIVE, - &&TARGET_STORE_SUBSCR_LIST_INT, - &&TARGET_STORE_SUBSCR_DICT, &&TARGET_GET_LEN, &&TARGET_MATCH_MAPPING, &&TARGET_MATCH_SEQUENCE, &&TARGET_MATCH_KEYS, - &&TARGET_CALL_FUNCTION_ADAPTIVE, + &&TARGET_STORE_SUBSCR_ADAPTIVE, &&TARGET_PUSH_EXC_INFO, - &&TARGET_CALL_FUNCTION_BUILTIN_O, + &&TARGET_STORE_SUBSCR_LIST_INT, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_STORE_SUBSCR_DICT, + &&TARGET_CALL_FUNCTION_ADAPTIVE, + &&TARGET_CALL_FUNCTION_BUILTIN_O, &&TARGET_CALL_FUNCTION_BUILTIN_FAST, &&TARGET_CALL_FUNCTION_LEN, &&TARGET_CALL_FUNCTION_ISINSTANCE, @@ -45,28 +48,25 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_LOAD_ATTR_INSTANCE_VALUE, &&TARGET_LOAD_ATTR_WITH_HINT, - &&TARGET_LOAD_ATTR_SLOT, - &&TARGET_LOAD_ATTR_MODULE, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, &&TARGET_BEFORE_ASYNC_WITH, &&TARGET_BEFORE_WITH, &&TARGET_END_ASYNC_FOR, + &&TARGET_LOAD_ATTR_SLOT, + &&TARGET_LOAD_ATTR_MODULE, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, + &&TARGET_STORE_SUBSCR, + &&TARGET_DELETE_SUBSCR, &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_LOAD_METHOD_CACHED, &&TARGET_LOAD_METHOD_CLASS, - &&TARGET_STORE_SUBSCR, - &&TARGET_DELETE_SUBSCR, &&TARGET_LOAD_METHOD_MODULE, &&TARGET_LOAD_METHOD_NO_DICT, &&TARGET_STORE_ATTR_ADAPTIVE, - &&TARGET_STORE_ATTR_INSTANCE_VALUE, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_GET_ITER, &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_PRINT_EXPR, @@ -74,19 +74,19 @@ static void *opcode_targets[256] = { &&TARGET_YIELD_FROM, &&TARGET_GET_AWAITABLE, &&TARGET_LOAD_ASSERTION_ERROR, + &&TARGET_STORE_ATTR_INSTANCE_VALUE, + &&TARGET_STORE_ATTR_SLOT, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_LOAD_FAST__LOAD_CONST, 
&&TARGET_LOAD_CONST__LOAD_FAST, - &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&_unknown_opcode, + &&TARGET_STORE_FAST__STORE_FAST, &&_unknown_opcode, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, diff --git a/Python/specialize.c b/Python/specialize.c index 130da008ad8ce0..1a073443656eda 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -129,6 +129,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, STORE_ATTR, "store_attr"); err += add_stat_dict(stats, CALL_FUNCTION, "call_function"); err += add_stat_dict(stats, BINARY_OP, "binary_op"); + err += add_stat_dict(stats, COMPARE_OP, "compare_op"); if (err < 0) { Py_DECREF(stats); return NULL; @@ -187,6 +188,7 @@ _Py_PrintSpecializationStats(void) print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr"); print_stats(out, &_specialization_stats[CALL_FUNCTION], "call_function"); print_stats(out, &_specialization_stats[BINARY_OP], "binary_op"); + print_stats(out, &_specialization_stats[COMPARE_OP], "compare_op"); if (out != stderr) { fclose(out); } @@ -239,6 +241,7 @@ static uint8_t adaptive_opcodes[256] = { [CALL_FUNCTION] = CALL_FUNCTION_ADAPTIVE, [STORE_ATTR] = STORE_ATTR_ADAPTIVE, [BINARY_OP] = BINARY_OP_ADAPTIVE, + [COMPARE_OP] = COMPARE_OP_ADAPTIVE, }; /* The number of cache entries required for a "family" of instructions. */ @@ -251,6 +254,7 @@ static uint8_t cache_requirements[256] = { [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [BINARY_OP] = 1, // _PyAdaptiveEntry + [COMPARE_OP] = 1, // _PyAdaptiveEntry }; /* Return the oparg for the cache_offset and instruction index. 
@@ -1539,3 +1543,34 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, STAT_INC(BINARY_OP, specialization_success); adaptive->counter = initial_counter_value(); } + + +void +_Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, + _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) +{ + _PyAdaptiveEntry *adaptive = &cache->adaptive; + if (Py_TYPE(lhs) != Py_TYPE(rhs)) { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_DIFFERENT_TYPES); + goto failure; + } + else if (PyFloat_CheckExact(lhs)) { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT, _Py_OPARG(*instr)); + goto success; + } + else if (PyLong_CheckExact(lhs)) { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT, _Py_OPARG(*instr)); + goto success; + } + else { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_OTHER); + goto failure; + } +failure: + STAT_INC(COMPARE_OP, specialization_failure); + cache_backoff(adaptive); + return; +success: + STAT_INC(COMPARE_OP, specialization_success); + adaptive->counter = initial_counter_value(); +} From 9b5e15a016bec072d032d12589538a9502e87af8 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 23 Nov 2021 15:54:50 -0500 Subject: [PATCH 02/10] Add COMPARE_OP_STR for string equality --- Include/opcode.h | 75 +++++++++++++++++++++-------------------- Lib/opcode.py | 1 + Python/ceval.c | 24 +++++++++++++ Python/opcode_targets.h | 18 +++++----- Python/specialize.c | 24 ++++++++++++- 5 files changed, 95 insertions(+), 47 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index 2021ae6108fb6c..242324c1e808e0 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -123,43 +123,44 @@ extern "C" { #define COMPARE_OP_ADAPTIVE 21 #define COMPARE_OP_FLOAT 22 #define COMPARE_OP_INT 23 -#define BINARY_SUBSCR_ADAPTIVE 24 -#define BINARY_SUBSCR_GETITEM 26 -#define BINARY_SUBSCR_LIST_INT 27 -#define BINARY_SUBSCR_TUPLE_INT 28 -#define BINARY_SUBSCR_DICT 29 -#define STORE_SUBSCR_ADAPTIVE 34 -#define STORE_SUBSCR_LIST_INT 36 -#define STORE_SUBSCR_DICT 38 -#define 
CALL_FUNCTION_ADAPTIVE 39 -#define CALL_FUNCTION_BUILTIN_O 40 -#define CALL_FUNCTION_BUILTIN_FAST 41 -#define CALL_FUNCTION_LEN 42 -#define CALL_FUNCTION_ISINSTANCE 43 -#define CALL_FUNCTION_PY_SIMPLE 44 -#define JUMP_ABSOLUTE_QUICK 45 -#define LOAD_ATTR_ADAPTIVE 46 -#define LOAD_ATTR_INSTANCE_VALUE 47 -#define LOAD_ATTR_WITH_HINT 48 -#define LOAD_ATTR_SLOT 55 -#define LOAD_ATTR_MODULE 56 -#define LOAD_GLOBAL_ADAPTIVE 57 -#define LOAD_GLOBAL_MODULE 58 -#define LOAD_GLOBAL_BUILTIN 59 -#define LOAD_METHOD_ADAPTIVE 62 -#define LOAD_METHOD_CACHED 63 -#define LOAD_METHOD_CLASS 64 -#define LOAD_METHOD_MODULE 65 -#define LOAD_METHOD_NO_DICT 66 -#define STORE_ATTR_ADAPTIVE 67 -#define STORE_ATTR_INSTANCE_VALUE 75 -#define STORE_ATTR_SLOT 76 -#define STORE_ATTR_WITH_HINT 77 -#define LOAD_FAST__LOAD_FAST 78 -#define STORE_FAST__LOAD_FAST 79 -#define LOAD_FAST__LOAD_CONST 80 -#define LOAD_CONST__LOAD_FAST 81 -#define STORE_FAST__STORE_FAST 87 +#define COMPARE_OP_STR 24 +#define BINARY_SUBSCR_ADAPTIVE 26 +#define BINARY_SUBSCR_GETITEM 27 +#define BINARY_SUBSCR_LIST_INT 28 +#define BINARY_SUBSCR_TUPLE_INT 29 +#define BINARY_SUBSCR_DICT 34 +#define STORE_SUBSCR_ADAPTIVE 36 +#define STORE_SUBSCR_LIST_INT 38 +#define STORE_SUBSCR_DICT 39 +#define CALL_FUNCTION_ADAPTIVE 40 +#define CALL_FUNCTION_BUILTIN_O 41 +#define CALL_FUNCTION_BUILTIN_FAST 42 +#define CALL_FUNCTION_LEN 43 +#define CALL_FUNCTION_ISINSTANCE 44 +#define CALL_FUNCTION_PY_SIMPLE 45 +#define JUMP_ABSOLUTE_QUICK 46 +#define LOAD_ATTR_ADAPTIVE 47 +#define LOAD_ATTR_INSTANCE_VALUE 48 +#define LOAD_ATTR_WITH_HINT 55 +#define LOAD_ATTR_SLOT 56 +#define LOAD_ATTR_MODULE 57 +#define LOAD_GLOBAL_ADAPTIVE 58 +#define LOAD_GLOBAL_MODULE 59 +#define LOAD_GLOBAL_BUILTIN 62 +#define LOAD_METHOD_ADAPTIVE 63 +#define LOAD_METHOD_CACHED 64 +#define LOAD_METHOD_CLASS 65 +#define LOAD_METHOD_MODULE 66 +#define LOAD_METHOD_NO_DICT 67 +#define STORE_ATTR_ADAPTIVE 75 +#define STORE_ATTR_INSTANCE_VALUE 76 +#define STORE_ATTR_SLOT 77 
+#define STORE_ATTR_WITH_HINT 78 +#define LOAD_FAST__LOAD_FAST 79 +#define STORE_FAST__LOAD_FAST 80 +#define LOAD_FAST__LOAD_CONST 81 +#define LOAD_CONST__LOAD_FAST 87 +#define STORE_FAST__STORE_FAST 88 #define DO_TRACING 255 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { diff --git a/Lib/opcode.py b/Lib/opcode.py index 40be5d82c00c23..43db834105d52e 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -236,6 +236,7 @@ def jabs_op(name, op): "COMPARE_OP_ADAPTIVE", "COMPARE_OP_FLOAT", "COMPARE_OP_INT", + "COMPARE_OP_STR", "BINARY_SUBSCR_ADAPTIVE", "BINARY_SUBSCR_GETITEM", "BINARY_SUBSCR_LIST_INT", diff --git a/Python/ceval.c b/Python/ceval.c index c3e848f6babdb5..bfa67beeca8924 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3760,6 +3760,30 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } + TARGET(COMPARE_OP_STR) { + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + PyObject *right = TOP(); + PyObject *left = SECOND(); + DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); + DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP); + DEOPT_IF(!PyUnicode_IS_READY(left), COMPARE_OP); + DEOPT_IF(!PyUnicode_IS_READY(right), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + assert(cache0->original_oparg == Py_EQ || cache0->original_oparg == Py_NE); + int cmp = Py_Is(left, right) || _PyUnicode_EQ(left, right); + cmp ^= (cache0->original_oparg == Py_NE); + PyObject *res = PyBool_FromLong(cmp); + assert(!PyErr_Occurred()); + SET_SECOND(res); + STACK_SHRINK(1); + Py_DECREF(left); + Py_DECREF(right); + PREDICT(POP_JUMP_IF_FALSE); + PREDICT(POP_JUMP_IF_TRUE); + DISPATCH(); + } + TARGET(IS_OP) { PyObject *right = POP(); PyObject *left = TOP(); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index f7b47a840d7ea9..6d802c273f41d7 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -23,20 +23,21 @@ static void *opcode_targets[256] = { 
&&TARGET_COMPARE_OP_ADAPTIVE, &&TARGET_COMPARE_OP_FLOAT, &&TARGET_COMPARE_OP_INT, - &&TARGET_BINARY_SUBSCR_ADAPTIVE, + &&TARGET_COMPARE_OP_STR, &&TARGET_BINARY_SUBSCR, + &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_BINARY_SUBSCR_GETITEM, &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_BINARY_SUBSCR_TUPLE_INT, - &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_GET_LEN, &&TARGET_MATCH_MAPPING, &&TARGET_MATCH_SEQUENCE, &&TARGET_MATCH_KEYS, - &&TARGET_STORE_SUBSCR_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_PUSH_EXC_INFO, - &&TARGET_STORE_SUBSCR_LIST_INT, + &&TARGET_STORE_SUBSCR_ADAPTIVE, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_STORE_SUBSCR_LIST_INT, &&TARGET_STORE_SUBSCR_DICT, &&TARGET_CALL_FUNCTION_ADAPTIVE, &&TARGET_CALL_FUNCTION_BUILTIN_O, @@ -47,26 +48,25 @@ static void *opcode_targets[256] = { &&TARGET_JUMP_ABSOLUTE_QUICK, &&TARGET_LOAD_ATTR_ADAPTIVE, &&TARGET_LOAD_ATTR_INSTANCE_VALUE, - &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, &&TARGET_BEFORE_ASYNC_WITH, &&TARGET_BEFORE_WITH, &&TARGET_END_ASYNC_FOR, + &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_MODULE, &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_LOAD_METHOD_CACHED, &&TARGET_LOAD_METHOD_CLASS, &&TARGET_LOAD_METHOD_MODULE, &&TARGET_LOAD_METHOD_NO_DICT, - &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_GET_ITER, &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_PRINT_EXPR, @@ -74,20 +74,20 @@ static void *opcode_targets[256] = { &&TARGET_YIELD_FROM, &&TARGET_GET_AWAITABLE, &&TARGET_LOAD_ASSERTION_ERROR, + &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_STORE_ATTR_INSTANCE_VALUE, &&TARGET_STORE_ATTR_SLOT, &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_LOAD_FAST__LOAD_CONST, - &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_LIST_TO_TUPLE, 
&&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, diff --git a/Python/specialize.c b/Python/specialize.c index 1a073443656eda..ad003bea30313e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -144,6 +144,11 @@ _Py_GetSpecializationStats(void) { static void print_stats(FILE *out, SpecializationStats *stats, const char *name) { + long total = (stats->specialization_failure + stats->specialization_success + + stats->hit + stats->deferred + stats->miss + + stats->deopt + stats->unquickened); + double ratio = (double)stats->hit / (double)(total) * 100.0; + fprintf(out, " Hit percentage: %.2f%%\n", ratio); PRINT_STAT(name, specialization_success); PRINT_STAT(name, specialization_failure); PRINT_STAT(name, hit); @@ -491,6 +496,9 @@ initial_counter_value(void) { #define SPEC_FAIL_BAD_CALL_FLAGS 17 #define SPEC_FAIL_CLASS 18 +/* COMPARE_OP */ +#define SPEC_FAIL_STRING_COMPARE 13 +#define SPEC_FAIL_STRING_UNREADY 14 static int specialize_module_load_attr( @@ -1544,7 +1552,6 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, adaptive->counter = initial_counter_value(); } - void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) @@ -1562,6 +1569,21 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT, _Py_OPARG(*instr)); goto success; } + else if (PyUnicode_CheckExact(lhs)) { + int op = adaptive->original_oparg; + if (op != Py_EQ && op != Py_NE) { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_STRING_COMPARE); + goto failure; + } + else if (!PyUnicode_IS_READY(lhs) || !PyUnicode_IS_READY(rhs)) { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_STRING_UNREADY); + goto failure; + } + else { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_STR, _Py_OPARG(*instr)); + 
goto success; + } + } else { SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_OTHER); goto failure; From 26cc70c56e26ecff640e1903c8d5fa523c87503c Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 23 Nov 2021 21:01:57 +0000 Subject: [PATCH 03/10] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst b/Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst new file mode 100644 index 00000000000000..316daf966f149c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-11-23-21-01-56.bpo-45885.3IxeCX.rst @@ -0,0 +1 @@ +Specialized the ``COMPARE_OP`` opcode using the PEP 659 machinery. \ No newline at end of file From 6bfe99a15fa6ed57ae145967c14ee7a1c26007cc Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sun, 28 Nov 2021 10:34:05 -0500 Subject: [PATCH 04/10] Use 3 combined instructions --- Include/internal/pycore_code.h | 5 ++ Include/internal/pycore_long.h | 2 +- Include/opcode.h | 6 +- Lib/opcode.py | 6 +- Objects/longobject.c | 25 +++------ Python/ceval.c | 100 ++++++++++++++++++++------------- Python/opcode_targets.h | 6 +- Python/specialize.c | 53 +++++++++++------ 8 files changed, 118 insertions(+), 85 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index e5d410281ee02d..743638645e3580 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -42,6 +42,10 @@ typedef struct { uint16_t defaults_len; } _PyCallCache; +typedef struct { + int mask; +} _PyCompareCache; + /* Add specialized versions of entries to this union. 
* * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 @@ -59,6 +63,7 @@ typedef union { _PyLoadGlobalCache load_global; _PyObjectCache obj; _PyCallCache call; + _PyCompareCache compare; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 990aaa32ebf50b..db0bf65b8788c9 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -24,7 +24,7 @@ static inline PyObject* _PyLong_GetOne(void) PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right); PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right); PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right); -PyObject *_PyLong_RichCompare(PyLongObject *left, PyLongObject *right, int op); +Py_ssize_t _PyLong_RichCompare(PyLongObject *left, PyLongObject *right); /* Used by Python/mystrtoul.c, _PyBytes_FromHex(), _PyBytes_DecodeEscape(), etc. 
*/ diff --git a/Include/opcode.h b/Include/opcode.h index 242324c1e808e0..28cae207d9ccd5 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -121,9 +121,9 @@ extern "C" { #define BINARY_OP_SUBTRACT_INT 19 #define BINARY_OP_SUBTRACT_FLOAT 20 #define COMPARE_OP_ADAPTIVE 21 -#define COMPARE_OP_FLOAT 22 -#define COMPARE_OP_INT 23 -#define COMPARE_OP_STR 24 +#define COMPARE_OP_FLOAT_JUMP 22 +#define COMPARE_OP_INT_JUMP 23 +#define COMPARE_OP_STR_JUMP 24 #define BINARY_SUBSCR_ADAPTIVE 26 #define BINARY_SUBSCR_GETITEM 27 #define BINARY_SUBSCR_LIST_INT 28 diff --git a/Lib/opcode.py b/Lib/opcode.py index 43db834105d52e..063b5705645948 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -234,9 +234,9 @@ def jabs_op(name, op): "BINARY_OP_SUBTRACT_INT", "BINARY_OP_SUBTRACT_FLOAT", "COMPARE_OP_ADAPTIVE", - "COMPARE_OP_FLOAT", - "COMPARE_OP_INT", - "COMPARE_OP_STR", + "COMPARE_OP_FLOAT_JUMP", + "COMPARE_OP_INT_JUMP", + "COMPARE_OP_STR_JUMP", "BINARY_SUBSCR_ADAPTIVE", "BINARY_SUBSCR_GETITEM", "BINARY_SUBSCR_LIST_INT", diff --git a/Objects/longobject.c b/Objects/longobject.c index 096bcdf0eb836b..7f2e743fd245c8 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2956,35 +2956,24 @@ long_compare(PyLongObject *a, PyLongObject *b) return sign; } -PyObject * -_PyLong_RichCompare(PyLongObject *left, PyLongObject *right, int op) +Py_ssize_t +_PyLong_RichCompare(PyLongObject *left, PyLongObject *right) { - Py_ssize_t diff; if (left == right) { - diff = 0; + return 0; } else { - diff = long_compare(left, right); + return long_compare(left, right); } - int cmp; - switch (op) { - case Py_LT: cmp = (diff < 0); break; - case Py_LE: cmp = (diff <= 0); break; - case Py_EQ: cmp = (diff == 0); break; - case Py_NE: cmp = (diff != 0); break; - case Py_GT: cmp = (diff > 0); break; - case Py_GE: cmp = (diff >= 0); break; - default: Py_UNREACHABLE(); - } - return PyBool_FromLong(cmp); } static PyObject * long_richcompare(PyObject *self, PyObject *other, int op) { CHECK_BINOP(self, 
other); - return _PyLong_RichCompare((PyLongObject *)self, - (PyLongObject *)other, op); + Py_ssize_t result = _PyLong_RichCompare((PyLongObject *)self, + (PyLongObject *)other); + Py_RETURN_RICHCOMPARE(result, 0, op); } static Py_hash_t diff --git a/Python/ceval.c b/Python/ceval.c index bfa67beeca8924..077a64bac44f40 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3705,64 +3705,77 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } } - TARGET(COMPARE_OP_FLOAT) { + TARGET(COMPARE_OP_FLOAT_JUMP) { + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false) SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + int mask = caches[-1].compare.mask; PyObject *right = TOP(); PyObject *left = SECOND(); DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); - STAT_INC(COMPARE_OP, hit); double dleft = PyFloat_AS_DOUBLE(left); double dright = PyFloat_AS_DOUBLE(right); - int cmp; - switch (cache0->original_oparg) { - case Py_LT: cmp = (dleft < dright); break; - case Py_LE: cmp = (dleft <= dright); break; - case Py_EQ: cmp = (dleft == dright); break; - case Py_NE: cmp = (dleft != dright); break; - case Py_GT: cmp = (dleft > dright); break; - case Py_GE: cmp = (dleft >= dright); break; - default: Py_UNREACHABLE(); - } - // This cannot fail - PyObject *res = PyBool_FromLong(cmp); - assert(!PyErr_Occurred()); - SET_SECOND(res); - STACK_SHRINK(1); + DEOPT_IF(isnan(dleft), COMPARE_OP); + DEOPT_IF(isnan(dright), COMPARE_OP); + STAT_INC(COMPARE_OP, hit); + NEXTOPARG(); + STACK_SHRINK(2); Py_DECREF(left); Py_DECREF(right); - PREDICT(POP_JUMP_IF_FALSE); - PREDICT(POP_JUMP_IF_TRUE); - DISPATCH(); + assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); + int sign = (dleft > dright) - (dleft < dright); + int jump = (1 << (sign + 1)) & mask; + if (!jump) { + next_instr++; + NOTRACE_DISPATCH(); + } + else { + 
JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + NOTRACE_DISPATCH(); + } } - TARGET(COMPARE_OP_INT) { + TARGET(COMPARE_OP_INT_JUMP) { + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (int ? int) + POP_JUMP_IF_(true/false) SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + int mask = caches[-1].compare.mask; PyObject *right = TOP(); PyObject *left = SECOND(); DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); STAT_INC(COMPARE_OP, hit); // This cannot fail. - PyObject *res = _PyLong_RichCompare( - (PyLongObject *)left, (PyLongObject *)right, - cache0->original_oparg); - assert(res != NULL); - assert(!PyErr_Occurred()); - SET_SECOND(res); - STACK_SHRINK(1); + Py_ssize_t cmp = _PyLong_RichCompare((PyLongObject *)left, + (PyLongObject *)right); + NEXTOPARG(); + STACK_SHRINK(2); Py_DECREF(left); Py_DECREF(right); - PREDICT(POP_JUMP_IF_FALSE); - PREDICT(POP_JUMP_IF_TRUE); - DISPATCH(); + assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); + int sign = (cmp > 0) - (cmp < 0); + int jump = (1 << (sign + 1)) & mask; + if (!jump) { + next_instr++; + NOTRACE_DISPATCH(); + } + else { + JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + NOTRACE_DISPATCH(); + } } - TARGET(COMPARE_OP_STR) { + TARGET(COMPARE_OP_STR_JUMP) { + assert(cframe.use_tracing == 0); + // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) SpecializedCacheEntry *caches = GET_CACHE(); _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + int mask = caches[-1].compare.mask; PyObject *right = TOP(); PyObject *left = SECOND(); DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); @@ -3771,17 +3784,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(!PyUnicode_IS_READY(right), COMPARE_OP); STAT_INC(COMPARE_OP, hit); assert(cache0->original_oparg == Py_EQ || cache0->original_oparg == Py_NE); + NEXTOPARG(); + assert(opcode == POP_JUMP_IF_TRUE || opcode == 
POP_JUMP_IF_FALSE); int cmp = Py_Is(left, right) || _PyUnicode_EQ(left, right); - cmp ^= (cache0->original_oparg == Py_NE); - PyObject *res = PyBool_FromLong(cmp); - assert(!PyErr_Occurred()); - SET_SECOND(res); - STACK_SHRINK(1); + STACK_SHRINK(2); Py_DECREF(left); Py_DECREF(right); - PREDICT(POP_JUMP_IF_FALSE); - PREDICT(POP_JUMP_IF_TRUE); - DISPATCH(); + assert(cmp == 0 || cmp == 1); + assert(mask == 0 || mask == 1); + int jump = cmp ^ mask; + if (!jump) { + next_instr++; + NOTRACE_DISPATCH(); + } + else { + JUMPTO(oparg); + CHECK_EVAL_BREAKER(); + NOTRACE_DISPATCH(); + } } TARGET(IS_OP) { diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 6d802c273f41d7..c79c33be57e179 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -21,9 +21,9 @@ static void *opcode_targets[256] = { &&TARGET_BINARY_OP_SUBTRACT_INT, &&TARGET_BINARY_OP_SUBTRACT_FLOAT, &&TARGET_COMPARE_OP_ADAPTIVE, - &&TARGET_COMPARE_OP_FLOAT, - &&TARGET_COMPARE_OP_INT, - &&TARGET_COMPARE_OP_STR, + &&TARGET_COMPARE_OP_FLOAT_JUMP, + &&TARGET_COMPARE_OP_INT_JUMP, + &&TARGET_COMPARE_OP_STR_JUMP, &&TARGET_BINARY_SUBSCR, &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_BINARY_SUBSCR_GETITEM, diff --git a/Python/specialize.c b/Python/specialize.c index ad003bea30313e..b27519eb2d8c5d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -144,11 +144,6 @@ _Py_GetSpecializationStats(void) { static void print_stats(FILE *out, SpecializationStats *stats, const char *name) { - long total = (stats->specialization_failure + stats->specialization_success - + stats->hit + stats->deferred + stats->miss - + stats->deopt + stats->unquickened); - double ratio = (double)stats->hit / (double)(total) * 100.0; - fprintf(out, " Hit percentage: %.2f%%\n", ratio); PRINT_STAT(name, specialization_success); PRINT_STAT(name, specialization_failure); PRINT_STAT(name, hit); @@ -259,7 +254,7 @@ static uint8_t cache_requirements[256] = { [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and 
_PyObjectCache/_PyCallCache */ [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [BINARY_OP] = 1, // _PyAdaptiveEntry - [COMPARE_OP] = 1, // _PyAdaptiveEntry + [COMPARE_OP] = 2, /* _PyAdaptiveEntry and _PyCompareCache */ }; /* Return the oparg for the cache_offset and instruction index. @@ -499,6 +494,7 @@ initial_counter_value(void) { /* COMPARE_OP */ #define SPEC_FAIL_STRING_COMPARE 13 #define SPEC_FAIL_STRING_UNREADY 14 +#define SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP 15 static int specialize_module_load_attr( @@ -1552,25 +1548,50 @@ _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, adaptive->counter = initial_counter_value(); } +static int compare_masks[] = { + // 1-bit: jump if less than + // 2-bit: jump if equal + // 4-bit: jump if greater + [Py_LT] = 1 | 0 | 0, + [Py_LE] = 1 | 2 | 0, + [Py_EQ] = 0 | 2 | 0, + [Py_NE] = 1 | 0 | 4, + [Py_GT] = 0 | 0 | 4, + [Py_GE] = 0 | 2 | 4, +}; + void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) { _PyAdaptiveEntry *adaptive = &cache->adaptive; + _PyCompareCache *cache1 = &cache[-1].compare; + int op = adaptive->original_oparg; + int next_opcode = _Py_OPCODE(instr[1]); + if (next_opcode != POP_JUMP_IF_FALSE && next_opcode != POP_JUMP_IF_TRUE) { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP); + goto failure; + } + assert(op <= Py_GE); + int mask = compare_masks[op]; + if (next_opcode == POP_JUMP_IF_FALSE) { + mask = (1 | 2 | 4) & ~mask; + } if (Py_TYPE(lhs) != Py_TYPE(rhs)) { SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_DIFFERENT_TYPES); goto failure; } - else if (PyFloat_CheckExact(lhs)) { - *instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT, _Py_OPARG(*instr)); + if (PyFloat_CheckExact(lhs)) { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT_JUMP, _Py_OPARG(*instr)); + cache1->mask = mask; goto success; } - else if (PyLong_CheckExact(lhs)) { - *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT, _Py_OPARG(*instr)); + if 
(PyLong_CheckExact(lhs)) { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, _Py_OPARG(*instr)); + cache1->mask = mask; goto success; } - else if (PyUnicode_CheckExact(lhs)) { - int op = adaptive->original_oparg; + if (PyUnicode_CheckExact(lhs)) { if (op != Py_EQ && op != Py_NE) { SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_STRING_COMPARE); goto failure; @@ -1580,14 +1601,12 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, goto failure; } else { - *instr = _Py_MAKECODEUNIT(COMPARE_OP_STR, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(COMPARE_OP_STR_JUMP, _Py_OPARG(*instr)); + cache1->mask = (mask & 2) == 0; goto success; } } - else { - SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_OTHER); - goto failure; - } + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_OTHER); failure: STAT_INC(COMPARE_OP, specialization_failure); cache_backoff(adaptive); From a0f558e5c1dec93630e29cbd44d0eb24426fad07 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sun, 28 Nov 2021 14:23:56 -0500 Subject: [PATCH 05/10] remove unused variables --- Python/ceval.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 077a64bac44f40..9c4803bf803be4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3709,7 +3709,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false) SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; int mask = caches[-1].compare.mask; PyObject *right = TOP(); PyObject *left = SECOND(); @@ -3742,7 +3741,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (int ? 
int) + POP_JUMP_IF_(true/false) SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; int mask = caches[-1].compare.mask; PyObject *right = TOP(); PyObject *left = SECOND(); @@ -3774,7 +3772,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; int mask = caches[-1].compare.mask; PyObject *right = TOP(); PyObject *left = SECOND(); @@ -3783,7 +3780,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(!PyUnicode_IS_READY(left), COMPARE_OP); DEOPT_IF(!PyUnicode_IS_READY(right), COMPARE_OP); STAT_INC(COMPARE_OP, hit); - assert(cache0->original_oparg == Py_EQ || cache0->original_oparg == Py_NE); + assert(caches[0].adaptive.original_oparg == Py_EQ || + caches[0].adaptive.original_oparg == Py_NE); NEXTOPARG(); assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); int cmp = Py_Is(left, right) || _PyUnicode_EQ(left, right); From 124693c5c28ae82f296803ed8dc1f4f7e5be3ca1 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Wed, 1 Dec 2021 22:05:19 -0500 Subject: [PATCH 06/10] Remove adaptivity when specialization is impossible --- Python/specialize.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/specialize.c b/Python/specialize.c index b27519eb2d8c5d..51a2a48fea9650 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1569,7 +1569,9 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, int op = adaptive->original_oparg; int next_opcode = _Py_OPCODE(instr[1]); if (next_opcode != POP_JUMP_IF_FALSE && next_opcode != POP_JUMP_IF_TRUE) { + // Can't ever combine, so don't bother being adaptive.
SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP); + *instr = _Py_MAKECODEUNIT(COMPARE_OP, adaptive->original_oparg); goto failure; } assert(op <= Py_GE); From 91fc62ab63ff1e06d0327504ca80895091110a02 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 2 Dec 2021 01:34:59 -0500 Subject: [PATCH 07/10] Only consider Py_ABS(Py_SIZE(int)) <= 1 --- Include/internal/pycore_long.h | 1 - Objects/longobject.c | 18 +++++------------- Python/ceval.c | 12 +++++++----- Python/specialize.c | 13 ++++++++++--- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index db0bf65b8788c9..b9f926996d8107 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -24,7 +24,6 @@ static inline PyObject* _PyLong_GetOne(void) PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right); PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right); PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right); -Py_ssize_t _PyLong_RichCompare(PyLongObject *left, PyLongObject *right); /* Used by Python/mystrtoul.c, _PyBytes_FromHex(), _PyBytes_DecodeEscape(), etc. 
*/ diff --git a/Objects/longobject.c b/Objects/longobject.c index 7f2e743fd245c8..ce4f0d72540d38 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2956,23 +2956,15 @@ long_compare(PyLongObject *a, PyLongObject *b) return sign; } -Py_ssize_t -_PyLong_RichCompare(PyLongObject *left, PyLongObject *right) -{ - if (left == right) { - return 0; - } - else { - return long_compare(left, right); - } -} - static PyObject * long_richcompare(PyObject *self, PyObject *other, int op) { + Py_ssize_t result; CHECK_BINOP(self, other); - Py_ssize_t result = _PyLong_RichCompare((PyLongObject *)self, - (PyLongObject *)other); + if (self == other) + result = 0; + else + result = long_compare((PyLongObject*)self, (PyLongObject*)other); Py_RETURN_RICHCOMPARE(result, 0, op); } diff --git a/Python/ceval.c b/Python/ceval.c index 9c4803bf803be4..c8d5b1bccd9878 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3716,6 +3716,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); double dleft = PyFloat_AS_DOUBLE(left); double dright = PyFloat_AS_DOUBLE(right); + int sign = (dleft > dright) - (dleft < dright); DEOPT_IF(isnan(dleft), COMPARE_OP); DEOPT_IF(isnan(dright), COMPARE_OP); STAT_INC(COMPARE_OP, hit); @@ -3724,7 +3725,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(left); Py_DECREF(right); assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); - int sign = (dleft > dright) - (dleft < dright); int jump = (1 << (sign + 1)) & mask; if (!jump) { next_instr++; @@ -3746,16 +3746,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *left = SECOND(); DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); + DEOPT_IF((size_t)(Py_SIZE(left) + 1) > 2, COMPARE_OP); + DEOPT_IF((size_t)(Py_SIZE(right) + 1) > 2, COMPARE_OP); STAT_INC(COMPARE_OP, hit); - // This 
cannot fail. - Py_ssize_t cmp = _PyLong_RichCompare((PyLongObject *)left, - (PyLongObject *)right); + assert(Py_ABS(Py_SIZE(left)) <= 1 && Py_ABS(Py_SIZE(right)) <= 1); + Py_ssize_t ileft = Py_SIZE(left) * ((PyLongObject *)left)->ob_digit[0]; + Py_ssize_t iright = Py_SIZE(right) * ((PyLongObject *)right)->ob_digit[0]; + int sign = (ileft > iright) - (ileft < iright); NEXTOPARG(); STACK_SHRINK(2); Py_DECREF(left); Py_DECREF(right); assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); - int sign = (cmp > 0) - (cmp < 0); int jump = (1 << (sign + 1)) & mask; if (!jump) { next_instr++; diff --git a/Python/specialize.c b/Python/specialize.c index 51a2a48fea9650..c913370fdb033b 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -495,6 +495,7 @@ initial_counter_value(void) { #define SPEC_FAIL_STRING_COMPARE 13 #define SPEC_FAIL_STRING_UNREADY 14 #define SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP 15 +#define SPEC_FAIL_BIG_INT 16 static int specialize_module_load_attr( @@ -1589,9 +1590,15 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, goto success; } if (PyLong_CheckExact(lhs)) { - *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, _Py_OPARG(*instr)); - cache1->mask = mask; - goto success; + if (Py_ABS(Py_SIZE(lhs)) <= 1 && Py_ABS(Py_SIZE(rhs)) <= 1) { + *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, _Py_OPARG(*instr)); + cache1->mask = mask; + goto success; + } + else { + SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_BIG_INT); + goto failure; + } } if (PyUnicode_CheckExact(lhs)) { if (op != Py_EQ && op != Py_NE) { From 24c4def1b9e889bb339ea1c5d84bfadcb454e569 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 2 Dec 2021 15:01:42 -0500 Subject: [PATCH 08/10] Pack the mask into _PyAdaptiveEntry.index --- Include/internal/pycore_code.h | 4 ---- Python/ceval.c | 14 +++++++------- Python/specialize.c | 13 ++++++------- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 
743638645e3580..4bd33a0148bfb9 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -42,9 +42,6 @@ typedef struct { uint16_t defaults_len; } _PyCallCache; -typedef struct { - int mask; -} _PyCompareCache; /* Add specialized versions of entries to this union. * @@ -63,7 +60,6 @@ typedef union { _PyLoadGlobalCache load_global; _PyObjectCache obj; _PyCallCache call; - _PyCompareCache compare; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) diff --git a/Python/ceval.c b/Python/ceval.c index c8d5b1bccd9878..32427d1909833b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3709,7 +3709,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (float ? float) + POP_JUMP_IF_(true/false) SpecializedCacheEntry *caches = GET_CACHE(); - int mask = caches[-1].compare.mask; + int when_to_jump_mask = caches[0].adaptive.index; PyObject *right = TOP(); PyObject *left = SECOND(); DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP); @@ -3725,7 +3725,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(left); Py_DECREF(right); assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); - int jump = (1 << (sign + 1)) & mask; + int jump = (1 << (sign + 1)) & when_to_jump_mask; if (!jump) { next_instr++; NOTRACE_DISPATCH(); @@ -3741,7 +3741,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (int ? 
int) + POP_JUMP_IF_(true/false) SpecializedCacheEntry *caches = GET_CACHE(); - int mask = caches[-1].compare.mask; + int when_to_jump_mask = caches[0].adaptive.index; PyObject *right = TOP(); PyObject *left = SECOND(); DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); @@ -3758,7 +3758,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(left); Py_DECREF(right); assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); - int jump = (1 << (sign + 1)) & mask; + int jump = (1 << (sign + 1)) & when_to_jump_mask; if (!jump) { next_instr++; NOTRACE_DISPATCH(); @@ -3774,7 +3774,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr assert(cframe.use_tracing == 0); // Combined: COMPARE_OP (str == str or str != str) + POP_JUMP_IF_(true/false) SpecializedCacheEntry *caches = GET_CACHE(); - int mask = caches[-1].compare.mask; + int invert = caches[0].adaptive.index; PyObject *right = TOP(); PyObject *left = SECOND(); DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); @@ -3791,8 +3791,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(left); Py_DECREF(right); assert(cmp == 0 || cmp == 1); - assert(mask == 0 || mask == 1); - int jump = cmp ^ mask; + assert(invert == 0 || invert == 1); + int jump = cmp ^ invert; if (!jump) { next_instr++; NOTRACE_DISPATCH(); diff --git a/Python/specialize.c b/Python/specialize.c index c913370fdb033b..bbd9bc395c5287 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -254,7 +254,7 @@ static uint8_t cache_requirements[256] = { [CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [BINARY_OP] = 1, // _PyAdaptiveEntry - [COMPARE_OP] = 2, /* _PyAdaptiveEntry and _PyCompareCache */ + [COMPARE_OP] = 1, /* _PyAdaptiveEntry */ }; /* Return the oparg for the cache_offset and instruction index. 
@@ -1566,7 +1566,6 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) { _PyAdaptiveEntry *adaptive = &cache->adaptive; - _PyCompareCache *cache1 = &cache[-1].compare; int op = adaptive->original_oparg; int next_opcode = _Py_OPCODE(instr[1]); if (next_opcode != POP_JUMP_IF_FALSE && next_opcode != POP_JUMP_IF_TRUE) { @@ -1576,9 +1575,9 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, goto failure; } assert(op <= Py_GE); - int mask = compare_masks[op]; + int when_to_jump_mask = compare_masks[op]; if (next_opcode == POP_JUMP_IF_FALSE) { - mask = (1 | 2 | 4) & ~mask; + when_to_jump_mask = (1 | 2 | 4) & ~when_to_jump_mask; } if (Py_TYPE(lhs) != Py_TYPE(rhs)) { SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_DIFFERENT_TYPES); @@ -1586,13 +1585,13 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, } if (PyFloat_CheckExact(lhs)) { *instr = _Py_MAKECODEUNIT(COMPARE_OP_FLOAT_JUMP, _Py_OPARG(*instr)); - cache1->mask = mask; + adaptive->index = when_to_jump_mask; goto success; } if (PyLong_CheckExact(lhs)) { if (Py_ABS(Py_SIZE(lhs)) <= 1 && Py_ABS(Py_SIZE(rhs)) <= 1) { *instr = _Py_MAKECODEUNIT(COMPARE_OP_INT_JUMP, _Py_OPARG(*instr)); - cache1->mask = mask; + adaptive->index = when_to_jump_mask; goto success; } else { @@ -1611,7 +1610,7 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, } else { *instr = _Py_MAKECODEUNIT(COMPARE_OP_STR_JUMP, _Py_OPARG(*instr)); - cache1->mask = (mask & 2) == 0; + adaptive->index = (when_to_jump_mask & 2) == 0; goto success; } } From 24ba945bb44251ee682e4621baa95fc72e91b3b5 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 2 Dec 2021 16:12:25 -0500 Subject: [PATCH 09/10] Introduce and use _PyUnicode_Equal --- Include/cpython/unicodeobject.h | 3 +++ Objects/unicodeobject.c | 14 ++++++++++++++ Python/ceval.c | 11 ++++++----- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 
ab4aebf5e70b93..e02137c7cad7d6 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -1016,6 +1016,9 @@ PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); and where the hash values are equal (i.e. a very probable match) */ PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); +/* Equality check. Returns -1 on failure. */ +PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *); + PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *); PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 61fc34d71da3ce..532c48ad4d4aad 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11168,6 +11168,20 @@ unicode_compare_eq(PyObject *str1, PyObject *str2) return (cmp == 0); } +int +_PyUnicode_Equal(PyObject *str1, PyObject *str2) +{ + assert(PyUnicode_CheckExact(str1)); + assert(PyUnicode_CheckExact(str2)); + if (str1 == str2) { + return 1; + } + if (PyUnicode_READY(str1) || PyUnicode_READY(str2)) { + return -1; + } + return unicode_compare_eq(str1, str2); +} + int PyUnicode_Compare(PyObject *left, PyObject *right) diff --git a/Python/ceval.c b/Python/ceval.c index 32427d1909833b..7a54e78a457a3f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3779,20 +3779,21 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *left = SECOND(); DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP); - DEOPT_IF(!PyUnicode_IS_READY(left), COMPARE_OP); - DEOPT_IF(!PyUnicode_IS_READY(right), COMPARE_OP); STAT_INC(COMPARE_OP, hit); + int res = _PyUnicode_Equal(left, right); + if (res < 0) { + goto error; + } assert(caches[0].adaptive.original_oparg == Py_EQ || caches[0].adaptive.original_oparg == Py_NE); NEXTOPARG(); assert(opcode == POP_JUMP_IF_TRUE || opcode == POP_JUMP_IF_FALSE); - int cmp = Py_Is(left, right) || _PyUnicode_EQ(left, right); 
STACK_SHRINK(2); Py_DECREF(left); Py_DECREF(right); - assert(cmp == 0 || cmp == 1); + assert(res == 0 || res == 1); assert(invert == 0 || invert == 1); - int jump = cmp ^ invert; + int jump = res ^ invert; if (!jump) { next_instr++; NOTRACE_DISPATCH(); From bb979ded334533106ae9e1fa89d3a17fbd246c2e Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 2 Dec 2021 16:23:00 -0500 Subject: [PATCH 10/10] Don't check for readiness in specialization --- Python/specialize.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index bbd9bc395c5287..7d88cf3dc1a19f 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -493,9 +493,8 @@ initial_counter_value(void) { /* COMPARE_OP */ #define SPEC_FAIL_STRING_COMPARE 13 -#define SPEC_FAIL_STRING_UNREADY 14 -#define SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP 15 -#define SPEC_FAIL_BIG_INT 16 +#define SPEC_FAIL_NOT_FOLLOWED_BY_COND_JUMP 14 +#define SPEC_FAIL_BIG_INT 15 static int specialize_module_load_attr( @@ -1604,10 +1603,6 @@ _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_STRING_COMPARE); goto failure; } - else if (!PyUnicode_IS_READY(lhs) || !PyUnicode_IS_READY(rhs)) { - SPECIALIZATION_FAIL(COMPARE_OP, SPEC_FAIL_STRING_UNREADY); - goto failure; - } else { *instr = _Py_MAKECODEUNIT(COMPARE_OP_STR_JUMP, _Py_OPARG(*instr)); adaptive->index = (when_to_jump_mask & 2) == 0;