Skip to content

Commit 3044375

Browse files
committed
Call lazy constants using 'mini-subroutines'
This looks to be twice as fast as classic marshal+exec!
1 parent a21f509 commit 3044375

File tree

2 files changed

+107
-72
lines changed

2 files changed

+107
-72
lines changed

Lib/test/test_new_pyc.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def test_function(self):
7373
dis.dis(f, depth=0)
7474
# breakpoint()
7575
assert f(1, 10) == 11
76+
assert f(a=1, b=10) == 11
7677

7778
def test_speed(self):
7879
body = " a, b = b, a\n"*100
@@ -81,28 +82,33 @@ def test_speed(self):
8182
for num in range(100)
8283
]
8384
source = "\n\n".join(functions)
85+
print("Starting speed test")
8486

8587
def helper(data, label):
8688
t0 = time.time()
8789
codes = []
88-
for _ in range(1000):
90+
for _ in range(5000):
8991
code = marshal.loads(data)
9092
codes.append(code)
9193
t1 = time.time()
9294
print(f"{label} load: {t1-t0:.3f}")
93-
t0 = time.time()
95+
t2 = time.time()
9496
for code in codes:
9597
exec(code, {})
96-
t1 = time.time()
97-
print(f"{label} exec: {t1-t0:.3f}")
98+
t3 = time.time()
99+
print(f"{label} exec: {t3-t2:.3f}")
100+
print(f" {label} total: {t3-t0:.3f}")
101+
return t3 - t0
98102

99103
code = compile(source, "<old>", "exec")
100104
data = marshal.dumps(code)
101-
helper(data, "Classic")
105+
tc = helper(data, "Classic")
102106

103107
data = pyco.serialize_source(source, "<new>")
104108
assert data.startswith(b"PYC.")
105-
helper(data, "New PYC")
109+
tn = helper(data, "New PYC")
110+
if tc and tn:
111+
print(f"Classic-to-new ratio: {tc/tn:.2f} (new is {100*(tc/tn-1):.0f}% faster)")
106112

107113

108114
if __name__ == "__main__":

Python/ceval.c

Lines changed: 95 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,65 +1075,54 @@ PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
10751075
return _PyEval_Vector(tstate, &desc, locals, NULL, 0, NULL);
10761076
}
10771077

1078-
1079-
/* Constants in PYC files are lazily loaded.
1080-
* For now, just construct a code object for these and eval it.
1081-
* (This is slow, and may defeat the purpose -- but each constant
1082-
* is only evaluated once, and the alternative would be a lot of code
1083-
* for a prototype.)
1084-
*/
1085-
static PyObject *
1086-
call_constant(PyThreadState *tstate, PyCodeObject *code, int oparg, PyObject *globals)
1078+
/* Activation records for LAZY_LOAD_CONSTANT "mini-subroutines" */
1079+
1080+
/* Saved interpreter state for one LAZY_LOAD_CONSTANT "mini-subroutine" call.
 *
 * LAZY_LOAD_CONSTANT fills in the prev_* fields from the eval loop's locals
 * before switching to the subroutine's instructions and stack;
 * RETURN_CONSTANT copies the instruction and stack fields back.
 *
 * NOTE(review): the RETURN_CONSTANT code visible in this diff never reads
 * prev_act_rec and never reassigns current_activation_record -- confirm the
 * current record is reset somewhere, otherwise it keeps pointing at a record
 * that has already been passed to free_activation_record().
 */
struct activation_record {
1081+
struct activation_record *prev_act_rec;  /* record that was current when this one was entered */
1082+
PyObject **prev_stack_bottom;  /* caller's stack_bottom */
1083+
PyObject **prev_stack_pointer;  /* caller's stack_pointer at the point of the call */
1084+
PyObject **prev_stack_top;  /* caller's stack_top */
1085+
_Py_CODEUNIT *prev_first_instr;  /* caller's first_instr */
1086+
_Py_CODEUNIT *prev_next_instr;  /* caller's next_instr, i.e. where execution resumes */
1087+
PyObject *stack[1];  /* first slot of the subroutine's own value stack; the allocation is sized to extend past this one declared element */
1088+
};
1089+
1090+
/* Allocate an activation record with room for `stacksize` value-stack slots.
 *
 * The memory comes from _PyThreadState_PushLocals(). On success every prev_*
 * field is set to NULL (the caller fills them in) and the record is returned;
 * the stack[] slots themselves are not initialized here. On allocation
 * failure PyErr_NoMemory() is called and NULL is returned.
 *
 * (Lines with a '-' gutter in this span are the removed call_constant() body.)
 */
static struct activation_record *
1091+
new_activation_record(PyThreadState *tstate, int stacksize)
10871092
{
1088-
struct lazy_pyc *pyc = code->co_pyc;
1089-
if (pyc == NULL) {
1090-
_PyErr_SetString(tstate, PyExc_SystemError,
1091-
"call_constant from non-PYC code");
1093+
/* NOTE(review): sizeof() is a byte count, while stacksize is used as a
 * count of PyObject* slots (stack_top = stack_bottom + stacksize at the
 * call site). Confirm which unit _PyThreadState_PushLocals expects; as
 * written the two units are added together. */
struct activation_record *new = (struct activation_record *)
1094+
_PyThreadState_PushLocals(tstate, sizeof(struct activation_record) + stacksize - 1);
1095+
if (new == NULL) {
1096+
PyErr_NoMemory();
10921097
return NULL;
10931098
}
1094-
PyObject *result = PyTuple_GetItem(pyc->consts, oparg);
1095-
if (result != NULL) {
1096-
Py_INCREF(result);
1097-
return result;
1098-
}
1099+
new->prev_act_rec = NULL;
1100+
new->prev_stack_bottom = NULL;
1101+
new->prev_stack_pointer = NULL;
1102+
new->prev_stack_top = NULL;
1103+
new->prev_first_instr = NULL;
1104+
new->prev_next_instr = NULL;
1105+
return new;
1106+
}
1107+
1108+
/* Release an activation record obtained from new_activation_record() by
 * handing it back to _PyThreadState_PopLocals().
 * NOTE(review): presumably `rec` must be the most recently pushed allocation
 * on this thread state -- confirm against _PyThreadState_PopLocals.
 */
static void
1109+
free_activation_record(PyThreadState *tstate, struct activation_record *rec)
1110+
{
1111+
_PyThreadState_PopLocals(tstate, (PyObject **)rec);
1112+
}
1113+
1114+
/* Locate the "mini-subroutine" that computes lazy constant number `oparg`.
 *
 * Looks up pyc->const_offsets[oparg], resolves it with lazy_get_pointer(),
 * and reads two uint32_t header words (stack size, then instruction count).
 * The address immediately after those two words is stored in *p_instrs and
 * the stack size in *p_stacksize.
 *
 * On the added ('+') lines `tstate` is not used and `n_instrs` is read but
 * not used. code->co_pyc is only checked with assert(), so unlike the removed
 * call_constant() there is no runtime SystemError for non-PYC code.
 *
 * (Lines with a '-' gutter in this span are the removed call_constant() body.)
 */
static void
1115+
get_subroutine_info(PyThreadState *tstate, PyCodeObject *code, int oparg,
1116+
_Py_CODEUNIT **p_instrs, int *p_stacksize)
1117+
{
1118+
struct lazy_pyc *pyc = code->co_pyc;
1119+
assert(pyc);
10991120
uint32_t offset = pyc->const_offsets[oparg];
11001121
uint32_t *pointer = (uint32_t *)lazy_get_pointer(pyc, offset);
11011122
uint32_t stacksize = *pointer++;
11021123
Py_ssize_t n_instrs = *pointer++;
1103-
PyObject *bytecode = PyBytes_FromStringAndSize((char *)pointer, 2*n_instrs);
1104-
if (bytecode == NULL) {
1105-
return NULL;
1106-
}
1107-
PyObject *name = PyUnicode_FromString("<dummy>");
1108-
if (name == NULL) {
1109-
Py_DECREF(name);
1110-
Py_DECREF(bytecode);
1111-
return NULL;
1112-
}
1113-
Py_INCREF(name); // For filename, right?
1114-
struct _PyCodeConstructor con = {
1115-
.name = name,
1116-
.filename = name,
1117-
.stacksize = stacksize,
1118-
.pyc = pyc,
1119-
.consts = pyc->consts,
1120-
// TODO: Do we ever need names here? .names = pyc->names,
1121-
};
1122-
PyCodeObject *newcode = _PyCode_New(&con);
1123-
if (newcode == NULL) {
1124-
Py_DECREF(bytecode);
1125-
return NULL;
1126-
}
1127-
newcode->co_code = bytecode;
1128-
newcode->co_firstinstr = (_Py_CODEUNIT *)PyBytes_AsString(bytecode);
1129-
Py_INCREF(pyc->consts);
1130-
newcode->co_consts = pyc->consts;
1131-
Py_INCREF(pyc->names);
1132-
newcode->co_names = pyc->names;
1133-
// unsigned char *cp = pointer;
1134-
result = PyEval_EvalCode((PyObject *)newcode, globals, NULL);
1135-
Py_DECREF(newcode); // TODO: DECREF(bytecode) or not?
1136-
return result;
1124+
/* The instructions start right after the two header words. */
*p_instrs = (_Py_CODEUNIT *)pointer;
1125+
*p_stacksize = stacksize;
11371126
}
11381127

11391128

@@ -1399,8 +1388,8 @@ eval_frame_handle_pending(PyThreadState *tstate)
13991388

14001389
/* The stack can grow at most MAXINT deep, as co_nlocals and
14011390
co_stacksize are ints. */
1402-
#define STACK_LEVEL() ((int)(stack_pointer - f->f_valuestack))
1403-
#define EMPTY() (STACK_LEVEL() == 0)
1391+
#define STACK_LEVEL() ((int)(stack_pointer - stack_bottom))
1392+
#define EMPTY() (stack_bottom == stack_pointer)
14041393
#define TOP() (stack_pointer[-1])
14051394
#define SECOND() (stack_pointer[-2])
14061395
#define THIRD() (stack_pointer[-3])
@@ -1417,20 +1406,20 @@ eval_frame_handle_pending(PyThreadState *tstate)
14171406
#ifdef LLTRACE
14181407
#define PUSH(v) { (void)(BASIC_PUSH(v), \
14191408
lltrace && prtrace(tstate, TOP(), "push")); \
1420-
assert(STACK_LEVEL() <= co->co_stacksize); }
1409+
assert(stack_pointer <= stack_top); }
14211410
#define POP() ((void)(lltrace && prtrace(tstate, TOP(), "pop")), \
14221411
BASIC_POP())
14231412
#define STACK_GROW(n) do { \
14241413
assert(n >= 0); \
14251414
(void)(BASIC_STACKADJ(n), \
14261415
lltrace && prtrace(tstate, TOP(), "stackadj")); \
1427-
assert(STACK_LEVEL() <= co->co_stacksize); \
1416+
assert(stack_pointer <= stack_top); \
14281417
} while (0)
14291418
#define STACK_SHRINK(n) do { \
14301419
assert(n >= 0); \
14311420
(void)(lltrace && prtrace(tstate, TOP(), "stackadj")); \
14321421
(void)(BASIC_STACKADJ(-(n))); \
1433-
assert(STACK_LEVEL() <= co->co_stacksize); \
1422+
assert(stack_bottom <= stack_pointer); \
14341423
} while (0)
14351424
#define EXT_POP(STACK_POINTER) ((void)(lltrace && \
14361425
prtrace(tstate, (STACK_POINTER)[-1], "ext_pop")), \
@@ -1483,6 +1472,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
14831472
int lastopcode = 0;
14841473
#endif
14851474
PyObject **stack_pointer; /* Next free slot in value stack */
1475+
PyObject **stack_bottom, **stack_top; /* Extent of stack (DEBUG only) */
14861476
_Py_CODEUNIT *next_instr;
14871477
int opcode; /* Current opcode */
14881478
int oparg; /* Current opcode argument, if any */
@@ -1519,6 +1509,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
15191509
specials = f->f_valuestack - FRAME_SPECIALS_SIZE;
15201510
co = (PyCodeObject *)specials[FRAME_SPECIALS_CODE_OFFSET];
15211511

1512+
/* subroutine activation records for LAZY_LOAD_CONSTANT */
1513+
struct activation_record *current_activation_record = NULL;
1514+
15221515
if (cframe.use_tracing) {
15231516
if (tstate->c_tracefunc != NULL) {
15241517
/* tstate->c_tracefunc, if defined, is a
@@ -1597,7 +1590,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
15971590
*/
15981591
assert(f->f_lasti >= -1);
15991592
next_instr = first_instr + f->f_lasti + 1;
1600-
stack_pointer = f->f_valuestack + f->f_stackdepth;
1593+
stack_bottom = f->f_valuestack;
1594+
stack_pointer = stack_bottom + f->f_stackdepth;
1595+
stack_top = stack_bottom + co->co_stacksize;
16011596
/* Set f->f_stackdepth to -1.
16021597
* Update when returning or calling trace function.
16031598
Having f_stackdepth <= 0 ensures that invalid
@@ -4412,10 +4407,30 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
44124407
case TARGET(LAZY_LOAD_CONSTANT): {
44134408
PyObject *value = GETITEM(consts, oparg);
44144409
if (value == NULL) {
4415-
value = call_constant(tstate, co, oparg, GLOBALS());
4416-
if (value == NULL) {
4410+
_Py_CODEUNIT *instrs;
4411+
int stacksize;
4412+
get_subroutine_info(tstate, co, oparg, &instrs, &stacksize);
4413+
4414+
struct activation_record *rec =
4415+
new_activation_record(tstate, stacksize);
4416+
if (rec == NULL) {
44174417
goto error;
44184418
}
4419+
rec->prev_act_rec = current_activation_record;
4420+
rec->prev_first_instr = first_instr;
4421+
rec->prev_next_instr = next_instr;
4422+
rec->prev_stack_bottom = stack_bottom;
4423+
rec->prev_stack_pointer = stack_pointer;
4424+
rec->prev_stack_top = stack_top;
4425+
4426+
stack_bottom = &rec->stack[0];
4427+
stack_pointer = stack_bottom;
4428+
stack_top = stack_bottom + stacksize;
4429+
first_instr = instrs;
4430+
next_instr = instrs;
4431+
4432+
current_activation_record = rec;
4433+
DISPATCH();
44194434
}
44204435
PUSH(value);
44214436
DISPATCH();
@@ -4524,13 +4539,27 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
45244539
}
45254540

45264541
case TARGET(RETURN_CONSTANT): {
4527-
retval = POP();
4528-
Py_INCREF(retval);
4529-
PyTuple_SET_ITEM(co->co_pyc->consts, oparg, retval);
4542+
PyObject *value = POP();
4543+
PyObject *shared = PyTuple_GET_ITEM(co->co_pyc->consts, oparg);
4544+
if (shared == NULL) {
4545+
PyTuple_SET_ITEM(co->co_pyc->consts, oparg, value);
4546+
}
4547+
else {
4548+
Py_DECREF(value);
4549+
value = shared;
4550+
}
4551+
Py_INCREF(value);
45304552
assert(EMPTY());
4531-
f->f_state = FRAME_RETURNED;
4532-
f->f_stackdepth = 0;
4533-
goto exiting;
4553+
struct activation_record *rec = current_activation_record;
4554+
assert(rec != NULL);
4555+
first_instr = rec->prev_first_instr;
4556+
next_instr = rec->prev_next_instr;
4557+
stack_bottom = rec->prev_stack_bottom;
4558+
stack_pointer = rec->prev_stack_pointer;
4559+
stack_top = rec->prev_stack_top;
4560+
free_activation_record(tstate, rec);
4561+
PUSH(value);
4562+
DISPATCH();
45344563
}
45354564

45364565

0 commit comments

Comments
 (0)