Skip to content

Commit ed16d98

Browse files
authored
Merge pull request python#34 from tiran/perf-2
munmap pages on shutdown, keep FILE open
2 parents 41cc1ce + 08d3421 commit ed16d98

File tree

4 files changed

+85
-23
lines changed

4 files changed

+85
-23
lines changed

Include/internal/pycore_ceval.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ extern PyObject* _PyEval_BuiltinsFromGlobals(
6666
PyObject *globals);
6767

6868
extern int _PyPerfTrampoline_Init(int activate);
69+
extern int _PyPerfTrampoline_Fini(void);
70+
extern PyStatus _PyPerfTrampoline_AfterFork_Child(void);
6971

7072
static inline PyObject*
7173
_PyEval_EvalFrame(PyThreadState *tstate, struct _PyInterpreterFrame *frame, int throwflag)

Modules/posixmodule.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,11 @@ PyOS_AfterFork_Child(void)
606606
}
607607
assert(_PyThreadState_GET() == tstate);
608608

609+
status = _PyPerfTrampoline_AfterFork_Child();
610+
if (_PyStatus_EXCEPTION(status)) {
611+
goto fatal_error;
612+
}
613+
609614
run_at_forkers(tstate->interp->after_forkers_child, 0);
610615
return;
611616

Objects/perf_trampoline.c

Lines changed: 77 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,30 @@ typedef PyObject *(*py_trampoline)(py_evaluator, PyThreadState *,
1818
extern void *_Py_trampoline_func_start;
1919
extern void *_Py_trampoline_func_end;
2020

21-
typedef struct {
21+
struct code_arena_st {
2222
char *start_addr;
2323
char *current_addr;
2424
size_t size;
2525
size_t size_left;
2626
size_t code_size;
27-
} code_arena_t;
27+
struct code_arena_st *prev;
28+
};
29+
30+
typedef struct code_arena_st code_arena_t;
2831

2932
static Py_ssize_t extra_code_index = -1;
30-
static code_arena_t code_arena;
33+
static code_arena_t *code_arena;
34+
static FILE *perf_map_file;
3135

3236
static int
33-
new_code_arena()
37+
new_code_arena(void)
3438
{
35-
size_t page_size = sysconf(_SC_PAGESIZE);
39+
// Non-trivial programs typically need 64 to 256 KiB.
40+
size_t mem_size = 4096 * 16;
41+
assert(mem_size % sysconf(_SC_PAGESIZE) == 0);
3642
char *memory = mmap(NULL, // address
37-
page_size, PROT_READ | PROT_WRITE | PROT_EXEC,
43+
mem_size,
44+
PROT_READ | PROT_WRITE | PROT_EXEC,
3845
MAP_PRIVATE | MAP_ANONYMOUS,
3946
-1, // fd (not used here)
4047
0); // offset (not used here)
@@ -46,21 +53,43 @@ new_code_arena()
4653
void *end = &_Py_trampoline_func_end;
4754
size_t code_size = end - start;
4855

49-
long n_copies = page_size / code_size;
56+
long n_copies = mem_size / code_size;
5057
for (int i = 0; i < n_copies; i++) {
5158
memcpy(memory + i * code_size, start, code_size * sizeof(char));
5259
}
5360

54-
mprotect(memory, page_size, PROT_READ | PROT_EXEC);
61+
mprotect(memory, mem_size, PROT_READ | PROT_EXEC);
62+
63+
code_arena_t *new_arena = PyMem_RawCalloc(1, sizeof(code_arena_t));
64+
if (new_arena == NULL) {
65+
Py_FatalError("Failed to allocate new code arena struct");
66+
return -1;
67+
}
5568

56-
code_arena.start_addr = memory;
57-
code_arena.current_addr = memory;
58-
code_arena.size = page_size;
59-
code_arena.size_left = page_size;
60-
code_arena.code_size = code_size;
69+
new_arena->start_addr = memory;
70+
new_arena->current_addr = memory;
71+
new_arena->size = mem_size;
72+
new_arena->size_left = mem_size;
73+
new_arena->code_size = code_size;
74+
new_arena->prev = code_arena;
75+
code_arena = new_arena;
6176
return 0;
6277
}
6378

79+
static void
80+
free_code_arenas(void)
81+
{
82+
code_arena_t *cur = code_arena;
83+
code_arena_t *prev;
84+
code_arena = NULL; // invalid static pointer
85+
while(cur) {
86+
munmap(cur->start_addr, cur->size);
87+
prev = cur->prev;
88+
PyMem_RawFree(cur);
89+
cur = prev;
90+
}
91+
}
92+
6493
static inline py_trampoline
6594
code_arena_new_code(code_arena_t *code_arena)
6695
{
@@ -73,27 +102,32 @@ code_arena_new_code(code_arena_t *code_arena)
73102
static inline py_trampoline
74103
compile_trampoline(void)
75104
{
76-
if (code_arena.size_left <= code_arena.code_size) {
105+
if ((code_arena == NULL) || (code_arena->size_left <= code_arena->code_size)) {
77106
if (new_code_arena() < 0) {
78107
return NULL;
79108
}
80109
}
81110

82-
assert(code_arena.size_left <= code_arena.size);
83-
return code_arena_new_code(&code_arena);
111+
assert(code_arena->size_left <= code_arena->size);
112+
return code_arena_new_code(code_arena);
84113
}
85114

86115
static inline FILE *
87-
perf_map_open(pid_t pid)
116+
perf_map_get_file(void)
88117
{
118+
if (perf_map_file) {
119+
return perf_map_file;
120+
}
89121
char filename[100];
122+
pid_t pid = getpid();
123+
// TODO: %d is incorrect if pid_t is long long
90124
snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", pid);
91-
FILE *res = fopen(filename, "a");
92-
if (!res) {
125+
perf_map_file = fopen(filename, "a");
126+
if (!perf_map_file) {
93127
_Py_FatalErrorFormat(__func__, "Couldn't open %s: errno(%d)", filename, errno);
94128
return NULL;
95129
}
96-
return res;
130+
return perf_map_file;
97131
}
98132

99133
static inline int
@@ -112,6 +146,7 @@ perf_map_write_entry(FILE *method_file, const void *code_addr,
112146
{
113147
fprintf(method_file, "%lx %x py::%s:%s\n", (unsigned long)code_addr,
114148
code_size, entry, file);
149+
fflush(method_file);
115150
}
116151

117152
static PyObject *
@@ -129,14 +164,13 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
129164
if (new_trampoline == NULL) {
130165
return NULL;
131166
}
132-
FILE *pfile = perf_map_open(getpid());
167+
FILE *pfile = perf_map_get_file();
133168
if (pfile == NULL) {
134169
return NULL;
135170
}
136-
perf_map_write_entry(pfile, new_trampoline, code_arena.code_size,
171+
perf_map_write_entry(pfile, new_trampoline, code_arena->code_size,
137172
PyUnicode_AsUTF8(co->co_qualname),
138173
PyUnicode_AsUTF8(co->co_filename));
139-
perf_map_close(pfile);
140174
_PyCode_SetExtra((PyObject *)co, extra_code_index,
141175
(void *)new_trampoline);
142176
f = new_trampoline;
@@ -163,3 +197,23 @@ _PyPerfTrampoline_Init(int activate)
163197
}
164198
return 0;
165199
}
200+
201+
/* Release the resources held by the perf trampoline support.
 *
 * When HAVE_PERF_TRAMPOLINE is defined, this unmaps and frees all code
 * arenas (free_code_arenas()) and closes the cached perf map stream.
 * Without HAVE_PERF_TRAMPOLINE it does nothing.
 *
 * Returns 0 in all cases.
 */
int
_PyPerfTrampoline_Fini(void)
{
#ifdef HAVE_PERF_TRAMPOLINE
    free_code_arenas();
    // The stream is opened lazily, so it may never have been created.
    if (perf_map_file != NULL) {
        perf_map_close(perf_map_file);
        // perf_map_get_file() returns the cached pointer whenever it is
        // non-NULL; clear it so a closed stream is never handed out again.
        perf_map_file = NULL;
    }
#endif
    return 0;
}
210+
211+
PyStatus
212+
_PyPerfTrampoline_AfterFork_Child(void)
213+
{
214+
#ifdef HAVE_PERF_TRAMPOLINE
215+
// close file in child.
216+
perf_map_close(perf_map_file);
217+
#endif
218+
return PyStatus_Ok();
219+
}

Python/pylifecycle.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,6 +1728,7 @@ finalize_interp_clear(PyThreadState *tstate)
17281728
_PyArg_Fini();
17291729
_Py_ClearFileSystemEncoding();
17301730
_Py_Deepfreeze_Fini();
1731+
_PyPerfTrampoline_Fini();
17311732
}
17321733

17331734
finalize_interp_types(tstate->interp);

0 commit comments

Comments
 (0)