@@ -18,23 +18,30 @@ typedef PyObject *(*py_trampoline)(py_evaluator, PyThreadState *,
18
18
extern void * _Py_trampoline_func_start ;
19
19
extern void * _Py_trampoline_func_end ;
20
20
21
/*
 * Bookkeeping for one mmap'ed region filled with copies of the trampoline
 * template code. Arenas form a singly linked list through `prev`, with the
 * most recently created arena at the head.
 */
typedef struct code_arena_st {
    char *start_addr;              // base address of the mapping
    char *current_addr;            // starts at start_addr; presumably the next unused slot
    size_t size;                   // total size of the mapping in bytes
    size_t size_left;              // starts equal to size; checked against code_size before use
    size_t code_size;              // size in bytes of one trampoline copy
    struct code_arena_st *prev;    // arena created before this one, or NULL
} code_arena_t;
28
31
29
32
static Py_ssize_t extra_code_index = -1 ;
30
- static code_arena_t code_arena ;
33
+ static code_arena_t * code_arena ;
34
+ static FILE * perf_map_file ;
31
35
32
36
static int
33
- new_code_arena ()
37
+ new_code_arena (void )
34
38
{
35
- size_t page_size = sysconf (_SC_PAGESIZE );
39
+ // non-trivial programs typically need 64 to 256 kiB.
40
+ size_t mem_size = 4096 * 16 ;
41
+ assert (mem_size % sysconf (_SC_PAGESIZE ) == 0 );
36
42
char * memory = mmap (NULL , // address
37
- page_size , PROT_READ | PROT_WRITE | PROT_EXEC ,
43
+ mem_size ,
44
+ PROT_READ | PROT_WRITE | PROT_EXEC ,
38
45
MAP_PRIVATE | MAP_ANONYMOUS ,
39
46
-1 , // fd (not used here)
40
47
0 ); // offset (not used here)
@@ -46,21 +53,43 @@ new_code_arena()
46
53
void * end = & _Py_trampoline_func_end ;
47
54
size_t code_size = end - start ;
48
55
49
- long n_copies = page_size / code_size ;
56
+ long n_copies = mem_size / code_size ;
50
57
for (int i = 0 ; i < n_copies ; i ++ ) {
51
58
memcpy (memory + i * code_size , start , code_size * sizeof (char ));
52
59
}
53
60
54
- mprotect (memory , page_size , PROT_READ | PROT_EXEC );
61
+ mprotect (memory , mem_size , PROT_READ | PROT_EXEC );
62
+
63
+ code_arena_t * new_arena = PyMem_RawCalloc (1 , sizeof (code_arena_t ));
64
+ if (new_arena == NULL ) {
65
+ Py_FatalError ("Failed to allocate new code arena struct" );
66
+ return -1 ;
67
+ }
55
68
56
- code_arena .start_addr = memory ;
57
- code_arena .current_addr = memory ;
58
- code_arena .size = page_size ;
59
- code_arena .size_left = page_size ;
60
- code_arena .code_size = code_size ;
69
+ new_arena -> start_addr = memory ;
70
+ new_arena -> current_addr = memory ;
71
+ new_arena -> size = mem_size ;
72
+ new_arena -> size_left = mem_size ;
73
+ new_arena -> code_size = code_size ;
74
+ new_arena -> prev = code_arena ;
75
+ code_arena = new_arena ;
61
76
return 0 ;
62
77
}
63
78
79
+ static void
80
+ free_code_arenas (void )
81
+ {
82
+ code_arena_t * cur = code_arena ;
83
+ code_arena_t * prev ;
84
+ code_arena = NULL ; // invalid static pointer
85
+ while (cur ) {
86
+ munmap (cur -> start_addr , cur -> size );
87
+ prev = cur -> prev ;
88
+ PyMem_RawFree (cur );
89
+ cur = prev ;
90
+ }
91
+ }
92
+
64
93
static inline py_trampoline
65
94
code_arena_new_code (code_arena_t * code_arena )
66
95
{
@@ -73,27 +102,32 @@ code_arena_new_code(code_arena_t *code_arena)
73
102
static inline py_trampoline
74
103
compile_trampoline (void )
75
104
{
76
- if (code_arena . size_left <= code_arena . code_size ) {
105
+ if (( code_arena == NULL ) || ( code_arena -> size_left <= code_arena -> code_size ) ) {
77
106
if (new_code_arena () < 0 ) {
78
107
return NULL ;
79
108
}
80
109
}
81
110
82
- assert (code_arena . size_left <= code_arena . size );
83
- return code_arena_new_code (& code_arena );
111
+ assert (code_arena -> size_left <= code_arena -> size );
112
+ return code_arena_new_code (code_arena );
84
113
}
85
114
86
115
static inline FILE *
87
- perf_map_open ( pid_t pid )
116
+ perf_map_get_file ( void )
88
117
{
118
+ if (perf_map_file ) {
119
+ return perf_map_file ;
120
+ }
89
121
char filename [100 ];
122
+ pid_t pid = getpid ();
123
+ // TODO: %d is incorrect if pid_t is long long
90
124
snprintf (filename , sizeof (filename ), "/tmp/perf-%d.map" , pid );
91
- FILE * res = fopen (filename , "a" );
92
- if (!res ) {
125
+ perf_map_file = fopen (filename , "a" );
126
+ if (!perf_map_file ) {
93
127
_Py_FatalErrorFormat (__func__ , "Couldn't open %s: errno(%d)" , filename , errno );
94
128
return NULL ;
95
129
}
96
- return res ;
130
+ return perf_map_file ;
97
131
}
98
132
99
133
static inline int
@@ -112,6 +146,7 @@ perf_map_write_entry(FILE *method_file, const void *code_addr,
112
146
{
113
147
fprintf (method_file , "%lx %x py::%s:%s\n" , (unsigned long )code_addr ,
114
148
code_size , entry , file );
149
+ fflush (method_file );
115
150
}
116
151
117
152
static PyObject *
@@ -129,14 +164,13 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
129
164
if (new_trampoline == NULL ) {
130
165
return NULL ;
131
166
}
132
- FILE * pfile = perf_map_open ( getpid () );
167
+ FILE * pfile = perf_map_get_file ( );
133
168
if (pfile == NULL ) {
134
169
return NULL ;
135
170
}
136
- perf_map_write_entry (pfile , new_trampoline , code_arena . code_size ,
171
+ perf_map_write_entry (pfile , new_trampoline , code_arena -> code_size ,
137
172
PyUnicode_AsUTF8 (co -> co_qualname ),
138
173
PyUnicode_AsUTF8 (co -> co_filename ));
139
- perf_map_close (pfile );
140
174
_PyCode_SetExtra ((PyObject * )co , extra_code_index ,
141
175
(void * )new_trampoline );
142
176
f = new_trampoline ;
@@ -163,3 +197,23 @@ _PyPerfTrampoline_Init(int activate)
163
197
}
164
198
return 0 ;
165
199
}
200
+
201
/*
 * Tear down perf trampoline state: release all code arenas and close the
 * perf map file. Always returns 0. Without HAVE_PERF_TRAMPOLINE this is a
 * no-op.
 */
int
_PyPerfTrampoline_Fini(void)
{
#ifdef HAVE_PERF_TRAMPOLINE
    free_code_arenas();
    // NOTE(review): perf_map_close() is defined outside this view; it is
    // assumed to close the stream without touching the static pointer.
    perf_map_close(perf_map_file);
    // Drop the cached handle so perf_map_get_file() reopens the file rather
    // than returning a closed stream, and so a second call closes nothing.
    perf_map_file = NULL;
#endif
    return 0;
}
210
+
211
+ PyStatus
212
+ _PyPerfTrampoline_AfterFork_Child (void )
213
+ {
214
+ #ifdef HAVE_PERF_TRAMPOLINE
215
+ // close file in child.
216
+ perf_map_close (perf_map_file );
217
+ #endif
218
+ return PyStatus_Ok ();
219
+ }
0 commit comments