Skip to content

gh-129185: Fix PyTraceMalloc_Untrack() at Python exit #129191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 37 additions & 6 deletions Lib/test/test_tracemalloc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import contextlib
import os
import sys
import textwrap
import tracemalloc
import unittest
from unittest.mock import patch
Expand All @@ -19,6 +20,7 @@
_testinternalcapi = None


DEFAULT_DOMAIN = 0
EMPTY_STRING_SIZE = sys.getsizeof(b'')
INVALID_NFRAME = (-1, 2**30)

Expand Down Expand Up @@ -1027,8 +1029,8 @@ def track(self, release_gil=False, nframe=1):
release_gil)
return frames

def untrack(self):
_testcapi.tracemalloc_untrack(self.domain, self.ptr)
def untrack(self, release_gil=False):
_testcapi.tracemalloc_untrack(self.domain, self.ptr, release_gil)

def get_traced_memory(self):
# Get the traced size in the domain
Expand Down Expand Up @@ -1070,21 +1072,27 @@ def test_track_already_tracked(self):
self.assertEqual(self.get_traceback(),
tracemalloc.Traceback(frames))

def test_untrack(self):
def check_untrack(self, release_gil):
tracemalloc.start()

self.track()
self.assertIsNotNone(self.get_traceback())
self.assertEqual(self.get_traced_memory(), self.size)

# untrack must remove the trace
self.untrack()
self.untrack(release_gil)
self.assertIsNone(self.get_traceback())
self.assertEqual(self.get_traced_memory(), 0)

# calling _PyTraceMalloc_Untrack() multiple times must not crash
self.untrack()
self.untrack()
self.untrack(release_gil)
self.untrack(release_gil)

def test_untrack(self):
self.check_untrack(False)

def test_untrack_without_gil(self):
self.check_untrack(True)

def test_stop_track(self):
tracemalloc.start()
Expand All @@ -1110,6 +1118,29 @@ def test_tracemalloc_track_race(self):
# gh-128679: Test fix for tracemalloc.stop() race condition
_testcapi.tracemalloc_track_race()

def test_late_untrack(self):
code = textwrap.dedent(f"""
from test import support
import tracemalloc
import _testcapi

class Tracked:
def __init__(self, domain, size):
self.domain = domain
self.ptr = id(self)
self.size = size
_testcapi.tracemalloc_track(self.domain, self.ptr, self.size)

def __del__(self, untrack=_testcapi.tracemalloc_untrack):
untrack(self.domain, self.ptr, 1)

domain = {DEFAULT_DOMAIN}
tracemalloc.start()
obj = Tracked(domain, 1024 * 1024)
support.late_deletion(obj)
""")
assert_python_ok("-c", code)


if __name__ == "__main__":
unittest.main()
13 changes: 11 additions & 2 deletions Modules/_testcapi/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -557,16 +557,25 @@ tracemalloc_untrack(PyObject *self, PyObject *args)
{
unsigned int domain;
PyObject *ptr_obj;
int release_gil = 0;

if (!PyArg_ParseTuple(args, "IO", &domain, &ptr_obj)) {
if (!PyArg_ParseTuple(args, "IO|i", &domain, &ptr_obj, &release_gil)) {
return NULL;
}
void *ptr = PyLong_AsVoidPtr(ptr_obj);
if (PyErr_Occurred()) {
return NULL;
}

int res = PyTraceMalloc_Untrack(domain, (uintptr_t)ptr);
int res;
if (release_gil) {
Py_BEGIN_ALLOW_THREADS
res = PyTraceMalloc_Untrack(domain, (uintptr_t)ptr);
Py_END_ALLOW_THREADS
}
else {
res = PyTraceMalloc_Untrack(domain, (uintptr_t)ptr);
}
if (res < 0) {
PyErr_SetString(PyExc_RuntimeError, "PyTraceMalloc_Untrack error");
return NULL;
Expand Down
1 change: 1 addition & 0 deletions Modules/_testcapimodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -3394,6 +3394,7 @@ static void
tracemalloc_track_race_thread(void *data)
{
PyTraceMalloc_Track(123, 10, 1);
PyTraceMalloc_Untrack(123, 10);

PyThread_type_lock lock = (PyThread_type_lock)data;
PyThread_release_lock(lock);
Expand Down
4 changes: 3 additions & 1 deletion Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -2113,7 +2113,7 @@ _Py_Finalize(_PyRuntimeState *runtime)

/* Disable tracemalloc after all Python objects have been destroyed,
so it is possible to use tracemalloc in objects destructor. */
_PyTraceMalloc_Fini();
_PyTraceMalloc_Stop();

/* Finalize any remaining import state */
// XXX Move these up to where finalize_modules() is currently.
Expand Down Expand Up @@ -2166,6 +2166,8 @@ _Py_Finalize(_PyRuntimeState *runtime)

finalize_interp_clear(tstate);

_PyTraceMalloc_Fini();

#ifdef Py_TRACE_REFS
/* Display addresses (& refcnts) of all objects still alive.
* An address can be used to find the repr of the object, printed
Expand Down
25 changes: 23 additions & 2 deletions Python/tracemalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1256,9 +1256,17 @@ PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
size_t size)
{
PyGILState_STATE gil_state = PyGILState_Ensure();
int result;

// gh-129185: Check before TABLES_LOCK() to support calls after
// _PyTraceMalloc_Fini().
if (!tracemalloc_config.tracing) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to hold the lock or GIL, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The purpose of this "pre-check" is to avoid the GIL and TABLES_LOCK().

The flag is tested again below with TABLES_LOCK().

PyTraceMalloc_Untrack() doesn't lock the GIL, only TABLES_LOCK(), when tracing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change restores the check that we had before recent refactoring. For example, Python 3.14 alpha3 uses:

int
PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
                    size_t size)
{
    int res;
    PyGILState_STATE gil_state;

    if (!tracemalloc_config.tracing) {
        /* tracemalloc is not tracing: do nothing */
        return -2;
    }

    gil_state = PyGILState_Ensure();

    TABLES_LOCK();
    res = tracemalloc_add_trace(domain, ptr, size);
    TABLES_UNLOCK();

    PyGILState_Release(gil_state);
    return res;
}

The check is done without holding the GIL.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But then we lose the thread safety: if one thread that holds the GIL were to write to tracing, then a thread that calls one of these without the GIL would race.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the easiest solution would be to grab the tables lock for the read, and then unlock it before calling PyGILState_Ensure to prevent lock-ordering deadlocks.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The race is right here. Thread A might check tracing at the exact same time as thread B, which is a data race.

What is the problem of "continue execution" of thread A, since thread A checks again tracing with the tables lock?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because it might not reach the second tracing read at all--if the first one races, then we're probably going to get a crash.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, you're talking about thread B calling _PyTraceMalloc_Fini(), not tracemalloc.stop(). In this case, thread A can call TABLES_LOCK() after thread B deleted the lock, and yes, we can get a crash.

Sadly, PyTraceMalloc_Track() and PyTraceMalloc_Untrack() API doesn't require the caller to hold the GIL.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That can happen too, but I meant that we'll get a crash from the data race on tracing, not a use-after-free on the lock. I think we just hold the GIL (via PyGILState_Ensure) and call without the tables lock held, that should be thread safe enough for 3.12.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I modified the Untrack() function to get the GIL. It should address your last concern.

result = -2;
goto done;
}

TABLES_LOCK();

int result;
if (tracemalloc_config.tracing) {
result = tracemalloc_add_trace_unlocked(domain, ptr, size);
}
Expand All @@ -1268,6 +1276,7 @@ PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
}

TABLES_UNLOCK();
done:
PyGILState_Release(gil_state);

return result;
Expand All @@ -1277,9 +1286,19 @@ PyTraceMalloc_Track(unsigned int domain, uintptr_t ptr,
int
PyTraceMalloc_Untrack(unsigned int domain, uintptr_t ptr)
{
// Need the GIL to prevent races on the first 'tracing' test
PyGILState_STATE gil_state = PyGILState_Ensure();
int result;

// gh-129185: Check before TABLES_LOCK() to support calls after
// _PyTraceMalloc_Fini()
if (!tracemalloc_config.tracing) {
result = -2;
goto done;
}

TABLES_LOCK();

int result;
if (tracemalloc_config.tracing) {
tracemalloc_remove_trace_unlocked(domain, ptr);
result = 0;
Expand All @@ -1290,6 +1309,8 @@ PyTraceMalloc_Untrack(unsigned int domain, uintptr_t ptr)
}

TABLES_UNLOCK();
done:
PyGILState_Release(gil_state);
return result;
}

Expand Down
Loading