From f7421f8c9cc620f829b3e3ce86d5ff12a86c1412 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Wed, 24 Jul 2024 17:19:30 +0100 Subject: [PATCH 1/2] slightly more thread safe gc --- src/C/pointers.jl | 2 ++ src/GC/GC.jl | 63 +++++++++++++++++++++++++----------- test/finalize_test_script.jl | 24 ++++++++++++++ 3 files changed, 71 insertions(+), 18 deletions(-) create mode 100644 test/finalize_test_script.jl diff --git a/src/C/pointers.jl b/src/C/pointers.jl index dd0476fc..6faabb60 100644 --- a/src/C/pointers.jl +++ b/src/C/pointers.jl @@ -22,6 +22,8 @@ const CAPI_FUNC_SIGS = Dict{Symbol,Pair{Tuple,Type}}( :PyEval_RestoreThread => (Ptr{Cvoid},) => Cvoid, :PyGILState_Ensure => () => PyGILState_STATE, :PyGILState_Release => (PyGILState_STATE,) => Cvoid, + :PyGILState_GetThisThreadState => () => Ptr{Cvoid}, + :PyGILState_Check => () => Cint, # IMPORT :PyImport_ImportModule => (Ptr{Cchar},) => PyPtr, :PyImport_Import => (PyPtr,) => PyPtr, diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 0d1fa9a8..48e70544 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -39,25 +39,36 @@ Like most PythonCall functions, you must only call this from the main thread. """ function enable() ENABLED[] = true - if !isempty(QUEUE) - C.with_gil(false) do - for ptr in QUEUE - if ptr != C.PyNULL - C.Py_DecRef(ptr) - end - end + if !isempty(QUEUE) && C.PyGILState_Check() == 1 + free_queue() + end + return +end + +function free_queue() + for ptr in QUEUE + if ptr != C.PyNULL + C.Py_DecRef(ptr) end end empty!(QUEUE) - return + nothing +end + +function gc() + if ENABLED[] && C.PyGILState_Check() == 1 + free_queue() + true + else + false + end end function enqueue(ptr::C.PyPtr) if ptr != C.PyNULL && C.CTX.is_initialized - if ENABLED[] - C.with_gil(false) do - C.Py_DecRef(ptr) - end + if ENABLED[] && C.PyGILState_Check() == 1 + C.Py_DecRef(ptr) + isempty(QUEUE) || free_queue() else push!(QUEUE, ptr) end @@ -67,14 +78,13 @@ end function enqueue_all(ptrs) if C.CTX.is_initialized - if ENABLED[] - C.with_gil(false) do - for ptr in ptrs - if ptr != C.PyNULL - C.Py_DecRef(ptr) - end + if ENABLED[] && C.PyGILState_Check() == 1 + for ptr in ptrs + if ptr != C.PyNULL + C.Py_DecRef(ptr) end end + isempty(QUEUE) || free_queue() else append!(QUEUE, ptrs) end @@ -82,4 +92,21 @@ function enqueue_all(ptrs) return end +mutable struct GCHook + function GCHook() + finalizer(_gchook_finalizer, new()) + end +end + +function _gchook_finalizer(x) + gc() + finalizer(_gchook_finalizer, x) + nothing +end + +function __init__() + GCHook() + nothing +end + end # module GC diff --git a/test/finalize_test_script.jl b/test/finalize_test_script.jl new file mode 100644 index 00000000..28d4dd72 --- /dev/null +++ b/test/finalize_test_script.jl @@ -0,0 +1,24 @@ +using PythonCall + +# This would consistently segfault pre-GC-thread-safety +let + pyobjs = map(pylist, 1:100) + Threads.@threads for obj in pyobjs + finalize(obj) + end +end + +@show PythonCall.GC.ENABLED[] +@show length(PythonCall.GC.QUEUE) +GC.gc(false) +# with GCHook, the queue should be empty now (the above gc() triggered GCHook to clear the PythonCall QUEUE) +# without GCHook, gc() has no effect on the QUEUE +@show length(PythonCall.GC.QUEUE) +GC.gc(false) +@show length(PythonCall.GC.QUEUE) +GC.gc(false) +@show length(PythonCall.GC.QUEUE) +# with GCHook this is not necessary, GC.gc() is enough +# without GCHook, this is required to free any objects in the PythonCall QUEUE +PythonCall.GC.gc() +@show length(PythonCall.GC.QUEUE) From 3e28ea7795c362a0dd9c5d8a7670cb6ce8a8edd5 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Mon, 29 Jul 2024 17:05:54 +0100 Subject: [PATCH 2/2] make Py finalizer thread-safe --- src/Core/Py.jl | 16 +++++- src/GC/GC.jl | 100 +++++------------------------------ src/JlWrap/objectarray.jl | 16 +++++- test/finalize_test_script.jl | 56 ++++++++++++++------ 4 files changed, 82 insertions(+), 106 deletions(-) diff --git a/src/Core/Py.jl b/src/Core/Py.jl index e56d74fa..82d2d85f 100644 --- a/src/Core/Py.jl +++ b/src/Core/Py.jl @@ -43,7 +43,21 @@ mutable struct Py end export Py -py_finalizer(x::Py) = GC.enqueue(getptr(x)) +const NUM_DECREFS = Ref(0) + +function py_finalizer(x::Py) + if C.CTX.is_initialized + if C.PyGILState_Check() == 1 + # if this thread holds the GIL then decref now + C.Py_DecRef(getptr(x)) + NUM_DECREFS[] += 1 + else + # otherwise re-attach the finalizer and try again next GC + finalizer(py_finalizer, x) + end + end + nothing +end ispy(::Py) = true getptr(x::Py) = getfield(x, :ptr) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 48e70544..058d024e 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -7,106 +7,30 @@ See `disable` and `enable`. """ module GC -using ..C: C - -const ENABLED = Ref(true) -const QUEUE = C.PyPtr[] - """ PythonCall.GC.disable() -Disable the PythonCall garbage collector. +Do nothing. -This means that whenever a Python object owned by Julia is finalized, it is not immediately -freed but is instead added to a queue of objects to free later when `enable()` is called. +!!! note -Like most PythonCall functions, you must only call this from the main thread. + In earlier versions of PythonCall, this function disabled the PythonCall garbage + collector. This is no longer required because Python objects now have thread-safe + finalizers. This function will be removed in PythonCall v1. """ -function disable() - ENABLED[] = false - return -end +disable() = nothing """ PythonCall.GC.enable() -Re-enable the PythonCall garbage collector. +Do nothing. -This frees any Python objects which were finalized while the GC was disabled, and allows -objects finalized in the future to be freed immediately. +!!! note -Like most PythonCall functions, you must only call this from the main thread. + In earlier versions of PythonCall, this function re-enabled the PythonCall garbage + collector. This is no longer required because Python objects now have thread-safe + finalizers. This function will be removed in PythonCall v1. """ -function enable() - ENABLED[] = true - if !isempty(QUEUE) && C.PyGILState_Check() == 1 - free_queue() - end - return -end - -function free_queue() - for ptr in QUEUE - if ptr != C.PyNULL - C.Py_DecRef(ptr) - end - end - empty!(QUEUE) - nothing -end - -function gc() - if ENABLED[] && C.PyGILState_Check() == 1 - free_queue() - true - else - false - end -end - -function enqueue(ptr::C.PyPtr) - if ptr != C.PyNULL && C.CTX.is_initialized - if ENABLED[] && C.PyGILState_Check() == 1 - C.Py_DecRef(ptr) - isempty(QUEUE) || free_queue() - else - push!(QUEUE, ptr) - end - end - return -end - -function enqueue_all(ptrs) - if C.CTX.is_initialized - if ENABLED[] && C.PyGILState_Check() == 1 - for ptr in ptrs - if ptr != C.PyNULL - C.Py_DecRef(ptr) - end - end - isempty(QUEUE) || free_queue() - else - append!(QUEUE, ptrs) - end - end - return -end - -mutable struct GCHook - function GCHook() - finalizer(_gchook_finalizer, new()) - end -end - -function _gchook_finalizer(x) - gc() - finalizer(_gchook_finalizer, x) - nothing -end - -function __init__() - GCHook() - nothing -end +enable() = nothing end # module GC diff --git a/src/JlWrap/objectarray.jl b/src/JlWrap/objectarray.jl index a98189e8..ffd0fa80 100644 --- a/src/JlWrap/objectarray.jl +++ b/src/JlWrap/objectarray.jl @@ -27,7 +27,21 @@ PyObjectArray{N}(x::AbstractArray{T,N}) where {T,N} = copyto!(PyObjectArray{N}(undef, size(x)), x) PyObjectArray(x::AbstractArray{T,N}) where {T,N} = PyObjectArray{N}(x) -pyobjectarray_finalizer(x::PyObjectArray) = GC.enqueue_all(x.ptrs) +function pyobjectarray_finalizer(x::PyObjectArray) + if C.CTX.is_initialized + if C.PyGILState_Check() == 1 + # if this thread holds the GIL then decref now + for ptr in x.ptrs + C.Py_DecRef(ptr) + end + else + # otherwise re-attach the finalizer and try again next GC + finalizer(pyobjectarray_finalizer, x) + end + end + nothing +end + Base.IndexStyle(x::PyObjectArray) = Base.IndexStyle(x.ptrs) diff --git a/test/finalize_test_script.jl b/test/finalize_test_script.jl index 28d4dd72..a5a86941 100644 --- a/test/finalize_test_script.jl +++ b/test/finalize_test_script.jl @@ -1,24 +1,48 @@ using PythonCall # This would consistently segfault pre-GC-thread-safety -let - pyobjs = map(pylist, 1:100) - Threads.@threads for obj in pyobjs - finalize(obj) +function test() + pyobjs = map(pyint, 1:10) + Threads.@threads for i = 1:10 + finalize(pyobjs[i]) end + # The following loop is a workaround and can be removed if the issue is fixed: + # https://github.com/JuliaLang/julia/issues/40626#issuecomment-1054890774 + Threads.@threads :static for _ = 1:Threads.nthreads() + Timer(Returns(nothing), 0; interval = 1) + end + nothing end -@show PythonCall.GC.ENABLED[] -@show length(PythonCall.GC.QUEUE) -GC.gc(false) -# with GCHook, the queue should be empty now (the above gc() triggered GCHook to clear the PythonCall QUEUE) -# without GCHook, gc() has no effect on the QUEUE -@show length(PythonCall.GC.QUEUE) +function decrefs() + n = PythonCall.Core.NUM_DECREFS[] + PythonCall.Core.NUM_DECREFS[] = 0 + n +end + +GC.gc() +decrefs() +println("test()") +test() +println(" decrefs: ", decrefs()) +println("gc(false)") GC.gc(false) -@show length(PythonCall.GC.QUEUE) +println(" decrefs: ", decrefs()) +println("gc(false)") GC.gc(false) -@show length(PythonCall.GC.QUEUE) -# with GCHook this is not necessary, GC.gc() is enough -# without GCHook, this is required to free any objects in the PythonCall QUEUE -PythonCall.GC.gc() -@show length(PythonCall.GC.QUEUE) +println(" decrefs: ", decrefs()) +println("gc()") +GC.gc() +println(" decrefs: ", decrefs()) +println("gc()") +GC.gc() +println(" decrefs: ", decrefs()) +println("gc()") +GC.gc() +println(" decrefs: ", decrefs()) +println("gc()") +GC.gc() +println(" decrefs: ", decrefs()) +println("gc()") +GC.gc() +println(" decrefs: ", decrefs())