Skip to content

improve handling of unrecoverable errors caught in doRestartChild #88

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions src/Control/Distributed/Process/Platform/Supervisor.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1149,14 +1149,14 @@ tryRestartChild pid st active' spec reason
| True <- isTemporary (childRestart spec) = continue childRemoved
| DiedNormal <- reason
, True <- isIntrinsic (childRestart spec) = stopWith updateStopped ExitNormal
| otherwise = continue =<< doRestartChild pid spec reason st
| otherwise = doRestartChild pid spec reason st
where
childDown = (active ^= active') $ updateStopped
childRemoved = (active ^= active') $ removeChild spec st
updateStopped = maybe st id $ updateChild chKey (setChildStopped False) st
chKey = childKey spec

doRestartChild :: ProcessId -> ChildSpec -> DiedReason -> State -> Process State
doRestartChild :: ProcessId -> ChildSpec -> DiedReason -> State -> Process (ProcessAction State)
doRestartChild _ spec _ state = do -- TODO: use ProcessId and DiedReason to log
state' <- addRestart state
case state' of
Expand All @@ -1167,20 +1167,28 @@ doRestartChild _ spec _ state = do -- TODO: use ProcessId and DiedReason to log
Just st -> do
start' <- doStartChild spec st
case start' of
Right (ref, st') -> do
return $ markActive st' ref spec
Left _ -> do -- TODO: handle this by policy
Right (ref, st') -> continue $ markActive st' ref spec
Left err -> do
-- All child failures are handled via monitor signals, apart from
-- BadClosure, which comes back from doStartChild as (Left err).
-- Since we cannot recover from that, there's no point in trying
-- to start this child again (as the closure will never resolve),
-- so we remove the child forthwith. We should provide a policy
-- for handling this situation though...
return $ ( (active ^: Map.filter (/= chKey))
-- BadClosure and UnresolvableAddress from the StarterProcess
-- variants of ChildStart, which both come back from
-- doStartChild as (Left err).
sup <- getSelfPid
if isTemporary (childRestart spec)
then do
logEntry Log.warning $
mkReport "Error in temporary child" sup (childKey spec) (show err)
continue $ ( (active ^: Map.filter (/= chKey))
. (bumpStats Active chType decrement)
. (bumpStats Specified chType decrement)
$ removeChild spec st
)
$ removeChild spec st)
else do
logEntry Log.error $
mkReport "Unrecoverable error in child. Stopping supervisor"
sup (childKey spec) (show err)
stopWith st $ ExitOther $ "Unrecoverable error in child " ++ (childKey spec)
-- TODO: convert this to a meaningful exception type

where
chKey = childKey spec
chType = childType spec
Expand Down