diff --git a/.changeset/eighty-spies-knock.md b/.changeset/eighty-spies-knock.md new file mode 100644 index 0000000000..d034532f90 --- /dev/null +++ b/.changeset/eighty-spies-knock.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +Detect ffmpeg OOM errors, added manual OutOfMemoryError diff --git a/apps/webapp/app/v3/services/completeAttempt.server.ts b/apps/webapp/app/v3/services/completeAttempt.server.ts index fe18c5c29c..7023fc2bf8 100644 --- a/apps/webapp/app/v3/services/completeAttempt.server.ts +++ b/apps/webapp/app/v3/services/completeAttempt.server.ts @@ -11,6 +11,7 @@ import { exceptionEventEnhancer, flattenAttributes, internalErrorFromUnexpectedExit, + isManualOutOfMemoryError, sanitizeError, shouldRetryError, taskRunErrorEnhancer, @@ -691,20 +692,38 @@ async function findAttempt(prismaClient: PrismaClientOrTransaction, friendlyId: } function isOOMError(error: TaskRunError) { - if (error.type !== "INTERNAL_ERROR") return false; - if (error.code === "TASK_PROCESS_OOM_KILLED" || error.code === "TASK_PROCESS_MAYBE_OOM_KILLED") { - return true; + if (error.type === "INTERNAL_ERROR") { + if ( + error.code === "TASK_PROCESS_OOM_KILLED" || + error.code === "TASK_PROCESS_MAYBE_OOM_KILLED" + ) { + return true; + } + + // For the purposes of retrying on a larger machine, we're going to treat this as an OOM error. + // This is what they look like if we're executing using k8s. They then get corrected later, but it's too late. 
+ // {"code": "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE", "type": "INTERNAL_ERROR", "message": "Process exited with code -1 after signal SIGKILL."} + if ( + error.code === "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE" && + error.message && + error.message.includes("SIGKILL") && + error.message.includes("-1") + ) { + return true; + } + } + + if (error.type === "BUILT_IN_ERROR") { + // ffmpeg being SIGKILLed surfaces as a plain built-in Error, so match on its message + // { "name": "Error", "type": "BUILT_IN_ERROR", "message": "ffmpeg was killed with signal SIGKILL" } + if (error.message && error.message.includes("ffmpeg was killed with signal SIGKILL")) { + return true; + } } - // For the purposes of retrying on a larger machine, we're going to treat this is an OOM error. - // This is what they look like if we're executing using k8s. They then get corrected later, but it's too late. - // {"code": "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE", "type": "INTERNAL_ERROR", "message": "Process exited with code -1 after signal SIGKILL."} - if ( - error.code === "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE" && - error.message && - error.message.includes("SIGKILL") && - error.message.includes("-1") - ) { + // Special `OutOfMemoryError` for doing a manual OOM kill. + // Useful if a native library runs out of memory but doesn't actually crash the run and you want to manually flag the run as an OOM. + if (isManualOutOfMemoryError(error)) { return true; } diff --git a/packages/core/src/v3/errors.ts b/packages/core/src/v3/errors.ts index ba60089bbb..a079d0d71c 100644 --- a/packages/core/src/v3/errors.ts +++ b/packages/core/src/v3/errors.ts @@ -54,6 +54,28 @@ export class AbortTaskRunError extends Error { } } +const MANUAL_OOM_KILL_ERROR_MESSAGE = "MANUAL_OOM_KILL_ERROR"; + +/** + * This causes an Out Of Memory error on the run (if it's uncaught). 
+ * This can be useful if you use a native package that detects it's run out of memory but doesn't kill Node.js + */ +export class OutOfMemoryError extends Error { + constructor() { + super(MANUAL_OOM_KILL_ERROR_MESSAGE); + this.name = "OutOfMemoryError"; + } +} + +export function isManualOutOfMemoryError(error: TaskRunError) { + if (error.type === "BUILT_IN_ERROR") { + if (error.message && error.message === MANUAL_OOM_KILL_ERROR_MESSAGE) { + return true; + } + } + return false; +} + export class TaskPayloadParsedError extends Error { public readonly cause: unknown; @@ -562,6 +584,13 @@ export function taskRunErrorEnhancer(error: TaskRunError): EnhanceError { + run: async ( + { + succeedOnLargerMachine = false, + ffmpeg = false, + manual = false, + }: { succeedOnLargerMachine?: boolean; ffmpeg?: boolean; manual?: boolean }, + { ctx } + ) => { logger.info("running out of memory below this line"); logger.info(`Running on ${ctx.machine?.name}`); @@ -23,6 +31,14 @@ export const oomTask = task({ }; } + if (manual) { + throw new OutOfMemoryError(); + } + + if (ffmpeg) { + throw new Error("ffmpeg was killed with signal SIGKILL"); + } + let a = "a"; try {