Skip to content

Commit d8006e1

Browse files
authored
Prevent abort signals from causing uncaught exceptions (#1320)
* never abort the same controller twice * prevent uncaught exception when aborting pipe * abort signal assertions and more logging * never abort running pipe
1 parent f89d93b commit d8006e1

File tree

1 file changed

+35
-5
lines changed

1 file changed

+35
-5
lines changed

apps/coordinator/src/checkpointer.ts

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ type CheckpointerOptions = {
4848
chaosMonkey?: ChaosMonkey;
4949
};
5050

51+
class CheckpointAbortError extends Error {}
52+
5153
async function getFileSize(filePath: string): Promise<number> {
5254
try {
5355
const stats = await fs.stat(filePath);
@@ -248,7 +250,12 @@ export class Checkpointer {
248250
return false;
249251
}
250252

251-
controller.abort("cancelCheckpointing()");
253+
if (controller.signal.aborted) {
254+
this.#logger.debug("Controller already aborted", { runId });
255+
return false;
256+
}
257+
258+
controller.abort("cancelCheckpoint()");
252259
this.#abortControllers.delete(runId);
253260

254261
return true;
@@ -395,6 +402,14 @@ export class Checkpointer {
395402
const controller = new AbortController();
396403
this.#abortControllers.set(runId, controller);
397404

405+
const assertNotAborted = (abortMessage?: string) => {
406+
if (controller.signal.aborted) {
407+
throw new CheckpointAbortError(abortMessage);
408+
}
409+
410+
this.#logger.debug("Not aborted", { abortMessage });
411+
};
412+
398413
const $$ = $({ signal: controller.signal });
399414

400415
const shortCode = nanoid(8);
@@ -418,6 +433,7 @@ export class Checkpointer {
418433
};
419434

420435
try {
436+
assertNotAborted("chaosMonkey.call");
421437
await this.chaosMonkey.call({ $: $$ });
422438

423439
this.#logger.log("Checkpointing:", { options });
@@ -474,11 +490,12 @@ export class Checkpointer {
474490
return { success: false, reason: "SKIP_RETRYING" };
475491
}
476492

493+
assertNotAborted("cmd: crictl ps");
477494
const containerId = this.#logger.debug(
478495
// @ts-expect-error
479-
await $$`crictl ps`
480-
.pipeStdout($$({ stdin: "pipe" })`grep ${containterName}`)
481-
.pipeStdout($$({ stdin: "pipe" })`cut -f1 ${"-d "}`)
496+
await $`crictl ps`
497+
.pipeStdout($({ stdin: "pipe" })`grep ${containterName}`)
498+
.pipeStdout($({ stdin: "pipe" })`cut -f1 ${"-d "}`)
482499
);
483500

484501
if (!containerId.stdout) {
@@ -496,6 +513,7 @@ export class Checkpointer {
496513
}
497514

498515
// Create checkpoint
516+
assertNotAborted("cmd: crictl checkpoint");
499517
this.#logger.debug(await $$`crictl checkpoint --export=${exportLocation} ${containerId}`);
500518
const postCheckpoint = performance.now();
501519

@@ -504,20 +522,25 @@ export class Checkpointer {
504522
this.#logger.log("checkpoint archive created", { size, options });
505523

506524
// Create image from checkpoint
525+
assertNotAborted("cmd: buildah from scratch");
507526
const container = this.#logger.debug(await $$`buildah from scratch`);
508527
const postFrom = performance.now();
509528

529+
assertNotAborted("cmd: buildah add");
510530
this.#logger.debug(await $$`buildah add ${container} ${exportLocation} /`);
511531
const postAdd = performance.now();
512532

533+
assertNotAborted("cmd: buildah config");
513534
this.#logger.debug(
514535
await $$`buildah config --annotation=io.kubernetes.cri-o.annotations.checkpoint.name=counter ${container}`
515536
);
516537
const postConfig = performance.now();
517538

539+
assertNotAborted("cmd: buildah commit");
518540
this.#logger.debug(await $$`buildah commit ${container} ${imageRef}`);
519541
const postCommit = performance.now();
520542

543+
assertNotAborted("cmd: buildah rm");
521544
this.#logger.debug(await $$`buildah rm ${container}`);
522545
const postRm = performance.now();
523546

@@ -529,6 +552,7 @@ export class Checkpointer {
529552
}
530553

531554
// Push checkpoint image
555+
assertNotAborted("cmd: buildah push");
532556
this.#logger.debug(
533557
await $$`buildah push --tls-verify=${String(this.registryTlsVerify)} ${imageRef}`
534558
);
@@ -554,9 +578,15 @@ export class Checkpointer {
554578
},
555579
};
556580
} catch (error) {
581+
if (error instanceof CheckpointAbortError) {
582+
this.#logger.error("Checkpoint canceled: CheckpointAbortError", { options, error });
583+
584+
return { success: false, reason: "CANCELED" };
585+
}
586+
557587
if (isExecaChildProcess(error)) {
558588
if (error.isCanceled) {
559-
this.#logger.error("Checkpoint canceled", { options, error });
589+
this.#logger.error("Checkpoint canceled: ExecaChildProcess", { options, error });
560590

561591
return { success: false, reason: "CANCELED" };
562592
}

0 commit comments

Comments
 (0)