Skip to content

Prevent abort signals from causing uncaught exceptions #1320

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 35 additions & 5 deletions apps/coordinator/src/checkpointer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type CheckpointerOptions = {
chaosMonkey?: ChaosMonkey;
};

class CheckpointAbortError extends Error {}

async function getFileSize(filePath: string): Promise<number> {
try {
const stats = await fs.stat(filePath);
Expand Down Expand Up @@ -248,7 +250,12 @@ export class Checkpointer {
return false;
}

controller.abort("cancelCheckpointing()");
if (controller.signal.aborted) {
this.#logger.debug("Controller already aborted", { runId });
return false;
}

controller.abort("cancelCheckpoint()");
this.#abortControllers.delete(runId);

return true;
Expand Down Expand Up @@ -395,6 +402,14 @@ export class Checkpointer {
const controller = new AbortController();
this.#abortControllers.set(runId, controller);

const assertNotAborted = (abortMessage?: string) => {
if (controller.signal.aborted) {
throw new CheckpointAbortError(abortMessage);
}

this.#logger.debug("Not aborted", { abortMessage });
};

const $$ = $({ signal: controller.signal });

const shortCode = nanoid(8);
Expand All @@ -418,6 +433,7 @@ export class Checkpointer {
};

try {
assertNotAborted("chaosMonkey.call");
await this.chaosMonkey.call({ $: $$ });

this.#logger.log("Checkpointing:", { options });
Expand Down Expand Up @@ -474,11 +490,12 @@ export class Checkpointer {
return { success: false, reason: "SKIP_RETRYING" };
}

assertNotAborted("cmd: crictl ps");
const containerId = this.#logger.debug(
// @ts-expect-error
await $$`crictl ps`
.pipeStdout($$({ stdin: "pipe" })`grep ${containterName}`)
.pipeStdout($$({ stdin: "pipe" })`cut -f1 ${"-d "}`)
await $`crictl ps`
.pipeStdout($({ stdin: "pipe" })`grep ${containterName}`)
.pipeStdout($({ stdin: "pipe" })`cut -f1 ${"-d "}`)
);

if (!containerId.stdout) {
Expand All @@ -496,6 +513,7 @@ export class Checkpointer {
}

// Create checkpoint
assertNotAborted("cmd: crictl checkpoint");
this.#logger.debug(await $$`crictl checkpoint --export=${exportLocation} ${containerId}`);
const postCheckpoint = performance.now();

Expand All @@ -504,20 +522,25 @@ export class Checkpointer {
this.#logger.log("checkpoint archive created", { size, options });

// Create image from checkpoint
assertNotAborted("cmd: buildah from scratch");
const container = this.#logger.debug(await $$`buildah from scratch`);
const postFrom = performance.now();

assertNotAborted("cmd: buildah add");
this.#logger.debug(await $$`buildah add ${container} ${exportLocation} /`);
const postAdd = performance.now();

assertNotAborted("cmd: buildah config");
this.#logger.debug(
await $$`buildah config --annotation=io.kubernetes.cri-o.annotations.checkpoint.name=counter ${container}`
);
const postConfig = performance.now();

assertNotAborted("cmd: buildah commit");
this.#logger.debug(await $$`buildah commit ${container} ${imageRef}`);
const postCommit = performance.now();

assertNotAborted("cmd: buildah rm");
this.#logger.debug(await $$`buildah rm ${container}`);
const postRm = performance.now();

Expand All @@ -529,6 +552,7 @@ export class Checkpointer {
}

// Push checkpoint image
assertNotAborted("cmd: buildah push");
this.#logger.debug(
await $$`buildah push --tls-verify=${String(this.registryTlsVerify)} ${imageRef}`
);
Expand All @@ -554,9 +578,15 @@ export class Checkpointer {
},
};
} catch (error) {
if (error instanceof CheckpointAbortError) {
this.#logger.error("Checkpoint canceled: CheckpointAbortError", { options, error });

return { success: false, reason: "CANCELED" };
}

if (isExecaChildProcess(error)) {
if (error.isCanceled) {
this.#logger.error("Checkpoint canceled", { options, error });
this.#logger.error("Checkpoint canceled: ExecaChildProcess", { options, error });

return { success: false, reason: "CANCELED" };
}
Expand Down
Loading