Skip to content

Commit 1a88809

Browse files
authored
fix(runner): prevent retry immediately race condition which can cause stuck runs that end up being system failures (#2402)
1 parent 3d17ce5 commit 1a88809

File tree

3 files changed

+78
-1
lines changed

3 files changed

+78
-1
lines changed

.changeset/loud-rules-dream.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"trigger.dev": patch
3+
---
4+
5+
fix(runner): prevent retry immediately race condition which can cause stuck runs that end up being system failures

packages/cli-v3/src/entryPoints/managed/execution.ts

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ export class RunExecution {
7777
private shutdownReason?: string;
7878

7979
private isCompletingRun = false;
80+
private ignoreSnapshotChanges = false;
8081

8182
private supervisorSocket: SupervisorSocket;
8283
private notifier?: RunNotifier;
@@ -237,6 +238,16 @@ export class RunExecution {
237238
completedWaitpoints: completedWaitpoints.length,
238239
};
239240

241+
if (this.ignoreSnapshotChanges) {
242+
this.sendDebugLog("processSnapshotChange: ignoring snapshot change", {
243+
incomingSnapshotId: snapshot.friendlyId,
244+
completedWaitpoints: completedWaitpoints.length,
245+
currentAttemptNumber: this.currentAttemptNumber,
246+
newAttemptNumber: run.attemptNumber,
247+
});
248+
return;
249+
}
250+
240251
if (!this.snapshotManager) {
241252
this.sendDebugLog("handleSnapshotChange: missing snapshot manager", snapshotMetadata);
242253
return;
@@ -808,7 +819,9 @@ export class RunExecution {
808819
}
809820

810821
// Start and execute next attempt
811-
const [startError, start] = await tryCatch(this.startAttempt({ isWarmStart: true }));
822+
const [startError, start] = await tryCatch(
823+
this.enableIgnoreSnapshotChanges(() => this.startAttempt({ isWarmStart: true }))
824+
);
812825

813826
if (startError) {
814827
this.sendDebugLog("failed to start attempt for retry", { error: startError.message });
@@ -829,6 +842,15 @@ export class RunExecution {
829842
}
830843
}
831844

845+
private async enableIgnoreSnapshotChanges<T>(fn: () => Promise<T>): Promise<T> {
846+
this.ignoreSnapshotChanges = true;
847+
try {
848+
return await fn();
849+
} finally {
850+
this.ignoreSnapshotChanges = false;
851+
}
852+
}
853+
832854
/**
833855
* Restores a suspended execution from PENDING_EXECUTING
834856
*/
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import { task } from "@trigger.dev/sdk";
2+
import { setTimeout } from "timers/promises";
3+
4+
export const attemptFailures = task({
5+
id: "attempt-failures",
6+
retry: {
7+
maxAttempts: 3,
8+
minTimeoutInMs: 500,
9+
maxTimeoutInMs: 1000,
10+
factor: 1.5,
11+
},
12+
run: async (payload: any, { ctx }) => {
13+
await setTimeout(5);
14+
15+
await attemptFailureSubtask.triggerAndWait({}).unwrap();
16+
},
17+
});
18+
19+
export const attemptFailureSubtask = task({
20+
id: "attempt-failure-subtask",
21+
retry: {
22+
maxAttempts: 1,
23+
},
24+
run: async (payload: any, { ctx }) => {
25+
await setTimeout(20_000);
26+
27+
throw new Error("Forced error to cause a retry");
28+
},
29+
});
30+
31+
export const attemptFailures2 = task({
32+
id: "attempt-failures-2",
33+
retry: {
34+
maxAttempts: 3,
35+
minTimeoutInMs: 500,
36+
maxTimeoutInMs: 1000,
37+
factor: 1.5,
38+
},
39+
run: async (payload: any, { ctx }) => {
40+
if (ctx.attempt.number <= 2) {
41+
throw new Error("Forced error to cause a retry");
42+
}
43+
44+
await setTimeout(10_000);
45+
46+
return {
47+
success: true,
48+
};
49+
},
50+
});

0 commit comments

Comments
 (0)