chore(orchestration): add support for autoRetry, maxAwaitingRetries, retryStep (#13391)

RESOLVES CORE-1163
RESOLVES CORE-1164

**What**

### Add support for non auto retryable steps.

When a step is configured with `maxRetries` and fails, it is marked as a temporary failure and then retried automatically. That's the default behaviour. If you now add `autoRetry: false`, a failing step is still marked as a temporary failure but is not retried automatically; you can then call the workflow engine's run to resume the workflow from the failing step so that it is retried.

### Add support for `maxAwaitingRetries`

When setting `retryIntervalAwaiting`, a step that is awaiting is retried after the specified interval, with no maximum number of retries. Now you can set `maxAwaitingRetries` to enforce a maximum number of awaiting retries.

### Add support to manually retry an awaiting step

In some scenarios, either a machine dies while a step is executing or a step takes longer than expected. You can now call `retryStep` on the workflow engine to force a retry of the step that is presumably stuck.
This commit is contained in:
Adrien de Peretti
2025-09-08 14:46:30 +02:00
committed by GitHub
parent ac5e23b96c
commit d7692100e7
28 changed files with 1366 additions and 64 deletions

View File

@@ -25,6 +25,10 @@ export type FlowRegisterStepSuccessOptions<TData = unknown> =
response?: TData
}
/**
 * Options accepted when asking a workflow to retry one of its steps.
 *
 * Carries every base run option except `resultFrom`, plus the idempotency key
 * identifying the step to retry.
 */
export type FlowRetryStepOptions = Omit<BaseFlowRunOptions, "resultFrom"> & {
/**
 * Idempotency key of the step to retry. The contextual workflow runner reads
 * the second `:`-separated segment of this key as the transaction id.
 */
idempotencyKey: string
}
export type FlowRegisterStepFailureOptions<TData = unknown> =
BaseFlowRunOptions & {
idempotencyKey: string
@@ -93,6 +97,7 @@ export type ExportedWorkflow<
TResultOverride extends undefined ? TResult : TResultOverride
>
>
retryStep: (args?: FlowRetryStepOptions) => Promise<WorkflowResult>
cancel: (args?: FlowCancelOptions) => Promise<WorkflowResult>
}

View File

@@ -28,6 +28,7 @@ import {
FlowCancelOptions,
FlowRegisterStepFailureOptions,
FlowRegisterStepSuccessOptions,
FlowRetryStepOptions,
FlowRunOptions,
MainExportedWorkflow,
WorkflowResult,
@@ -53,7 +54,7 @@ function createContextualWorkflowRunner<
container?: LoadedModule[] | MedusaContainer
}): Omit<
LocalWorkflow,
"run" | "registerStepSuccess" | "registerStepFailure" | "cancel"
"run" | "registerStepSuccess" | "registerStepFailure" | "cancel" | "retryStep"
> &
ExportedWorkflow<TData, TResult, TDataOverride, TResultOverride> {
const flow = new LocalWorkflow(workflowId, container!)
@@ -62,6 +63,7 @@ function createContextualWorkflowRunner<
const originalRegisterStepSuccess = flow.registerStepSuccess.bind(flow)
const originalRegisterStepFailure = flow.registerStepFailure.bind(flow)
const originalCancel = flow.cancel.bind(flow)
const originalRetryStep = flow.retryStep.bind(flow)
const originalExecution = async (
method,
@@ -310,6 +312,46 @@ function createContextualWorkflowRunner<
}
flow.registerStepFailure = newRegisterStepFailure as any
/**
 * Contextual replacement for `flow.retryStep`.
 *
 * Normalises the caller's options (`idempotencyKey` defaults to `""`,
 * `throwOnError` to `true`, `logOnError` to `false`), builds the execution
 * context and delegates to `originalExecution` with the original, bound
 * `retryStep` implementation.
 *
 * @param options - Retry options; when omitted, an empty idempotency key is used.
 * @returns Whatever `originalExecution` resolves to for the retried step.
 */
const newRetryStep = async (
  options: FlowRetryStepOptions = { idempotencyKey: "" }
) => {
  const {
    idempotencyKey: requestedKey,
    context: callerContext,
    throwOnError: requestedThrowOnError,
    logOnError: requestedLogOnError,
    events,
    container,
  } = options

  const idempotencyKey = requestedKey ?? ""

  // The transaction id is the second `:`-separated segment of the key.
  const transactionId = idempotencyKey.split(":")[1]

  const context = {
    ...callerContext,
    transactionId,
    __type: MedusaContextType as Context["__type"],
  }

  // Only generate an event group id when the caller's context has none.
  if (context.eventGroupId === undefined || context.eventGroupId === null) {
    context.eventGroupId = ulid()
  }

  const executionOptions = {
    throwOnError: requestedThrowOnError ?? true,
    container,
    logOnError: requestedLogOnError ?? false,
  }

  return await originalExecution(
    originalRetryStep,
    executionOptions,
    idempotencyKey,
    undefined,
    context,
    events
  )
}
flow.retryStep = newRetryStep as any
const newCancel = async ({
transaction,
transactionId,
@@ -367,7 +409,11 @@ export const exportWorkflow = <TData = unknown, TResult = unknown>(
container?: LoadedModule[] | MedusaContainer
): Omit<
LocalWorkflow,
"run" | "registerStepSuccess" | "registerStepFailure" | "cancel"
| "run"
| "registerStepSuccess"
| "registerStepFailure"
| "cancel"
| "retryStep"
> &
ExportedWorkflow<TData, TResult, TDataOverride, TResultOverride> {
return createContextualWorkflowRunner<
@@ -388,11 +434,17 @@ export const exportWorkflow = <TData = unknown, TResult = unknown>(
| "run"
| "registerStepSuccess"
| "registerStepFailure"
| "cancel",
| "cancel"
| "retryStep",
TDataOverride,
TResultOverride
>(
action: "run" | "registerStepSuccess" | "registerStepFailure" | "cancel",
action:
| "run"
| "registerStepSuccess"
| "registerStepFailure"
| "cancel"
| "retryStep",
container?: LoadedModule[] | MedusaContainer
) => {
const contextualRunner = createContextualWorkflowRunner<
@@ -495,6 +547,22 @@ export const exportWorkflow = <TData = unknown, TResult = unknown>(
)(inputArgs)
}
/**
 * Retries a step of this workflow.
 *
 * The optional `container` is split off from the remaining options and handed
 * to `buildRunnerFn`; the other options are forwarded to the resulting
 * `retryStep` runner.
 *
 * @param args - Retry options, including the idempotency key of the step and
 *   optionally the container to run with.
 * @returns The result produced by the contextual `retryStep` runner.
 */
exportedWorkflow.retryStep = async <
  TDataOverride = undefined,
  TResultOverride = undefined
>(
  args?: FlowRetryStepOptions
): Promise<WorkflowResult> => {
  // Use object rest instead of `delete args.container`: the caller's options
  // object is left untouched, so it can safely be reused for another call
  // without silently losing its container.
  const { container, ...inputArgs } = (args ?? {}) as FlowRetryStepOptions

  return await buildRunnerFn<"retryStep", TDataOverride, TResultOverride>(
    "retryStep",
    container
  )(inputArgs as FlowRetryStepOptions)
}
exportedWorkflow.cancel = async (
args?: FlowCancelOptions
): Promise<WorkflowResult> => {

View File

@@ -268,7 +268,11 @@ export type ReturnWorkflow<TData, TResult, THooks extends any[]> = {
container?: LoadedModule[] | MedusaContainer
): Omit<
LocalWorkflow,
"run" | "registerStepSuccess" | "registerStepFailure" | "cancel"
| "run"
| "registerStepSuccess"
| "registerStepFailure"
| "cancel"
| "retryStep"
> &
ExportedWorkflow<TData, TResult, TDataOverride, TResultOverride>
} & {