chore(orchestration): add support for autoRetry, maxAwaitingRetries, retryStep (#13391)
RESOLVES CORE-1163 RESOLVES CORE-1164 **What** ### Add support for non auto retryable steps. When marking a step with `maxRetries`, when it will fail it will be marked as temporary failure and then retry itself automatically. Thats the default behaviour, if you now add `autoRetry: false`, when the step will fail it will be marked as temporary failure but not retry automatically. you can now call the workflow engine run to resume the workflow from the failing step to be retried. ### Add support for `maxAwaitingRetries` When setting `retyIntervalAwaiting` a step that is awaiting will be retried after the specified interval without maximun retry. Now you can set `maxAwaitingRetries` to force a maximum awaiting retry number ### Add support to manually retry an awaiting step In some scenario, either a machine dies while a step is executing or a step is taking longer than expected, you can now call `retryStep` on the workflow engine to force a retry of the step that is supposedly stucked
This commit is contained in:
committed by
GitHub
parent
ac5e23b96c
commit
d7692100e7
@@ -648,6 +648,116 @@ describe("Transaction Orchestrator", () => {
|
||||
)
|
||||
})
|
||||
|
||||
it("Should not retry steps X times automatically when flag 'autoRetry' is set to false and then compensate steps afterward", async () => {
|
||||
const mocks = {
|
||||
one: jest.fn().mockImplementation((payload) => {
|
||||
return payload
|
||||
}),
|
||||
compensateOne: jest.fn().mockImplementation((payload) => {
|
||||
return payload
|
||||
}),
|
||||
two: jest.fn().mockImplementation((payload) => {
|
||||
throw new Error()
|
||||
}),
|
||||
compensateTwo: jest.fn().mockImplementation((payload) => {
|
||||
return payload
|
||||
}),
|
||||
}
|
||||
|
||||
async function handler(
|
||||
actionId: string,
|
||||
functionHandlerType: TransactionHandlerType,
|
||||
payload: TransactionPayload
|
||||
) {
|
||||
const command = {
|
||||
firstMethod: {
|
||||
[TransactionHandlerType.INVOKE]: () => {
|
||||
mocks.one(payload)
|
||||
},
|
||||
[TransactionHandlerType.COMPENSATE]: () => {
|
||||
mocks.compensateOne(payload)
|
||||
},
|
||||
},
|
||||
secondMethod: {
|
||||
[TransactionHandlerType.INVOKE]: () => {
|
||||
mocks.two(payload)
|
||||
},
|
||||
[TransactionHandlerType.COMPENSATE]: () => {
|
||||
mocks.compensateTwo(payload)
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return command[actionId][functionHandlerType](payload)
|
||||
}
|
||||
|
||||
const flow: TransactionStepsDefinition = {
|
||||
next: {
|
||||
action: "firstMethod",
|
||||
maxRetries: 3,
|
||||
autoRetry: false,
|
||||
next: {
|
||||
action: "secondMethod",
|
||||
maxRetries: 3,
|
||||
autoRetry: false,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
const strategy = new TransactionOrchestrator({
|
||||
id: "transaction-name",
|
||||
definition: flow,
|
||||
})
|
||||
|
||||
const transaction = await strategy.beginTransaction({
|
||||
transactionId: "transaction_id_123",
|
||||
handler,
|
||||
})
|
||||
|
||||
await strategy.resume(transaction)
|
||||
|
||||
expect(transaction.transactionId).toBe("transaction_id_123")
|
||||
|
||||
expect(mocks.one).toHaveBeenCalledTimes(1)
|
||||
expect(mocks.two).toHaveBeenCalledTimes(1)
|
||||
|
||||
await strategy.resume(transaction)
|
||||
|
||||
expect(mocks.one).toHaveBeenCalledTimes(1)
|
||||
expect(mocks.two).toHaveBeenCalledTimes(2)
|
||||
|
||||
await strategy.resume(transaction)
|
||||
|
||||
expect(mocks.one).toHaveBeenCalledTimes(1)
|
||||
expect(mocks.two).toHaveBeenCalledTimes(3)
|
||||
|
||||
await strategy.resume(transaction)
|
||||
|
||||
expect(mocks.one).toHaveBeenCalledTimes(1)
|
||||
expect(mocks.two).toHaveBeenCalledTimes(4)
|
||||
|
||||
expect(transaction.getState()).toBe(TransactionState.REVERTED)
|
||||
expect(mocks.compensateOne).toHaveBeenCalledTimes(1)
|
||||
|
||||
expect(mocks.two).nthCalledWith(
|
||||
1,
|
||||
expect.objectContaining({
|
||||
metadata: expect.objectContaining({
|
||||
attempt: 1,
|
||||
}),
|
||||
})
|
||||
)
|
||||
|
||||
expect(mocks.two).nthCalledWith(
|
||||
4,
|
||||
expect.objectContaining({
|
||||
metadata: expect.objectContaining({
|
||||
attempt: 4,
|
||||
}),
|
||||
})
|
||||
)
|
||||
})
|
||||
|
||||
it("Should fail a transaction if any step fails after retrying X time to compensate it", async () => {
|
||||
const mocks = {
|
||||
one: jest.fn().mockImplementation((payload) => {
|
||||
|
||||
@@ -99,6 +99,22 @@ export class SkipExecutionError extends Error {
|
||||
}
|
||||
}
|
||||
|
||||
export class SkipStepAlreadyFinishedError extends Error {
|
||||
static isSkipStepAlreadyFinishedError(
|
||||
error: Error
|
||||
): error is SkipStepAlreadyFinishedError {
|
||||
return (
|
||||
error instanceof SkipStepAlreadyFinishedError ||
|
||||
error?.name === "SkipStepAlreadyFinishedError"
|
||||
)
|
||||
}
|
||||
|
||||
constructor(message?: string) {
|
||||
super(message)
|
||||
this.name = "SkipStepAlreadyFinishedError"
|
||||
}
|
||||
}
|
||||
|
||||
export class SkipCancelledExecutionError extends Error {
|
||||
static isSkipCancelledExecutionError(
|
||||
error: Error
|
||||
|
||||
@@ -33,6 +33,7 @@ import {
|
||||
PermanentStepFailureError,
|
||||
SkipCancelledExecutionError,
|
||||
SkipExecutionError,
|
||||
SkipStepAlreadyFinishedError,
|
||||
SkipStepResponse,
|
||||
TransactionStepTimeoutError,
|
||||
TransactionTimeoutError,
|
||||
@@ -117,7 +118,8 @@ export class TransactionOrchestrator extends EventEmitter {
|
||||
private static isExpectedError(error: Error): boolean {
|
||||
return (
|
||||
SkipCancelledExecutionError.isSkipCancelledExecutionError(error) ||
|
||||
SkipExecutionError.isSkipExecutionError(error)
|
||||
SkipExecutionError.isSkipExecutionError(error) ||
|
||||
SkipStepAlreadyFinishedError.isSkipStepAlreadyFinishedError(error)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -415,10 +417,24 @@ export class TransactionOrchestrator extends EventEmitter {
|
||||
stepDef.definition.retryIntervalAwaiting!
|
||||
)
|
||||
}
|
||||
} else if (stepDef.retryRescheduledAt) {
|
||||
// The step is not configured for awaiting retry but is manually force to retry
|
||||
stepDef.retryRescheduledAt = null
|
||||
nextSteps.push(stepDef)
|
||||
}
|
||||
|
||||
continue
|
||||
} else if (curState.status === TransactionStepStatus.TEMPORARY_FAILURE) {
|
||||
if (
|
||||
!stepDef.temporaryFailedAt &&
|
||||
stepDef.definition.autoRetry === false
|
||||
) {
|
||||
stepDef.temporaryFailedAt = Date.now()
|
||||
continue
|
||||
}
|
||||
|
||||
stepDef.temporaryFailedAt = null
|
||||
|
||||
currentSteps.push(stepDef)
|
||||
|
||||
if (!stepDef.canRetry()) {
|
||||
@@ -565,6 +581,27 @@ export class TransactionOrchestrator extends EventEmitter {
|
||||
}
|
||||
}
|
||||
|
||||
private static async retryStep(
|
||||
transaction: DistributedTransactionType,
|
||||
step: TransactionStep
|
||||
): Promise<void> {
|
||||
if (!step.retryRescheduledAt) {
|
||||
step.hasScheduledRetry = true
|
||||
step.retryRescheduledAt = Date.now()
|
||||
}
|
||||
|
||||
transaction.getFlow().hasWaitingSteps = true
|
||||
|
||||
try {
|
||||
await transaction.saveCheckpoint()
|
||||
await transaction.scheduleRetry(step, 0)
|
||||
} catch (error) {
|
||||
if (!TransactionOrchestrator.isExpectedError(error)) {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static async skipStep({
|
||||
transaction,
|
||||
step,
|
||||
@@ -677,10 +714,13 @@ export class TransactionOrchestrator extends EventEmitter {
|
||||
stopExecution: boolean
|
||||
transactionIsCancelling?: boolean
|
||||
}> {
|
||||
const result = {
|
||||
stopExecution: false,
|
||||
transactionIsCancelling: false,
|
||||
}
|
||||
|
||||
if (SkipExecutionError.isSkipExecutionError(error)) {
|
||||
return {
|
||||
stopExecution: false,
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
step.failures++
|
||||
@@ -773,10 +813,21 @@ export class TransactionOrchestrator extends EventEmitter {
|
||||
if (step.hasTimeout()) {
|
||||
cleaningUp.push(transaction.clearStepTimeout(step))
|
||||
}
|
||||
} else {
|
||||
const isAsync = step.isCompensating()
|
||||
? step.definition.compensateAsync
|
||||
: step.definition.async
|
||||
|
||||
if (
|
||||
step.getStates().status === TransactionStepStatus.TEMPORARY_FAILURE &&
|
||||
step.definition.autoRetry === false &&
|
||||
isAsync
|
||||
) {
|
||||
step.temporaryFailedAt = Date.now()
|
||||
result.stopExecution = true
|
||||
}
|
||||
}
|
||||
|
||||
let transactionIsCancelling = false
|
||||
let shouldEmit = true
|
||||
try {
|
||||
await transaction.saveCheckpoint()
|
||||
} catch (error) {
|
||||
@@ -784,11 +835,11 @@ export class TransactionOrchestrator extends EventEmitter {
|
||||
throw error
|
||||
}
|
||||
|
||||
transactionIsCancelling =
|
||||
result.transactionIsCancelling =
|
||||
SkipCancelledExecutionError.isSkipCancelledExecutionError(error)
|
||||
|
||||
if (SkipExecutionError.isSkipExecutionError(error)) {
|
||||
shouldEmit = false
|
||||
result.stopExecution = true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -798,7 +849,7 @@ export class TransactionOrchestrator extends EventEmitter {
|
||||
|
||||
await promiseAll(cleaningUp)
|
||||
|
||||
if (shouldEmit) {
|
||||
if (!result.stopExecution) {
|
||||
const eventName = step.isCompensating()
|
||||
? DistributedTransactionEvent.COMPENSATE_STEP_FAILURE
|
||||
: DistributedTransactionEvent.STEP_FAILURE
|
||||
@@ -806,8 +857,8 @@ export class TransactionOrchestrator extends EventEmitter {
|
||||
}
|
||||
|
||||
return {
|
||||
stopExecution: !shouldEmit,
|
||||
transactionIsCancelling,
|
||||
stopExecution: result.stopExecution,
|
||||
transactionIsCancelling: result.transactionIsCancelling,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1732,6 +1783,50 @@ export class TransactionOrchestrator extends EventEmitter {
|
||||
return curTransaction
|
||||
}
|
||||
|
||||
/**
|
||||
* Manually force a step to retry even if it is still in awaiting status
|
||||
* @param responseIdempotencyKey - The idempotency key for the step
|
||||
* @param handler - The handler function to execute the step
|
||||
* @param transaction - The current transaction. If not provided it will be loaded based on the responseIdempotencyKey
|
||||
*/
|
||||
public async retryStep({
|
||||
responseIdempotencyKey,
|
||||
handler,
|
||||
transaction,
|
||||
onLoad,
|
||||
}: {
|
||||
responseIdempotencyKey: string
|
||||
handler?: TransactionStepHandler
|
||||
transaction?: DistributedTransactionType
|
||||
onLoad?: (transaction: DistributedTransactionType) => Promise<void> | void
|
||||
}): Promise<DistributedTransactionType> {
|
||||
const [curTransaction, step] =
|
||||
await TransactionOrchestrator.getTransactionAndStepFromIdempotencyKey(
|
||||
responseIdempotencyKey,
|
||||
handler,
|
||||
transaction
|
||||
)
|
||||
|
||||
if (onLoad) {
|
||||
await onLoad(curTransaction)
|
||||
}
|
||||
|
||||
if (step.getStates().status === TransactionStepStatus.WAITING) {
|
||||
this.emit(DistributedTransactionEvent.RESUME, {
|
||||
transaction: curTransaction,
|
||||
})
|
||||
|
||||
await TransactionOrchestrator.retryStep(curTransaction, step)
|
||||
} else {
|
||||
throw new MedusaError(
|
||||
MedusaError.Types.NOT_ALLOWED,
|
||||
`Cannot retry step when status is ${step.getStates().status}`
|
||||
)
|
||||
}
|
||||
|
||||
return curTransaction
|
||||
}
|
||||
|
||||
/** Register a step success for a specific transaction and step
|
||||
* @param responseIdempotencyKey - The idempotency key for the step
|
||||
* @param handler - The handler function to execute the step
|
||||
|
||||
@@ -54,6 +54,7 @@ export class TransactionStep {
|
||||
}
|
||||
attempts: number
|
||||
failures: number
|
||||
temporaryFailedAt: number | null
|
||||
lastAttempt: number | null
|
||||
retryRescheduledAt: number | null
|
||||
hasScheduledRetry: boolean
|
||||
@@ -189,7 +190,9 @@ export class TransactionStep {
|
||||
this.hasAwaitingRetry() &&
|
||||
this.lastAttempt &&
|
||||
Date.now() - this.lastAttempt >
|
||||
this.definition.retryIntervalAwaiting! * 1e3
|
||||
this.definition.retryIntervalAwaiting! * 1e3 &&
|
||||
(!("maxAwaitingRetries" in this.definition) ||
|
||||
this.attempts < this.definition.maxAwaitingRetries!)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -199,7 +202,8 @@ export class TransactionStep {
|
||||
(!this.isCompensating() &&
|
||||
state === TransactionStepState.NOT_STARTED &&
|
||||
flowState === TransactionState.INVOKING) ||
|
||||
status === TransactionStepStatus.TEMPORARY_FAILURE
|
||||
(status === TransactionStepStatus.TEMPORARY_FAILURE &&
|
||||
!this.temporaryFailedAt)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -47,6 +47,12 @@ export type TransactionStepsDefinition = {
|
||||
*/
|
||||
maxRetries?: number
|
||||
|
||||
/**
|
||||
* If true, the step will be retried automatically in case of a temporary failure.
|
||||
* The default is true.
|
||||
*/
|
||||
autoRetry?: boolean
|
||||
|
||||
/**
|
||||
* The interval (in seconds) between retry attempts after a temporary failure.
|
||||
* The default is to retry immediately.
|
||||
@@ -58,6 +64,11 @@ export type TransactionStepsDefinition = {
|
||||
*/
|
||||
retryIntervalAwaiting?: number
|
||||
|
||||
/**
|
||||
* The maximum number of times to retry a step even if its status is "TransactionStepStatus.WAITING".
|
||||
*/
|
||||
maxAwaitingRetries?: number
|
||||
|
||||
/**
|
||||
* The maximum amount of time (in seconds) to wait for this step to complete.
|
||||
* This is NOT an execution timeout, the step will always be executed and wait for its response.
|
||||
@@ -279,6 +290,7 @@ export type TransactionFlow = {
|
||||
timedOutAt: number | null
|
||||
startedAt?: number
|
||||
cancelledAt?: number
|
||||
temporaryFailedAt?: number | null
|
||||
state: TransactionState
|
||||
steps: {
|
||||
[key: string]: TransactionStep
|
||||
|
||||
@@ -435,6 +435,33 @@ export class LocalWorkflow {
|
||||
}
|
||||
}
|
||||
|
||||
async retryStep(
|
||||
idempotencyKey: string,
|
||||
context?: Context,
|
||||
subscribe?: DistributedTransactionEvents
|
||||
): Promise<DistributedTransactionType> {
|
||||
this.medusaContext = context
|
||||
const { handler, orchestrator } = this.workflow
|
||||
|
||||
const { cleanUpEventListeners } = this.registerEventCallbacks({
|
||||
orchestrator,
|
||||
idempotencyKey,
|
||||
subscribe,
|
||||
})
|
||||
|
||||
const transaction = await orchestrator.retryStep({
|
||||
responseIdempotencyKey: idempotencyKey,
|
||||
handler: handler(this.container_, context),
|
||||
onLoad: this.onLoad.bind(this),
|
||||
})
|
||||
|
||||
try {
|
||||
return transaction
|
||||
} finally {
|
||||
cleanUpEventListeners()
|
||||
}
|
||||
}
|
||||
|
||||
async registerStepSuccess(
|
||||
idempotencyKey: string,
|
||||
response?: unknown,
|
||||
|
||||
Reference in New Issue
Block a user