fix(workflow-engines): race condition when retry interval is used (#11771)

This commit is contained in:
Adrien de Peretti
2025-03-12 13:53:34 +01:00
committed by GitHub
parent c97eaa0e0d
commit 72d2cf9207
24 changed files with 1130 additions and 235 deletions

View File

@@ -3,12 +3,19 @@ import {
IDistributedSchedulerStorage,
IDistributedTransactionStorage,
SchedulerOptions,
SkipExecutionError,
TransactionCheckpoint,
TransactionFlow,
TransactionOptions,
TransactionStep,
} from "@medusajs/framework/orchestration"
import { Logger, ModulesSdkTypes } from "@medusajs/framework/types"
import { MedusaError, TransactionState } from "@medusajs/framework/utils"
import {
MedusaError,
TransactionState,
TransactionStepState,
isPresent,
} from "@medusajs/framework/utils"
import { WorkflowOrchestratorService } from "@services"
import { CronExpression, parseExpression } from "cron-parser"
@@ -121,8 +128,6 @@ export class InMemoryDistributedTransactionStorage
ttl?: number,
options?: TransactionOptions
): Promise<void> {
this.storage.set(key, data)
/**
* Store the retention time only if the transaction is done, failed or reverted.
* From that moment, this tuple can be later on archived or deleted after the retention time.
@@ -135,11 +140,16 @@ export class InMemoryDistributedTransactionStorage
const { retentionTime, idempotent } = options ?? {}
if (hasFinished) {
Object.assign(data, {
retention_time: retentionTime,
})
}
await this.#preventRaceConditionExecutionIfNecessary({
data,
key,
options,
})
Object.assign(data, {
retention_time: retentionTime,
})
this.storage.set(key, data)
if (hasFinished && !retentionTime && !idempotent) {
await this.deleteFromDb(data)
@@ -152,6 +162,118 @@ export class InMemoryDistributedTransactionStorage
}
}
/**
 * Guards a checkpoint write against concurrent executions of the same
 * transaction.
 *
 * The checkpoint currently stored under `key` is loaded and its flow is
 * compared with the flow about to be saved (`data.flow`). When the stored
 * flow shows that another execution has already progressed further, or has
 * already moved to a compensating / reverted / failed state that the current
 * flow does not share, the current execution must not continue.
 *
 * @param data - the checkpoint about to be stored
 * @param key - the storage key of the checkpoint
 * @param options - transaction options, forwarded to `this.get`
 * @throws SkipExecutionError when the current execution is considered stale
 */
async #preventRaceConditionExecutionIfNecessary({
  data,
  key,
  options,
}: {
  data: TransactionCheckpoint
  key: string
  options?: TransactionOptions
}) {
  const isInitialCheckpoint =
    data.flow.state === TransactionState.NOT_STARTED

  /**
   * Several executions may succeed simultaneously, so the flow being saved is
   * compared against the most recently stored one before going any further.
   */
  const currentFlow = data.flow
  const storedCheckpoint = await this.get(key, options)
  const { flow: latestUpdatedFlow } =
    storedCheckpoint ?? ({ flow: {} } as { flow: TransactionFlow })

  if (!isPresent(latestUpdatedFlow) && !isInitialCheckpoint) {
    /**
     * Only the initial checkpoint expects nothing to be stored yet. Any later
     * checkpoint that finds no stored flow is treated as racing with an
     * execution that already finished and removed its checkpoint.
     */
    throw new SkipExecutionError("Already finished by another execution")
  }

  // findIndex result meaning "no step left to process in that direction".
  const isLatestExecutionFinishedIndex = -1

  // Position of the first step whose invoke is still running or not started.
  const firstPendingInvokeIndex = (steps: Record<string, TransactionStep>) =>
    Object.values(steps).findIndex((step) =>
      [
        TransactionStepState.INVOKING,
        TransactionStepState.NOT_STARTED,
      ].includes(step.invoke?.state)
    )

  // Same scan for compensation, counted from the end of the step list.
  const firstPendingCompensateIndex = (
    steps: Record<string, TransactionStep>
  ) =>
    Object.values(steps)
      .reverse()
      .findIndex((step) =>
        [
          TransactionStepState.COMPENSATING,
          TransactionStepState.NOT_STARTED,
        ].includes(step.compensate?.state)
      )

  const currentFlowLastInvokingStepIndex = firstPendingInvokeIndex(
    currentFlow.steps
  )

  let latestUpdatedFlowLastInvokingStepIndex: number
  if (!latestUpdatedFlow.steps) {
    // There is no other execution, so the current execution is the latest.
    // NOTE(review): this fallback is 1 while the compensation fallback below
    // is -1; presumably the first entry of `steps` carries no invoke state.
    // Confirm before changing.
    latestUpdatedFlowLastInvokingStepIndex = 1
  } else {
    latestUpdatedFlowLastInvokingStepIndex = firstPendingInvokeIndex(
      latestUpdatedFlow.steps as Record<string, TransactionStep>
    )
  }

  const currentFlowLastCompensatingStepIndex = firstPendingCompensateIndex(
    currentFlow.steps
  )

  let latestUpdatedFlowLastCompensatingStepIndex: number
  if (!latestUpdatedFlow.steps) {
    // There is no other execution, so the current execution is the latest.
    latestUpdatedFlowLastCompensatingStepIndex = -1
  } else {
    latestUpdatedFlowLastCompensatingStepIndex = firstPendingCompensateIndex(
      latestUpdatedFlow.steps as Record<string, TransactionStep>
    )
  }

  // The current flow still has invoke work pending, but the stored flow has
  // either no pending invoke left or its first pending invoke is further on.
  const invokeShouldBeSkipped =
    currentFlowLastInvokingStepIndex !== isLatestExecutionFinishedIndex &&
    (latestUpdatedFlowLastInvokingStepIndex ===
      isLatestExecutionFinishedIndex ||
      currentFlowLastInvokingStepIndex <
        latestUpdatedFlowLastInvokingStepIndex)

  // Both flows still have compensation pending and the current flow's index
  // is lower than the stored flow's.
  const compensateShouldBeSkipped =
    currentFlowLastCompensatingStepIndex !== isLatestExecutionFinishedIndex &&
    latestUpdatedFlowLastCompensatingStepIndex !==
      isLatestExecutionFinishedIndex &&
    currentFlowLastCompensatingStepIndex <
      latestUpdatedFlowLastCompensatingStepIndex

  // Pick the progress check that matches the direction of the current flow.
  const isStaleByProgress =
    currentFlow.state === TransactionState.COMPENSATING
      ? compensateShouldBeSkipped
      : invokeShouldBeSkipped

  // The stored flow is compensating while the current flow is in a different
  // state that is neither reverted nor failed.
  const latestStartedCompensating =
    latestUpdatedFlow.state === TransactionState.COMPENSATING &&
    ![TransactionState.REVERTED, TransactionState.FAILED].includes(
      currentFlow.state
    ) &&
    currentFlow.state !== latestUpdatedFlow.state

  // The stored flow is reverted or failed and the current flow is not in that
  // same state.
  const latestReachedOtherFinalState = [
    TransactionState.REVERTED,
    TransactionState.FAILED,
  ].some(
    (finalState) =>
      latestUpdatedFlow.state === finalState &&
      currentFlow.state !== finalState
  )

  if (
    isStaleByProgress ||
    latestStartedCompensating ||
    latestReachedOtherFinalState
  ) {
    throw new SkipExecutionError("Already finished by another execution")
  }
}
async scheduleRetry(
transaction: DistributedTransactionType,
step: TransactionStep,