fix(workflow-engines): race condition when retry interval is used (#11771)

This commit is contained in:
Adrien de Peretti
2025-03-12 13:53:34 +01:00
committed by GitHub
parent c97eaa0e0d
commit 72d2cf9207
24 changed files with 1130 additions and 235 deletions

View File

@@ -12,13 +12,9 @@ import {
Logger,
MedusaContainer,
} from "@medusajs/framework/types"
import { isString, TransactionState } from "@medusajs/framework/utils"
import {
InjectSharedContext,
isString,
MedusaContext,
TransactionState,
} from "@medusajs/framework/utils"
import {
FlowCancelOptions,
FlowRunOptions,
MedusaWorkflow,
resolveValue,
@@ -37,6 +33,11 @@ export type WorkflowOrchestratorRunOptions<T> = Omit<
container?: ContainerLike
}
export type WorkflowOrchestratorCancelOptions = Omit<
FlowCancelOptions,
"transaction"
>
type RegisterStepSuccessOptions<T> = Omit<
WorkflowOrchestratorRunOptions<T>,
"transactionId" | "input"
@@ -183,11 +184,9 @@ export class WorkflowOrchestratorService {
}
}
@InjectSharedContext()
async run<T = unknown>(
workflowIdOrWorkflow: string | ReturnWorkflow<any, any, any>,
options?: WorkflowOrchestratorRunOptions<T>,
@MedusaContext() sharedContext: Context = {}
options?: WorkflowOrchestratorRunOptions<T>
) {
const {
input,
@@ -239,6 +238,7 @@ export class WorkflowOrchestratorService {
const hasFinished = ret.transaction.hasFinished()
const metadata = ret.transaction.getFlow().metadata
const { parentStepIdempotencyKey } = metadata ?? {}
const hasFailed = [
TransactionState.REVERTED,
TransactionState.FAILED,
@@ -271,12 +271,115 @@ export class WorkflowOrchestratorService {
return { acknowledgement, ...ret }
}
@InjectSharedContext()
async cancel(
workflowIdOrWorkflow: string | ReturnWorkflow<any, any, any>,
options?: WorkflowOrchestratorCancelOptions
) {
const {
transactionId,
logOnError,
events: eventHandlers,
container,
} = options ?? {}
let { throwOnError, context } = options ?? {}
throwOnError ??= true
context ??= {}
const workflowId = isString(workflowIdOrWorkflow)
? workflowIdOrWorkflow
: workflowIdOrWorkflow.getName()
if (!workflowId) {
throw new Error("Workflow ID is required")
}
if (!transactionId) {
throw new Error("Transaction ID is required")
}
const events: FlowRunOptions["events"] = this.buildWorkflowEvents({
customEventHandlers: eventHandlers,
workflowId,
transactionId: transactionId,
})
const exportedWorkflow = MedusaWorkflow.getWorkflow(workflowId)
if (!exportedWorkflow) {
throw new Error(`Workflow with id "${workflowId}" not found.`)
}
const originalOnFinishHandler = events.onFinish!
delete events.onFinish
const transaction = await this.getRunningTransaction(
workflowId,
transactionId,
options
)
if (!transaction) {
if (!throwOnError) {
return {
acknowledgement: {
transactionId,
workflowId,
exists: false,
},
}
}
throw new Error("Transaction not found")
}
const ret = await exportedWorkflow.cancel({
transaction,
throwOnError: false,
logOnError,
context,
events,
container: container ?? this.container_,
})
const hasFinished = ret.transaction.hasFinished()
const metadata = ret.transaction.getFlow().metadata
const { parentStepIdempotencyKey } = metadata ?? {}
const hasFailed = [TransactionState.FAILED].includes(
ret.transaction.getFlow().state
)
const acknowledgement = {
transactionId: context.transactionId,
workflowId: workflowId,
parentStepIdempotencyKey,
hasFinished,
hasFailed,
exists: true,
}
if (hasFinished) {
const { result, errors } = ret
await originalOnFinishHandler({
transaction: ret.transaction,
result,
errors,
})
await this.triggerParentStep(ret.transaction, result)
}
if (throwOnError && ret.thrownError) {
throw ret.thrownError
}
return { acknowledgement, ...ret }
}
async getRunningTransaction(
workflowId: string,
transactionId: string,
options?: WorkflowOrchestratorRunOptions<undefined>,
@MedusaContext() sharedContext: Context = {}
options?: { context?: Context }
): Promise<DistributedTransactionType> {
let { context } = options ?? {}
@@ -289,7 +392,6 @@ export class WorkflowOrchestratorService {
}
context ??= {}
context.transactionId ??= transactionId
const exportedWorkflow: any = MedusaWorkflow.getWorkflow(workflowId)
if (!exportedWorkflow) {
@@ -304,19 +406,15 @@ export class WorkflowOrchestratorService {
return transaction
}
@InjectSharedContext()
async setStepSuccess<T = unknown>(
{
idempotencyKey,
stepResponse,
options,
}: {
idempotencyKey: string | IdempotencyKeyParts
stepResponse: unknown
options?: RegisterStepSuccessOptions<T>
},
@MedusaContext() sharedContext: Context = {}
) {
async setStepSuccess<T = unknown>({
idempotencyKey,
stepResponse,
options,
}: {
idempotencyKey: string | IdempotencyKeyParts
stepResponse: unknown
options?: RegisterStepSuccessOptions<T>
}) {
const {
context,
logOnError,
@@ -375,19 +473,15 @@ export class WorkflowOrchestratorService {
return ret
}
@InjectSharedContext()
async setStepFailure<T = unknown>(
{
idempotencyKey,
stepResponse,
options,
}: {
idempotencyKey: string | IdempotencyKeyParts
stepResponse: unknown
options?: RegisterStepSuccessOptions<T>
},
@MedusaContext() sharedContext: Context = {}
) {
async setStepFailure<T = unknown>({
idempotencyKey,
stepResponse,
options,
}: {
idempotencyKey: string | IdempotencyKeyParts
stepResponse: unknown
options?: RegisterStepSuccessOptions<T>
}) {
const {
context,
logOnError,
@@ -446,11 +540,12 @@ export class WorkflowOrchestratorService {
return ret
}
@InjectSharedContext()
subscribe(
{ workflowId, transactionId, subscriber, subscriberId }: SubscribeOptions,
@MedusaContext() sharedContext: Context = {}
) {
subscribe({
workflowId,
transactionId,
subscriber,
subscriberId,
}: SubscribeOptions) {
subscriber._id = subscriberId
const subscribers = this.subscribers.get(workflowId) ?? new Map()
@@ -487,11 +582,11 @@ export class WorkflowOrchestratorService {
this.subscribers.set(workflowId, subscribers)
}
@InjectSharedContext()
unsubscribe(
{ workflowId, transactionId, subscriberOrId }: UnsubscribeOptions,
@MedusaContext() sharedContext: Context = {}
) {
unsubscribe({
workflowId,
transactionId,
subscriberOrId,
}: UnsubscribeOptions) {
const subscribers = this.subscribers.get(workflowId) ?? new Map()
const filterSubscribers = (handlers: SubscriberHandler[]) => {

View File

@@ -75,7 +75,9 @@ export class WorkflowsModuleService<
await this.clearExpiredExecutions()
this.clearTimeout_ = setInterval(async () => {
await this.clearExpiredExecutions()
try {
await this.clearExpiredExecutions()
} catch {}
}, 1000 * 60 * 60)
},
}
@@ -90,11 +92,13 @@ export class WorkflowsModuleService<
> = {},
@MedusaContext() context: Context = {}
) {
options ??= {}
options.context ??= context
const ret = await this.workflowOrchestratorService_.run<
TWorkflow extends ReturnWorkflow<any, any, any>
? UnwrapWorkflowInputDataType<TWorkflow>
: unknown
>(workflowIdOrWorkflow, options, context)
>(workflowIdOrWorkflow, options)
return ret as any
}
@@ -108,7 +112,7 @@ export class WorkflowsModuleService<
return await this.workflowOrchestratorService_.getRunningTransaction(
workflowId,
transactionId,
context
{ context }
)
}
@@ -125,14 +129,14 @@ export class WorkflowsModuleService<
},
@MedusaContext() context: Context = {}
) {
return await this.workflowOrchestratorService_.setStepSuccess(
{
idempotencyKey,
stepResponse,
options,
} as any,
context
)
options ??= {}
options.context ??= context
return await this.workflowOrchestratorService_.setStepSuccess({
idempotencyKey,
stepResponse,
options,
} as any)
}
@InjectSharedContext()
@@ -148,14 +152,14 @@ export class WorkflowsModuleService<
},
@MedusaContext() context: Context = {}
) {
return await this.workflowOrchestratorService_.setStepFailure(
{
idempotencyKey,
stepResponse,
options,
} as any,
context
)
options ??= {}
options.context ??= context
return await this.workflowOrchestratorService_.setStepFailure({
idempotencyKey,
stepResponse,
options,
} as any)
}
@InjectSharedContext()
@@ -168,7 +172,7 @@ export class WorkflowsModuleService<
},
@MedusaContext() context: Context = {}
) {
return this.workflowOrchestratorService_.subscribe(args as any, context)
return this.workflowOrchestratorService_.subscribe(args as any)
}
@InjectSharedContext()
@@ -180,7 +184,7 @@ export class WorkflowsModuleService<
},
@MedusaContext() context: Context = {}
) {
return this.workflowOrchestratorService_.unsubscribe(args as any, context)
return this.workflowOrchestratorService_.unsubscribe(args as any)
}
private async clearExpiredExecutions() {

View File

@@ -4,15 +4,19 @@ import {
IDistributedSchedulerStorage,
IDistributedTransactionStorage,
SchedulerOptions,
SkipExecutionError,
TransactionCheckpoint,
TransactionFlow,
TransactionOptions,
TransactionStep,
} from "@medusajs/framework/orchestration"
import { Logger, ModulesSdkTypes } from "@medusajs/framework/types"
import {
isPresent,
MedusaError,
promiseAll,
TransactionState,
TransactionStepState,
} from "@medusajs/framework/utils"
import { WorkflowOrchestratorService } from "@services"
import { Queue, Worker } from "bullmq"
@@ -28,7 +32,6 @@ enum JobType {
export class RedisDistributedTransactionStorage
implements IDistributedTransactionStorage, IDistributedSchedulerStorage
{
private static TTL_AFTER_COMPLETED = 60 * 2 // 2 minutes
private workflowExecutionService_: ModulesSdkTypes.IMedusaInternalService<any>
private logger_: Logger
private workflowOrchestratorService_: WorkflowOrchestratorService
@@ -263,6 +266,12 @@ export class RedisDistributedTransactionStorage
const { retentionTime, idempotent } = options ?? {}
await this.#preventRaceConditionExecutionIfNecessary({
data,
key,
options,
})
if (hasFinished) {
Object.assign(data, {
retention_time: retentionTime,
@@ -286,12 +295,7 @@ export class RedisDistributedTransactionStorage
}
if (hasFinished) {
await this.redisClient.set(
key,
stringifiedData,
"EX",
RedisDistributedTransactionStorage.TTL_AFTER_COMPLETED
)
await this.redisClient.unlink(key)
}
}
@@ -457,4 +461,116 @@ export class RedisDistributedTransactionStorage
repeatableJobs.map((job) => this.jobQueue?.removeRepeatableByKey(job.key))
)
}
async #preventRaceConditionExecutionIfNecessary({
data,
key,
options,
}: {
data: TransactionCheckpoint
key: string
options?: TransactionOptions
}) {
let isInitialCheckpoint = false
if (data.flow.state === TransactionState.NOT_STARTED) {
isInitialCheckpoint = true
}
/**
* In case many execution can succeed simultaneously, we need to ensure that the latest
* execution does continue if a previous execution is considered finished
*/
const currentFlow = data.flow
const { flow: latestUpdatedFlow } =
(await this.get(key, options)) ??
({ flow: {} } as { flow: TransactionFlow })
if (!isInitialCheckpoint && !isPresent(latestUpdatedFlow)) {
/**
* the initial checkpoint expect no other checkpoint to have been stored.
* In case it is not the initial one and another checkpoint is trying to
* find if a concurrent execution has finished, we skip the execution.
* The already finished execution would have deleted the checkpoint already.
*/
throw new SkipExecutionError("Already finished by another execution")
}
const currentFlowLastInvokingStepIndex = Object.values(
currentFlow.steps
).findIndex((step) => {
return [
TransactionStepState.INVOKING,
TransactionStepState.NOT_STARTED,
].includes(step.invoke?.state)
})
const latestUpdatedFlowLastInvokingStepIndex = !latestUpdatedFlow.steps
? 1 // There is no other execution, so the current execution is the latest
: Object.values(
(latestUpdatedFlow.steps as Record<string, TransactionStep>) ?? {}
).findIndex((step) => {
return [
TransactionStepState.INVOKING,
TransactionStepState.NOT_STARTED,
].includes(step.invoke?.state)
})
const currentFlowLastCompensatingStepIndex = Object.values(
currentFlow.steps
)
.reverse()
.findIndex((step) => {
return [
TransactionStepState.COMPENSATING,
TransactionStepState.NOT_STARTED,
].includes(step.compensate?.state)
})
const latestUpdatedFlowLastCompensatingStepIndex = !latestUpdatedFlow.steps
? -1
: Object.values(
(latestUpdatedFlow.steps as Record<string, TransactionStep>) ?? {}
)
.reverse()
.findIndex((step) => {
return [
TransactionStepState.COMPENSATING,
TransactionStepState.NOT_STARTED,
].includes(step.compensate?.state)
})
const isLatestExecutionFinishedIndex = -1
const invokeShouldBeSkipped =
(latestUpdatedFlowLastInvokingStepIndex ===
isLatestExecutionFinishedIndex ||
currentFlowLastInvokingStepIndex <
latestUpdatedFlowLastInvokingStepIndex) &&
currentFlowLastInvokingStepIndex !== isLatestExecutionFinishedIndex
const compensateShouldBeSkipped =
currentFlowLastCompensatingStepIndex <
latestUpdatedFlowLastCompensatingStepIndex &&
currentFlowLastCompensatingStepIndex !== isLatestExecutionFinishedIndex &&
latestUpdatedFlowLastCompensatingStepIndex !==
isLatestExecutionFinishedIndex
if (
(data.flow.state !== TransactionState.COMPENSATING &&
invokeShouldBeSkipped) ||
(data.flow.state === TransactionState.COMPENSATING &&
compensateShouldBeSkipped) ||
(latestUpdatedFlow.state === TransactionState.COMPENSATING &&
![TransactionState.REVERTED, TransactionState.FAILED].includes(
currentFlow.state
) &&
currentFlow.state !== latestUpdatedFlow.state) ||
(latestUpdatedFlow.state === TransactionState.REVERTED &&
currentFlow.state !== TransactionState.REVERTED) ||
(latestUpdatedFlow.state === TransactionState.FAILED &&
currentFlow.state !== TransactionState.FAILED)
) {
throw new SkipExecutionError("Already finished by another execution")
}
}
}