fix: workflow async concurrency (#13769)
* executeAsync * || 1 * wip * stepId * stepId * wip * wip * continue versioning management changes * fix and improve concurrency * update in memory engine * remove duplicated test * fix script * Create weak-drinks-confess.md * fixes * fix * fix * continuation * centralize merge checkepoint * centralize merge checkpoint * fix locking * rm only * Continue improvements and fixes * fixes * fixes * hasAwaiting will be recomputed * fix orchestrator engine * bump version on async parallel steps only * mark as delivered fix * changeset * check partitions * avoid saving when having parent step * cart test --------- Co-authored-by: Carlos R. L. Rodrigues <rodrigolr@gmail.com> Co-authored-by: Carlos R. L. Rodrigues <37986729+carlos-r-l-rodrigues@users.noreply.github.com> Co-authored-by: Oli Juhl <59018053+olivermrbl@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
d97a60d3c1
commit
516f5a3896
+187
-256
@@ -36,24 +36,22 @@ enum JobType {
|
||||
const THIRTY_MINUTES_IN_MS = 1000 * 60 * 30
|
||||
const REPEATABLE_CLEARER_JOB_ID = "clear-expired-executions"
|
||||
|
||||
const invokingStatesSet = new Set([
|
||||
TransactionStepState.INVOKING,
|
||||
TransactionStepState.NOT_STARTED,
|
||||
])
|
||||
const doneStates = [
|
||||
TransactionStepState.DONE,
|
||||
TransactionStepState.REVERTED,
|
||||
TransactionStepState.FAILED,
|
||||
TransactionStepState.SKIPPED,
|
||||
TransactionStepState.SKIPPED_FAILURE,
|
||||
TransactionStepState.TIMEOUT,
|
||||
]
|
||||
|
||||
const compensatingStatesSet = new Set([
|
||||
TransactionStepState.COMPENSATING,
|
||||
TransactionStepState.NOT_STARTED,
|
||||
])
|
||||
|
||||
function isInvokingState(step: TransactionStep) {
|
||||
return invokingStatesSet.has(step.invoke?.state)
|
||||
}
|
||||
|
||||
function isCompensatingState(step: TransactionStep) {
|
||||
return compensatingStatesSet.has(step.compensate?.state)
|
||||
}
|
||||
const finishedStates = [
|
||||
TransactionState.DONE,
|
||||
TransactionState.FAILED,
|
||||
TransactionState.REVERTED,
|
||||
]
|
||||
|
||||
const failedStates = [TransactionState.FAILED, TransactionState.REVERTED]
|
||||
export class RedisDistributedTransactionStorage
|
||||
implements IDistributedTransactionStorage, IDistributedSchedulerStorage
|
||||
{
|
||||
@@ -152,11 +150,17 @@ export class RedisDistributedTransactionStorage
|
||||
} with the following data: ${JSON.stringify(job.data)}`
|
||||
)
|
||||
if (allowedJobs.includes(job.name as JobType)) {
|
||||
await this.executeTransaction(
|
||||
job.data.workflowId,
|
||||
job.data.transactionId,
|
||||
job.data.transactionMetadata
|
||||
)
|
||||
try {
|
||||
await this.executeTransaction(
|
||||
job.data.workflowId,
|
||||
job.data.transactionId,
|
||||
job.data.transactionMetadata
|
||||
)
|
||||
} catch (error) {
|
||||
if (!SkipExecutionError.isSkipExecutionError(error)) {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (job.name === JobType.SCHEDULE) {
|
||||
@@ -273,29 +277,12 @@ export class RedisDistributedTransactionStorage
|
||||
|
||||
private async saveToDb(data: TransactionCheckpoint, retentionTime?: number) {
|
||||
const isNotStarted = data.flow.state === TransactionState.NOT_STARTED
|
||||
const isFinished = [
|
||||
TransactionState.DONE,
|
||||
TransactionState.FAILED,
|
||||
TransactionState.REVERTED,
|
||||
].includes(data.flow.state)
|
||||
const asyncVersion = data.flow._v
|
||||
|
||||
const isFinished = finishedStates.includes(data.flow.state)
|
||||
const isWaitingToCompensate =
|
||||
data.flow.state === TransactionState.WAITING_TO_COMPENSATE
|
||||
|
||||
/**
|
||||
* Bit of explanation:
|
||||
*
|
||||
* When a workflow run, it run all sync step in memory until it reaches a async step.
|
||||
* In that case, it might handover to another process to continue the execution. Thats why
|
||||
* we need to save the current state of the flow. Then from there, it will run again all
|
||||
* sync steps until the next async step. an so on so forth.
|
||||
*
|
||||
* To summarize, we only trully need to save the data when we are reaching any steps that
|
||||
* trigger a handover to a potential other process.
|
||||
*
|
||||
* This allows us to spare some resources and time by not over communicating with the external
|
||||
* database when it is not really needed
|
||||
*/
|
||||
|
||||
const isFlowInvoking = data.flow.state === TransactionState.INVOKING
|
||||
|
||||
const stepsArray = Object.values(data.flow.steps) as TransactionStep[]
|
||||
@@ -334,7 +321,8 @@ export class RedisDistributedTransactionStorage
|
||||
|
||||
if (
|
||||
!(isNotStarted || isFinished || isWaitingToCompensate) &&
|
||||
!currentStepsIsAsync
|
||||
!currentStepsIsAsync &&
|
||||
!asyncVersion
|
||||
) {
|
||||
return
|
||||
}
|
||||
@@ -440,10 +428,7 @@ export class RedisDistributedTransactionStorage
|
||||
const execution = trx.execution as TransactionFlow
|
||||
|
||||
if (!idempotent) {
|
||||
const isFailedOrReverted = [
|
||||
TransactionState.REVERTED,
|
||||
TransactionState.FAILED,
|
||||
].includes(execution.state)
|
||||
const isFailedOrReverted = failedStates.includes(execution.state)
|
||||
|
||||
const isDone = execution.state === TransactionState.DONE
|
||||
|
||||
@@ -461,11 +446,11 @@ export class RedisDistributedTransactionStorage
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
flow: flow ?? (trx.execution as TransactionFlow),
|
||||
context: trx.context?.data as TransactionContext,
|
||||
errors: errors ?? (trx.context?.errors as TransactionStepError[]),
|
||||
}
|
||||
return new TransactionCheckpoint(
|
||||
flow ?? (trx.execution as TransactionFlow),
|
||||
trx.context?.data as TransactionContext,
|
||||
errors ?? (trx.context?.errors as TransactionStepError[])
|
||||
)
|
||||
}
|
||||
|
||||
return
|
||||
@@ -476,94 +461,120 @@ export class RedisDistributedTransactionStorage
|
||||
data: TransactionCheckpoint,
|
||||
ttl?: number,
|
||||
options?: TransactionOptions
|
||||
): Promise<void> {
|
||||
): Promise<TransactionCheckpoint> {
|
||||
/**
|
||||
* Store the retention time only if the transaction is done, failed or reverted.
|
||||
* From that moment, this tuple can be later on archived or deleted after the retention time.
|
||||
*/
|
||||
const hasFinished = [
|
||||
TransactionState.DONE,
|
||||
TransactionState.FAILED,
|
||||
TransactionState.REVERTED,
|
||||
].includes(data.flow.state)
|
||||
|
||||
const { retentionTime } = options ?? {}
|
||||
|
||||
await this.#preventRaceConditionExecutionIfNecessary({
|
||||
data,
|
||||
key,
|
||||
options,
|
||||
})
|
||||
let lockAcquired = false
|
||||
|
||||
if (hasFinished && retentionTime) {
|
||||
Object.assign(data, {
|
||||
retention_time: retentionTime,
|
||||
})
|
||||
}
|
||||
if (data.flow._v) {
|
||||
lockAcquired = await this.#acquireLock(key)
|
||||
|
||||
// Only set if not exists
|
||||
const shouldSetNX =
|
||||
data.flow.state === TransactionState.NOT_STARTED &&
|
||||
!data.flow.transactionId.startsWith("auto-")
|
||||
|
||||
// Prepare operations to be executed in batch or pipeline
|
||||
const data_ = {
|
||||
errors: data.errors,
|
||||
flow: data.flow,
|
||||
}
|
||||
const stringifiedData = JSON.stringify(data_)
|
||||
const pipeline = this.redisClient.pipeline()
|
||||
|
||||
// Execute Redis operations
|
||||
if (!hasFinished) {
|
||||
if (ttl) {
|
||||
if (shouldSetNX) {
|
||||
pipeline.set(key, stringifiedData, "EX", ttl, "NX")
|
||||
} else {
|
||||
pipeline.set(key, stringifiedData, "EX", ttl)
|
||||
}
|
||||
} else {
|
||||
if (shouldSetNX) {
|
||||
pipeline.set(key, stringifiedData, "NX")
|
||||
} else {
|
||||
pipeline.set(key, stringifiedData)
|
||||
}
|
||||
if (!lockAcquired) {
|
||||
throw new Error("Lock not acquired")
|
||||
}
|
||||
} else {
|
||||
pipeline.unlink(key)
|
||||
|
||||
const storedData = await this.get(key, {
|
||||
isCancelling: !!data.flow.cancelledAt,
|
||||
} as any)
|
||||
|
||||
TransactionCheckpoint.mergeCheckpoints(data, storedData)
|
||||
}
|
||||
|
||||
const execPipeline = () => {
|
||||
return pipeline.exec().then((result) => {
|
||||
if (!shouldSetNX) {
|
||||
return result
|
||||
}
|
||||
try {
|
||||
const hasFinished = finishedStates.includes(data.flow.state)
|
||||
|
||||
const actionResult = result?.pop()
|
||||
const isOk = !!actionResult?.pop()
|
||||
if (!isOk) {
|
||||
throw new SkipExecutionError(
|
||||
"Transaction already started for transactionId: " +
|
||||
data.flow.transactionId
|
||||
)
|
||||
let cachedCheckpoint: TransactionCheckpoint | undefined
|
||||
const getCheckpoint = async (options?: TransactionOptions) => {
|
||||
if (!cachedCheckpoint) {
|
||||
cachedCheckpoint = await this.get(key, options)
|
||||
}
|
||||
return cachedCheckpoint
|
||||
}
|
||||
|
||||
return result
|
||||
await this.#preventRaceConditionExecutionIfNecessary({
|
||||
data: data,
|
||||
key,
|
||||
options,
|
||||
getCheckpoint,
|
||||
})
|
||||
}
|
||||
|
||||
// Database operations
|
||||
if (hasFinished && !retentionTime) {
|
||||
// If the workflow is nested, we cant just remove it because it would break the compensation algorithm. Instead, it will get deleted when the top level parent is deleted.
|
||||
if (!data.flow.metadata?.parentStepIdempotencyKey) {
|
||||
await promiseAll([execPipeline(), this.deleteFromDb(data)])
|
||||
// Only set if not exists
|
||||
const shouldSetNX =
|
||||
data.flow.state === TransactionState.NOT_STARTED &&
|
||||
!data.flow.transactionId.startsWith("auto-")
|
||||
|
||||
if (retentionTime) {
|
||||
Object.assign(data, {
|
||||
retention_time: retentionTime,
|
||||
})
|
||||
}
|
||||
|
||||
const execPipeline = () => {
|
||||
const lightData_ = {
|
||||
errors: data.errors,
|
||||
flow: data.flow,
|
||||
}
|
||||
const stringifiedData = JSON.stringify(lightData_)
|
||||
|
||||
const pipeline = this.redisClient.pipeline()
|
||||
|
||||
if (!hasFinished) {
|
||||
if (ttl) {
|
||||
if (shouldSetNX) {
|
||||
pipeline.set(key, stringifiedData, "EX", ttl, "NX")
|
||||
} else {
|
||||
pipeline.set(key, stringifiedData, "EX", ttl)
|
||||
}
|
||||
} else {
|
||||
if (shouldSetNX) {
|
||||
pipeline.set(key, stringifiedData, "NX")
|
||||
} else {
|
||||
pipeline.set(key, stringifiedData)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pipeline.unlink(key)
|
||||
}
|
||||
|
||||
return pipeline.exec().then((result) => {
|
||||
if (!shouldSetNX) {
|
||||
return result
|
||||
}
|
||||
|
||||
const actionResult = result?.pop()
|
||||
const isOk = !!actionResult?.pop()
|
||||
if (!isOk) {
|
||||
throw new SkipExecutionError(
|
||||
"Transaction already started for transactionId: " +
|
||||
data.flow.transactionId
|
||||
)
|
||||
}
|
||||
|
||||
return result
|
||||
})
|
||||
}
|
||||
|
||||
if (hasFinished && !retentionTime) {
|
||||
if (!data.flow.metadata?.parentStepIdempotencyKey) {
|
||||
await this.deleteFromDb(data)
|
||||
await execPipeline()
|
||||
} else {
|
||||
await this.saveToDb(data, retentionTime)
|
||||
await execPipeline()
|
||||
}
|
||||
} else {
|
||||
await this.saveToDb(data, retentionTime)
|
||||
await execPipeline()
|
||||
}
|
||||
} else {
|
||||
await this.saveToDb(data, retentionTime)
|
||||
await execPipeline()
|
||||
|
||||
return data as TransactionCheckpoint
|
||||
} finally {
|
||||
if (lockAcquired) {
|
||||
await this.#releaseLock(key)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -750,19 +761,47 @@ export class RedisDistributedTransactionStorage
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a lock key for the given transaction key
|
||||
*/
|
||||
#getLockKey(key: string): string {
|
||||
return `${key}:lock`
|
||||
}
|
||||
|
||||
async #acquireLock(key: string, ttlSeconds: number = 2): Promise<boolean> {
|
||||
const lockKey = this.#getLockKey(key)
|
||||
|
||||
const result = await this.redisClient.set(
|
||||
lockKey,
|
||||
1,
|
||||
"EX",
|
||||
ttlSeconds,
|
||||
"NX"
|
||||
)
|
||||
return result === "OK"
|
||||
}
|
||||
|
||||
async #releaseLock(key: string): Promise<void> {
|
||||
const lockKey = this.#getLockKey(key)
|
||||
await this.redisClient.del(lockKey)
|
||||
}
|
||||
|
||||
async #preventRaceConditionExecutionIfNecessary({
|
||||
data,
|
||||
key,
|
||||
options,
|
||||
getCheckpoint,
|
||||
}: {
|
||||
data: TransactionCheckpoint
|
||||
key: string
|
||||
options?: TransactionOptions
|
||||
getCheckpoint: (
|
||||
options: TransactionOptions
|
||||
) => Promise<TransactionCheckpoint | undefined>
|
||||
}) {
|
||||
const isInitialCheckpoint = [TransactionState.NOT_STARTED].includes(
|
||||
data.flow.state
|
||||
)
|
||||
|
||||
/**
|
||||
* In case many execution can succeed simultaneously, we need to ensure that the latest
|
||||
* execution does continue if a previous execution is considered finished
|
||||
@@ -780,13 +819,37 @@ export class RedisDistributedTransactionStorage
|
||||
} as Parameters<typeof this.get>[1]
|
||||
|
||||
data_ =
|
||||
(await this.get(key, getOptions)) ??
|
||||
(await getCheckpoint(getOptions as TransactionOptions)) ??
|
||||
({ flow: {} } as TransactionCheckpoint)
|
||||
}
|
||||
|
||||
const { flow: latestUpdatedFlow } = data_
|
||||
if (options?.stepId) {
|
||||
const stepId = options.stepId
|
||||
const currentStep = data.flow.steps[stepId]
|
||||
const latestStep = latestUpdatedFlow.steps?.[stepId]
|
||||
if (latestStep && currentStep) {
|
||||
const isCompensating = data.flow.state === TransactionState.COMPENSATING
|
||||
|
||||
if (!isInitialCheckpoint && !isPresent(latestUpdatedFlow)) {
|
||||
const latestState = isCompensating
|
||||
? latestStep.compensate?.state
|
||||
: latestStep.invoke?.state
|
||||
|
||||
const shouldSkip = doneStates.includes(latestState)
|
||||
|
||||
if (shouldSkip) {
|
||||
throw new SkipStepAlreadyFinishedError(
|
||||
`Step ${stepId} already finished by another execution`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
!isInitialCheckpoint &&
|
||||
!isPresent(latestUpdatedFlow) &&
|
||||
!data.flow.metadata?.parentStepIdempotencyKey
|
||||
) {
|
||||
/**
|
||||
* the initial checkpoint expect no other checkpoint to have been stored.
|
||||
* In case it is not the initial one and another checkpoint is trying to
|
||||
@@ -796,54 +859,7 @@ export class RedisDistributedTransactionStorage
|
||||
throw new SkipExecutionError("Already finished by another execution")
|
||||
}
|
||||
|
||||
let currentFlowLatestExecutedStep: TransactionStep | undefined
|
||||
const currentFlowSteps = Object.values(currentFlow.steps || {})
|
||||
for (let i = currentFlowSteps.length - 1; i >= 0; i--) {
|
||||
if (currentFlowSteps[i].lastAttempt) {
|
||||
currentFlowLatestExecutedStep = currentFlowSteps[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
let latestUpdatedFlowLatestExecutedStep: TransactionStep | undefined
|
||||
const latestUpdatedFlowSteps = Object.values(latestUpdatedFlow.steps || {})
|
||||
for (let i = latestUpdatedFlowSteps.length - 1; i >= 0; i--) {
|
||||
if (latestUpdatedFlowSteps[i].lastAttempt) {
|
||||
latestUpdatedFlowLatestExecutedStep = latestUpdatedFlowSteps[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The current flow and the latest updated flow have the same latest executed step.
|
||||
*/
|
||||
const isSameLatestExecutedStep =
|
||||
currentFlowLatestExecutedStep &&
|
||||
latestUpdatedFlowLatestExecutedStep &&
|
||||
currentFlowLatestExecutedStep?.id ===
|
||||
latestUpdatedFlowLatestExecutedStep?.id
|
||||
|
||||
/**
|
||||
* The current flow's latest executed step has a last attempt ahead of the latest updated
|
||||
* flow's latest executed step. Therefor it is fine, otherwise another execution has already
|
||||
* finished the step.
|
||||
*/
|
||||
const isCurrentLatestExecutedStepLastAttemptAhead =
|
||||
currentFlowLatestExecutedStep?.lastAttempt &&
|
||||
latestUpdatedFlowLatestExecutedStep?.lastAttempt &&
|
||||
currentFlowLatestExecutedStep.lastAttempt >=
|
||||
latestUpdatedFlowLatestExecutedStep.lastAttempt
|
||||
|
||||
if (
|
||||
isSameLatestExecutedStep &&
|
||||
!isCurrentLatestExecutedStepLastAttemptAhead
|
||||
) {
|
||||
throw new SkipStepAlreadyFinishedError(
|
||||
"Step already finished by another execution"
|
||||
)
|
||||
}
|
||||
|
||||
// First ensure that the latest execution was not cancelled, otherwise we skip the execution
|
||||
// Ensure that the latest execution was not cancelled, otherwise we skip the execution
|
||||
const latestTransactionCancelledAt = latestUpdatedFlow.cancelledAt
|
||||
const currentTransactionCancelledAt = currentFlow.cancelledAt
|
||||
|
||||
@@ -855,91 +871,6 @@ export class RedisDistributedTransactionStorage
|
||||
"Workflow execution has been cancelled during the execution"
|
||||
)
|
||||
}
|
||||
|
||||
let currentFlowLastInvokingStepIndex = -1
|
||||
for (let i = 0; i < currentFlowSteps.length; i++) {
|
||||
if (isInvokingState(currentFlowSteps[i])) {
|
||||
currentFlowLastInvokingStepIndex = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
let latestUpdatedFlowLastInvokingStepIndex = !latestUpdatedFlow.steps
|
||||
? 1 // There is no other execution, so the current execution is the latest
|
||||
: -1
|
||||
|
||||
if (latestUpdatedFlow.steps) {
|
||||
for (let i = 0; i < latestUpdatedFlowSteps.length; i++) {
|
||||
if (isInvokingState(latestUpdatedFlowSteps[i])) {
|
||||
latestUpdatedFlowLastInvokingStepIndex = i
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let currentFlowLastCompensatingStepIndex = -1
|
||||
for (let i = currentFlowSteps.length - 1; i >= 0; i--) {
|
||||
if (isCompensatingState(currentFlowSteps[i])) {
|
||||
currentFlowLastCompensatingStepIndex = currentFlowSteps.length - 1 - i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
let latestUpdatedFlowLastCompensatingStepIndex = !latestUpdatedFlow.steps
|
||||
? -1 // There is no other execution, so the current execution is the latest
|
||||
: -1
|
||||
|
||||
if (latestUpdatedFlow.steps) {
|
||||
for (let i = latestUpdatedFlowSteps.length - 1; i >= 0; i--) {
|
||||
if (isCompensatingState(latestUpdatedFlowSteps[i])) {
|
||||
latestUpdatedFlowLastCompensatingStepIndex =
|
||||
latestUpdatedFlowSteps.length - 1 - i
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const isLatestExecutionFinishedIndex = -1
|
||||
const invokeShouldBeSkipped =
|
||||
(latestUpdatedFlowLastInvokingStepIndex ===
|
||||
isLatestExecutionFinishedIndex ||
|
||||
currentFlowLastInvokingStepIndex <
|
||||
latestUpdatedFlowLastInvokingStepIndex) &&
|
||||
currentFlowLastInvokingStepIndex !== isLatestExecutionFinishedIndex
|
||||
|
||||
const compensateShouldBeSkipped =
|
||||
currentFlowLastCompensatingStepIndex <
|
||||
latestUpdatedFlowLastCompensatingStepIndex &&
|
||||
currentFlowLastCompensatingStepIndex !== isLatestExecutionFinishedIndex &&
|
||||
latestUpdatedFlowLastCompensatingStepIndex !==
|
||||
isLatestExecutionFinishedIndex
|
||||
|
||||
const isCompensatingMismatch =
|
||||
latestUpdatedFlow.state === TransactionState.COMPENSATING &&
|
||||
![TransactionState.REVERTED, TransactionState.FAILED].includes(
|
||||
currentFlow.state
|
||||
) &&
|
||||
currentFlow.state !== latestUpdatedFlow.state
|
||||
|
||||
const isRevertedMismatch =
|
||||
latestUpdatedFlow.state === TransactionState.REVERTED &&
|
||||
currentFlow.state !== TransactionState.REVERTED
|
||||
|
||||
const isFailedMismatch =
|
||||
latestUpdatedFlow.state === TransactionState.FAILED &&
|
||||
currentFlow.state !== TransactionState.FAILED
|
||||
|
||||
if (
|
||||
(data.flow.state !== TransactionState.COMPENSATING &&
|
||||
invokeShouldBeSkipped) ||
|
||||
(data.flow.state === TransactionState.COMPENSATING &&
|
||||
compensateShouldBeSkipped) ||
|
||||
isCompensatingMismatch ||
|
||||
isRevertedMismatch ||
|
||||
isFailedMismatch
|
||||
) {
|
||||
throw new SkipExecutionError("Already finished by another execution")
|
||||
}
|
||||
}
|
||||
|
||||
async clearExpiredExecutions() {
|
||||
|
||||
Reference in New Issue
Block a user