chore(): improve workflow engine storage (#13345)

* chore(workflow-engines): Improve race condition management * cleanup * cleanup * chore(workflow-engines): Improve race condition management * chore(workflow-engines): Improve race condition management * chore(workflow-engines): heartbeat extend TTL * Refactor chore title for workflow engine improvements * chore(): Improve workflow execution db interaction * chore(): Improve workflow execution db interaction * chore(): Improve workflow execution db interaction * chore(): Improve workflow execution db interaction * chore(): Improve workflow execution db interaction * chore(): Improve workflow execution db interaction * chore(): Improve workflow execution db interaction * chore(): Improve workflow execution db interaction * chore(): Improve workflow execution db interaction * update tests * revert idempotent * add run_id index + await deletion * improve saving * comment * remove only --------- Co-authored-by: Carlos R. L. Rodrigues <37986729+carlos-r-l-rodrigues@users.noreply.github.com>
2025-09-02 11:18:12 +02:00
parent b85a46e85b
commit bd206cb250
10 changed files with 255 additions and 48 deletions
@@ -119,6 +119,68 @@ export class InMemoryDistributedTransactionStorage
  }

  private async saveToDb(data: TransactionCheckpoint, retentionTime?: number) {
+    const isNotStarted = data.flow.state === TransactionState.NOT_STARTED
+    const isFinished = [
+      TransactionState.DONE,
+      TransactionState.FAILED,
+      TransactionState.REVERTED,
+    ].includes(data.flow.state)
+
+    /**
+     * Bit of explanation:
+     *
+     * When a workflow runs, it runs all sync steps in memory until it reaches an async step.
+     * At that point, it might hand over to another process to continue the execution. That's why
+     * we need to save the current state of the flow. From there, it will again run all
+     * sync steps until the next async step, and so on.
+     *
+     * To summarize, we only truly need to save the data when we reach a step that
+     * triggers a handover to a potentially different process.
+     *
+     * This allows us to spare some resources and time by not over-communicating with the external
+     * database when it is not really needed.
+     */
+
+    const isFlowInvoking = data.flow.state === TransactionState.INVOKING
+
+    const stepsArray = Object.values(data.flow.steps) as TransactionStep[]
+    let currentStep!: TransactionStep
+
+    const targetStates = isFlowInvoking
+      ? [
+          TransactionStepState.INVOKING,
+          TransactionStepState.DONE,
+          TransactionStepState.FAILED,
+        ]
+      : [TransactionStepState.COMPENSATING]
+
+    // Find the current step from the end
+    for (let i = stepsArray.length - 1; i >= 0; i--) {
+      const step = stepsArray[i]
+
+      if (step.id === "_root") {
+        break
+      }
+
+      const isTargetState = targetStates.includes(step.invoke?.state)
+
+      if (isTargetState) {
+        currentStep = step
+        break
+      }
+    }
+
+    const currentStepsIsAsync = currentStep
+      ? stepsArray.some(
+          (step) =>
+            step?.definition?.async === true && step.depth === currentStep.depth
+        )
+      : false
+
+    if (!(isNotStarted || isFinished) && !currentStepsIsAsync) {
+      return
+    }
+
    await this.workflowExecutionService_.upsert([
      {
        workflow_id: data.flow.modelId,
@@ -285,6 +347,12 @@ export class InMemoryDistributedTransactionStorage
    key: string
    options?: TransactionOptions
  }) {
+    // TODO: We experimented with replacing this entire function with a
+    // lock-first approach, and we might come back to that another time.
+    // That approach would remove the need for all the logic below that guards
+    // against race conditions, by preventing the exact same execution from
+    // running at the same time.
+    // See early commits from: https://github.com/medusajs/medusa/pull/13345/commits
+
    const isInitialCheckpoint = [TransactionState.NOT_STARTED].includes(
      data.flow.state
    )