feat(index): full sync operations (#11178)

Closes: FRMW-2892, FRMW-2893

**What**
Wired up the building block that we merged previously in order to manage data synchronization. The flow is as follow
- On application start
  - Build schema object representation from configuration
  - Check configuration changes
    - if new entities configured
      - Data synchronizer initialize orchestrator and start sync
        - for each entity
          - acquire lock
          - mark existing data as staled
          - sync all data by batch
          - marked them not staled anymore
          - acknowledge each processed batch and renew lock
          - update metadata with last synced cursor for entity X
          - release lock
      - remove all remaining staled data
    - if any entities removed from last configuration
      - remove the index data and relations

Co-authored-by: Carlos R. L. Rodrigues <37986729+carlos-r-l-rodrigues@users.noreply.github.com>
This commit is contained in:
Adrien de Peretti
2025-02-05 17:49:18 +01:00
committed by GitHub
parent 60f46e07fd
commit a33aebd895
35 changed files with 1677 additions and 727 deletions

View File

@@ -0,0 +1,142 @@
import { simpleHash } from "@medusajs/framework/utils"
import { IndexTypes, InferEntityType } from "@medusajs/types"
import { IndexMetadata } from "@models"
import { schemaObjectRepresentationPropertiesToOmit } from "@types"
import { DataSynchronizer } from "../../services/data-synchronizer"
import { IndexMetadataService } from "../../services/index-metadata"
import { IndexSyncService } from "../../services/index-sync"
import { IndexMetadataStatus } from "../index-metadata-status"
export class Configuration {
#schemaObjectRepresentation: IndexTypes.SchemaObjectRepresentation
#indexMetadataService: IndexMetadataService
#indexSyncService: IndexSyncService
#dataSynchronizer: DataSynchronizer
constructor({
schemaObjectRepresentation,
indexMetadataService,
indexSyncService,
dataSynchronizer,
}: {
schemaObjectRepresentation: IndexTypes.SchemaObjectRepresentation
indexMetadataService: IndexMetadataService
indexSyncService: IndexSyncService
dataSynchronizer: DataSynchronizer
}) {
this.#schemaObjectRepresentation = schemaObjectRepresentation ?? {}
this.#indexMetadataService = indexMetadataService
this.#indexSyncService = indexSyncService
this.#dataSynchronizer = dataSynchronizer
}
async checkChanges(): Promise<InferEntityType<typeof IndexMetadata>[]> {
const schemaObjectRepresentation = this.#schemaObjectRepresentation
const currentConfig = await this.#indexMetadataService.list()
const currentConfigMap = new Map(
currentConfig.map((c) => [c.entity, c] as const)
)
type modifiedConfig = {
id?: string
entity: string
fields: string[]
fields_hash: string
status?: IndexMetadataStatus
}[]
type dataSyncEntry = {
id?: string
entity: string
last_key: null
}[]
const entityPresent = new Set<string>()
const newConfig: modifiedConfig = []
const updatedConfig: modifiedConfig = []
const deletedConfig: { entity: string }[] = []
const idxSyncData: dataSyncEntry = []
for (const [entityName, schemaEntityObjectRepresentation] of Object.entries(
schemaObjectRepresentation
)) {
if (schemaObjectRepresentationPropertiesToOmit.includes(entityName)) {
continue
}
const entity = schemaEntityObjectRepresentation.entity
const fields = schemaEntityObjectRepresentation.fields.sort().join(",")
const fields_hash = simpleHash(fields)
const existingEntityConfig = currentConfigMap.get(entity)
entityPresent.add(entity)
if (
!existingEntityConfig ||
existingEntityConfig.fields_hash !== fields_hash
) {
const meta = {
id: existingEntityConfig?.id,
entity,
fields,
fields_hash,
}
if (!existingEntityConfig) {
newConfig.push(meta)
} else {
updatedConfig.push({
...meta,
status: IndexMetadataStatus.PENDING,
})
}
idxSyncData.push({
entity,
last_key: null,
})
}
}
for (const [entity] of currentConfigMap) {
if (!entityPresent.has(entity)) {
deletedConfig.push({ entity })
}
}
if (newConfig.length > 0) {
await this.#indexMetadataService.create(newConfig)
}
if (updatedConfig.length > 0) {
await this.#indexMetadataService.update(updatedConfig)
}
if (deletedConfig.length > 0) {
await this.#indexMetadataService.delete(deletedConfig)
await this.#dataSynchronizer.removeEntities(
deletedConfig.map((c) => c.entity)
)
}
if (idxSyncData.length > 0) {
if (updatedConfig.length > 0) {
const ids = await this.#indexSyncService.list({
entity: updatedConfig.map((c) => c.entity),
})
idxSyncData.forEach((sync) => {
const id = ids.find((i) => i.entity === sync.entity)?.id
if (id) {
sync.id = id
}
})
}
await this.#indexSyncService.upsert(idxSyncData)
}
return await this.#indexMetadataService.list({
status: [IndexMetadataStatus.PENDING, IndexMetadataStatus.PROCESSING],
})
}
}

View File

@@ -1,137 +0,0 @@
import {
IndexTypes,
RemoteQueryFunction,
SchemaObjectEntityRepresentation,
Event,
} from "@medusajs/framework/types"
import { CommonEvents } from "@medusajs/framework/utils"
export class DataSynchronizer {
#storageProvider: IndexTypes.StorageProvider
#schemaObjectRepresentation: IndexTypes.SchemaObjectRepresentation
#query: RemoteQueryFunction
constructor({
storageProvider,
schemaObjectRepresentation,
query,
}: {
storageProvider: IndexTypes.StorageProvider
schemaObjectRepresentation: IndexTypes.SchemaObjectRepresentation
query: RemoteQueryFunction
}) {
this.#storageProvider = storageProvider
this.#schemaObjectRepresentation = schemaObjectRepresentation
this.#query = query
}
async sync({
entityName,
pagination = {},
ack,
}: {
entityName: string
pagination?: {
cursor?: string
updated_at?: string | Date
limit?: number
batchSize?: number
}
ack: (ack: {
lastCursor: string | null
done?: boolean
err?: Error
}) => Promise<void>
}) {
const schemaEntityObjectRepresentation = this.#schemaObjectRepresentation[
entityName
] as SchemaObjectEntityRepresentation
const { fields, alias, moduleConfig } = schemaEntityObjectRepresentation
const entityPrimaryKey = fields.find(
(field) => !!moduleConfig.linkableKeys?.[field]
)
if (!entityPrimaryKey) {
void ack({
lastCursor: pagination.cursor ?? null,
err: new Error(
`Entity ${entityName} does not have a linkable primary key`
),
})
return
}
let processed = 0
let currentCursor = pagination.cursor!
const batchSize = pagination.batchSize ?? 1000
const limit = pagination.limit ?? Infinity
let done = false
let error = null
while (processed < limit || !done) {
const filters: Record<string, any> = {}
if (currentCursor) {
filters[entityPrimaryKey] = { $gt: currentCursor }
}
if (pagination.updated_at) {
filters["updated_at"] = { $gt: pagination.updated_at }
}
const { data } = await this.#query.graph({
entity: alias,
fields: [entityPrimaryKey],
filters,
pagination: {
order: {
[entityPrimaryKey]: "asc",
},
take: batchSize,
},
})
done = !data.length
if (done) {
break
}
const envelop: Event = {
data,
name: `*.${CommonEvents.CREATED}`,
}
try {
await this.#storageProvider.consumeEvent(
schemaEntityObjectRepresentation
)(envelop)
currentCursor = data[data.length - 1][entityPrimaryKey]
processed += data.length
void ack({ lastCursor: currentCursor })
} catch (err) {
error = err
break
}
}
let acknoledgement: { lastCursor: string; done?: boolean; err?: Error } = {
lastCursor: currentCursor,
done: true,
}
if (error) {
acknoledgement = {
lastCursor: currentCursor,
err: error,
}
void ack(acknoledgement)
return acknoledgement
}
void ack(acknoledgement)
return acknoledgement
}
}

View File

@@ -0,0 +1,160 @@
import { ILockingModule } from "@medusajs/types"
export class Orchestrator {
/**
* Reference to the locking module
*/
#lockingModule: ILockingModule
/**
* Owner id when acquiring locks
*/
#lockingOwner = `index-sync-${process.pid}`
/**
* The current state of the orchestrator
*
* - In "idle" state, one can call the "run" method.
* - In "processing" state, the orchestrator is looping over the entities
* and processing them.
* - In "completed" state, the provided entities have been processed.
* - The "error" state is set when the task runner throws an error.
*/
#state: "idle" | "processing" | "completed" | "error" = "idle"
/**
* Options for the locking module and the task runner to execute the
* task.
*
* - Lock duration is the maximum duration for which to hold the lock.
* After this the lock will be removed.
*
* The entity is provided to the taskRunner only when the orchestrator
* is able to acquire a lock.
*/
#options: {
lockDuration: number
}
/**
* Index of the entity that is currently getting processed.
*/
#currentIndex: number = 0
/**
* Collection of entities to process in sequence. A lock is obtained
* while an entity is getting synced to avoid multiple processes
* from syncing the same entity
*/
#entities: string[] = []
/**
* The current state of the orchestrator
*/
get state() {
return this.#state
}
/**
* Reference to the currently processed entity
*/
get current() {
return this.#entities[this.#currentIndex]
}
/**
* Reference to the number of entities left for processing
*/
get remainingCount() {
return this.#entities.length - (this.#currentIndex + 1)
}
constructor(
lockingModule: ILockingModule,
entities: string[],
options: {
lockDuration: number
}
) {
this.#lockingModule = lockingModule
this.#entities = entities
this.#options = options
}
/**
* Acquires using the lock module.
*/
async #acquireLock(forKey: string): Promise<boolean> {
try {
await this.#lockingModule.acquire(forKey, {
expire: this.#options.lockDuration,
ownerId: this.#lockingOwner,
})
return true
} catch {
return false
}
}
/**
* Acquires or renew the lock for a given key.
*/
async renewLock(forKey: string): Promise<boolean> {
return this.#acquireLock(forKey)
}
/**
* Processes the entity at a given index. If there are no entities
* left, the orchestrator state will be set to completed.
*
* - Task runner is the implementation function to execute a task.
* Orchestrator has no inbuilt execution logic and it relies on
* the task runner for the same.
*/
async #processAtIndex(
taskRunner: (entity: string) => Promise<void>,
entity: string
) {
const lockAcquired = await this.#acquireLock(entity)
if (lockAcquired) {
try {
await taskRunner(entity)
} catch (error) {
this.#state = "error"
throw error
} finally {
await this.#lockingModule.release(entity, {
ownerId: this.#lockingOwner,
})
}
}
}
/**
* Run the orchestrator to process the entities one by one.
*
* - Task runner is the implementation function to execute a task.
* Orchestrator has no inbuilt execution logic and it relies on
* the task runner for the same.
*/
async process(taskRunner: (entity: string) => Promise<void>) {
if (this.state !== "idle") {
throw new Error("Cannot re-run an already running orchestrator instance")
}
this.#state = "processing"
for (let i = 0; i < this.#entities.length; i++) {
this.#currentIndex = i
const entity = this.#entities[i]
if (!entity) {
this.#state = "completed"
break
}
await this.#processAtIndex(taskRunner, entity)
}
this.#state = "completed"
}
}