feat(index): full sync operations (#11178)
Closes: FRMW-2892, FRMW-2893
**What**
Wired up the building blocks that we merged previously in order to manage data synchronization. The flow is as follows
- On application start
- Build schema object representation from configuration
- Check configuration changes
- if new entities are configured
- Data synchronizer initializes the orchestrator and starts the sync
- for each entity
- acquire lock
- mark existing data as stale
- sync all data in batches
- mark the synced data as no longer stale
- acknowledge each processed batch and renew the lock
- update metadata with the last synced cursor for entity X
- release lock
- remove all remaining stale data
- if any entities were removed since the last configuration
- remove the index data and relations
Co-authored-by: Carlos R. L. Rodrigues <37986729+carlos-r-l-rodrigues@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
60f46e07fd
commit
a33aebd895
142
packages/modules/index/src/utils/sync/configuration.ts
Normal file
142
packages/modules/index/src/utils/sync/configuration.ts
Normal file
@@ -0,0 +1,142 @@
|
||||
import { simpleHash } from "@medusajs/framework/utils"
|
||||
import { IndexTypes, InferEntityType } from "@medusajs/types"
|
||||
import { IndexMetadata } from "@models"
|
||||
import { schemaObjectRepresentationPropertiesToOmit } from "@types"
|
||||
import { DataSynchronizer } from "../../services/data-synchronizer"
|
||||
import { IndexMetadataService } from "../../services/index-metadata"
|
||||
import { IndexSyncService } from "../../services/index-sync"
|
||||
import { IndexMetadataStatus } from "../index-metadata-status"
|
||||
|
||||
export class Configuration {
|
||||
#schemaObjectRepresentation: IndexTypes.SchemaObjectRepresentation
|
||||
#indexMetadataService: IndexMetadataService
|
||||
#indexSyncService: IndexSyncService
|
||||
#dataSynchronizer: DataSynchronizer
|
||||
|
||||
constructor({
|
||||
schemaObjectRepresentation,
|
||||
indexMetadataService,
|
||||
indexSyncService,
|
||||
dataSynchronizer,
|
||||
}: {
|
||||
schemaObjectRepresentation: IndexTypes.SchemaObjectRepresentation
|
||||
indexMetadataService: IndexMetadataService
|
||||
indexSyncService: IndexSyncService
|
||||
dataSynchronizer: DataSynchronizer
|
||||
}) {
|
||||
this.#schemaObjectRepresentation = schemaObjectRepresentation ?? {}
|
||||
this.#indexMetadataService = indexMetadataService
|
||||
this.#indexSyncService = indexSyncService
|
||||
this.#dataSynchronizer = dataSynchronizer
|
||||
}
|
||||
|
||||
async checkChanges(): Promise<InferEntityType<typeof IndexMetadata>[]> {
|
||||
const schemaObjectRepresentation = this.#schemaObjectRepresentation
|
||||
|
||||
const currentConfig = await this.#indexMetadataService.list()
|
||||
const currentConfigMap = new Map(
|
||||
currentConfig.map((c) => [c.entity, c] as const)
|
||||
)
|
||||
|
||||
type modifiedConfig = {
|
||||
id?: string
|
||||
entity: string
|
||||
fields: string[]
|
||||
fields_hash: string
|
||||
status?: IndexMetadataStatus
|
||||
}[]
|
||||
|
||||
type dataSyncEntry = {
|
||||
id?: string
|
||||
entity: string
|
||||
last_key: null
|
||||
}[]
|
||||
|
||||
const entityPresent = new Set<string>()
|
||||
const newConfig: modifiedConfig = []
|
||||
const updatedConfig: modifiedConfig = []
|
||||
const deletedConfig: { entity: string }[] = []
|
||||
const idxSyncData: dataSyncEntry = []
|
||||
|
||||
for (const [entityName, schemaEntityObjectRepresentation] of Object.entries(
|
||||
schemaObjectRepresentation
|
||||
)) {
|
||||
if (schemaObjectRepresentationPropertiesToOmit.includes(entityName)) {
|
||||
continue
|
||||
}
|
||||
|
||||
const entity = schemaEntityObjectRepresentation.entity
|
||||
const fields = schemaEntityObjectRepresentation.fields.sort().join(",")
|
||||
const fields_hash = simpleHash(fields)
|
||||
|
||||
const existingEntityConfig = currentConfigMap.get(entity)
|
||||
|
||||
entityPresent.add(entity)
|
||||
if (
|
||||
!existingEntityConfig ||
|
||||
existingEntityConfig.fields_hash !== fields_hash
|
||||
) {
|
||||
const meta = {
|
||||
id: existingEntityConfig?.id,
|
||||
entity,
|
||||
fields,
|
||||
fields_hash,
|
||||
}
|
||||
|
||||
if (!existingEntityConfig) {
|
||||
newConfig.push(meta)
|
||||
} else {
|
||||
updatedConfig.push({
|
||||
...meta,
|
||||
status: IndexMetadataStatus.PENDING,
|
||||
})
|
||||
}
|
||||
|
||||
idxSyncData.push({
|
||||
entity,
|
||||
last_key: null,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for (const [entity] of currentConfigMap) {
|
||||
if (!entityPresent.has(entity)) {
|
||||
deletedConfig.push({ entity })
|
||||
}
|
||||
}
|
||||
|
||||
if (newConfig.length > 0) {
|
||||
await this.#indexMetadataService.create(newConfig)
|
||||
}
|
||||
if (updatedConfig.length > 0) {
|
||||
await this.#indexMetadataService.update(updatedConfig)
|
||||
}
|
||||
|
||||
if (deletedConfig.length > 0) {
|
||||
await this.#indexMetadataService.delete(deletedConfig)
|
||||
await this.#dataSynchronizer.removeEntities(
|
||||
deletedConfig.map((c) => c.entity)
|
||||
)
|
||||
}
|
||||
|
||||
if (idxSyncData.length > 0) {
|
||||
if (updatedConfig.length > 0) {
|
||||
const ids = await this.#indexSyncService.list({
|
||||
entity: updatedConfig.map((c) => c.entity),
|
||||
})
|
||||
idxSyncData.forEach((sync) => {
|
||||
const id = ids.find((i) => i.entity === sync.entity)?.id
|
||||
if (id) {
|
||||
sync.id = id
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
await this.#indexSyncService.upsert(idxSyncData)
|
||||
}
|
||||
|
||||
return await this.#indexMetadataService.list({
|
||||
status: [IndexMetadataStatus.PENDING, IndexMetadataStatus.PROCESSING],
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,137 +0,0 @@
|
||||
import {
|
||||
IndexTypes,
|
||||
RemoteQueryFunction,
|
||||
SchemaObjectEntityRepresentation,
|
||||
Event,
|
||||
} from "@medusajs/framework/types"
|
||||
import { CommonEvents } from "@medusajs/framework/utils"
|
||||
|
||||
export class DataSynchronizer {
|
||||
#storageProvider: IndexTypes.StorageProvider
|
||||
#schemaObjectRepresentation: IndexTypes.SchemaObjectRepresentation
|
||||
#query: RemoteQueryFunction
|
||||
|
||||
constructor({
|
||||
storageProvider,
|
||||
schemaObjectRepresentation,
|
||||
query,
|
||||
}: {
|
||||
storageProvider: IndexTypes.StorageProvider
|
||||
schemaObjectRepresentation: IndexTypes.SchemaObjectRepresentation
|
||||
query: RemoteQueryFunction
|
||||
}) {
|
||||
this.#storageProvider = storageProvider
|
||||
this.#schemaObjectRepresentation = schemaObjectRepresentation
|
||||
this.#query = query
|
||||
}
|
||||
|
||||
async sync({
|
||||
entityName,
|
||||
pagination = {},
|
||||
ack,
|
||||
}: {
|
||||
entityName: string
|
||||
pagination?: {
|
||||
cursor?: string
|
||||
updated_at?: string | Date
|
||||
limit?: number
|
||||
batchSize?: number
|
||||
}
|
||||
ack: (ack: {
|
||||
lastCursor: string | null
|
||||
done?: boolean
|
||||
err?: Error
|
||||
}) => Promise<void>
|
||||
}) {
|
||||
const schemaEntityObjectRepresentation = this.#schemaObjectRepresentation[
|
||||
entityName
|
||||
] as SchemaObjectEntityRepresentation
|
||||
|
||||
const { fields, alias, moduleConfig } = schemaEntityObjectRepresentation
|
||||
|
||||
const entityPrimaryKey = fields.find(
|
||||
(field) => !!moduleConfig.linkableKeys?.[field]
|
||||
)
|
||||
|
||||
if (!entityPrimaryKey) {
|
||||
void ack({
|
||||
lastCursor: pagination.cursor ?? null,
|
||||
err: new Error(
|
||||
`Entity ${entityName} does not have a linkable primary key`
|
||||
),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
let processed = 0
|
||||
let currentCursor = pagination.cursor!
|
||||
const batchSize = pagination.batchSize ?? 1000
|
||||
const limit = pagination.limit ?? Infinity
|
||||
let done = false
|
||||
let error = null
|
||||
|
||||
while (processed < limit || !done) {
|
||||
const filters: Record<string, any> = {}
|
||||
|
||||
if (currentCursor) {
|
||||
filters[entityPrimaryKey] = { $gt: currentCursor }
|
||||
}
|
||||
|
||||
if (pagination.updated_at) {
|
||||
filters["updated_at"] = { $gt: pagination.updated_at }
|
||||
}
|
||||
|
||||
const { data } = await this.#query.graph({
|
||||
entity: alias,
|
||||
fields: [entityPrimaryKey],
|
||||
filters,
|
||||
pagination: {
|
||||
order: {
|
||||
[entityPrimaryKey]: "asc",
|
||||
},
|
||||
take: batchSize,
|
||||
},
|
||||
})
|
||||
|
||||
done = !data.length
|
||||
if (done) {
|
||||
break
|
||||
}
|
||||
|
||||
const envelop: Event = {
|
||||
data,
|
||||
name: `*.${CommonEvents.CREATED}`,
|
||||
}
|
||||
|
||||
try {
|
||||
await this.#storageProvider.consumeEvent(
|
||||
schemaEntityObjectRepresentation
|
||||
)(envelop)
|
||||
currentCursor = data[data.length - 1][entityPrimaryKey]
|
||||
processed += data.length
|
||||
|
||||
void ack({ lastCursor: currentCursor })
|
||||
} catch (err) {
|
||||
error = err
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
let acknoledgement: { lastCursor: string; done?: boolean; err?: Error } = {
|
||||
lastCursor: currentCursor,
|
||||
done: true,
|
||||
}
|
||||
|
||||
if (error) {
|
||||
acknoledgement = {
|
||||
lastCursor: currentCursor,
|
||||
err: error,
|
||||
}
|
||||
void ack(acknoledgement)
|
||||
return acknoledgement
|
||||
}
|
||||
|
||||
void ack(acknoledgement)
|
||||
return acknoledgement
|
||||
}
|
||||
}
|
||||
160
packages/modules/index/src/utils/sync/orchestrator.ts
Normal file
160
packages/modules/index/src/utils/sync/orchestrator.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import { ILockingModule } from "@medusajs/types"
|
||||
|
||||
export class Orchestrator {
|
||||
/**
|
||||
* Reference to the locking module
|
||||
*/
|
||||
#lockingModule: ILockingModule
|
||||
|
||||
/**
|
||||
* Owner id when acquiring locks
|
||||
*/
|
||||
#lockingOwner = `index-sync-${process.pid}`
|
||||
|
||||
/**
|
||||
* The current state of the orchestrator
|
||||
*
|
||||
* - In "idle" state, one can call the "run" method.
|
||||
* - In "processing" state, the orchestrator is looping over the entities
|
||||
* and processing them.
|
||||
* - In "completed" state, the provided entities have been processed.
|
||||
* - The "error" state is set when the task runner throws an error.
|
||||
*/
|
||||
#state: "idle" | "processing" | "completed" | "error" = "idle"
|
||||
|
||||
/**
|
||||
* Options for the locking module and the task runner to execute the
|
||||
* task.
|
||||
*
|
||||
* - Lock duration is the maximum duration for which to hold the lock.
|
||||
* After this the lock will be removed.
|
||||
*
|
||||
* The entity is provided to the taskRunner only when the orchestrator
|
||||
* is able to acquire a lock.
|
||||
*/
|
||||
#options: {
|
||||
lockDuration: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Index of the entity that is currently getting processed.
|
||||
*/
|
||||
#currentIndex: number = 0
|
||||
|
||||
/**
|
||||
* Collection of entities to process in sequence. A lock is obtained
|
||||
* while an entity is getting synced to avoid multiple processes
|
||||
* from syncing the same entity
|
||||
*/
|
||||
#entities: string[] = []
|
||||
|
||||
/**
|
||||
* The current state of the orchestrator
|
||||
*/
|
||||
get state() {
|
||||
return this.#state
|
||||
}
|
||||
|
||||
/**
|
||||
* Reference to the currently processed entity
|
||||
*/
|
||||
get current() {
|
||||
return this.#entities[this.#currentIndex]
|
||||
}
|
||||
|
||||
/**
|
||||
* Reference to the number of entities left for processing
|
||||
*/
|
||||
get remainingCount() {
|
||||
return this.#entities.length - (this.#currentIndex + 1)
|
||||
}
|
||||
|
||||
constructor(
|
||||
lockingModule: ILockingModule,
|
||||
entities: string[],
|
||||
options: {
|
||||
lockDuration: number
|
||||
}
|
||||
) {
|
||||
this.#lockingModule = lockingModule
|
||||
this.#entities = entities
|
||||
this.#options = options
|
||||
}
|
||||
|
||||
/**
|
||||
* Acquires using the lock module.
|
||||
*/
|
||||
async #acquireLock(forKey: string): Promise<boolean> {
|
||||
try {
|
||||
await this.#lockingModule.acquire(forKey, {
|
||||
expire: this.#options.lockDuration,
|
||||
ownerId: this.#lockingOwner,
|
||||
})
|
||||
return true
|
||||
} catch {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Acquires or renew the lock for a given key.
|
||||
*/
|
||||
async renewLock(forKey: string): Promise<boolean> {
|
||||
return this.#acquireLock(forKey)
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes the entity at a given index. If there are no entities
|
||||
* left, the orchestrator state will be set to completed.
|
||||
*
|
||||
* - Task runner is the implementation function to execute a task.
|
||||
* Orchestrator has no inbuilt execution logic and it relies on
|
||||
* the task runner for the same.
|
||||
*/
|
||||
async #processAtIndex(
|
||||
taskRunner: (entity: string) => Promise<void>,
|
||||
entity: string
|
||||
) {
|
||||
const lockAcquired = await this.#acquireLock(entity)
|
||||
if (lockAcquired) {
|
||||
try {
|
||||
await taskRunner(entity)
|
||||
} catch (error) {
|
||||
this.#state = "error"
|
||||
throw error
|
||||
} finally {
|
||||
await this.#lockingModule.release(entity, {
|
||||
ownerId: this.#lockingOwner,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the orchestrator to process the entities one by one.
|
||||
*
|
||||
* - Task runner is the implementation function to execute a task.
|
||||
* Orchestrator has no inbuilt execution logic and it relies on
|
||||
* the task runner for the same.
|
||||
*/
|
||||
async process(taskRunner: (entity: string) => Promise<void>) {
|
||||
if (this.state !== "idle") {
|
||||
throw new Error("Cannot re-run an already running orchestrator instance")
|
||||
}
|
||||
|
||||
this.#state = "processing"
|
||||
|
||||
for (let i = 0; i < this.#entities.length; i++) {
|
||||
this.#currentIndex = i
|
||||
const entity = this.#entities[i]
|
||||
if (!entity) {
|
||||
this.#state = "completed"
|
||||
break
|
||||
}
|
||||
|
||||
await this.#processAtIndex(taskRunner, entity)
|
||||
}
|
||||
|
||||
this.#state = "completed"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user