feat(index): full sync operations (#11178)

Closes: FRMW-2892, FRMW-2893

**What**
Wired up the building block that we merged previously in order to manage data synchronization. The flow is as follow
- On application start
  - Build schema object representation from configuration
  - Check configuration changes
    - if new entities configured
      - Data synchronizer initialize orchestrator and start sync
        - for each entity
          - acquire lock
          - mark existing data as staled
          - sync all data by batch
          - marked them not staled anymore
          - acknowledge each processed batch and renew lock
          - update metadata with last synced cursor for entity X
          - release lock
      - remove all remaining staled data
    - if any entities removed from last configuration
      - remove the index data and relations

Co-authored-by: Carlos R. L. Rodrigues <37986729+carlos-r-l-rodrigues@users.noreply.github.com>
This commit is contained in:
Adrien de Peretti
2025-02-05 17:49:18 +01:00
committed by GitHub
parent 60f46e07fd
commit a33aebd895
35 changed files with 1677 additions and 727 deletions

View File

@@ -23,6 +23,7 @@ export class QueryBuilder {
private readonly selector: QueryFormat
private readonly options?: QueryOptions
private readonly schema: IndexTypes.SchemaObjectRepresentation
private readonly allSchemaFields: Set<string>
constructor(args: {
schema: IndexTypes.SchemaObjectRepresentation
@@ -37,14 +38,24 @@ export class QueryBuilder {
this.options = args.options
this.knex = args.knex
this.structure = this.selector.select
this.allSchemaFields = new Set(
Object.values(this.schema).flatMap((entity) => entity.fields ?? [])
)
}
private getStructureKeys(structure) {
return Object.keys(structure ?? {}).filter((key) => key !== "entity")
}
private getEntity(path): IndexTypes.SchemaPropertiesMap[0] {
private getEntity(
path,
throwWhenNotFound = true
): IndexTypes.SchemaPropertiesMap[0] | undefined {
if (!this.schema._schemaPropertiesMap[path]) {
if (!throwWhenNotFound) {
return
}
throw new Error(`Could not find entity for path: ${path}`)
}
@@ -52,7 +63,7 @@ export class QueryBuilder {
}
private getGraphQLType(path, field) {
const entity = this.getEntity(path)?.ref?.entity
const entity = this.getEntity(path)?.ref?.entity!
const fieldRef = this.entityMap[entity]._fields[field]
if (!fieldRef) {
throw new Error(`Field ${field} not found in the entityMap.`)
@@ -270,6 +281,18 @@ export class QueryBuilder {
return builder
}
private getShortAlias(aliasMapping, alias: string) {
aliasMapping.__aliasIndex ??= 0
if (aliasMapping[alias]) {
return aliasMapping[alias]
}
aliasMapping[alias] = "t_" + aliasMapping.__aliasIndex++ + "_"
return aliasMapping[alias]
}
private buildQueryParts(
structure: Select,
parentAlias: string,
@@ -281,10 +304,17 @@ export class QueryBuilder {
): string[] {
const currentAliasPath = [...aliasPath, parentProperty].join(".")
const entities = this.getEntity(currentAliasPath)
const isSelectableField = this.allSchemaFields.has(parentProperty)
const entities = this.getEntity(currentAliasPath, false)
if (isSelectableField || !entities) {
// We are currently selecting a specific field of the parent entity or the entity is not found on the index schema
// We don't need to build the query parts for this as there is no join
return []
}
const mainEntity = entities.ref.entity
const mainAlias = mainEntity.toLowerCase() + level
const mainAlias =
this.getShortAlias(aliasMapping, mainEntity.toLowerCase()) + level
const allEntities: any[] = []
if (!entities.shortCutOf) {
@@ -298,7 +328,13 @@ export class QueryBuilder {
const intermediateAlias = entities.shortCutOf.split(".")
for (let i = intermediateAlias.length - 1, x = 0; i >= 0; i--, x++) {
const intermediateEntity = this.getEntity(intermediateAlias.join("."))
const intermediateEntity = this.getEntity(
intermediateAlias.join("."),
false
)
if (!intermediateEntity) {
break
}
intermediateAlias.pop()
@@ -308,14 +344,24 @@ export class QueryBuilder {
const parentIntermediateEntity = this.getEntity(
intermediateAlias.join(".")
)
)!
const alias =
intermediateEntity.ref.entity.toLowerCase() + level + "_" + x
this.getShortAlias(
aliasMapping,
intermediateEntity.ref.entity.toLowerCase()
) +
level +
"_" +
x
const parAlias =
parentIntermediateEntity.ref.entity === parentEntity
? parentAlias
: parentIntermediateEntity.ref.entity.toLowerCase() +
: this.getShortAlias(
aliasMapping,
parentIntermediateEntity.ref.entity.toLowerCase()
) +
level +
"_" +
(x + 1)
@@ -335,62 +381,68 @@ export class QueryBuilder {
let queryParts: string[] = []
for (const join of allEntities) {
const joinBuilder = this.knex.queryBuilder()
const { alias, entity, parEntity, parAlias } = join
aliasMapping[currentAliasPath] = alias
if (level > 0) {
const subQuery = this.knex.queryBuilder()
const knex = this.knex
subQuery
.select(`${alias}.id`, `${alias}.data`)
.from("index_data AS " + alias)
.join(`index_relation AS ${alias}_ref`, function () {
this.on(
`${alias}_ref.pivot`,
"=",
knex.raw("?", [`${parEntity}-${entity}`])
)
.andOn(`${alias}_ref.parent_id`, "=", `${parAlias}.id`)
.andOn(`${alias}.id`, "=", `${alias}_ref.child_id`)
})
.where(`${alias}.name`, "=", knex.raw("?", [entity]))
const cName = entity.toLowerCase()
const pName = `${parEntity}${entity}`.toLowerCase()
let joinTable = `cat_${cName} AS ${alias}`
const pivotTable = `cat_pivot_${pName}`
joinBuilder.leftJoin(
`${pivotTable} AS ${alias}_ref`,
`${alias}_ref.parent_id`,
`${parAlias}.id`
)
joinBuilder.leftJoin(joinTable, `${alias}.id`, `${alias}_ref.child_id`)
const joinWhere = this.selector.joinWhere ?? {}
const joinKey = Object.keys(joinWhere).find((key) => {
const k = key.split(".")
k.pop()
return k.join(".") === currentAliasPath
const curPath = k.join(".")
if (curPath === currentAliasPath) {
const relEntity = this.getEntity(curPath, false)
return relEntity?.ref?.entity === entity
}
return false
})
if (joinKey) {
this.parseWhere(
aliasMapping,
{ [joinKey]: joinWhere[joinKey] },
subQuery
joinBuilder
)
}
queryParts.push(`LEFT JOIN LATERAL (
${subQuery.toQuery()}
) ${alias} ON TRUE`)
queryParts.push(
joinBuilder.toQuery().replace("select * ", "").replace("where", "and")
)
}
}
const children = this.getStructureKeys(structure)
for (const child of children) {
const childStructure = structure[child] as Select
queryParts = queryParts.concat(
this.buildQueryParts(
childStructure,
mainAlias,
mainEntity,
child,
aliasPath.concat(parentProperty),
level + 1,
aliasMapping
queryParts = queryParts
.concat(
this.buildQueryParts(
childStructure,
mainAlias,
mainEntity,
child,
aliasPath.concat(parentProperty),
level + 1,
aliasMapping
)
)
)
.filter(Boolean)
}
return queryParts
@@ -404,7 +456,26 @@ export class QueryBuilder {
selectParts: object = {}
): object {
const currentAliasPath = [...aliasPath, parentProperty].join(".")
const isSelectableField = this.allSchemaFields.has(parentProperty)
if (isSelectableField) {
// We are currently selecting a specific field of the parent entity
// Let's remove the parent alias from the select parts to not select everything entirely
// and add the specific field to the select parts
const parentAliasPath = aliasPath.join(".")
const alias = aliasMapping[parentAliasPath]
delete selectParts[parentAliasPath]
selectParts[currentAliasPath] = this.knex.raw(
`${alias}.data->'${parentProperty}'`
)
return selectParts
}
const alias = aliasMapping[currentAliasPath]
// If the entity is not found in the schema (not indexed), we don't need to build the select parts
if (!alias) {
return selectParts
}
selectParts[currentAliasPath] = `${alias}.data`
selectParts[currentAliasPath + ".id"] = `${alias}.id`
@@ -473,7 +544,8 @@ export class QueryBuilder {
const rootKey = this.getStructureKeys(structure)[0]
const rootStructure = structure[rootKey] as Select
const entity = this.getEntity(rootKey).ref.entity
const entity = this.getEntity(rootKey)!.ref.entity
const rootEntity = entity.toLowerCase()
const aliasMapping: { [path: string]: string } = {}
@@ -494,20 +566,23 @@ export class QueryBuilder {
if (countAllResults) {
selectParts["offset_"] = this.knex.raw(
`DENSE_RANK() OVER (ORDER BY ${rootEntity}0.id)`
`DENSE_RANK() OVER (ORDER BY ${this.getShortAlias(
aliasMapping,
rootEntity
)}.id)`
)
}
queryBuilder.select(selectParts)
queryBuilder.from(`index_data AS ${rootEntity}0`)
queryBuilder.from(
`cat_${rootEntity} AS ${this.getShortAlias(aliasMapping, rootEntity)}`
)
joinParts.forEach((joinPart) => {
queryBuilder.joinRaw(joinPart)
})
queryBuilder.where(`${aliasMapping[rootEntity]}.name`, "=", entity)
// WHERE clause
this.parseWhere(aliasMapping, filter, queryBuilder)
@@ -559,6 +634,11 @@ export class QueryBuilder {
const initializeMaps = (structure: Select, path: string[]) => {
const currentPath = path.join(".")
const entity = this.getEntity(currentPath, false)
if (!entity) {
return
}
maps[currentPath] = {}
if (path.length > 1) {
@@ -566,9 +646,19 @@ export class QueryBuilder {
const parents = path.slice(0, -1)
const parentPath = parents.join(".")
isListMap[currentPath] = !!this.getEntity(currentPath).ref.parents.find(
(p) => p.targetProp === property
)?.isList
// In the case of specific selection
// We dont need to check if the property is a list
const isSelectableField = this.allSchemaFields.has(property)
if (isSelectableField) {
pathDetails[currentPath] = { property, parents, parentPath }
isListMap[currentPath] = false
return
}
isListMap[currentPath] = !!this.getEntity(
currentPath,
false
)?.ref?.parents?.find((p) => p.targetProp === property)?.isList
pathDetails[currentPath] = { property, parents, parentPath }
}
@@ -595,6 +685,20 @@ export class QueryBuilder {
return key + id
}
const columnMap = {}
const columnNames = Object.keys(resultSet[0] ?? {})
for (const property of columnNames) {
const segments = property.split(".")
const field = segments.pop()
const parent = segments.join(".")
columnMap[parent] ??= []
columnMap[parent].push({
field,
property,
})
}
resultSet.forEach((row) => {
for (const path in maps) {
const id = row[`${path}.id`]
@@ -603,6 +707,15 @@ export class QueryBuilder {
if (!pathDetails[path]) {
if (!maps[path][id]) {
maps[path][id] = row[path] || undefined
// If there is an id, but no object values, it means that specific fields were selected
// so we recompose the object with all selected fields. (id will always be selected)
if (!maps[path][id] && id) {
maps[path][id] = {}
for (const column of columnMap[path]) {
maps[path][id][column.field] = row[column.property]
}
}
}
continue
}
@@ -616,6 +729,15 @@ export class QueryBuilder {
maps[path][id] = row[path] || undefined
// If there is an id, but no object values, it means that specific fields were selected
// so we recompose the object with all selected fields. (id will always be selected)
if (!maps[path][id] && id) {
maps[path][id] = {}
for (const column of columnMap[path]) {
maps[path][id][column.field] = row[column.property]
}
}
const parentObj = maps[parentPath][row[`${parentPath}.id`]]
if (!parentObj) {
@@ -623,8 +745,8 @@ export class QueryBuilder {
}
const isList = isListMap[parentPath + "." + property]
if (isList) {
parentObj[property] ??= []
if (isList && !Array.isArray(parentObj[property])) {
parentObj[property] = []
}
if (maps[path][id] !== undefined) {