(start: Date, end: Date)
| 97 | } |
| 98 | |
/**
 * Removes orphan vector records found within a time window.
 *
 * Loads the vector rows returned by `getVectorDataByTime(start, end)` and, for
 * each row, counts the `MongoDatasetData` documents matching its `teamId`,
 * `datasetId` and `indexes.dataId`. When no document matches, the vector is
 * deleted through `deleteDatasetDataVector`.
 *
 * Rows missing `teamId`, `datasetId` or `id` are logged and skipped. A failure
 * while handling a single row is logged and does not stop the scan.
 *
 * @param start - Start of the time window, forwarded to `getVectorDataByTime`.
 * @param end - End of the time window, forwarded to `getVectorDataByTime`.
 * @returns A promise that resolves once every row has been visited.
 */
export async function checkInvalidVector(start: Date, end: Date): Promise<void> {
  let deletedVectorAmount = 0;
  // 1. get all vector data
  const rows = await getVectorDataByTime(start, end);
  logger.info('Start cleaning invalid vector records', { totalVectors: rows.length, start, end });

  // Number of rows visited so far (valid, skipped and failed alike), so that the
  // progress log can be compared directly against `rows.length`.
  let index = 0;

  // `rows` is already resolved, so a plain `for...of` is sufficient here.
  for (const item of rows) {
    if (!item.teamId || !item.datasetId || !item.id) {
      logger.error('Invalid vector record encountered', { ...item });
    } else {
      try {
        // 2. find dataset.data
        const hasData = await MongoDatasetData.countDocuments({
          teamId: item.teamId,
          datasetId: item.datasetId,
          'indexes.dataId': item.id
        });

        // 3. if not found, delete vector
        if (hasData === 0) {
          await deleteDatasetDataVector({
            teamId: item.teamId,
            id: item.id
          });
          logger.info('Deleted orphan vector record', {
            vectorId: item.id,
            teamId: item.teamId,
            datasetId: item.datasetId
          });
          deletedVectorAmount++;
        }
      } catch (error) {
        // Best effort: one failing row must not abort the whole cleanup.
        logger.error('Failed to clean invalid vector record', { ...item, error });
      }
    }

    // Count the row regardless of outcome; otherwise skipped or failed rows
    // make the progress counter lag behind and never reach the total.
    index++;
    if (index % 100 === 0) {
      logger.debug('Invalid vector cleaning progress', {
        processedVectors: index,
        totalVectors: rows.length,
        deletedVectorAmount
      });
    }
  }

  logger.info('Finished cleaning invalid vector records', {
    deletedVectorAmount,
    totalVectors: rows.length
  });
}
no test coverage detected