MCPcopy
hub / github.com/labring/FastGPT / checkInvalidVector

Function checkInvalidVector

projects/app/src/service/common/system/cronTask.ts:99–151  ·  view source on GitHub ↗
(start: Date, end: Date)

Source from the content-addressed store, hash-verified

97}
98
/**
 * Removes orphan vector records: vectors that no dataset data document references.
 *
 * Loads the vector rows returned by `getVectorDataByTime(start, end)` and, for each
 * row, counts `MongoDatasetData` documents in the same team and dataset whose
 * `indexes.dataId` equals the vector id. When the count is zero the vector is
 * deleted through `deleteDatasetDataVector`.
 *
 * Rows missing `teamId`, `datasetId` or `id` are logged and skipped. A failure while
 * handling a single row is logged and does not stop the run; a failure while
 * loading the rows is not caught here and propagates to the caller.
 *
 * @param start - Lower time bound forwarded to `getVectorDataByTime`.
 * @param end - Upper time bound forwarded to `getVectorDataByTime`.
 */
export async function checkInvalidVector(start: Date, end: Date): Promise<void> {
  let deletedVectorAmount = 0;

  // 1. get all vector data
  const rows = await getVectorDataByTime(start, end);
  logger.info('Start cleaning invalid vector records', { totalVectors: rows.length, start, end });

  // Counts every examined row (valid, skipped or failed) so the progress log
  // stays comparable with `totalVectors`.
  let processedVectors = 0;

  // `rows` is already resolved in memory, so a plain `for...of` is sufficient.
  for (const item of rows) {
    processedVectors++;

    if (item.teamId && item.datasetId && item.id) {
      try {
        // 2. find dataset.data
        const hasData = await MongoDatasetData.countDocuments({
          teamId: item.teamId,
          datasetId: item.datasetId,
          'indexes.dataId': item.id
        });

        // 3. if not found, delete vector
        if (hasData === 0) {
          await deleteDatasetDataVector({
            teamId: item.teamId,
            id: item.id
          });
          logger.info('Deleted orphan vector record', {
            vectorId: item.id,
            teamId: item.teamId,
            datasetId: item.datasetId
          });
          deletedVectorAmount++;
        }
      } catch (error) {
        // Best effort: keep going with the remaining rows.
        logger.error('Failed to clean invalid vector record', { ...item, error });
      }
    } else {
      logger.error('Invalid vector record encountered', { ...item });
    }

    if (processedVectors % 100 === 0) {
      logger.debug('Invalid vector cleaning progress', {
        processedVectors,
        totalVectors: rows.length,
        deletedVectorAmount
      });
    }
  }

  logger.info('Finished cleaning invalid vector records', {
    deletedVectorAmount,
    totalVectors: rows.length
  });
}

Callers 1

clearInvalidDataCronFunction · 0.90

Calls 4

deleteDatasetDataVectorFunction · 0.90
infoMethod · 0.45
errorMethod · 0.45
debugMethod · 0.45

Tested by

no test coverage detected