MCPcopy
hub / github.com/VectifyAI/PageIndex / _save_doc

Method _save_doc

pageindex/client.py:157–168  ·  view source on GitHub ↗
(self, doc_id: str)

Source from the content-addressed store, hash-verified

155 return None
156
def _save_doc(self, doc_id: str):
    """Persist one document to ``<workspace>/<doc_id>.json`` and trim its in-memory copy.

    The document is serialized from a shallow copy of ``self.documents[doc_id]``.
    For PDF documents that have a ``structure``, the ``text`` field is removed
    from the structure before writing, since it is redundant with ``pages``.
    After the file is written, the meta entry is updated and the heavy
    ``structure`` and ``pages`` keys are dropped from the in-memory entry so
    they can be lazy-loaded later.

    The JSON is written to a sibling temporary file and then moved over the
    destination, so a failure while serializing or writing never leaves a
    truncated ``<doc_id>.json`` in place of a previously valid one.

    Args:
        doc_id: Key of the document in ``self.documents``.

    Raises:
        KeyError: If ``doc_id`` is not present in ``self.documents``.
        TypeError, ValueError, OSError: Propagated from serialization or file
            I/O; the temporary file is removed before re-raising.
    """
    import os  # local import: only needed for the atomic replace/cleanup below

    # Shallow copy: rebinding 'structure' below must not touch the live entry.
    doc = self.documents[doc_id].copy()
    # Strip text from structure nodes — redundant with pages (PDF only)
    # NOTE(review): assumes remove_fields returns the stripped structure
    # rather than mutating its argument in place — confirm.
    if doc.get('structure') and doc.get('type') == 'pdf':
        doc['structure'] = remove_fields(doc['structure'], fields=['text'])

    path = self.workspace / f"{doc_id}.json"
    # The temp name does not end in ".json"; NOTE(review): presumably this keeps
    # it out of any scan of the workspace's doc JSON files — confirm.
    tmp_path = self.workspace / f"{doc_id}.json.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(doc, f, ensure_ascii=False, indent=2)
        # Same-directory rename: replaces the old file in one step.
        os.replace(tmp_path, path)
    except BaseException:
        # Best-effort cleanup of the partial temp file; keep the original error.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise

    self._save_meta(doc_id, self._make_meta_entry(doc))
    # Drop heavy fields; will lazy-load on demand
    self.documents[doc_id].pop('structure', None)
    self.documents[doc_id].pop('pages', None)
169
170 def _rebuild_meta(self) -> dict:
171 """Scan individual doc JSON files and return a meta dict."""

Callers 1

indexMethod · 0.95

Calls 3

_save_metaMethod · 0.95
_make_meta_entryMethod · 0.95
remove_fieldsFunction · 0.85

Tested by

no test coverage detected