Method _save_doc

pageindex/client.py:157–168 · view source on GitHub ↗

(self, doc_id: str)

Source from the content-addressed store, hash-verified

155	return None
156
157	def _save_doc(self, doc_id: str):
158	doc = self.documents[doc_id].copy()
159	# Strip text from structure nodes — redundant with pages (PDF only)
160	if doc.get('structure') and doc.get('type') == 'pdf':
161	doc['structure'] = remove_fields(doc['structure'], fields=['text'])
162	path = self.workspace / f"{doc_id}.json"
163	with open(path, "w", encoding="utf-8") as f:
164	json.dump(doc, f, ensure_ascii=False, indent=2)
165	self._save_meta(doc_id, self._make_meta_entry(doc))
166	# Drop heavy fields; will lazy-load on demand
167	self.documents[doc_id].pop('structure', None)
168	self.documents[doc_id].pop('pages', None)
169
170	def _rebuild_meta(self) -> dict:
171	"""Scan individual doc JSON files and return a meta dict."""

indexMethod · 0.95

_save_metaMethod · 0.95

_make_meta_entryMethod · 0.95

remove_fieldsFunction · 0.85

no test coverage detected