MCPcopy
hub / github.com/impira/docquery / context

Method context

src/docquery/document.py:210–234  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

208
@cached_property
def context(self) -> Dict[str, List[Tuple["Image.Image", List[Any]]]]:
    """Group the driver's word boxes by page and build the document output.

    Ensures the document is loaded, fetches the word boxes from the driver,
    and assigns each one to a page using the page-top offsets taken from
    ``self.page_screenshots``. Each box's ``top`` and ``bottom`` are rebased
    by subtracting the offset of the page it landed on; ``left`` and
    ``right`` are passed through unchanged.

    Returns:
        Whatever ``self._generate_document_output`` returns when given
        ``self.preview``, the per-page word lists, the per-page box lists,
        and one ``(vw, vh)`` pair per page.

    Note:
        The page index only ever moves forward, so word boxes are presumed
        to arrive ordered top-to-bottom -- TODO confirm against the driver.
    """
    self.ensure_loaded()
    word_boxes = self.driver.find_word_boxes()

    # Only the page-top offsets are needed here; the second element of
    # page_screenshots is ignored.
    tops, _ = self.page_screenshots

    n_pages = len(tops)
    last_page = n_pages - 1
    page = 0
    # Stays 0 until the first page advance, exactly as before.
    offset = 0

    words = [[] for _ in range(n_pages)]
    boxes = [[] for _ in range(n_pages)]
    for word_box in word_boxes["word_boxes"]:
        box = word_box["box"]

        # Advance across *every* page boundary the box has passed. A single
        # `if` would move forward only one page per word, so a box that
        # follows a page with no words would be filed under that empty page
        # and rebased with the wrong offset.
        while page < last_page and box["top"] >= tops[page + 1]:
            page += 1
            offset = tops[page]

        words[page].append(word_box["text"])
        boxes[page].append(
            (box["left"], box["top"] - offset, box["right"], box["bottom"] - offset)
        )

    # Every page shares the same (vw, vh) pair reported with the word boxes.
    return self._generate_document_output(
        self.preview, words, boxes, [(word_boxes["vw"], word_boxes["vh"])] * n_pages
    )
235
236
237@validate_arguments

Callers

nothing calls this directly

Calls 3

ensure_loaded · Method · 0.95
find_word_boxes · Method · 0.80

Tested by

no test coverage detected