Compute parse errors for the table. Parameters ---------- table : camelot.core.Table Returns ------- Tuple Parse errors
(self, table)
| 176 | return idx |
| 177 | |
| 178 | def compute_parse_errors(self, table): |
| 179 | """Compute parse errors for the table . |
| 180 | |
| 181 | Parameters |
| 182 | ---------- |
| 183 | table : camelot.core.Table |
| 184 | |
| 185 | Returns |
| 186 | ------- |
| 187 | Tuple |
| 188 | Parse errors |
| 189 | """ |
| 190 | pos_errors = [] |
| 191 | # Process textlines from both orientations in a single global |
| 192 | # reading-order stream (-y0 top-first, then x0 left-first) rather |
| 193 | # than the previous vertical-pass-then-horizontal-pass loop. |
| 194 | # |
| 195 | # Cell.text is an *appending* setter, so the order textlines are |
| 196 | # visited determines the order their fragments concatenate in a |
| 197 | # cell. The old "all vertical, then all horizontal" order meant a |
| 198 | # glyph that playa happened to classify as a vertical textline |
| 199 | # (e.g. a lone single character split off from a word) was |
| 200 | # appended *before* the horizontal textlines of the same cell — |
| 201 | # floating it to the front of the cell text. Reported as #385 |
| 202 | # ('d' of 'dihydroclorid' jumping to the start of the cell). |
| 203 | # |
| 204 | # Sorting both orientations together by reading order places each |
| 205 | # textline by its own position, so the cell accumulates |
| 206 | # top-to-bottom, left-to-right regardless of orientation tag. |
| 207 | textlines = [ |
| 208 | (t, direction) |
| 209 | for direction in ("vertical", "horizontal") |
| 210 | for t in self.t_bbox[direction] |
| 211 | ] |
| 212 | textlines.sort(key=lambda td: (-td[0].y0, td[0].x0)) |
| 213 | for t, direction in textlines: |
| 214 | indices, error = get_table_index( |
| 215 | table, |
| 216 | t, |
| 217 | direction, |
| 218 | split_text=self.split_text, |
| 219 | flag_size=self.flag_size, |
| 220 | strip_text=self.strip_text, |
| 221 | ) |
| 222 | if len(indices) > 0: |
| 223 | if indices[0][:2] != (-1, -1): |
| 224 | pos_errors.append(error) |
| 225 | indices = type(self)._reduce_index( |
| 226 | table, indices, shift_text=self.shift_text |
| 227 | ) |
| 228 | for r_idx, c_idx, text in indices: |
| 229 | # replace_text (#482) is applied after the |
| 230 | # split/strip/flag-size pipeline, at the |
| 231 | # last point before the text reaches the |
| 232 | # output cell. Order: strip first (already |
| 233 | # done upstream in get_table_index), then |
| 234 | # replace, then assign. |
| 235 | if self.replace_text: |
no test coverage detected