解析文本 :param text: 需要解析的文本 :param index: 从哪个正则开始解析 :return: 解析后的树形结果数据
(self, text: str, index=0)
| 378 | self.limit = limit |
| 379 | |
def parse_to_tree(self, text: str, index=0):
    """
    Parse text into a tree-shaped list of nodes.

    Title candidates come from ``parse_title_level(text,
    self.content_level_pattern, index)``. The text that follows each title
    (as returned by ``get_level_block``) is parsed recursively with
    ``index + 1`` and attached to the title node under ``'children'``.

    :param text: the text to parse
    :param index: which pattern (regex) level to start parsing from
    :return: the parsed tree-shaped result data; when no title is found at
        this level, the text is split with ``smart_split_paragraph`` and
        returned as a flat list of ``'block'`` nodes
    """
    level_content_list = parse_title_level(text, self.content_level_pattern, index)
    if not level_content_list:
        return [to_tree_obj(row, 'block') for row in smart_split_paragraph(text, limit=self.limit)]
    # At the top level, if the text does not begin with the first detected
    # entry, put an empty node in front of it.
    if index == 0 and text.lstrip().index(level_content_list[0]["content"].lstrip()) != 0:
        level_content_list.insert(0, to_tree_obj(""))

    cursor = 0
    level_title_content_list = [item for item in level_content_list if item.get('state') == 'title']
    for i, title_node in enumerate(level_title_content_list):
        start_content: str = title_node.get('content')
        # Locate the title once; the original searched the text twice here.
        title_start = text.index(start_content, cursor)
        if cursor < title_start:
            # Text sitting between the cursor and this title becomes block nodes.
            # NOTE(review): each row is inserted at position 0, so several rows
            # end up in reverse order at the front of the list - confirm intended.
            for row in smart_split_paragraph(text[cursor:title_start], limit=self.limit):
                level_content_list.insert(0, to_tree_obj(row, 'block'))

        block, cursor = get_level_block(text, level_title_content_list, i, cursor)
        if not block:
            continue
        children = self.parse_to_tree(text=block, index=index + 1)
        title_node['children'] = children
        if not children:
            # Nothing was produced for this block; indexing children[0] below
            # would raise IndexError.
            continue
        # NOTE(review): the offset is measured in block.lstrip() but is used to
        # slice the unstripped block below - confirm this is intended.
        first_child_idx_in_block = block.lstrip().index(children[0]["content"].lstrip())
        if first_child_idx_in_block != 0:
            # Text preceding the first child is parsed separately and appended.
            inner_children = self.parse_to_tree(block[:first_child_idx_in_block], index + 1)
            title_node['children'].extend(inner_children)
    return level_content_list
| 411 | |
| 412 | def parse(self, text: str): |
| 413 | """ |
no test coverage detected