| 1748 | return text_list, text_node, ch_order.sort() |
| 1749 | |
def encodech(self, text):
    """Encode ``text`` as character indices plus a fixed-size node table.

    Each character found in ``self.dict`` is mapped to its index;
    characters missing from the dictionary are skipped. A per-index
    occurrence count is then built and padded with randomly chosen
    unused indices (count 0) until the table holds 37 entries.

    Args:
        text: The label string to encode.

    Returns:
        A tuple ``(text_list, text_node_index, text_node_num)`` where
        ``text_list`` holds the index of every known character in order,
        ``text_node_index`` is the ascending list of node indices, and
        ``text_node_num`` holds the occurrence count of each node, aligned
        with ``text_node_index``. ``(None, None, None)`` is returned when
        ``text`` is empty, is longer than ``self.max_text_len``, or
        contains no character present in ``self.dict``.

    Raises:
        ValueError: Propagated from ``sample`` when the number of padding
            entries requested is negative or exceeds the number of unused
            indices in ``range(self.num_character)``.
    """
    if not text or len(text) > self.max_text_len:
        return None, None, None
    if self.lower:
        text = text.lower()

    text_list = [self.dict[char] for char in text if char in self.dict]
    if not text_list:
        # Nothing encodable: reject before doing any sampling work.
        return None, None, None

    # Fixed size of the node table. NOTE(review): presumably dictated by
    # the consuming model's head size -- confirm before changing.
    num_nodes = 37

    # Index 0 always starts with a count of 1. NOTE(review): presumably a
    # special/end token -- confirm against the dictionary layout.
    node_counts = {0: 1}
    for index in text_list:
        node_counts[index] = node_counts.get(index, 0) + 1

    # Pad with indices that do not occur in the text. The candidate pool
    # is kept in ascending order so seeded sampling stays reproducible.
    unused = [i for i in range(self.num_character) if i not in node_counts]
    for index in sample(unused, num_nodes - len(node_counts)):
        node_counts[index] = 0

    text_node_index = sorted(node_counts)
    text_node_num = [node_counts[k] for k in text_node_index]
    return text_list, text_node_index, text_node_num
| 1780 | |
| 1781 | |
| 1782 | class LatexOCRLabelEncode(object): |