MCPcopy
hub / github.com/JaidedAI/EasyOCR / CTCLabelConverter

Class CTCLabelConverter

easyocr/utils.py:273–384  ·  view source on GitHub ↗

Convert between text-label and text-index

Source from the content-addressed store, hash-verified

271
272
273class CTCLabelConverter(object):
274 """ Convert between text-label and text-index """
275
276 def __init__(self, character, separator_list = {}, dict_pathlist = {}):
277 # character (str): set of the possible characters.
278 dict_character = list(character)
279
280 self.dict = {}
281 for i, char in enumerate(dict_character):
282 self.dict[char] = i + 1
283
284 self.character = ['[blank]'] + dict_character # dummy '[blank]' token for CTCLoss (index 0)
285
286 self.separator_list = separator_list
287 separator_char = []
288 for lang, sep in separator_list.items():
289 separator_char += sep
290 self.ignore_idx = [0] + [i+1 for i,item in enumerate(separator_char)]
291
292 ####### latin dict
293 if len(separator_list) == 0:
294 dict_list = []
295 for lang, dict_path in dict_pathlist.items():
296 try:
297 with open(dict_path, "r", encoding = "utf-8-sig") as input_file:
298 word_count = input_file.read().splitlines()
299 dict_list += word_count
300 except:
301 pass
302 else:
303 dict_list = {}
304 for lang, dict_path in dict_pathlist.items():
305 with open(dict_path, "r", encoding = "utf-8-sig") as input_file:
306 word_count = input_file.read().splitlines()
307 dict_list[lang] = word_count
308
309 self.dict_list = dict_list
310
def encode(self, text, batch_max_length=25):
    """Convert text labels into text indices.

    input:
        text: text labels of each image. [batch_size]
        batch_max_length: accepted for interface compatibility; this
            method does not read it.

    output:
        text: concatenated text index for CTCLoss.
            [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
        length: length of each text. [batch_size]

    A character missing from ``self.dict`` raises ``KeyError``.
    """
    # Per-label lengths are taken before the labels are joined.
    label_lengths = list(map(len, text))

    # Flatten the batch into one string and look every character up.
    flat_indices = []
    for ch in ''.join(text):
        flat_indices.append(self.dict[ch])

    return (torch.IntTensor(flat_indices), torch.IntTensor(label_lengths))
326
327 def decode_greedy(self, text_index, length):
328 """ convert text-index into text-label. """
329 texts = []
330 index = 0

Callers 1

get_recognizer · Function · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild: real call sites across dependent graphs

searching dependent graphs…