Calls external tesseract.exe on input file (restrictions on types), outputting output_filename+'.txt'
(input_filename, output_filename, bool_digits=False)
# Default value for image_to_string's `cleanup` argument. Presumably controls
# whether temporary scratch files are removed after an OCR operation
# (False would mean they are kept) -- confirm against image_to_string's body.
cleanup_scratch_flag = False
| 11 | |
def call_tesseract(input_filename, output_filename, bool_digits=False):
    """Run the external tesseract executable on an image file.

    Tesseract appends the extension itself, so the recognised text ends up
    in ``output_filename + '.txt'``.

    Args:
        input_filename: Path of the image to recognise (tesseract restricts
            which image types it accepts).
        output_filename: Output base name, without the ``.txt`` extension.
        bool_digits: If true, use the ``test_digits`` language data (intended
            for prices); otherwise use ``eng`` together with the
            ``eng_characters`` config (English letters).

    Returns:
        None. On a non-zero tesseract exit status, error handling is
        delegated to ``errors.check_for_errors()``.
    """
    # "-psm 7" asks tesseract to treat the image as a single line of text.
    if bool_digits:
        mode_args = ["-l", "test_digits", "-psm", "7", "nobatch"]
    else:
        mode_args = ["-l", "eng", "-psm", "7", "nobatch", "eng_characters"]

    # Pass an argument list and skip the shell: filenames that contain spaces
    # or shell metacharacters reach tesseract verbatim instead of being split
    # or interpreted by the shell.
    # NOTE(review): assumes tesseract_exe_name is a bare executable name or
    # path with no embedded arguments -- confirm at its definition.
    args = [tesseract_exe_name, input_filename, output_filename] + mode_args
    retcode = subprocess.call(args)
    if retcode != 0:
        errors.check_for_errors()
| 27 | |
| 28 | def image_to_string(im, cleanup = cleanup_scratch_flag, bool_digits=False): |
| 29 | """Converts im to file, applies tesseract, and fetches resulting text. |
no outgoing calls
no test coverage detected