(session_id:str)
| 876 | return None |
| 877 | |
| 878 | def convert2epub(session_id:str)->bool: |
| 879 | session = context.get_session(session_id) |
| 880 | if session and session.get('id', False): |
| 881 | if session['cancellation_requested']: |
| 882 | return False |
| 883 | try: |
| 884 | title = False |
| 885 | author = False |
| 886 | ebook_convert = shutil.which('ebook-convert') |
| 887 | if not ebook_convert: |
| 888 | error = 'ebook-convert utility is not installed or not found.' |
| 889 | print(error) |
| 890 | return False |
| 891 | file_input = session['ebook'] |
| 892 | if os.path.getsize(file_input) == 0: |
| 893 | error = f'Input file is empty: {file_input}' |
| 894 | print(error) |
| 895 | return False |
| 896 | file_ext = os.path.splitext(file_input)[1].lower() |
| 897 | if file_ext not in ebook_formats: |
| 898 | error = f'Unsupported file format: {file_ext}' |
| 899 | print(error) |
| 900 | return False |
| 901 | if file_ext == '.zip': |
| 902 | file_input = normalize_epub_zip(session_id, file_input) |
| 903 | if file_input is None: |
| 904 | return False |
| 905 | file_ext = '.epub' |
| 906 | if file_ext == '.txt': |
| 907 | with open(file_input, 'r', encoding='utf-8') as f: |
| 908 | text = f.read() |
| 909 | text = text.replace('\r\n', '\n') |
| 910 | text = re.sub(r'\n{2,}', f".{TTS_SML['pause']['static']}", text) |
| 911 | with open(file_input, 'w', encoding='utf-8') as f: |
| 912 | f.write(text) |
| 913 | elif file_ext == '.pdf': |
| 914 | msg = 'File input is a PDF. flatten it in XHTML…' |
| 915 | print(msg) |
| 916 | doc = fitz.open(file_input) |
| 917 | file_meta = doc.metadata |
| 918 | filename_noext = os.path.splitext(os.path.basename(session['ebook']))[0] |
| 919 | title = file_meta.get('title') or filename_noext |
| 920 | author = file_meta.get('author') or False |
| 921 | xhtml_pages = [] |
| 922 | for i, page in enumerate(doc): |
| 923 | has_text = page.get_text('text').strip() |
| 924 | if has_text: |
| 925 | try: |
| 926 | xhtml_content = page.get_text('xhtml').strip() |
| 927 | except Exception as e: |
| 928 | print(f'Error extracting text from page {i+1}: {e}') |
| 929 | xhtml_content = '' |
| 930 | error = None |
| 931 | else: |
| 932 | xhtml_content = '' |
| 933 | error = None |
| 934 | if not xhtml_content: |
| 935 | msg = f'The page {i+1} seems to be image-based. Using OCR…' |
no test coverage detected