MCPcopy
hub / github.com/DrewThomasson/ebook2audiobook / convert2epub

Function convert2epub

lib/core.py:878–1153  ·  view source on GitHub ↗
(session_id:str)

Source from the content-addressed store, hash-verified

876 return None
877
878def convert2epub(session_id:str)->bool:
879 session = context.get_session(session_id)
880 if session and session.get('id', False):
881 if session['cancellation_requested']:
882 return False
883 try:
884 title = False
885 author = False
886 ebook_convert = shutil.which('ebook-convert')
887 if not ebook_convert:
888 error = 'ebook-convert utility is not installed or not found.'
889 print(error)
890 return False
891 file_input = session['ebook']
892 if os.path.getsize(file_input) == 0:
893 error = f'Input file is empty: {file_input}'
894 print(error)
895 return False
896 file_ext = os.path.splitext(file_input)[1].lower()
897 if file_ext not in ebook_formats:
898 error = f'Unsupported file format: {file_ext}'
899 print(error)
900 return False
901 if file_ext == '.zip':
902 file_input = normalize_epub_zip(session_id, file_input)
903 if file_input is None:
904 return False
905 file_ext = '.epub'
906 if file_ext == '.txt':
907 with open(file_input, 'r', encoding='utf-8') as f:
908 text = f.read()
909 text = text.replace('\r\n', '\n')
910 text = re.sub(r'\n{2,}', f".{TTS_SML['pause']['static']}", text)
911 with open(file_input, 'w', encoding='utf-8') as f:
912 f.write(text)
913 elif file_ext == '.pdf':
914 msg = 'File input is a PDF. flatten it in XHTML…'
915 print(msg)
916 doc = fitz.open(file_input)
917 file_meta = doc.metadata
918 filename_noext = os.path.splitext(os.path.basename(session['ebook']))[0]
919 title = file_meta.get('title') or filename_noext
920 author = file_meta.get('author') or False
921 xhtml_pages = []
922 for i, page in enumerate(doc):
923 has_text = page.get_text('text').strip()
924 if has_text:
925 try:
926 xhtml_content = page.get_text('xhtml').strip()
927 except Exception as e:
928 print(f'Error extracting text from page {i+1}: {e}')
929 xhtml_content = ''
930 error = None
931 else:
932 xhtml_content = ''
933 error = None
934 if not xhtml_content:
935 msg = f'The page {i+1} seems to be image-based. Using OCR…'

Callers 1

convert_ebook · Function · 0.85

Calls 10

normalize_epub_zip · Function · 0.85
show_alert · Function · 0.85
ocr2xhtml · Function · 0.85
DependencyError · Class · 0.85
get_session · Method · 0.80
read · Method · 0.80
join · Method · 0.80
run · Method · 0.80
write · Method · 0.45
convert · Method · 0.45

Tested by

no test coverage detected