hub / github.com/DrewThomasson/ebook2audiobook / convert2epub

Function convert2epub

lib/core.py:878–1153 · view source on GitHub ↗

(session_id:str)

Source from the content-addressed store, hash-verified

876	return None
877
878	def convert2epub(session_id:str)->bool:
879	session = context.get_session(session_id)
880	if session and session.get('id', False):
881	if session['cancellation_requested']:
882	return False
883	try:
884	title = False
885	author = False
886	ebook_convert = shutil.which('ebook-convert')
887	if not ebook_convert:
888	error = 'ebook-convert utility is not installed or not found.'
889	print(error)
890	return False
891	file_input = session['ebook']
892	if os.path.getsize(file_input) == 0:
893	error = f'Input file is empty: {file_input}'
894	print(error)
895	return False
896	file_ext = os.path.splitext(file_input)[1].lower()
897	if file_ext not in ebook_formats:
898	error = f'Unsupported file format: {file_ext}'
899	print(error)
900	return False
901	if file_ext == '.zip':
902	file_input = normalize_epub_zip(session_id, file_input)
903	if file_input is None:
904	return False
905	file_ext = '.epub'
906	if file_ext == '.txt':
907	with open(file_input, 'r', encoding='utf-8') as f:
908	text = f.read()
909	text = text.replace('\r\n', '\n')
910	text = re.sub(r'\n{2,}', f".{TTS_SML['pause']['static']}", text)
911	with open(file_input, 'w', encoding='utf-8') as f:
912	f.write(text)
913	elif file_ext == '.pdf':
914	msg = 'File input is a PDF. flatten it in XHTML…'
915	print(msg)
916	doc = fitz.open(file_input)
917	file_meta = doc.metadata
918	filename_noext = os.path.splitext(os.path.basename(session['ebook']))[0]
919	title = file_meta.get('title') or filename_noext
920	author = file_meta.get('author') or False
921	xhtml_pages = []
922	for i, page in enumerate(doc):
923	has_text = page.get_text('text').strip()
924	if has_text:
925	try:
926	xhtml_content = page.get_text('xhtml').strip()
927	except Exception as e:
928	print(f'Error extracting text from page {i+1}: {e}')
929	xhtml_content = ''
930	error = None
931	else:
932	xhtml_content = ''
933	error = None
934	if not xhtml_content:
935	msg = f'The page {i+1} seems to be image-based. Using OCR…'

Callers 1

convert_ebook · Function · 0.85

Calls 10

normalize_epub_zip · Function · 0.85

show_alert · Function · 0.85

ocr2xhtml · Function · 0.85

DependencyError · Class · 0.85

get_session · Method · 0.80

read · Method · 0.80

join · Method · 0.80

run · Method · 0.80

write · Method · 0.45

convert · Method · 0.45

Tested by

no test coverage detected