Spaces:

drewThomasson
/

ebook2audiobook_v2.0_Beta

Running

App Files Community

drewThomasson committed 10 days ago

Commit

9a27def

•

1 Parent(s): 5132ddc

Upload 4 files

Browse files

Files changed (2) hide show

app.py +4 -9
lib/functions.py +76 -34

app.py CHANGED Viewed

@@ -144,24 +144,21 @@ Linux/Mac:
         if arg.startswith('--') and arg not in options:
             print(f'Error: Unrecognized option "{arg}"')
             sys.exit(1)
     args = vars(parser.parse_args())
     # Check if the port is already in use to prevent multiple launches
     if not args['headless'] and is_port_in_use(interface_port):
         print(f'Error: Port {interface_port} is already in use. The web interface may already be running.')
         sys.exit(1)
     args['script_mode'] = args['script_mode'] if args['script_mode'] else NATIVE
     args['share'] =  args['share'] if args['share'] else False
     if args['script_mode'] == NATIVE:
         check_pkg = check_and_install_requirements(requirements_file)
         if check_pkg:
-            print('Package requirements ok')
-            if check_dictionary():
-                print ('Dictionary ok')
-            else:
                 sys.exit(1)
         else:
             print('Some packages could not be installed')
@@ -191,8 +188,6 @@ Linux/Mac:
                     new_ebooks_dir = os.path.abspath(args['ebooks_dir'])
                 else:
                     print(f'Error: The provided --ebooks_dir "{args["ebooks_dir"]}" does not exist.')
                     sys.exit(1)
             if os.path.exists(new_ebooks_dir):

         if arg.startswith('--') and arg not in options:
             print(f'Error: Unrecognized option "{arg}"')
             sys.exit(1)
     args = vars(parser.parse_args())
     # Check if the port is already in use to prevent multiple launches
     if not args['headless'] and is_port_in_use(interface_port):
         print(f'Error: Port {interface_port} is already in use. The web interface may already be running.')
         sys.exit(1)
     args['script_mode'] = args['script_mode'] if args['script_mode'] else NATIVE
     args['share'] =  args['share'] if args['share'] else False
     if args['script_mode'] == NATIVE:
         check_pkg = check_and_install_requirements(requirements_file)
         if check_pkg:
+            if not check_dictionary():
                 sys.exit(1)
         else:
             print('Some packages could not be installed')
                     new_ebooks_dir = os.path.abspath(args['ebooks_dir'])
                 else:
                     print(f'Error: The provided --ebooks_dir "{args["ebooks_dir"]}" does not exist.')
                     sys.exit(1)
             if os.path.exists(new_ebooks_dir):

lib/functions.py CHANGED Viewed

@@ -2,6 +2,7 @@ import argparse
 import csv
 import docker
 import ebooklib
 import gradio as gr
 import hashlib
 import json
@@ -287,7 +288,7 @@ def has_metadata(f):
 def convert_to_epub(session):
     if session['cancellation_requested']:
-        stop_and_detach_tts()
         print('Cancel requested')
         return False
     if session['script_mode'] == DOCKER_UTILS:
@@ -330,7 +331,7 @@ def convert_to_epub(session):
 def get_cover(session):
     try:
         if session['cancellation_requested']:
-            stop_and_detach_tts()
             print('Cancel requested')
             return False
         cover_image = False
@@ -354,7 +355,7 @@ def get_cover(session):
 def get_chapters(language, session):
     try:
         if session['cancellation_requested']:
-            stop_and_detach_tts()
             print('Cancel requested')
             return False
         all_docs = list(session['epub'].get_items_of_type(ebooklib.ITEM_DOCUMENT))
@@ -445,7 +446,7 @@ def get_sentences(sentence, language, max_pauses=9):
 def convert_chapters_to_audio(session):
     try:
         if session['cancellation_requested']:
-            stop_and_detach_tts()
             print('Cancel requested')
             return False
         progress_bar = None
@@ -531,10 +532,11 @@ def convert_chapters_to_audio(session):
                 chapter_num = x + 1
                 chapter_audio_file = f'chapter_{chapter_num}.{audioproc_format}'
                 sentences = session['chapters'][x]
                 start = current_sentence  # Mark the starting sentence of the chapter
-                print(f"\nChapter {chapter_num} containing {len(sentences)} sentences...")
                 for i, sentence in enumerate(sentences):
-                    if current_sentence >= resume_sentence and resume_sentence > 0 or resume_sentence == 0:
                         params['sentence_audio_file'] = os.path.join(session['chapters_dir_sentences'], f'{current_sentence}.{audioproc_format}')
                         params['sentence'] = sentence
                         if convert_sentence_to_audio(params, session):
@@ -549,11 +551,13 @@ def convert_chapters_to_audio(session):
                             return False
                     current_sentence += 1
                 end = current_sentence - 1
-                if combine_audio_sentences(chapter_audio_file, start, end, session):
-                    print(f'Combining chapter {chapter_num} to audio, sentence {start} to {end}')
-                else:
-                    print('combine_audio_sentences() failed!')
-                    return False
         return True
     except Exception as e:
         raise DependencyError(e)
@@ -561,7 +565,7 @@ def convert_chapters_to_audio(session):
 def convert_sentence_to_audio(params, session):
     try:
         if session['cancellation_requested']:
-            stop_and_detach_tts(params['tts'])
             print('Cancel requested')
             return False
         generation_params = {
@@ -624,7 +628,7 @@ def combine_audio_sentences(chapter_audio_file, start, end, session):
         ]
         for file in selected_files:
             if session['cancellation_requested']:
-                stop_and_detach_tts(params['tts'])
                 print('Cancel requested')
                 return False
             if session['cancellation_requested']:
@@ -751,8 +755,11 @@ def combine_audio_chapters(session):
                     ffmpeg_cmd += ['-c:v', 'copy', '-disposition:v', 'attached_pic']  # JPEG cover (no re-encoding needed)
             if ffmpeg_cover is not None and ffmpeg_cover.endswith('.png'):
                 ffmpeg_cmd += ['-pix_fmt', 'yuv420p']
-            ffmpeg_cmd += ['-af', 'agate=threshold=-33dB:ratio=2:attack=5:release=100,acompressor=threshold=-20dB:ratio=2.5:attack=50:release=200:makeup=0dB,loudnorm=I=-19:TP=-3:LRA=7:linear=true']
-            ffmpeg_cmd += ['-movflags', '+faststart', '-y', ffmpeg_final_file]
             if session['script_mode'] == DOCKER_UTILS:
                 try:
                     container = session['client'].containers.run(
@@ -795,7 +802,6 @@ def combine_audio_chapters(session):
                 docker_final_file = os.path.join(session['tmp_dir'], final_name)
                 final_file = os.path.join(session['audiobooks_dir'], final_name)
                 if export_audio():
-                    shutil.rmtree(session['tmp_dir'])
                     return final_file
         return None
     except Exception as e:
@@ -846,7 +852,7 @@ def replace_roman_numbers(text):
     text = roman_chapter_pattern.sub(replace_chapter_match, text)
     text = roman_numerals_with_period.sub(replace_numeral_with_period, text)
     return text
 def stop_and_detach_tts(tts=None):
     if tts is not None:
         if next(tts.parameters()).is_cuda:
@@ -854,7 +860,7 @@ def stop_and_detach_tts(tts=None):
         del tts
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 def delete_old_web_folders(root_dir):
     try:
         if not os.path.exists(root_dir):
@@ -937,7 +943,6 @@ def convert_ebook(args):
             session['tmp_dir'] = os.path.join(processes_dir, f"ebook-{session['id']}")
             session['chapters_dir'] = os.path.join(session['tmp_dir'], f"chapters_{hashlib.md5(args['ebook'].encode()).hexdigest()}")
             session['chapters_dir_sentences'] = os.path.join(session['chapters_dir'], 'sentences')
             if not is_gui_process:
@@ -995,6 +1000,20 @@ def convert_ebook(args):
                                 if convert_chapters_to_audio(session):
                                     final_file = combine_audio_chapters(session)
                                     if final_file is not None:
                                         progress_status = f'Audiobook {os.path.basename(final_file)} created!'
                                         return progress_status, final_file
                                     else:
@@ -1011,7 +1030,6 @@ def convert_ebook(args):
                     error = 'convert_to_epub() failed!'
             else:
                 error = f"Temporary directory {session['tmp_dir']} not removed due to failure."
         else:
             error = f"Language {args['language']} is not supported."
         if session['cancellation_requested']:
@@ -1116,7 +1134,7 @@ def web_interface(args):
                         with gr.Group():
                             gr_ebook_file = gr.File(label='EBook File (.epub, .mobi, .azw3, fb2, lrf, rb, snb, tcr, .pdf, .txt, .rtf, doc, .docx, .html, .odt, .azw)', file_types=['.epub', '.mobi', '.azw3', 'fb2', 'lrf', 'rb', 'snb', 'tcr', '.pdf', '.txt', '.rtf', 'doc', '.docx', '.html', '.odt', '.azw'])
                         with gr.Group():
-                            gr_voice_file = gr.File(label='*Cloning Voice (a .wav 24000hz for XTTS base model and 16000hz for FAIRSEQ base model, no more than 6 sec)', file_types=['.wav'], visible=interface_component_options['gr_voice_file'])
                             gr.Markdown('<p>&nbsp;&nbsp;* Optional</p>')
                         with gr.Group():
                             gr_device = gr.Radio(label='Processor Unit', choices=['CPU', 'GPU'], value='CPU')
@@ -1267,7 +1285,7 @@ def web_interface(args):
         def update_interface():
             nonlocal is_converting
             is_converting = False
-            return gr.update('Convert', variant='primary', interactive=False), gr.update(), gr.update(value=audiobook_file), update_audiobooks_ddn(), hide_modal()
         def refresh_audiobook_list():
             files = []
@@ -1284,13 +1302,17 @@ def web_interface(args):
                     return link, link, gr.update(visible=True)
             return None, None, gr.update(visible=False)
-        def update_convert_btn(upload_file, custom_model_file, session_id):
-            session = context.get_session(session_id)
-            if hasattr(upload_file, 'name') and not hasattr(custom_model_file, 'name'):
-                yield gr.update(variant='primary', interactive=True)
             else:
-                yield gr.update(variant='primary', interactive=False)
-            return
         def update_audiobooks_ddn():
             files = refresh_audiobook_list()
@@ -1375,6 +1397,12 @@ def web_interface(args):
                 yield gr.update(), gr.update(), gr.update(value=f'Error: {str(e)}')
                 return
         def change_gr_fine_tuned(fine_tuned):
             visible = False
             if fine_tuned == 'std':
@@ -1440,20 +1468,25 @@ def web_interface(args):
             }
             if args["ebook"] is None:
-                return gr.update(value='Error: a file is required.')
             try:
                 is_converting = True
                 progress_status, audiobook_file = convert_ebook(args)
                 if audiobook_file is None:
                     if is_converting:
-                        return gr.update(value='Conversion cancelled.')
                     else:
-                        return gr.update(value='Conversion failed.')
                 else:
-                    return progress_status
             except Exception as e:
-                return DependencyError(e)
         gr_ebook_file.change(
             fn=update_convert_btn,
@@ -1484,6 +1517,11 @@ def web_interface(args):
             inputs=gr_custom_model_list,
             outputs=gr_fine_tuned
         )
         gr_fine_tuned.change(
             fn=change_gr_fine_tuned,
             inputs=gr_fine_tuned,
@@ -1511,6 +1549,10 @@ def web_interface(args):
             outputs=[gr_data, gr_session_status, gr_session, gr_audiobooks_ddn, gr_custom_model_list]
         )
         gr_convert_btn.click(
             fn=submit_convert_btn,
             inputs=[
                 gr_session, gr_device, gr_ebook_file, gr_voice_file, gr_language,
@@ -1521,7 +1563,7 @@ def web_interface(args):
         ).then(
             fn=update_interface,
             inputs=None,
-            outputs=[gr_convert_btn, gr_ebook_file, gr_audio_player, gr_audiobooks_ddn, gr_modal_html]
         )
         interface.load(
             fn=None,

 import csv
 import docker
 import ebooklib
+import fnmatch
 import gradio as gr
 import hashlib
 import json
 def convert_to_epub(session):
     if session['cancellation_requested']:
+        #stop_and_detach_tts()
         print('Cancel requested')
         return False
     if session['script_mode'] == DOCKER_UTILS:
 def get_cover(session):
     try:
         if session['cancellation_requested']:
+            #stop_and_detach_tts()
             print('Cancel requested')
             return False
         cover_image = False
 def get_chapters(language, session):
     try:
         if session['cancellation_requested']:
+            #stop_and_detach_tts()
             print('Cancel requested')
             return False
         all_docs = list(session['epub'].get_items_of_type(ebooklib.ITEM_DOCUMENT))
 def convert_chapters_to_audio(session):
     try:
         if session['cancellation_requested']:
+            #stop_and_detach_tts()
             print('Cancel requested')
             return False
         progress_bar = None
                 chapter_num = x + 1
                 chapter_audio_file = f'chapter_{chapter_num}.{audioproc_format}'
                 sentences = session['chapters'][x]
+                sentences_count = len(sentences)
                 start = current_sentence  # Mark the starting sentence of the chapter
+                print(f"\nChapter {chapter_num} containing {sentences_count} sentences...")
                 for i, sentence in enumerate(sentences):
+                    if current_sentence >= resume_sentence:
                         params['sentence_audio_file'] = os.path.join(session['chapters_dir_sentences'], f'{current_sentence}.{audioproc_format}')
                         params['sentence'] = sentence
                         if convert_sentence_to_audio(params, session):
                             return False
                     current_sentence += 1
                 end = current_sentence - 1
+                print(f"\nEnd of Chapter {chapter_num}")
+                if start >= resume_sentence:
+                    if combine_audio_sentences(chapter_audio_file, start, end, session):
+                        print(f'Combining chapter {chapter_num} to audio, sentence {start} to {end}')
+                    else:
+                        print('combine_audio_sentences() failed!')
+                        return False
         return True
     except Exception as e:
         raise DependencyError(e)
 def convert_sentence_to_audio(params, session):
     try:
         if session['cancellation_requested']:
+            #stop_and_detach_tts(params['tts'])
             print('Cancel requested')
             return False
         generation_params = {
         ]
         for file in selected_files:
             if session['cancellation_requested']:
+                #stop_and_detach_tts(params['tts'])
                 print('Cancel requested')
                 return False
             if session['cancellation_requested']:
                     ffmpeg_cmd += ['-c:v', 'copy', '-disposition:v', 'attached_pic']  # JPEG cover (no re-encoding needed)
             if ffmpeg_cover is not None and ffmpeg_cover.endswith('.png'):
                 ffmpeg_cmd += ['-pix_fmt', 'yuv420p']
+            ffmpeg_cmd += [
+                '-af',
+                'agate=threshold=-35dB:ratio=1.5:attack=10:release=200,acompressor=threshold=-20dB:ratio=2:attack=80:release=200:makeup=1dB,loudnorm=I=-19:TP=-3:LRA=7:linear=true,afftdn=nf=-50,equalizer=f=150:t=q:w=2:g=2,equalizer=f=250:t=q:w=2:g=-2,equalizer=f=12000:t=q:w=2:g=2',
+                '-movflags', '+faststart', '-y', ffmpeg_final_file
+            ]
             if session['script_mode'] == DOCKER_UTILS:
                 try:
                     container = session['client'].containers.run(
                 docker_final_file = os.path.join(session['tmp_dir'], final_name)
                 final_file = os.path.join(session['audiobooks_dir'], final_name)
                 if export_audio():
                     return final_file
         return None
     except Exception as e:
     text = roman_chapter_pattern.sub(replace_chapter_match, text)
     text = roman_numerals_with_period.sub(replace_numeral_with_period, text)
     return text
+'''
 def stop_and_detach_tts(tts=None):
     if tts is not None:
         if next(tts.parameters()).is_cuda:
         del tts
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
+'''
 def delete_old_web_folders(root_dir):
     try:
         if not os.path.exists(root_dir):
             session['tmp_dir'] = os.path.join(processes_dir, f"ebook-{session['id']}")
             session['chapters_dir'] = os.path.join(session['tmp_dir'], f"chapters_{hashlib.md5(args['ebook'].encode()).hexdigest()}")
             session['chapters_dir_sentences'] = os.path.join(session['chapters_dir'], 'sentences')
             if not is_gui_process:
                                 if convert_chapters_to_audio(session):
                                     final_file = combine_audio_chapters(session)
                                     if final_file is not None:
+                                        chapters_dirs = [
+                                            dir_name for dir_name in os.listdir(session['tmp_dir'])
+                                            if fnmatch.fnmatch(dir_name, "chapters_*") and os.path.isdir(os.path.join(session['tmp_dir'], dir_name))
+                                        ]
+                                        if len(chapters_dirs) > 1:
+                                            if os.path.exists(session['chapters_dir']):
+                                                shutil.rmtree(session['chapters_dir'])
+                                            if os.path.exists(session['epub_path']):
+                                                os.remove(session['epub_path'])
+                                            if os.path.exists(session['cover']):
+                                                os.remove(session['cover'])
+                                        else:
+                                            if os.path.exists(session['tmp_dir']):
+                                                shutil.rmtree(session['tmp_dir'])
                                         progress_status = f'Audiobook {os.path.basename(final_file)} created!'
                                         return progress_status, final_file
                                     else:
                     error = 'convert_to_epub() failed!'
             else:
                 error = f"Temporary directory {session['tmp_dir']} not removed due to failure."
         else:
             error = f"Language {args['language']} is not supported."
         if session['cancellation_requested']:
                         with gr.Group():
                             gr_ebook_file = gr.File(label='EBook File (.epub, .mobi, .azw3, fb2, lrf, rb, snb, tcr, .pdf, .txt, .rtf, doc, .docx, .html, .odt, .azw)', file_types=['.epub', '.mobi', '.azw3', 'fb2', 'lrf', 'rb', 'snb', 'tcr', '.pdf', '.txt', '.rtf', 'doc', '.docx', '.html', '.odt', '.azw'])
                         with gr.Group():
+                            gr_voice_file = gr.File(label='*Cloning Voice (a .wav 24khz for XTTS base model and 16khz for FAIRSEQ base model, no more than 6 sec)', file_types=['.wav'], visible=interface_component_options['gr_voice_file'])
                             gr.Markdown('<p>&nbsp;&nbsp;* Optional</p>')
                         with gr.Group():
                             gr_device = gr.Radio(label='Processor Unit', choices=['CPU', 'GPU'], value='CPU')
         def update_interface():
             nonlocal is_converting
             is_converting = False
+            return gr.update('Convert', variant='primary', interactive=False), gr.update(value=None), gr.update(value=None), gr.update(value=audiobook_file), update_audiobooks_ddn(), hide_modal()
         def refresh_audiobook_list():
             files = []
                     return link, link, gr.update(visible=True)
             return None, None, gr.update(visible=False)
+        def update_convert_btn(upload_file=None, custom_model_file=None, session_id=None):
+            if session_id is None:
+                yield gr.update(variant='primary', interactive=False)
+                return
             else:
+                session = context.get_session(session_id)
+                if hasattr(upload_file, 'name') and not hasattr(custom_model_file, 'name'):
+                    yield gr.update(variant='primary', interactive=True)
+                else:
+                    yield gr.update(variant='primary', interactive=False)
+                return
         def update_audiobooks_ddn():
             files = refresh_audiobook_list()
                 yield gr.update(), gr.update(), gr.update(value=f'Error: {str(e)}')
                 return
+        def change_gr_tts_engine(engine):
+            if engine == 'xtts':
+                return gr.update(visible=True)
+            else:
+                return gr.update(visible=False)
         def change_gr_fine_tuned(fine_tuned):
             visible = False
             if fine_tuned == 'std':
             }
             if args["ebook"] is None:
+                yield gr.update(value='Error: a file is required.')
+                return
             try:
                 is_converting = True
                 progress_status, audiobook_file = convert_ebook(args)
                 if audiobook_file is None:
                     if is_converting:
+                        yield gr.update(value='Conversion cancelled.')
+                        return
                     else:
+                        yield gr.update(value='Conversion failed.')
+                        return
                 else:
+                    yield progress_status
+                    return
             except Exception as e:
+                yield DependencyError(e)
+                return
         gr_ebook_file.change(
             fn=update_convert_btn,
             inputs=gr_custom_model_list,
             outputs=gr_fine_tuned
         )
+        gr_tts_engine.change(
+            fn=change_gr_tts_engine,
+            inputs=gr_tts_engine,
+            outputs=gr_tab_preferences
+        )
         gr_fine_tuned.change(
             fn=change_gr_fine_tuned,
             inputs=gr_fine_tuned,
             outputs=[gr_data, gr_session_status, gr_session, gr_audiobooks_ddn, gr_custom_model_list]
         )
         gr_convert_btn.click(
+            fn=update_convert_btn,
+            inputs=None,
+            outputs=gr_convert_btn
+        ).then(
             fn=submit_convert_btn,
             inputs=[
                 gr_session, gr_device, gr_ebook_file, gr_voice_file, gr_language,
         ).then(
             fn=update_interface,
             inputs=None,
+            outputs=[gr_convert_btn, gr_ebook_file, gr_voice_file, gr_audio_player, gr_audiobooks_ddn, gr_modal_html]
         )
         interface.load(
             fn=None,