drewThomasson committed
Commit 5f0212d
1 Parent(s): eb2f049

Upload 10 files

Browse files:
- app.py +35 -56
- ebook2audiobook.cmd +285 -0
- ebook2audiobook.sh +3 -1
- lib/__pycache__/conf.cpython-312.pyc +0 -0
- lib/__pycache__/functions.cpython-312.pyc +0 -0
- lib/__pycache__/lang.cpython-312.pyc +0 -0
- lib/conf.py +88 -17
- lib/functions.py +355 -322
- lib/lang.py +0 -0
- setup.py +50 -50
app.py
CHANGED
@@ -47,18 +47,7 @@ def check_and_install_requirements(file_path):
        except subprocess.CalledProcessError as e:
            print(f'Failed to install packages: {e}')
            return False
-
-        from lib.functions import check_missing_files, download_model
-        for mod in models.keys():
-            if mod == 'xtts':
-                mod_exists, err, list = check_missing_files(models[mod]['local'], models[mod]['files'])
-                if mod_exists:
-                    print('All specified xtts base model files are present in the folder.')
-                else:
-                    print('The following files are missing:', list)
-                    print(f'Downloading {mod} files . . .')
-                    download_model(models[mod]['local'], models[mod]['url'])
-            '''
+
        return True
    except Exception as e:
        raise(f'An error occurred: {e}')
@@ -92,12 +81,12 @@ def main():
Example usage:
Windows:
headless:
-    ebook2audiobook.cmd --headless --ebook 'path_to_ebook'
+    ebook2audiobook.cmd --headless --ebook 'path_to_ebook'
Graphic Interface:
    ebook2audiobook.cmd
Linux/Mac:
headless:
-    ./ebook2audiobook.sh --headless --ebook 'path_to_ebook'
+    ./ebook2audiobook.sh --headless --ebook 'path_to_ebook'
Graphic Interface:
    ./ebook2audiobook.sh
''',
@@ -106,11 +95,8 @@ Linux/Mac:
    options = [
        '--script_mode', '--share', '--headless',
        '--session', '--ebook', '--ebooks_dir',
-        '--voice', '--language', '--device',
-
-        #'--custom_model_url',
-        '--temperature',
-        '--length_penalty', '--repetition_penalty',
+        '--voice', '--language', '--device', '--custom_model',
+        '--temperature', '--length_penalty', '--repetition_penalty',
        '--top_k', '--top_p', '--speed',
        '--enable_text_splitting', '--fine_tuned',
        '--version', '--help'
@@ -128,37 +114,30 @@ Linux/Mac:
    parser.add_argument(options[5], nargs='?', const='default', type=str,
                        help=f'Path to the directory containing ebooks for batch conversion. Default to "{os.path.basename(ebooks_dir)}" if "default" is provided.')
    parser.add_argument(options[6], type=str, default=None,
-                        help='Path to the target voice file for TTS. Optional, uses a default voice if not provided.')
+                        help='Path to the target voice file for TTS. Optional, must be 24khz for XTTS and 16khz for fairseq models, uses a default voice if not provided.')
    parser.add_argument(options[7], type=str, default=default_language_code,
                        help=f'Language for the audiobook conversion. Options: {lang_list_str}. Default to English (eng).')
    parser.add_argument(options[8], type=str, default='cpu', choices=['cpu', 'gpu'],
                        help=f'Type of processor unit for the audiobook conversion. If not specified: check first if gpu available, if not cpu is selected.')
-    """
    parser.add_argument(options[9], type=str,
-                        help='Path to the custom model file
-    parser.add_argument(options[10], type=
-                        help=("URL to download the custom model as a zip file. Optional, but will be used if provided. "
-                              "Examples include David Attenborough's model: "
-                              "'https://huggingface.co/drewThomasson/xtts_David_Attenborough_fine_tune/resolve/main/Finished_model_files.zip?download=true'. "
-                              "More XTTS fine-tunes can be found on my Hugging Face at 'https://huggingface.co/drewThomasson'."))
-    """
-    parser.add_argument(options[9], type=float, default=0.65,
+                        help=f'Path to the custom model (.zip file containing {default_model_files}). Required if using a custom model.')
+    parser.add_argument(options[10], type=float, default=0.65,
                        help='Temperature for the model. Default to 0.65. Higher temperatures lead to more creative outputs.')
-    parser.add_argument(options[
+    parser.add_argument(options[11], type=float, default=1.0,
                        help='A length penalty applied to the autoregressive decoder. Default to 1.0. Not applied to custom models.')
-    parser.add_argument(options[
+    parser.add_argument(options[12], type=float, default=2.5,
                        help='A penalty that prevents the autoregressive decoder from repeating itself. Default to 2.5')
-    parser.add_argument(options[
+    parser.add_argument(options[13], type=int, default=50,
                        help='Top-k sampling. Lower values mean more likely outputs and increased audio generation speed. Default to 50')
-    parser.add_argument(options[
+    parser.add_argument(options[14], type=float, default=0.8,
                        help='Top-p sampling. Lower values mean more likely outputs and increased audio generation speed. Default to 0.8')
-    parser.add_argument(options[
+    parser.add_argument(options[15], type=float, default=1.0,
                        help='Speed factor for the speech generation. Default to 1.0')
-    parser.add_argument(options[15], type=str, default=default_fine_tuned,
-                        help='Name of the fine tuned model. Optional, uses the standard model according to the TTS engine and language.')
    parser.add_argument(options[16], action='store_true',
-                        help='Enable splitting text into sentences. Default to False.')
-    parser.add_argument(options[17],
+                        help='Enable splitting text into sentences. Default to False.')
+    parser.add_argument(options[17], type=str, default=default_fine_tuned,
+                        help='Name of the fine tuned model. Optional, uses the standard model according to the TTS engine and language.')
+    parser.add_argument(options[18], action='version',version=f'ebook2audiobook version {version}',
                        help='Show the version of the script and exit')

    for arg in sys.argv:
@@ -166,17 +145,17 @@ Linux/Mac:
            print(f'Error: Unrecognized option "{arg}"')
            sys.exit(1)

-    args = parser.parse_args()
+    args = vars(parser.parse_args())

    # Check if the port is already in use to prevent multiple launches
-    if not args
-    print(f'Error: Port {
+    if not args['headless'] and is_port_in_use(interface_port):
+        print(f'Error: Port {interface_port} is already in use. The web interface may already be running.')
        sys.exit(1)

-    args
-    args
+    args['script_mode'] = args['script_mode'] if args['script_mode'] else NATIVE
+    args['share'] = args['share'] if args['share'] else False

-    if args
+    if args['script_mode'] == NATIVE:
        check_pkg = check_and_install_requirements(requirements_file)
        if check_pkg:
            print('Package requirements ok')
@@ -191,27 +170,27 @@ Linux/Mac:
    from lib.functions import web_interface, convert_ebook

    # Conditions based on the --headless flag
-    if args
-    args
-    args
+    if args['headless']:
+        args['is_gui_process'] = False
+        args['audiobooks_dir'] = audiobooks_cli_dir

        # Condition to stop if both --ebook and --ebooks_dir are provided
-        if args
+        if args['ebook'] and args['ebooks_dir']:
            print('Error: You cannot specify both --ebook and --ebooks_dir in headless mode.')
            sys.exit(1)

        # Condition 1: If --ebooks_dir exists, check value and set 'ebooks_dir'
-        if args
+        if args['ebooks_dir']:
            new_ebooks_dir = None
-            if args
+            if args['ebooks_dir'] == 'default':
                print(f'Using the default ebooks_dir: {ebooks_dir}')
                new_ebooks_dir = os.path.abspath(ebooks_dir)
            else:
                # Check if the directory exists
-                if os.path.exists(args
-                new_ebooks_dir = os.path.abspath(args
+                if os.path.exists(args['ebooks_dir']):
+                    new_ebooks_dir = os.path.abspath(args['ebooks_dir'])
                else:
-                    print(f'Error: The provided --ebooks_dir "{args
+                    print(f'Error: The provided --ebooks_dir "{args['ebooks_dir']}" does not exist.')
                    sys.exit(1)

            if os.path.exists(new_ebooks_dir):
@@ -220,7 +199,7 @@ Linux/Mac:
                    if any(file.endswith(ext) for ext in ebook_formats):
                        full_path = os.path.join(new_ebooks_dir, file)
                        print(f'Processing eBook file: {full_path}')
-                        args
+                        args['ebook'] = full_path
                        progress_status, audiobook_file = convert_ebook(args)
                        if audiobook_file is None:
                            print(f'Conversion failed: {progress_status}')
@@ -229,7 +208,7 @@ Linux/Mac:
                print(f'Error: The directory {new_ebooks_dir} does not exist.')
                sys.exit(1)

-        elif args
+        elif args['ebook']:
            progress_status, audiobook_file = convert_ebook(args)
            if audiobook_file is None:
                print(f'Conversion failed: {progress_status}')
@@ -239,7 +218,7 @@ Linux/Mac:
            print('Error: In headless mode, you must specify either an ebook file using --ebook or an ebook directory using --ebooks_dir.')
            sys.exit(1)
    else:
-        args
+        args['is_gui_process'] = True
        passed_arguments = sys.argv[1:]
        allowed_arguments = {'--share', '--script_mode'}
        passed_args_set = {arg for arg in passed_arguments if arg.startswith('--')}
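Note on the app.py hunks above: the argument namespace is now converted to a plain dict with vars(parser.parse_args()), which is why the headless branch indexes args['headless'], args['ebook'] and friends, and why extra keys such as args['is_gui_process'] and args['audiobooks_dir'] can be attached before the dict is handed to convert_ebook(). A minimal, self-contained sketch of the same pattern (illustrative only, not the project's exact code):

    import argparse

    NATIVE = 'native'  # assumed constant, mirroring the script_mode default in the diff

    parser = argparse.ArgumentParser(description='Convert eBooks to audiobooks')
    parser.add_argument('--script_mode', type=str, default=None)
    parser.add_argument('--headless', action='store_true')
    parser.add_argument('--ebook', type=str, default=None)

    # vars() turns the argparse Namespace into a dict, so later code can both
    # read options and attach new keys before passing everything downstream.
    args = vars(parser.parse_args(['--headless', '--ebook', 'my_book.epub']))
    args['script_mode'] = args['script_mode'] if args['script_mode'] else NATIVE
    args['is_gui_process'] = not args['headless']
    print(args['script_mode'], args['ebook'], args['is_gui_process'])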
ebook2audiobook.cmd
ADDED
@@ -0,0 +1,285 @@
@echo off
setlocal enabledelayedexpansion

:: Capture all arguments into ARGS
set "ARGS=%*"

set "NATIVE=native"
set "DOCKER_UTILS=docker_utils"
set "FULL_DOCKER=full_docker"

set "SCRIPT_MODE=%NATIVE%"
set "SCRIPT_DIR=%~dp0"

set "PYTHON_VERSION=3.12"
set "DOCKER_UTILS_IMG=utils"
set "PYTHON_ENV=python_env"
set "CURRENT_ENV="
set "PROGRAMS_LIST=calibre ffmpeg"

set "CONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe"
set "CONDA_INSTALLER=%TEMP%\Miniconda3-latest-Windows-x86_64.exe"
set "CONDA_INSTALL_DIR=%USERPROFILE%\miniconda3"
set "CONDA_PATH=%USERPROFILE%\miniconda3\bin"
set "PATH=%CONDA_PATH%;%PATH%"

set "PROGRAMS_CHECK=0"
set "CONDA_CHECK_STATUS=0"
set "CONDA_RUN_INIT=0"
set "DOCKER_CHECK_STATUS=0"
set "DOCKER_BUILD_STATUS=0"

set "CALIBRE_TEMP_DIR=C:\Windows\Temp\Calibre"

if not exist "%CALIBRE_TEMP_DIR%" (
    mkdir "%CALIBRE_TEMP_DIR%"
)

icacls "%CALIBRE_TEMP_DIR%" /grant Users:(OI)(CI)F /T

for %%A in (%ARGS%) do (
    if "%%A"=="%DOCKER_UTILS%" (
        set "SCRIPT_MODE=%DOCKER_UTILS%"
        break
    )
)

cd /d "%SCRIPT_DIR%"

:: Check if running inside Docker
if defined CONTAINER (
    echo Running in %FULL_DOCKER% mode
    set "SCRIPT_MODE=%FULL_DOCKER%"
    goto main
)

echo Running in %SCRIPT_MODE% mode

:: Check if running in a Conda environment
if defined CONDA_DEFAULT_ENV (
    set "CURRENT_ENV=%CONDA_PREFIX%"
)

:: Check if running in a Python virtual environment
if defined VIRTUAL_ENV (
    set "CURRENT_ENV=%VIRTUAL_ENV%"
)

for /f "delims=" %%i in ('where python') do (
    if defined CONDA_PREFIX (
        if /i "%%i"=="%CONDA_PREFIX%\Scripts\python.exe" (
            set "CURRENT_ENV=%CONDA_PREFIX%"
            break
        )
    ) else if defined VIRTUAL_ENV (
        if /i "%%i"=="%VIRTUAL_ENV%\Scripts\python.exe" (
            set "CURRENT_ENV=%VIRTUAL_ENV%"
            break
        )
    )
)

if not "%CURRENT_ENV%"=="" (
    echo Current python virtual environment detected: %CURRENT_ENV%.
    echo This script runs with its own virtual env and must be out of any other virtual environment when it's launched.
    goto failed
)

goto conda_check

:conda_check
where conda >nul 2>&1
if %errorlevel% neq 0 (
    set "CONDA_CHECK_STATUS=1"
) else (
    if "%SCRIPT_MODE%"=="%DOCKER_UTILS%" (
        goto docker_check
        exit /b
    ) else (
        call :programs_check
    )
)
goto dispatch
exit /b

:programs_check
set "missing_prog_array="
for %%p in (%PROGRAMS_LIST%) do (
    set "FOUND="
    for /f "delims=" %%i in ('where %%p 2^>nul') do (
        set "FOUND=%%i"
    )
    if not defined FOUND (
        echo %%p is not installed.
        set "missing_prog_array=!missing_prog_array! %%p"
    )
)
if not "%missing_prog_array%"=="" (
    set "PROGRAMS_CHECK=1"
)
exit /b

:docker_check
docker --version >nul 2>&1
if %errorlevel% neq 0 (
    set "DOCKER_CHECK_STATUS=1"
) else (
    :: Verify Docker is running
    call docker info >nul 2>&1
    if %errorlevel% neq 0 (
        set "DOCKER_CHECK_STATUS=1"
    ) else (
        :: Check if the Docker socket is running
        set "docker_socket="
        if exist \\.\pipe\docker_engine (
            set "docker_socket=Windows"
        )
        if not defined docker_socket (
            echo Cannot connect to docker socket. Check if the docker socket is running.
            goto failed
            exit /b
        ) else (
            :: Check if the Docker image is available
            call docker images -q %DOCKER_UTILS_IMG% >nul 2>&1
            if %errorlevel% neq 0 (
                echo Docker image '%DOCKER_UTILS_IMG%' not found. Installing it now...
                set "DOCKER_BUILD_STATUS=1"
            ) else (
                goto dispatch
                exit /b
            )
        )
    )
)
goto install_components
exit /b

:install_components
:: Check if running as administrator
net session >nul 2>&1
if %errorlevel% neq 0 (
    echo This script needs to be run as administrator.
    echo Attempting to restart with administrator privileges...
    if defined ARGS (
        call powershell -ExecutionPolicy Bypass -Command "Start-Process '%~f0' -ArgumentList '%ARGS%' -WorkingDirectory '%SCRIPT_DIR%' -Verb RunAs"
    ) else (
        call powershell -ExecutionPolicy Bypass -Command "Start-Process '%~f0' -WorkingDirectory '%SCRIPT_DIR%' -Verb RunAs"
    )
    exit /b
)
:: Install Chocolatey if not already installed
choco -v >nul 2>&1
if %errorlevel% neq 0 (
    echo Chocolatey is not installed. Installing Chocolatey...
    call powershell -ExecutionPolicy Bypass -Command "Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; Invoke-Expression ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
)
:: Install Python if not already installed
python --version >nul 2>&1
if %errorlevel% neq 0 (
    echo Python is not installed. Installing Python...
    call choco install python -y
)
:: Install missing packages if any
if not "%PROGRAMS_CHECK%"=="0" (
    call choco install %missing_prog_array% -y --force
    setx CALIBRE_TEMP_DIR "%CALIBRE_TEMP_DIR%" /M
    set "PROGRAMS_CHECK=0"
    set "missing_prog_array="
)
:: Install Conda if not already installed
if not "%CONDA_CHECK_STATUS%"=="0" (
    echo Installing Conda...
    call powershell -Command "[System.Environment]::SetEnvironmentVariable('Path', [System.Environment]::GetEnvironmentVariable('Path','Machine') + ';' + [System.Environment]::GetEnvironmentVariable('Path','User'),'Process')"
    echo Downloading Conda installer...
    call bitsadmin /transfer "MinicondaDownload" %CONDA_URL% "%CONDA_INSTALLER%"
    "%CONDA_INSTALLER%" /InstallationType=JustMe /RegisterPython=0 /AddToPath=1 /S /D=%CONDA_INSTALL_DIR%
    if exist "%CONDA_INSTALL_DIR%\condabin\conda.bat" (
        echo Conda installed successfully.
        set "CONDA_RUN_INIT=1"
        set "CONDA_CHECK_STATUS=0"
        set "PATH=%CONDA_INSTALL_DIR%\condabin;%PATH%"
    )
)
:: Install Docker if not already installed
if not "%DOCKER_CHECK_STATUS%"=="0" (
    echo Docker is not installed. Installing it now...
    call choco install docker-cli docker-engine -y
    call docker --version >nul 2>&1
    if %errorlevel% equ 0 (
        echo Starting Docker Engine...
        net start com.docker.service >nul 2>&1
        if %errorlevel% equ 0 (
            echo Docker installed and started successfully.
            set "DOCKER_CHECK_STATUS=0"
        )
    )
)
:: Build Docker image if required
if not "%DOCKER_BUILD_STATUS%"=="0" (
    call conda activate "%SCRIPT_DIR%\%PYTHON_ENV%"
    call python -m pip install -e .
    call docker build -f DockerfileUtils -t utils .
    call conda deactivate
    call docker images -q %DOCKER_UTILS_IMG% >nul 2>&1
    if %errorlevel% equ 0 (
        set "DOCKER_BUILD_STATUS=0"
    )
)
net session >nul 2>&1
if %errorlevel% equ 0 (
    echo Restarting in user mode...
    start "" /b cmd /c "%~f0" %ARGS%
    exit /b
)
goto dispatch
exit /b

:dispatch
if "%PROGRAMS_CHECK%"=="0" (
    if "%CONDA_CHECK_STATUS%"=="0" (
        if "%DOCKER_CHECK_STATUS%"=="0" (
            if "%DOCKER_BUILD_STATUS%"=="0" (
                goto main
                exit /b
            )
        ) else (
            goto failed
            exit /b
        )
    )
)
echo PROGRAMS_CHECK: %PROGRAMS_CHECK%
echo CONDA_CHECK_STATUS: %CONDA_CHECK_STATUS%
echo DOCKER_CHECK_STATUS: %DOCKER_CHECK_STATUS%
echo DOCKER_BUILD_STATUS: %DOCKER_BUILD_STATUS%
timeout /t 5 /nobreak >nul
goto install_components
exit /b

:main
if "%SCRIPT_MODE%"=="%FULL_DOCKER%" (
    python %SCRIPT_DIR%\app.py --script_mode %FULL_DOCKER% %ARGS%
) else (
    if "%CONDA_RUN_INIT%"=="1" (
        call conda init
        set "CONDA_RUN_INIT=0"
    )
    if not exist "%SCRIPT_DIR%\%PYTHON_ENV%" (
        call conda create --prefix %SCRIPT_DIR%\%PYTHON_ENV% python=%PYTHON_VERSION% -y
        call conda activate %SCRIPT_DIR%\%PYTHON_ENV%
        call python -m pip install --upgrade pip
        call python -m pip install --upgrade -r requirements.txt --progress-bar=on
    ) else (
        call conda activate %SCRIPT_DIR%\%PYTHON_ENV%
    )
    python %SCRIPT_DIR%\app.py --script_mode %SCRIPT_MODE% %ARGS%
    call conda deactivate
)
exit /b

:failed
echo ebook2audiobook is not correctly installed or run.
exit /b

endlocal
pause
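The :programs_check block above looks up every entry of PROGRAMS_LIST (calibre and ffmpeg) with `where` before dispatching, and the installer branch pulls any missing ones through Chocolatey. For reference, a rough Python equivalent of that PATH check (a sketch for illustration only; in the project this logic lives in the launcher scripts, not in Python):

    import shutil

    PROGRAMS_LIST = ['calibre', 'ffmpeg']  # mirrors the launcher's PROGRAMS_LIST

    def missing_programs(programs):
        # shutil.which() is the cross-platform analogue of the batch 'where' lookup.
        return [prog for prog in programs if shutil.which(prog) is None]

    missing = missing_programs(PROGRAMS_LIST)
    if missing:
        print('Missing required programs:', ', '.join(missing))
    else:
        print('All required programs are installed.')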
ebook2audiobook.sh
CHANGED
@@ -179,7 +179,9 @@ function install_programs {
            fi
        fi
    done
-    if
+    if required_programs_check "${REQUIRED_PROGRAMS[@]}"; then
+        return 0
+    else
        echo -e "\e[33mYou can run 'ebook2audiobook.sh --script_mode docker_utils' to avoid to install $REQUIRED_PROGRAMS natively.\e[0m"
        return 1
    fi
lib/__pycache__/conf.cpython-312.pyc
CHANGED
Binary files a/lib/__pycache__/conf.cpython-312.pyc and b/lib/__pycache__/conf.cpython-312.pyc differ
lib/__pycache__/functions.cpython-312.pyc
CHANGED
Binary files a/lib/__pycache__/functions.cpython-312.pyc and b/lib/__pycache__/functions.cpython-312.pyc differ
lib/__pycache__/lang.cpython-312.pyc
CHANGED
Binary files a/lib/__pycache__/lang.cpython-312.pyc and b/lib/__pycache__/lang.cpython-312.pyc differ
lib/conf.py
CHANGED
@@ -12,14 +12,23 @@ max_python_version = (3,12)
 requirements_file = os.path.abspath(os.path.join('.','requirements.txt'))

 docker_utils_image = 'utils'
-
-
-
+
+ interface_host = '0.0.0.0'
+ interface_port = 7860
+ interface_shared_expire = 72 # hours
+ interface_concurrency_limit = 8 # or None for unlimited
+ interface_component_options = {
+     "gr_tab_preferences": True,
+     "gr_voice_file": True,
+     "gr_group_custom_model": True
+ }

 python_env_dir = os.path.abspath(os.path.join('.','python_env'))
+
 models_dir = os.path.abspath(os.path.join('.','models'))
 ebooks_dir = os.path.abspath(os.path.join('.','ebooks'))
 processes_dir = os.path.abspath(os.path.join('.','tmp'))
+
 audiobooks_gradio_dir = os.path.abspath(os.path.join('.','audiobooks','gui','gradio'))
 audiobooks_host_dir = os.path.abspath(os.path.join('.','audiobooks','gui','host'))
 audiobooks_cli_dir = os.path.abspath(os.path.join('.','audiobooks','cli'))
@@ -42,36 +51,98 @@ os.environ['XDG_CACHE_HOME'] = models_dir

 ebook_formats = ['.epub', '.mobi', '.azw3', 'fb2', 'lrf', 'rb', 'snb', 'tcr', '.pdf', '.txt', '.rtf', 'doc', '.docx', '.html', '.odt', '.azw']
 audiobook_format = 'm4b' # or 'mp3'
-
+ audioproc_format = 'wav' # only 'wav' is valid for now

 default_tts_engine = 'xtts'
 default_fine_tuned = 'std'
+ default_model_files = ['config.json', 'vocab.json', 'model.pth', 'ref.wav']

 models = {
     "xtts": {
         "std": {
             "lang": "multi",
-             "
-             "
-             "voice": default_voice_file
-
+             "repo": "tts_models/multilingual/multi-dataset/xtts_v2",
+             "sub": "",
+             "voice": default_voice_file
         },
+         "AiExplained": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/AiExplained",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "AiExplained_24khz.wav"))
+         },
+         "BobOdenkirk": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/BobOdenkirk",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "BobOdenkirk_24khz.wav"))
+         },
+         "BobRoss": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/BobRoss",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "BobRoss_24khz.wav"))
+         },
+         "BryanCranston": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/BryanCranston",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "BryanCranston_24khz.wav"))
+         },
         "DavidAttenborough": {
             "lang": "eng",
-             "
-             "
-             "
-             "voice": os.path.abspath(os.path.join("voices", "eng", "elder", "male", "DavidAttenborough_24khz.wav")),
-             "files": ["config.json", "model.pth", "vocab.json"] # Files needed for this fine-tuned model
-             #"actaul_download_location": models_dir + /tts/ + "folder" + "api"
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/DavidAttenborough",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "elder", "male", "DavidAttenborough_24khz.wav"))
         },
-
+         "DeathPuss&Boots": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/DeathPuss&Boots",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "DeathPuss&Boots_24khz.wav"))
+         },
+         "GhostMW2": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/GhostMW2",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "GhostMW2_24khz.wav"))
+         },
+         "JhonButlerASMR": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/JhonButlerASMR",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "elder", "male", "JhonButlerASMR_24khz.wav"))
+         },
+         "JhonMulaney": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/JhonMulaney",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "JhonMulaney_24khz.wav"))
+         },
+         "MorganFreeman": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/MorganFreeman",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "MorganFreeman_24khz.wav"))
+         },
+         "RainyDayHeadSpace": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/RainyDayHeadSpace",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "elder", "male", "RainyDayHeadSpace_24khz.wav"))
+         },
+         "WhisperSalemASMR": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/WhisperSalemASMR",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "WhisperSalemASMR_24khz.wav"))
+         }
     },
     "fairseq": {
         "std": {
             "lang": "multi",
-             "
-             "
+             "repo": "tts_models/[lang]/fairseq/vits",
+             "sub": "",
             "voice": default_voice_file
         }
     }
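Each fine-tuned entry added above points at the drewThomasson/fineTunedTTSModels repository through its 'repo' and 'sub' keys and ships a reference 'voice' sample, while default_model_files lists the files a complete model is expected to contain. The replacement download logic in lib/functions.py is not fully captured by this diff; the sketch below only illustrates how such an entry could be resolved to per-file URLs, reusing the huggingface.co/.../resolve/main/... pattern of the removed download_fine_tuned_model() helper (candidate_urls is a hypothetical name):

    import os

    # Values copied from the lib/conf.py hunk above.
    default_model_files = ['config.json', 'vocab.json', 'model.pth', 'ref.wav']
    entry = {
        'lang': 'eng',
        'repo': 'drewThomasson/fineTunedTTSModels',
        'sub': 'xtts-v2/eng/DavidAttenborough',
        'voice': os.path.join('voices', 'eng', 'elder', 'male', 'DavidAttenborough_24khz.wav'),
    }

    def candidate_urls(entry, files):
        # Hypothetical helper: one URL per expected model file.
        base = f"https://huggingface.co/{entry['repo']}/resolve/main"
        sub = f"/{entry['sub']}" if entry['sub'] else ''
        return [f'{base}{sub}/{name}' for name in files]

    for url in candidate_urls(entry, default_model_files):
        print(url)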
lib/functions.py
CHANGED
@@ -4,6 +4,7 @@ import docker
 import ebooklib
 import gradio as gr
 import hashlib
 import numpy as np
 import os
 import regex as re
@@ -27,6 +28,7 @@ from collections.abc import MutableMapping
 from datetime import datetime
 from ebooklib import epub
 from glob import glob
 from iso639 import languages
 from multiprocessing import Manager, Event
 from pydub import AudioSegment
@@ -40,49 +42,6 @@ from urllib.parse import urlparse
 import lib.conf as conf
 import lib.lang as lang

-def download_fine_tuned_model(model_key):
-    """Download the fine-tuned model files from Hugging Face if missing."""
-    model = models['xtts'].get(model_key)
-    if not model:
-        raise ValueError(f"Fine-tuned model '{model_key}' not found in configuration.")
-
-    # Check if the model is fine-tuned (skip std models)
-    if model_key == "std":
-        print("Standard model detected. Skipping fine-tuned download process.")
-        return
-
-    # Construct the full directory path for the fine-tuned model
-    model_dir = os.path.join(models_dir, 'tts', model['folder'], model['api'])
-    os.makedirs(model_dir, exist_ok=True)
-
-    for file_name in model['files']:
-        file_path = os.path.join(model_dir, file_name)
-        if not os.path.exists(file_path):
-            print(f"Downloading {file_name} for fine-tuned model '{model_key}'...")
-            # Construct the download URL
-            url = f"https://huggingface.co/{model['api']}/resolve/main/{model.get('subfolder', '')}/{file_name}".strip('/')
-            try:
-                response = requests.get(url, stream=True)
-                response.raise_for_status()
-                total_size = int(response.headers.get('content-length', 0))
-                with open(file_path, 'wb') as file, tqdm(
-                    total=total_size, unit='B', unit_scale=True, desc=f"Downloading {file_name}"
-                ) as progress:
-                    for chunk in response.iter_content(chunk_size=1024):
-                        file.write(chunk)
-                        progress.update(len(chunk))
-                print(f"Downloaded: {file_name}")
-            except Exception as e:
-                raise RuntimeError(f"Failed to download {file_name}: {e}")
-
-    print(f"All files for fine-tuned model '{model_key}' are ready at {model_dir}.")
-
-
-
-
-
-
 def inject_configs(target_namespace):
     # Extract variables from both modules and inject them into the target namespace
     for module in (conf, lang):
@@ -122,6 +81,7 @@ class ConversionContext:
         self.sessions[session_id] = recursive_proxy({
             "script_mode": NATIVE,
             "client": None,
             "audiobooks_dir": None,
             "tmp_dir": None,
             "src": None,
@@ -134,6 +94,7 @@ class ConversionContext:
             "fine_tuned": None,
             "voice_file": None,
             "custom_model": None,
             "chapters": None,
             "cover": None,
             "metadata": {
@@ -161,7 +122,7 @@ class ConversionContext:
         }, manager=self.manager)
         return self.sessions[session_id]

-context =
 is_gui_process = False

 class DependencyError(Exception):
@@ -181,43 +142,12 @@ class DependencyError(Exception):
         if not is_gui_process:
             sys.exit(1)

-def check_missing_files(dir_path, f_list):
-    if not os.path.exists(dir_path):
-        return False, 'Folder does not exist', f_list
-    existing_files = os.listdir(dir_path)
-    missing_files = [file for file in f_list if file not in existing_files]
-    if missing_files:
-        return False, 'Some files are missing', missing_files
-    return True, 'All files are present', []
-
-def download_model(dest_dir, url):
-    try:
-        if not os.path.exists(dest_dir):
-            os.makedirs(dest_dir)
-        zip_path = os.path.join(dest_dir, models['xtts']['zip'])
-        print('Downloading the XTTS v2 model...')
-        response = requests.get(url, stream=True)
-        response.raise_for_status() # Raise an error for bad status codes
-        total_size = int(response.headers.get('content-length', 0))
-        chunk_size = 1024 # Download in chunks of 1KB
-        with open(zip_path, 'wb') as file, tqdm(
-            total=total_size, unit='B', unit_scale=True, desc='Downloading'
-        ) as progress_bar:
-            for chunk in response.iter_content(chunk_size=chunk_size):
-                file.write(chunk)
-                progress_bar.update(len(chunk))
-        print('Extracting the model files...')
-        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-            zip_ref.extractall(dest_dir)
-        os.remove(zip_path)
-        print('Model downloaded, extracted, and zip file removed successfully.')
-    except Exception as e:
-        raise DependencyError(e)
-
 def prepare_dirs(src, session):
     try:
         resume = False
         os.makedirs(session['tmp_dir'], exist_ok=True)
         os.makedirs(session['audiobooks_dir'], exist_ok=True)
         session['src'] = os.path.join(session['tmp_dir'], os.path.basename(src))
         if os.path.exists(session['src']):
@@ -258,44 +188,79 @@ def check_fine_tuned(fine_tuned, language):
     except Exception as e:
         raise RuntimeError(e)

-def
     try:
-
-
-
-
-
-
-
-
-
-
-
-
     except Exception as e:
-        raise RuntimeError(f'
-
-def extract_custom_model(
     try:
-
         files = zip_ref.namelist()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
     except Exception as e:
         raise DependencyError(e)

@@ -323,7 +288,8 @@ def has_metadata(f):
 def convert_to_epub(session):
     if session['cancellation_requested']:
         stop_and_detach_tts()
-
     if session['script_mode'] == DOCKER_UTILS:
         try:
             docker_dir = os.path.basename(session['tmp_dir'])
@@ -363,6 +329,10 @@ def convert_to_epub(session):

 def get_cover(session):
     try:
         cover_image = False
         cover_path = os.path.join(session['tmp_dir'], session['filename_noext'] + '.jpg')
         for item in session['epub'].get_items_of_type(ebooklib.ITEM_COVER):
@@ -385,16 +355,17 @@ def get_chapters(language, session):
     try:
         if session['cancellation_requested']:
             stop_and_detach_tts()
-
         all_docs = list(session['epub'].get_items_of_type(ebooklib.ITEM_DOCUMENT))
         if all_docs:
             all_docs = all_docs[1:]
             doc_patterns = [filter_pattern(str(doc)) for doc in all_docs if filter_pattern(str(doc))]
             most_common_pattern = filter_doc(doc_patterns)
             selected_docs = [doc for doc in all_docs if filter_pattern(str(doc)) == most_common_pattern]
-             chapters = [filter_chapter(doc, language
             if session['metadata'].get('creator'):
-                 intro = f"{session['metadata']['creator']}, {session['metadata']['title']}
                 chapters[0].insert(0, intro)
             return chapters
         return False
@@ -419,16 +390,11 @@ def filter_pattern(doc_identifier):
         return 'numbers'
     return None

-def filter_chapter(doc, language
-    if session['cancellation_requested']:
-        stop_and_detach_tts()
-        raise ValueError('Cancel requested')
-
     soup = BeautifulSoup(doc.get_body_content(), 'html.parser')
     # Remove scripts and styles
     for script in soup(["script", "style"]):
         script.decompose()
-
     # Normalize lines and remove unnecessary spaces
     text = re.sub(r'(\r\n|\r|\n){3,}', '\r\n', soup.get_text().strip())
     text = replace_roman_numbers(text)
@@ -436,24 +402,19 @@ def filter_chapter(doc, language, session):
     chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
     text = '\n'.join(chunk for chunk in chunks if chunk)
     text = text.replace('»', '"').replace('«', '"')
-
     # Pattern 1: Add a space between UTF-8 characters and numbers
     text = re.sub(r'(?<=[\p{L}])(?=\d)|(?<=\d)(?=[\p{L}])', ' ', text)
-
     # Pattern 2: Split numbers into groups of 4
     text = re.sub(r'(\d{4})(?=\d)', r'\1 ', text)
-
-     chapter_sentences = get_sentences(text, language, session)
     return chapter_sentences

-def get_sentences(sentence, language,
     max_length = language_mapping[language]['char_limit']
     punctuation = language_mapping[language]['punctuation']
     parts = []
     while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
-         if session['cancellation_requested']:
-             stop_and_detach_tts()
-             raise ValueError('Cancel requested')
         # Step 1: Look for the last period (.) within max_length
         possible_splits = [i for i, char in enumerate(sentence[:max_length]) if char == '.']
         # Step 2: If no periods, look for the last comma (,)
@@ -485,14 +446,15 @@ def convert_chapters_to_audio(session):
     try:
         if session['cancellation_requested']:
             stop_and_detach_tts()
-
         progress_bar = None
         params = {}
         if is_gui_process:
             progress_bar = gr.Progress(track_tqdm=True)
         params['tts_model'] = None
         '''
-         # List available TTS models
         print("Available Models:")
         print("=================")
         for index, model in enumerate(XTTS().list_models(), 1):
@@ -501,48 +463,54 @@
         if session['metadata']['language'] in language_xtts:
             params['tts_model'] = 'xtts'
             if session['custom_model'] is not None:
-
                 config_path = os.path.join(session['custom_model'],'config.json')
             else:
-
-
-
-                 else:
-                     model_path = os.path.join(models_dir, 'tts', models[params['tts_model']][session['fine_tuned']]['folder']+os.path.normpath(models[params['tts_model']]['DavidAttenborough']['api']))
-                     config_path = os.path.join( model_path,'config.json')
-                 print(f"Loading TTS {params['tts_model']} model...")
-                 config = XttsConfig()
-                 config.models_dir = models_dir
-                 """
-                 start_time = time.time()
-                 timeout = 120
-                 while not os.path.isdir(model_path):
-                     if timeout and (time.time() - start_time) > timeout:
-                         print(f"Timeout reached: {model_path} does not exist.")
-                         return False
-                     time.sleep(1)
-                 """
-                 config.load_json(config_path)
-                 params['tts'] = Xtts.init_from_config(config)
-                 params['tts'].load_checkpoint(config, checkpoint_dir=model_path, eval=True)
             params['tts'].to(session['device'])
-             print('Computing speaker latents...')
-             params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']][session['fine_tuned']]['voice']
-             params['gpt_cond_latent'], params['speaker_embedding'] = params['tts'].get_conditioning_latents(audio_path=[params['voice_file']])
         else:
             params['tts_model'] = 'fairseq'
-
-
-             params['tts'] = XTTS(
-             params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']]['
             params['tts'].to(session['device'])

         resume_chapter = 0
         resume_sentence = 0

         # Check existing files to resume the process if it was interrupted
-         existing_chapters = sorted([f for f in os.listdir(session['chapters_dir']) if f.endswith(f'.{
-         existing_sentences = sorted([f for f in os.listdir(session['chapters_dir_sentences']) if f.endswith(f'.{

         if existing_chapters:
             count_chapter_files = len(existing_chapters)
@@ -553,67 +521,86 @@ def convert_chapters_to_audio(session):
             print(f'Resuming from sentence {resume_sentence}')

         total_chapters = len(session['chapters'])
-         total_sentences = sum(len(array) for array in session['chapters'])
         current_sentence = 0

-         with tqdm(total=total_sentences, desc='
             t.n = resume_sentence
             t.refresh()
             for x in range(resume_chapter, total_chapters):
                 chapter_num = x + 1
-                 chapter_audio_file = f'chapter_{chapter_num}.{
                 sentences = session['chapters'][x]
                 start = current_sentence # Mark the starting sentence of the chapter
                 print(f"\nChapter {chapter_num} containing {len(sentences)} sentences...")
                 for i, sentence in enumerate(sentences):
                     if current_sentence >= resume_sentence and resume_sentence > 0 or resume_sentence == 0:
-
-                         stop_and_detach_tts(params['tts'])
-                         raise ValueError('Cancel requested')
-                         params['sentence_audio_file'] = os.path.join(session['chapters_dir_sentences'], f'{current_sentence}.{audio_proc_format}')
                         params['sentence'] = sentence
-                         print(f'Sentence: {sentence}...')
                         if convert_sentence_to_audio(params, session):
-                             t.update(1)
                             percentage = (current_sentence / total_sentences) * 100
                             t.set_description(f'Processing {percentage:.2f}%')
                             t.refresh()
                             if progress_bar is not None:
                                 progress_bar(current_sentence / total_sentences)
                         else:
-                             print('convert_sentence_to_audio() failed!')
                             return False
                     current_sentence += 1
                 end = current_sentence - 1
-                 combine_audio_sentences(chapter_audio_file, start, end, session)
-
         return True
     except Exception as e:
         raise DependencyError(e)

 def convert_sentence_to_audio(params, session):
     try:
         if params['tts_model'] == 'xtts':
-
-
-
-
-
-
-
-
-
-
-
-
-
-
         elif params['tts_model'] == 'fairseq':
             params['tts'].tts_with_vc_to_file(
                 text=params['sentence'],
-                 #language=session['language'], # can be used only if multilingual model
-                 speaker_wav=params['voice_file'].replace('_24khz','_22khz'),
                 file_path=params['sentence_audio_file'],
                 split_sentences=session['enable_text_splitting']
             )
         if os.path.exists(params['sentence_audio_file']):
@@ -626,27 +613,28 @@ def convert_sentence_to_audio(params, session):
 def combine_audio_sentences(chapter_audio_file, start, end, session):
     try:
         chapter_audio_file = os.path.join(session['chapters_dir'], chapter_audio_file)
-         combined_audio = AudioSegment.empty()
-
         # Get all audio sentence files sorted by their numeric indices
         sentence_files = [f for f in os.listdir(session['chapters_dir_sentences']) if f.endswith(".wav")]
         sentences_dir_ordered = sorted(sentence_files, key=lambda x: int(re.search(r'\d+', x).group()))
-
         # Filter the files in the range [start, end]
         selected_files = [
             file for file in sentences_dir_ordered
             if start <= int(''.join(filter(str.isdigit, os.path.basename(file)))) <= end
         ]
-
         for file in selected_files:
             if session['cancellation_requested']:
                 msg = 'Cancel requested'
                 raise ValueError(msg)
-             audio_segment = AudioSegment.from_file(os.path.join(session['chapters_dir_sentences'],file), format=
             combined_audio += audio_segment
-
-         combined_audio.export(chapter_audio_file, format=audio_proc_format)
         print(f'Combined audio saved to {chapter_audio_file}')
     except Exception as e:
         raise DependencyError(e)

@@ -662,25 +650,17 @@ def combine_audio_chapters(session):
             batch_size = 256
             # Process the chapter files in batches
             for i in range(0, len(chapter_files), batch_size):
-                 if session['cancellation_requested']:
-                     msg = 'Cancel requested'
-                     raise ValueError(msg)
-
                 batch_files = chapter_files[i:i + batch_size]
                 batch_audio = AudioSegment.empty() # Initialize an empty AudioSegment for the batch
-
                 # Sequentially append each file in the current batch to the batch_audio
                 for chapter_file in batch_files:
                     if session['cancellation_requested']:
-
-
-
                     audio_segment = AudioSegment.from_wav(os.path.join(session['chapters_dir'],chapter_file))
                     batch_audio += audio_segment
-
                 combined_audio += batch_audio
-
-             combined_audio.export(assembled_audio, format=audio_proc_format)
             print(f'Combined audio saved to {assembled_audio}')
             return True
         except Exception as e:
@@ -688,6 +668,9 @@ def combine_audio_chapters(session):

     def generate_ffmpeg_metadata():
         try:
             ffmpeg_metadata = ';FFMETADATA1\n'
             if session['metadata'].get('title'):
                 ffmpeg_metadata += f"title={session['metadata']['title']}\n"
@@ -718,7 +701,6 @@ def combine_audio_chapters(session):
             mobi_asin = session['metadata']['identifiers'].get('mobi-asin', None)
             if mobi_asin:
                 ffmpeg_metadata += f'asin={mobi_asin}\n' # ASIN
-
             start_time = 0
             for index, chapter_file in enumerate(chapter_files):
                 if session['cancellation_requested']:
@@ -729,7 +711,6 @@ def combine_audio_chapters(session):
                 ffmpeg_metadata += f'[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_time}\n'
                 ffmpeg_metadata += f'END={start_time + duration_ms}\ntitle=Chapter {index + 1}\n'
                 start_time += duration_ms
-
             # Write the metadata to the file
             with open(metadata_file, 'w', encoding='utf-8') as file:
                 file.write(ffmpeg_metadata)
@@ -739,6 +720,9 @@ def combine_audio_chapters(session):

     def export_audio():
         try:
             ffmpeg_cover = None
             if session['script_mode'] == DOCKER_UTILS:
                 docker_dir = os.path.basename(session['tmp_dir'])
@@ -746,36 +730,29 @@ def combine_audio_chapters(session):
                 ffmpeg_metadata_file = f'/files/{docker_dir}/' + os.path.basename(metadata_file)
                 ffmpeg_final_file = f'/files/{docker_dir}/' + os.path.basename(docker_final_file)
                 if session['cover'] is not None:
-                     ffmpeg_cover = f'/files/{docker_dir}/' + os.path.basename(session['cover'])
-
                 ffmpeg_cmd = ['ffmpeg', '-i', ffmpeg_combined_audio, '-i', ffmpeg_metadata_file]
             else:
                 ffmpeg_combined_audio = assembled_audio
                 ffmpeg_metadata_file = metadata_file
                 ffmpeg_final_file = final_file
                 if session['cover'] is not None:
-                     ffmpeg_cover = session['cover']
-
                 ffmpeg_cmd = [shutil.which('ffmpeg'), '-i', ffmpeg_combined_audio, '-i', ffmpeg_metadata_file]
-
             if ffmpeg_cover is not None:
                 ffmpeg_cmd += ['-i', ffmpeg_cover, '-map', '0:a', '-map', '2:v']
             else:
                 ffmpeg_cmd += ['-map', '0:a']
-
-             ffmpeg_cmd += ['-map_metadata', '1', '-c:a', 'aac', '-b:a', '128k', '-ar', '44100']
-
             if ffmpeg_cover is not None:
                 if ffmpeg_cover.endswith('.png'):
                     ffmpeg_cmd += ['-c:v', 'png', '-disposition:v', 'attached_pic'] # PNG cover
                 else:
-                     ffmpeg_cmd += ['-c:v', 'copy', '-disposition:v', 'attached_pic'] # JPEG cover (no re-encoding needed)
-
             if ffmpeg_cover is not None and ffmpeg_cover.endswith('.png'):
-                 ffmpeg_cmd += ['-pix_fmt', 'yuv420p']
-
             ffmpeg_cmd += ['-movflags', '+faststart', '-y', ffmpeg_final_file]
-
             if session['script_mode'] == DOCKER_UTILS:
                 try:
                     container = session['client'].containers.run(
@@ -790,7 +767,6 @@ def combine_audio_chapters(session):
                     print(container.decode('utf-8'))
                     if shutil.copy(docker_final_file, final_file):
                         return True
-
                     return False
                 except docker.errors.ContainerError as e:
                     raise DependencyError(e)
@@ -811,7 +787,7 @@ def combine_audio_chapters(session):
     try:
         chapter_files = [f for f in os.listdir(session['chapters_dir']) if f.endswith(".wav")]
         chapter_files = sorted(chapter_files, key=lambda x: int(re.search(r'\d+', x).group()))
-         assembled_audio = os.path.join(session['tmp_dir'], '
         metadata_file = os.path.join(session['tmp_dir'], 'metadata.txt')
         if assemble_audio():
             if generate_ffmpeg_metadata():
@@ -885,7 +861,7 @@ def delete_old_web_folders(root_dir):
         os.makedirs(root_dir)
         print(f'Created missing directory: {root_dir}')
     current_time = time.time()
-     age_limit = current_time -
     for folder_name in os.listdir(root_dir):
         dir_path = os.path.join(root_dir, folder_name)
         if os.path.isdir(dir_path) and folder_name.startswith('web-'):
@@ -923,7 +899,6 @@ def convert_ebook(args):
             pass

         if args['language'] is not None and args['language'] in language_mapping.keys():
-             context = ConversionContext()
             session_id = args['session'] if args['session'] is not None else str(uuid.uuid4())
             session = context.get_session(session_id)
             session['id'] = session_id
@@ -941,13 +916,8 @@ def convert_ebook(args):
             top_p = args['top_p']
             speed = args['speed']
             enable_text_splitting = args['enable_text_splitting'] if args['enable_text_splitting'] is not None else True
-             custom_model_file = args['custom_model']
-
-             fine_tuned = args['fine_tuned'] if check_fine_tuned(args['fine_tuned'], args['language']) else False
-
-             if fine_tuned:
-                 print(f"Ensuring fine-tuned model '{fine_tuned}' is ready...")
-                 download_fine_tuned_model(fine_tuned)

             if not fine_tuned:
                 raise ValueError('The fine tuned model does not exist.')
@@ -966,24 +936,19 @@ def convert_ebook(args):
                 session['client'] = docker.from_env()

             session['tmp_dir'] = os.path.join(processes_dir, f"ebook-{session['id']}")
-             session['chapters_dir'] = os.path.join(session['tmp_dir'], f
             session['chapters_dir_sentences'] = os.path.join(session['chapters_dir'], 'sentences')

             if not is_gui_process:
                 print(f'*********** Session: {session_id}', '************* Store it in case of interruption or crash you can resume the conversion')

             if prepare_dirs(args['ebook'], session):
                 session['filename_noext'] = os.path.splitext(os.path.basename(session['src']))[0]
-                 session['custom_model'] = None
-                 if custom_model_file or custom_model_url:
-                     custom_model_dir = os.path.join(models_dir,'__sessions',f"model-{session['id']}")
-                     if os.isdir(custom_model_dir):
-                         shutil.rmtree(custom_model_dir)
-                     if custom_model_url:
-                         print(f'Get custom model: {custom_model_url}')
-                         session['custom_model'] = download_custom_model(custom_model_url, custom_model_dir, session)
-                     else:
-                         session['custom_model'] = extract_custom_model(custom_model_file, custom_model_dir, session)
                 if not torch.cuda.is_available() or device == 'cpu':
                     if device == 'gpu':
                         print('GPU is not available on your device!')
@@ -1030,7 +995,6 @@ def convert_ebook(args):
                 final_file = combine_audio_chapters(session)
                 if final_file is not None:
                     progress_status = f'Audiobook {os.path.basename(final_file)} created!'
-                     print(f"Temporary directory {session['tmp_dir']} removed successfully.")
                     return progress_status, final_file
                 else:
                     error = 'combine_audio_chapters() error: final_file not created!'
@@ -1045,10 +1009,11 @@ def convert_ebook(args):
             else:
                 error = 'convert_to_epub() failed!'
         else:
-             error = f
-
         else:
             error = f"Language {args['language']} is not supported."
         print(error)
         return error, None
     except Exception as e:
@@ -1070,6 +1035,7 @@ def web_interface(args):
         )
         for lang, details in language_mapping.items()
     ]
     fine_tuned_options = list(models['xtts'].keys())
     default_language_name = next((name for name, key in language_options if key == default_language_code), None)

@@ -1080,7 +1046,7 @@ def web_interface(args):
         radius_size='lg',
         font_mono=['JetBrains Mono', 'monospace', 'Consolas', 'Menlo', 'Liberation Mono']
     )
-
     with gr.Blocks(theme=theme) as interface:
         gr.HTML(
             '''
@@ -1123,9 +1089,12 @@ def web_interface(args):
             padding: 0 !important;
             margin: 0 !important;
         }
-         #component-7, #component-
             height: 140px !important;
         }
         </style>
         '''
     )
@@ -1133,26 +1102,37 @@
         f'''
         # Ebook2Audiobook v{version}<br/>
         https://github.com/DrewThomasson/ebook2audiobook<br/>
-         Convert eBooks into immersive audiobooks with realistic voice TTS models
         '''
     )
     with gr.Tabs():
-
         with gr.Row():
             with gr.Column(scale=3):
-                 gr_ebook_file = gr.File(label='eBook File (.epub, .mobi, .azw3, fb2, lrf, rb, snb, tcr, .pdf, .txt, .rtf, doc, .docx, .html, .odt, .azw)', file_types=['.epub', '.mobi', '.azw3', 'fb2', 'lrf', 'rb', 'snb', 'tcr', '.pdf', '.txt', '.rtf', 'doc', '.docx', '.html', '.odt', '.azw'])
-                 gr_device = gr.Radio(label='Processor Unit', choices=['CPU', 'GPU'], value='CPU')
-
gr_language = gr.Dropdown(label='Language', choices=[name for name, _ in language_options], value=default_language_name)
|
1146 |
-
with gr.Column(scale=3):
|
1147 |
with gr.Group():
|
1148 |
-
|
1149 |
-
|
1150 |
-
|
1151 |
gr.Markdown('<p> * Optional</p>')
|
1152 |
with gr.Group():
|
1153 |
-
|
|
|
|
|
|
|
|
1154 |
gr_fine_tuned = gr.Dropdown(label='Fine Tuned Models', choices=fine_tuned_options, value=default_fine_tuned, interactive=True)
|
1155 |
-
|
|
|
1156 |
gr.Markdown(
|
1157 |
'''
|
1158 |
### Customize Audio Generation Parameters
|
@@ -1214,7 +1194,6 @@ def web_interface(args):
|
|
1214 |
)
|
1215 |
|
1216 |
gr_state = gr.State(value="") # Initialize state for each user session
|
1217 |
-
gr_session_status = gr.Textbox(label='Session')
|
1218 |
gr_session = gr.Textbox(label='Session', visible=False)
|
1219 |
gr_conversion_progress = gr.Textbox(label='Progress')
|
1220 |
gr_convert_btn = gr.Button('Convert', variant='primary', interactive=False)
|
@@ -1286,7 +1265,7 @@ def web_interface(args):
|
|
1286 |
def update_interface():
|
1287 |
nonlocal is_converting
|
1288 |
is_converting = False
|
1289 |
-
return gr.update('Convert', variant='primary', interactive=False),
|
1290 |
|
1291 |
def refresh_audiobook_list():
|
1292 |
files = []
|
@@ -1303,59 +1282,102 @@ def web_interface(args):
|
|
1303 |
return link, link, gr.update(visible=True)
|
1304 |
return None, None, gr.update(visible=False)
|
1305 |
|
1306 |
-
def
|
1307 |
-
|
|
|
|
|
|
|
|
|
|
|
1308 |
|
1309 |
def update_audiobooks_ddn():
|
1310 |
files = refresh_audiobook_list()
|
1311 |
return gr.update(choices=files, label='Audiobooks', value=files[0] if files else None)
|
1312 |
|
1313 |
-
async def change_gr_ebook_file(
|
1314 |
nonlocal is_converting
|
1315 |
if context and session_id:
|
1316 |
session = context.get_session(session_id)
|
1317 |
if f is None:
|
1318 |
if is_converting:
|
1319 |
session['cancellation_requested'] = True
|
1320 |
-
yield
|
1321 |
return
|
1322 |
-
|
1323 |
-
|
1324 |
-
yield gr.update(interactive=False), hide_modal()
|
1325 |
-
return
|
1326 |
-
else:
|
1327 |
-
session['cancellation_requested'] = False
|
1328 |
-
yield gr.update(interactive=bool(f)), hide_modal()
|
1329 |
return
|
1330 |
|
1331 |
-
def change_gr_language(selected: str):
|
|
|
1332 |
if selected == 'zzzz':
|
1333 |
new_language_name = default_language_name
|
1334 |
new_language_key = default_language_code
|
1335 |
else:
|
1336 |
new_language_name, new_language_key = next(((name, key) for name, key in language_options if key == selected), (None, None))
|
1337 |
-
|
1338 |
-
# Determine the TTS engine to use
|
1339 |
-
tts_engine_value = 'xtts' if language_xtts.get(new_language_key, False) else 'fairseq'
|
1340 |
-
|
1341 |
-
# Get fine-tuned options filtered by language
|
1342 |
fine_tuned_options = [
|
1343 |
model_name
|
1344 |
-
for model_name, model_details in models.get(
|
1345 |
if model_details.get('lang') == 'multi' or model_details.get('lang') == new_language_key
|
1346 |
]
|
1347 |
-
|
1348 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1349 |
return (
|
1350 |
-
gr.update(value=new_language_name),
|
1351 |
-
gr.update(value=
|
1352 |
-
gr.update(choices=fine_tuned_options, value=fine_tuned_options[0] if fine_tuned_options else
|
|
|
1353 |
)
|
1354 |
|
1355 |
-
def
|
1356 |
-
|
1357 |
-
|
1358 |
-
|
|
|
|
|
|
|
|
1359 |
|
1360 |
def change_gr_data(data):
|
1361 |
data['event'] = 'change_data'
|
@@ -1363,9 +1385,8 @@ def web_interface(args):
|
|
1363 |
|
1364 |
def change_gr_read_data(data):
|
1365 |
nonlocal audiobooks_dir
|
|
|
1366 |
warning_text_extra = ''
|
1367 |
-
if is_gui_shared:
|
1368 |
-
warning_text_extra = f' Note: access limit time: {gradio_shared_expire} hours'
|
1369 |
if not data:
|
1370 |
data = {'session_id': str(uuid.uuid4())}
|
1371 |
warning_text = f"Session: {data['session_id']}"
|
@@ -1375,17 +1396,23 @@ def web_interface(args):
|
|
1375 |
warning_text = data['session_id']
|
1376 |
event = data.get('event', '')
|
1377 |
if event != 'load':
|
1378 |
-
return [gr.update(), gr.update(), gr.update()]
|
|
|
|
|
|
|
|
|
|
|
1379 |
if is_gui_shared:
|
|
|
1380 |
audiobooks_dir = os.path.join(audiobooks_gradio_dir, f"web-{data['session_id']}")
|
1381 |
delete_old_web_folders(audiobooks_gradio_dir)
|
1382 |
else:
|
1383 |
audiobooks_dir = os.path.join(audiobooks_host_dir, f"web-{data['session_id']}")
|
1384 |
-
return [data, f'{warning_text}{warning_text_extra}', data['session_id'], update_audiobooks_ddn()]
|
1385 |
|
1386 |
-
def
|
1387 |
session, device, ebook_file, voice_file, language,
|
1388 |
-
custom_model_file,
|
1389 |
repetition_penalty, top_k, top_p, speed, enable_text_splitting, fine_tuned
|
1390 |
):
|
1391 |
nonlocal is_converting
|
@@ -1399,8 +1426,7 @@ def web_interface(args):
|
|
1399 |
"audiobooks_dir": audiobooks_dir,
|
1400 |
"voice": voice_file.name if voice_file else None,
|
1401 |
"language": next((key for name, key in language_options if name == language), None),
|
1402 |
-
"custom_model":
|
1403 |
-
"custom_model_url": custom_model_url if custom_model_file is None else None,
|
1404 |
"temperature": float(temperature),
|
1405 |
"length_penalty": float(length_penalty),
|
1406 |
"repetition_penalty": float(repetition_penalty),
|
@@ -1412,33 +1438,34 @@ def web_interface(args):
|
|
1412 |
}
|
1413 |
|
1414 |
if args["ebook"] is None:
|
1415 |
-
return 'Error: a file is required.'
|
1416 |
|
1417 |
try:
|
1418 |
is_converting = True
|
1419 |
progress_status, audiobook_file = convert_ebook(args)
|
1420 |
-
is_converting = False
|
1421 |
-
|
1422 |
if audiobook_file is None:
|
1423 |
if is_converting:
|
1424 |
-
return 'Conversion cancelled.'
|
1425 |
else:
|
1426 |
-
return 'Conversion failed.'
|
1427 |
else:
|
1428 |
-
return progress_status
|
1429 |
except Exception as e:
|
1430 |
-
is_converting = False
|
1431 |
return DependencyError(e)
|
1432 |
|
1433 |
gr_ebook_file.change(
|
|
|
|
|
|
|
|
|
1434 |
fn=change_gr_ebook_file,
|
1435 |
-
inputs=[
|
1436 |
-
outputs=[
|
1437 |
)
|
1438 |
gr_language.change(
|
1439 |
-
lambda selected: change_gr_language(dict(language_options).get(selected, 'Unknown')),
|
1440 |
-
inputs=gr_language,
|
1441 |
-
outputs=[gr_language, gr_tts_engine, gr_fine_tuned]
|
1442 |
)
|
1443 |
gr_audiobooks_ddn.change(
|
1444 |
fn=change_gr_audiobooks_ddn,
|
@@ -1447,8 +1474,18 @@ def web_interface(args):
|
|
1447 |
)
|
1448 |
gr_custom_model_file.change(
|
1449 |
fn=change_gr_custom_model_file,
|
1450 |
-
inputs=gr_custom_model_file,
|
1451 |
-
outputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1452 |
)
|
1453 |
gr_session.change(
|
1454 |
fn=change_gr_data,
|
@@ -1469,24 +1506,20 @@ def web_interface(args):
|
|
1469 |
gr_read_data.change(
|
1470 |
fn=change_gr_read_data,
|
1471 |
inputs=gr_read_data,
|
1472 |
-
outputs=[gr_data, gr_session_status, gr_session, gr_audiobooks_ddn]
|
1473 |
)
|
1474 |
gr_convert_btn.click(
|
1475 |
-
fn=
|
1476 |
-
inputs=None,
|
1477 |
-
outputs=gr_convert_btn
|
1478 |
-
).then(
|
1479 |
-
fn=process_conversion,
|
1480 |
inputs=[
|
1481 |
gr_session, gr_device, gr_ebook_file, gr_voice_file, gr_language,
|
1482 |
-
|
1483 |
gr_repetition_penalty, gr_top_k, gr_top_p, gr_speed, gr_enable_text_splitting, gr_fine_tuned
|
1484 |
],
|
1485 |
-
outputs=
|
1486 |
).then(
|
1487 |
fn=update_interface,
|
1488 |
inputs=None,
|
1489 |
-
outputs=[gr_convert_btn, gr_ebook_file, gr_audio_player, gr_audiobooks_ddn]
|
1490 |
)
|
1491 |
interface.load(
|
1492 |
fn=None,
|
@@ -1506,7 +1539,7 @@ def web_interface(args):
|
|
1506 |
)
|
1507 |
|
1508 |
try:
|
1509 |
-
interface.queue(default_concurrency_limit=
|
1510 |
except OSError as e:
|
1511 |
print(f'Connection error: {e}')
|
1512 |
except socket.error as e:
|
|
|
4 |
import ebooklib
|
5 |
import gradio as gr
|
6 |
import hashlib
|
7 |
+
import json
|
8 |
import numpy as np
|
9 |
import os
|
10 |
import regex as re
|
|
|
28 |
from datetime import datetime
|
29 |
from ebooklib import epub
|
30 |
from glob import glob
|
31 |
+
from huggingface_hub import hf_hub_download
|
32 |
from iso639 import languages
|
33 |
from multiprocessing import Manager, Event
|
34 |
from pydub import AudioSegment
|
|
|
42 |
import lib.conf as conf
|
43 |
import lib.lang as lang
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
def inject_configs(target_namespace):
|
46 |
# Extract variables from both modules and inject them into the target namespace
|
47 |
for module in (conf, lang):
|
|
|
81 |
self.sessions[session_id] = recursive_proxy({
|
82 |
"script_mode": NATIVE,
|
83 |
"client": None,
|
84 |
+
"language": default_language_code,
|
85 |
"audiobooks_dir": None,
|
86 |
"tmp_dir": None,
|
87 |
"src": None,
|
|
|
94 |
"fine_tuned": None,
|
95 |
"voice_file": None,
|
96 |
"custom_model": None,
|
97 |
+
"custom_model_dir": None,
|
98 |
"chapters": None,
|
99 |
"cover": None,
|
100 |
"metadata": {
|
|
|
122 |
}, manager=self.manager)
|
123 |
return self.sessions[session_id]
|
124 |
|
125 |
+
context = ConversionContext()
|
126 |
is_gui_process = False
|
127 |
|
128 |
class DependencyError(Exception):
|
|
|
142 |
if not is_gui_process:
|
143 |
sys.exit(1)
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
145 |
def prepare_dirs(src, session):
|
146 |
try:
|
147 |
resume = False
|
148 |
+
os.makedirs(os.path.join(models_dir,'tts'), exist_ok=True)
|
149 |
os.makedirs(session['tmp_dir'], exist_ok=True)
|
150 |
+
os.makedirs(session['custom_model_dir'], exist_ok=True)
|
151 |
os.makedirs(session['audiobooks_dir'], exist_ok=True)
|
152 |
session['src'] = os.path.join(session['tmp_dir'], os.path.basename(src))
|
153 |
if os.path.exists(session['src']):
|
|
|
188 |
except Exception as e:
|
189 |
raise RuntimeError(e)
|
190 |
|
191 |
+
def analyze_uploaded_file(zip_path, required_files=None):
|
192 |
+
if required_files is None:
|
193 |
+
required_files = default_model_files
|
194 |
+
executable_extensions = {'.exe', '.bat', '.cmd', '.bash', '.bin', '.sh', '.msi', '.dll', '.com'}
|
195 |
try:
|
196 |
+
with zipfile.ZipFile(zip_path, 'r') as zf:
|
197 |
+
files_in_zip = set()
|
198 |
+
executables_found = False
|
199 |
+
for file_info in zf.infolist():
|
200 |
+
file_name = file_info.filename
|
201 |
+
if file_info.is_dir():
|
202 |
+
continue # Skip directories
|
203 |
+
base_name = os.path.basename(file_name)
|
204 |
+
files_in_zip.add(base_name)
|
205 |
+
_, ext = os.path.splitext(base_name.lower())
|
206 |
+
if ext in executable_extensions:
|
207 |
+
executables_found = True
|
208 |
+
break
|
209 |
+
missing_files = [f for f in required_files if f not in files_in_zip]
|
210 |
+
is_valid = not executables_found and not missing_files
|
211 |
+
return is_valid,
|
212 |
+
except zipfile.BadZipFile:
|
213 |
+
raise ValueError("error: The file is not a valid ZIP archive.")
|
214 |
except Exception as e:
|
215 |
+
raise RuntimeError(f'analyze_uploaded_file(): {e}')
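For reference, a minimal standalone sketch (not part of the commit) of the same validation idea: reject archives that ship executables and require the XTTS model files. The required-file set mirrors the ZIP contents named elsewhere in this commit (config.json, vocab.json, model.pth, ref.wav); treat it as an assumption.

# Illustrative sketch only -- not the committed analyze_uploaded_file().
import os
import zipfile

REQUIRED_FILES = {'config.json', 'vocab.json', 'model.pth', 'ref.wav'}  # assumed defaults
BLOCKED_EXTENSIONS = {'.exe', '.bat', '.cmd', '.bash', '.bin', '.sh', '.msi', '.dll', '.com'}

def is_valid_model_zip(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zf:
        names = {os.path.basename(i.filename) for i in zf.infolist() if not i.is_dir()}
    # Reject any archive carrying an executable, then require every mandatory file.
    if any(os.path.splitext(n.lower())[1] in BLOCKED_EXTENSIONS for n in names):
        return False
    return REQUIRED_FILES.issubset(names)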
|
216 |
+
|
217 |
+
async def extract_custom_model(file_src, dest=None, session=None, required_files=None):
|
218 |
try:
|
219 |
+
progress_bar = None
|
220 |
+
if is_gui_process:
|
221 |
+
progress_bar = gr.Progress(track_tqdm=True)
|
222 |
+
if dest is None:
|
223 |
+
dest = session['custom_model_dir'] = os.path.join(models_dir, '__sessions', f"model-{session['id']}")
|
224 |
+
os.makedirs(dest, exist_ok=True)
|
225 |
+
if required_files is None:
|
226 |
+
required_files = default_model_files
|
227 |
+
|
228 |
+
dir_src = os.path.dirname(file_src)
|
229 |
+
dir_name = os.path.basename(file_src).replace('.zip', '')
|
230 |
+
|
231 |
+
with zipfile.ZipFile(file_src, 'r') as zip_ref:
|
232 |
files = zip_ref.namelist()
|
233 |
+
files_length = len(files)
|
234 |
+
dir_tts = 'fairseq'
|
235 |
+
xtts_config = 'config.json'
|
236 |
+
|
237 |
+
# Check the model type
|
238 |
+
config_data = {}
|
239 |
+
if xtts_config in zip_ref.namelist():
|
240 |
+
with zip_ref.open(xtts_config) as file:
|
241 |
+
config_data = json.load(file)
|
242 |
+
if config_data.get('model') == 'xtts':
|
243 |
+
dir_tts = 'xtts'
|
244 |
+
|
245 |
+
dir_dest = os.path.join(dest, dir_tts, dir_name)
|
246 |
+
os.makedirs(dir_dest, exist_ok=True)
|
247 |
+
|
248 |
+
# Initialize progress bar
|
249 |
+
with tqdm(total=100, unit='%') as t: # Track progress as a percentage
|
250 |
+
for i, file in enumerate(files):
|
251 |
+
if file in required_files:
|
252 |
+
zip_ref.extract(file, dir_dest)
|
253 |
+
progress_percentage = ((i + 1) / files_length) * 100
|
254 |
+
t.n = int(progress_percentage)
|
255 |
+
t.refresh()
|
256 |
+
if progress_bar is not None:
|
257 |
+
progress_bar(progress_percentage / 100)
|
258 |
+
yield dir_name, progress_bar
|
259 |
+
|
260 |
+
os.remove(file_src)
|
261 |
+
print(f'Extracted files to {dir_dest}')
|
262 |
+
yield dir_name, progress_bar
|
263 |
+
return
|
264 |
except Exception as e:
|
265 |
raise DependencyError(e)
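A minimal sketch (not part of the commit) of the model-type check performed above: if the uploaded ZIP carries a config.json whose 'model' field is 'xtts', the files are filed under an xtts directory, otherwise they are treated as a fairseq model.

# Illustrative sketch only -- mirrors the config.json check in extract_custom_model().
import json
import zipfile

def detect_model_type(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zf:
        if 'config.json' in zf.namelist():
            with zf.open('config.json') as fh:
                if json.load(fh).get('model') == 'xtts':
                    return 'xtts'
    return 'fairseq'  # default used when no XTTS config is found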
|
266 |
|
|
|
288 |
def convert_to_epub(session):
|
289 |
if session['cancellation_requested']:
|
290 |
stop_and_detach_tts()
|
291 |
+
print('Cancel requested')
|
292 |
+
return False
|
293 |
if session['script_mode'] == DOCKER_UTILS:
|
294 |
try:
|
295 |
docker_dir = os.path.basename(session['tmp_dir'])
|
|
|
329 |
|
330 |
def get_cover(session):
|
331 |
try:
|
332 |
+
if session['cancellation_requested']:
|
333 |
+
stop_and_detach_tts()
|
334 |
+
print('Cancel requested')
|
335 |
+
return False
|
336 |
cover_image = False
|
337 |
cover_path = os.path.join(session['tmp_dir'], session['filename_noext'] + '.jpg')
|
338 |
for item in session['epub'].get_items_of_type(ebooklib.ITEM_COVER):
|
|
|
355 |
try:
|
356 |
if session['cancellation_requested']:
|
357 |
stop_and_detach_tts()
|
358 |
+
print('Cancel requested')
|
359 |
+
return False
|
360 |
all_docs = list(session['epub'].get_items_of_type(ebooklib.ITEM_DOCUMENT))
|
361 |
if all_docs:
|
362 |
all_docs = all_docs[1:]
|
363 |
doc_patterns = [filter_pattern(str(doc)) for doc in all_docs if filter_pattern(str(doc))]
|
364 |
most_common_pattern = filter_doc(doc_patterns)
|
365 |
selected_docs = [doc for doc in all_docs if filter_pattern(str(doc)) == most_common_pattern]
|
366 |
+
chapters = [filter_chapter(doc, language) for doc in selected_docs]
|
367 |
if session['metadata'].get('creator'):
|
368 |
+
intro = f"{session['metadata']['creator']}, {session['metadata']['title']};\n "
|
369 |
chapters[0].insert(0, intro)
|
370 |
return chapters
|
371 |
return False
|
|
|
390 |
return 'numbers'
|
391 |
return None
|
392 |
|
393 |
+
def filter_chapter(doc, language):
|
|
|
|
|
|
|
|
|
394 |
soup = BeautifulSoup(doc.get_body_content(), 'html.parser')
|
395 |
# Remove scripts and styles
|
396 |
for script in soup(["script", "style"]):
|
397 |
script.decompose()
|
|
|
398 |
# Normalize lines and remove unnecessary spaces
|
399 |
text = re.sub(r'(\r\n|\r|\n){3,}', '\r\n', soup.get_text().strip())
|
400 |
text = replace_roman_numbers(text)
|
|
|
402 |
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
403 |
text = '\n'.join(chunk for chunk in chunks if chunk)
|
404 |
text = text.replace('»', '"').replace('«', '"')
|
|
|
405 |
# Pattern 1: Add a space between UTF-8 characters and numbers
|
406 |
text = re.sub(r'(?<=[\p{L}])(?=\d)|(?<=\d)(?=[\p{L}])', ' ', text)
|
|
|
407 |
# Pattern 2: Split numbers into groups of 4
|
408 |
text = re.sub(r'(\d{4})(?=\d)', r'\1 ', text)
|
409 |
+
chapter_sentences = get_sentences(text, language)
|
|
|
410 |
return chapter_sentences
|
411 |
|
412 |
+
def get_sentences(sentence, language, max_pauses=9):
|
413 |
max_length = language_mapping[language]['char_limit']
|
414 |
punctuation = language_mapping[language]['punctuation']
|
415 |
+
sentence = sentence.replace(".", ";\n")
|
416 |
parts = []
|
417 |
while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
|
|
|
|
|
|
|
418 |
# Step 1: Look for the last period (.) within max_length
|
419 |
possible_splits = [i for i, char in enumerate(sentence[:max_length]) if char == '.']
|
420 |
# Step 2: If no periods, look for the last comma (,)
|
|
|
446 |
try:
|
447 |
if session['cancellation_requested']:
|
448 |
stop_and_detach_tts()
|
449 |
+
print('Cancel requested')
|
450 |
+
return False
|
451 |
progress_bar = None
|
452 |
params = {}
|
453 |
if is_gui_process:
|
454 |
progress_bar = gr.Progress(track_tqdm=True)
|
455 |
params['tts_model'] = None
|
456 |
'''
|
457 |
+
# List available TTS base models
|
458 |
print("Available Models:")
|
459 |
print("=================")
|
460 |
for index, model in enumerate(XTTS().list_models(), 1):
|
|
|
463 |
if session['metadata']['language'] in language_xtts:
|
464 |
params['tts_model'] = 'xtts'
|
465 |
if session['custom_model'] is not None:
|
466 |
+
print(f"Loading TTS {params['tts_model']} model from {session['custom_model']}...")
|
467 |
+
model_path = os.path.join(session['custom_model'], 'model.pth')
|
468 |
config_path = os.path.join(session['custom_model'],'config.json')
|
469 |
+
vocab_path = os.path.join(session['custom_model'],'vocab.json')
|
470 |
+
voice_path = os.path.join(session['custom_model'],'ref.wav')
|
471 |
+
config = XttsConfig()
|
472 |
+
config.models_dir = os.path.join(models_dir,'tts')
|
473 |
+
config.load_json(config_path)
|
474 |
+
params['tts'] = Xtts.init_from_config(config)
|
475 |
+
params['tts'].load_checkpoint(config, checkpoint_path=model_path, vocab_path=vocab_path, eval=True)
|
476 |
+
print('Computing speaker latents...')
|
477 |
+
params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else voice_path
|
478 |
+
params['gpt_cond_latent'], params['speaker_embedding'] = params['tts'].get_conditioning_latents(audio_path=[params['voice_file']])
|
479 |
+
elif session['fine_tuned'] != 'std':
|
480 |
+
print(f"Loading TTS {params['tts_model']} model from {session['fine_tuned']}...")
|
481 |
+
hf_repo = models[params['tts_model']][session['fine_tuned']]['repo']
|
482 |
+
hf_sub = models[params['tts_model']][session['fine_tuned']]['sub']
|
483 |
+
cache_dir = os.path.join(models_dir,'tts')
|
484 |
+
model_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}/model.pth", cache_dir=cache_dir)
|
485 |
+
config_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}/config.json", cache_dir=cache_dir)
|
486 |
+
vocab_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}/vocab.json", cache_dir=cache_dir)
|
487 |
+
config = XttsConfig()
|
488 |
+
config.models_dir = cache_dir
|
489 |
+
config.load_json(config_path)
|
490 |
+
params['tts'] = Xtts.init_from_config(config)
|
491 |
+
params['tts'].load_checkpoint(config, checkpoint_path=model_path, vocab_path=vocab_path, eval=True)
|
492 |
+
print('Computing speaker latents...')
|
493 |
+
params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']][session['fine_tuned']]['voice']
|
494 |
+
params['gpt_cond_latent'], params['speaker_embedding'] = params['tts'].get_conditioning_latents(audio_path=[params['voice_file']])
|
495 |
else:
|
496 |
+
print(f"Loading TTS {params['tts_model']} model from {models[params['tts_model']][session['fine_tuned']]['repo']}...")
|
497 |
+
params['tts'] = XTTS(model_name=models[params['tts_model']][session['fine_tuned']]['repo'])
|
498 |
+
params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']][session['fine_tuned']]['voice']
|
|
|
|
|
499 |
params['tts'].to(session['device'])
|
|
|
|
|
|
|
500 |
else:
|
501 |
params['tts_model'] = 'fairseq'
|
502 |
+
model_repo = models[params['tts_model']][session['fine_tuned']]['repo'].replace("[lang]", session['metadata']['language'])
|
503 |
+
print(f"Loading TTS {model_repo} model from {model_repo}...")
|
504 |
+
params['tts'] = XTTS(model_repo)
|
505 |
+
params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']][session['fine_tuned']]['voice']
|
506 |
params['tts'].to(session['device'])
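For context, a sketch (not part of the commit) of the fine-tuned branch above: the three checkpoint files are fetched from a Hugging Face repo with hf_hub_download and loaded through Coqui's XttsConfig/Xtts classes. The repo id, subfolder, cache directory and reference wav below are placeholders.

# Illustrative sketch only -- repo id, subfolder and paths are placeholders.
from huggingface_hub import hf_hub_download
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

cache_dir = 'models/tts'
repo_id, sub = 'some-user/xtts-fine-tune', 'checkpoint'
model_path = hf_hub_download(repo_id=repo_id, filename=f'{sub}/model.pth', cache_dir=cache_dir)
config_path = hf_hub_download(repo_id=repo_id, filename=f'{sub}/config.json', cache_dir=cache_dir)
vocab_path = hf_hub_download(repo_id=repo_id, filename=f'{sub}/vocab.json', cache_dir=cache_dir)

config = XttsConfig()
config.load_json(config_path)
tts = Xtts.init_from_config(config)
tts.load_checkpoint(config, checkpoint_path=model_path, vocab_path=vocab_path, eval=True)
# Speaker latents are computed once from a short reference clip and reused per sentence.
gpt_cond_latent, speaker_embedding = tts.get_conditioning_latents(audio_path=['ref.wav'])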
|
507 |
|
508 |
resume_chapter = 0
|
509 |
resume_sentence = 0
|
510 |
|
511 |
# Check existing files to resume the process if it was interrupted
|
512 |
+
existing_chapters = sorted([f for f in os.listdir(session['chapters_dir']) if f.endswith(f'.{audioproc_format}')])
|
513 |
+
existing_sentences = sorted([f for f in os.listdir(session['chapters_dir_sentences']) if f.endswith(f'.{audioproc_format}')])
|
514 |
|
515 |
if existing_chapters:
|
516 |
count_chapter_files = len(existing_chapters)
|
|
|
521 |
print(f'Resuming from sentence {resume_sentence}')
|
522 |
|
523 |
total_chapters = len(session['chapters'])
|
524 |
+
total_sentences = sum(len(array) for array in session['chapters'])
|
525 |
current_sentence = 0
|
526 |
|
527 |
+
with tqdm(total=total_sentences, desc='convert_chapters_to_audio 0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=resume_sentence) as t:
|
528 |
t.n = resume_sentence
|
529 |
t.refresh()
|
530 |
for x in range(resume_chapter, total_chapters):
|
531 |
chapter_num = x + 1
|
532 |
+
chapter_audio_file = f'chapter_{chapter_num}.{audioproc_format}'
|
533 |
sentences = session['chapters'][x]
|
534 |
start = current_sentence # Mark the starting sentence of the chapter
|
535 |
print(f"\nChapter {chapter_num} containing {len(sentences)} sentences...")
|
536 |
for i, sentence in enumerate(sentences):
|
537 |
if current_sentence >= resume_sentence and resume_sentence > 0 or resume_sentence == 0:
|
538 |
+
params['sentence_audio_file'] = os.path.join(session['chapters_dir_sentences'], f'{current_sentence}.{audioproc_format}')
|
|
|
|
|
|
|
539 |
params['sentence'] = sentence
|
|
|
540 |
if convert_sentence_to_audio(params, session):
|
541 |
+
t.update(1)
|
542 |
percentage = (current_sentence / total_sentences) * 100
|
543 |
t.set_description(f'Processing {percentage:.2f}%')
|
544 |
+
print(f'Sentence: {sentence}')
|
545 |
t.refresh()
|
546 |
if progress_bar is not None:
|
547 |
progress_bar(current_sentence / total_sentences)
|
548 |
else:
|
|
|
549 |
return False
|
550 |
current_sentence += 1
|
551 |
end = current_sentence - 1
|
552 |
+
if combine_audio_sentences(chapter_audio_file, start, end, session):
|
553 |
+
print(f'Combining chapter {chapter_num} to audio, sentence {start} to {end}')
|
554 |
+
else:
|
555 |
+
print('combine_audio_sentences() failed!')
|
556 |
+
return False
|
557 |
return True
|
558 |
except Exception as e:
|
559 |
raise DependencyError(e)
|
560 |
|
561 |
def convert_sentence_to_audio(params, session):
|
562 |
try:
|
563 |
+
if session['cancellation_requested']:
|
564 |
+
stop_and_detach_tts(params['tts'])
|
565 |
+
print('Cancel requested')
|
566 |
+
return False
|
567 |
+
generation_params = {
|
568 |
+
"temperature": session['temperature'],
|
569 |
+
"length_penalty": session["length_penalty"],
|
570 |
+
"repetition_penalty": session['repetition_penalty'],
|
571 |
+
"num_beams": int(session['length_penalty']) + 1 if session["length_penalty"] > 1 else 1,
|
572 |
+
"top_k": session['top_k'],
|
573 |
+
"top_p": session['top_p'],
|
574 |
+
"speed": session['speed'],
|
575 |
+
"enable_text_splitting": session['enable_text_splitting']
|
576 |
+
}
|
577 |
if params['tts_model'] == 'xtts':
|
578 |
+
if session['custom_model'] is not None or session['fine_tuned'] != 'std':
|
579 |
+
output = params['tts'].inference(
|
580 |
+
text=params['sentence'],
|
581 |
+
language=session['metadata']['language_iso1'],
|
582 |
+
gpt_cond_latent=params['gpt_cond_latent'],
|
583 |
+
speaker_embedding=params['speaker_embedding'],
|
584 |
+
**generation_params
|
585 |
+
)
|
586 |
+
torchaudio.save(
|
587 |
+
params['sentence_audio_file'],
|
588 |
+
torch.tensor(output[audioproc_format]).unsqueeze(0),
|
589 |
+
sample_rate=24000
|
590 |
+
)
|
591 |
+
else:
|
592 |
+
params['tts'].tts_to_file(
|
593 |
+
text=params['sentence'],
|
594 |
+
language=session['metadata']['language_iso1'],
|
595 |
+
file_path=params['sentence_audio_file'],
|
596 |
+
speaker_wav=params['voice_file'],
|
597 |
+
**generation_params
|
598 |
+
)
|
599 |
elif params['tts_model'] == 'fairseq':
|
600 |
params['tts'].tts_with_vc_to_file(
|
601 |
text=params['sentence'],
|
|
|
|
|
602 |
file_path=params['sentence_audio_file'],
|
603 |
+
speaker_wav=params['voice_file'].replace('_24khz','_16khz'),
|
604 |
split_sentences=session['enable_text_splitting']
|
605 |
)
|
606 |
if os.path.exists(params['sentence_audio_file']):
|
|
|
613 |
def combine_audio_sentences(chapter_audio_file, start, end, session):
|
614 |
try:
|
615 |
chapter_audio_file = os.path.join(session['chapters_dir'], chapter_audio_file)
|
616 |
+
combined_audio = AudioSegment.empty()
|
|
|
617 |
# Get all audio sentence files sorted by their numeric indices
|
618 |
sentence_files = [f for f in os.listdir(session['chapters_dir_sentences']) if f.endswith(".wav")]
|
619 |
sentences_dir_ordered = sorted(sentence_files, key=lambda x: int(re.search(r'\d+', x).group()))
|
|
|
620 |
# Filter the files in the range [start, end]
|
621 |
selected_files = [
|
622 |
file for file in sentences_dir_ordered
|
623 |
if start <= int(''.join(filter(str.isdigit, os.path.basename(file)))) <= end
|
624 |
]
|
|
|
625 |
for file in selected_files:
|
626 |
+
if session['cancellation_requested']:
|
627 |
+
stop_and_detach_tts()
|
628 |
+
print('Cancel requested')
|
629 |
+
return False
|
630 |
if session['cancellation_requested']:
|
631 |
msg = 'Cancel requested'
|
632 |
raise ValueError(msg)
|
633 |
+
audio_segment = AudioSegment.from_file(os.path.join(session['chapters_dir_sentences'],file), format=audioproc_format)
|
634 |
combined_audio += audio_segment
|
635 |
+
combined_audio.export(chapter_audio_file, format=audioproc_format)
|
|
|
636 |
print(f'Combined audio saved to {chapter_audio_file}')
|
637 |
+
return True
|
638 |
except Exception as e:
|
639 |
raise DependencyError(e)
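A minimal sketch (not part of the commit) of the pydub pattern used above to stitch per-sentence wav files into a chapter in numeric order; the directory and file names are placeholders.

# Illustrative sketch only -- paths are placeholders.
import os
import re
from pydub import AudioSegment

sentences_dir = 'chapters/sentences'
wav_files = sorted(
    (f for f in os.listdir(sentences_dir) if f.endswith('.wav')),
    key=lambda x: int(re.search(r'\d+', x).group())  # 0.wav, 1.wav, 2.wav, ...
)
combined = AudioSegment.empty()
for name in wav_files:
    combined += AudioSegment.from_file(os.path.join(sentences_dir, name), format='wav')
combined.export('chapter_1.wav', format='wav')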
|
640 |
|
|
|
650 |
batch_size = 256
|
651 |
# Process the chapter files in batches
|
652 |
for i in range(0, len(chapter_files), batch_size):
|
|
|
|
|
|
|
|
|
653 |
batch_files = chapter_files[i:i + batch_size]
|
654 |
batch_audio = AudioSegment.empty() # Initialize an empty AudioSegment for the batch
|
|
|
655 |
# Sequentially append each file in the current batch to the batch_audio
|
656 |
for chapter_file in batch_files:
|
657 |
if session['cancellation_requested']:
|
658 |
+
print('Cancel requested')
|
659 |
+
return False
|
|
|
660 |
audio_segment = AudioSegment.from_wav(os.path.join(session['chapters_dir'],chapter_file))
|
661 |
batch_audio += audio_segment
|
|
|
662 |
combined_audio += batch_audio
|
663 |
+
combined_audio.export(assembled_audio, format=audioproc_format)
|
|
|
664 |
print(f'Combined audio saved to {assembled_audio}')
|
665 |
return True
|
666 |
except Exception as e:
|
|
|
668 |
|
669 |
def generate_ffmpeg_metadata():
|
670 |
try:
|
671 |
+
if session['cancellation_requested']:
|
672 |
+
print('Cancel requested')
|
673 |
+
return False
|
674 |
ffmpeg_metadata = ';FFMETADATA1\n'
|
675 |
if session['metadata'].get('title'):
|
676 |
ffmpeg_metadata += f"title={session['metadata']['title']}\n"
|
|
|
701 |
mobi_asin = session['metadata']['identifiers'].get('mobi-asin', None)
|
702 |
if mobi_asin:
|
703 |
ffmpeg_metadata += f'asin={mobi_asin}\n' # ASIN
|
|
|
704 |
start_time = 0
|
705 |
for index, chapter_file in enumerate(chapter_files):
|
706 |
if session['cancellation_requested']:
|
|
|
711 |
ffmpeg_metadata += f'[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_time}\n'
|
712 |
ffmpeg_metadata += f'END={start_time + duration_ms}\ntitle=Chapter {index + 1}\n'
|
713 |
start_time += duration_ms
|
|
|
714 |
# Write the metadata to the file
|
715 |
with open(metadata_file, 'w', encoding='utf-8') as file:
|
716 |
file.write(ffmpeg_metadata)
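For reference, a sketch (not part of the commit) of the FFMETADATA chapter entries this function writes, built here for two placeholder chapter files; pydub's len() returns the duration in milliseconds, which matches the 1/1000 TIMEBASE.

# Illustrative sketch only -- titles and file names are placeholders.
from pydub import AudioSegment

chapter_files = ['chapter_1.wav', 'chapter_2.wav']
metadata = ';FFMETADATA1\ntitle=Example Book\n'
start_time = 0
for index, chapter_file in enumerate(chapter_files):
    duration_ms = len(AudioSegment.from_wav(chapter_file))
    metadata += f'[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_time}\n'
    metadata += f'END={start_time + duration_ms}\ntitle=Chapter {index + 1}\n'
    start_time += duration_ms
with open('metadata.txt', 'w', encoding='utf-8') as fh:
    fh.write(metadata)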
|
|
|
720 |
|
721 |
def export_audio():
|
722 |
try:
|
723 |
+
if session['cancellation_requested']:
|
724 |
+
print('Cancel requested')
|
725 |
+
return False
|
726 |
ffmpeg_cover = None
|
727 |
if session['script_mode'] == DOCKER_UTILS:
|
728 |
docker_dir = os.path.basename(session['tmp_dir'])
|
|
|
730 |
ffmpeg_metadata_file = f'/files/{docker_dir}/' + os.path.basename(metadata_file)
|
731 |
ffmpeg_final_file = f'/files/{docker_dir}/' + os.path.basename(docker_final_file)
|
732 |
if session['cover'] is not None:
|
733 |
+
ffmpeg_cover = f'/files/{docker_dir}/' + os.path.basename(session['cover'])
|
|
|
734 |
ffmpeg_cmd = ['ffmpeg', '-i', ffmpeg_combined_audio, '-i', ffmpeg_metadata_file]
|
735 |
else:
|
736 |
ffmpeg_combined_audio = assembled_audio
|
737 |
ffmpeg_metadata_file = metadata_file
|
738 |
ffmpeg_final_file = final_file
|
739 |
if session['cover'] is not None:
|
740 |
+
ffmpeg_cover = session['cover']
|
|
|
741 |
ffmpeg_cmd = [shutil.which('ffmpeg'), '-i', ffmpeg_combined_audio, '-i', ffmpeg_metadata_file]
|
|
|
742 |
if ffmpeg_cover is not None:
|
743 |
ffmpeg_cmd += ['-i', ffmpeg_cover, '-map', '0:a', '-map', '2:v']
|
744 |
else:
|
745 |
ffmpeg_cmd += ['-map', '0:a']
|
746 |
+
ffmpeg_cmd += ['-map_metadata', '1', '-c:a', 'aac', '-b:a', '128k', '-ar', '44100']
|
|
|
|
|
747 |
if ffmpeg_cover is not None:
|
748 |
if ffmpeg_cover.endswith('.png'):
|
749 |
ffmpeg_cmd += ['-c:v', 'png', '-disposition:v', 'attached_pic'] # PNG cover
|
750 |
else:
|
751 |
+
ffmpeg_cmd += ['-c:v', 'copy', '-disposition:v', 'attached_pic'] # JPEG cover (no re-encoding needed)
|
|
|
752 |
if ffmpeg_cover is not None and ffmpeg_cover.endswith('.png'):
|
753 |
+
ffmpeg_cmd += ['-pix_fmt', 'yuv420p']
|
754 |
+
ffmpeg_cmd += ['-af', 'agate=threshold=-33dB:ratio=2:attack=5:release=100,acompressor=threshold=-20dB:ratio=2.5:attack=50:release=200:makeup=0dB,loudnorm=I=-19:TP=-3:LRA=7:linear=true']
|
755 |
ffmpeg_cmd += ['-movflags', '+faststart', '-y', ffmpeg_final_file]
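As a concrete picture (not part of the commit), the list built above expands to roughly the following invocation for a JPEG cover when running outside Docker; paths are placeholders and the -af gate/compressor/loudnorm chain is elided for brevity.

# Illustrative sketch only -- paths are placeholders.
import shutil
import subprocess

ffmpeg_cmd = [
    shutil.which('ffmpeg'), '-i', 'assembled.wav', '-i', 'metadata.txt',
    '-i', 'cover.jpg', '-map', '0:a', '-map', '2:v',
    '-map_metadata', '1', '-c:a', 'aac', '-b:a', '128k', '-ar', '44100',
    '-c:v', 'copy', '-disposition:v', 'attached_pic',
    '-movflags', '+faststart', '-y', 'audiobook.m4b',
]
subprocess.run(ffmpeg_cmd, check=True)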
|
|
|
756 |
if session['script_mode'] == DOCKER_UTILS:
|
757 |
try:
|
758 |
container = session['client'].containers.run(
|
|
|
767 |
print(container.decode('utf-8'))
|
768 |
if shutil.copy(docker_final_file, final_file):
|
769 |
return True
|
|
|
770 |
return False
|
771 |
except docker.errors.ContainerError as e:
|
772 |
raise DependencyError(e)
|
|
|
787 |
try:
|
788 |
chapter_files = [f for f in os.listdir(session['chapters_dir']) if f.endswith(".wav")]
|
789 |
chapter_files = sorted(chapter_files, key=lambda x: int(re.search(r'\d+', x).group()))
|
790 |
+
assembled_audio = os.path.join(session['tmp_dir'], session['metadata']['title'] + '.' + audioproc_format)
|
791 |
metadata_file = os.path.join(session['tmp_dir'], 'metadata.txt')
|
792 |
if assemble_audio():
|
793 |
if generate_ffmpeg_metadata():
|
|
|
861 |
os.makedirs(root_dir)
|
862 |
print(f'Created missing directory: {root_dir}')
|
863 |
current_time = time.time()
|
864 |
+
age_limit = current_time - interface_shared_expire * 60 * 60  # interface_shared_expire hours, in seconds
|
865 |
for folder_name in os.listdir(root_dir):
|
866 |
dir_path = os.path.join(root_dir, folder_name)
|
867 |
if os.path.isdir(dir_path) and folder_name.startswith('web-'):
|
|
|
899 |
pass
|
900 |
|
901 |
if args['language'] is not None and args['language'] in language_mapping.keys():
|
|
|
902 |
session_id = args['session'] if args['session'] is not None else str(uuid.uuid4())
|
903 |
session = context.get_session(session_id)
|
904 |
session['id'] = session_id
|
|
|
916 |
top_p = args['top_p']
|
917 |
speed = args['speed']
|
918 |
enable_text_splitting = args['enable_text_splitting'] if args['enable_text_splitting'] is not None else True
|
919 |
+
custom_model_file = args['custom_model'] if args['custom_model'] != 'none' and args['custom_model'] is not None else None
|
920 |
+
fine_tuned = args['fine_tuned'] if check_fine_tuned(args['fine_tuned'], args['language']) else None
|
|
|
|
|
|
|
|
|
|
|
921 |
|
922 |
if not fine_tuned:
|
923 |
raise ValueError('The fine tuned model does not exist.')
|
|
|
936 |
session['client'] = docker.from_env()
|
937 |
|
938 |
session['tmp_dir'] = os.path.join(processes_dir, f"ebook-{session['id']}")
|
939 |
+
session['chapters_dir'] = os.path.join(session['tmp_dir'], f"chapters_{hashlib.md5(args['ebook'].encode()).hexdigest()}")
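A one-liner sketch (not part of the commit) of the directory naming above: the chapters folder is keyed by an MD5 of the ebook path, so converting the same file again resumes into the same folder.

# Illustrative sketch only -- the path is a placeholder.
import hashlib
ebook_path = '/tmp/ebook-session/mybook.epub'
print('chapters_' + hashlib.md5(ebook_path.encode()).hexdigest())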
|
940 |
session['chapters_dir_sentences'] = os.path.join(session['chapters_dir'], 'sentences')
|
941 |
|
942 |
if not is_gui_process:
|
943 |
print(f'*********** Session: {session_id}', '************* Store it in case of interruption or crash you can resume the conversion')
|
944 |
+
session['custom_model_dir'] = os.path.join(models_dir,'__sessions',f"model-{session['id']}")
|
945 |
+
if custom_model_file:
|
946 |
+
session['custom_model'], progression_status = extract_custom_model(custom_model_file, session['custom_model_dir'])
|
947 |
+
if not session['custom_model']:
|
948 |
+
raise ValueError(f'{custom_model_file} could not be extracted or mandatory files are missing')
|
949 |
|
950 |
if prepare_dirs(args['ebook'], session):
|
951 |
session['filename_noext'] = os.path.splitext(os.path.basename(session['src']))[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
952 |
if not torch.cuda.is_available() or device == 'cpu':
|
953 |
if device == 'gpu':
|
954 |
print('GPU is not available on your device!')
|
|
|
995 |
final_file = combine_audio_chapters(session)
|
996 |
if final_file is not None:
|
997 |
progress_status = f'Audiobook {os.path.basename(final_file)} created!'
|
|
|
998 |
return progress_status, final_file
|
999 |
else:
|
1000 |
error = 'combine_audio_chapters() error: final_file not created!'
|
|
|
1009 |
else:
|
1010 |
error = 'convert_to_epub() failed!'
|
1011 |
else:
|
1012 |
+
error = f"Temporary directory {session['tmp_dir']} not removed due to failure."
|
|
|
1013 |
else:
|
1014 |
error = f"Language {args['language']} is not supported."
|
1015 |
+
if session['cancellation_requested']:
|
1016 |
+
error = 'Cancelled'
|
1017 |
print(error)
|
1018 |
return error, None
|
1019 |
except Exception as e:
|
|
|
1035 |
)
|
1036 |
for lang, details in language_mapping.items()
|
1037 |
]
|
1038 |
+
custom_model_options = None
|
1039 |
fine_tuned_options = list(models['xtts'].keys())
|
1040 |
default_language_name = next((name for name, key in language_options if key == default_language_code), None)
|
1041 |
|
|
|
1046 |
radius_size='lg',
|
1047 |
font_mono=['JetBrains Mono', 'monospace', 'Consolas', 'Menlo', 'Liberation Mono']
|
1048 |
)
|
1049 |
+
|
1050 |
with gr.Blocks(theme=theme) as interface:
|
1051 |
gr.HTML(
|
1052 |
'''
|
|
|
1089 |
padding: 0 !important;
|
1090 |
margin: 0 !important;
|
1091 |
}
|
1092 |
+
#component-7, #component-10, #component-20 {
|
1093 |
height: 140px !important;
|
1094 |
}
|
1095 |
+
#component-47, #component-51 {
|
1096 |
+
height: 100px !important;
|
1097 |
+
}
|
1098 |
</style>
|
1099 |
'''
|
1100 |
)
|
|
|
1102 |
f'''
|
1103 |
# Ebook2Audiobook v{version}<br/>
|
1104 |
https://github.com/DrewThomasson/ebook2audiobook<br/>
|
1105 |
+
Convert eBooks into immersive audiobooks with realistic voice TTS models.<br/>
|
1106 |
+
Multi-user, multiprocessing and multithreaded; can run on a geo cluster to share the conversion load across the grid.
|
1107 |
'''
|
1108 |
)
|
1109 |
with gr.Tabs():
|
1110 |
+
gr_tab_main = gr.TabItem('Input Options')
|
1111 |
+
with gr_tab_main:
|
1112 |
with gr.Row():
|
1113 |
with gr.Column(scale=3):
|
|
|
|
|
|
|
|
|
1114 |
with gr.Group():
|
1115 |
+
gr_ebook_file = gr.File(label='EBook File (.epub, .mobi, .azw3, fb2, lrf, rb, snb, tcr, .pdf, .txt, .rtf, doc, .docx, .html, .odt, .azw)', file_types=['.epub', '.mobi', '.azw3', 'fb2', 'lrf', 'rb', 'snb', 'tcr', '.pdf', '.txt', '.rtf', 'doc', '.docx', '.html', '.odt', '.azw'])
|
1116 |
+
with gr.Group():
|
1117 |
+
gr_voice_file = gr.File(label='*Cloning Voice (a .wav 24000hz for XTTS base model and 16000hz for FAIRSEQ base model, no more than 6 sec)', file_types=['.wav'], visible=interface_component_options['gr_voice_file'])
|
1118 |
gr.Markdown('<p> * Optional</p>')
|
1119 |
with gr.Group():
|
1120 |
+
gr_device = gr.Radio(label='Processor Unit', choices=['CPU', 'GPU'], value='CPU')
|
1121 |
+
with gr.Group():
|
1122 |
+
gr_language = gr.Dropdown(label='Language', choices=[name for name, _ in language_options], value=default_language_name)
|
1123 |
+
with gr.Column(scale=3):
|
1124 |
+
gr_group_custom_model = gr.Group(visible=interface_component_options['gr_group_custom_model'])
|
1125 |
+
with gr_group_custom_model:
|
1126 |
+
gr_custom_model_file = gr.File(label='*Custom XTTS Model (a .zip containing config.json, vocab.json, model.pth, ref.wav)', file_types=['.zip'])
|
1127 |
+
gr_custom_model_list = gr.Dropdown(label='', choices=['none'], interactive=True)
|
1128 |
+
gr.Markdown('<p> * Optional</p>')
|
1129 |
+
with gr.Group():
|
1130 |
+
gr_session_status = gr.Textbox(label='Session')
|
1131 |
+
with gr.Group():
|
1132 |
+
gr_tts_engine = gr.Dropdown(label='TTS Base', choices=[default_tts_engine], value=default_tts_engine, interactive=True)
|
1133 |
gr_fine_tuned = gr.Dropdown(label='Fine Tuned Models', choices=fine_tuned_options, value=default_fine_tuned, interactive=True)
|
1134 |
+
gr_tab_preferences = gr.TabItem('Audio Generation Preferences', visible=interface_component_options['gr_tab_preferences'])
|
1135 |
+
with gr_tab_preferences:
|
1136 |
gr.Markdown(
|
1137 |
'''
|
1138 |
### Customize Audio Generation Parameters
|
|
|
1194 |
)
|
1195 |
|
1196 |
gr_state = gr.State(value="") # Initialize state for each user session
|
|
|
1197 |
gr_session = gr.Textbox(label='Session', visible=False)
|
1198 |
gr_conversion_progress = gr.Textbox(label='Progress')
|
1199 |
gr_convert_btn = gr.Button('Convert', variant='primary', interactive=False)
|
|
|
1265 |
def update_interface():
|
1266 |
nonlocal is_converting
|
1267 |
is_converting = False
|
1268 |
+
return gr.update(value='Convert', variant='primary', interactive=False), gr.update(), gr.update(value=audiobook_file), update_audiobooks_ddn(), hide_modal()
|
1269 |
|
1270 |
def refresh_audiobook_list():
|
1271 |
files = []
|
|
|
1282 |
return link, link, gr.update(visible=True)
|
1283 |
return None, None, gr.update(visible=False)
|
1284 |
|
1285 |
+
def update_convert_btn(upload_file, custom_model_file, session_id):
|
1286 |
+
session = context.get_session(session_id)
|
1287 |
+
if hasattr(upload_file, 'name') and not hasattr(custom_model_file, 'name'):
|
1288 |
+
yield gr.update(variant='primary', interactive=True)
|
1289 |
+
else:
|
1290 |
+
yield gr.update(variant='primary', interactive=False)
|
1291 |
+
return
|
1292 |
|
1293 |
def update_audiobooks_ddn():
|
1294 |
files = refresh_audiobook_list()
|
1295 |
return gr.update(choices=files, label='Audiobooks', value=files[0] if files else None)
|
1296 |
|
1297 |
+
async def change_gr_ebook_file(f, session_id):
|
1298 |
nonlocal is_converting
|
1299 |
if context and session_id:
|
1300 |
session = context.get_session(session_id)
|
1301 |
if f is None:
|
1302 |
if is_converting:
|
1303 |
session['cancellation_requested'] = True
|
1304 |
+
yield show_modal('Cancellation requested, please wait...')
|
1305 |
return
|
1306 |
+
session['cancellation_requested'] = False
|
1307 |
+
yield hide_modal()
|
|
|
|
|
|
|
|
|
|
|
1308 |
return
|
1309 |
|
1310 |
+
def change_gr_language(selected: str, session_id: str):
|
1311 |
+
nonlocal custom_model_options
|
1312 |
if selected == 'zzzz':
|
1313 |
new_language_name = default_language_name
|
1314 |
new_language_key = default_language_code
|
1315 |
else:
|
1316 |
new_language_name, new_language_key = next(((name, key) for name, key in language_options if key == selected), (None, None))
|
1317 |
+
tts_engine_options = ['xtts'] if language_xtts.get(new_language_key, False) else ['fairseq']
|
|
|
|
|
|
|
|
|
1318 |
fine_tuned_options = [
|
1319 |
model_name
|
1320 |
+
for model_name, model_details in models.get(tts_engine_options[0], {}).items()
|
1321 |
if model_details.get('lang') == 'multi' or model_details.get('lang') == new_language_key
|
1322 |
]
|
1323 |
+
custom_model_options = ['none']
|
1324 |
+
if context and session_id:
|
1325 |
+
session = context.get_session(session_id)
|
1326 |
+
session['language'] = new_language_key
|
1327 |
+
custom_model_tts = check_custom_model_tts(session)
|
1328 |
+
custom_model_tts_dir = os.path.join(session['custom_model_dir'], custom_model_tts)
|
1329 |
+
if os.path.exists(custom_model_tts_dir):
|
1330 |
+
custom_model_options += os.listdir(custom_model_tts_dir)
|
1331 |
return (
|
1332 |
+
gr.update(value=new_language_name),
|
1333 |
+
gr.update(choices=tts_engine_options, value=tts_engine_options[0]),
|
1334 |
+
gr.update(choices=fine_tuned_options, value=fine_tuned_options[0] if fine_tuned_options else 'none'),
|
1335 |
+
gr.update(choices=custom_model_options, value=custom_model_options[0])
|
1336 |
)
|
1337 |
|
1338 |
+
def check_custom_model_tts(session):
|
1339 |
+
custom_model_tts = 'xtts'
|
1340 |
+
if not language_xtts.get(session['language']):
|
1341 |
+
custom_model_tts = 'fairseq'
|
1342 |
+
custom_model_tts_dir = os.path.join(session['custom_model_dir'], custom_model_tts)
|
1343 |
+
if not os.path.isdir(custom_model_tts_dir):
|
1344 |
+
os.makedirs(custom_model_tts_dir, exist_ok=True)
|
1345 |
+
return custom_model_tts
|
1346 |
+
|
1347 |
+
def change_gr_custom_model_list(custom_model_list):
|
1348 |
+
if custom_model_list == 'none':
|
1349 |
+
return gr.update(visible=True)
|
1350 |
+
return gr.update(visible=False)
|
1351 |
+
|
1352 |
+
async def change_gr_custom_model_file(custom_model_file, session_id):
|
1353 |
+
try:
|
1354 |
+
nonlocal custom_model_options, gr_custom_model_file, gr_conversion_progress
|
1355 |
+
if context and session_id:
|
1356 |
+
session = context.get_session(session_id)
|
1357 |
+
if custom_model_file is not None:
|
1358 |
+
if analyze_uploaded_file(custom_model_file):
|
1359 |
+
session['custom_model'], progress_status = extract_custom_model(custom_model_file, None, session)
|
1360 |
+
if session['custom_model']:
|
1361 |
+
custom_model_tts_dir = check_custom_model_tts(session)
|
1362 |
+
custom_model_options = ['none'] + os.listdir(os.path.join(session['custom_model_dir'], custom_model_tts_dir))
|
1363 |
+
yield (
|
1364 |
+
gr.update(visible=False),
|
1365 |
+
gr.update(choices=custom_model_options, value=session['custom_model']),
|
1366 |
+
gr.update(value=f"{session['custom_model']} added to the custom list")
|
1367 |
+
)
|
1368 |
+
gr_custom_model_file = gr.File(label='*XTTS Model (a .zip containing config.json, vocab.json, model.pth, ref.wav)', value=None, file_types=['.zip'])
|
1369 |
+
return
|
1370 |
+
yield gr.update(), gr.update(), gr.update(value='Invalid file! Please upload a valid ZIP.')
|
1371 |
+
return
|
1372 |
+
except Exception as e:
|
1373 |
+
yield gr.update(), gr.update(), gr.update(value=f'Error: {str(e)}')
|
1374 |
+
return
|
1375 |
+
|
1376 |
+
def change_gr_fine_tuned(fine_tuned):
|
1377 |
+
visible = False
|
1378 |
+
if fine_tuned == 'std':
|
1379 |
+
visible = True
|
1380 |
+
return gr.update(visible=visible)
|
1381 |
|
1382 |
def change_gr_data(data):
|
1383 |
data['event'] = 'change_data'
|
|
|
1385 |
|
1386 |
def change_gr_read_data(data):
|
1387 |
nonlocal audiobooks_dir
|
1388 |
+
nonlocal custom_model_options
|
1389 |
warning_text_extra = ''
|
|
|
|
|
1390 |
if not data:
|
1391 |
data = {'session_id': str(uuid.uuid4())}
|
1392 |
warning_text = f"Session: {data['session_id']}"
|
|
|
1396 |
warning_text = data['session_id']
|
1397 |
event = data.get('event', '')
|
1398 |
if event != 'load':
|
1399 |
+
return [gr.update(), gr.update(), gr.update(), gr.update(), gr.update()]
|
1400 |
+
session = context.get_session(data['session_id'])
|
1401 |
+
session['custom_model_dir'] = os.path.join(models_dir,'__sessions',f"model-{session['id']}")
|
1402 |
+
os.makedirs(session['custom_model_dir'], exist_ok=True)
|
1403 |
+
custom_model_tts_dir = check_custom_model_tts(session)
|
1404 |
+
custom_model_options = ['none'] + os.listdir(os.path.join(session['custom_model_dir'],custom_model_tts_dir))
|
1405 |
if is_gui_shared:
|
1406 |
+
warning_text_extra = f' Note: access limit time: {interface_shared_expire} hours'
|
1407 |
audiobooks_dir = os.path.join(audiobooks_gradio_dir, f"web-{data['session_id']}")
|
1408 |
delete_old_web_folders(audiobooks_gradio_dir)
|
1409 |
else:
|
1410 |
audiobooks_dir = os.path.join(audiobooks_host_dir, f"web-{data['session_id']}")
|
1411 |
+
return [data, f'{warning_text}{warning_text_extra}', data['session_id'], update_audiobooks_ddn(), gr.update(choices=custom_model_options, value='none')]
|
1412 |
|
1413 |
+
def submit_convert_btn(
|
1414 |
session, device, ebook_file, voice_file, language,
|
1415 |
+
custom_model_file, temperature, length_penalty,
|
1416 |
repetition_penalty, top_k, top_p, speed, enable_text_splitting, fine_tuned
|
1417 |
):
|
1418 |
nonlocal is_converting
|
|
|
1426 |
"audiobooks_dir": audiobooks_dir,
|
1427 |
"voice": voice_file.name if voice_file else None,
|
1428 |
"language": next((key for name, key in language_options if name == language), None),
|
1429 |
+
"custom_model": next((key for name, key in language_options if name != 'none'), None),
|
|
|
1430 |
"temperature": float(temperature),
|
1431 |
"length_penalty": float(length_penalty),
|
1432 |
"repetition_penalty": float(repetition_penalty),
|
|
|
1438 |
}
|
1439 |
|
1440 |
if args["ebook"] is None:
|
1441 |
+
return gr.update(value='Error: a file is required.')
|
1442 |
|
1443 |
try:
|
1444 |
is_converting = True
|
1445 |
progress_status, audiobook_file = convert_ebook(args)
|
|
|
|
|
1446 |
if audiobook_file is None:
|
1447 |
if is_converting:
|
1448 |
+
return gr.update(value='Conversion cancelled.')
|
1449 |
else:
|
1450 |
+
return gr.update(value='Conversion failed.')
|
1451 |
else:
|
1452 |
+
return progress_status
|
1453 |
except Exception as e:
|
|
|
1454 |
return DependencyError(e)
|
1455 |
|
1456 |
gr_ebook_file.change(
|
1457 |
+
fn=update_convert_btn,
|
1458 |
+
inputs=[gr_ebook_file, gr_custom_model_file, gr_session],
|
1459 |
+
outputs=gr_convert_btn
|
1460 |
+
).then(
|
1461 |
fn=change_gr_ebook_file,
|
1462 |
+
inputs=[gr_ebook_file, gr_session],
|
1463 |
+
outputs=[gr_modal_html]
|
1464 |
)
|
1465 |
gr_language.change(
|
1466 |
+
fn=lambda selected, session_id: change_gr_language(dict(language_options).get(selected, 'Unknown'), session_id),
|
1467 |
+
inputs=[gr_language, gr_session],
|
1468 |
+
outputs=[gr_language, gr_tts_engine, gr_fine_tuned, gr_custom_model_list]
|
1469 |
)
|
1470 |
gr_audiobooks_ddn.change(
|
1471 |
fn=change_gr_audiobooks_ddn,
|
|
|
1474 |
)
|
1475 |
gr_custom_model_file.change(
|
1476 |
fn=change_gr_custom_model_file,
|
1477 |
+
inputs=[gr_custom_model_file, gr_session],
|
1478 |
+
outputs=[gr_fine_tuned, gr_custom_model_list, gr_conversion_progress]
|
1479 |
+
)
|
1480 |
+
gr_custom_model_list.change(
|
1481 |
+
fn=change_gr_custom_model_list,
|
1482 |
+
inputs=gr_custom_model_list,
|
1483 |
+
outputs=gr_fine_tuned
|
1484 |
+
)
|
1485 |
+
gr_fine_tuned.change(
|
1486 |
+
fn=change_gr_fine_tuned,
|
1487 |
+
inputs=gr_fine_tuned,
|
1488 |
+
outputs=gr_group_custom_model
|
1489 |
)
|
1490 |
gr_session.change(
|
1491 |
fn=change_gr_data,
|
|
|
1506 |
gr_read_data.change(
|
1507 |
fn=change_gr_read_data,
|
1508 |
inputs=gr_read_data,
|
1509 |
+
outputs=[gr_data, gr_session_status, gr_session, gr_audiobooks_ddn, gr_custom_model_list]
|
1510 |
)
|
1511 |
gr_convert_btn.click(
|
1512 |
+
fn=submit_convert_btn,
|
|
|
|
|
|
|
|
|
1513 |
inputs=[
|
1514 |
gr_session, gr_device, gr_ebook_file, gr_voice_file, gr_language,
|
1515 |
+
gr_custom_model_list, gr_temperature, gr_length_penalty,
|
1516 |
gr_repetition_penalty, gr_top_k, gr_top_p, gr_speed, gr_enable_text_splitting, gr_fine_tuned
|
1517 |
],
|
1518 |
+
outputs=gr_conversion_progress
|
1519 |
).then(
|
1520 |
fn=update_interface,
|
1521 |
inputs=None,
|
1522 |
+
outputs=[gr_convert_btn, gr_ebook_file, gr_audio_player, gr_audiobooks_ddn, gr_modal_html]
|
1523 |
)
|
1524 |
interface.load(
|
1525 |
fn=None,
|
|
|
1539 |
)
|
1540 |
|
1541 |
try:
|
1542 |
+
interface.queue(default_concurrency_limit=interface_concurrency_limit).launch(server_name=interface_host, server_port=interface_port, share=is_gui_shared)
|
1543 |
except OSError as e:
|
1544 |
print(f'Connection error: {e}')
|
1545 |
except socket.error as e:
|
lib/lang.py
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
setup.py
CHANGED
@@ -1,50 +1,50 @@
|
|
1 |
-
import subprocess
|
2 |
-
import sys
|
3 |
-
from setuptools import setup, find_packages
|
4 |
-
from setuptools.command.develop import develop
|
5 |
-
from setuptools.command.install import install
|
6 |
-
import os
|
7 |
-
|
8 |
-
cwd = os.path.dirname(os.path.abspath(__file__))
|
9 |
-
|
10 |
-
with open("README.md", "r", encoding='utf-8') as fh:
|
11 |
-
long_description = fh.read()
|
12 |
-
|
13 |
-
with open('requirements.txt') as f:
|
14 |
-
requirements = f.read().splitlines()
|
15 |
-
|
16 |
-
class PostInstallCommand(install):
|
17 |
-
def run(self):
|
18 |
-
install.run(self)
|
19 |
-
try:
|
20 |
-
subprocess.run([sys.executable, 'python -m', 'unidic', 'download'], check=True)
|
21 |
-
except Exception:
|
22 |
-
print("unidic download failed during installation, but it will be re-attempted a diffrent way when the app itself runs.")
|
23 |
-
|
24 |
-
|
25 |
-
setup(
|
26 |
-
name='ebook2audiobook',
|
27 |
-
version='2.0.0',
|
28 |
-
python_requires=">=3.10,<3.
|
29 |
-
author="Drew Thomasson",
|
30 |
-
description="Convert eBooks to audiobooks with chapters and metadata",
|
31 |
-
long_description=long_description,
|
32 |
-
long_description_content_type="text/markdown",
|
33 |
-
url="https://github.com/DrewThomasson/ebook2audiobook",
|
34 |
-
packages=find_packages(),
|
35 |
-
install_requires=requirements,
|
36 |
-
classifiers=[
|
37 |
-
"Programming Language :: Python :: 3",
|
38 |
-
"License :: OSI Approved :: MIT License",
|
39 |
-
"Operating System :: OS Independent",
|
40 |
-
],
|
41 |
-
include_package_data=True,
|
42 |
-
entry_points={
|
43 |
-
"console_scripts": [
|
44 |
-
"ebook2audiobook = app:main",
|
45 |
-
],
|
46 |
-
},
|
47 |
-
cmdclass={
|
48 |
-
'install': PostInstallCommand,
|
49 |
-
}
|
50 |
-
)
|
|
|
1 |
+
import subprocess
|
2 |
+
import sys
|
3 |
+
from setuptools import setup, find_packages
|
4 |
+
from setuptools.command.develop import develop
|
5 |
+
from setuptools.command.install import install
|
6 |
+
import os
|
7 |
+
|
8 |
+
cwd = os.path.dirname(os.path.abspath(__file__))
|
9 |
+
|
10 |
+
with open("README.md", "r", encoding='utf-8') as fh:
|
11 |
+
long_description = fh.read()
|
12 |
+
|
13 |
+
with open('requirements.txt') as f:
|
14 |
+
requirements = f.read().splitlines()
|
15 |
+
|
16 |
+
class PostInstallCommand(install):
|
17 |
+
def run(self):
|
18 |
+
install.run(self)
|
19 |
+
try:
|
20 |
+
subprocess.run([sys.executable, '-m', 'unidic', 'download'], check=True)
|
21 |
+
except Exception:
|
22 |
+
print("unidic download failed during installation, but it will be re-attempted a diffrent way when the app itself runs.")
|
23 |
+
|
24 |
+
|
25 |
+
setup(
|
26 |
+
name='ebook2audiobook',
|
27 |
+
version='2.0.0',
|
28 |
+
python_requires=">=3.10,<3.13",
|
29 |
+
author="Drew Thomasson",
|
30 |
+
description="Convert eBooks to audiobooks with chapters and metadata",
|
31 |
+
long_description=long_description,
|
32 |
+
long_description_content_type="text/markdown",
|
33 |
+
url="https://github.com/DrewThomasson/ebook2audiobook",
|
34 |
+
packages=find_packages(),
|
35 |
+
install_requires=requirements,
|
36 |
+
classifiers=[
|
37 |
+
"Programming Language :: Python :: 3",
|
38 |
+
"License :: OSI Approved :: MIT License",
|
39 |
+
"Operating System :: OS Independent",
|
40 |
+
],
|
41 |
+
include_package_data=True,
|
42 |
+
entry_points={
|
43 |
+
"console_scripts": [
|
44 |
+
"ebook2audiobook = app:main",
|
45 |
+
],
|
46 |
+
},
|
47 |
+
cmdclass={
|
48 |
+
'install': PostInstallCommand,
|
49 |
+
}
|
50 |
+
)
|