drewThomasson committed
Commit 5f0212d
1 Parent(s): eb2f049

Upload 10 files

Browse files:
- app.py +35 -56
- ebook2audiobook.cmd +285 -0
- ebook2audiobook.sh +3 -1
- lib/__pycache__/conf.cpython-312.pyc +0 -0
- lib/__pycache__/functions.cpython-312.pyc +0 -0
- lib/__pycache__/lang.cpython-312.pyc +0 -0
- lib/conf.py +88 -17
- lib/functions.py +355 -322
- lib/lang.py +0 -0
- setup.py +50 -50
app.py
CHANGED
@@ -47,18 +47,7 @@ def check_and_install_requirements(file_path):
        except subprocess.CalledProcessError as e:
            print(f'Failed to install packages: {e}')
            return False
-
-        from lib.functions import check_missing_files, download_model
-        for mod in models.keys():
-            if mod == 'xtts':
-                mod_exists, err, list = check_missing_files(models[mod]['local'], models[mod]['files'])
-                if mod_exists:
-                    print('All specified xtts base model files are present in the folder.')
-                else:
-                    print('The following files are missing:', list)
-                    print(f'Downloading {mod} files . . .')
-                    download_model(models[mod]['local'], models[mod]['url'])
-            '''
+
        return True
    except Exception as e:
        raise(f'An error occurred: {e}')
@@ -92,12 +81,12 @@ def main():
Example usage:
Windows:
headless:
-    ebook2audiobook.cmd --headless --ebook 'path_to_ebook'
+    ebook2audiobook.cmd --headless --ebook 'path_to_ebook'
Graphic Interface:
    ebook2audiobook.cmd
Linux/Mac:
headless:
-    ./ebook2audiobook.sh --headless --ebook 'path_to_ebook'
+    ./ebook2audiobook.sh --headless --ebook 'path_to_ebook'
Graphic Interface:
    ./ebook2audiobook.sh
''',
@@ -106,11 +95,8 @@ Linux/Mac:
    options = [
        '--script_mode', '--share', '--headless',
        '--session', '--ebook', '--ebooks_dir',
-        '--voice', '--language', '--device',
-
-        #'--custom_model_url',
-        '--temperature',
-        '--length_penalty', '--repetition_penalty',
+        '--voice', '--language', '--device', '--custom_model',
+        '--temperature', '--length_penalty', '--repetition_penalty',
        '--top_k', '--top_p', '--speed',
        '--enable_text_splitting', '--fine_tuned',
        '--version', '--help'
@@ -128,37 +114,30 @@ Linux/Mac:
    parser.add_argument(options[5], nargs='?', const='default', type=str,
                        help=f'Path to the directory containing ebooks for batch conversion. Default to "{os.path.basename(ebooks_dir)}" if "default" is provided.')
    parser.add_argument(options[6], type=str, default=None,
-                        help='Path to the target voice file for TTS. Optional, uses a default voice if not provided.')
+                        help='Path to the target voice file for TTS. Optional, must be 24khz for XTTS and 16khz for fairseq models, uses a default voice if not provided.')
    parser.add_argument(options[7], type=str, default=default_language_code,
                        help=f'Language for the audiobook conversion. Options: {lang_list_str}. Default to English (eng).')
    parser.add_argument(options[8], type=str, default='cpu', choices=['cpu', 'gpu'],
                        help=f'Type of processor unit for the audiobook conversion. If not specified: check first if gpu available, if not cpu is selected.')
-    """
    parser.add_argument(options[9], type=str,
-                        help='Path to the custom model file
-    parser.add_argument(options[10], type=
-                        help=("URL to download the custom model as a zip file. Optional, but will be used if provided. "
-                              "Examples include David Attenborough's model: "
-                              "'https://huggingface.co/drewThomasson/xtts_David_Attenborough_fine_tune/resolve/main/Finished_model_files.zip?download=true'. "
-                              "More XTTS fine-tunes can be found on my Hugging Face at 'https://huggingface.co/drewThomasson'."))
-    """
-    parser.add_argument(options[9], type=float, default=0.65,
+                        help=f'Path to the custom model (.zip file containing {default_model_files}). Required if using a custom model.')
+    parser.add_argument(options[10], type=float, default=0.65,
                        help='Temperature for the model. Default to 0.65. Higher temperatures lead to more creative outputs.')
-    parser.add_argument(options[
+    parser.add_argument(options[11], type=float, default=1.0,
                        help='A length penalty applied to the autoregressive decoder. Default to 1.0. Not applied to custom models.')
-    parser.add_argument(options[
+    parser.add_argument(options[12], type=float, default=2.5,
                        help='A penalty that prevents the autoregressive decoder from repeating itself. Default to 2.5')
-    parser.add_argument(options[
+    parser.add_argument(options[13], type=int, default=50,
                        help='Top-k sampling. Lower values mean more likely outputs and increased audio generation speed. Default to 50')
-    parser.add_argument(options[
+    parser.add_argument(options[14], type=float, default=0.8,
                        help='Top-p sampling. Lower values mean more likely outputs and increased audio generation speed. Default to 0.8')
-    parser.add_argument(options[
+    parser.add_argument(options[15], type=float, default=1.0,
                        help='Speed factor for the speech generation. Default to 1.0')
-    parser.add_argument(options[15], type=str, default=default_fine_tuned,
-                        help='Name of the fine tuned model. Optional, uses the standard model according to the TTS engine and language.')
    parser.add_argument(options[16], action='store_true',
-                        help='Enable splitting text into sentences. Default to False.')
-    parser.add_argument(options[17],
+                        help='Enable splitting text into sentences. Default to False.')
+    parser.add_argument(options[17], type=str, default=default_fine_tuned,
+                        help='Name of the fine tuned model. Optional, uses the standard model according to the TTS engine and language.')
+    parser.add_argument(options[18], action='version',version=f'ebook2audiobook version {version}',
                        help='Show the version of the script and exit')

    for arg in sys.argv:
@@ -166,17 +145,17 @@ Linux/Mac:
            print(f'Error: Unrecognized option "{arg}"')
            sys.exit(1)

-    args = parser.parse_args()
+    args = vars(parser.parse_args())

    # Check if the port is already in use to prevent multiple launches
-    if not args
-    print(f'Error: Port {
+    if not args['headless'] and is_port_in_use(interface_port):
+        print(f'Error: Port {interface_port} is already in use. The web interface may already be running.')
        sys.exit(1)

-    args
-    args
+    args['script_mode'] = args['script_mode'] if args['script_mode'] else NATIVE
+    args['share'] = args['share'] if args['share'] else False

-    if args
+    if args['script_mode'] == NATIVE:
        check_pkg = check_and_install_requirements(requirements_file)
        if check_pkg:
            print('Package requirements ok')
@@ -191,27 +170,27 @@ Linux/Mac:
    from lib.functions import web_interface, convert_ebook

    # Conditions based on the --headless flag
-    if args
-    args
-    args
+    if args['headless']:
+        args['is_gui_process'] = False
+        args['audiobooks_dir'] = audiobooks_cli_dir

        # Condition to stop if both --ebook and --ebooks_dir are provided
-        if args
+        if args['ebook'] and args['ebooks_dir']:
            print('Error: You cannot specify both --ebook and --ebooks_dir in headless mode.')
            sys.exit(1)

        # Condition 1: If --ebooks_dir exists, check value and set 'ebooks_dir'
-        if args
+        if args['ebooks_dir']:
            new_ebooks_dir = None
-            if args
+            if args['ebooks_dir'] == 'default':
                print(f'Using the default ebooks_dir: {ebooks_dir}')
                new_ebooks_dir = os.path.abspath(ebooks_dir)
            else:
                # Check if the directory exists
-                if os.path.exists(args
-                new_ebooks_dir = os.path.abspath(args
+                if os.path.exists(args['ebooks_dir']):
+                    new_ebooks_dir = os.path.abspath(args['ebooks_dir'])
                else:
-                    print(f'Error: The provided --ebooks_dir "{args
+                    print(f'Error: The provided --ebooks_dir "{args['ebooks_dir']}" does not exist.')
                    sys.exit(1)

            if os.path.exists(new_ebooks_dir):
@@ -220,7 +199,7 @@ Linux/Mac:
                    if any(file.endswith(ext) for ext in ebook_formats):
                        full_path = os.path.join(new_ebooks_dir, file)
                        print(f'Processing eBook file: {full_path}')
-                        args
+                        args['ebook'] = full_path
                        progress_status, audiobook_file = convert_ebook(args)
                        if audiobook_file is None:
                            print(f'Conversion failed: {progress_status}')
@@ -229,7 +208,7 @@ Linux/Mac:
                print(f'Error: The directory {new_ebooks_dir} does not exist.')
                sys.exit(1)

-        elif args
+        elif args['ebook']:
            progress_status, audiobook_file = convert_ebook(args)
            if audiobook_file is None:
                print(f'Conversion failed: {progress_status}')
@@ -239,7 +218,7 @@ Linux/Mac:
            print('Error: In headless mode, you must specify either an ebook file using --ebook or an ebook directory using --ebooks_dir.')
            sys.exit(1)
    else:
-        args
+        args['is_gui_process'] = True
        passed_arguments = sys.argv[1:]
        allowed_arguments = {'--share', '--script_mode'}
        passed_args_set = {arg for arg in passed_arguments if arg.startswith('--')}
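Note on the app.py hunks above: the argument namespace is now converted to a plain dict with vars(parser.parse_args()), which is why the headless branch indexes args['headless'], args['ebook'] and friends, and why extra keys such as args['is_gui_process'] and args['audiobooks_dir'] can be attached before the dict is handed to convert_ebook(). A minimal, self-contained sketch of the same pattern (illustrative only, not the project's exact code):

    import argparse

    NATIVE = 'native'  # assumed constant, mirroring the script_mode default in the diff

    parser = argparse.ArgumentParser(description='Convert eBooks to audiobooks')
    parser.add_argument('--script_mode', type=str, default=None)
    parser.add_argument('--headless', action='store_true')
    parser.add_argument('--ebook', type=str, default=None)

    # vars() turns the argparse Namespace into a dict, so later code can both
    # read options and attach new keys before passing everything downstream.
    args = vars(parser.parse_args(['--headless', '--ebook', 'my_book.epub']))
    args['script_mode'] = args['script_mode'] if args['script_mode'] else NATIVE
    args['is_gui_process'] = not args['headless']
    print(args['script_mode'], args['ebook'], args['is_gui_process'])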
ebook2audiobook.cmd
ADDED
@@ -0,0 +1,285 @@
@echo off
setlocal enabledelayedexpansion

:: Capture all arguments into ARGS
set "ARGS=%*"

set "NATIVE=native"
set "DOCKER_UTILS=docker_utils"
set "FULL_DOCKER=full_docker"

set "SCRIPT_MODE=%NATIVE%"
set "SCRIPT_DIR=%~dp0"

set "PYTHON_VERSION=3.12"
set "DOCKER_UTILS_IMG=utils"
set "PYTHON_ENV=python_env"
set "CURRENT_ENV="
set "PROGRAMS_LIST=calibre ffmpeg"

set "CONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe"
set "CONDA_INSTALLER=%TEMP%\Miniconda3-latest-Windows-x86_64.exe"
set "CONDA_INSTALL_DIR=%USERPROFILE%\miniconda3"
set "CONDA_PATH=%USERPROFILE%\miniconda3\bin"
set "PATH=%CONDA_PATH%;%PATH%"

set "PROGRAMS_CHECK=0"
set "CONDA_CHECK_STATUS=0"
set "CONDA_RUN_INIT=0"
set "DOCKER_CHECK_STATUS=0"
set "DOCKER_BUILD_STATUS=0"

set "CALIBRE_TEMP_DIR=C:\Windows\Temp\Calibre"

if not exist "%CALIBRE_TEMP_DIR%" (
    mkdir "%CALIBRE_TEMP_DIR%"
)

icacls "%CALIBRE_TEMP_DIR%" /grant Users:(OI)(CI)F /T

for %%A in (%ARGS%) do (
    if "%%A"=="%DOCKER_UTILS%" (
        set "SCRIPT_MODE=%DOCKER_UTILS%"
        break
    )
)

cd /d "%SCRIPT_DIR%"

:: Check if running inside Docker
if defined CONTAINER (
    echo Running in %FULL_DOCKER% mode
    set "SCRIPT_MODE=%FULL_DOCKER%"
    goto main
)

echo Running in %SCRIPT_MODE% mode

:: Check if running in a Conda environment
if defined CONDA_DEFAULT_ENV (
    set "CURRENT_ENV=%CONDA_PREFIX%"
)

:: Check if running in a Python virtual environment
if defined VIRTUAL_ENV (
    set "CURRENT_ENV=%VIRTUAL_ENV%"
)

for /f "delims=" %%i in ('where python') do (
    if defined CONDA_PREFIX (
        if /i "%%i"=="%CONDA_PREFIX%\Scripts\python.exe" (
            set "CURRENT_ENV=%CONDA_PREFIX%"
            break
        )
    ) else if defined VIRTUAL_ENV (
        if /i "%%i"=="%VIRTUAL_ENV%\Scripts\python.exe" (
            set "CURRENT_ENV=%VIRTUAL_ENV%"
            break
        )
    )
)

if not "%CURRENT_ENV%"=="" (
    echo Current python virtual environment detected: %CURRENT_ENV%.
    echo This script runs with its own virtual env and must be out of any other virtual environment when it's launched.
    goto failed
)

goto conda_check

:conda_check
where conda >nul 2>&1
if %errorlevel% neq 0 (
    set "CONDA_CHECK_STATUS=1"
) else (
    if "%SCRIPT_MODE%"=="%DOCKER_UTILS%" (
        goto docker_check
        exit /b
    ) else (
        call :programs_check
    )
)
goto dispatch
exit /b

:programs_check
set "missing_prog_array="
for %%p in (%PROGRAMS_LIST%) do (
    set "FOUND="
    for /f "delims=" %%i in ('where %%p 2^>nul') do (
        set "FOUND=%%i"
    )
    if not defined FOUND (
        echo %%p is not installed.
        set "missing_prog_array=!missing_prog_array! %%p"
    )
)
if not "%missing_prog_array%"=="" (
    set "PROGRAMS_CHECK=1"
)
exit /b

:docker_check
docker --version >nul 2>&1
if %errorlevel% neq 0 (
    set "DOCKER_CHECK_STATUS=1"
) else (
    :: Verify Docker is running
    call docker info >nul 2>&1
    if %errorlevel% neq 0 (
        set "DOCKER_CHECK_STATUS=1"
    ) else (
        :: Check if the Docker socket is running
        set "docker_socket="
        if exist \\.\pipe\docker_engine (
            set "docker_socket=Windows"
        )
        if not defined docker_socket (
            echo Cannot connect to docker socket. Check if the docker socket is running.
            goto failed
            exit /b
        ) else (
            :: Check if the Docker image is available
            call docker images -q %DOCKER_UTILS_IMG% >nul 2>&1
            if %errorlevel% neq 0 (
                echo Docker image '%DOCKER_UTILS_IMG%' not found. Installing it now...
                set "DOCKER_BUILD_STATUS=1"
            ) else (
                goto dispatch
                exit /b
            )
        )
    )
)
goto install_components
exit /b

:install_components
:: Check if running as administrator
net session >nul 2>&1
if %errorlevel% neq 0 (
    echo This script needs to be run as administrator.
    echo Attempting to restart with administrator privileges...
    if defined ARGS (
        call powershell -ExecutionPolicy Bypass -Command "Start-Process '%~f0' -ArgumentList '%ARGS%' -WorkingDirectory '%SCRIPT_DIR%' -Verb RunAs"
    ) else (
        call powershell -ExecutionPolicy Bypass -Command "Start-Process '%~f0' -WorkingDirectory '%SCRIPT_DIR%' -Verb RunAs"
    )
    exit /b
)
:: Install Chocolatey if not already installed
choco -v >nul 2>&1
if %errorlevel% neq 0 (
    echo Chocolatey is not installed. Installing Chocolatey...
    call powershell -ExecutionPolicy Bypass -Command "Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; Invoke-Expression ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
)
:: Install Python if not already installed
python --version >nul 2>&1
if %errorlevel% neq 0 (
    echo Python is not installed. Installing Python...
    call choco install python -y
)
:: Install missing packages if any
if not "%PROGRAMS_CHECK%"=="0" (
    call choco install %missing_prog_array% -y --force
    setx CALIBRE_TEMP_DIR "%CALIBRE_TEMP_DIR%" /M
    set "PROGRAMS_CHECK=0"
    set "missing_prog_array="
)
:: Install Conda if not already installed
if not "%CONDA_CHECK_STATUS%"=="0" (
    echo Installing Conda...
    call powershell -Command "[System.Environment]::SetEnvironmentVariable('Path', [System.Environment]::GetEnvironmentVariable('Path','Machine') + ';' + [System.Environment]::GetEnvironmentVariable('Path','User'),'Process')"
    echo Downloading Conda installer...
    call bitsadmin /transfer "MinicondaDownload" %CONDA_URL% "%CONDA_INSTALLER%"
    "%CONDA_INSTALLER%" /InstallationType=JustMe /RegisterPython=0 /AddToPath=1 /S /D=%CONDA_INSTALL_DIR%
    if exist "%CONDA_INSTALL_DIR%\condabin\conda.bat" (
        echo Conda installed successfully.
        set "CONDA_RUN_INIT=1"
        set "CONDA_CHECK_STATUS=0"
        set "PATH=%CONDA_INSTALL_DIR%\condabin;%PATH%"
    )
)
:: Install Docker if not already installed
if not "%DOCKER_CHECK_STATUS%"=="0" (
    echo Docker is not installed. Installing it now...
    call choco install docker-cli docker-engine -y
    call docker --version >nul 2>&1
    if %errorlevel% equ 0 (
        echo Starting Docker Engine...
        net start com.docker.service >nul 2>&1
        if %errorlevel% equ 0 (
            echo Docker installed and started successfully.
            set "DOCKER_CHECK_STATUS=0"
        )
    )
)
:: Build Docker image if required
if not "%DOCKER_BUILD_STATUS%"=="0" (
    call conda activate "%SCRIPT_DIR%\%PYTHON_ENV%"
    call python -m pip install -e .
    call docker build -f DockerfileUtils -t utils .
    call conda deactivate
    call docker images -q %DOCKER_UTILS_IMG% >nul 2>&1
    if %errorlevel% equ 0 (
        set "DOCKER_BUILD_STATUS=0"
    )
)
net session >nul 2>&1
if %errorlevel% equ 0 (
    echo Restarting in user mode...
    start "" /b cmd /c "%~f0" %ARGS%
    exit /b
)
goto dispatch
exit /b

:dispatch
if "%PROGRAMS_CHECK%"=="0" (
    if "%CONDA_CHECK_STATUS%"=="0" (
        if "%DOCKER_CHECK_STATUS%"=="0" (
            if "%DOCKER_BUILD_STATUS%"=="0" (
                goto main
                exit /b
            )
        ) else (
            goto failed
            exit /b
        )
    )
)
echo PROGRAMS_CHECK: %PROGRAMS_CHECK%
echo CONDA_CHECK_STATUS: %CONDA_CHECK_STATUS%
echo DOCKER_CHECK_STATUS: %DOCKER_CHECK_STATUS%
echo DOCKER_BUILD_STATUS: %DOCKER_BUILD_STATUS%
timeout /t 5 /nobreak >nul
goto install_components
exit /b

:main
if "%SCRIPT_MODE%"=="%FULL_DOCKER%" (
    python %SCRIPT_DIR%\app.py --script_mode %FULL_DOCKER% %ARGS%
) else (
    if "%CONDA_RUN_INIT%"=="1" (
        call conda init
        set "CONDA_RUN_INIT=0"
    )
    if not exist "%SCRIPT_DIR%\%PYTHON_ENV%" (
        call conda create --prefix %SCRIPT_DIR%\%PYTHON_ENV% python=%PYTHON_VERSION% -y
        call conda activate %SCRIPT_DIR%\%PYTHON_ENV%
        call python -m pip install --upgrade pip
        call python -m pip install --upgrade -r requirements.txt --progress-bar=on
    ) else (
        call conda activate %SCRIPT_DIR%\%PYTHON_ENV%
    )
    python %SCRIPT_DIR%\app.py --script_mode %SCRIPT_MODE% %ARGS%
    call conda deactivate
)
exit /b

:failed
echo ebook2audiobook is not correctly installed or run.
exit /b

endlocal
pause
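The :programs_check block above looks up every entry of PROGRAMS_LIST (calibre and ffmpeg) with `where` before dispatching, and the installer branch pulls any missing ones through Chocolatey. For reference, a rough Python equivalent of that PATH check (a sketch for illustration only; in the project this logic lives in the launcher scripts, not in Python):

    import shutil

    PROGRAMS_LIST = ['calibre', 'ffmpeg']  # mirrors the launcher's PROGRAMS_LIST

    def missing_programs(programs):
        # shutil.which() is the cross-platform analogue of the batch 'where' lookup.
        return [prog for prog in programs if shutil.which(prog) is None]

    missing = missing_programs(PROGRAMS_LIST)
    if missing:
        print('Missing required programs:', ', '.join(missing))
    else:
        print('All required programs are installed.')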
ebook2audiobook.sh
CHANGED
@@ -179,7 +179,9 @@ function install_programs {
            fi
        fi
    done
-    if
+    if required_programs_check "${REQUIRED_PROGRAMS[@]}"; then
+        return 0
+    else
        echo -e "\e[33mYou can run 'ebook2audiobook.sh --script_mode docker_utils' to avoid to install $REQUIRED_PROGRAMS natively.\e[0m"
        return 1
    fi
lib/__pycache__/conf.cpython-312.pyc
CHANGED
Binary files a/lib/__pycache__/conf.cpython-312.pyc and b/lib/__pycache__/conf.cpython-312.pyc differ
lib/__pycache__/functions.cpython-312.pyc
CHANGED
Binary files a/lib/__pycache__/functions.cpython-312.pyc and b/lib/__pycache__/functions.cpython-312.pyc differ
lib/__pycache__/lang.cpython-312.pyc
CHANGED
Binary files a/lib/__pycache__/lang.cpython-312.pyc and b/lib/__pycache__/lang.cpython-312.pyc differ
lib/conf.py
CHANGED
@@ -12,14 +12,23 @@ max_python_version = (3,12)
 requirements_file = os.path.abspath(os.path.join('.','requirements.txt'))

 docker_utils_image = 'utils'
-
-
-
+
+ interface_host = '0.0.0.0'
+ interface_port = 7860
+ interface_shared_expire = 72 # hours
+ interface_concurrency_limit = 8 # or None for unlimited
+ interface_component_options = {
+     "gr_tab_preferences": True,
+     "gr_voice_file": True,
+     "gr_group_custom_model": True
+ }

 python_env_dir = os.path.abspath(os.path.join('.','python_env'))
+
 models_dir = os.path.abspath(os.path.join('.','models'))
 ebooks_dir = os.path.abspath(os.path.join('.','ebooks'))
 processes_dir = os.path.abspath(os.path.join('.','tmp'))
+
 audiobooks_gradio_dir = os.path.abspath(os.path.join('.','audiobooks','gui','gradio'))
 audiobooks_host_dir = os.path.abspath(os.path.join('.','audiobooks','gui','host'))
 audiobooks_cli_dir = os.path.abspath(os.path.join('.','audiobooks','cli'))
@@ -42,36 +51,98 @@ os.environ['XDG_CACHE_HOME'] = models_dir

 ebook_formats = ['.epub', '.mobi', '.azw3', 'fb2', 'lrf', 'rb', 'snb', 'tcr', '.pdf', '.txt', '.rtf', 'doc', '.docx', '.html', '.odt', '.azw']
 audiobook_format = 'm4b' # or 'mp3'
-
+ audioproc_format = 'wav' # only 'wav' is valid for now

 default_tts_engine = 'xtts'
 default_fine_tuned = 'std'
+ default_model_files = ['config.json', 'vocab.json', 'model.pth', 'ref.wav']

 models = {
     "xtts": {
         "std": {
             "lang": "multi",
-             "
-             "
-             "voice": default_voice_file
-
+             "repo": "tts_models/multilingual/multi-dataset/xtts_v2",
+             "sub": "",
+             "voice": default_voice_file
         },
+         "AiExplained": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/AiExplained",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "AiExplained_24khz.wav"))
+         },
+         "BobOdenkirk": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/BobOdenkirk",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "BobOdenkirk_24khz.wav"))
+         },
+         "BobRoss": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/BobRoss",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "BobRoss_24khz.wav"))
+         },
+         "BryanCranston": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/BryanCranston",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "BryanCranston_24khz.wav"))
+         },
         "DavidAttenborough": {
             "lang": "eng",
-             "
-             "
-             "
-             "voice": os.path.abspath(os.path.join("voices", "eng", "elder", "male", "DavidAttenborough_24khz.wav")),
-             "files": ["config.json", "model.pth", "vocab.json"] # Files needed for this fine-tuned model
-             #"actaul_download_location": models_dir + /tts/ + "folder" + "api"
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/DavidAttenborough",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "elder", "male", "DavidAttenborough_24khz.wav"))
         },
-
+         "DeathPuss&Boots": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/DeathPuss&Boots",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "DeathPuss&Boots_24khz.wav"))
+         },
+         "GhostMW2": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/GhostMW2",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "GhostMW2_24khz.wav"))
+         },
+         "JhonButlerASMR": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/JhonButlerASMR",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "elder", "male", "JhonButlerASMR_24khz.wav"))
+         },
+         "JhonMulaney": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/JhonMulaney",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "JhonMulaney_24khz.wav"))
+         },
+         "MorganFreeman": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/MorganFreeman",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "MorganFreeman_24khz.wav"))
+         },
+         "RainyDayHeadSpace": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/RainyDayHeadSpace",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "elder", "male", "RainyDayHeadSpace_24khz.wav"))
+         },
+         "WhisperSalemASMR": {
+             "lang": "eng",
+             "repo": "drewThomasson/fineTunedTTSModels",
+             "sub": "xtts-v2/eng/WhisperSalemASMR",
+             "voice": os.path.abspath(os.path.join("voices", "eng", "adult", "male", "WhisperSalemASMR_24khz.wav"))
+         }
     },
     "fairseq": {
         "std": {
             "lang": "multi",
-             "
-             "
+             "repo": "tts_models/[lang]/fairseq/vits",
+             "sub": "",
             "voice": default_voice_file
         }
     }
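Each fine-tuned entry added above points at the drewThomasson/fineTunedTTSModels repository through its 'repo' and 'sub' keys and ships a reference 'voice' sample, while default_model_files lists the files a complete model is expected to contain. The replacement download logic in lib/functions.py is not fully captured by this diff; the sketch below only illustrates how such an entry could be resolved to per-file URLs, reusing the huggingface.co/.../resolve/main/... pattern of the removed download_fine_tuned_model() helper (candidate_urls is a hypothetical name):

    import os

    # Values copied from the lib/conf.py hunk above.
    default_model_files = ['config.json', 'vocab.json', 'model.pth', 'ref.wav']
    entry = {
        'lang': 'eng',
        'repo': 'drewThomasson/fineTunedTTSModels',
        'sub': 'xtts-v2/eng/DavidAttenborough',
        'voice': os.path.join('voices', 'eng', 'elder', 'male', 'DavidAttenborough_24khz.wav'),
    }

    def candidate_urls(entry, files):
        # Hypothetical helper: one URL per expected model file.
        base = f"https://huggingface.co/{entry['repo']}/resolve/main"
        sub = f"/{entry['sub']}" if entry['sub'] else ''
        return [f'{base}{sub}/{name}' for name in files]

    for url in candidate_urls(entry, default_model_files):
        print(url)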
lib/functions.py
CHANGED
@@ -4,6 +4,7 @@ import docker
 import ebooklib
 import gradio as gr
 import hashlib
 import numpy as np
 import os
 import regex as re
@@ -27,6 +28,7 @@ from collections.abc import MutableMapping
 from datetime import datetime
 from ebooklib import epub
 from glob import glob
 from iso639 import languages
 from multiprocessing import Manager, Event
 from pydub import AudioSegment
@@ -40,49 +42,6 @@ from urllib.parse import urlparse
 import lib.conf as conf
 import lib.lang as lang

-def download_fine_tuned_model(model_key):
-    """Download the fine-tuned model files from Hugging Face if missing."""
-    model = models['xtts'].get(model_key)
-    if not model:
-        raise ValueError(f"Fine-tuned model '{model_key}' not found in configuration.")
-
-    # Check if the model is fine-tuned (skip std models)
-    if model_key == "std":
-        print("Standard model detected. Skipping fine-tuned download process.")
-        return
-
-    # Construct the full directory path for the fine-tuned model
-    model_dir = os.path.join(models_dir, 'tts', model['folder'], model['api'])
-    os.makedirs(model_dir, exist_ok=True)
-
-    for file_name in model['files']:
-        file_path = os.path.join(model_dir, file_name)
-        if not os.path.exists(file_path):
-            print(f"Downloading {file_name} for fine-tuned model '{model_key}'...")
-            # Construct the download URL
-            url = f"https://huggingface.co/{model['api']}/resolve/main/{model.get('subfolder', '')}/{file_name}".strip('/')
-            try:
-                response = requests.get(url, stream=True)
-                response.raise_for_status()
-                total_size = int(response.headers.get('content-length', 0))
-                with open(file_path, 'wb') as file, tqdm(
-                    total=total_size, unit='B', unit_scale=True, desc=f"Downloading {file_name}"
-                ) as progress:
-                    for chunk in response.iter_content(chunk_size=1024):
-                        file.write(chunk)
-                        progress.update(len(chunk))
-                print(f"Downloaded: {file_name}")
-            except Exception as e:
-                raise RuntimeError(f"Failed to download {file_name}: {e}")
-
-    print(f"All files for fine-tuned model '{model_key}' are ready at {model_dir}.")
-
-
-
-
-
-
 def inject_configs(target_namespace):
     # Extract variables from both modules and inject them into the target namespace
     for module in (conf, lang):
@@ -122,6 +81,7 @@ class ConversionContext:
         self.sessions[session_id] = recursive_proxy({
             "script_mode": NATIVE,
             "client": None,
             "audiobooks_dir": None,
             "tmp_dir": None,
             "src": None,
@@ -134,6 +94,7 @@ class ConversionContext:
             "fine_tuned": None,
             "voice_file": None,
             "custom_model": None,
             "chapters": None,
             "cover": None,
             "metadata": {
@@ -161,7 +122,7 @@ class ConversionContext:
         }, manager=self.manager)
         return self.sessions[session_id]

-context =
 is_gui_process = False

 class DependencyError(Exception):
@@ -181,43 +142,12 @@ class DependencyError(Exception):
         if not is_gui_process:
             sys.exit(1)

-def check_missing_files(dir_path, f_list):
-    if not os.path.exists(dir_path):
-        return False, 'Folder does not exist', f_list
-    existing_files = os.listdir(dir_path)
-    missing_files = [file for file in f_list if file not in existing_files]
-    if missing_files:
-        return False, 'Some files are missing', missing_files
-    return True, 'All files are present', []
-
-def download_model(dest_dir, url):
-    try:
-        if not os.path.exists(dest_dir):
-            os.makedirs(dest_dir)
-        zip_path = os.path.join(dest_dir, models['xtts']['zip'])
-        print('Downloading the XTTS v2 model...')
-        response = requests.get(url, stream=True)
-        response.raise_for_status() # Raise an error for bad status codes
-        total_size = int(response.headers.get('content-length', 0))
-        chunk_size = 1024 # Download in chunks of 1KB
-        with open(zip_path, 'wb') as file, tqdm(
-            total=total_size, unit='B', unit_scale=True, desc='Downloading'
-        ) as progress_bar:
-            for chunk in response.iter_content(chunk_size=chunk_size):
-                file.write(chunk)
-                progress_bar.update(len(chunk))
-        print('Extracting the model files...')
-        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-            zip_ref.extractall(dest_dir)
-        os.remove(zip_path)
-        print('Model downloaded, extracted, and zip file removed successfully.')
-    except Exception as e:
-        raise DependencyError(e)
-
 def prepare_dirs(src, session):
     try:
         resume = False
         os.makedirs(session['tmp_dir'], exist_ok=True)
         os.makedirs(session['audiobooks_dir'], exist_ok=True)
         session['src'] = os.path.join(session['tmp_dir'], os.path.basename(src))
         if os.path.exists(session['src']):
@@ -258,44 +188,79 @@ def check_fine_tuned(fine_tuned, language):
     except Exception as e:
         raise RuntimeError(e)

-def
     try:
-
-
-
-
-
-
-
-
-
-
-
-
     except Exception as e:
-        raise RuntimeError(f'
-
-def extract_custom_model(
     try:
-
         files = zip_ref.namelist()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
     except Exception as e:
         raise DependencyError(e)

@@ -323,7 +288,8 @@ def has_metadata(f):
 def convert_to_epub(session):
     if session['cancellation_requested']:
         stop_and_detach_tts()
-
     if session['script_mode'] == DOCKER_UTILS:
         try:
             docker_dir = os.path.basename(session['tmp_dir'])
@@ -363,6 +329,10 @@ def convert_to_epub(session):

 def get_cover(session):
     try:
         cover_image = False
         cover_path = os.path.join(session['tmp_dir'], session['filename_noext'] + '.jpg')
         for item in session['epub'].get_items_of_type(ebooklib.ITEM_COVER):
@@ -385,16 +355,17 @@ def get_chapters(language, session):
     try:
         if session['cancellation_requested']:
             stop_and_detach_tts()
-
         all_docs = list(session['epub'].get_items_of_type(ebooklib.ITEM_DOCUMENT))
         if all_docs:
             all_docs = all_docs[1:]
             doc_patterns = [filter_pattern(str(doc)) for doc in all_docs if filter_pattern(str(doc))]
             most_common_pattern = filter_doc(doc_patterns)
             selected_docs = [doc for doc in all_docs if filter_pattern(str(doc)) == most_common_pattern]
-             chapters = [filter_chapter(doc, language
             if session['metadata'].get('creator'):
-                 intro = f"{session['metadata']['creator']}, {session['metadata']['title']}
                 chapters[0].insert(0, intro)
             return chapters
         return False
@@ -419,16 +390,11 @@ def filter_pattern(doc_identifier):
         return 'numbers'
     return None

-def filter_chapter(doc, language
-    if session['cancellation_requested']:
-        stop_and_detach_tts()
-        raise ValueError('Cancel requested')
-
     soup = BeautifulSoup(doc.get_body_content(), 'html.parser')
     # Remove scripts and styles
     for script in soup(["script", "style"]):
         script.decompose()
-
     # Normalize lines and remove unnecessary spaces
     text = re.sub(r'(\r\n|\r|\n){3,}', '\r\n', soup.get_text().strip())
     text = replace_roman_numbers(text)
@@ -436,24 +402,19 @@ def filter_chapter(doc, language, session):
     chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
     text = '\n'.join(chunk for chunk in chunks if chunk)
     text = text.replace('»', '"').replace('«', '"')
-
     # Pattern 1: Add a space between UTF-8 characters and numbers
     text = re.sub(r'(?<=[\p{L}])(?=\d)|(?<=\d)(?=[\p{L}])', ' ', text)
-
     # Pattern 2: Split numbers into groups of 4
     text = re.sub(r'(\d{4})(?=\d)', r'\1 ', text)
-
-     chapter_sentences = get_sentences(text, language, session)
     return chapter_sentences

-def get_sentences(sentence, language,
     max_length = language_mapping[language]['char_limit']
     punctuation = language_mapping[language]['punctuation']
     parts = []
     while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
-         if session['cancellation_requested']:
-             stop_and_detach_tts()
-             raise ValueError('Cancel requested')
         # Step 1: Look for the last period (.) within max_length
         possible_splits = [i for i, char in enumerate(sentence[:max_length]) if char == '.']
         # Step 2: If no periods, look for the last comma (,)
@@ -485,14 +446,15 @@ def convert_chapters_to_audio(session):
     try:
         if session['cancellation_requested']:
             stop_and_detach_tts()
-
         progress_bar = None
         params = {}
         if is_gui_process:
             progress_bar = gr.Progress(track_tqdm=True)
         params['tts_model'] = None
         '''
-         # List available TTS models
         print("Available Models:")
         print("=================")
         for index, model in enumerate(XTTS().list_models(), 1):
@@ -501,48 +463,54 @@
         if session['metadata']['language'] in language_xtts:
             params['tts_model'] = 'xtts'
             if session['custom_model'] is not None:
-
                 config_path = os.path.join(session['custom_model'],'config.json')
             else:
-
-
-
-                 else:
-                     model_path = os.path.join(models_dir, 'tts', models[params['tts_model']][session['fine_tuned']]['folder']+os.path.normpath(models[params['tts_model']]['DavidAttenborough']['api']))
-                     config_path = os.path.join( model_path,'config.json')
-                 print(f"Loading TTS {params['tts_model']} model...")
-                 config = XttsConfig()
-                 config.models_dir = models_dir
-                 """
-                 start_time = time.time()
-                 timeout = 120
-                 while not os.path.isdir(model_path):
-                     if timeout and (time.time() - start_time) > timeout:
-                         print(f"Timeout reached: {model_path} does not exist.")
-                         return False
-                     time.sleep(1)
-                 """
-                 config.load_json(config_path)
-                 params['tts'] = Xtts.init_from_config(config)
-                 params['tts'].load_checkpoint(config, checkpoint_dir=model_path, eval=True)
             params['tts'].to(session['device'])
-             print('Computing speaker latents...')
-             params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']][session['fine_tuned']]['voice']
-             params['gpt_cond_latent'], params['speaker_embedding'] = params['tts'].get_conditioning_latents(audio_path=[params['voice_file']])
         else:
             params['tts_model'] = 'fairseq'
-
-
-             params['tts'] = XTTS(
-             params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']]['
             params['tts'].to(session['device'])

         resume_chapter = 0
         resume_sentence = 0

         # Check existing files to resume the process if it was interrupted
-         existing_chapters = sorted([f for f in os.listdir(session['chapters_dir']) if f.endswith(f'.{
-         existing_sentences = sorted([f for f in os.listdir(session['chapters_dir_sentences']) if f.endswith(f'.{

         if existing_chapters:
             count_chapter_files = len(existing_chapters)
@@ -553,67 +521,86 @@ def convert_chapters_to_audio(session):
             print(f'Resuming from sentence {resume_sentence}')

         total_chapters = len(session['chapters'])
-         total_sentences = sum(len(array) for array in session['chapters'])
         current_sentence = 0

-         with tqdm(total=total_sentences, desc='
             t.n = resume_sentence
             t.refresh()
             for x in range(resume_chapter, total_chapters):
                 chapter_num = x + 1
-                 chapter_audio_file = f'chapter_{chapter_num}.{
                 sentences = session['chapters'][x]
                 start = current_sentence # Mark the starting sentence of the chapter
                 print(f"\nChapter {chapter_num} containing {len(sentences)} sentences...")
                 for i, sentence in enumerate(sentences):
                     if current_sentence >= resume_sentence and resume_sentence > 0 or resume_sentence == 0:
-
-                         stop_and_detach_tts(params['tts'])
-                         raise ValueError('Cancel requested')
-                         params['sentence_audio_file'] = os.path.join(session['chapters_dir_sentences'], f'{current_sentence}.{audio_proc_format}')
                         params['sentence'] = sentence
-                         print(f'Sentence: {sentence}...')
                         if convert_sentence_to_audio(params, session):
-                             t.update(1)
                             percentage = (current_sentence / total_sentences) * 100
                             t.set_description(f'Processing {percentage:.2f}%')
                             t.refresh()
                             if progress_bar is not None:
                                 progress_bar(current_sentence / total_sentences)
                         else:
-                             print('convert_sentence_to_audio() failed!')
                             return False
                     current_sentence += 1
                 end = current_sentence - 1
-                 combine_audio_sentences(chapter_audio_file, start, end, session)
-
         return True
     except Exception as e:
         raise DependencyError(e)

 def convert_sentence_to_audio(params, session):
     try:
         if params['tts_model'] == 'xtts':
-
-
-
-
-
-
-
-
-
-
-
-
-
-
         elif params['tts_model'] == 'fairseq':
             params['tts'].tts_with_vc_to_file(
                 text=params['sentence'],
-                 #language=session['language'], # can be used only if multilingual model
-                 speaker_wav=params['voice_file'].replace('_24khz','_22khz'),
                 file_path=params['sentence_audio_file'],
                 split_sentences=session['enable_text_splitting']
             )
         if os.path.exists(params['sentence_audio_file']):
@@ -626,27 +613,28 @@ def convert_sentence_to_audio(params, session):
 def combine_audio_sentences(chapter_audio_file, start, end, session):
     try:
         chapter_audio_file = os.path.join(session['chapters_dir'], chapter_audio_file)
-         combined_audio = AudioSegment.empty()
-
         # Get all audio sentence files sorted by their numeric indices
         sentence_files = [f for f in os.listdir(session['chapters_dir_sentences']) if f.endswith(".wav")]
         sentences_dir_ordered = sorted(sentence_files, key=lambda x: int(re.search(r'\d+', x).group()))
-
         # Filter the files in the range [start, end]
         selected_files = [
             file for file in sentences_dir_ordered
             if start <= int(''.join(filter(str.isdigit, os.path.basename(file)))) <= end
         ]
-
         for file in selected_files:
             if session['cancellation_requested']:
                 msg = 'Cancel requested'
                 raise ValueError(msg)
-             audio_segment = AudioSegment.from_file(os.path.join(session['chapters_dir_sentences'],file), format=
             combined_audio += audio_segment
-
-         combined_audio.export(chapter_audio_file, format=audio_proc_format)
         print(f'Combined audio saved to {chapter_audio_file}')
     except Exception as e:
         raise DependencyError(e)

@@ -662,25 +650,17 @@ def combine_audio_chapters(session):
             batch_size = 256
             # Process the chapter files in batches
             for i in range(0, len(chapter_files), batch_size):
-                 if session['cancellation_requested']:
-                     msg = 'Cancel requested'
-                     raise ValueError(msg)
-
                 batch_files = chapter_files[i:i + batch_size]
                 batch_audio = AudioSegment.empty() # Initialize an empty AudioSegment for the batch
-
                 # Sequentially append each file in the current batch to the batch_audio
                 for chapter_file in batch_files:
                     if session['cancellation_requested']:
-
-
-
                     audio_segment = AudioSegment.from_wav(os.path.join(session['chapters_dir'],chapter_file))
                     batch_audio += audio_segment
-
                 combined_audio += batch_audio
-
-             combined_audio.export(assembled_audio, format=audio_proc_format)
             print(f'Combined audio saved to {assembled_audio}')
             return True
         except Exception as e:
@@ -688,6 +668,9 @@ def combine_audio_chapters(session):

     def generate_ffmpeg_metadata():
         try:
             ffmpeg_metadata = ';FFMETADATA1\n'
             if session['metadata'].get('title'):
                 ffmpeg_metadata += f"title={session['metadata']['title']}\n"
@@ -718,7 +701,6 @@ def combine_audio_chapters(session):
             mobi_asin = session['metadata']['identifiers'].get('mobi-asin', None)
             if mobi_asin:
                 ffmpeg_metadata += f'asin={mobi_asin}\n' # ASIN
-
             start_time = 0
             for index, chapter_file in enumerate(chapter_files):
                 if session['cancellation_requested']:
@@ -729,7 +711,6 @@ def combine_audio_chapters(session):
                 ffmpeg_metadata += f'[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_time}\n'
                 ffmpeg_metadata += f'END={start_time + duration_ms}\ntitle=Chapter {index + 1}\n'
                 start_time += duration_ms
-
             # Write the metadata to the file
             with open(metadata_file, 'w', encoding='utf-8') as file:
                 file.write(ffmpeg_metadata)
@@ -739,6 +720,9 @@ def combine_audio_chapters(session):

     def export_audio():
         try:
             ffmpeg_cover = None
             if session['script_mode'] == DOCKER_UTILS:
                 docker_dir = os.path.basename(session['tmp_dir'])
@@ -746,36 +730,29 @@ def combine_audio_chapters(session):
                 ffmpeg_metadata_file = f'/files/{docker_dir}/' + os.path.basename(metadata_file)
                 ffmpeg_final_file = f'/files/{docker_dir}/' + os.path.basename(docker_final_file)
                 if session['cover'] is not None:
-                     ffmpeg_cover = f'/files/{docker_dir}/' + os.path.basename(session['cover'])
-
                 ffmpeg_cmd = ['ffmpeg', '-i', ffmpeg_combined_audio, '-i', ffmpeg_metadata_file]
             else:
                 ffmpeg_combined_audio = assembled_audio
                 ffmpeg_metadata_file = metadata_file
                 ffmpeg_final_file = final_file
                 if session['cover'] is not None:
-                     ffmpeg_cover = session['cover']
-
                 ffmpeg_cmd = [shutil.which('ffmpeg'), '-i', ffmpeg_combined_audio, '-i', ffmpeg_metadata_file]
-
             if ffmpeg_cover is not None:
                 ffmpeg_cmd += ['-i', ffmpeg_cover, '-map', '0:a', '-map', '2:v']
             else:
                 ffmpeg_cmd += ['-map', '0:a']
-
-             ffmpeg_cmd += ['-map_metadata', '1', '-c:a', 'aac', '-b:a', '128k', '-ar', '44100']
-
             if ffmpeg_cover is not None:
                 if ffmpeg_cover.endswith('.png'):
                     ffmpeg_cmd += ['-c:v', 'png', '-disposition:v', 'attached_pic'] # PNG cover
                 else:
-                     ffmpeg_cmd += ['-c:v', 'copy', '-disposition:v', 'attached_pic'] # JPEG cover (no re-encoding needed)
-
             if ffmpeg_cover is not None and ffmpeg_cover.endswith('.png'):
-                 ffmpeg_cmd += ['-pix_fmt', 'yuv420p']
-
             ffmpeg_cmd += ['-movflags', '+faststart', '-y', ffmpeg_final_file]
-
             if session['script_mode'] == DOCKER_UTILS:
                 try:
                     container = session['client'].containers.run(
@@ -790,7 +767,6 @@ def combine_audio_chapters(session):
                     print(container.decode('utf-8'))
                     if shutil.copy(docker_final_file, final_file):
                         return True
-
                     return False
                 except docker.errors.ContainerError as e:
                     raise DependencyError(e)
@@ -811,7 +787,7 @@ def combine_audio_chapters(session):
     try:
         chapter_files = [f for f in os.listdir(session['chapters_dir']) if f.endswith(".wav")]
         chapter_files = sorted(chapter_files, key=lambda x: int(re.search(r'\d+', x).group()))
-         assembled_audio = os.path.join(session['tmp_dir'], '
         metadata_file = os.path.join(session['tmp_dir'], 'metadata.txt')
         if assemble_audio():
             if generate_ffmpeg_metadata():
@@ -885,7 +861,7 @@ def delete_old_web_folders(root_dir):
         os.makedirs(root_dir)
         print(f'Created missing directory: {root_dir}')
     current_time = time.time()
-     age_limit = current_time -
     for folder_name in os.listdir(root_dir):
         dir_path = os.path.join(root_dir, folder_name)
         if os.path.isdir(dir_path) and folder_name.startswith('web-'):
@@ -923,7 +899,6 @@ def convert_ebook(args):
             pass

         if args['language'] is not None and args['language'] in language_mapping.keys():
-             context = ConversionContext()
             session_id = args['session'] if args['session'] is not None else str(uuid.uuid4())
             session = context.get_session(session_id)
             session['id'] = session_id
@@ -941,13 +916,8 @@ def convert_ebook(args):
             top_p = args['top_p']
             speed = args['speed']
             enable_text_splitting = args['enable_text_splitting'] if args['enable_text_splitting'] is not None else True
-             custom_model_file = args['custom_model']
-
-             fine_tuned = args['fine_tuned'] if check_fine_tuned(args['fine_tuned'], args['language']) else False
-
-             if fine_tuned:
-                 print(f"Ensuring fine-tuned model '{fine_tuned}' is ready...")
-                 download_fine_tuned_model(fine_tuned)

             if not fine_tuned:
                 raise ValueError('The fine tuned model does not exist.')
@@ -966,24 +936,19 @@ def convert_ebook(args):
                 session['client'] = docker.from_env()

             session['tmp_dir'] = os.path.join(processes_dir, f"ebook-{session['id']}")
-             session['chapters_dir'] = os.path.join(session['tmp_dir'], f
             session['chapters_dir_sentences'] = os.path.join(session['chapters_dir'], 'sentences')

             if not is_gui_process:
                 print(f'*********** Session: {session_id}', '************* Store it in case of interruption or crash you can resume the conversion')

             if prepare_dirs(args['ebook'], session):
                 session['filename_noext'] = os.path.splitext(os.path.basename(session['src']))[0]
-                 session['custom_model'] = None
-                 if custom_model_file or custom_model_url:
-                     custom_model_dir = os.path.join(models_dir,'__sessions',f"model-{session['id']}")
-                     if os.isdir(custom_model_dir):
-                         shutil.rmtree(custom_model_dir)
-                     if custom_model_url:
-                         print(f'Get custom model: {custom_model_url}')
-                         session['custom_model'] = download_custom_model(custom_model_url, custom_model_dir, session)
-                     else:
-                         session['custom_model'] = extract_custom_model(custom_model_file, custom_model_dir, session)
                 if not torch.cuda.is_available() or device == 'cpu':
                     if device == 'gpu':
                         print('GPU is not available on your device!')
@@ -1030,7 +995,6 @@ def convert_ebook(args):
                 final_file = combine_audio_chapters(session)
                 if final_file is not None:
                     progress_status = f'Audiobook {os.path.basename(final_file)} created!'
-                     print(f"Temporary directory {session['tmp_dir']} removed successfully.")
                     return progress_status, final_file
                 else:
                     error = 'combine_audio_chapters() error: final_file not created!'
@@ -1045,10 +1009,11 @@ def convert_ebook(args):
             else:
                 error = 'convert_to_epub() failed!'
         else:
-             error = f
-
         else:
             error = f"Language {args['language']} is not supported."
         print(error)
         return error, None
     except Exception as e:
@@ -1070,6 +1035,7 @@ def web_interface(args):
         )
         for lang, details in language_mapping.items()
     ]
     fine_tuned_options = list(models['xtts'].keys())
     default_language_name = next((name for name, key in language_options if key == default_language_code), None)

@@ -1080,7 +1046,7 @@ def web_interface(args):
         radius_size='lg',
         font_mono=['JetBrains Mono', 'monospace', 'Consolas', 'Menlo', 'Liberation Mono']
     )
-
     with gr.Blocks(theme=theme) as interface:
         gr.HTML(
             '''
@@ -1123,9 +1089,12 @@ def web_interface(args):
             padding: 0 !important;
             margin: 0 !important;
         }
-         #component-7, #component-
             height: 140px !important;
         }
         </style>
         '''
     )
@@ -1133,26 +1102,37 @@
         f'''
         # Ebook2Audiobook v{version}<br/>
         https://github.com/DrewThomasson/ebook2audiobook<br/>
-         Convert eBooks into immersive audiobooks with realistic voice TTS models
         '''
     )
     with gr.Tabs():
-
         with gr.Row():
             with gr.Column(scale=3):
-                 gr_ebook_file = gr.File(label='eBook File (.epub, .mobi, .azw3, fb2, lrf, rb, snb, tcr, .pdf, .txt, .rtf, doc, .docx, .html, .odt, .azw)', file_types=['.epub', '.mobi', '.azw3', 'fb2', 'lrf', 'rb', 'snb', 'tcr', '.pdf', '.txt', '.rtf', 'doc', '.docx', '.html', '.odt', '.azw'])
-                 gr_device = gr.Radio(label='Processor Unit', choices=['CPU', 'GPU'], value='CPU')
-
gr_language = gr.Dropdown(label='Language', choices=[name for name, _ in language_options], value=default_language_name)
|
1146 |
-
with gr.Column(scale=3):
|
1147 |
with gr.Group():
|
1148 |
-
|
1149 |
-
|
1150 |
-
|
1151 |
gr.Markdown('<p> * Optional</p>')
|
1152 |
with gr.Group():
|
1153 |
-
|
|
|
|
|
|
|
|
1154 |
gr_fine_tuned = gr.Dropdown(label='Fine Tuned Models', choices=fine_tuned_options, value=default_fine_tuned, interactive=True)
|
1155 |
-
|
|
|
1156 |
gr.Markdown(
|
1157 |
'''
|
1158 |
### Customize Audio Generation Parameters
|
@@ -1214,7 +1194,6 @@ def web_interface(args):
|
|
1214 |
)
|
1215 |
|
1216 |
gr_state = gr.State(value="") # Initialize state for each user session
|
1217 |
-
gr_session_status = gr.Textbox(label='Session')
|
1218 |
gr_session = gr.Textbox(label='Session', visible=False)
|
1219 |
gr_conversion_progress = gr.Textbox(label='Progress')
|
1220 |
gr_convert_btn = gr.Button('Convert', variant='primary', interactive=False)
|
@@ -1286,7 +1265,7 @@ def web_interface(args):
|
|
1286 |
def update_interface():
|
1287 |
nonlocal is_converting
|
1288 |
is_converting = False
|
1289 |
-
return gr.update('Convert', variant='primary', interactive=False),
|
1290 |
|
1291 |
def refresh_audiobook_list():
|
1292 |
files = []
|
@@ -1303,59 +1282,102 @@ def web_interface(args):
|
|
1303 |
return link, link, gr.update(visible=True)
|
1304 |
return None, None, gr.update(visible=False)
|
1305 |
|
1306 |
-
def
|
1307 |
-
|
|
|
|
|
|
|
|
|
|
|
1308 |
|
1309 |
def update_audiobooks_ddn():
|
1310 |
files = refresh_audiobook_list()
|
1311 |
return gr.update(choices=files, label='Audiobooks', value=files[0] if files else None)
|
1312 |
|
1313 |
-
async def change_gr_ebook_file(
|
1314 |
nonlocal is_converting
|
1315 |
if context and session_id:
|
1316 |
session = context.get_session(session_id)
|
1317 |
if f is None:
|
1318 |
if is_converting:
|
1319 |
session['cancellation_requested'] = True
|
1320 |
-
yield
|
1321 |
return
|
1322 |
-
|
1323 |
-
|
1324 |
-
yield gr.update(interactive=False), hide_modal()
|
1325 |
-
return
|
1326 |
-
else:
|
1327 |
-
session['cancellation_requested'] = False
|
1328 |
-
yield gr.update(interactive=bool(f)), hide_modal()
|
1329 |
return
|
1330 |
|
1331 |
-
def change_gr_language(selected: str):
|
|
|
1332 |
if selected == 'zzzz':
|
1333 |
new_language_name = default_language_name
|
1334 |
new_language_key = default_language_code
|
1335 |
else:
|
1336 |
new_language_name, new_language_key = next(((name, key) for name, key in language_options if key == selected), (None, None))
|
1337 |
-
|
1338 |
-
# Determine the TTS engine to use
|
1339 |
-
tts_engine_value = 'xtts' if language_xtts.get(new_language_key, False) else 'fairseq'
|
1340 |
-
|
1341 |
-
# Get fine-tuned options filtered by language
|
1342 |
fine_tuned_options = [
|
1343 |
model_name
|
1344 |
-
for model_name, model_details in models.get(
|
1345 |
if model_details.get('lang') == 'multi' or model_details.get('lang') == new_language_key
|
1346 |
]
|
1347 |
-
|
1348 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1349 |
return (
|
1350 |
-
gr.update(value=new_language_name),
|
1351 |
-
gr.update(value=
|
1352 |
-
gr.update(choices=fine_tuned_options, value=fine_tuned_options[0] if fine_tuned_options else
|
|
|
1353 |
)
|
1354 |
|
1355 |
-
def
|
1356 |
-
|
1357 |
-
|
1358 |
-
|
|
|
|
|
|
|
|
1359 |
|
1360 |
def change_gr_data(data):
|
1361 |
data['event'] = 'change_data'
|
@@ -1363,9 +1385,8 @@ def web_interface(args):
|
|
1363 |
|
1364 |
def change_gr_read_data(data):
|
1365 |
nonlocal audiobooks_dir
|
|
|
1366 |
warning_text_extra = ''
|
1367 |
-
if is_gui_shared:
|
1368 |
-
warning_text_extra = f' Note: access limit time: {gradio_shared_expire} hours'
|
1369 |
if not data:
|
1370 |
data = {'session_id': str(uuid.uuid4())}
|
1371 |
warning_text = f"Session: {data['session_id']}"
|
@@ -1375,17 +1396,23 @@ def web_interface(args):
|
|
1375 |
warning_text = data['session_id']
|
1376 |
event = data.get('event', '')
|
1377 |
if event != 'load':
|
1378 |
-
return [gr.update(), gr.update(), gr.update()]
|
|
|
|
|
|
|
|
|
|
|
1379 |
if is_gui_shared:
|
|
|
1380 |
audiobooks_dir = os.path.join(audiobooks_gradio_dir, f"web-{data['session_id']}")
|
1381 |
delete_old_web_folders(audiobooks_gradio_dir)
|
1382 |
else:
|
1383 |
audiobooks_dir = os.path.join(audiobooks_host_dir, f"web-{data['session_id']}")
|
1384 |
-
return [data, f'{warning_text}{warning_text_extra}', data['session_id'], update_audiobooks_ddn()]
|
1385 |
|
1386 |
-
def
|
1387 |
session, device, ebook_file, voice_file, language,
|
1388 |
-
custom_model_file,
|
1389 |
repetition_penalty, top_k, top_p, speed, enable_text_splitting, fine_tuned
|
1390 |
):
|
1391 |
nonlocal is_converting
|
@@ -1399,8 +1426,7 @@ def web_interface(args):
|
|
1399 |
"audiobooks_dir": audiobooks_dir,
|
1400 |
"voice": voice_file.name if voice_file else None,
|
1401 |
"language": next((key for name, key in language_options if name == language), None),
|
1402 |
-
"custom_model":
|
1403 |
-
"custom_model_url": custom_model_url if custom_model_file is None else None,
|
1404 |
"temperature": float(temperature),
|
1405 |
"length_penalty": float(length_penalty),
|
1406 |
"repetition_penalty": float(repetition_penalty),
|
@@ -1412,33 +1438,34 @@ def web_interface(args):
|
|
1412 |
}
|
1413 |
|
1414 |
if args["ebook"] is None:
|
1415 |
-
return 'Error: a file is required.'
|
1416 |
|
1417 |
try:
|
1418 |
is_converting = True
|
1419 |
progress_status, audiobook_file = convert_ebook(args)
|
1420 |
-
is_converting = False
|
1421 |
-
|
1422 |
if audiobook_file is None:
|
1423 |
if is_converting:
|
1424 |
-
return 'Conversion cancelled.'
|
1425 |
else:
|
1426 |
-
return 'Conversion failed.'
|
1427 |
else:
|
1428 |
-
return progress_status
|
1429 |
except Exception as e:
|
1430 |
-
is_converting = False
|
1431 |
return DependencyError(e)
|
1432 |
|
1433 |
gr_ebook_file.change(
|
|
|
|
|
|
|
|
|
1434 |
fn=change_gr_ebook_file,
|
1435 |
-
inputs=[
|
1436 |
-
outputs=[
|
1437 |
)
|
1438 |
gr_language.change(
|
1439 |
-
lambda selected: change_gr_language(dict(language_options).get(selected, 'Unknown')),
|
1440 |
-
inputs=gr_language,
|
1441 |
-
outputs=[gr_language, gr_tts_engine, gr_fine_tuned]
|
1442 |
)
|
1443 |
gr_audiobooks_ddn.change(
|
1444 |
fn=change_gr_audiobooks_ddn,
|
@@ -1447,8 +1474,18 @@ def web_interface(args):
|
|
1447 |
)
|
1448 |
gr_custom_model_file.change(
|
1449 |
fn=change_gr_custom_model_file,
|
1450 |
-
inputs=gr_custom_model_file,
|
1451 |
-
outputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1452 |
)
|
1453 |
gr_session.change(
|
1454 |
fn=change_gr_data,
|
@@ -1469,24 +1506,20 @@ def web_interface(args):
|
|
1469 |
gr_read_data.change(
|
1470 |
fn=change_gr_read_data,
|
1471 |
inputs=gr_read_data,
|
1472 |
-
outputs=[gr_data, gr_session_status, gr_session, gr_audiobooks_ddn]
|
1473 |
)
|
1474 |
gr_convert_btn.click(
|
1475 |
-
fn=
|
1476 |
-
inputs=None,
|
1477 |
-
outputs=gr_convert_btn
|
1478 |
-
).then(
|
1479 |
-
fn=process_conversion,
|
1480 |
inputs=[
|
1481 |
gr_session, gr_device, gr_ebook_file, gr_voice_file, gr_language,
|
1482 |
-
|
1483 |
gr_repetition_penalty, gr_top_k, gr_top_p, gr_speed, gr_enable_text_splitting, gr_fine_tuned
|
1484 |
],
|
1485 |
-
outputs=
|
1486 |
).then(
|
1487 |
fn=update_interface,
|
1488 |
inputs=None,
|
1489 |
-
outputs=[gr_convert_btn, gr_ebook_file, gr_audio_player, gr_audiobooks_ddn]
|
1490 |
)
|
1491 |
interface.load(
|
1492 |
fn=None,
|
@@ -1506,7 +1539,7 @@ def web_interface(args):
|
|
1506 |
)
|
1507 |
|
1508 |
try:
|
1509 |
-
interface.queue(default_concurrency_limit=
|
1510 |
except OSError as e:
|
1511 |
print(f'Connection error: {e}')
|
1512 |
except socket.error as e:
|
|
|
4 |
import ebooklib
|
5 |
import gradio as gr
|
6 |
import hashlib
|
7 |
+
import json
|
8 |
import numpy as np
|
9 |
import os
|
10 |
import regex as re
|
|
|
28 |
from datetime import datetime
|
29 |
from ebooklib import epub
|
30 |
from glob import glob
|
31 |
+
from huggingface_hub import hf_hub_download
|
32 |
from iso639 import languages
|
33 |
from multiprocessing import Manager, Event
|
34 |
from pydub import AudioSegment
|
|
|
42 |
import lib.conf as conf
|
43 |
import lib.lang as lang
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
def inject_configs(target_namespace):
|
46 |
# Extract variables from both modules and inject them into the target namespace
|
47 |
for module in (conf, lang):
|
|
|
81 |
self.sessions[session_id] = recursive_proxy({
|
82 |
"script_mode": NATIVE,
|
83 |
"client": None,
|
84 |
+
"language": default_language_code,
|
85 |
"audiobooks_dir": None,
|
86 |
"tmp_dir": None,
|
87 |
"src": None,
|
|
|
94 |
"fine_tuned": None,
|
95 |
"voice_file": None,
|
96 |
"custom_model": None,
|
97 |
+
"custom_model_dir": None,
|
98 |
"chapters": None,
|
99 |
"cover": None,
|
100 |
"metadata": {
|
|
|
122 |
}, manager=self.manager)
|
123 |
return self.sessions[session_id]
|
124 |
|
125 |
+
context = ConversionContext()
|
126 |
is_gui_process = False
|
127 |
|
128 |
class DependencyError(Exception):
|
|
|
142 |
if not is_gui_process:
|
143 |
sys.exit(1)
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
145 |
def prepare_dirs(src, session):
|
146 |
try:
|
147 |
resume = False
|
148 |
+
os.makedirs(os.path.join(models_dir,'tts'), exist_ok=True)
|
149 |
os.makedirs(session['tmp_dir'], exist_ok=True)
|
150 |
+
os.makedirs(session['custom_model_dir'], exist_ok=True)
|
151 |
os.makedirs(session['audiobooks_dir'], exist_ok=True)
|
152 |
session['src'] = os.path.join(session['tmp_dir'], os.path.basename(src))
|
153 |
if os.path.exists(session['src']):
|
|
|
188 |
except Exception as e:
|
189 |
raise RuntimeError(e)
|
190 |
|
191 |
+
def analyze_uploaded_file(zip_path, required_files=None):
|
192 |
+
if required_files is None:
|
193 |
+
required_files = default_model_files
|
194 |
+
executable_extensions = {'.exe', '.bat', '.cmd', '.bash', '.bin', '.sh', '.msi', '.dll', '.com'}
|
195 |
try:
|
196 |
+
with zipfile.ZipFile(zip_path, 'r') as zf:
|
197 |
+
files_in_zip = set()
|
198 |
+
executables_found = False
|
199 |
+
for file_info in zf.infolist():
|
200 |
+
file_name = file_info.filename
|
201 |
+
if file_info.is_dir():
|
202 |
+
continue # Skip directories
|
203 |
+
base_name = os.path.basename(file_name)
|
204 |
+
files_in_zip.add(base_name)
|
205 |
+
_, ext = os.path.splitext(base_name.lower())
|
206 |
+
if ext in executable_extensions:
|
207 |
+
executables_found = True
|
208 |
+
break
|
209 |
+
missing_files = [f for f in required_files if f not in files_in_zip]
|
210 |
+
is_valid = not executables_found and not missing_files
|
211 |
+
return is_valid,
|
212 |
+
except zipfile.BadZipFile:
|
213 |
+
raise ValueError("error: The file is not a valid ZIP archive.")
|
214 |
except Exception as e:
|
215 |
+
raise RuntimeError(f'analyze_uploaded_file(): {e}')
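For reference, a minimal standalone sketch (not part of the commit) of the same validation idea: reject archives that ship executables and require the XTTS model files. The required-file set mirrors the ZIP contents named elsewhere in this commit (config.json, vocab.json, model.pth, ref.wav); treat it as an assumption.

# Illustrative sketch only -- not the committed analyze_uploaded_file().
import os
import zipfile

REQUIRED_FILES = {'config.json', 'vocab.json', 'model.pth', 'ref.wav'}  # assumed defaults
BLOCKED_EXTENSIONS = {'.exe', '.bat', '.cmd', '.bash', '.bin', '.sh', '.msi', '.dll', '.com'}

def is_valid_model_zip(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zf:
        names = {os.path.basename(i.filename) for i in zf.infolist() if not i.is_dir()}
    # Reject any archive carrying an executable, then require every mandatory file.
    if any(os.path.splitext(n.lower())[1] in BLOCKED_EXTENSIONS for n in names):
        return False
    return REQUIRED_FILES.issubset(names)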
|
216 |
+
|
217 |
+
async def extract_custom_model(file_src, dest=None, session=None, required_files=None):
|
218 |
try:
|
219 |
+
progress_bar = None
|
220 |
+
if is_gui_process:
|
221 |
+
progress_bar = gr.Progress(track_tqdm=True)
|
222 |
+
if dest is None:
|
223 |
+
dest = session['custom_model_dir'] = os.path.join(models_dir, '__sessions', f"model-{session['id']}")
|
224 |
+
os.makedirs(dest, exist_ok=True)
|
225 |
+
if required_files is None:
|
226 |
+
required_files = default_model_files
|
227 |
+
|
228 |
+
dir_src = os.path.dirname(file_src)
|
229 |
+
dir_name = os.path.basename(file_src).replace('.zip', '')
|
230 |
+
|
231 |
+
with zipfile.ZipFile(file_src, 'r') as zip_ref:
|
232 |
files = zip_ref.namelist()
|
233 |
+
files_length = len(files)
|
234 |
+
dir_tts = 'fairseq'
|
235 |
+
xtts_config = 'config.json'
|
236 |
+
|
237 |
+
# Check the model type
|
238 |
+
config_data = {}
|
239 |
+
if xtts_config in zip_ref.namelist():
|
240 |
+
with zip_ref.open(xtts_config) as file:
|
241 |
+
config_data = json.load(file)
|
242 |
+
if config_data.get('model') == 'xtts':
|
243 |
+
dir_tts = 'xtts'
|
244 |
+
|
245 |
+
dir_dest = os.path.join(dest, dir_tts, dir_name)
|
246 |
+
os.makedirs(dir_dest, exist_ok=True)
|
247 |
+
|
248 |
+
# Initialize progress bar
|
249 |
+
with tqdm(total=100, unit='%') as t: # Track progress as a percentage
|
250 |
+
for i, file in enumerate(files):
|
251 |
+
if file in required_files:
|
252 |
+
zip_ref.extract(file, dir_dest)
|
253 |
+
progress_percentage = ((i + 1) / files_length) * 100
|
254 |
+
t.n = int(progress_percentage)
|
255 |
+
t.refresh()
|
256 |
+
if progress_bar is not None:
|
257 |
+
progress_bar(progress_percentage / 100)
|
258 |
+
yield dir_name, progress_bar
|
259 |
+
|
260 |
+
os.remove(file_src)
|
261 |
+
print(f'Extracted files to {dir_dest}')
|
262 |
+
yield dir_name, progress_bar
|
263 |
+
return
|
264 |
except Exception as e:
|
265 |
raise DependencyError(e)
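A minimal sketch (not part of the commit) of the model-type check performed above: if the uploaded ZIP carries a config.json whose 'model' field is 'xtts', the files are filed under an xtts directory, otherwise they are treated as a fairseq model.

# Illustrative sketch only -- mirrors the config.json check in extract_custom_model().
import json
import zipfile

def detect_model_type(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zf:
        if 'config.json' in zf.namelist():
            with zf.open('config.json') as fh:
                if json.load(fh).get('model') == 'xtts':
                    return 'xtts'
    return 'fairseq'  # default used when no XTTS config is found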
|
266 |
|
|
|
288 |
def convert_to_epub(session):
|
289 |
if session['cancellation_requested']:
|
290 |
stop_and_detach_tts()
|
291 |
+
print('Cancel requested')
|
292 |
+
return False
|
293 |
if session['script_mode'] == DOCKER_UTILS:
|
294 |
try:
|
295 |
docker_dir = os.path.basename(session['tmp_dir'])
|
|
|
329 |
|
330 |
def get_cover(session):
|
331 |
try:
|
332 |
+
if session['cancellation_requested']:
|
333 |
+
stop_and_detach_tts()
|
334 |
+
print('Cancel requested')
|
335 |
+
return False
|
336 |
cover_image = False
|
337 |
cover_path = os.path.join(session['tmp_dir'], session['filename_noext'] + '.jpg')
|
338 |
for item in session['epub'].get_items_of_type(ebooklib.ITEM_COVER):
|
|
|
355 |
try:
|
356 |
if session['cancellation_requested']:
|
357 |
stop_and_detach_tts()
|
358 |
+
print('Cancel requested')
|
359 |
+
return False
|
360 |
all_docs = list(session['epub'].get_items_of_type(ebooklib.ITEM_DOCUMENT))
|
361 |
if all_docs:
|
362 |
all_docs = all_docs[1:]
|
363 |
doc_patterns = [filter_pattern(str(doc)) for doc in all_docs if filter_pattern(str(doc))]
|
364 |
most_common_pattern = filter_doc(doc_patterns)
|
365 |
selected_docs = [doc for doc in all_docs if filter_pattern(str(doc)) == most_common_pattern]
|
366 |
+
chapters = [filter_chapter(doc, language) for doc in selected_docs]
|
367 |
if session['metadata'].get('creator'):
|
368 |
+
intro = f"{session['metadata']['creator']}, {session['metadata']['title']};\n "
|
369 |
chapters[0].insert(0, intro)
|
370 |
return chapters
|
371 |
return False
|
|
|
390 |
return 'numbers'
|
391 |
return None
|
392 |
|
393 |
+
def filter_chapter(doc, language):
|
|
|
|
|
|
|
|
|
394 |
soup = BeautifulSoup(doc.get_body_content(), 'html.parser')
|
395 |
# Remove scripts and styles
|
396 |
for script in soup(["script", "style"]):
|
397 |
script.decompose()
|
|
|
398 |
# Normalize lines and remove unnecessary spaces
|
399 |
text = re.sub(r'(\r\n|\r|\n){3,}', '\r\n', soup.get_text().strip())
|
400 |
text = replace_roman_numbers(text)
|
|
|
402 |
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
403 |
text = '\n'.join(chunk for chunk in chunks if chunk)
|
404 |
text = text.replace('»', '"').replace('«', '"')
|
|
|
405 |
# Pattern 1: Add a space between UTF-8 characters and numbers
|
406 |
text = re.sub(r'(?<=[\p{L}])(?=\d)|(?<=\d)(?=[\p{L}])', ' ', text)
|
|
|
407 |
# Pattern 2: Split numbers into groups of 4
|
408 |
text = re.sub(r'(\d{4})(?=\d)', r'\1 ', text)
|
409 |
+
chapter_sentences = get_sentences(text, language)
|
|
|
410 |
return chapter_sentences
|
411 |
|
412 |
+
def get_sentences(sentence, language, max_pauses=9):
|
413 |
max_length = language_mapping[language]['char_limit']
|
414 |
punctuation = language_mapping[language]['punctuation']
|
415 |
+
sentence = sentence.replace(".", ";\n")
|
416 |
parts = []
|
417 |
while len(sentence) > max_length or sum(sentence.count(p) for p in punctuation) > max_pauses:
|
|
|
|
|
|
|
418 |
# Step 1: Look for the last period (.) within max_length
|
419 |
possible_splits = [i for i, char in enumerate(sentence[:max_length]) if char == '.']
|
420 |
# Step 2: If no periods, look for the last comma (,)
|
|
|
446 |
try:
|
447 |
if session['cancellation_requested']:
|
448 |
stop_and_detach_tts()
|
449 |
+
print('Cancel requested')
|
450 |
+
return False
|
451 |
progress_bar = None
|
452 |
params = {}
|
453 |
if is_gui_process:
|
454 |
progress_bar = gr.Progress(track_tqdm=True)
|
455 |
params['tts_model'] = None
|
456 |
'''
|
457 |
+
# List available TTS base models
|
458 |
print("Available Models:")
|
459 |
print("=================")
|
460 |
for index, model in enumerate(XTTS().list_models(), 1):
|
|
|
463 |
if session['metadata']['language'] in language_xtts:
|
464 |
params['tts_model'] = 'xtts'
|
465 |
if session['custom_model'] is not None:
|
466 |
+
print(f"Loading TTS {params['tts_model']} model from {session['custom_model']}...")
|
467 |
+
model_path = os.path.join(session['custom_model'], 'model.pth')
|
468 |
config_path = os.path.join(session['custom_model'],'config.json')
|
469 |
+
vocab_path = os.path.join(session['custom_model'],'vocab.json')
|
470 |
+
voice_path = os.path.join(session['custom_model'],'ref.wav')
|
471 |
+
config = XttsConfig()
|
472 |
+
config.models_dir = os.path.join(models_dir,'tts')
|
473 |
+
config.load_json(config_path)
|
474 |
+
params['tts'] = Xtts.init_from_config(config)
|
475 |
+
params['tts'].load_checkpoint(config, checkpoint_path=model_path, vocab_path=vocab_path, eval=True)
|
476 |
+
print('Computing speaker latents...')
|
477 |
+
params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else voice_path
|
478 |
+
params['gpt_cond_latent'], params['speaker_embedding'] = params['tts'].get_conditioning_latents(audio_path=[params['voice_file']])
|
479 |
+
elif session['fine_tuned'] != 'std':
|
480 |
+
print(f"Loading TTS {params['tts_model']} model from {session['fine_tuned']}...")
|
481 |
+
hf_repo = models[params['tts_model']][session['fine_tuned']]['repo']
|
482 |
+
hf_sub = models[params['tts_model']][session['fine_tuned']]['sub']
|
483 |
+
cache_dir = os.path.join(models_dir,'tts')
|
484 |
+
model_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}/model.pth", cache_dir=cache_dir)
|
485 |
+
config_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}/config.json", cache_dir=cache_dir)
|
486 |
+
vocab_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}/vocab.json", cache_dir=cache_dir)
|
487 |
+
config = XttsConfig()
|
488 |
+
config.models_dir = cache_dir
|
489 |
+
config.load_json(config_path)
|
490 |
+
params['tts'] = Xtts.init_from_config(config)
|
491 |
+
params['tts'].load_checkpoint(config, checkpoint_path=model_path, vocab_path=vocab_path, eval=True)
|
492 |
+
print('Computing speaker latents...')
|
493 |
+
params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']][session['fine_tuned']]['voice']
|
494 |
+
params['gpt_cond_latent'], params['speaker_embedding'] = params['tts'].get_conditioning_latents(audio_path=[params['voice_file']])
|
495 |
else:
|
496 |
+
print(f"Loading TTS {params['tts_model']} model from {models[params['tts_model']][session['fine_tuned']]['repo']}...")
|
497 |
+
params['tts'] = XTTS(model_name=models[params['tts_model']][session['fine_tuned']]['repo'])
|
498 |
+
params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']][session['fine_tuned']]['voice']
|
|
|
|
|
499 |
params['tts'].to(session['device'])
|
|
|
|
|
|
|
500 |
else:
|
501 |
params['tts_model'] = 'fairseq'
|
502 |
+
model_repo = models[params['tts_model']][session['fine_tuned']]['repo'].replace("[lang]", session['metadata']['language'])
|
503 |
+
print(f"Loading TTS {model_repo} model from {model_repo}...")
|
504 |
+
params['tts'] = XTTS(model_repo)
|
505 |
+
params['voice_file'] = session['voice_file'] if session['voice_file'] is not None else models[params['tts_model']][session['fine_tuned']]['voice']
|
506 |
params['tts'].to(session['device'])
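For context, a sketch (not part of the commit) of the fine-tuned branch above: the three checkpoint files are fetched from a Hugging Face repo with hf_hub_download and loaded through Coqui's XttsConfig/Xtts classes. The repo id, subfolder, cache directory and reference wav below are placeholders.

# Illustrative sketch only -- repo id, subfolder and paths are placeholders.
from huggingface_hub import hf_hub_download
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

cache_dir = 'models/tts'
repo_id, sub = 'some-user/xtts-fine-tune', 'checkpoint'
model_path = hf_hub_download(repo_id=repo_id, filename=f'{sub}/model.pth', cache_dir=cache_dir)
config_path = hf_hub_download(repo_id=repo_id, filename=f'{sub}/config.json', cache_dir=cache_dir)
vocab_path = hf_hub_download(repo_id=repo_id, filename=f'{sub}/vocab.json', cache_dir=cache_dir)

config = XttsConfig()
config.load_json(config_path)
tts = Xtts.init_from_config(config)
tts.load_checkpoint(config, checkpoint_path=model_path, vocab_path=vocab_path, eval=True)
# Speaker latents are computed once from a short reference clip and reused per sentence.
gpt_cond_latent, speaker_embedding = tts.get_conditioning_latents(audio_path=['ref.wav'])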
|
507 |
|
508 |
resume_chapter = 0
|
509 |
resume_sentence = 0
|
510 |
|
511 |
# Check existing files to resume the process if it was interrupted
|
512 |
+
existing_chapters = sorted([f for f in os.listdir(session['chapters_dir']) if f.endswith(f'.{audioproc_format}')])
|
513 |
+
existing_sentences = sorted([f for f in os.listdir(session['chapters_dir_sentences']) if f.endswith(f'.{audioproc_format}')])
|
514 |
|
515 |
if existing_chapters:
|
516 |
count_chapter_files = len(existing_chapters)
|
|
|
521 |
print(f'Resuming from sentence {resume_sentence}')
|
522 |
|
523 |
total_chapters = len(session['chapters'])
|
524 |
+
total_sentences = sum(len(array) for array in session['chapters'])
|
525 |
current_sentence = 0
|
526 |
|
527 |
+
with tqdm(total=total_sentences, desc='convert_chapters_to_audio 0.00%', bar_format='{desc}: {n_fmt}/{total_fmt} ', unit='step', initial=resume_sentence) as t:
|
528 |
t.n = resume_sentence
|
529 |
t.refresh()
|
530 |
for x in range(resume_chapter, total_chapters):
|
531 |
chapter_num = x + 1
|
532 |
+
chapter_audio_file = f'chapter_{chapter_num}.{audioproc_format}'
|
533 |
sentences = session['chapters'][x]
|
534 |
start = current_sentence # Mark the starting sentence of the chapter
|
535 |
print(f"\nChapter {chapter_num} containing {len(sentences)} sentences...")
|
536 |
for i, sentence in enumerate(sentences):
|
537 |
if current_sentence >= resume_sentence and resume_sentence > 0 or resume_sentence == 0:
|
538 |
+
params['sentence_audio_file'] = os.path.join(session['chapters_dir_sentences'], f'{current_sentence}.{audioproc_format}')
|
|
|
|
|
|
|
539 |
params['sentence'] = sentence
|
|
|
540 |
if convert_sentence_to_audio(params, session):
|
541 |
+
t.update(1)
|
542 |
percentage = (current_sentence / total_sentences) * 100
|
543 |
t.set_description(f'Processing {percentage:.2f}%')
|
544 |
+
print(f'Sentence: {sentence}')
|
545 |
t.refresh()
|
546 |
if progress_bar is not None:
|
547 |
progress_bar(current_sentence / total_sentences)
|
548 |
else:
|
|
|
549 |
return False
|
550 |
current_sentence += 1
|
551 |
end = current_sentence - 1
|
552 |
+
if combine_audio_sentences(chapter_audio_file, start, end, session):
|
553 |
+
print(f'Combining chapter {chapter_num} to audio, sentence {start} to {end}')
|
554 |
+
else:
|
555 |
+
print('combine_audio_sentences() failed!')
|
556 |
+
return False
|
557 |
return True
|
558 |
except Exception as e:
|
559 |
raise DependencyError(e)
|
560 |
|
561 |
def convert_sentence_to_audio(params, session):
|
562 |
try:
|
563 |
+
if session['cancellation_requested']:
|
564 |
+
stop_and_detach_tts(params['tts'])
|
565 |
+
print('Cancel requested')
|
566 |
+
return False
|
567 |
+
generation_params = {
|
568 |
+
"temperature": session['temperature'],
|
569 |
+
"length_penalty": session["length_penalty"],
|
570 |
+
"repetition_penalty": session['repetition_penalty'],
|
571 |
+
"num_beams": int(session['length_penalty']) + 1 if session["length_penalty"] > 1 else 1,
|
572 |
+
"top_k": session['top_k'],
|
573 |
+
"top_p": session['top_p'],
|
574 |
+
"speed": session['speed'],
|
575 |
+
"enable_text_splitting": session['enable_text_splitting']
|
576 |
+
}
|
577 |
if params['tts_model'] == 'xtts':
|
578 |
+
if session['custom_model'] is not None or session['fine_tuned'] != 'std':
|
579 |
+
output = params['tts'].inference(
|
580 |
+
text=params['sentence'],
|
581 |
+
language=session['metadata']['language_iso1'],
|
582 |
+
gpt_cond_latent=params['gpt_cond_latent'],
|
583 |
+
speaker_embedding=params['speaker_embedding'],
|
584 |
+
**generation_params
|
585 |
+
)
|
586 |
+
torchaudio.save(
|
587 |
+
params['sentence_audio_file'],
|
588 |
+
torch.tensor(output[audioproc_format]).unsqueeze(0),
|
589 |
+
sample_rate=24000
|
590 |
+
)
|
591 |
+
else:
|
592 |
+
params['tts'].tts_to_file(
|
593 |
+
text=params['sentence'],
|
594 |
+
language=session['metadata']['language_iso1'],
|
595 |
+
file_path=params['sentence_audio_file'],
|
596 |
+
speaker_wav=params['voice_file'],
|
597 |
+
**generation_params
|
598 |
+
)
|
599 |
elif params['tts_model'] == 'fairseq':
|
600 |
params['tts'].tts_with_vc_to_file(
|
601 |
text=params['sentence'],
|
|
|
|
|
602 |
file_path=params['sentence_audio_file'],
|
603 |
+
speaker_wav=params['voice_file'].replace('_24khz','_16khz'),
|
604 |
split_sentences=session['enable_text_splitting']
|
605 |
)
|
606 |
if os.path.exists(params['sentence_audio_file']):
|
|
|
613 |
def combine_audio_sentences(chapter_audio_file, start, end, session):
|
614 |
try:
|
615 |
chapter_audio_file = os.path.join(session['chapters_dir'], chapter_audio_file)
|
616 |
+
combined_audio = AudioSegment.empty()
|
|
|
617 |
# Get all audio sentence files sorted by their numeric indices
|
618 |
sentence_files = [f for f in os.listdir(session['chapters_dir_sentences']) if f.endswith(".wav")]
|
619 |
sentences_dir_ordered = sorted(sentence_files, key=lambda x: int(re.search(r'\d+', x).group()))
|
|
|
620 |
# Filter the files in the range [start, end]
|
621 |
selected_files = [
|
622 |
file for file in sentences_dir_ordered
|
623 |
if start <= int(''.join(filter(str.isdigit, os.path.basename(file)))) <= end
|
624 |
]
|
|
|
625 |
for file in selected_files:
|
626 |
+
if session['cancellation_requested']:
|
627 |
+
stop_and_detach_tts()
|
628 |
+
print('Cancel requested')
|
629 |
+
return False
|
630 |
if session['cancellation_requested']:
|
631 |
msg = 'Cancel requested'
|
632 |
raise ValueError(msg)
|
633 |
+
audio_segment = AudioSegment.from_file(os.path.join(session['chapters_dir_sentences'],file), format=audioproc_format)
|
634 |
combined_audio += audio_segment
|
635 |
+
combined_audio.export(chapter_audio_file, format=audioproc_format)
|
|
|
636 |
print(f'Combined audio saved to {chapter_audio_file}')
|
637 |
+
return True
|
638 |
except Exception as e:
|
639 |
raise DependencyError(e)
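A minimal sketch (not part of the commit) of the pydub pattern used above to stitch per-sentence wav files into a chapter in numeric order; the directory and file names are placeholders.

# Illustrative sketch only -- paths are placeholders.
import os
import re
from pydub import AudioSegment

sentences_dir = 'chapters/sentences'
wav_files = sorted(
    (f for f in os.listdir(sentences_dir) if f.endswith('.wav')),
    key=lambda x: int(re.search(r'\d+', x).group())  # 0.wav, 1.wav, 2.wav, ...
)
combined = AudioSegment.empty()
for name in wav_files:
    combined += AudioSegment.from_file(os.path.join(sentences_dir, name), format='wav')
combined.export('chapter_1.wav', format='wav')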
|
640 |
|
|
|
650 |
batch_size = 256
|
651 |
# Process the chapter files in batches
|
652 |
for i in range(0, len(chapter_files), batch_size):
|
|
|
|
|
|
|
|
|
653 |
batch_files = chapter_files[i:i + batch_size]
|
654 |
batch_audio = AudioSegment.empty() # Initialize an empty AudioSegment for the batch
|
|
|
655 |
# Sequentially append each file in the current batch to the batch_audio
|
656 |
for chapter_file in batch_files:
|
657 |
if session['cancellation_requested']:
|
658 |
+
print('Cancel requested')
|
659 |
+
return False
|
|
|
660 |
audio_segment = AudioSegment.from_wav(os.path.join(session['chapters_dir'],chapter_file))
|
661 |
batch_audio += audio_segment
|
|
|
662 |
combined_audio += batch_audio
|
663 |
+
combined_audio.export(assembled_audio, format=audioproc_format)
|
|
|
664 |
print(f'Combined audio saved to {assembled_audio}')
|
665 |
return True
|
666 |
except Exception as e:
|
|
|
668 |
|
669 |
def generate_ffmpeg_metadata():
|
670 |
try:
|
671 |
+
if session['cancellation_requested']:
|
672 |
+
print('Cancel requested')
|
673 |
+
return False
|
674 |
ffmpeg_metadata = ';FFMETADATA1\n'
|
675 |
if session['metadata'].get('title'):
|
676 |
ffmpeg_metadata += f"title={session['metadata']['title']}\n"
|
|
|
701 |
mobi_asin = session['metadata']['identifiers'].get('mobi-asin', None)
|
702 |
if mobi_asin:
|
703 |
ffmpeg_metadata += f'asin={mobi_asin}\n' # ASIN
|
|
|
704 |
start_time = 0
|
705 |
for index, chapter_file in enumerate(chapter_files):
|
706 |
if session['cancellation_requested']:
|
|
|
711 |
ffmpeg_metadata += f'[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_time}\n'
|
712 |
ffmpeg_metadata += f'END={start_time + duration_ms}\ntitle=Chapter {index + 1}\n'
|
713 |
start_time += duration_ms
|
|
|
714 |
# Write the metadata to the file
|
715 |
with open(metadata_file, 'w', encoding='utf-8') as file:
|
716 |
file.write(ffmpeg_metadata)
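For reference, a sketch (not part of the commit) of the FFMETADATA chapter entries this function writes, built here for two placeholder chapter files; pydub's len() returns the duration in milliseconds, which matches the 1/1000 TIMEBASE.

# Illustrative sketch only -- titles and file names are placeholders.
from pydub import AudioSegment

chapter_files = ['chapter_1.wav', 'chapter_2.wav']
metadata = ';FFMETADATA1\ntitle=Example Book\n'
start_time = 0
for index, chapter_file in enumerate(chapter_files):
    duration_ms = len(AudioSegment.from_wav(chapter_file))
    metadata += f'[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_time}\n'
    metadata += f'END={start_time + duration_ms}\ntitle=Chapter {index + 1}\n'
    start_time += duration_ms
with open('metadata.txt', 'w', encoding='utf-8') as fh:
    fh.write(metadata)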
|
|
|
720 |
|
721 |
def export_audio():
|
722 |
try:
|
723 |
+
if session['cancellation_requested']:
|
724 |
+
print('Cancel requested')
|
725 |
+
return False
|
726 |
ffmpeg_cover = None
|
727 |
if session['script_mode'] == DOCKER_UTILS:
|
728 |
docker_dir = os.path.basename(session['tmp_dir'])
|
|
|
730 |
ffmpeg_metadata_file = f'/files/{docker_dir}/' + os.path.basename(metadata_file)
|
731 |
ffmpeg_final_file = f'/files/{docker_dir}/' + os.path.basename(docker_final_file)
|
732 |
if session['cover'] is not None:
|
733 |
+
ffmpeg_cover = f'/files/{docker_dir}/' + os.path.basename(session['cover'])
|
|
|
734 |
ffmpeg_cmd = ['ffmpeg', '-i', ffmpeg_combined_audio, '-i', ffmpeg_metadata_file]
|
735 |
else:
|
736 |
ffmpeg_combined_audio = assembled_audio
|
737 |
ffmpeg_metadata_file = metadata_file
|
738 |
ffmpeg_final_file = final_file
|
739 |
if session['cover'] is not None:
|
740 |
+
ffmpeg_cover = session['cover']
|
|
|
741 |
ffmpeg_cmd = [shutil.which('ffmpeg'), '-i', ffmpeg_combined_audio, '-i', ffmpeg_metadata_file]
|
|
|
742 |
if ffmpeg_cover is not None:
|
743 |
ffmpeg_cmd += ['-i', ffmpeg_cover, '-map', '0:a', '-map', '2:v']
|
744 |
else:
|
745 |
ffmpeg_cmd += ['-map', '0:a']
|
746 |
+
ffmpeg_cmd += ['-map_metadata', '1', '-c:a', 'aac', '-b:a', '128k', '-ar', '44100']
|
|
|
|
|
747 |
if ffmpeg_cover is not None:
|
748 |
if ffmpeg_cover.endswith('.png'):
|
749 |
ffmpeg_cmd += ['-c:v', 'png', '-disposition:v', 'attached_pic'] # PNG cover
|
750 |
else:
|
751 |
+
ffmpeg_cmd += ['-c:v', 'copy', '-disposition:v', 'attached_pic'] # JPEG cover (no re-encoding needed)
|
|
|
752 |
if ffmpeg_cover is not None and ffmpeg_cover.endswith('.png'):
|
753 |
+
ffmpeg_cmd += ['-pix_fmt', 'yuv420p']
|
754 |
+
ffmpeg_cmd += ['-af', 'agate=threshold=-33dB:ratio=2:attack=5:release=100,acompressor=threshold=-20dB:ratio=2.5:attack=50:release=200:makeup=0dB,loudnorm=I=-19:TP=-3:LRA=7:linear=true']
|
755 |
ffmpeg_cmd += ['-movflags', '+faststart', '-y', ffmpeg_final_file]
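As a concrete picture (not part of the commit), the list built above expands to roughly the following invocation for a JPEG cover when running outside Docker; paths are placeholders and the -af gate/compressor/loudnorm chain is elided for brevity.

# Illustrative sketch only -- paths are placeholders.
import shutil
import subprocess

ffmpeg_cmd = [
    shutil.which('ffmpeg'), '-i', 'assembled.wav', '-i', 'metadata.txt',
    '-i', 'cover.jpg', '-map', '0:a', '-map', '2:v',
    '-map_metadata', '1', '-c:a', 'aac', '-b:a', '128k', '-ar', '44100',
    '-c:v', 'copy', '-disposition:v', 'attached_pic',
    '-movflags', '+faststart', '-y', 'audiobook.m4b',
]
subprocess.run(ffmpeg_cmd, check=True)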
|
|
|
756 |
if session['script_mode'] == DOCKER_UTILS:
|
757 |
try:
|
758 |
container = session['client'].containers.run(
|
|
|
767 |
print(container.decode('utf-8'))
|
768 |
if shutil.copy(docker_final_file, final_file):
|
769 |
return True
|
|
|
770 |
return False
|
771 |
except docker.errors.ContainerError as e:
|
772 |
raise DependencyError(e)
|
|
|
787 |
try:
|
788 |
chapter_files = [f for f in os.listdir(session['chapters_dir']) if f.endswith(".wav")]
|
789 |
chapter_files = sorted(chapter_files, key=lambda x: int(re.search(r'\d+', x).group()))
|
790 |
+
assembled_audio = os.path.join(session['tmp_dir'], session['metadata']['title'] + '.' + audioproc_format)
|
791 |
metadata_file = os.path.join(session['tmp_dir'], 'metadata.txt')
|
792 |
if assemble_audio():
|
793 |
if generate_ffmpeg_metadata():
|
|
|
861 |
os.makedirs(root_dir)
|
862 |
print(f'Created missing directory: {root_dir}')
|
863 |
current_time = time.time()
|
864 |
+
age_limit = current_time - interface_shared_expire * 60 * 60  # interface_shared_expire hours, in seconds
|
865 |
for folder_name in os.listdir(root_dir):
|
866 |
dir_path = os.path.join(root_dir, folder_name)
|
867 |
if os.path.isdir(dir_path) and folder_name.startswith('web-'):
|
|
|
899 |
pass
|
900 |
|
901 |
if args['language'] is not None and args['language'] in language_mapping.keys():
|
|
|
902 |
session_id = args['session'] if args['session'] is not None else str(uuid.uuid4())
|
903 |
session = context.get_session(session_id)
|
904 |
session['id'] = session_id
|
|
|
916 |
top_p = args['top_p']
|
917 |
speed = args['speed']
|
918 |
enable_text_splitting = args['enable_text_splitting'] if args['enable_text_splitting'] is not None else True
|
919 |
+
custom_model_file = args['custom_model'] if args['custom_model'] != 'none' and args['custom_model'] is not None else None
|
920 |
+
fine_tuned = args['fine_tuned'] if check_fine_tuned(args['fine_tuned'], args['language']) else None
|
|
|
|
|
|
|
|
|
|
|
921 |
|
922 |
if not fine_tuned:
|
923 |
raise ValueError('The fine tuned model does not exist.')
|
|
|
936 |
session['client'] = docker.from_env()
|
937 |
|
938 |
session['tmp_dir'] = os.path.join(processes_dir, f"ebook-{session['id']}")
|
939 |
+
session['chapters_dir'] = os.path.join(session['tmp_dir'], f"chapters_{hashlib.md5(args['ebook'].encode()).hexdigest()}")
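A one-liner sketch (not part of the commit) of the directory naming above: the chapters folder is keyed by an MD5 of the ebook path, so converting the same file again resumes into the same folder.

# Illustrative sketch only -- the path is a placeholder.
import hashlib
ebook_path = '/tmp/ebook-session/mybook.epub'
print('chapters_' + hashlib.md5(ebook_path.encode()).hexdigest())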
|
940 |
session['chapters_dir_sentences'] = os.path.join(session['chapters_dir'], 'sentences')
|
941 |
|
942 |
if not is_gui_process:
|
943 |
print(f'*********** Session: {session_id}', '************* Store it in case of interruption or crash you can resume the conversion')
|
944 |
+
session['custom_model_dir'] = os.path.join(models_dir,'__sessions',f"model-{session['id']}")
|
945 |
+
if custom_model_file:
|
946 |
+
session['custom_model'], progression_status = extract_custom_model(custom_model_file, session['custom_model_dir'])
|
947 |
+
if not session['custom_model']:
|
948 |
+
raise ValueError(f'{custom_model_file} could not be extracted or mandatory files are missing')
|
949 |
|
950 |
if prepare_dirs(args['ebook'], session):
|
951 |
session['filename_noext'] = os.path.splitext(os.path.basename(session['src']))[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
952 |
if not torch.cuda.is_available() or device == 'cpu':
|
953 |
if device == 'gpu':
|
954 |
print('GPU is not available on your device!')
|
|
|
995 |
final_file = combine_audio_chapters(session)
|
996 |
if final_file is not None:
|
997 |
progress_status = f'Audiobook {os.path.basename(final_file)} created!'
|
|
|
998 |
return progress_status, final_file
|
999 |
else:
|
1000 |
error = 'combine_audio_chapters() error: final_file not created!'
|
|
|
1009 |
else:
|
1010 |
error = 'convert_to_epub() failed!'
|
1011 |
else:
|
1012 |
+
error = f"Temporary directory {session['tmp_dir']} not removed due to failure."
|
|
|
1013 |
else:
|
1014 |
error = f"Language {args['language']} is not supported."
|
1015 |
+
if session['cancellation_requested']:
|
1016 |
+
error = 'Cancelled'
|
1017 |
print(error)
|
1018 |
return error, None
|
1019 |
except Exception as e:
|
|
|
1035 |
)
|
1036 |
for lang, details in language_mapping.items()
|
1037 |
]
|
1038 |
+
custom_model_options = None
|
1039 |
fine_tuned_options = list(models['xtts'].keys())
|
1040 |
default_language_name = next((name for name, key in language_options if key == default_language_code), None)
|
1041 |
|
|
|
1046 |
radius_size='lg',
|
1047 |
font_mono=['JetBrains Mono', 'monospace', 'Consolas', 'Menlo', 'Liberation Mono']
|
1048 |
)
|
1049 |
+
|
1050 |
with gr.Blocks(theme=theme) as interface:
|
1051 |
gr.HTML(
|
1052 |
'''
|
|
|
1089 |
padding: 0 !important;
|
1090 |
margin: 0 !important;
|
1091 |
}
|
1092 |
+
#component-7, #component-10, #component-20 {
|
1093 |
height: 140px !important;
|
1094 |
}
|
1095 |
+
#component-47, #component-51 {
|
1096 |
+
height: 100px !important;
|
1097 |
+
}
|
1098 |
</style>
|
1099 |
'''
|
1100 |
)
|
|
|
1102 |
f'''
|
1103 |
# Ebook2Audiobook v{version}<br/>
|
1104 |
https://github.com/DrewThomasson/ebook2audiobook<br/>
|
1105 |
+
Convert eBooks into immersive audiobooks with realistic voice TTS models.<br/>
|
1106 |
+
Multi-user, multiprocessing and multithreaded; can run on a geo cluster to share the conversion load across the grid.
|
1107 |
'''
|
1108 |
)
|
1109 |
with gr.Tabs():
|
1110 |
+
gr_tab_main = gr.TabItem('Input Options')
|
1111 |
+
with gr_tab_main:
|
1112 |
with gr.Row():
|
1113 |
with gr.Column(scale=3):
|
|
|
|
|
|
|
|
|
1114 |
with gr.Group():
|
1115 |
+
gr_ebook_file = gr.File(label='EBook File (.epub, .mobi, .azw3, fb2, lrf, rb, snb, tcr, .pdf, .txt, .rtf, doc, .docx, .html, .odt, .azw)', file_types=['.epub', '.mobi', '.azw3', 'fb2', 'lrf', 'rb', 'snb', 'tcr', '.pdf', '.txt', '.rtf', 'doc', '.docx', '.html', '.odt', '.azw'])
|
1116 |
+
with gr.Group():
|
1117 |
+
gr_voice_file = gr.File(label='*Cloning Voice (a .wav 24000hz for XTTS base model and 16000hz for FAIRSEQ base model, no more than 6 sec)', file_types=['.wav'], visible=interface_component_options['gr_voice_file'])
|
1118 |
gr.Markdown('<p> * Optional</p>')
|
1119 |
with gr.Group():
|
1120 |
+
gr_device = gr.Radio(label='Processor Unit', choices=['CPU', 'GPU'], value='CPU')
|
1121 |
+
with gr.Group():
|
1122 |
+
gr_language = gr.Dropdown(label='Language', choices=[name for name, _ in language_options], value=default_language_name)
|
1123 |
+
with gr.Column(scale=3):
|
1124 |
+
gr_group_custom_model = gr.Group(visible=interface_component_options['gr_group_custom_model'])
|
1125 |
+
with gr_group_custom_model:
|
1126 |
+
gr_custom_model_file = gr.File(label='*Custom XTTS Model (a .zip containing config.json, vocab.json, model.pth, ref.wav)', file_types=['.zip'])
|
1127 |
+
gr_custom_model_list = gr.Dropdown(label='', choices=['none'], interactive=True)
|
1128 |
+
gr.Markdown('<p> * Optional</p>')
|
1129 |
+
with gr.Group():
|
1130 |
+
gr_session_status = gr.Textbox(label='Session')
|
1131 |
+
with gr.Group():
|
1132 |
+
gr_tts_engine = gr.Dropdown(label='TTS Base', choices=[default_tts_engine], value=default_tts_engine, interactive=True)
|
1133 |
gr_fine_tuned = gr.Dropdown(label='Fine Tuned Models', choices=fine_tuned_options, value=default_fine_tuned, interactive=True)
|
1134 |
+
gr_tab_preferences = gr.TabItem('Audio Generation Preferences', visible=interface_component_options['gr_tab_preferences'])
|
1135 |
+
with gr_tab_preferences:
|
1136 |
gr.Markdown(
|
1137 |
'''
|
1138 |
### Customize Audio Generation Parameters
|
|
|
1194 |
)
|
1195 |
|
1196 |
gr_state = gr.State(value="") # Initialize state for each user session
|
|
|
1197 |
gr_session = gr.Textbox(label='Session', visible=False)
|
1198 |
gr_conversion_progress = gr.Textbox(label='Progress')
|
1199 |
gr_convert_btn = gr.Button('Convert', variant='primary', interactive=False)
|
|
|
1265 |
def update_interface():
|
1266 |
nonlocal is_converting
|
1267 |
is_converting = False
|
1268 |
+
return gr.update(value='Convert', variant='primary', interactive=False), gr.update(), gr.update(value=audiobook_file), update_audiobooks_ddn(), hide_modal()
|
1269 |
|
1270 |
def refresh_audiobook_list():
|
1271 |
files = []
|
|
|
1282 |
return link, link, gr.update(visible=True)
|
1283 |
return None, None, gr.update(visible=False)
|
1284 |
|
1285 |
+
def update_convert_btn(upload_file, custom_model_file, session_id):
|
1286 |
+
session = context.get_session(session_id)
|
1287 |
+
if hasattr(upload_file, 'name') and not hasattr(custom_model_file, 'name'):
|
1288 |
+
yield gr.update(variant='primary', interactive=True)
|
1289 |
+
else:
|
1290 |
+
yield gr.update(variant='primary', interactive=False)
|
1291 |
+
return
|
1292 |
|
1293 |
def update_audiobooks_ddn():
|
1294 |
files = refresh_audiobook_list()
|
1295 |
return gr.update(choices=files, label='Audiobooks', value=files[0] if files else None)
|
1296 |
|
1297 |
+
async def change_gr_ebook_file(f, session_id):
|
1298 |
nonlocal is_converting
|
1299 |
if context and session_id:
|
1300 |
session = context.get_session(session_id)
|
1301 |
if f is None:
|
1302 |
if is_converting:
|
1303 |
session['cancellation_requested'] = True
|
1304 |
+
yield show_modal('Cancellation requested, please wait...')
|
1305 |
return
|
1306 |
+
session['cancellation_requested'] = False
|
1307 |
+
yield hide_modal()
|
|
|
|
|
|
|
|
|
|
|
1308 |
return
|
1309 |
|
1310 |
+
def change_gr_language(selected: str, session_id: str):
|
1311 |
+
nonlocal custom_model_options
|
1312 |
if selected == 'zzzz':
|
1313 |
new_language_name = default_language_name
|
1314 |
new_language_key = default_language_code
|
1315 |
else:
|
1316 |
new_language_name, new_language_key = next(((name, key) for name, key in language_options if key == selected), (None, None))
|
1317 |
+
tts_engine_options = ['xtts'] if language_xtts.get(new_language_key, False) else ['fairseq']
|
|
|
|
|
|
|
|
|
1318 |
fine_tuned_options = [
|
1319 |
model_name
|
1320 |
+
for model_name, model_details in models.get(tts_engine_options[0], {}).items()
|
1321 |
if model_details.get('lang') == 'multi' or model_details.get('lang') == new_language_key
|
1322 |
]
|
1323 |
+
custom_model_options = ['none']
|
1324 |
+
if context and session_id:
|
1325 |
+
session = context.get_session(session_id)
|
1326 |
+
session['language'] = new_language_key
|
1327 |
+
custom_model_tts = check_custom_model_tts(session)
|
1328 |
+
custom_model_tts_dir = os.path.join(session['custom_model_dir'], custom_model_tts)
|
1329 |
+
if os.path.exists(custom_model_tts_dir):
|
1330 |
+
custom_model_options += os.listdir(custom_model_tts_dir)
|
1331 |
return (
|
1332 |
+
gr.update(value=new_language_name),
|
1333 |
+
gr.update(choices=tts_engine_options, value=tts_engine_options[0]),
|
1334 |
+
gr.update(choices=fine_tuned_options, value=fine_tuned_options[0] if fine_tuned_options else 'none'),
|
1335 |
+
gr.update(choices=custom_model_options, value=custom_model_options[0])
|
1336 |
)
|
1337 |
|
1338 |
+
def check_custom_model_tts(session):
|
1339 |
+
custom_model_tts = 'xtts'
|
1340 |
+
if not language_xtts.get(session['language']):
|
1341 |
+
custom_model_tts = 'fairseq'
|
1342 |
+
custom_model_tts_dir = os.path.join(session['custom_model_dir'], custom_model_tts)
|
1343 |
+
if not os.path.isdir(custom_model_tts_dir):
|
1344 |
+
os.makedirs(custom_model_tts_dir, exist_ok=True)
|
1345 |
+
return custom_model_tts
|
1346 |
+
|
1347 |
+
def change_gr_custom_model_list(custom_model_list):
|
1348 |
+
if custom_model_list == 'none':
|
1349 |
+
return gr.update(visible=True)
|
1350 |
+
return gr.update(visible=False)
|
1351 |
+
|
1352 |
+
async def change_gr_custom_model_file(custom_model_file, session_id):
|
1353 |
+
try:
|
1354 |
+
nonlocal custom_model_options, gr_custom_model_file, gr_conversion_progress
|
1355 |
+
if context and session_id:
|
1356 |
+
session = context.get_session(session_id)
|
1357 |
+
if custom_model_file is not None:
|
1358 |
+
if analyze_uploaded_file(custom_model_file):
|
1359 |
+
session['custom_model'], progress_status = extract_custom_model(custom_model_file, None, session)
|
1360 |
+
if session['custom_model']:
|
1361 |
+
custom_model_tts_dir = check_custom_model_tts(session)
|
1362 |
+
custom_model_options = ['none'] + os.listdir(os.path.join(session['custom_model_dir'], custom_model_tts_dir))
|
1363 |
+
yield (
|
1364 |
+
gr.update(visible=False),
|
1365 |
+
gr.update(choices=custom_model_options, value=session['custom_model']),
|
1366 |
+
gr.update(value=f"{session['custom_model']} added to the custom list")
|
1367 |
+
)
|
1368 |
+
gr_custom_model_file = gr.File(label='*XTTS Model (a .zip containing config.json, vocab.json, model.pth, ref.wav)', value=None, file_types=['.zip'])
|
1369 |
+
return
|
1370 |
+
yield gr.update(), gr.update(), gr.update(value='Invalid file! Please upload a valid ZIP.')
|
1371 |
+
return
|
1372 |
+
except Exception as e:
|
1373 |
+
yield gr.update(), gr.update(), gr.update(value=f'Error: {str(e)}')
|
1374 |
+
return
|
1375 |
+
|
1376 |
+
def change_gr_fine_tuned(fine_tuned):
|
1377 |
+
visible = False
|
1378 |
+
if fine_tuned == 'std':
|
1379 |
+
visible = True
|
1380 |
+
return gr.update(visible=visible)
|
1381 |
|
1382 |
def change_gr_data(data):
|
1383 |
data['event'] = 'change_data'
|
|
|
1385 |
|
1386 |
def change_gr_read_data(data):
|
1387 |
nonlocal audiobooks_dir
|
1388 |
+
nonlocal custom_model_options
|
1389 |
warning_text_extra = ''
|
|
|
|
|
1390 |
if not data:
|
1391 |
data = {'session_id': str(uuid.uuid4())}
|
1392 |
warning_text = f"Session: {data['session_id']}"
|
|
|
1396 |
warning_text = data['session_id']
|
1397 |
event = data.get('event', '')
|
1398 |
if event != 'load':
|
1399 |
+
return [gr.update(), gr.update(), gr.update(), gr.update(), gr.update()]
|
1400 |
+
session = context.get_session(data['session_id'])
|
1401 |
+
session['custom_model_dir'] = os.path.join(models_dir,'__sessions',f"model-{session['id']}")
|
1402 |
+
os.makedirs(session['custom_model_dir'], exist_ok=True)
|
1403 |
+
custom_model_tts_dir = check_custom_model_tts(session)
|
1404 |
+
custom_model_options = ['none'] + os.listdir(os.path.join(session['custom_model_dir'],custom_model_tts_dir))
|
1405 |
if is_gui_shared:
|
1406 |
+
warning_text_extra = f' Note: access limit time: {interface_shared_expire} hours'
|
1407 |
audiobooks_dir = os.path.join(audiobooks_gradio_dir, f"web-{data['session_id']}")
|
1408 |
delete_old_web_folders(audiobooks_gradio_dir)
|
1409 |
else:
|
1410 |
audiobooks_dir = os.path.join(audiobooks_host_dir, f"web-{data['session_id']}")
|
1411 |
+
return [data, f'{warning_text}{warning_text_extra}', data['session_id'], update_audiobooks_ddn(), gr.update(choices=custom_model_options, value='none')]
|
1412 |
|
1413 |
+
def submit_convert_btn(
|
1414 |
session, device, ebook_file, voice_file, language,
|
1415 |
+
custom_model_file, temperature, length_penalty,
|
1416 |
repetition_penalty, top_k, top_p, speed, enable_text_splitting, fine_tuned
|
1417 |
):
|
1418 |
nonlocal is_converting
|
|
|
1426 |
"audiobooks_dir": audiobooks_dir,
|
1427 |
"voice": voice_file.name if voice_file else None,
|
1428 |
"language": next((key for name, key in language_options if name == language), None),
|
1429 |
+
"custom_model": next((key for name, key in language_options if name != 'none'), None),
|
|
|
1430 |
"temperature": float(temperature),
|
1431 |
"length_penalty": float(length_penalty),
|
1432 |
"repetition_penalty": float(repetition_penalty),
|
|
|
1438 |
}
|
1439 |
|
1440 |
if args["ebook"] is None:
|
1441 |
+
return gr.update(value='Error: a file is required.')
|
1442 |
|
1443 |
try:
|
1444 |
is_converting = True
|
1445 |
progress_status, audiobook_file = convert_ebook(args)
|
|
|
|
|
1446 |
if audiobook_file is None:
|
1447 |
if is_converting:
|
1448 |
+
return gr.update(value='Conversion cancelled.')
|
1449 |
else:
|
1450 |
+
return gr.update(value='Conversion failed.')
|
1451 |
else:
|
1452 |
+
return progress_status
|
1453 |
except Exception as e:
|
|
|
1454 |
return DependencyError(e)
|
1455 |
|
1456 |
gr_ebook_file.change(
|
1457 |
+
fn=update_convert_btn,
|
1458 |
+
inputs=[gr_ebook_file, gr_custom_model_file, gr_session],
|
1459 |
+
outputs=gr_convert_btn
|
1460 |
+
).then(
|
1461 |
fn=change_gr_ebook_file,
|
1462 |
+
inputs=[gr_ebook_file, gr_session],
|
1463 |
+
outputs=[gr_modal_html]
|
1464 |
)
|
1465 |
gr_language.change(
|
1466 |
+
fn=lambda selected, session_id: change_gr_language(dict(language_options).get(selected, 'Unknown'), session_id),
|
1467 |
+
inputs=[gr_language, gr_session],
|
1468 |
+
outputs=[gr_language, gr_tts_engine, gr_fine_tuned, gr_custom_model_list]
|
1469 |
)
|
1470 |
gr_audiobooks_ddn.change(
|
1471 |
fn=change_gr_audiobooks_ddn,
|
|
|
1474 |
)
|
1475 |
gr_custom_model_file.change(
|
1476 |
fn=change_gr_custom_model_file,
|
1477 |
+
inputs=[gr_custom_model_file, gr_session],
|
1478 |
+
outputs=[gr_fine_tuned, gr_custom_model_list, gr_conversion_progress]
|
1479 |
+
)
|
1480 |
+
gr_custom_model_list.change(
|
1481 |
+
fn=change_gr_custom_model_list,
|
1482 |
+
inputs=gr_custom_model_list,
|
1483 |
+
outputs=gr_fine_tuned
|
1484 |
+
)
|
1485 |
+
gr_fine_tuned.change(
|
1486 |
+
fn=change_gr_fine_tuned,
|
1487 |
+
inputs=gr_fine_tuned,
|
1488 |
+
outputs=gr_group_custom_model
|
1489 |
)
|
1490 |
gr_session.change(
|
1491 |
fn=change_gr_data,
|
|
|
1506 |
gr_read_data.change(
|
1507 |
fn=change_gr_read_data,
|
1508 |
inputs=gr_read_data,
|
1509 |
+
outputs=[gr_data, gr_session_status, gr_session, gr_audiobooks_ddn, gr_custom_model_list]
|
1510 |
)
|
1511 |
gr_convert_btn.click(
|
1512 |
+
fn=submit_convert_btn,
|
|
|
|
|
|
|
|
|
1513 |
inputs=[
|
1514 |
gr_session, gr_device, gr_ebook_file, gr_voice_file, gr_language,
|
1515 |
+
gr_custom_model_list, gr_temperature, gr_length_penalty,
|
1516 |
gr_repetition_penalty, gr_top_k, gr_top_p, gr_speed, gr_enable_text_splitting, gr_fine_tuned
|
1517 |
],
|
1518 |
+
outputs=gr_conversion_progress
|
1519 |
).then(
|
1520 |
fn=update_interface,
|
1521 |
inputs=None,
|
1522 |
+
outputs=[gr_convert_btn, gr_ebook_file, gr_audio_player, gr_audiobooks_ddn, gr_modal_html]
|
1523 |
)
|
1524 |
interface.load(
|
1525 |
fn=None,
|
|
|
1539 |
)
|
1540 |
|
1541 |
try:
|
1542 |
+
interface.queue(default_concurrency_limit=interface_concurrency_limit).launch(server_name=interface_host, server_port=interface_port, share=is_gui_shared)
|
1543 |
except OSError as e:
|
1544 |
print(f'Connection error: {e}')
|
1545 |
except socket.error as e:
|
lib/lang.py
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
setup.py
CHANGED
@@ -1,50 +1,50 @@
|
|
1 |
-
import subprocess
|
2 |
-
import sys
|
3 |
-
from setuptools import setup, find_packages
|
4 |
-
from setuptools.command.develop import develop
|
5 |
-
from setuptools.command.install import install
|
6 |
-
import os
|
7 |
-
|
8 |
-
cwd = os.path.dirname(os.path.abspath(__file__))
|
9 |
-
|
10 |
-
with open("README.md", "r", encoding='utf-8') as fh:
|
11 |
-
long_description = fh.read()
|
12 |
-
|
13 |
-
with open('requirements.txt') as f:
|
14 |
-
requirements = f.read().splitlines()
|
15 |
-
|
16 |
-
class PostInstallCommand(install):
|
17 |
-
def run(self):
|
18 |
-
install.run(self)
|
19 |
-
try:
|
20 |
-
subprocess.run([sys.executable, 'python -m', 'unidic', 'download'], check=True)
|
21 |
-
except Exception:
|
22 |
-
print("unidic download failed during installation, but it will be re-attempted a diffrent way when the app itself runs.")
|
23 |
-
|
24 |
-
|
25 |
-
setup(
|
26 |
-
name='ebook2audiobook',
|
27 |
-
version='2.0.0',
|
28 |
-
python_requires=">=3.10,<3.
|
29 |
-
author="Drew Thomasson",
|
30 |
-
description="Convert eBooks to audiobooks with chapters and metadata",
|
31 |
-
long_description=long_description,
|
32 |
-
long_description_content_type="text/markdown",
|
33 |
-
url="https://github.com/DrewThomasson/ebook2audiobook",
|
34 |
-
packages=find_packages(),
|
35 |
-
install_requires=requirements,
|
36 |
-
classifiers=[
|
37 |
-
"Programming Language :: Python :: 3",
|
38 |
-
"License :: OSI Approved :: MIT License",
|
39 |
-
"Operating System :: OS Independent",
|
40 |
-
],
|
41 |
-
include_package_data=True,
|
42 |
-
entry_points={
|
43 |
-
"console_scripts": [
|
44 |
-
"ebook2audiobook = app:main",
|
45 |
-
],
|
46 |
-
},
|
47 |
-
cmdclass={
|
48 |
-
'install': PostInstallCommand,
|
49 |
-
}
|
50 |
-
)
|
|
|
1 |
+
import subprocess
|
2 |
+
import sys
|
3 |
+
from setuptools import setup, find_packages
|
4 |
+
from setuptools.command.develop import develop
|
5 |
+
from setuptools.command.install import install
|
6 |
+
import os
|
7 |
+
|
8 |
+
cwd = os.path.dirname(os.path.abspath(__file__))
|
9 |
+
|
10 |
+
with open("README.md", "r", encoding='utf-8') as fh:
|
11 |
+
long_description = fh.read()
|
12 |
+
|
13 |
+
with open('requirements.txt') as f:
|
14 |
+
requirements = f.read().splitlines()
|
15 |
+
|
16 |
+
class PostInstallCommand(install):
|
17 |
+
def run(self):
|
18 |
+
install.run(self)
|
19 |
+
try:
|
20 |
+
subprocess.run([sys.executable, '-m', 'unidic', 'download'], check=True)
|
21 |
+
except Exception:
|
22 |
+
print("unidic download failed during installation, but it will be re-attempted a diffrent way when the app itself runs.")
|
23 |
+
|
24 |
+
|
25 |
+
setup(
|
26 |
+
name='ebook2audiobook',
|
27 |
+
version='2.0.0',
|
28 |
+
python_requires=">=3.10,<3.13",
|
29 |
+
author="Drew Thomasson",
|
30 |
+
description="Convert eBooks to audiobooks with chapters and metadata",
|
31 |
+
long_description=long_description,
|
32 |
+
long_description_content_type="text/markdown",
|
33 |
+
url="https://github.com/DrewThomasson/ebook2audiobook",
|
34 |
+
packages=find_packages(),
|
35 |
+
install_requires=requirements,
|
36 |
+
classifiers=[
|
37 |
+
"Programming Language :: Python :: 3",
|
38 |
+
"License :: OSI Approved :: MIT License",
|
39 |
+
"Operating System :: OS Independent",
|
40 |
+
],
|
41 |
+
include_package_data=True,
|
42 |
+
entry_points={
|
43 |
+
"console_scripts": [
|
44 |
+
"ebook2audiobook = app:main",
|
45 |
+
],
|
46 |
+
},
|
47 |
+
cmdclass={
|
48 |
+
'install': PostInstallCommand,
|
49 |
+
}
|
50 |
+
)
|