mkutarna committed on
Commit
1d427a4
1 Parent(s): a7e6fda

Added files from github repo

Browse files
Files changed (9) hide show
  1. .gitignore +131 -0
  2. README.md +6 -3
  3. app.py +35 -0
  4. latest_silero_models.yml +563 -0
  5. requirements.txt +6 -0
  6. src/__init__.py +0 -0
  7. src/output.py +14 -0
  8. src/parser.py +44 -0
  9. src/predict.py +40 -0
.gitignore ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Gemfile.lock
2
+ collect_env.py
3
+ tmp*
4
+ Untitled*.ipynb
5
+ *.bak
6
+ token
7
+ .idea/
8
+ docs/
9
+ conda/
10
+ tmp/
11
+ notebooks/outputs/
12
+ outputs/
13
+ tests/__pycache__
14
+ tests/.pytest_cache
15
+
16
+ tags
17
+ *~
18
+ ~*
19
+ *.swp
20
+ .gitconfig
21
+ # Byte-compiled / optimized / DLL files
22
+ __pycache__/
23
+ **/__pycache__/
24
+ *.py[cod]
25
+ *$py.class
26
+
27
+ # C extensions
28
+ *.so
29
+
30
+ # Distribution / packaging
31
+ .Python
32
+ build/
33
+ develop-eggs/
34
+ dist/
35
+ downloads/
36
+ eggs/
37
+ .eggs/
38
+ lib/
39
+ lib64/
40
+ parts/
41
+ sdist/
42
+ var/
43
+ wheels/
44
+ *.egg-info/
45
+ .installed.cfg
46
+ *.egg
47
+ MANIFEST
48
+
49
+ # PyInstaller
50
+ # Usually these files are written by a python script from a template
51
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
52
+ *.manifest
53
+ *.spec
54
+
55
+ # Installer logs
56
+ pip-log.txt
57
+ pip-delete-this-directory.txt
58
+
59
+ # Unit test / coverage reports
60
+ htmlcov/
61
+ .tox/
62
+ .coverage
63
+ .coverage.*
64
+ .cache
65
+ nosetests.xml
66
+ coverage.xml
67
+ *.cover
68
+ .hypothesis/
69
+ .pytest_cache/
70
+
71
+ # Translations
72
+ *.mo
73
+ *.pot
74
+
75
+ # Django stuff:
76
+ *.log
77
+ local_settings.py
78
+ db.sqlite3
79
+
80
+ # Flask stuff:
81
+ instance/
82
+ .webassets-cache
83
+
84
+ # Scrapy stuff:
85
+ .scrapy
86
+
87
+ # Sphinx documentation
88
+ docs/_build/
89
+
90
+ # PyBuilder
91
+ target/
92
+
93
+ # Jupyter Notebook
94
+ .ipynb_checkpoints
95
+ **/.ipynb_checkpoints
96
+
97
+ # pyenv
98
+ .python-version
99
+
100
+ .vscode/
101
+
102
+ # celery beat schedule file
103
+ celerybeat-schedule
104
+
105
+ # SageMath parsed files
106
+ *.sage.py
107
+
108
+ # Environments
109
+ .env
110
+ .venv
111
+ env/
112
+ venv/
113
+ ENV/
114
+ env.bak/
115
+ venv.bak/
116
+
117
+ # Spyder project settings
118
+ .spyderproject
119
+ .spyproject
120
+
121
+ # Rope project settings
122
+ .ropeproject
123
+
124
+ # mkdocs documentation
125
+ /site
126
+
127
+ # mypy
128
+ .mypy_cache/
129
+
130
+ # Mac stuff
131
+ .DS_Store
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  title: Audiobook Gen
3
- emoji: 📉
4
- colorFrom: red
5
  colorTo: indigo
6
  sdk: streamlit
7
  sdk_version: 1.10.0
@@ -10,4 +10,7 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
  ---
2
  title: Audiobook Gen
3
+ emoji: 📉
4
+ colorFrom: blue
5
  colorTo: indigo
6
  sdk: streamlit
7
  sdk_version: 1.10.0
 
10
  license: mit
11
  ---
12
 
13
+ Audiobook Gen
14
+ =============
15
+
16
+ Audiobook Gen is a tool that lets users generate an audio file from text (e.g. an audiobook), read in the voice of their choice. It takes three inputs: the text to be read aloud, plus a paired text file and audio file for the desired voice.
app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from src.parser import read_epub, read_txt
4
+ from src.predict import audiobook_gen, load_models
5
+ from src.output import assemble_zip
6
+
7
# Streamlit entry point: upload an .epub, synthesize it chapter by chapter,
# then offer the generated audio as a single zip download.
st.title('Audiobook Generation Tool')
st.markdown("This tool generates audiobook files from an imported ebook file.")

with st.sidebar:
    ebook_upload = st.file_uploader(
        label="Upload the target ebook (.epub only)",
        type=['epub'])

if st.button('Click to run!'):
    if ebook_upload is None:
        # No file has been selected yet; parsing None would raise, so tell
        # the user what is missing instead of showing a traceback.
        st.error('Please upload an .epub file before running.')
    else:
        ebook, title = read_epub(ebook_upload)
        model = load_models()
        st.success('Parsing complete!')

        with st.spinner('Generating audio...'):
            audiobook_gen(ebook, title, model)
        st.success('TTS generation complete!')

        with st.spinner('Building zip file...'):
            zip_file = assemble_zip(title)
            title_name = f'{title}.zip'
        st.success('Zip file prepared!')

        # Keep the archive open while the download button is created so the
        # file handle passed as ``data`` is still readable.
        with open(zip_file, "rb") as fp:
            st.download_button(
                label="Download Audiobook",
                data=fp,
                file_name=title_name,
                mime="application/zip"
            )
latest_silero_models.yml ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pre-trained STT models
2
+ stt_models:
3
+ en:
4
+ latest:
5
+ meta:
6
+ name: "en_v6"
7
+ sample: "https://models.silero.ai/examples/en_sample.wav"
8
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
9
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
10
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
11
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
12
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
13
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
14
+ v6:
15
+ meta:
16
+ name: "en_v6"
17
+ sample: "https://models.silero.ai/examples/en_sample.wav"
18
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
19
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
20
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
21
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
22
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
23
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
24
+ v5:
25
+ meta:
26
+ name: "en_v5"
27
+ sample: "https://models.silero.ai/examples/en_sample.wav"
28
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
29
+ jit: "https://models.silero.ai/models/en/en_v5.jit"
30
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
31
+ onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx"
32
+ jit_q: "https://models.silero.ai/models/en/en_v5_q.jit"
33
+ jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit"
34
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx"
35
+ v4_0:
36
+ meta:
37
+ name: "en_v4_0"
38
+ sample: "https://models.silero.ai/examples/en_sample.wav"
39
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
40
+ jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model"
41
+ onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx"
42
+ v3:
43
+ meta:
44
+ name: "en_v3"
45
+ sample: "https://models.silero.ai/examples/en_sample.wav"
46
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
47
+ jit: "https://models.silero.ai/models/en/en_v3_jit.model"
48
+ onnx: "https://models.silero.ai/models/en/en_v3.onnx"
49
+ jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model"
50
+ jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model"
51
+ jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model"
52
+ onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx"
53
+ jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model"
54
+ jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model"
55
+ onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx"
56
+ v2:
57
+ meta:
58
+ name: "en_v2"
59
+ sample: "https://models.silero.ai/examples/en_sample.wav"
60
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
61
+ jit: "https://models.silero.ai/models/en/en_v2_jit.model"
62
+ onnx: "https://models.silero.ai/models/en/en_v2.onnx"
63
+ tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz"
64
+ v1:
65
+ meta:
66
+ name: "en_v1"
67
+ sample: "https://models.silero.ai/examples/en_sample.wav"
68
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
69
+ jit: "https://models.silero.ai/models/en/en_v1_jit.model"
70
+ onnx: "https://models.silero.ai/models/en/en_v1.onnx"
71
+ tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz"
72
+ de:
73
+ latest:
74
+ meta:
75
+ name: "de_v1"
76
+ sample: "https://models.silero.ai/examples/de_sample.wav"
77
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
78
+ jit: "https://models.silero.ai/models/de/de_v1_jit.model"
79
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
80
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
81
+ v1:
82
+ meta:
83
+ name: "de_v1"
84
+ sample: "https://models.silero.ai/examples/de_sample.wav"
85
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
86
+ jit_large: "https://models.silero.ai/models/de/de_v1_jit.model"
87
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
88
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
89
+ v3:
90
+ meta:
91
+ name: "de_v3"
92
+ sample: "https://models.silero.ai/examples/de_sample.wav"
93
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
94
+ jit_large: "https://models.silero.ai/models/de/de_v3_large.jit"
95
+ v4:
96
+ meta:
97
+ name: "de_v4"
98
+ sample: "https://models.silero.ai/examples/de_sample.wav"
99
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
100
+ jit_large: "https://models.silero.ai/models/de/de_v4_large.jit"
101
+ onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx"
102
+ es:
103
+ latest:
104
+ meta:
105
+ name: "es_v1"
106
+ sample: "https://models.silero.ai/examples/es_sample.wav"
107
+ labels: "https://models.silero.ai/models/es/es_v1_labels.json"
108
+ jit: "https://models.silero.ai/models/es/es_v1_jit.model"
109
+ onnx: "https://models.silero.ai/models/es/es_v1.onnx"
110
+ tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz"
111
+ ua:
112
+ latest:
113
+ meta:
114
+ name: "ua_v3"
115
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
116
+ credits:
117
+ datasets:
118
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
119
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
120
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
121
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
122
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
123
+ v3:
124
+ meta:
125
+ name: "ua_v3"
126
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
127
+ credits:
128
+ datasets:
129
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
130
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
131
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
132
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
133
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
134
+ v1:
135
+ meta:
136
+ name: "ua_v1"
137
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
138
+ credits:
139
+ datasets:
140
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
141
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
142
+ jit: "https://models.silero.ai/models/ua/ua_v1_jit.model"
143
+ jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model"
144
+ tts_models:
145
+ ru:
146
+ v3_1_ru:
147
+ latest:
148
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
149
+ package: 'https://models.silero.ai/models/tts/ru/v3_1_ru.pt'
150
+ sample_rate: [8000, 24000, 48000]
151
+ ru_v3:
152
+ latest:
153
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
154
+ package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt'
155
+ sample_rate: [8000, 24000, 48000]
156
+ aidar_v2:
157
+ latest:
158
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
159
+ package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt'
160
+ sample_rate: [8000, 16000]
161
+ aidar_8khz:
162
+ latest:
163
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
164
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
165
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
166
+ sample_rate: 8000
167
+ v1:
168
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
169
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
170
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
171
+ sample_rate: 8000
172
+ aidar_16khz:
173
+ latest:
174
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
175
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
176
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
177
+ sample_rate: 16000
178
+ v1:
179
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
180
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
181
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
182
+ sample_rate: 16000
183
+ baya_v2:
184
+ latest:
185
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
186
+ package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt'
187
+ sample_rate: [8000, 16000]
188
+ baya_8khz:
189
+ latest:
190
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
191
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
192
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
193
+ sample_rate: 8000
194
+ v1:
195
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
196
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
197
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
198
+ sample_rate: 8000
199
+ baya_16khz:
200
+ latest:
201
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
202
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
203
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
204
+ sample_rate: 16000
205
+ v1:
206
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
207
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
208
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
209
+ sample_rate: 16000
210
+ irina_v2:
211
+ latest:
212
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
213
+ package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt'
214
+ sample_rate: [8000, 16000]
215
+ irina_8khz:
216
+ latest:
217
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
218
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
219
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
220
+ sample_rate: 8000
221
+ v1:
222
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
223
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
224
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
225
+ sample_rate: 8000
226
+ irina_16khz:
227
+ latest:
228
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
229
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
230
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
231
+ sample_rate: 16000
232
+ v1:
233
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
234
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
235
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
236
+ sample_rate: 16000
237
+ kseniya_v2:
238
+ latest:
239
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
240
+ package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt'
241
+ sample_rate: [8000, 16000]
242
+ kseniya_8khz:
243
+ latest:
244
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
245
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
246
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
247
+ sample_rate: 8000
248
+ v1:
249
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
250
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
251
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
252
+ sample_rate: 8000
253
+ kseniya_16khz:
254
+ latest:
255
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
256
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
257
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
258
+ sample_rate: 16000
259
+ v1:
260
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
261
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
262
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
263
+ sample_rate: 16000
264
+ natasha_v2:
265
+ latest:
266
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
267
+ package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt'
268
+ sample_rate: [8000, 16000]
269
+ natasha_8khz:
270
+ latest:
271
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
272
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
273
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
274
+ sample_rate: 8000
275
+ v1:
276
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
277
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
278
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
279
+ sample_rate: 8000
280
+ natasha_16khz:
281
+ latest:
282
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
283
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
284
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
285
+ sample_rate: 16000
286
+ v1:
287
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
288
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
289
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
290
+ sample_rate: 16000
291
+ ruslan_v2:
292
+ latest:
293
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
294
+ package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt'
295
+ sample_rate: [8000, 16000]
296
+ ruslan_8khz:
297
+ latest:
298
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
299
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
300
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
301
+ sample_rate: 8000
302
+ v1:
303
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
304
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
305
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
306
+ sample_rate: 8000
307
+ ruslan_16khz:
308
+ latest:
309
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
310
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
311
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
312
+ sample_rate: 16000
313
+ v1:
314
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
315
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
316
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
317
+ sample_rate: 16000
318
+ en:
319
+ v3_en:
320
+ latest:
321
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
322
+ package: 'https://models.silero.ai/models/tts/en/v3_en.pt'
323
+ sample_rate: [8000, 24000, 48000]
324
+ v3_en_indic:
325
+ latest:
326
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
327
+ package: 'https://models.silero.ai/models/tts/en/v3_en_indic.pt'
328
+ sample_rate: [8000, 24000, 48000]
329
+ lj_v2:
330
+ latest:
331
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
332
+ package: 'https://models.silero.ai/models/tts/en/v2_lj.pt'
333
+ sample_rate: [8000, 16000]
334
+ lj_8khz:
335
+ latest:
336
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
337
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
338
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
339
+ sample_rate: 8000
340
+ v1:
341
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
342
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
343
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
344
+ sample_rate: 8000
345
+ lj_16khz:
346
+ latest:
347
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
348
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
349
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
350
+ sample_rate: 16000
351
+ v1:
352
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
353
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
354
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
355
+ sample_rate: 16000
356
+ de:
357
+ v3_de:
358
+ latest:
359
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
360
+ package: 'https://models.silero.ai/models/tts/de/v3_de.pt'
361
+ sample_rate: [8000, 24000, 48000]
362
+ thorsten_v2:
363
+ latest:
364
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
365
+ package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt'
366
+ sample_rate: [8000, 16000]
367
+ thorsten_8khz:
368
+ latest:
369
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
370
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
371
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
372
+ sample_rate: 8000
373
+ v1:
374
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
375
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
376
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
377
+ sample_rate: 8000
378
+ thorsten_16khz:
379
+ latest:
380
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
381
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
382
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
383
+ sample_rate: 16000
384
+ v1:
385
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
386
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
387
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
388
+ sample_rate: 16000
389
+ es:
390
+ v3_es:
391
+ latest:
392
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
393
+ package: 'https://models.silero.ai/models/tts/es/v3_es.pt'
394
+ sample_rate: [8000, 24000, 48000]
395
+ tux_v2:
396
+ latest:
397
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
398
+ package: 'https://models.silero.ai/models/tts/es/v2_tux.pt'
399
+ sample_rate: [8000, 16000]
400
+ tux_8khz:
401
+ latest:
402
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
403
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
404
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
405
+ sample_rate: 8000
406
+ v1:
407
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
408
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
409
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
410
+ sample_rate: 8000
411
+ tux_16khz:
412
+ latest:
413
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
414
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
415
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
416
+ sample_rate: 16000
417
+ v1:
418
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
419
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
420
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
421
+ sample_rate: 16000
422
+ fr:
423
+ v3_fr:
424
+ latest:
425
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
426
+ package: 'https://models.silero.ai/models/tts/fr/v3_fr.pt'
427
+ sample_rate: [8000, 24000, 48000]
428
+ gilles_v2:
429
+ latest:
430
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
431
+ package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt'
432
+ sample_rate: [8000, 16000]
433
+ gilles_8khz:
434
+ latest:
435
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
436
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
437
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
438
+ sample_rate: 8000
439
+ v1:
440
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
441
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
442
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
443
+ sample_rate: 8000
444
+ gilles_16khz:
445
+ latest:
446
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
447
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
448
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
449
+ sample_rate: 16000
450
+ v1:
451
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
452
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
453
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
454
+ sample_rate: 16000
455
+ ba:
456
+ aigul_v2:
457
+ latest:
458
+ example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.'
459
+ package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt'
460
+ sample_rate: [8000, 16000]
461
+ language_name: 'bashkir'
462
+ xal:
463
+ v3_xal:
464
+ latest:
465
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
466
+ package: 'https://models.silero.ai/models/tts/xal/v3_xal.pt'
467
+ sample_rate: [8000, 24000, 48000]
468
+ erdni_v2:
469
+ latest:
470
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
471
+ package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt'
472
+ sample_rate: [8000, 16000]
473
+ language_name: 'kalmyk'
474
+ tt:
475
+ v3_tt:
476
+ latest:
477
+ example: 'Исәнмесез, саумысез, нишләп кәҗәгезне саумыйсыз, әтәчегез күкәй салган, нишләп чыгып алмыйсыз.'
478
+ package: 'https://models.silero.ai/models/tts/tt/v3_tt.pt'
479
+ sample_rate: [8000, 24000, 48000]
480
+ dilyara_v2:
481
+ latest:
482
+ example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.'
483
+ package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt'
484
+ sample_rate: [8000, 16000]
485
+ language_name: 'tatar'
486
+ uz:
487
+ v3_uz:
488
+ latest:
489
+ example: 'Tanishganimdan xursandman.'
490
+ package: 'https://models.silero.ai/models/tts/uz/v3_uz.pt'
491
+ sample_rate: [8000, 24000, 48000]
492
+ dilnavoz_v2:
493
+ latest:
494
+ example: 'Tanishganimdan xursandman.'
495
+ package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt'
496
+ sample_rate: [8000, 16000]
497
+ language_name: 'uzbek'
498
+ ua:
499
+ v3_ua:
500
+ latest:
501
+ example: 'К+отики - пухн+асті жив+отики.'
502
+ package: 'https://models.silero.ai/models/tts/ua/v3_ua.pt'
503
+ sample_rate: [8000, 24000, 48000]
504
+ mykyta_v2:
505
+ latest:
506
+ example: 'К+отики - пухн+асті жив+отики.'
507
+ package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt'
508
+ sample_rate: [8000, 24000, 48000]
509
+ language_name: 'ukrainian'
510
+ indic:
511
+ v3_indic:
512
+ latest:
513
+ example: 'prasidda kabīra adhyētā, puruṣōttama agravāla kā yaha śōdha ālēkha, usa rāmānaṁda kī khōja karatā hai'
514
+ package: 'https://models.silero.ai/models/tts/indic/v3_indic.pt'
515
+ sample_rate: [8000, 24000, 48000]
516
+ multi:
517
+ multi_v2:
518
+ latest:
519
+ package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
520
+ sample_rate: [8000, 16000]
521
+ speakers:
522
+ aidar:
523
+ lang: 'ru'
524
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
525
+ baya:
526
+ lang: 'ru'
527
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
528
+ kseniya:
529
+ lang: 'ru'
530
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
531
+ irina:
532
+ lang: 'ru'
533
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
534
+ ruslan:
535
+ lang: 'ru'
536
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
537
+ natasha:
538
+ lang: 'ru'
539
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
540
+ thorsten:
541
+ lang: 'de'
542
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
543
+ tux:
544
+ lang: 'es'
545
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
546
+ gilles:
547
+ lang: 'fr'
548
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
549
+ lj:
550
+ lang: 'en'
551
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
552
+ dilyara:
553
+ lang: 'tt'
554
+ example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.'
555
+ te_models:
556
+ latest:
557
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
558
+ languages: ['en', 'de', 'ru', 'es']
559
+ punct: '.,-!?—'
560
+ v2:
561
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
562
+ languages: ['en', 'de', 'ru', 'es']
563
+ punct: '.,-!?—'
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ silero
2
+ streamlit
3
+ ebooklib
4
+ bs4
5
+ nltk
6
+ stqdm
src/__init__.py ADDED
File without changes
src/output.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def assemble_zip(title):
    """Bundle the generated chapter audio for one book into a zip archive.

    Only files in ``outputs/`` named ``{title}_chapter<digits>.wav`` (the
    naming scheme ``audiobook_gen`` writes) are added, so audio left over
    from other books is not mixed into this archive. Entries are added in
    sorted file-name order.

    Args:
        title: Normalized ebook title used as the file-name prefix.

    Returns:
        Path of the written archive, ``outputs/{title}.zip``, as a string.
    """
    import pathlib
    import re
    import zipfile
    from stqdm import stqdm

    directory = pathlib.Path("outputs/")
    # The directory is not guaranteed to exist (e.g. nothing was synthesized).
    directory.mkdir(parents=True, exist_ok=True)
    zip_name = f"outputs/{title}.zip"

    # Escape the title so characters such as "." or "[" are matched literally.
    chapter_name = re.compile(re.escape(title) + r"_chapter\d+\.wav")
    wav_files = sorted(
        path for path in directory.iterdir()
        if chapter_name.fullmatch(path.name)
    )

    with zipfile.ZipFile(zip_name, mode="w") as archive:
        # Iterating a list (not a generator) lets the progress bar show a total.
        for file_path in stqdm(wav_files):
            archive.write(file_path, arcname=file_path.name)

    return zip_name
src/parser.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def read_txt(txt_path):
    """Placeholder for plain-text ingestion; not implemented yet.

    Args:
        txt_path: Currently ignored.

    Returns:
        None. A notice is printed to stdout.
    """
    placeholder_notice = "Nothing here yet."
    print(placeholder_notice)
4
+
5
def read_epub(ebook_path):
    """Parse an EPUB into per-chapter lists of short text chunks.

    Each document item in the book becomes one chapter. Its HTML is reduced
    to text, split on newlines, tokenized into sentences, and every sentence
    is wrapped so that no chunk exceeds 150 characters.

    Args:
        ebook_path: Passed unchanged to ``ebooklib.epub.read_epub``.

    Returns:
        A ``(corpus, ebook_title)`` tuple. ``corpus`` is a list with one flat
        list of text chunks per document item; ``ebook_title`` is the first
        DC title, lower-cased with spaces replaced by underscores.
    """
    import ebooklib
    from ebooklib import epub
    from bs4 import BeautifulSoup
    from nltk import tokenize, download
    from textwrap import TextWrapper
    from stqdm import stqdm

    max_char_len = 150

    # Sentence tokenizer data; the call is made on every invocation.
    download('punkt', quiet=True)
    wrapper = TextWrapper(max_char_len, fix_sentence_endings=True)

    book = epub.read_epub(ebook_path)

    raw_title = book.get_metadata('DC', 'title')[0][0]
    ebook_title = raw_title.lower().replace(' ', '_')

    corpus = []
    for item in stqdm(list(book.get_items()), desc="Chapters in ebook:"):
        # Skip images, stylesheets and other non-document items.
        if item.get_type() != ebooklib.ITEM_DOCUMENT:
            continue

        chapter_text = BeautifulSoup(item.get_content(), "html.parser").text
        chapter_chunks = []
        for line in chapter_text.split('\n'):
            normalized = line.replace('—', '-')
            for sentence in tokenize.sent_tokenize(normalized):
                # wrap() splits an over-long sentence into several chunks.
                chapter_chunks.extend(wrapper.wrap(sentence))
        corpus.append(chapter_chunks)

    return corpus, ebook_title
src/predict.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def load_models():
    """Load the English ``v3_en`` Silero TTS model and move it to a device.

    CUDA is used when ``torch.cuda.is_available()`` reports it, otherwise CPU.

    Returns:
        The model object returned by ``silero_tts``.
    """
    import torch
    from silero import silero_tts

    # silero_tts returns a pair; only the first element (the model) is used.
    tts_model, _ = silero_tts(language='en', speaker='v3_en')

    if torch.cuda.is_available():
        target_device = 'cuda'
    else:
        target_device = 'cpu'
    tts_model.to(target_device)

    return tts_model
13
+
14
def audiobook_gen(ebook, title, model):
    """Synthesize one WAV file per chapter of a parsed ebook.

    Writes ``outputs/{title}_chapterNNN.wav`` for every chapter that yields
    audio, where ``NNN`` is the chapter's zero-padded position in ``ebook``.
    Chapters that produce no audio are reported on stdout and skipped.

    Args:
        ebook: Sequence of chapters, each a sequence of text strings.
        title: Used as the output file-name prefix.
        model: Object exposing ``apply_tts(text=, speaker=, sample_rate=)``.

    Returns:
        None.
    """
    import os
    import torch
    import torchaudio
    from stqdm import stqdm

    sample_rate = 24000
    speaker = 'en_0'

    # torchaudio.save is given a path inside outputs/, so make sure it exists.
    os.makedirs('outputs', exist_ok=True)

    # enumerate gives each chapter its own position. ebook.index(chapter)
    # returned the first *equal* chapter, so duplicate chapters collided on
    # one file name.
    for chapter_number, chapter in enumerate(stqdm(ebook, desc="Chapters in ebook:")):
        chapter_index = f'chapter{chapter_number:03}'
        audio_list = []
        for sentence in stqdm(chapter, desc="Sentences in chapter:"):
            audio = model.apply_tts(text=sentence,
                                    speaker=speaker,
                                    sample_rate=sample_rate)
            # Check the type first so a non-tensor result is reported rather
            # than failing inside len().
            if isinstance(audio, torch.Tensor) and len(audio) > 0:
                audio_list.append(audio)
            else:
                print(f'Tensor for sentence is not valid: \n {sentence}')

        sample_path = f'outputs/{title}_{chapter_index}.wav'

        if audio_list:
            # Concatenate the clips and reshape to a single-row 2-D tensor.
            audio_file = torch.cat(audio_list).reshape(1, -1)
            torchaudio.save(sample_path, audio_file, sample_rate)
        else:
            print(f'Chapter {chapter_index} is empty.')