basit123796 committed on
Commit
db748f8
1 Parent(s): f1aee3b

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +241 -0
  2. model.py +739 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ #
3
+ # Copyright 2022-2023 Xiaomi Corp. (authors: Fangjun Kuang)
4
+ #
5
+ # See LICENSE for clarification regarding multiple authors
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+
19
+ # References:
20
+ # https://gradio.app/docs/#dropdown
21
+
22
+ import logging
23
+ import os
24
+ import time
25
+ import uuid
26
+
27
+ import gradio as gr
28
+ import soundfile as sf
29
+
30
+ from model import get_pretrained_model, language_to_models
31
+
32
# Heading rendered at the top of the page (Markdown).
title = "# Next-gen Kaldi: Text-to-speech (TTS)"

# Markdown shown below the controls; the links point at the sherpa-onnx
# project and its deployment guides.
description = """
This space shows how to convert text to speech with Next-gen Kaldi.

It is running on CPU within a docker container provided by Hugging Face.

See more information by visiting the following links:

- <https://github.com/k2-fsa/sherpa-onnx>

If you want to deploy it locally, please see
<https://k2-fsa.github.io/sherpa/>

If you want to use Android APKs, please see
<https://k2-fsa.github.io/sherpa/onnx/tts/apk.html>

If you want to use Android text-to-speech engine APKs, please see
<https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html>

If you want to download an all-in-one exe for Windows, please see
<https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>

"""

# css style is copied from
# https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L113
css = """
.result {display:flex;flex-direction:column}
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
.result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
.result_item_error {background-color:#ff7070;color:white;align-self:start}
"""

# One row per example: language, repo_id, text, speaker id, speed.
# The column order matches the `inputs` list given to gr.Examples.
examples = [
    [
        "Min-nan (闽南话)",
        "csukuangfj/vits-mms-nan",
        "ài piaǸ chiah ē iaN̂",
        0,
        1.0,
    ],
    [
        "Thai",
        "csukuangfj/vits-mms-tha",
        "ฉันรักคุณ",
        0,
        1.0,
    ],
]
70
+
71
+
72
def update_model_dropdown(language: str):
    """Return a model dropdown repopulated for ``language``.

    Args:
      language: A key of ``language_to_models``.

    Returns:
      An interactive ``gr.Dropdown`` whose choices are the models registered
      for ``language``, with the first one selected.

    Raises:
      ValueError: If ``language`` is not a key of ``language_to_models``.
    """
    if language not in language_to_models:
        raise ValueError(f"Unsupported language: {language}")

    models = language_to_models[language]
    return gr.Dropdown(choices=models, value=models[0], interactive=True)
82
+
83
+
84
def build_html_output(s: str, style: str = "result_item_success"):
    """Wrap ``s`` in the two nested ``<div>`` elements styled by ``css``.

    Args:
      s: Content placed inside the inner div. It is inserted verbatim (no
        escaping), so it may itself contain markup such as ``<br/>``.
      style: Extra CSS class added next to ``result_item`` on the inner div.

    Returns:
      The HTML fragment as a string.
    """
    # NOTE(review): the whitespace inside this literal could not be recovered
    # from the scraped source; it is insignificant to HTML rendering.
    html = f"""
    <div class='result'>
        <div class='result_item {style}'>
          {s}
        </div>
    </div>
    """
    return html
92
+
93
+
94
def process(language: str, repo_id: str, text: str, sid: str, speed: float):
    """Synthesize ``text`` and save the audio to a WAV file.

    Args:
      language: Language selected in the UI. Not used here; the model is
        identified by ``repo_id`` alone.
      repo_id: Model repository passed to ``get_pretrained_model``.
      text: Text to synthesize.
      sid: Speaker ID as entered by the user; must be convertible to ``int``.
      speed: Speed value passed to ``get_pretrained_model``.

    Returns:
      A tuple ``(filename, html)``: the path of the 16-bit PCM WAV file that
      was written, and an HTML snippet with duration, processing time and RTF.

    Raises:
      ValueError: If ``sid`` is not an integer, or if no samples were
        generated.
    """
    logging.info("Input text: %s. sid: %s, speed: %s", text, sid, speed)

    try:
        sid = int(sid)
    except (TypeError, ValueError):
        # Surface a message the user can act on instead of the bare
        # "invalid literal for int()" text.
        raise ValueError(f"Speaker ID must be an integer, got: {sid!r}") from None

    tts = get_pretrained_model(repo_id, speed)

    # perf_counter() is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments. Model loading above is deliberately excluded.
    start = time.perf_counter()
    audio = tts.generate(text, sid=sid)
    end = time.perf_counter()

    if len(audio.samples) == 0:
        raise ValueError(
            "Error in generating audios. Please read previous error messages."
        )

    duration = len(audio.samples) / audio.sample_rate

    elapsed_seconds = end - start
    # Real-time factor: processing time divided by audio duration.
    rtf = elapsed_seconds / duration

    info = f"""
    Wave duration  : {duration:.3f} s <br/>
    Processing time: {elapsed_seconds:.3f} s <br/>
    RTF: {elapsed_seconds:.3f}/{duration:.3f} = {rtf:.3f} <br/>
    """

    logging.info(info)
    logging.info(
        "\nrepo_id: %s\ntext: %s\nsid: %s\nspeed: %s", repo_id, text, sid, speed
    )

    # A random name keeps concurrent requests from overwriting each other.
    filename = f"{uuid.uuid4()}.wav"
    sf.write(
        filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )

    return filename, build_html_output(info)
132
+
133
+
134
# NOTE(review): indentation was lost in the scraped source, so the nesting of
# the components under the `with` statements below is reconstructed -- confirm
# against the real app.py before applying.
demo = gr.Blocks(css=css)


with demo:
    gr.Markdown(title)
    language_choices = list(language_to_models.keys())
    default_language = language_choices[0]
    default_models = language_to_models[default_language]

    language_radio = gr.Radio(
        label="Language",
        choices=language_choices,
        value=default_language,
    )

    model_dropdown = gr.Dropdown(
        choices=default_models,
        label="Select a model",
        value=default_models[0],
    )

    # Repopulate the model list whenever another language is picked.
    language_radio.change(
        update_model_dropdown,
        inputs=language_radio,
        outputs=model_dropdown,
    )

    with gr.Tabs():
        with gr.TabItem("Please input your text"):
            input_text = gr.Textbox(
                label="Input text",
                info="Your text",
                lines=3,
                placeholder="Please input your text here",
            )

            input_sid = gr.Textbox(
                label="Speaker ID",
                info="Speaker ID",
                lines=1,
                max_lines=1,
                value="0",
                placeholder="Speaker ID. Valid only for mult-speaker model",
            )

            input_speed = gr.Slider(
                minimum=0.1,
                maximum=10,
                value=1,
                step=0.1,
                label="Speed (larger->faster; smaller->slower)",
            )

            input_button = gr.Button("Submit")

            output_audio = gr.Audio(label="Output")

            output_info = gr.HTML(label="Info")

        # The examples table and the submit button feed `process` with the
        # same components, in the order of its parameters.
        tts_inputs = [
            language_radio,
            model_dropdown,
            input_text,
            input_sid,
            input_speed,
        ]
        tts_outputs = [
            output_audio,
            output_info,
        ]

        gr.Examples(
            examples=examples,
            fn=process,
            inputs=tts_inputs,
            outputs=tts_outputs,
        )

        input_button.click(
            process,
            inputs=tts_inputs,
            outputs=tts_outputs,
        )

    gr.Markdown(description)
223
+
224
+
225
def download_espeak_ng_data():
    """Download the espeak-ng data archive and unpack it in ``/tmp``.

    Runs ``wget`` and then ``tar`` with ``/tmp`` as the working directory, so
    the archive contents are extracted there. The steps run in order and stop
    at the first failure; the failure is logged and the function returns
    normally so the application can still start.
    """
    import subprocess

    work_dir = "/tmp"
    archive = "espeak-ng-data.tar.bz2"
    url = (
        "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/"
        + archive
    )

    commands = (
        # -O overwrites an existing archive; without it wget would save a
        # second copy as "<archive>.1" and tar would unpack the stale file.
        ["wget", "-qq", "-O", archive, url],
        ["tar", "xf", archive],
    )

    for command in commands:
        try:
            subprocess.run(command, cwd=work_dir, check=True)
        except (OSError, subprocess.CalledProcessError) as e:
            # Use a named logger: the module-level logging.error() would call
            # basicConfig() implicitly and turn a later explicit
            # basicConfig(...) call into a no-op.
            logging.getLogger(__name__).error(
                "Failed to run %s in %s: %s", " ".join(command), work_dir, e
            )
            return
233
+
234
+
235
if __name__ == "__main__":
    # Fetch the espeak-ng data before the UI starts accepting requests.
    download_espeak_ng_data()

    logging.basicConfig(
        format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s",
        level=logging.INFO,
    )

    demo.launch()
model.py ADDED
@@ -0,0 +1,739 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2022-2023 Xiaomi Corp. (authors: Fangjun Kuang)
2
+ #
3
+ # See LICENSE for clarification regarding multiple authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from functools import lru_cache
18
+
19
+ import sherpa_onnx
20
+ from huggingface_hub import hf_hub_download
21
+
22
+
23
def get_file(
    repo_id: str,
    filename: str,
    subfolder: str = ".",
) -> str:
    """Fetch one file from a Hugging Face repository.

    Args:
      repo_id: Repository to download from.
      filename: Name of the file inside the repository.
      subfolder: Folder inside the repository that holds the file.

    Returns:
      The value returned by ``hf_hub_download`` for the requested file.
    """
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
    )
34
+
35
+
36
@lru_cache(maxsize=10)
def _get_vits_vctk(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create the TTS engine for the ``csukuangfj/vits-vctk`` repository.

    Args:
      repo_id: Must be exactly ``"csukuangfj/vits-vctk"``.
      speed: Speed factor; the model is configured with
        ``length_scale = 1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` instance. Results are memoized per
      ``(repo_id, speed)`` by ``lru_cache``.

    Raises:
      ValueError: If ``repo_id`` is not the expected repository.
    """
    if repo_id != "csukuangfj/vits-vctk":
        # An explicit raise (not an assert) keeps the check under `python -O`.
        raise ValueError(f"Unsupported {repo_id}")

    # Downloaded in this order: model, lexicon, tokens.
    model, lexicon, tokens = (
        get_file(repo_id=repo_id, filename=name, subfolder=".")
        for name in ("vits-vctk.onnx", "lexicon.txt", "tokens.txt")
    )

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon=lexicon,
        tokens=tokens,
        length_scale=1.0 / speed,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=vits_config,
            provider="cpu",
            debug=True,
            num_threads=2,
        )
    )

    return sherpa_onnx.OfflineTts(tts_config)
74
+
75
+
76
@lru_cache(maxsize=10)
def _get_vits_ljs(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create the TTS engine for the ``csukuangfj/vits-ljs`` repository.

    Args:
      repo_id: Must be exactly ``"csukuangfj/vits-ljs"``.
      speed: Speed factor; the model is configured with
        ``length_scale = 1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` instance. Results are memoized per
      ``(repo_id, speed)`` by ``lru_cache``.

    Raises:
      ValueError: If ``repo_id`` is not the expected repository.
    """
    if repo_id != "csukuangfj/vits-ljs":
        # An explicit raise (not an assert) keeps the check under `python -O`.
        raise ValueError(f"Unsupported {repo_id}")

    # Downloaded in this order: model, lexicon, tokens.
    model, lexicon, tokens = (
        get_file(repo_id=repo_id, filename=name, subfolder=".")
        for name in ("vits-ljs.onnx", "lexicon.txt", "tokens.txt")
    )

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon=lexicon,
        tokens=tokens,
        length_scale=1.0 / speed,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=vits_config,
            provider="cpu",
            debug=True,
            num_threads=2,
        )
    )

    return sherpa_onnx.OfflineTts(tts_config)
114
+
115
+
116
@lru_cache(maxsize=10)
def _get_vits_piper(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create a TTS engine for a coqui, mms, piper or mimic3 repository.

    The ONNX file name is derived from ``repo_id``:

      - ids containing "coqui" or "vits-mms" use ``model.onnx``;
      - ids containing "piper" use the repository name with the
        ``vits-piper-`` prefix length cut off;
      - ids containing "mimic3" use the repository name with the
        ``vits-mimic3-`` prefix length cut off.

    Args:
      repo_id: Repository in ``owner/name`` form.
      speed: Speed factor; the model is configured with
        ``length_scale = 1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` instance, memoized per ``(repo_id, speed)``.

    Raises:
      ValueError: If ``repo_id`` matches none of the families above.
    """
    if "coqui" in repo_id or "vits-mms" in repo_id:
        stem = "model"
    else:
        # "piper" is tested before "mimic3", as in the original chain.
        for family in ("piper", "mimic3"):
            if family in repo_id:
                prefix_len = len(f"vits-{family}-")
                stem = repo_id.split("/")[1][prefix_len:]
                break
        else:
            raise ValueError(f"Unsupported {repo_id}")

    # These two families are configured with an empty data_dir.
    if "vits-coqui-uk-mai" not in repo_id and "vits-mms" not in repo_id:
        data_dir = "/tmp/espeak-ng-data"
    else:
        data_dir = ""

    model = get_file(repo_id=repo_id, filename=f"{stem}.onnx", subfolder=".")
    tokens = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon="",
        data_dir=data_dir,
        tokens=tokens,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )

    return sherpa_onnx.OfflineTts(sherpa_onnx.OfflineTtsConfig(model=model_config))
162
+
163
+
164
@lru_cache(maxsize=10)
def _get_vits_mms(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create a TTS engine for a ``vits-mms`` repository.

    Delegates to ``_get_vits_piper``, which has a dedicated "vits-mms" branch.
    """
    tts = _get_vits_piper(repo_id, speed)
    return tts
167
+
168
+
169
@lru_cache(maxsize=10)
def _get_vits_zh_aishell3(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create the TTS engine for the ``csukuangfj/vits-zh-aishell3`` repository.

    Args:
      repo_id: Must be exactly ``"csukuangfj/vits-zh-aishell3"``.
      speed: Speed factor; the model is configured with
        ``length_scale = 1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` instance configured with the repository's
      ``rule.fst``. Results are memoized per ``(repo_id, speed)``.

    Raises:
      ValueError: If ``repo_id`` is not the expected repository.
    """
    if repo_id != "csukuangfj/vits-zh-aishell3":
        # An explicit raise (not an assert) keeps the check under `python -O`.
        raise ValueError(f"Unsupported {repo_id}")

    # Downloaded in this order: model, lexicon, tokens, rule FST.
    model, lexicon, tokens, rule_fst = (
        get_file(repo_id=repo_id, filename=name, subfolder=".")
        for name in (
            "vits-aishell3.onnx",
            "lexicon.txt",
            "tokens.txt",
            "rule.fst",
        )
    )

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon=lexicon,
        tokens=tokens,
        length_scale=1.0 / speed,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            vits=vits_config,
            provider="cpu",
            debug=True,
            num_threads=2,
        ),
        rule_fsts=rule_fst,
    )

    return sherpa_onnx.OfflineTts(tts_config)
214
+
215
+
216
@lru_cache(maxsize=10)
def _get_vits_hf(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Create a TTS engine for one of the ``vits-*-hf-*`` repositories.

    The ONNX file name is derived from ``repo_id``: for ids containing
    "fanchen" or "vits-cantonese-hf-xiaomaiiwn" it is the text after the last
    "/", otherwise the text after the last "-".

    Args:
      repo_id: Repository to download from.
      speed: Speed factor; the model is configured with
        ``length_scale = 1.0 / speed``.

    Returns:
      A ``sherpa_onnx.OfflineTts`` instance configured with the repository's
      ``rule.fst``. Results are memoized per ``(repo_id, speed)``.
    """
    named_after_repo = (
        "fanchen" in repo_id or "vits-cantonese-hf-xiaomaiiwn" in repo_id
    )
    separator = "/" if named_after_repo else "-"
    stem = repo_id.split(separator)[-1]

    model = get_file(repo_id=repo_id, filename=f"{stem}.onnx", subfolder=".")
    lexicon = get_file(repo_id=repo_id, filename="lexicon.txt", subfolder=".")
    tokens = get_file(repo_id=repo_id, filename="tokens.txt", subfolder=".")
    rule_fst = get_file(repo_id=repo_id, filename="rule.fst", subfolder=".")

    vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=model,
        lexicon=lexicon,
        tokens=tokens,
        length_scale=1.0 / speed,
    )
    model_config = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits_config,
        provider="cpu",
        debug=True,
        num_threads=2,
    )
    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=model_config,
        rule_fsts=rule_fst,
    )

    return sherpa_onnx.OfflineTts(tts_config)
264
+
265
+
266
@lru_cache(maxsize=10)
def get_pretrained_model(repo_id: str, speed: float) -> sherpa_onnx.OfflineTts:
    """Return the TTS engine for ``repo_id`` at the given ``speed``.

    The per-language registries are searched in a fixed order; the first one
    that contains ``repo_id`` supplies the loader, which is called as
    ``loader(repo_id, speed)``.

    Args:
      repo_id: Model repository; must be a key of one of the registries.
      speed: Speed value forwarded to the loader.

    Returns:
      The ``sherpa_onnx.OfflineTts`` built by the loader. Results are memoized
      per ``(repo_id, speed)`` by ``lru_cache``.

    Raises:
      ValueError: If no registry contains ``repo_id``.
    """
    # Same lookup order as the original if/elif chain.
    registries = (
        chinese_models,
        cantonese_models,
        english_models,
        german_models,
        spanish_models,
        french_models,
        ukrainian_models,
        russian_models,
        arabic_models,
        catalan_models,
        czech_models,
        danish_models,
        greek_models,
        finnish_models,
        hungarian_models,
        icelandic_models,
        italian_models,
        georgian_models,
        kazakh_models,
        luxembourgish_models,
        nepali_models,
        dutch_models,
        norwegian_models,
        polish_models,
        portuguese_models,
        romanian_models,
        slovak_models,
        serbian_models,
        swedish_models,
        swahili_models,
        turkish_models,
        vietnamese_models,
        bulgarian_models,
        estonian_models,
        irish_models,
        croatian_models,
        lithuanian_models,
        latvian_models,
        maltese_models,
        slovenian_models,
        bengali_models,
        min_nan_models,
        thai_models,
        persian_models,
        korean_models,
        afrikaans_models,
        gujarati_models,
        tswana_models,
    )

    for registry in registries:
        if repo_id in registry:
            return registry[repo_id](repo_id, speed)

    raise ValueError(f"Unsupported repo_id: {repo_id}")
366
+
367
+
368
# Per-language registries mapping a repo_id to the loader that builds its
# engine. Key order is significant: it is the order shown in the model
# dropdown, and the first key is the default selection.
#
# dict.fromkeys() keeps the listed order; assigning to an existing key
# afterwards replaces the loader without moving the key.

cantonese_models = dict.fromkeys(
    ("csukuangfj/vits-cantonese-hf-xiaomaiiwn",),
    _get_vits_hf,
)

chinese_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-zh_CN-huayan-medium",
        "csukuangfj/vits-zh-hf-theresa",
        "csukuangfj/vits-zh-hf-eula",
        "csukuangfj/vits-zh-hf-echo",
        "csukuangfj/vits-zh-hf-bronya",
        "csukuangfj/vits-zh-aishell3",
        "csukuangfj/vits-zh-hf-fanchen-wnj",
        "csukuangfj/vits-zh-hf-fanchen-C",
        "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe",
        "csukuangfj/vits-zh-hf-fanchen-ZhiHuiLaoZhe_new",
        "csukuangfj/vits-zh-hf-fanchen-unity",
        "csukuangfj/vits-zh-hf-doom",
        "csukuangfj/vits-zh-hf-zenyatta",  # 804
        "csukuangfj/vits-zh-hf-abyssinvoker",
        "csukuangfj/vits-zh-hf-keqing",
        # "csukuangfj/vits-piper-zh_CN-huayan-x_low": _get_vits_piper,
    ),
    _get_vits_hf,
)
chinese_models["csukuangfj/vits-piper-zh_CN-huayan-medium"] = _get_vits_piper
chinese_models["csukuangfj/vits-zh-aishell3"] = _get_vits_zh_aishell3

english_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-en_US-glados",
        # coqui-ai
        "csukuangfj/vits-coqui-en-ljspeech",
        "csukuangfj/vits-coqui-en-ljspeech-neon",
        "csukuangfj/vits-coqui-en-vctk",
        # piper, US
        "csukuangfj/vits-piper-en_GB-sweetbbak-amy",
        "csukuangfj/vits-piper-en_US-amy-low",
        "csukuangfj/vits-piper-en_US-amy-medium",
        "csukuangfj/vits-piper-en_US-arctic-medium",  # 18 speakers
        "csukuangfj/vits-piper-en_US-danny-low",
        "csukuangfj/vits-piper-en_US-hfc_male-medium",
        "csukuangfj/vits-piper-en_US-joe-medium",
        "csukuangfj/vits-piper-en_US-kathleen-low",
        "csukuangfj/vits-piper-en_US-kusal-medium",
        "csukuangfj/vits-piper-en_US-l2arctic-medium",  # 24 speakers
        "csukuangfj/vits-piper-en_US-lessac-low",
        "csukuangfj/vits-piper-en_US-lessac-medium",
        "csukuangfj/vits-piper-en_US-lessac-high",
        "csukuangfj/vits-piper-en_US-libritts-high",  # 904 speakers
        "csukuangfj/vits-piper-en_US-libritts_r-medium",  # 904 speakers
        "csukuangfj/vits-piper-en_US-ryan-low",
        "csukuangfj/vits-piper-en_US-ryan-medium",
        "csukuangfj/vits-piper-en_US-ryan-high",
        # piper, GB
        "csukuangfj/vits-piper-en_GB-alan-low",
        "csukuangfj/vits-piper-en_GB-alan-medium",
        "csukuangfj/vits-piper-en_GB-alba-medium",
        "csukuangfj/vits-piper-en_GB-jenny_dioco-medium",
        "csukuangfj/vits-piper-en_GB-northern_english_male-medium",
        "csukuangfj/vits-piper-en_GB-semaine-medium",
        "csukuangfj/vits-piper-en_GB-southern_english_female-low",
        "csukuangfj/vits-piper-en_GB-vctk-medium",
        #
        "csukuangfj/vits-vctk",  # 109 speakers
        "csukuangfj/vits-ljs",
    ),
    _get_vits_piper,
)
english_models["csukuangfj/vits-vctk"] = _get_vits_vctk
english_models["csukuangfj/vits-ljs"] = _get_vits_ljs

german_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-de-css10",
        "csukuangfj/vits-piper-de_DE-eva_k-x_low",
        "csukuangfj/vits-piper-de_DE-karlsson-low",
        "csukuangfj/vits-piper-de_DE-kerstin-low",
        "csukuangfj/vits-piper-de_DE-pavoque-low",
        "csukuangfj/vits-piper-de_DE-ramona-low",
        "csukuangfj/vits-piper-de_DE-thorsten-low",
        "csukuangfj/vits-piper-de_DE-thorsten-medium",
        "csukuangfj/vits-piper-de_DE-thorsten-high",
        "csukuangfj/vits-piper-de_DE-thorsten_emotional-medium",  # 8 speakers
    ),
    _get_vits_piper,
)

spanish_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-es-css10",
        "csukuangfj/vits-piper-es-glados-medium",
        "csukuangfj/vits-piper-es_ES-carlfm-x_low",
        "csukuangfj/vits-piper-es_ES-davefx-medium",
        # "csukuangfj/vits-piper-es_ES-mls_10246-low": _get_vits_piper,
        # "csukuangfj/vits-piper-es_ES-mls_9972-low": _get_vits_piper,
        "csukuangfj/vits-piper-es_ES-sharvard-medium",  # 2 speakers
        "csukuangfj/vits-piper-es_MX-ald-medium",
        "csukuangfj/vits-mimic3-es_ES-m-ailabs_low",
    ),
    _get_vits_piper,
)

french_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-fr-css10",
        # "csukuangfj/vits-piper-fr_FR-gilles-low": _get_vits_piper,
        # "csukuangfj/vits-piper-fr_FR-mls_1840-low": _get_vits_piper,
        "csukuangfj/vits-piper-fr_FR-upmc-medium",  # 2 speakers, 0-female, 1-male
        "csukuangfj/vits-piper-fr_FR-siwis-low",  # female
        "csukuangfj/vits-piper-fr_FR-siwis-medium",
        "csukuangfj/vits-piper-fr_FR-tjiho-model1",
        "csukuangfj/vits-piper-fr_FR-tjiho-model2",
        "csukuangfj/vits-piper-fr_FR-tjiho-model3",
    ),
    _get_vits_piper,
)

ukrainian_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-uk_UA-lada-x_low",
        "csukuangfj/vits-coqui-uk-mai",
        # "csukuangfj/vits-piper-uk_UA-ukrainian_tts-medium": _get_vits_piper,  # does not work somehow
    ),
    _get_vits_piper,
)

russian_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-ru_RU-denis-medium",
        "csukuangfj/vits-piper-ru_RU-dmitri-medium",
        "csukuangfj/vits-piper-ru_RU-irina-medium",
        "csukuangfj/vits-piper-ru_RU-ruslan-medium",
    ),
    _get_vits_piper,
)

arabic_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-ar_JO-kareem-low",
        "csukuangfj/vits-piper-ar_JO-kareem-medium",
    ),
    _get_vits_piper,
)

catalan_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-ca_ES-upc_ona-x_low",
        "csukuangfj/vits-piper-ca_ES-upc_ona-medium",
        "csukuangfj/vits-piper-ca_ES-upc_pau-x_low",
    ),
    _get_vits_piper,
)

czech_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-cs_CZ-jirka-low",
        "csukuangfj/vits-piper-cs_CZ-jirka-medium",
        "csukuangfj/vits-coqui-cs-cv",
    ),
    _get_vits_piper,
)

danish_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-da-cv",
        "csukuangfj/vits-piper-da_DK-talesyntese-medium",
    ),
    _get_vits_piper,
)

greek_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-el_GR-rapunzelina-low",
        # "csukuangfj/vits-mimic3-el_GR-rapunzelina_low": _get_vits_piper,
    ),
    _get_vits_piper,
)

finnish_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-fi-css10",
        "csukuangfj/vits-piper-fi_FI-harri-low",
        "csukuangfj/vits-piper-fi_FI-harri-medium",
        "csukuangfj/vits-mimic3-fi_FI-harri-tapani-ylilammi_low",
    ),
    _get_vits_piper,
)

hungarian_models = dict.fromkeys(
    (
        # "csukuangfj/vits-coqui-hu-css10": _get_vits_piper,
        "csukuangfj/vits-piper-hu_HU-anna-medium",
        "csukuangfj/vits-piper-hu_HU-berta-medium",
        "csukuangfj/vits-piper-hu_HU-imre-medium",
        "csukuangfj/vits-mimic3-hu_HU-diana-majlinger_low",
    ),
    _get_vits_piper,
)

icelandic_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-is_IS-bui-medium",
        "csukuangfj/vits-piper-is_IS-salka-medium",
        "csukuangfj/vits-piper-is_IS-steinn-medium",
        "csukuangfj/vits-piper-is_IS-ugla-medium",
    ),
    _get_vits_piper,
)

italian_models = dict.fromkeys(
    ("csukuangfj/vits-piper-it_IT-riccardo-x_low",),
    _get_vits_piper,
)

georgian_models = dict.fromkeys(
    ("csukuangfj/vits-piper-ka_GE-natia-medium",),
    _get_vits_piper,
)

kazakh_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-kk_KZ-iseke-x_low",
        "csukuangfj/vits-piper-kk_KZ-issai-high",
        "csukuangfj/vits-piper-kk_KZ-raya-x_low",
    ),
    _get_vits_piper,
)

luxembourgish_models = dict.fromkeys(
    ("csukuangfj/vits-piper-lb_LU-marylux-medium",),
    _get_vits_piper,
)

nepali_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-ne_NP-google-medium",
        "csukuangfj/vits-piper-ne_NP-google-x_low",
        "csukuangfj/vits-mimic3-ne_NP-ne-google_low",
    ),
    _get_vits_piper,
)

dutch_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-nl-css10",
        "csukuangfj/vits-piper-nl_BE-nathalie-medium",
        "csukuangfj/vits-piper-nl_BE-nathalie-x_low",
        "csukuangfj/vits-piper-nl_BE-rdh-medium",
        "csukuangfj/vits-piper-nl_BE-rdh-x_low",
        "csukuangfj/vits-piper-nl_NL-mls_5809-low",
        "csukuangfj/vits-piper-nl_NL-mls_7432-low",
    ),
    _get_vits_piper,
)

norwegian_models = dict.fromkeys(
    ("csukuangfj/vits-piper-no_NO-talesyntese-medium",),
    _get_vits_piper,
)

polish_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-pl-mai_female",
        "csukuangfj/vits-piper-pl_PL-darkman-medium",
        "csukuangfj/vits-piper-pl_PL-gosia-medium",
        "csukuangfj/vits-piper-pl_PL-mc_speech-medium",
        # "csukuangfj/vits-piper-pl_PL-mls_6892-low": _get_vits_piper,
        "csukuangfj/vits-mimic3-pl_PL-m-ailabs_low",
    ),
    _get_vits_piper,
)

portuguese_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-pt-cv",
        "csukuangfj/vits-piper-pt_BR-edresson-low",
        "csukuangfj/vits-piper-pt_BR-faber-medium",
        "csukuangfj/vits-piper-pt_PT-tugao-medium",
    ),
    _get_vits_piper,
)

romanian_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-ro-cv",
        "csukuangfj/vits-piper-ro_RO-mihai-medium",
    ),
    _get_vits_piper,
)


slovak_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-sk-cv",
        "csukuangfj/vits-piper-sk_SK-lili-medium",
    ),
    _get_vits_piper,
)

serbian_models = dict.fromkeys(
    ("csukuangfj/vits-piper-sr_RS-serbski_institut-medium",),
    _get_vits_piper,
)

swedish_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-sv-cv",
        "csukuangfj/vits-piper-sv_SE-nst-medium",
    ),
    _get_vits_piper,
)

swahili_models = dict.fromkeys(
    ("csukuangfj/vits-piper-sw_CD-lanfrica-medium",),
    _get_vits_piper,
)

turkish_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-tr_TR-dfki-medium",
        "csukuangfj/vits-piper-tr_TR-fahrettin-medium",
    ),
    _get_vits_piper,
)

vietnamese_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-vi_VN-25hours_single-low",
        "csukuangfj/vits-piper-vi_VN-vais1000-medium",
        "csukuangfj/vits-piper-vi_VN-vivos-x_low",
        "csukuangfj/vits-mimic3-vi_VN-vais1000_low",
    ),
    _get_vits_piper,
)

bulgarian_models = dict.fromkeys(
    ("csukuangfj/vits-coqui-bg-cv",),
    _get_vits_piper,
)

estonian_models = dict.fromkeys(
    ("csukuangfj/vits-coqui-et-cv",),
    _get_vits_piper,
)

irish_models = dict.fromkeys(
    ("csukuangfj/vits-coqui-ga-cv",),
    _get_vits_piper,
)

croatian_models = dict.fromkeys(
    ("csukuangfj/vits-coqui-hr-cv",),
    _get_vits_piper,
)

lithuanian_models = dict.fromkeys(
    ("csukuangfj/vits-coqui-lt-cv",),
    _get_vits_piper,
)

latvian_models = dict.fromkeys(
    ("csukuangfj/vits-coqui-lv-cv",),
    _get_vits_piper,
)

maltese_models = dict.fromkeys(
    ("csukuangfj/vits-coqui-mt-cv",),
    _get_vits_piper,
)

slovenian_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-sl_SI-artur-medium",
        "csukuangfj/vits-coqui-sl-cv",
    ),
    _get_vits_piper,
)

# Bangla
bengali_models = dict.fromkeys(
    (
        "csukuangfj/vits-coqui-bn-custom_female",
        "csukuangfj/vits-mimic3-bn-multi_low",
    ),
    _get_vits_piper,
)

min_nan_models = dict.fromkeys(
    ("csukuangfj/vits-mms-nan",),
    _get_vits_mms,
)

thai_models = dict.fromkeys(
    ("csukuangfj/vits-mms-tha",),
    _get_vits_mms,
)

persian_models = dict.fromkeys(
    (
        "csukuangfj/vits-piper-fa_IR-amir-medium",
        "csukuangfj/vits-piper-fa_IR-gyro-medium",
        "csukuangfj/vits-mimic3-fa-haaniye_low",
    ),
    _get_vits_piper,
)

korean_models = dict.fromkeys(
    ("csukuangfj/vits-mimic3-ko_KO-kss_low",),
    _get_vits_piper,
)


afrikaans_models = dict.fromkeys(
    ("csukuangfj/vits-mimic3-af_ZA-google-nwu_low",),
    _get_vits_piper,
)

gujarati_models = dict.fromkeys(
    ("csukuangfj/vits-mimic3-gu_IN-cmu-indic_low",),
    _get_vits_piper,
)

tswana_models = dict.fromkeys(
    ("csukuangfj/vits-mimic3-tn_ZA-google-nwu_low",),
    _get_vits_piper,
)
688
+
689
+
690
# Display name of each language -> list of repo_ids offered for it.
# The key order here is the order of the language choices in the UI, and the
# first key is the default language.
language_to_models = {
    "English": list(english_models),
    "Chinese (Mandarin, 普通话)": list(chinese_models),
    "Cantonese (粤语)": list(cantonese_models),
    "Min-nan (闽南话)": list(min_nan_models),
    "Arabic": list(arabic_models),
    "Afrikaans": list(afrikaans_models),
    "Bengali": list(bengali_models),
    "Bulgarian": list(bulgarian_models),
    "Catalan": list(catalan_models),
    "Croatian": list(croatian_models),
    "Czech": list(czech_models),
    "Danish": list(danish_models),
    "Dutch": list(dutch_models),
    "Estonian": list(estonian_models),
    "Finnish": list(finnish_models),
    "French": list(french_models),
    "Georgian": list(georgian_models),
    "German": list(german_models),
    "Greek": list(greek_models),
    "Gujarati": list(gujarati_models),
    "Hungarian": list(hungarian_models),
    "Icelandic": list(icelandic_models),
    "Irish": list(irish_models),
    "Italian": list(italian_models),
    "Kazakh": list(kazakh_models),
    "Korean": list(korean_models),
    "Latvian": list(latvian_models),
    "Lithuanian": list(lithuanian_models),
    "Luxembourgish": list(luxembourgish_models),
    "Maltese": list(maltese_models),
    "Nepali": list(nepali_models),
    "Norwegian": list(norwegian_models),
    "Persian": list(persian_models),
    "Polish": list(polish_models),
    "Portuguese": list(portuguese_models),
    "Romanian": list(romanian_models),
    "Russian": list(russian_models),
    "Serbian": list(serbian_models),
    "Slovak": list(slovak_models),
    "Slovenian": list(slovenian_models),
    "Spanish": list(spanish_models),
    "Swahili": list(swahili_models),
    "Swedish": list(swedish_models),
    "Thai": list(thai_models),
    "Tswana": list(tswana_models),
    "Turkish": list(turkish_models),
    "Ukrainian": list(ukrainian_models),
    "Vietnamese": list(vietnamese_models),
}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ #https://huggingface.co/csukuangfj/wheels/resolve/main/sherpa_onnx-1.8.11-cp38-cp38-linux_x86_64.whl
2
+ sherpa-onnx
3
+
4
+ soundfile