Spaces:
Sleeping
Sleeping
Commit
·
2098a12
1
Parent(s):
c07d473
Update app.py
Browse files
app.py
CHANGED
@@ -1,271 +1,134 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
import gradio as gr
|
7 |
-
from TTS.api import TTS
|
8 |
|
9 |
-
model_names = TTS().list_models()
|
10 |
-
m = model_names[0]
|
11 |
-
print(model_names)
|
12 |
-
tts = TTS(m, gpu=False)
|
13 |
-
tts.to("cpu") # no GPU or Amd
|
14 |
-
#tts.to("cuda") # cuda only
|
15 |
|
16 |
-
|
17 |
-
if agree == True:
|
18 |
-
if language == "fa":
|
19 |
-
tts_init("saillab/xtts_v2_fa_revision1")
|
20 |
-
else:
|
21 |
-
tts_init(m)
|
22 |
-
if use_mic == True:
|
23 |
-
if mic_file_path is not None:
|
24 |
-
speaker_wav=mic_file_path
|
25 |
-
else:
|
26 |
-
gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
|
27 |
-
return (
|
28 |
-
None,
|
29 |
-
None,
|
30 |
-
)
|
31 |
-
|
32 |
-
else:
|
33 |
-
speaker_wav=audio_file_pth
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
None,
|
39 |
-
None,
|
40 |
-
)
|
41 |
-
if len(prompt)>10000:
|
42 |
-
gr.Warning("Text length limited to 10000 characters for this demo, please try shorter text")
|
43 |
-
return (
|
44 |
-
None,
|
45 |
-
None,
|
46 |
-
)
|
47 |
-
try:
|
48 |
-
if language == "fr":
|
49 |
-
if m.find("your") != -1:
|
50 |
-
language = "fr-fr"
|
51 |
-
if m.find("/fr/") != -1:
|
52 |
-
language = None
|
53 |
-
tts.tts_to_file(
|
54 |
-
text=prompt,
|
55 |
-
file_path="output.wav",
|
56 |
-
speaker_wav=speaker_wav,
|
57 |
-
language=language
|
58 |
-
)
|
59 |
-
except RuntimeError as e :
|
60 |
-
if "device-assert" in str(e):
|
61 |
-
# cannot do anything on a CUDA device-side error; the process needs to restart
|
62 |
-
gr.Warning("Unhandled Exception encounter, please retry in a minute")
|
63 |
-
print("Cuda device-assert Runtime encountered need restart")
|
64 |
-
sys.exit("Exit due to cuda device-assert")
|
65 |
-
else:
|
66 |
-
raise e
|
67 |
-
|
68 |
-
return (
|
69 |
-
gr.make_waveform(
|
70 |
-
audio="output.wav",
|
71 |
-
),
|
72 |
-
"output.wav",
|
73 |
-
)
|
74 |
-
else:
|
75 |
-
gr.Warning("Please accept the Terms & Condition!")
|
76 |
-
return (
|
77 |
-
None,
|
78 |
-
None,
|
79 |
-
)
|
80 |
|
81 |
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
99 |
|
100 |
-
article = """
|
101 |
-
<div style='margin:20px auto;'>
|
102 |
-
<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
|
103 |
-
</div>
|
104 |
-
"""
|
105 |
-
examples = [
|
106 |
-
[
|
107 |
-
"Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
|
108 |
-
"en",
|
109 |
-
"examples/female.wav",
|
110 |
-
None,
|
111 |
-
False,
|
112 |
-
True,
|
113 |
-
],
|
114 |
-
[
|
115 |
-
"اگر بدنبال یادگیری فارسی هستید و در زبان زاد ��ا را دنبال کنید",
|
116 |
-
"fa",
|
117 |
-
"examples/female.wav",
|
118 |
-
None,
|
119 |
-
False,
|
120 |
-
True,
|
121 |
-
],
|
122 |
-
[
|
123 |
-
"Je suis un lycéen français de 17 ans, passioner par la Cyber-Sécuritée et les models d'IA.",
|
124 |
-
"fr",
|
125 |
-
"examples/male.wav",
|
126 |
-
None,
|
127 |
-
False,
|
128 |
-
True,
|
129 |
-
],
|
130 |
-
[
|
131 |
-
"Als ich sechs war, sah ich einmal ein wunderbares Bild",
|
132 |
-
"de",
|
133 |
-
"examples/female.wav",
|
134 |
-
None,
|
135 |
-
False,
|
136 |
-
True,
|
137 |
-
],
|
138 |
-
[
|
139 |
-
"Cuando tenía seis años, vi una vez una imagen magnífica",
|
140 |
-
"es",
|
141 |
-
"examples/male.wav",
|
142 |
-
None,
|
143 |
-
False,
|
144 |
-
True,
|
145 |
-
],
|
146 |
-
[
|
147 |
-
"Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
|
148 |
-
"pt",
|
149 |
-
"examples/female.wav",
|
150 |
-
None,
|
151 |
-
False,
|
152 |
-
True,
|
153 |
-
],
|
154 |
-
[
|
155 |
-
"Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
|
156 |
-
"pl",
|
157 |
-
"examples/male.wav",
|
158 |
-
None,
|
159 |
-
False,
|
160 |
-
True,
|
161 |
-
],
|
162 |
-
[
|
163 |
-
"Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
|
164 |
-
"it",
|
165 |
-
"examples/female.wav",
|
166 |
-
None,
|
167 |
-
False,
|
168 |
-
True,
|
169 |
-
],
|
170 |
-
[
|
171 |
-
"Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
|
172 |
-
"tr",
|
173 |
-
"examples/female.wav",
|
174 |
-
None,
|
175 |
-
False,
|
176 |
-
True,
|
177 |
-
],
|
178 |
-
[
|
179 |
-
"Когда мне было шесть лет, я увидел однажды удивительную картинку",
|
180 |
-
"ru",
|
181 |
-
"examples/female.wav",
|
182 |
-
None,
|
183 |
-
False,
|
184 |
-
True,
|
185 |
-
],
|
186 |
-
[
|
187 |
-
"Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
|
188 |
-
"nl",
|
189 |
-
"examples/male.wav",
|
190 |
-
None,
|
191 |
-
False,
|
192 |
-
True,
|
193 |
-
],
|
194 |
-
[
|
195 |
-
"Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
|
196 |
-
"cs",
|
197 |
-
"examples/female.wav",
|
198 |
-
None,
|
199 |
-
False,
|
200 |
-
True,
|
201 |
-
],
|
202 |
-
[
|
203 |
-
"当我还只有六岁的时候, 看到了一副精彩的插画",
|
204 |
-
"zh-cn",
|
205 |
-
"examples/female.wav",
|
206 |
-
None,
|
207 |
-
False,
|
208 |
-
True,
|
209 |
-
],
|
210 |
]
|
211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
|
214 |
-
|
215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
inputs=[
|
217 |
gr.Textbox(
|
218 |
-
label="Text
|
219 |
-
|
220 |
-
value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
|
221 |
-
),
|
222 |
-
gr.Dropdown(
|
223 |
-
label="Language",
|
224 |
-
info="Select an output language for the synthesised speech",
|
225 |
-
choices=[
|
226 |
-
"en",
|
227 |
-
"es",
|
228 |
-
"fr",
|
229 |
-
"de",
|
230 |
-
"it",
|
231 |
-
"pt",
|
232 |
-
"pl",
|
233 |
-
"tr",
|
234 |
-
"ru",
|
235 |
-
"nl",
|
236 |
-
"cs",
|
237 |
-
"ar",
|
238 |
-
"zh-cn",
|
239 |
-
"fa",
|
240 |
-
],
|
241 |
-
max_choices=1,
|
242 |
-
value="en",
|
243 |
),
|
244 |
-
gr.
|
245 |
-
label="
|
246 |
-
|
247 |
-
|
248 |
-
value="examples/female.wav",
|
249 |
),
|
250 |
-
gr.Audio(source="microphone",
|
251 |
-
type="filepath",
|
252 |
-
info="Use your microphone to record audio",
|
253 |
-
label="Use Microphone for Reference"),
|
254 |
-
gr.Checkbox(label="Check to use Microphone as Reference",
|
255 |
-
value=False,
|
256 |
-
info="Notice: Microphone input may not work properly under traffic",),
|
257 |
-
gr.Checkbox(
|
258 |
-
label="Agree",
|
259 |
-
value=True,
|
260 |
-
info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
|
261 |
-
),
|
262 |
-
],
|
263 |
-
outputs=[
|
264 |
-
gr.Video(label="Waveform Visual"),
|
265 |
-
gr.Audio(label="Synthesised Audio"),
|
266 |
],
|
267 |
-
|
|
|
|
|
268 |
description=description,
|
269 |
article=article,
|
270 |
-
|
271 |
-
)
|
|
|
|
|
|
1 |
+
Hugging Face's logo
|
2 |
+
Hugging Face
|
3 |
+
Search models, datasets, users...
|
4 |
+
Models
|
5 |
+
Datasets
|
6 |
+
Spaces
|
7 |
+
Docs
|
8 |
+
Solutions
|
9 |
+
Pricing
|
10 |
|
|
|
|
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
Spaces:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
+
Kamtera
|
16 |
+
/
|
17 |
+
Persian-tts-CoquiTTS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
+
like
|
21 |
+
13
|
22 |
+
App
|
23 |
+
Files
|
24 |
+
Community
|
25 |
+
Persian-tts-CoquiTTS
|
26 |
+
/
|
27 |
+
app.py
|
28 |
+
Kamtera's picture
|
29 |
+
Kamtera
|
30 |
+
Upload app.py with huggingface_hub
|
31 |
+
b9d7413
|
32 |
+
3 months ago
|
33 |
+
raw
|
34 |
+
history
|
35 |
+
blame
|
36 |
+
contribute
|
37 |
+
delete
|
38 |
+
No virus
|
39 |
+
5.05 kB
|
40 |
|
41 |
+
import tempfile ,os
|
42 |
+
from TTS.config import load_config
|
43 |
+
import gradio as gr
|
44 |
+
|
45 |
+
from TTS.utils.manage import ModelManager
|
46 |
+
from TTS.utils.synthesizer import Synthesizer
|
47 |
+
|
48 |
+
MODEL_NAMES=[
|
49 |
+
"xtts-farsi",
|
50 |
+
]
|
51 |
+
MAX_TXT_LEN = 800
|
52 |
+
model_path = os.getcwd() + "/best_model.pth"
|
53 |
+
config_path = os.getcwd() + "/config.json"
|
54 |
+
|
55 |
+
|
56 |
+
|
57 |
+
from TTS.utils.download import download_url
|
58 |
+
modelInfo=[
|
59 |
+
["xtts-farsi","best_model_31680.pth","config.json","https://huggingface.co/saillab/xtts_v2_fa_revision1"],
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
]
|
62 |
|
63 |
+
# Fetch the checkpoint and config of every configured model into a local
# directory named after the model. Each modelInfo entry is read as
# [directory, remote checkpoint name, remote config name, base URL].
for entry in modelInfo:
    model_dir, checkpoint_file, config_file, base_url = entry[:4]

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(model_dir, exist_ok=True)

    # The base URL and file name were concatenated with no separator, which
    # yields a malformed URL whenever the base lacks a trailing slash.
    # NOTE(review): Hugging Face file downloads normally use a
    # ".../resolve/<revision>/" prefix -- confirm the base URL in modelInfo.
    if not base_url.endswith("/"):
        base_url += "/"

    # Only fetch files that are not already on disk, so a restart with a
    # populated (or partially populated) directory works.
    pending = [
        (remote_name, local_name)
        for remote_name, local_name in (
            (checkpoint_file, "best_model.pth"),
            (config_file, "config.json"),
        )
        if not os.path.exists(os.path.join(model_dir, local_name))
    ]
    if pending:
        print("|> Downloading: ", model_dir)
    for remote_name, local_name in pending:
        download_url(base_url + remote_name, model_dir, local_name)
|
74 |
+
def tts(text: str, model_name: str):
    """Synthesize ``text`` to a WAV file using the model in ``model_name``.

    Args:
        text: Text to speak. Input longer than ``MAX_TXT_LEN`` characters is
            truncated to that length.
        model_name: Directory holding ``best_model.pth`` and ``config.json``.

    Returns:
        Path of a temporary ``.wav`` file with the synthesized audio. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        NameError: If the checkpoint or config file is missing from
            ``model_name``.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    checkpoint_file = model_name + "/best_model.pth"
    model_config = model_name + "/config.json"
    # A constructor call returns an instance or raises, so the former
    # `synthesizer is None` guard could never fire. Check the files up front
    # so a missing model still surfaces as the intended NameError.
    if not (os.path.isfile(checkpoint_file) and os.path.isfile(model_config)):
        raise NameError("model not found")

    # synthesize
    synthesizer = Synthesizer(checkpoint_file, model_config)
    wavs = synthesizer.tts(text)

    # return output
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
|
92 |
|
93 |
+
|
94 |
+
description="""
|
95 |
+
This is a demo of persian text to speech model.
|
96 |
+
**Github : https://github.com/karim23657/Persian-tts-coqui **
|
97 |
+
Models can be found here: <br>
|
98 |
+
|Model|Dataset|
|
99 |
+
|----|------|
|
100 |
+
|[vits female (best)](https://huggingface.co/Kamtera/persian-tts-female-vits)|[persian-tts-dataset-famale](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale)|
|
101 |
+
|[vits male1 (best)](https://huggingface.co/Kamtera/persian-tts-male1-vits)|[persian-tts-dataset-male](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-male)|
|
102 |
+
|[vits female1](https://huggingface.co/Kamtera/persian-tts-female1-vits)|[ParsiGoo](https://github.com/karim23657/ParsiGoo)|
|
103 |
+
|[vits male](https://huggingface.co/Kamtera/persian-tts-male-vits)|[persian-tts-dataset](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset)|
|
104 |
+
|[glowtts female](https://huggingface.co/Kamtera/persian-tts-female-glow_tts)|[persian-tts-dataset-famale](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale)|
|
105 |
+
|[glowtts male](https://huggingface.co/Kamtera/persian-tts-male-glow_tts)|[persian-tts-dataset](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset)|
|
106 |
+
|[tacotron2 female](https://huggingface.co/Kamtera/persian-tts-female-tacotron2)|[persian-tts-dataset-famale](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale)|
|
107 |
+
"""
|
108 |
+
article= ""
|
109 |
+
examples=[
|
110 |
+
["و خداوند شما را با ارسال روح در جسم زندگانی و حیات بخشید","xtts-farsi"],
|
111 |
+
["تاجر تو چه تجارت می کنی ، تو را چه که چه تجارت می کنم؟","xtts-farsi"],
|
112 |
+
]
|
113 |
+
iface = gr.Interface(
|
114 |
+
fn=tts,
|
115 |
inputs=[
|
116 |
gr.Textbox(
|
117 |
+
label="Text",
|
118 |
+
value="زندگی فقط یک بار است؛ از آن به خوبی استفاده کن",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
),
|
120 |
+
gr.Radio(
|
121 |
+
label="Pick a TTS Model ",
|
122 |
+
choices=MODEL_NAMES,
|
123 |
+
value="xtts-farsi",
|
|
|
124 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
],
|
126 |
+
outputs=gr.Audio(label="Output",type='filepath'),
|
127 |
+
examples=examples,
|
128 |
+
title="🗣️ Persian tts 🗣️",
|
129 |
description=description,
|
130 |
article=article,
|
131 |
+
live=False
|
132 |
+
)
|
133 |
+
iface.launch(share=False)
|
134 |
+
|