barghavani committed on
Commit
2098a12
·
1 Parent(s): c07d473

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -252
app.py CHANGED
@@ -1,271 +1,134 @@
1
- import sys
2
- import os
3
- # By using XTTS you agree to CPML license https://coqui.ai/cpml
4
- os.environ["COQUI_TOS_AGREED"] = "1"
 
 
 
 
 
5
 
6
- import gradio as gr
7
- from TTS.api import TTS
8
 
9
- model_names = TTS().list_models()
10
- m = model_names[0]
11
- print(model_names)
12
- tts = TTS(m, gpu=False)
13
- tts.to("cpu") # no GPU or Amd
14
- #tts.to("cuda") # cuda only
15
 
16
- def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
17
- if agree == True:
18
- if language == "fa":
19
- tts_init("saillab/xtts_v2_fa_revision1")
20
- else:
21
- tts_init(m)
22
- if use_mic == True:
23
- if mic_file_path is not None:
24
- speaker_wav=mic_file_path
25
- else:
26
- gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
27
- return (
28
- None,
29
- None,
30
- )
31
-
32
- else:
33
- speaker_wav=audio_file_pth
34
 
35
- if len(prompt)<2:
36
- gr.Warning("Please give a longer prompt text")
37
- return (
38
- None,
39
- None,
40
- )
41
- if len(prompt)>10000:
42
- gr.Warning("Text length limited to 10000 characters for this demo, please try shorter text")
43
- return (
44
- None,
45
- None,
46
- )
47
- try:
48
- if language == "fr":
49
- if m.find("your") != -1:
50
- language = "fr-fr"
51
- if m.find("/fr/") != -1:
52
- language = None
53
- tts.tts_to_file(
54
- text=prompt,
55
- file_path="output.wav",
56
- speaker_wav=speaker_wav,
57
- language=language
58
- )
59
- except RuntimeError as e :
60
- if "device-assert" in str(e):
61
- # cannot do anything on cuda device side error, need tor estart
62
- gr.Warning("Unhandled Exception encounter, please retry in a minute")
63
- print("Cuda device-assert Runtime encountered need restart")
64
- sys.exit("Exit due to cuda device-assert")
65
- else:
66
- raise e
67
-
68
- return (
69
- gr.make_waveform(
70
- audio="output.wav",
71
- ),
72
- "output.wav",
73
- )
74
- else:
75
- gr.Warning("Please accept the Terms & Condition!")
76
- return (
77
- None,
78
- None,
79
- )
80
 
81
 
82
- title = "XTTS Glz's remake (Fonctional Text-2-Speech)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- description = """
85
- <a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
86
- <br/>
87
- XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
88
- <br/>
89
- This is the same model that powers our creator application <a href="https://coqui.ai">Coqui Studio</a> as well as the <a href="https://docs.coqui.ai">Coqui API</a>. In production we apply modifications to make low-latency streaming possible.
90
- <br/>
91
- Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, where our open-source inference and training code lives.
92
- <br/>
93
- <p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
94
- <br/>
95
- <a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
96
- <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
97
- </p>
98
- """
 
 
 
 
99
 
100
- article = """
101
- <div style='margin:20px auto;'>
102
- <p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
103
- </div>
104
- """
105
- examples = [
106
- [
107
- "Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
108
- "en",
109
- "examples/female.wav",
110
- None,
111
- False,
112
- True,
113
- ],
114
- [
115
- "اگر بدنبال یادگیری فارسی هستید و در زبان زاد ��ا را دنبال کنید",
116
- "fa",
117
- "examples/female.wav",
118
- None,
119
- False,
120
- True,
121
- ],
122
- [
123
- "Je suis un lycéen français de 17 ans, passioner par la Cyber-Sécuritée et les models d'IA.",
124
- "fr",
125
- "examples/male.wav",
126
- None,
127
- False,
128
- True,
129
- ],
130
- [
131
- "Als ich sechs war, sah ich einmal ein wunderbares Bild",
132
- "de",
133
- "examples/female.wav",
134
- None,
135
- False,
136
- True,
137
- ],
138
- [
139
- "Cuando tenía seis años, vi una vez una imagen magnífica",
140
- "es",
141
- "examples/male.wav",
142
- None,
143
- False,
144
- True,
145
- ],
146
- [
147
- "Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
148
- "pt",
149
- "examples/female.wav",
150
- None,
151
- False,
152
- True,
153
- ],
154
- [
155
- "Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
156
- "pl",
157
- "examples/male.wav",
158
- None,
159
- False,
160
- True,
161
- ],
162
- [
163
- "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
164
- "it",
165
- "examples/female.wav",
166
- None,
167
- False,
168
- True,
169
- ],
170
- [
171
- "Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
172
- "tr",
173
- "examples/female.wav",
174
- None,
175
- False,
176
- True,
177
- ],
178
- [
179
- "Когда мне было шесть лет, я увидел однажды удивительную картинку",
180
- "ru",
181
- "examples/female.wav",
182
- None,
183
- False,
184
- True,
185
- ],
186
- [
187
- "Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
188
- "nl",
189
- "examples/male.wav",
190
- None,
191
- False,
192
- True,
193
- ],
194
- [
195
- "Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
196
- "cs",
197
- "examples/female.wav",
198
- None,
199
- False,
200
- True,
201
- ],
202
- [
203
- "当我还只有六岁的时候, 看到了一副精彩的插画",
204
- "zh-cn",
205
- "examples/female.wav",
206
- None,
207
- False,
208
- True,
209
- ],
210
  ]
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
- gr.Interface(
215
- fn=predict,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  inputs=[
217
  gr.Textbox(
218
- label="Text Prompt",
219
- info="One or two sentences at a time is better",
220
- value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
221
- ),
222
- gr.Dropdown(
223
- label="Language",
224
- info="Select an output language for the synthesised speech",
225
- choices=[
226
- "en",
227
- "es",
228
- "fr",
229
- "de",
230
- "it",
231
- "pt",
232
- "pl",
233
- "tr",
234
- "ru",
235
- "nl",
236
- "cs",
237
- "ar",
238
- "zh-cn",
239
- "fa",
240
- ],
241
- max_choices=1,
242
- value="en",
243
  ),
244
- gr.Audio(
245
- label="Reference Audio",
246
- info="Click on the ✎ button to upload your own target speaker audio",
247
- type="filepath",
248
- value="examples/female.wav",
249
  ),
250
- gr.Audio(source="microphone",
251
- type="filepath",
252
- info="Use your microphone to record audio",
253
- label="Use Microphone for Reference"),
254
- gr.Checkbox(label="Check to use Microphone as Reference",
255
- value=False,
256
- info="Notice: Microphone input may not work properly under traffic",),
257
- gr.Checkbox(
258
- label="Agree",
259
- value=True,
260
- info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
261
- ),
262
- ],
263
- outputs=[
264
- gr.Video(label="Waveform Visual"),
265
- gr.Audio(label="Synthesised Audio"),
266
  ],
267
- title=title,
 
 
268
  description=description,
269
  article=article,
270
- examples=examples,
271
- ).queue().launch(debug=True)
 
 
 
1
+ Hugging Face's logo
2
+ Hugging Face
3
+ Search models, datasets, users...
4
+ Models
5
+ Datasets
6
+ Spaces
7
+ Docs
8
+ Solutions
9
+ Pricing
10
 
 
 
11
 
 
 
 
 
 
 
12
 
13
+ Spaces:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ Kamtera
16
+ /
17
+ Persian-tts-CoquiTTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
+ like
21
+ 13
22
+ App
23
+ Files
24
+ Community
25
+ Persian-tts-CoquiTTS
26
+ /
27
+ app.py
28
+ Kamtera's picture
29
+ Kamtera
30
+ Upload app.py with huggingface_hub
31
+ b9d7413
32
+ 3 months ago
33
+ raw
34
+ history
35
+ blame
36
+ contribute
37
+ delete
38
+ No virus
39
+ 5.05 kB
40
 
41
+ import tempfile ,os
42
+ from TTS.config import load_config
43
+ import gradio as gr
44
+
45
+ from TTS.utils.manage import ModelManager
46
+ from TTS.utils.synthesizer import Synthesizer
47
+
48
+ MODEL_NAMES=[
49
+ "xtts-farsi",
50
+ ]
51
+ MAX_TXT_LEN = 800
52
+ model_path = os.getcwd() + "/best_model.pth"
53
+ config_path = os.getcwd() + "/config.json"
54
+
55
+
56
+
57
+ from TTS.utils.download import download_url
58
+ modelInfo=[
59
+ ["xtts-farsi","best_model_31680.pth","config.json","https://huggingface.co/saillab/xtts_v2_fa_revision1"],
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  ]
62
 
63
# Fetch the checkpoint and config file of every model listed in ``modelInfo``.
# Each entry is unpacked as [local directory, weights file, config file, base URL].
for directory, weights_name, config_name, base_url in modelInfo:
    os.makedirs(directory, exist_ok=True)

    source = base_url.rstrip("/")
    # A bare Hugging Face repository URL points at the HTML repo page; raw
    # files are served from "<repo>/resolve/<revision>/<filename>".
    if "huggingface.co" in source and "/resolve/" not in source:
        source += "/resolve/main"

    for remote_name, local_name in (
        (weights_name, "best_model.pth"),
        (config_name, "config.json"),
    ):
        # Check the file rather than the directory, so that an interrupted
        # earlier run does not block the download forever.
        if os.path.isfile(os.path.join(directory, local_name)):
            continue
        print("|> Downloading: ", directory)
        # Join with an explicit "/" — plain string concatenation glued the
        # file name directly onto the repository name.
        download_url(f"{source}/{remote_name}", directory, local_name)
74
# Loaded synthesizers keyed by model directory, so the checkpoint is read
# from disk once instead of on every request.
_SYNTHESIZER_CACHE = {}


def _get_synthesizer(model_name: str):
    """Return the cached Synthesizer for ``model_name``, loading it on first use."""
    synthesizer = _SYNTHESIZER_CACHE.get(model_name)
    if synthesizer is None:
        checkpoint = model_name + "/best_model.pth"
        config = model_name + "/config.json"
        # Calling Synthesizer(...) can never yield None, so a missing model
        # has to be detected by looking for its files on disk.
        for required in (checkpoint, config):
            if not os.path.isfile(required):
                raise NameError("model not found")
        synthesizer = Synthesizer(checkpoint, config)
        _SYNTHESIZER_CACHE[model_name] = synthesizer
    return synthesizer


def tts(text: str, model_name: str):
    """Synthesize ``text`` with the model stored in directory ``model_name``.

    Args:
        text: Text to speak. Input longer than ``MAX_TXT_LEN`` characters is
            truncated to that length.
        model_name: Directory containing ``best_model.pth`` and ``config.json``.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized audio. The
        file is created with ``delete=False``, so it is not removed here.

    Raises:
        NameError: If the model's checkpoint or config file does not exist.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    synthesizer = _get_synthesizer(model_name)
    wavs = synthesizer.tts(text)

    # delete=False keeps the file on disk after the handle closes so the
    # returned path stays valid for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
92
 
93
+
94
+ description="""
95
+ This is a demo of persian text to speech model.
96
+ **Github : https://github.com/karim23657/Persian-tts-coqui **
97
+ Models can be found here: <br>
98
+ |Model|Dataset|
99
+ |----|------|
100
+ |[vits female (best)](https://huggingface.co/Kamtera/persian-tts-female-vits)|[persian-tts-dataset-famale](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale)|
101
+ |[vits male1 (best)](https://huggingface.co/Kamtera/persian-tts-male1-vits)|[persian-tts-dataset-male](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-male)|
102
+ |[vits female1](https://huggingface.co/Kamtera/persian-tts-female1-vits)|[ParsiGoo](https://github.com/karim23657/ParsiGoo)|
103
+ |[vits male](https://huggingface.co/Kamtera/persian-tts-male-vits)|[persian-tts-dataset](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset)|
104
+ |[glowtts female](https://huggingface.co/Kamtera/persian-tts-female-glow_tts)|[persian-tts-dataset-famale](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale)|
105
+ |[glowtts male](https://huggingface.co/Kamtera/persian-tts-male-glow_tts)|[persian-tts-dataset](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset)|
106
+ |[tacotron2 female](https://huggingface.co/Kamtera/persian-tts-female-tacotron2)|[persian-tts-dataset-famale](https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale)|
107
+ """
108
+ article= ""
109
+ examples=[
110
+ ["و خداوند شما را با ارسال روح در جسم زندگانی و حیات بخشید","xtts-farsi"],
111
+ ["تاجر تو چه تجارت می کنی ، تو را چه که چه تجارت می کنم؟","xtts-farsi"],
112
+ ]
113
+ iface = gr.Interface(
114
+ fn=tts,
115
  inputs=[
116
  gr.Textbox(
117
+ label="Text",
118
+ value="زندگی فقط یک بار است؛ از آن به خوبی استفاده کن",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  ),
120
+ gr.Radio(
121
+ label="Pick a TTS Model ",
122
+ choices=MODEL_NAMES,
123
+ value="xtts-farsi",
 
124
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  ],
126
+ outputs=gr.Audio(label="Output",type='filepath'),
127
+ examples=examples,
128
+ title="🗣️ Persian tts 🗣️",
129
  description=description,
130
  article=article,
131
+ live=False
132
+ )
133
+ iface.launch(share=False)
134
+