Olivier-Truong
committed on
Commit
•
915ef6e
1
Parent(s):
b7e7e06
Update app.py
Browse files
app.py
CHANGED
@@ -8,28 +8,9 @@ from TTS.api import TTS
|
|
8 |
|
9 |
model_names = TTS().list_models()
|
10 |
m = model_names[0]
|
11 |
-
#for model in model_names:
|
12 |
-
# if model.find("/fr/") != -1:
|
13 |
-
# m = model
|
14 |
-
# break
|
15 |
print(model_names)
|
16 |
-
|
17 |
-
|
18 |
-
print(os.listdir("/home/user/.local/lib/python3.10/site-packages/TTS/utils"))
|
19 |
-
old = open("/home/user/.local/lib/python3.10/site-packages/TTS/utils/io.py", "r").read()
|
20 |
-
new_one = old.replace("return torch.load(f, map_location=map_location, **kwargs)", "return torch.load(f, map_location=torch.device('cpu'), **kwargs)")
|
21 |
-
open("/home/user/.local/lib/python3.10/site-packages/TTS/utils/io.py", "w").write(new_one)
|
22 |
-
|
23 |
-
old = open("/home/user/.local/lib/python3.10/site-packages/TTS/tts/models/xtts.py", "r").read()
|
24 |
-
new_one = old.replace("self.load_state_dict(load_fsspec(model_path)[\"model\"], strict=strict)", "self.load_state_dict(load_fsspec(model_path, ** map_location=self.device ** )[\"model\"], strict=strict)")
|
25 |
-
open("/home/user/.local/lib/python3.10/site-packages/TTS/tts/models/xtts.py", "w").write(new_one)
|
26 |
-
|
27 |
-
try:
|
28 |
-
print(open("/home/user/.local/lib/python3.10/site-packages/TTS/utils/io.py", "r").read())
|
29 |
-
except:
|
30 |
-
print("mauvais fichier")
|
31 |
-
"""
|
32 |
-
tts = TTS(m, gpu=False).to("cpu")
|
33 |
#tts.to("cuda") # cuda only
|
34 |
|
35 |
def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
|
@@ -53,7 +34,7 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
|
|
53 |
None,
|
54 |
None,
|
55 |
)
|
56 |
-
if len(prompt)>
|
57 |
gr.Warning("Text length limited to 200 characters for this demo, please try shorter text")
|
58 |
return (
|
59 |
None,
|
@@ -71,14 +52,6 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
|
|
71 |
speaker_wav=speaker_wav,
|
72 |
language=language
|
73 |
)
|
74 |
-
"""
|
75 |
-
tts.tts_to_file(
|
76 |
-
text=prompt,
|
77 |
-
file_path="output.wav",
|
78 |
-
speaker_wav=speaker_wav,
|
79 |
-
language=language,
|
80 |
-
)
|
81 |
-
"""
|
82 |
except RuntimeError as e :
|
83 |
if "device-assert" in str(e):
|
84 |
# cannot do anything on cuda device side error, need to restart
|
@@ -102,7 +75,7 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
|
|
102 |
)
|
103 |
|
104 |
|
105 |
-
title = "Coqui
|
106 |
|
107 |
description = """
|
108 |
<a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
|
@@ -232,7 +205,7 @@ gr.Interface(
|
|
232 |
gr.Textbox(
|
233 |
label="Text Prompt",
|
234 |
info="One or two sentences at a time is better",
|
235 |
-
value="
|
236 |
),
|
237 |
gr.Dropdown(
|
238 |
label="Language",
|
@@ -253,7 +226,7 @@ gr.Interface(
|
|
253 |
"zh-cn",
|
254 |
],
|
255 |
max_choices=1,
|
256 |
-
value="
|
257 |
),
|
258 |
gr.Audio(
|
259 |
label="Reference Audio",
|
|
|
8 |
|
9 |
model_names = TTS().list_models()
|
10 |
m = model_names[0]
|
|
|
|
|
|
|
|
|
11 |
print(model_names)
|
12 |
+
tts = TTS(m, gpu=False)
|
13 |
+
tts.to("cpu") # no GPU or Amd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
#tts.to("cuda") # cuda only
|
15 |
|
16 |
def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
|
|
|
34 |
None,
|
35 |
None,
|
36 |
)
|
37 |
+
if len(prompt)>10000:
|
38 |
gr.Warning("Text length limited to 200 characters for this demo, please try shorter text")
|
39 |
return (
|
40 |
None,
|
|
|
52 |
speaker_wav=speaker_wav,
|
53 |
language=language
|
54 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
except RuntimeError as e :
|
56 |
if "device-assert" in str(e):
|
57 |
# cannot do anything on cuda device side error, need to restart
|
|
|
75 |
)
|
76 |
|
77 |
|
78 |
+
title = "Coqui XTTS Glz's remake"
|
79 |
|
80 |
description = """
|
81 |
<a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
|
|
|
205 |
gr.Textbox(
|
206 |
label="Text Prompt",
|
207 |
info="One or two sentences at a time is better",
|
208 |
+
value="Hello, World !, here is an example of light voice clonaaning. Try your best to upload quality audio",
|
209 |
),
|
210 |
gr.Dropdown(
|
211 |
label="Language",
|
|
|
226 |
"zh-cn",
|
227 |
],
|
228 |
max_choices=1,
|
229 |
+
value="fr",
|
230 |
),
|
231 |
gr.Audio(
|
232 |
label="Reference Audio",
|