mrfakename
committed on
Commit
•
2f1c8d2
1
Parent(s):
e6f3c81
more steps
Browse files
app.py
CHANGED
@@ -20,14 +20,15 @@ global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_
|
|
20 |
# else:
|
21 |
for v in voicelist:
|
22 |
voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
|
23 |
-
def synthesize(text, voice):
|
24 |
if text.strip() == "":
|
25 |
raise gr.Error("You must enter some text")
|
26 |
# if len(global_phonemizer.phonemize([text])) > 300:
|
27 |
if len(text) > 300:
|
28 |
raise gr.Error("Text must be under 300 characters")
|
29 |
v = voice.lower()
|
30 |
-
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
|
|
31 |
def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
32 |
if password == os.environ['ACCESS_CODE']:
|
33 |
if text.strip() == "":
|
@@ -44,13 +45,14 @@ def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
|
44 |
return (24000, np.concatenate(audios))
|
45 |
else:
|
46 |
raise gr.Error('Wrong access code')
|
47 |
-
def clsynthesize(text, voice):
|
48 |
if text.strip() == "":
|
49 |
raise gr.Error("You must enter some text")
|
50 |
# if global_phonemizer.phonemize([text]) > 300:
|
51 |
if len(text) > 300:
|
52 |
raise gr.Error("Text must be under 300 characters")
|
53 |
-
return (24000, styletts2importable.inference(text, styletts2importable.compute_style(voice), alpha=0.3, beta=0.7, diffusion_steps=20, embedding_scale=1))
|
|
|
54 |
def ljsynthesize(text):
|
55 |
if text.strip() == "":
|
56 |
raise gr.Error("You must enter some text")
|
@@ -61,31 +63,33 @@ def ljsynthesize(text):
|
|
61 |
return (24000, ljspeechimportable.inference(text, noise, diffusion_steps=7, embedding_scale=1))
|
62 |
|
63 |
|
64 |
-
with gr.Blocks() as vctk:
|
65 |
with gr.Row():
|
66 |
with gr.Column(scale=1):
|
67 |
inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
68 |
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
|
|
|
69 |
# use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
|
70 |
with gr.Column(scale=1):
|
71 |
btn = gr.Button("Synthesize", variant="primary")
|
72 |
audio = gr.Audio(interactive=False, label="Synthesized Audio")
|
73 |
-
btn.click(synthesize, inputs=[inp, voice], outputs=[audio], concurrency_limit=4)
|
74 |
with gr.Blocks() as clone:
|
75 |
with gr.Row():
|
76 |
with gr.Column(scale=1):
|
77 |
clinp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
78 |
clvoice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=300)
|
|
|
79 |
with gr.Column(scale=1):
|
80 |
clbtn = gr.Button("Synthesize", variant="primary")
|
81 |
claudio = gr.Audio(interactive=False, label="Synthesized Audio")
|
82 |
-
clbtn.click(clsynthesize, inputs=[clinp, clvoice], outputs=[claudio], concurrency_limit=4)
|
83 |
with gr.Blocks() as longText:
|
84 |
with gr.Row():
|
85 |
with gr.Column(scale=1):
|
86 |
lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
87 |
lngvoice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
|
88 |
-
lngsteps = gr.Slider(minimum=5, maximum=25, value=10, step=1, label="Diffusion Steps", info="Higher = better quality, but
|
89 |
lngpwd = gr.Textbox(label="Access code", info="This feature is in beta. You need an access code to use it as it uses more resources and we would like to prevent abuse")
|
90 |
with gr.Column(scale=1):
|
91 |
lngbtn = gr.Button("Synthesize", variant="primary")
|
|
|
20 |
# else:
|
21 |
for v in voicelist:
|
22 |
voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
|
23 |
+
def synthesize(text, voice, multispeakersteps):
|
24 |
if text.strip() == "":
|
25 |
raise gr.Error("You must enter some text")
|
26 |
# if len(global_phonemizer.phonemize([text])) > 300:
|
27 |
if len(text) > 300:
|
28 |
raise gr.Error("Text must be under 300 characters")
|
29 |
v = voice.lower()
|
30 |
+
# return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
31 |
+
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
|
32 |
def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
|
33 |
if password == os.environ['ACCESS_CODE']:
|
34 |
if text.strip() == "":
|
|
|
45 |
return (24000, np.concatenate(audios))
|
46 |
else:
|
47 |
raise gr.Error('Wrong access code')
|
48 |
+
def clsynthesize(text, voice, vcsteps):
|
49 |
if text.strip() == "":
|
50 |
raise gr.Error("You must enter some text")
|
51 |
# if global_phonemizer.phonemize([text]) > 300:
|
52 |
if len(text) > 300:
|
53 |
raise gr.Error("Text must be under 300 characters")
|
54 |
+
# return (24000, styletts2importable.inference(text, styletts2importable.compute_style(voice), alpha=0.3, beta=0.7, diffusion_steps=20, embedding_scale=1))
|
55 |
+
return (24000, styletts2importable.inference(text, styletts2importable.compute_style(voice), alpha=0.3, beta=0.7, diffusion_steps=vcsteps, embedding_scale=1))
|
56 |
def ljsynthesize(text):
|
57 |
if text.strip() == "":
|
58 |
raise gr.Error("You must enter some text")
|
|
|
63 |
return (24000, ljspeechimportable.inference(text, noise, diffusion_steps=7, embedding_scale=1))
|
64 |
|
65 |
|
66 |
+
with gr.Blocks() as vctk: # just realized it isn't vctk but libritts but i'm too lazy to change it rn
|
67 |
with gr.Row():
|
68 |
with gr.Column(scale=1):
|
69 |
inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
70 |
voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
|
71 |
+
multispeakersteps = gr.Slider(minimum=5, maximum=15, value=7, step=1, label="Diffusion Steps", info="Higher = better quality, but slower", interactive=True)
|
72 |
# use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
|
73 |
with gr.Column(scale=1):
|
74 |
btn = gr.Button("Synthesize", variant="primary")
|
75 |
audio = gr.Audio(interactive=False, label="Synthesized Audio")
|
76 |
+
btn.click(synthesize, inputs=[inp, voice, multispeakersteps], outputs=[audio], concurrency_limit=4)
|
77 |
with gr.Blocks() as clone:
|
78 |
with gr.Row():
|
79 |
with gr.Column(scale=1):
|
80 |
clinp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
81 |
clvoice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=300)
|
82 |
+
vcsteps = gr.Slider(minimum=5, maximum=20, value=20, step=1, label="Diffusion Steps", info="Higher = better quality, but slower", interactive=True)
|
83 |
with gr.Column(scale=1):
|
84 |
clbtn = gr.Button("Synthesize", variant="primary")
|
85 |
claudio = gr.Audio(interactive=False, label="Synthesized Audio")
|
86 |
+
clbtn.click(clsynthesize, inputs=[clinp, clvoice, vcsteps], outputs=[claudio], concurrency_limit=4)
|
87 |
with gr.Blocks() as longText:
|
88 |
with gr.Row():
|
89 |
with gr.Column(scale=1):
|
90 |
lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
91 |
lngvoice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
|
92 |
+
lngsteps = gr.Slider(minimum=5, maximum=25, value=10, step=1, label="Diffusion Steps", info="Higher = better quality, but slower", interactive=True)
|
93 |
lngpwd = gr.Textbox(label="Access code", info="This feature is in beta. You need an access code to use it as it uses more resources and we would like to prevent abuse")
|
94 |
with gr.Column(scale=1):
|
95 |
lngbtn = gr.Button("Synthesize", variant="primary")
|