Spaces:
Running
Running
mrfakename
committed on
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
app.py
CHANGED
@@ -240,23 +240,28 @@ with gr.Blocks() as app_multistyle:
|
|
240 |
|
241 |
# Regular speech type (mandatory)
|
242 |
with gr.Row():
|
243 |
-
|
|
|
|
|
244 |
regular_audio = gr.Audio(label="Regular Reference Audio", type="filepath")
|
245 |
regular_ref_text = gr.Textbox(label="Reference Text (Regular)", lines=2)
|
246 |
|
247 |
# Additional speech types (up to 99 more)
|
248 |
max_speech_types = 100
|
249 |
speech_type_rows = []
|
250 |
-
speech_type_names = []
|
251 |
speech_type_audios = []
|
252 |
speech_type_ref_texts = []
|
253 |
speech_type_delete_btns = []
|
|
|
|
|
254 |
|
255 |
for i in range(max_speech_types - 1):
|
256 |
with gr.Row(visible=False) as row:
|
257 |
with gr.Column():
|
258 |
name_input = gr.Textbox(label="Speech Type Name")
|
259 |
delete_btn = gr.Button("Delete", variant="secondary")
|
|
|
260 |
audio_input = gr.Audio(label="Reference Audio", type="filepath")
|
261 |
ref_text_input = gr.Textbox(label="Reference Text", lines=2)
|
262 |
speech_type_rows.append(row)
|
@@ -264,6 +269,7 @@ with gr.Blocks() as app_multistyle:
|
|
264 |
speech_type_audios.append(audio_input)
|
265 |
speech_type_ref_texts.append(ref_text_input)
|
266 |
speech_type_delete_btns.append(delete_btn)
|
|
|
267 |
|
268 |
# Button to add speech type
|
269 |
add_speech_type_btn = gr.Button("Add Speech Type")
|
@@ -321,6 +327,22 @@ with gr.Blocks() as app_multistyle:
|
|
321 |
placeholder="Enter the script with speaker names (or emotion types) at the start of each block, e.g.:\n\n{Regular} Hello, I'd like to order a sandwich please.\n{Surprised} What do you mean you're out of bread?\n{Sad} I really wanted a sandwich though...\n{Angry} You know what, darn you and your little shop!\n{Whisper} I'll just go back home and cry now.\n{Shouting} Why me?!",
|
322 |
)
|
323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
# Model choice
|
325 |
model_choice_multistyle = gr.Radio(choices=["F5-TTS", "E2-TTS"], label="Choose TTS Model", value="F5-TTS")
|
326 |
|
@@ -347,7 +369,7 @@ with gr.Blocks() as app_multistyle:
|
|
347 |
speech_type_names_list = args[:num_additional_speech_types]
|
348 |
speech_type_audios_list = args[num_additional_speech_types : 2 * num_additional_speech_types]
|
349 |
speech_type_ref_texts_list = args[2 * num_additional_speech_types : 3 * num_additional_speech_types]
|
350 |
-
model_choice = args[3 * num_additional_speech_types]
|
351 |
remove_silence = args[3 * num_additional_speech_types + 1]
|
352 |
|
353 |
# Collect the speech types and their audios into a dict
|
|
|
240 |
|
241 |
# Regular speech type (mandatory)
|
242 |
with gr.Row():
|
243 |
+
with gr.Column():
|
244 |
+
regular_name = gr.Textbox(value="Regular", label="Speech Type Name")
|
245 |
+
regular_insert = gr.Button("Insert", variant="secondary")
|
246 |
regular_audio = gr.Audio(label="Regular Reference Audio", type="filepath")
|
247 |
regular_ref_text = gr.Textbox(label="Reference Text (Regular)", lines=2)
|
248 |
|
249 |
# Additional speech types (up to 99 more)
|
250 |
max_speech_types = 100
|
251 |
speech_type_rows = []
|
252 |
+
speech_type_names = [regular_name]
|
253 |
speech_type_audios = []
|
254 |
speech_type_ref_texts = []
|
255 |
speech_type_delete_btns = []
|
256 |
+
speech_type_insert_btns = []
|
257 |
+
speech_type_insert_btns.append(regular_insert)
|
258 |
|
259 |
for i in range(max_speech_types - 1):
|
260 |
with gr.Row(visible=False) as row:
|
261 |
with gr.Column():
|
262 |
name_input = gr.Textbox(label="Speech Type Name")
|
263 |
delete_btn = gr.Button("Delete", variant="secondary")
|
264 |
+
insert_btn = gr.Button("Insert", variant="secondary")
|
265 |
audio_input = gr.Audio(label="Reference Audio", type="filepath")
|
266 |
ref_text_input = gr.Textbox(label="Reference Text", lines=2)
|
267 |
speech_type_rows.append(row)
|
|
|
269 |
speech_type_audios.append(audio_input)
|
270 |
speech_type_ref_texts.append(ref_text_input)
|
271 |
speech_type_delete_btns.append(delete_btn)
|
272 |
+
speech_type_insert_btns.append(insert_btn)
|
273 |
|
274 |
# Button to add speech type
|
275 |
add_speech_type_btn = gr.Button("Add Speech Type")
|
|
|
327 |
placeholder="Enter the script with speaker names (or emotion types) at the start of each block, e.g.:\n\n{Regular} Hello, I'd like to order a sandwich please.\n{Surprised} What do you mean you're out of bread?\n{Sad} I really wanted a sandwich though...\n{Angry} You know what, darn you and your little shop!\n{Whisper} I'll just go back home and cry now.\n{Shouting} Why me?!",
|
328 |
)
|
329 |
|
330 |
+
def make_insert_speech_type_fn(index):
    """Build the click handler for one speech type's "Insert" button.

    `index` is accepted so a separate handler can be created per button;
    the handler itself does not read it.
    """

    def insert_speech_type_fn(current_text, speech_type_name):
        """Append a ``{name} `` tag for the speech type to the script text.

        A missing or empty script is treated as an empty string, and a
        missing or empty name becomes the literal "None". Returns a
        ``gr.update`` carrying the extended text.
        """
        script = current_text if current_text else ""
        label = speech_type_name if speech_type_name else "None"
        # Doubled braces are literal: the tag renders as "{label} ".
        tag = "{{{}}} ".format(label)
        return gr.update(value=script + tag)

    return insert_speech_type_fn
|
337 |
+
|
338 |
+
for i, insert_btn in enumerate(speech_type_insert_btns):
|
339 |
+
insert_fn = make_insert_speech_type_fn(i)
|
340 |
+
insert_btn.click(
|
341 |
+
insert_fn,
|
342 |
+
inputs=[gen_text_input_multistyle, speech_type_names[i]],
|
343 |
+
outputs=gen_text_input_multistyle,
|
344 |
+
)
|
345 |
+
|
346 |
# Model choice
|
347 |
model_choice_multistyle = gr.Radio(choices=["F5-TTS", "E2-TTS"], label="Choose TTS Model", value="F5-TTS")
|
348 |
|
|
|
369 |
speech_type_names_list = args[:num_additional_speech_types]
|
370 |
speech_type_audios_list = args[num_additional_speech_types : 2 * num_additional_speech_types]
|
371 |
speech_type_ref_texts_list = args[2 * num_additional_speech_types : 3 * num_additional_speech_types]
|
372 |
+
model_choice = args[3 * num_additional_speech_types + 1]
|
373 |
remove_silence = args[3 * num_additional_speech_types + 1]
|
374 |
|
375 |
# Collect the speech types and their audios into a dict
|