mrfakename committed
Commit 43fa799
1 Parent(s): 7daec1c

Sync from GitHub repo


This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions there.

Files changed (2)
  1. app.py +4 -3
  2. src/f5_tts/infer/utils_infer.py +2 -1
app.py CHANGED
@@ -80,9 +80,9 @@ def generate_response(messages, model, tokenizer):
 
 @gpu_decorator
 def infer(
-    ref_audio_orig, ref_text, gen_text, model, remove_silence, cross_fade_duration=0.15, speed=1
+    ref_audio_orig, ref_text, gen_text, model, remove_silence, cross_fade_duration=0.15, speed=1, show_info=gr.Info
 ):
-    ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=gr.Info)
+    ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
 
     if model == "F5-TTS":
         ema_model = F5TTS_ema_model
@@ -97,6 +97,7 @@ def infer(
         vocoder,
         cross_fade_duration=cross_fade_duration,
         speed=speed,
+        show_info=show_info,
         progress=gr.Progress(),
     )
 
@@ -404,7 +405,7 @@ with gr.Blocks() as app_multistyle:
 
        # Generate speech for this segment
        audio, _ = infer(
-           ref_audio, ref_text, text, model_choice, remove_silence, 0
+           ref_audio, ref_text, text, model_choice, remove_silence, 0, show_info=print
        ) # show_info=print no pull to top when generating
        sr, audio_data = audio
 
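In short, app.py now treats the status channel as a parameter: infer accepts a show_info callable (defaulting to gr.Info) and forwards it to preprocess_ref_audio_text and infer_process, and the multi-style tab passes show_info=print so each segment's status message goes to stdout rather than raising a Gradio toast that scrolls the page to the top. A minimal sketch of the pattern follows; the function bodies and example argument values are placeholders, not the project's implementation, and print is used as the default here only to keep the sketch dependency-free.

def preprocess_ref_audio_text(ref_audio, ref_text, show_info=print):
    # Placeholder body standing in for the real reference-audio preprocessing.
    show_info("Preprocessing reference audio and text...")
    return ref_audio, ref_text

def infer(ref_audio_orig, ref_text, gen_text, model, remove_silence,
          cross_fade_duration=0.15, speed=1, show_info=print):
    # show_info may be any callable that takes a message string. In the app the
    # default is gr.Info (UI toast); the multi-style tab overrides it with print
    # so generation does not pull the page to the top.
    ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
    show_info(f"Generating speech with {model}...")
    return ref_audio, ref_text  # placeholder return value

# Multi-style path: route status messages to the console (argument values illustrative).
infer("ref.wav", "reference text", "hello world", "F5-TTS", False, 0, show_info=print)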
 
src/f5_tts/infer/utils_infer.py CHANGED
@@ -278,7 +278,8 @@ def infer_process(
     gen_text_batches = chunk_text(gen_text, max_chars=max_chars)
     for i, gen_text in enumerate(gen_text_batches):
         print(f"gen_text {i}", gen_text)
-
+
+    show_info(f"Generating audio in {len(gen_text_batches)} batches...")
     return infer_batch_process(
         (audio, sr),
         ref_text,
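The utils_infer.py side of the change is the single show_info call: after the generation text is chunked, infer_process reports the batch count through whatever callback the caller supplied (gr.Info from the single-speech tab, print from the multi-style tab). A stripped-down sketch of that flow; the helper name report_batches, the naive chunker, and the max_chars value are illustrative stand-ins, not the project's code.

def chunk_text(text, max_chars=135):
    # Naive stand-in: fixed-size slices instead of the real chunking logic.
    return [text[i:i + max_chars] for i in range(0, len(text), max_chars)]

def report_batches(gen_text, show_info=print):
    # Mirrors the added line in infer_process: chunk the text, log each chunk,
    # then surface the batch count through the caller-supplied callback.
    gen_text_batches = chunk_text(gen_text)
    for i, batch in enumerate(gen_text_batches):
        print(f"gen_text {i}", batch)
    show_info(f"Generating audio in {len(gen_text_batches)} batches...")
    return gen_text_batches

report_batches("some long passage of text to synthesize " * 20)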