Fabrice-TIERCELIN committed on
Commit
dd59b92
·
verified ·
1 Parent(s): 1d9aa0b

randomize_seed

Browse files
Files changed (1) hide show
  1. app.py +23 -4
app.py CHANGED
@@ -4,6 +4,7 @@ import re
4
  import time
5
  import math
6
  import torch
 
7
  import spaces
8
  # By using XTTS you agree to CPML license https://coqui.ai/cpml
9
  os.environ["COQUI_TOS_AGREED"] = "1"
@@ -11,6 +12,8 @@ os.environ["COQUI_TOS_AGREED"] = "1"
11
  import gradio as gr
12
  from TTS.api import TTS
13
  from TTS.utils.manage import ModelManager
 
 
14
  model_names = TTS().list_models()
15
  print(model_names.__dict__)
16
  print(model_names.__dir__())
@@ -32,8 +35,12 @@ else:
32
  tts = TTS(model_name, gpu=torch.cuda.is_available())
33
  tts.to(device_type)
34
 
35
- def predict(prompt, language, gender, audio_file_pth, mic_file_path, use_mic):
36
  start = time.time()
 
 
 
 
37
  if len(prompt) < 2:
38
  gr.Warning("Please give a longer prompt text")
39
  return (
@@ -76,7 +83,7 @@ def predict(prompt, language, gender, audio_file_pth, mic_file_path, use_mic):
76
  language = "fr-fr"
77
  if m.find("/fr/") != -1:
78
  language = None
79
- predict_on_gpu(prompt, speaker_wav, language, output_filename)
80
  except RuntimeError as e :
81
  if "device-assert" in str(e):
82
  # cannot do anything on cuda device side error, need to restart
@@ -102,7 +109,10 @@ def predict(prompt, language, gender, audio_file_pth, mic_file_path, use_mic):
102
  )
103
 
104
  @spaces.GPU(duration=60)
105
- def predict_on_gpu(prompt, speaker_wav, language, output_filename):
 
 
 
106
  tts.tts_to_file(
107
  text = prompt,
108
  file_path = output_filename,
@@ -174,6 +184,8 @@ Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, wh
174
  info="Notice: Microphone input may not work properly under traffic",)
175
  with gr.Accordion("Advanced options", open = False):
176
  debug_mode = gr.Checkbox(label = "Debug mode", value = False, info = "Show intermediate results")
 
 
177
 
178
  submit = gr.Button("🚀 Speak", variant = "primary")
179
 
@@ -182,7 +194,14 @@ Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, wh
182
  information = gr.HTML()
183
 
184
  submit.click(predict, inputs = [
185
- prompt, language, gender, audio_file_pth, mic_file_path, use_mic
 
 
 
 
 
 
 
186
  ], outputs = [
187
  waveform_visual,
188
  synthesised_audio,
 
4
  import time
5
  import math
6
  import torch
7
+ import random
8
  import spaces
9
  # By using XTTS you agree to CPML license https://coqui.ai/cpml
10
  os.environ["COQUI_TOS_AGREED"] = "1"
 
12
  import gradio as gr
13
  from TTS.api import TTS
14
  from TTS.utils.manage import ModelManager
15
+
16
+ max_64_bit_int = 2**63 - 1
17
  model_names = TTS().list_models()
18
  print(model_names.__dict__)
19
  print(model_names.__dir__())
 
35
  tts = TTS(model_name, gpu=torch.cuda.is_available())
36
  tts.to(device_type)
37
 
38
+ def predict(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, randomize_seed, seed):
39
  start = time.time()
40
+
41
+ if randomize_seed:
42
+ seed = random.randint(0, max_64_bit_int)
43
+
44
  if len(prompt) < 2:
45
  gr.Warning("Please give a longer prompt text")
46
  return (
 
83
  language = "fr-fr"
84
  if m.find("/fr/") != -1:
85
  language = None
86
+ predict_on_gpu(prompt, speaker_wav, language, output_filename, seed)
87
  except RuntimeError as e :
88
  if "device-assert" in str(e):
89
  # cannot do anything on cuda device side error, need to restart
 
109
  )
110
 
111
  @spaces.GPU(duration=60)
112
+ def predict_on_gpu(prompt, speaker_wav, language, output_filename, seed):
113
+ random.seed(seed)
114
+ torch.manual_seed(seed)
115
+
116
  tts.tts_to_file(
117
  text = prompt,
118
  file_path = output_filename,
 
184
  info="Notice: Microphone input may not work properly under traffic",)
185
  with gr.Accordion("Advanced options", open = False):
186
  debug_mode = gr.Checkbox(label = "Debug mode", value = False, info = "Show intermediate results")
187
+ randomize_seed = gr.Checkbox(label = "\U0001F3B2 Randomize seed", value = True, info = "If checked, result is always different")
188
+ seed = gr.Slider(minimum = 0, maximum = max_64_bit_int, step = 1, randomize = True, label = "Seed")
189
 
190
  submit = gr.Button("🚀 Speak", variant = "primary")
191
 
 
194
  information = gr.HTML()
195
 
196
  submit.click(predict, inputs = [
197
+ prompt,
198
+ language,
199
+ gender,
200
+ audio_file_pth,
201
+ mic_file_path,
202
+ use_mic,
203
+ randomize_seed,
204
+ seed
205
  ], outputs = [
206
  waveform_visual,
207
  synthesised_audio,