Commit a638c29
Parent(s): ebf7396

Update app.py

app.py CHANGED
@@ -1,26 +1,14 @@
-import os
 import torch
 import gradio as gr
-import torchaudio
-import time
-from datetime import datetime
 from tortoise.api import TextToSpeech
 from tortoise.utils.audio import load_voice, load_voices
 
-
-    "angie",
-    "deniro",
-    "freeman",
-    "emma",
-]
+tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
 
-def inference(
-    text,
-    voice,
-    Emotion,
-    Preset,
-):
-
+languages = ['Male', 'Female']
+voices = {'Male': ['deniro', 'freeman'], 'Female': ['emma', 'angie']}
+
+def inference(text, Gender, voice, Emotion, Preset):
     texts = [text]
 
     Angry_tone = "[I am so angry]"
@@ -37,12 +25,7 @@ def inference(
     if Emotion == "Scared":
         text = Scared_tone + text
 
-
-
-    if len(voices) == 1:
-        voice_samples, conditioning_latents = load_voice(voice)
-    else:
-        voice_samples, conditioning_latents = load_voices(voices)
+    voice_samples, conditioning_latents = load_voice(voice)
 
     audio_frames = []
 
@@ -54,55 +37,26 @@
         preset=Preset,
         k=1
     ):
-        audio_frames.append(torch.from_numpy(audio_frame.cpu().detach().numpy()))
+        audio_frames.append(torch.from_numpy(audio_frame.cpu().detach().numpy()))
 
     complete_audio = torch.cat(audio_frames, dim=0)
-
-
-
-
-
-
-
-
-
-)
-
-
-
-)
-
-
-
-
-
-
-Preset = gr.Radio(
-    ["ultra_fast", "fast", "standard", "high_quality"],
-    type="value",
-    value="ultra_fast",
-)
-
-output_audio = gr.Audio(label="streaming audio:", streaming=True, autoplay=True)
-interface = gr.Interface(
-    fn=inference,
-    inputs=[
-        text,
-        voice,
-        Emotion,
-        Preset,
-    ],
-    title=title,
-    outputs=[output_audio],
-)
-interface.queue().launch()
-
-if __name__ == "__main__":
-    tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
-
-    with open("Tortoise_TTS_Runs_Scripts.log", "a") as f:
-        f.write(
-            f"\n\n-------------------------Tortoise TTS Scripts Logs, {datetime.now()}-------------------------\n"
-        )
-
-    main()
+    yield (24000, complete_audio.numpy())
+
+def rs_change(rs):
+    new_choices = voices[rs]
+    return gr.update(choices=new_choices, value=new_choices[0] if new_choices else None)
+
+title = "Tortoise TTS"
+
+with gr.Blocks() as app:
+    text = gr.Textbox(lines=4, label="Text:")
+    rs = gr.Dropdown(choices=languages, value='Male', label="Gender")
+    rs_hw = gr.Dropdown(choices=voices['Male'], interactive=True, label="Voice")
+    rs.change(fn=rs_change, inputs=[rs], outputs=[rs_hw])
+    Emotion = gr.Radio(["Angry", "Sad", "Happy", "Scared"], type="value", label="Emotion")
+    Preset = gr.Radio(["ultra_fast", "fast", "standard", "high_quality"], type="value", value="ultra_fast", label="Preset")
+    output_audio = gr.Audio(label="Streaming audio:", streaming=True, autoplay=True)
+    btn = gr.Button("Generate")
+    btn.click(inference, inputs=[text, rs, rs_hw, Emotion, Preset], outputs=[output_audio])
+
+app.launch()
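The Gender-to-Voice linkage added in this commit works by returning gr.update(...) from a change callback: Gradio applies the update to the output component, swapping its choices in place. A minimal self-contained sketch of that pattern, reusing the commit's own voices mapping (the demo variable name is illustrative):

import gradio as gr

# Same gender-to-voice mapping as in the commit.
voices = {'Male': ['deniro', 'freeman'], 'Female': ['emma', 'angie']}

def rs_change(rs):
    # Return an update object; Gradio swaps the target dropdown's choices
    # and resets its value to the first entry of the new list.
    new_choices = voices[rs]
    return gr.update(choices=new_choices, value=new_choices[0] if new_choices else None)

with gr.Blocks() as demo:
    rs = gr.Dropdown(choices=list(voices), value='Male', label="Gender")
    rs_hw = gr.Dropdown(choices=voices['Male'], interactive=True, label="Voice")
    rs.change(fn=rs_change, inputs=[rs], outputs=[rs_hw])  # re-run on every Gender change

demo.launch()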
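Because output_audio is declared with streaming=True and inference is a generator, Gradio treats each yielded (sample_rate, numpy_array) tuple as an audio chunk and can begin playback before generation finishes. A self-contained sketch of that contract, with a synthetic sine-wave source standing in for Tortoise (tone_stream and its parameters are illustrative):

import numpy as np
import gradio as gr

def tone_stream(freq):
    # Yield one-second chunks; with streaming=True the Audio component
    # starts playing on the first (sample_rate, array) tuple it receives.
    sr = 24000
    t = np.arange(sr) / sr
    for _ in range(5):
        yield (sr, np.sin(2 * np.pi * float(freq) * t).astype(np.float32))

with gr.Blocks() as demo:
    freq = gr.Slider(110, 880, value=440, label="Frequency (Hz)")
    out = gr.Audio(streaming=True, autoplay=True)
    gr.Button("Play").click(tone_stream, inputs=[freq], outputs=[out])

demo.launch()

Note that in the committed inference the yield comes only after torch.cat has concatenated the collected frames, so each yielded chunk is a complete clip rather than an individual frame; yielding inside the generation loop would start playback sooner.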