edwko committed on
Commit
0b3a1f3
·
verified ·
1 Parent(s): d6571a5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from outetts.v0_1.interface import InterfaceHF
3
+
4
+ interface = InterfaceHF("OuteAI/OuteTTS-0.1-350M")
5
+
6
def generate_tts(text, temperature, repetition_penalty, reference_audio, reference_text):
    """Synthesize speech for ``text`` and return the path of the saved WAV file.

    Args:
        text: Text to synthesize.
        temperature: Sampling temperature, forwarded to ``interface.generate``.
        repetition_penalty: Repetition penalty, forwarded to
            ``interface.generate``.
        reference_audio: Path of a reference recording used for voice
            cloning, or a falsy value when none was supplied.
        reference_text: Transcription matching ``reference_audio``. Voice
            cloning is only attempted when both the audio and a non-blank
            transcription are present.

    Returns:
        Path of a freshly created ``.wav`` file holding the generated audio.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    # A transcription made only of whitespace is treated as missing, so the
    # speaker profile is never built from an effectively empty transcript.
    has_transcript = bool(reference_text and reference_text.strip())
    if reference_audio and has_transcript:
        speaker = interface.create_speaker(reference_audio, reference_text)
    else:
        speaker = None

    output = interface.generate(
        text=text,
        speaker=speaker,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
    )

    # Write every result to its own file. A single shared "output.wav" would
    # be overwritten when two requests are handled at the same time, so one
    # user could receive another user's audio.
    # NOTE(review): these files are not removed here; confirm whether the
    # hosting environment cleans its temp directory or add periodic cleanup.
    fd, output_path = tempfile.mkstemp(prefix="outetts_", suffix=".wav")
    os.close(fd)  # only the path is needed; output.save() writes the file
    output.save(output_path)
    return output_path
21
+
22
# Build the demo page: synthesis controls on the left, generated audio on the right.
with gr.Blocks() as demo:
    gr.Markdown("# OuteTTS-0.1-350M Text-to-Speech Demo")

    with gr.Row():
        # Left column: text, sampling controls and optional voice-cloning inputs.
        with gr.Column():
            text_input = gr.Textbox(
                label="Text to Synthesize",
                placeholder="Enter text here...",
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.1,
                label="Temperature",
            )
            repetition_penalty = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.1,
                label="Repetition Penalty",
            )

            gr.Markdown("""
            **Note**: For voice cloning, both a reference audio file and its corresponding transcription must be provided.
            If either the audio file or transcription is missing, the model will generate audio with random characteristics.""")
            reference_audio = gr.Audio(
                label="Reference Audio (for voice cloning)",
                type="filepath",
            )
            reference_text = gr.Textbox(
                label="Reference Transcription Text (matching the audio)",
                placeholder="Enter reference text here if using voice cloning",
            )
            submit_button = gr.Button(value="Generate Speech")

        # Right column: player for the synthesized result.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Audio", type="filepath")

    # The order of this list must match the parameter order of generate_tts.
    tts_inputs = [
        text_input,
        temperature,
        repetition_penalty,
        reference_audio,
        reference_text,
    ]
    submit_button.click(fn=generate_tts, inputs=tts_inputs, outputs=audio_output)

demo.launch()