Chillarmo committed on
Commit
5a39a85
1 Parent(s): d1e13a5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from outetts.v0_1.interface import InterfaceHF
4
+ import soundfile as sf
5
+ import tempfile
6
+
7
# Process-wide cache for the interface object, so repeated calls reuse one
# instance instead of constructing a new one each time.
_cached_interface = None


def initialize_model() -> "InterfaceHF":
    """Return the shared OuteTTS interface, creating it on first use.

    The first call constructs ``InterfaceHF("OuteAI/OuteTTS-0.1-350M")`` and
    stores it in a module-level variable; later calls return that same object.
    Constructing the interface presumably loads the model (confirm against the
    OuteTTS documentation), which is why it is not repeated per call.

    Returns:
        The cached ``InterfaceHF`` instance.
    """
    global _cached_interface
    if _cached_interface is None:
        _cached_interface = InterfaceHF("OuteAI/OuteTTS-0.1-350M")
    return _cached_interface
11
+
12
def process_audio_file(
    audio_path: "str | None",
    reference_text: str,
    text_to_speak: str,
    temperature: float = 0.1,
    repetition_penalty: float = 1.1,
) -> "tuple[str | None, str]":
    """Generate speech for ``text_to_speak`` in the voice of a reference clip.

    Args:
        audio_path: Path to the reference recording, or a falsy value when
            nothing was supplied.
        reference_text: Transcript of the reference recording.
        text_to_speak: Text to synthesize with the cloned voice.
        temperature: Forwarded unchanged to ``interface.generate``.
        repetition_penalty: Forwarded unchanged to ``interface.generate``.

    Returns:
        A ``(wav_path, status)`` pair. On success ``wav_path`` is the path of
        a newly created temporary ``.wav`` file (not deleted by this function)
        and ``status`` is a success message. On any failure ``wav_path`` is
        ``None`` and ``status`` starts with ``"Error: "``. Exceptions are
        reported through ``status`` rather than raised.
    """
    import os  # local import: only needed for best-effort temp-file cleanup

    # Reject obviously unusable input up front so the user gets a clear
    # message instead of whatever exception the library would raise.
    if not audio_path:
        return None, "Error: Please upload a reference audio file."
    if not reference_text or not reference_text.strip():
        return None, "Error: Please provide the reference text spoken in the audio."
    if not text_to_speak or not text_to_speak.strip():
        return None, "Error: Please provide the text to speak."

    wav_path = None
    try:
        interface = initialize_model()

        # Build a speaker profile from the reference clip and its transcript.
        speaker = interface.create_speaker(audio_path, reference_text)

        # NOTE(review): `max_lenght` (sic) is kept exactly as the original
        # call spelled it; it appears to be the keyword the OuteTTS API
        # itself uses - confirm against the installed version before renaming.
        output = interface.generate(
            text=text_to_speak,
            speaker=speaker,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_lenght=4096,
        )

        # Reserve a unique path, then close our handle before the library
        # writes to it, so no file descriptor is leaked and the file is not
        # held open while being written.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            wav_path = temp_file.name
        output.save(wav_path)
        return wav_path, "Voice cloning successful!"

    except Exception as e:
        # UI boundary: surface the failure as a status message. Remove the
        # reserved temp file, if any, so failed runs do not accumulate files.
        if wav_path is not None:
            try:
                os.remove(wav_path)
            except OSError:
                pass  # best-effort cleanup; the original error matters more
        return None, f"Error: {str(e)}"
40
+
41
# Build the Gradio UI
def create_interface():
    """Assemble the Gradio Blocks UI for the voice-cloning demo.

    The layout is a single row with two columns: the left one holds the
    reference audio upload, the two text boxes, the temperature and
    repetition-penalty sliders and the submit button; the right one holds the
    generated audio and a status text box. Clicking the button calls
    ``process_audio_file`` with the five input values and routes its two
    return values to the output components.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    with gr.Blocks(title="Voice Cloning with OuteTTS") as demo:
        gr.Markdown("# 🎙️ Voice Cloning with OuteTTS")
        gr.Markdown("""
        This app uses OuteTTS to clone voices. Upload a reference audio file, provide the text being spoken in that audio,
        and enter the new text you want to be spoken in the cloned voice.

        Note: For best results, use clear audio with minimal background noise.
        """)

        with gr.Row():
            with gr.Column():
                # Left column: everything the user supplies.
                ref_audio = gr.Audio(
                    label="Upload Reference Audio",
                    type="filepath",
                )
                ref_transcript = gr.Textbox(
                    label="Reference Text (what is being said in the audio)",
                )
                target_text = gr.Textbox(
                    label="Text to Speak (what you want the cloned voice to say)",
                )

                with gr.Row():
                    temp_slider = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.1,
                        step=0.1,
                        label="Temperature (higher = more variation)",
                    )
                    penalty_slider = gr.Slider(
                        minimum=1.0,
                        maximum=2.0,
                        value=1.1,
                        step=0.1,
                        label="Repetition Penalty",
                    )

                generate_btn = gr.Button("Generate Voice", variant="primary")

            with gr.Column():
                # Right column: results of a generation run.
                result_audio = gr.Audio(label="Generated Speech")
                status_box = gr.Textbox(label="Status")

        # Order must match the positional parameters / return values of
        # process_audio_file.
        click_inputs = [
            ref_audio,
            ref_transcript,
            target_text,
            temp_slider,
            penalty_slider,
        ]
        click_outputs = [result_audio, status_box]
        generate_btn.click(
            fn=process_audio_file,
            inputs=click_inputs,
            outputs=click_outputs,
        )

        gr.Markdown("""
        ### Tips for best results:
        1. Use high-quality reference audio (clear speech, minimal background noise)
        2. Ensure reference text matches the audio exactly
        3. Keep generated text relatively short for better quality
        4. Adjust temperature and repetition penalty if needed:
           - Lower temperature (0.1-0.3) for more consistent output
           - Higher repetition penalty (1.1-1.3) to avoid repetition
        """)

    return demo
91
+
92
# Script entry point: build the UI and start serving it only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    app = create_interface()
    # share=True is forwarded to Gradio's launch(); per the Gradio docs this
    # requests a publicly reachable share link in addition to the local
    # server - NOTE(review): confirm that public exposure is intended.
    app.launch(share=True)