Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
from outetts.v0_1.interface import InterfaceHF
|
4 |
+
import soundfile as sf
|
5 |
+
import tempfile
|
6 |
+
|
7 |
+
# Interfaces already constructed in this process, keyed by model path, so a
# model is built once instead of on every request.
_INTERFACE_CACHE = {}


def initialize_model(model_path="OuteAI/OuteTTS-0.1-350M"):
    """Return the OuteTTS ``InterfaceHF`` for *model_path*, building it at most once.

    Args:
        model_path: Model identifier handed to ``InterfaceHF``. Defaults to
            the checkpoint this app was originally hard-coded to use.

    Returns:
        The ``InterfaceHF`` instance for ``model_path``. Repeated calls with
        the same path return the same object.
    """
    interface = _INTERFACE_CACHE.get(model_path)
    if interface is None:
        interface = InterfaceHF(model_path)
        _INTERFACE_CACHE[model_path] = interface
    return interface
|
11 |
+
|
12 |
+
def _save_to_temp_wav(output):
    """Save *output* to a new temporary ``.wav`` file and return the file's path."""
    import os

    # Reserve a unique path, then close the handle before saving: an open
    # NamedTemporaryFile cannot be reopened by name on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        wav_path = temp_file.name
    try:
        output.save(wav_path)
    except Exception:
        # Do not leave an empty placeholder file behind when saving fails.
        try:
            os.remove(wav_path)
        except OSError:
            pass
        raise
    return wav_path


def process_audio_file(audio_path, reference_text, text_to_speak, temperature=0.1, repetition_penalty=1.1):
    """Generate speech for *text_to_speak* using a voice cloned from a reference clip.

    Args:
        audio_path: Path to the reference audio file; falsy when nothing
            was uploaded.
        reference_text: Transcript of what is said in the reference audio.
        text_to_speak: Text to synthesize with the cloned voice.
        temperature: Forwarded unchanged to ``interface.generate``.
        repetition_penalty: Forwarded unchanged to ``interface.generate``.

    Returns:
        A ``(wav_path, message)`` tuple. On success ``wav_path`` is the path
        of a temporary ``.wav`` file and ``message`` reports success. On any
        failure ``wav_path`` is ``None`` and ``message`` starts with
        ``"Error: "``. Exceptions are reported through the message rather
        than raised, so the UI always gets something to display.
    """
    try:
        # Reject incomplete input up front with an actionable message instead
        # of surfacing whatever the model library raises for it.
        if not audio_path:
            return None, "Error: Please upload a reference audio file."
        if not reference_text or not reference_text.strip():
            return None, "Error: Please enter the reference text spoken in the audio."
        if not text_to_speak or not text_to_speak.strip():
            return None, "Error: Please enter the text to speak."

        interface = initialize_model()

        # Build a speaker profile from the reference clip and its transcript.
        speaker = interface.create_speaker(audio_path, reference_text)

        output = interface.generate(
            text=text_to_speak,
            speaker=speaker,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            # NOTE(review): "max_lenght" is kept exactly as originally
            # spelled; it presumably matches the keyword name in outetts
            # v0.1 -- confirm against the library before renaming.
            max_lenght=4096,
        )

        return _save_to_temp_wav(output), "Voice cloning successful!"

    except Exception as e:
        # UI boundary: turn any failure into a status message.
        return None, f"Error: {str(e)}"
|
40 |
+
|
41 |
+
# Create Gradio interface
|
42 |
+
def create_interface():
    """Build the Gradio Blocks page for the voice-cloning demo.

    The page places an input column (reference audio, its transcript, the
    text to synthesize, two sliders and a submit button) beside an output
    column (generated audio and a status box). Clicking the button calls
    ``process_audio_file`` with the five input values and routes its two
    return values to the output components.

    Returns:
        The assembled ``gr.Blocks`` object. It is not launched here.
    """
    intro_text = """
        This app uses OuteTTS to clone voices. Upload a reference audio file, provide the text being spoken in that audio,
        and enter the new text you want to be spoken in the cloned voice.

        Note: For best results, use clear audio with minimal background noise.
        """
    tips_text = """
        ### Tips for best results:
        1. Use high-quality reference audio (clear speech, minimal background noise)
        2. Ensure reference text matches the audio exactly
        3. Keep generated text relatively short for better quality
        4. Adjust temperature and repetition penalty if needed:
           - Lower temperature (0.1-0.3) for more consistent output
           - Higher repetition penalty (1.1-1.3) to avoid repetition
        """

    with gr.Blocks(title="Voice Cloning with OuteTTS") as demo:
        gr.Markdown("# 🎙️ Voice Cloning with OuteTTS")
        gr.Markdown(intro_text)

        with gr.Row():
            # Left column: everything the user supplies.
            with gr.Column():
                ref_audio = gr.Audio(label="Upload Reference Audio", type="filepath")
                ref_transcript = gr.Textbox(
                    label="Reference Text (what is being said in the audio)"
                )
                target_text = gr.Textbox(
                    label="Text to Speak (what you want the cloned voice to say)"
                )

                with gr.Row():
                    temp_slider = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.1,
                        step=0.1,
                        label="Temperature (higher = more variation)",
                    )
                    penalty_slider = gr.Slider(
                        minimum=1.0,
                        maximum=2.0,
                        value=1.1,
                        step=0.1,
                        label="Repetition Penalty",
                    )

                generate_button = gr.Button("Generate Voice", variant="primary")

            # Right column: results of the last generation.
            with gr.Column():
                result_audio = gr.Audio(label="Generated Speech")
                status_box = gr.Textbox(label="Status")

        # Input order must match process_audio_file's parameter order.
        click_inputs = [ref_audio, ref_transcript, target_text, temp_slider, penalty_slider]
        click_outputs = [result_audio, status_box]
        generate_button.click(
            fn=process_audio_file,
            inputs=click_inputs,
            outputs=click_outputs,
        )

        gr.Markdown(tips_text)

    return demo
|
91 |
+
|
92 |
+
# Launch the interface
|
93 |
+
# Script entry point: runs only when the file is executed directly, not
# when it is imported.
if __name__ == "__main__":
    # Build the UI first; `app` stays bound at module level.
    app = create_interface()
    # NOTE(review): share=True presumably asks Gradio for a public share
    # link in addition to the local server -- confirm this is intended for
    # the deployment target.
    app.launch(share=True)
|