Chillarmo committed on
Commit
7ce428c
·
verified ·
1 Parent(s): b1fd4dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -49
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  from outetts.v0_1.interface import InterfaceHF
4
  import soundfile as sf
5
  import tempfile
 
6
 
7
  def initialize_model():
8
  """Initialize the OuteTTS model"""
@@ -39,57 +40,52 @@ def process_audio_file(audio_path, reference_text, text_to_speak, temperature=0.
39
  return None, f"Error: {str(e)}"
40
 
41
  # Create Gradio interface
42
- def create_interface():
43
- with gr.Blocks(title="Voice Cloning with OuteTTS") as app:
44
- gr.Markdown("# 🎙️ Voice Cloning with OuteTTS")
45
- gr.Markdown("""
46
- This app uses OuteTTS to clone voices. Upload a reference audio file, provide the text being spoken in that audio,
47
- and enter the new text you want to be spoken in the cloned voice.
48
-
49
- Note: For best results, use clear audio with minimal background noise.
50
- """)
51
-
52
- with gr.Row():
53
- with gr.Column():
54
- # Input components
55
- audio_input = gr.Audio(label="Upload Reference Audio", type="filepath")
56
- reference_text = gr.Textbox(label="Reference Text (what is being said in the audio)")
57
- text_to_speak = gr.Textbox(label="Text to Speak (what you want the cloned voice to say)")
58
-
59
- with gr.Row():
60
- temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.1, step=0.1,
61
- label="Temperature (higher = more variation)")
62
- repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.1,
63
- label="Repetition Penalty")
64
-
65
- # Submit button
66
- submit_btn = gr.Button("Generate Voice", variant="primary")
67
 
68
- with gr.Column():
69
- # Output components
70
- output_audio = gr.Audio(label="Generated Speech")
71
- output_message = gr.Textbox(label="Status")
72
-
73
- # Handle submission
74
- submit_btn.click(
75
- fn=process_audio_file,
76
- inputs=[audio_input, reference_text, text_to_speak, temperature, repetition_penalty],
77
- outputs=[output_audio, output_message]
78
- )
79
 
80
- gr.Markdown("""
81
- ### Tips for best results:
82
- 1. Use high-quality reference audio (clear speech, minimal background noise)
83
- 2. Ensure reference text matches the audio exactly
84
- 3. Keep generated text relatively short for better quality
85
- 4. Adjust temperature and repetition penalty if needed:
86
- - Lower temperature (0.1-0.3) for more consistent output
87
- - Higher repetition penalty (1.1-1.3) to avoid repetition
88
- """)
 
 
89
 
90
- return app
 
 
 
 
 
 
 
 
91
 
92
- # Launch the interface
93
  if __name__ == "__main__":
94
- app = create_interface()
95
- app.launch(share=True)
 
3
  from outetts.v0_1.interface import InterfaceHF
4
  import soundfile as sf
5
  import tempfile
6
+ import os
7
 
8
  def initialize_model():
9
  """Initialize the OuteTTS model"""
 
40
  return None, f"Error: {str(e)}"
41
 
42
  # Create Gradio interface
43
+ with gr.Blocks(title="Voice Cloning with OuteTTS") as demo:
44
+ gr.Markdown("# 🎙️ Voice Cloning with OuteTTS")
45
+ gr.Markdown("""
46
+ This app uses OuteTTS to clone voices. Upload a reference audio file, provide the text being spoken in that audio,
47
+ and enter the new text you want to be spoken in the cloned voice.
48
+
49
+ Note: For best results, use clear audio with minimal background noise.
50
+ """)
51
+
52
+ with gr.Row():
53
+ with gr.Column():
54
+ # Input components
55
+ audio_input = gr.Audio(label="Upload Reference Audio", type="filepath")
56
+ reference_text = gr.Textbox(label="Reference Text (what is being said in the audio)")
57
+ text_to_speak = gr.Textbox(label="Text to Speak (what you want the cloned voice to say)")
 
 
 
 
 
 
 
 
 
 
58
 
59
+ with gr.Row():
60
+ temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.1, step=0.1,
61
+ label="Temperature (higher = more variation)")
62
+ repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.1,
63
+ label="Repetition Penalty")
64
+
65
+ # Submit button
66
+ submit_btn = gr.Button("Generate Voice", variant="primary")
 
 
 
67
 
68
+ with gr.Column():
69
+ # Output components
70
+ output_audio = gr.Audio(label="Generated Speech")
71
+ output_message = gr.Textbox(label="Status")
72
+
73
+ # Handle submission
74
+ submit_btn.click(
75
+ fn=process_audio_file,
76
+ inputs=[audio_input, reference_text, text_to_speak, temperature, repetition_penalty],
77
+ outputs=[output_audio, output_message]
78
+ )
79
 
80
+ gr.Markdown("""
81
+ ### Tips for best results:
82
+ 1. Use high-quality reference audio (clear speech, minimal background noise)
83
+ 2. Ensure reference text matches the audio exactly
84
+ 3. Keep generated text relatively short for better quality
85
+ 4. Adjust temperature and repetition penalty if needed:
86
+ - Lower temperature (0.1-0.3) for more consistent output
87
+ - Higher repetition penalty (1.1-1.3) to avoid repetition
88
+ """)
89
 
 
90
  if __name__ == "__main__":
91
+ demo.launch()