Spaces:

Chillarmo
/

Voice_Cloning_with_OuteTTS

Running

App Files Files Community

Chillarmo committed on Nov 5, 2024

Commit

7ce428c

verified ·

1 Parent(s): b1fd4dd

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -49

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 from outetts.v0_1.interface import InterfaceHF
 import soundfile as sf
 import tempfile
 def initialize_model():
     """Initialize the OuteTTS model"""
@@ -39,57 +40,52 @@ def process_audio_file(audio_path, reference_text, text_to_speak, temperature=0.
         return None, f"Error: {str(e)}"
 # Create Gradio interface
-def create_interface():
-    with gr.Blocks(title="Voice Cloning with OuteTTS") as app:
-        gr.Markdown("# 🎙️ Voice Cloning with OuteTTS")
-        gr.Markdown("""
-        This app uses OuteTTS to clone voices. Upload a reference audio file, provide the text being spoken in that audio,
-        and enter the new text you want to be spoken in the cloned voice.
-        Note: For best results, use clear audio with minimal background noise.
-        """)
-        with gr.Row():
-            with gr.Column():
-                # Input components
-                audio_input = gr.Audio(label="Upload Reference Audio", type="filepath")
-                reference_text = gr.Textbox(label="Reference Text (what is being said in the audio)")
-                text_to_speak = gr.Textbox(label="Text to Speak (what you want the cloned voice to say)")
-                with gr.Row():
-                    temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.1, step=0.1,
-                                         label="Temperature (higher = more variation)")
-                    repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.1,
-                                                 label="Repetition Penalty")
-                # Submit button
-                submit_btn = gr.Button("Generate Voice", variant="primary")
-            with gr.Column():
-                # Output components
-                output_audio = gr.Audio(label="Generated Speech")
-                output_message = gr.Textbox(label="Status")
-        # Handle submission
-        submit_btn.click(
-            fn=process_audio_file,
-            inputs=[audio_input, reference_text, text_to_speak, temperature, repetition_penalty],
-            outputs=[output_audio, output_message]
-        )
-        gr.Markdown("""
-        ### Tips for best results:
-        1. Use high-quality reference audio (clear speech, minimal background noise)
-        2. Ensure reference text matches the audio exactly
-        3. Keep generated text relatively short for better quality
-        4. Adjust temperature and repetition penalty if needed:
-           - Lower temperature (0.1-0.3) for more consistent output
-           - Higher repetition penalty (1.1-1.3) to avoid repetition
-        """)
-    return app
-# Launch the interface
 if __name__ == "__main__":
-    app = create_interface()
-    app.launch(share=True)

 from outetts.v0_1.interface import InterfaceHF
 import soundfile as sf
 import tempfile
+import os
 def initialize_model():
     """Initialize the OuteTTS model"""
         return None, f"Error: {str(e)}"
 # Create Gradio interface
+with gr.Blocks(title="Voice Cloning with OuteTTS") as demo:
+    gr.Markdown("# 🎙️ Voice Cloning with OuteTTS")
+    gr.Markdown("""
+    This app uses OuteTTS to clone voices. Upload a reference audio file, provide the text being spoken in that audio,
+    and enter the new text you want to be spoken in the cloned voice.
+    Note: For best results, use clear audio with minimal background noise.
+    """)
+    with gr.Row():
+        with gr.Column():
+            # Input components
+            audio_input = gr.Audio(label="Upload Reference Audio", type="filepath")
+            reference_text = gr.Textbox(label="Reference Text (what is being said in the audio)")
+            text_to_speak = gr.Textbox(label="Text to Speak (what you want the cloned voice to say)")
+            with gr.Row():
+                temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.1, step=0.1,
+                                     label="Temperature (higher = more variation)")
+                repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.1,
+                                             label="Repetition Penalty")
+            # Submit button
+            submit_btn = gr.Button("Generate Voice", variant="primary")
+        with gr.Column():
+            # Output components
+            output_audio = gr.Audio(label="Generated Speech")
+            output_message = gr.Textbox(label="Status")
+    # Handle submission
+    submit_btn.click(
+        fn=process_audio_file,
+        inputs=[audio_input, reference_text, text_to_speak, temperature, repetition_penalty],
+        outputs=[output_audio, output_message]
+    )
+    gr.Markdown("""
+    ### Tips for best results:
+    1. Use high-quality reference audio (clear speech, minimal background noise)
+    2. Ensure reference text matches the audio exactly
+    3. Keep generated text relatively short for better quality
+    4. Adjust temperature and repetition penalty if needed:
+       - Lower temperature (0.1-0.3) for more consistent output
+       - Higher repetition penalty (1.1-1.3) to avoid repetition
+    """)
 if __name__ == "__main__":
+    demo.launch()