Spaces:

Chillarmo
/

Voice_Cloning_with_OuteTTS

Running

App Files Community

Chillarmo committed on Nov 5

Commit

5a39a85

•

1 Parent(s): d1e13a5

Create app.py

Browse files

Files changed (1) hide show

app.py +95 -0

app.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import gradio as gr
+import torch
+from outetts.v0_1.interface import InterfaceHF
+import soundfile as sf
+import tempfile
+def initialize_model():
+    """Initialize the OuteTTS model"""
+    interface = InterfaceHF("OuteAI/OuteTTS-0.1-350M")
+    return interface
+def process_audio_file(audio_path, reference_text, text_to_speak, temperature=0.1, repetition_penalty=1.1):
+    """Process the audio file and generate speech with the cloned voice"""
+    try:
+        # Initialize model
+        interface = initialize_model()
+        # Create speaker from reference audio
+        speaker = interface.create_speaker(
+            audio_path,
+            reference_text
+        )
+        # Generate speech with cloned voice
+        output = interface.generate(
+            text=text_to_speak,
+            speaker=speaker,
+            temperature=temperature,
+            repetition_penalty=repetition_penalty,
+            max_lenght=4096
+        )
+        # Save to temporary file and return path
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        output.save(temp_file.name)
+        return temp_file.name, "Voice cloning successful!"
+    except Exception as e:
+        return None, f"Error: {str(e)}"
+# Create Gradio interface
+def create_interface():
+    with gr.Blocks(title="Voice Cloning with OuteTTS") as app:
+        gr.Markdown("# 🎙️ Voice Cloning with OuteTTS")
+        gr.Markdown("""
+        This app uses OuteTTS to clone voices. Upload a reference audio file, provide the text being spoken in that audio,
+        and enter the new text you want to be spoken in the cloned voice.
+        Note: For best results, use clear audio with minimal background noise.
+        """)
+        with gr.Row():
+            with gr.Column():
+                # Input components
+                audio_input = gr.Audio(label="Upload Reference Audio", type="filepath")
+                reference_text = gr.Textbox(label="Reference Text (what is being said in the audio)")
+                text_to_speak = gr.Textbox(label="Text to Speak (what you want the cloned voice to say)")
+                with gr.Row():
+                    temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.1, step=0.1,
+                                         label="Temperature (higher = more variation)")
+                    repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.1,
+                                                 label="Repetition Penalty")
+                # Submit button
+                submit_btn = gr.Button("Generate Voice", variant="primary")
+            with gr.Column():
+                # Output components
+                output_audio = gr.Audio(label="Generated Speech")
+                output_message = gr.Textbox(label="Status")
+        # Handle submission
+        submit_btn.click(
+            fn=process_audio_file,
+            inputs=[audio_input, reference_text, text_to_speak, temperature, repetition_penalty],
+            outputs=[output_audio, output_message]
+        )
+        gr.Markdown("""
+        ### Tips for best results:
+        1. Use high-quality reference audio (clear speech, minimal background noise)
+        2. Ensure reference text matches the audio exactly
+        3. Keep generated text relatively short for better quality
+        4. Adjust temperature and repetition penalty if needed:
+           - Lower temperature (0.1-0.3) for more consistent output
+           - Higher repetition penalty (1.1-1.3) to avoid repetition
+        """)
+    return app
+# Launch the interface
+if __name__ == "__main__":
+    app = create_interface()
+    app.launch(share=True)