Voice-Conversion

Running

App Files Files Community

drewThomasson committed on Sep 26, 2024

Commit

f274110

verified ·

1 Parent(s): cd428c2

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -14

app.py CHANGED Viewed

@@ -3,9 +3,9 @@ import torch
 from TTS.api import TTS
 import os
 import librosa
 from datetime import datetime
 # Get device
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -19,36 +19,61 @@ def convert_audio_to_wav(file_path):
     librosa.output.write_wav(output_path, audio, sr)  # Convert to wav
     return output_path
-def voice_conversion(input_audio, target_voice, uploaded_target_voice):
     print(datetime.now())
     output_path = "output.wav"
     # Check if the user uploaded a target voice, otherwise use selected from examples
     if uploaded_target_voice is not None:
         target_voice_path = uploaded_target_voice
-        # Convert uploaded target to wav if necessary
         if not uploaded_target_voice.endswith(".wav"):
             target_voice_path = convert_audio_to_wav(uploaded_target_voice)
     else:
         target_voice_path = os.path.join("Examples", target_voice)
         if not os.path.exists(target_voice_path):
             return "Error: Target voice file not found."
     # Convert input audio to wav if necessary
     if not input_audio.endswith(".wav"):
         input_audio = convert_audio_to_wav(input_audio)
     # Perform voice conversion
     tts.voice_conversion_to_file(source_wav=input_audio, target_wav=target_voice_path, file_path=output_path)
     return output_path
 # Get examples from Examples folder
 examples_folder = "Examples/"
 example_files = [f for f in os.listdir(examples_folder) if f.endswith(".wav")]
-# Define Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("## Voice Conversion using Coqui TTS")
     with gr.Row():
         input_audio = gr.Audio(label="Record or Upload Your Voice", type="filepath")
@@ -67,22 +92,21 @@ with gr.Blocks() as demo:
         play_button = gr.Button("Preview Selected Target Voice")
         preview_audio = gr.Audio(label="Preview Target Voice", type="filepath")
-    # Add convert button and output audio
     convert_button = gr.Button("Convert Voice")
     output_audio = gr.Audio(label="Converted Voice", type="filepath")
     # Preview button for listening to the selected target voice from examples
     def preview_target_voice(selected_target_voice):
         return os.path.join(examples_folder, selected_target_voice)
     play_button.click(preview_target_voice, inputs=[target_voice], outputs=preview_audio)
-    # Conversion process
     convert_button.click(
         voice_conversion,
-        inputs=[input_audio, target_voice, uploaded_target_voice],
         outputs=output_audio
     )
 # Launch with share=True for public URL access and share link
-demo.launch(share=True)

 from TTS.api import TTS
 import os
 import librosa
+import requests
 from datetime import datetime
 # Get device
 device = "cuda" if torch.cuda.is_available() else "cpu"
     librosa.output.write_wav(output_path, audio, sr)  # Convert to wav
     return output_path
+def upload_to_file_io(file_path, timeout=60):
+    """Upload a file to file.io and return its temporary download link.
+
+    Request-level failures are reported as ``None`` rather than raised, so
+    a caller that treats the upload as optional only has to check the
+    return value.
+
+    Args:
+        file_path: Path of the local file to upload.
+        timeout: Seconds to wait for file.io before giving up.
+
+    Returns:
+        The ``link`` field of the JSON response, or ``None`` when the
+        request fails or times out, the status code is not 200, or the
+        response body is not a JSON object.
+
+    Raises:
+        OSError: If ``file_path`` cannot be opened for reading.
+    """
+    url = "https://file.io"
+    with open(file_path, 'rb') as f:
+        try:
+            # Without a timeout, requests waits indefinitely on a stalled
+            # connection and the calling request would never return.
+            response = requests.post(url, files={"file": f}, timeout=timeout)
+        except requests.RequestException:
+            return None
+    if response.status_code != 200:
+        return None
+    try:
+        payload = response.json()
+    except ValueError:
+        # A 200 response with a non-JSON body (e.g. an HTML error page).
+        return None
+    if not isinstance(payload, dict):
+        return None
+    return payload.get('link')
+def voice_conversion(input_audio, target_voice, uploaded_target_voice, restrict_duration):
+    """Convert the voice in ``input_audio`` to sound like a target voice.
+
+    Args:
+        input_audio: File path of the source recording. ``None`` or an
+            empty value is rejected with an error string.
+        target_voice: File name of an example voice inside ``Examples``;
+            only used when ``uploaded_target_voice`` is ``None``.
+        uploaded_target_voice: File path of a user-supplied target voice,
+            or ``None`` to fall back to ``target_voice``.
+        restrict_duration: When truthy, reject source audio longer than
+            120 seconds.
+
+    Returns:
+        ``"output.wav"`` once the conversion has been written, or a string
+        starting with ``"Error:"`` when the request is rejected.
+
+    Side effects:
+        Prints the current time, writes ``output.wav``, uploads that file
+        to file.io via ``upload_to_file_io`` and prints the resulting link
+        (or an upload error message).
+    """
+    # NOTE(review): the click handler routes this return value to a
+    # gr.Audio(type="filepath") output, so the "Error: ..." strings are
+    # presumably interpreted as file paths - consider raising gr.Error
+    # instead; confirm against the Gradio version in use.
     print(datetime.now())
     output_path = "output.wav"
+    # Without this guard a missing recording reaches librosa.get_duration /
+    # str.endswith as None and the handler crashes.
+    if not input_audio:
+        return "Error: No input audio provided."
+    # Check audio duration if the flag is True
+    if restrict_duration:
+        duration = librosa.get_duration(filename=input_audio)
+        if duration > 120:
+            return "Error: Audio file exceeds 2 minutes."
     # Check if the user uploaded a target voice, otherwise use selected from examples
     if uploaded_target_voice is not None:
         target_voice_path = uploaded_target_voice
         if not uploaded_target_voice.endswith(".wav"):
             target_voice_path = convert_audio_to_wav(uploaded_target_voice)
     else:
+        # Neither an upload nor an example selection: os.path.join would
+        # raise TypeError on None, so report it like a missing file.
+        if not target_voice:
+            return "Error: Target voice file not found."
         target_voice_path = os.path.join("Examples", target_voice)
         if not os.path.exists(target_voice_path):
             return "Error: Target voice file not found."
     # Convert input audio to wav if necessary
     if not input_audio.endswith(".wav"):
         input_audio = convert_audio_to_wav(input_audio)
     # Perform voice conversion
     tts.voice_conversion_to_file(source_wav=input_audio, target_wav=target_voice_path, file_path=output_path)
+    # Upload converted file to file.io
+    temp_link = upload_to_file_io(output_path)
+    if temp_link:
+        print(f"File uploaded to: {temp_link}")  # Log the file link to the terminal
+    else:
+        print("Error uploading the file to file.io")
     return output_path
 # Get examples from Examples folder
 examples_folder = "Examples/"
 example_files = [f for f in os.listdir(examples_folder) if f.endswith(".wav")]
+# Define Gradio Interface with Boolean to activate restriction
 with gr.Blocks() as demo:
     gr.Markdown("## Voice Conversion using Coqui TTS")
+    restrict_duration = gr.Checkbox(label="Restrict audio to 2 minutes or less?", value=True)
     with gr.Row():
         input_audio = gr.Audio(label="Record or Upload Your Voice", type="filepath")
         play_button = gr.Button("Preview Selected Target Voice")
         preview_audio = gr.Audio(label="Preview Target Voice", type="filepath")
     convert_button = gr.Button("Convert Voice")
     output_audio = gr.Audio(label="Converted Voice", type="filepath")
     # Preview button for listening to the selected target voice from examples
     def preview_target_voice(selected_target_voice):
         return os.path.join(examples_folder, selected_target_voice)
     play_button.click(preview_target_voice, inputs=[target_voice], outputs=preview_audio)
+    # Conversion process with duration restriction and file.io upload
     convert_button.click(
         voice_conversion,
+        inputs=[input_audio, target_voice, uploaded_target_voice, restrict_duration],
         outputs=output_audio
     )
 # Launch with share=True for public URL access and share link
+demo.launch(share=True)