poemsforaphrodite committed on
Commit
88f7073
·
verified ·
1 Parent(s): 390fe1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -26
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import streamlit as st
2
  import torch
3
  from TTS.api import TTS
4
  import os
@@ -9,7 +9,6 @@ os.environ["COQUI_TOS_AGREED"] = "1"
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
  # Initialize TTS model
12
- @st.cache_resource
13
  def load_tts_model():
14
  return TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
15
 
@@ -32,30 +31,30 @@ def clone(text, audio_file, language, speaking_rate, pitch, volume,
32
 
33
  return temp_audio_path
34
 
35
- st.title('Advanced Voice Clone')
36
- st.write('Customize your voice cloning experience with various parameters.')
37
-
38
- text = st.text_area('Text')
39
- audio_file = st.file_uploader('Voice reference audio file', type=['wav', 'mp3'])
40
- language = st.selectbox('Language', ["en", "es", "fr", "de", "it"], index=0)
41
- speaking_rate = st.slider('Speaking Rate', 0.5, 2.0, 1.0)
42
- pitch = st.slider('Pitch Adjustment', -10, 10, 0)
43
- volume = st.slider('Volume', 0.1, 2.0, 1.0)
44
- emotion = st.selectbox('Emotion', ["neutral", "happy", "sad", "angry"], index=0)
45
- sample_rate = st.selectbox('Sample Rate', [22050, 24000, 44100, 48000], index=1)
46
- temperature = st.slider('Temperature', 0.1, 1.0, 0.8)
47
- seed = st.number_input('Seed (optional)', value=None)
48
-
49
- if st.button('Generate'):
50
- if text and audio_file:
51
- with st.spinner('Generating audio...'):
52
- output_path = clone(text, audio_file, language, speaking_rate, pitch, volume,
53
- emotion, sample_rate, temperature, seed)
54
- st.audio(output_path)
55
- else:
56
- st.warning('Please provide both text and a voice reference audio file.')
57
-
58
- # Clean up temporary files
59
  for file in os.listdir():
60
  if file.endswith('.wav') and file.startswith('tmp'):
61
  os.remove(file)
 
1
+ import gradio as gr
2
  import torch
3
  from TTS.api import TTS
4
  import os
 
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
  # Initialize TTS model
 
12
  def load_tts_model():
13
  return TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
14
 
 
31
 
32
  return temp_audio_path
33
 
34
+ # Define Gradio interface
35
+ iface = gr.Interface(
36
+ fn=clone,
37
+ inputs=[
38
+ gr.Textbox(label="Text"),
39
+ gr.Audio(label="Voice reference audio file", type="filepath"),
40
+ gr.Dropdown(["en", "es", "fr", "de", "it"], label="Language", value="en"),
41
+ gr.Slider(0.5, 2.0, value=1.0, label="Speaking Rate"),
42
+ gr.Slider(-10, 10, value=0, label="Pitch Adjustment"),
43
+ gr.Slider(0.1, 2.0, value=1.0, label="Volume"),
44
+ gr.Dropdown(["neutral", "happy", "sad", "angry"], label="Emotion", value="neutral"),
45
+ gr.Dropdown([22050, 24000, 44100, 48000], label="Sample Rate", value=24000),
46
+ gr.Slider(0.1, 1.0, value=0.8, label="Temperature"),
47
+ gr.Number(label="Seed (optional)")
48
+ ],
49
+ outputs=gr.Audio(label="Generated Audio"),
50
+ title="Advanced Voice Clone",
51
+ description="Customize your voice cloning experience with various parameters."
52
+ )
53
+
54
+ # Launch the interface
55
+ iface.launch()
56
+
57
+ # Clean up temporary files (this will run after the Gradio server is closed)
58
  for file in os.listdir():
59
  if file.endswith('.wav') and file.startswith('tmp'):
60
  os.remove(file)