Sami committed on
Commit
802577d
·
1 Parent(s): af5d148

Adjust for ZeroGPU

Browse files
.history/app_20250202161356.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+
3
+ # Run the setup.py install command
4
+ try:
5
+ subprocess.run(['python', 'setup.py', 'install', '--user'], check=True)
6
+ print("Installation successful.")
7
+ except subprocess.CalledProcessError as e:
8
+ print(f"Installation failed with error: {e}")
9
+
10
+ import gradio as gr
11
+ import torch
12
+ from TTS.api import TTS
13
+
14
+ # Get device
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
+
17
+ # Init TTS
18
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
19
+
20
+ def voice_clone(text: str, speaker_wav: str, language: str):
21
+ # Run TTS
22
+ print("Speaker wav:", speaker_wav)
23
+ tts.tts_to_file(text=text, speaker_wav=speaker_wav, language=language, file_path="output.wav")
24
+ return "output.wav"
25
+
26
+ iface = gr.Interface(fn=voice_clone, theme="Nymbo/Nymbo_Theme",
27
+ inputs=[gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
28
+ gr.Audio(type="filepath", label="Upload audio file"),
29
+ gr.Radio(['ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt', 'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu'], label="language"),
30
+ ],
31
+ outputs=gr.Audio(type="filepath", label="Generated audio file"),
32
+ title="Voice Cloning")
33
+
34
+ iface.launch()
.history/app_20250202161532.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import spaces
3
+ import os
4
+
5
+ # Run the setup.py install command
6
+ try:
7
+ subprocess.run(['python', 'setup.py', 'install', '--user'], check=True)
8
+ print("Installation successful.")
9
+ except subprocess.CalledProcessError as e:
10
+ print(f"Installation failed with error: {e}")
11
+
12
+ import gradio as gr
13
+ import torch
14
+ from TTS.api import TTS
15
+
16
+ # Get device
17
+ device = "cuda" if torch.cuda.is_available() else "cpu"
18
+
19
+ # Initialize TTS model globally but load it inside the GPU-decorated function
20
+ tts = None
21
+
22
+ @spaces.GPU(duration=120) # Voice cloning can take longer than default 60s
23
+ def initialize_tts():
24
+ global tts
25
+ if tts is None:
26
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
27
+ return tts
28
+
29
+ @spaces.GPU(duration=120)
30
+ def voice_clone(text: str, speaker_wav: str, language: str):
31
+ global tts
32
+ # Initialize TTS if not already done
33
+ if tts is None:
34
+ tts = initialize_tts()
35
+
36
+ # Create output directory if it doesn't exist
37
+ os.makedirs("outputs", exist_ok=True)
38
+ output_path = os.path.join("outputs", "output.wav")
39
+
40
+ # Run TTS
41
+ print("Speaker wav:", speaker_wav)
42
+ tts.tts_to_file(text=text,
43
+ speaker_wav=speaker_wav,
44
+ language=language,
45
+ file_path=output_path)
46
+ return output_path
47
+
48
+ # Create Gradio interface
49
+ iface = gr.Interface(
50
+ fn=voice_clone,
51
+ theme="Nymbo/Nymbo_Theme",
52
+ inputs=[
53
+ gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
54
+ gr.Audio(type="filepath", label="Upload audio file"),
55
+ gr.Radio(
56
+ ['ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt', 'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu'],
57
+ label="language"
58
+ ),
59
+ ],
60
+ outputs=gr.Audio(type="filepath", label="Generated audio file"),
61
+ title="Voice Cloning",
62
+ description="Upload a voice sample and enter text to clone the voice. Processing may take 1-2 minutes."
63
+ )
64
+
65
+ # Launch with queue enabled for better handling of GPU resources
66
+ iface.queue().launch()
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import subprocess
 
 
2
 
3
  # Run the setup.py install command
4
  try:
@@ -14,21 +16,51 @@ from TTS.api import TTS
14
  # Get device
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
 
17
- # Init TTS
18
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
19
 
 
 
 
 
 
 
 
 
20
  def voice_clone(text: str, speaker_wav: str, language: str):
 
 
 
 
 
 
 
 
 
21
  # Run TTS
22
  print("Speaker wav:", speaker_wav)
23
- tts.tts_to_file(text=text, speaker_wav=speaker_wav, language=language, file_path="output.wav")
24
- return "output.wav"
25
-
26
- iface = gr.Interface(fn=voice_clone, theme="Nymbo/Nymbo_Theme",
27
- inputs=[gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
28
- gr.Audio(type="filepath", label="Upload audio file"),
29
- gr.Radio(['ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt', 'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu'], label="language"),
30
- ],
31
- outputs=gr.Audio(type="filepath", label="Generated audio file"),
32
- title="Voice Cloning")
33
-
34
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import subprocess
2
+ import spaces
3
+ import os
4
 
5
  # Run the setup.py install command
6
  try:
 
16
  # Get device
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
 
19
+ # Initialize TTS model globally but load it inside the GPU-decorated function
20
+ tts = None
21
 
22
+ @spaces.GPU(duration=120) # Voice cloning can take longer than default 60s
23
+ def initialize_tts():
24
+ global tts
25
+ if tts is None:
26
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
27
+ return tts
28
+
29
+ @spaces.GPU(duration=120)
30
  def voice_clone(text: str, speaker_wav: str, language: str):
31
+ global tts
32
+ # Initialize TTS if not already done
33
+ if tts is None:
34
+ tts = initialize_tts()
35
+
36
+ # Create output directory if it doesn't exist
37
+ os.makedirs("outputs", exist_ok=True)
38
+ output_path = os.path.join("outputs", "output.wav")
39
+
40
  # Run TTS
41
  print("Speaker wav:", speaker_wav)
42
+ tts.tts_to_file(text=text,
43
+ speaker_wav=speaker_wav,
44
+ language=language,
45
+ file_path=output_path)
46
+ return output_path
47
+
48
+ # Create Gradio interface
49
+ iface = gr.Interface(
50
+ fn=voice_clone,
51
+ theme="Nymbo/Nymbo_Theme",
52
+ inputs=[
53
+ gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
54
+ gr.Audio(type="filepath", label="Upload audio file"),
55
+ gr.Radio(
56
+ ['ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt', 'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu'],
57
+ label="language"
58
+ ),
59
+ ],
60
+ outputs=gr.Audio(type="filepath", label="Generated audio file"),
61
+ title="Voice Cloning",
62
+ description="Upload a voice sample and enter text to clone the voice. Processing may take 1-2 minutes."
63
+ )
64
+
65
+ # Launch with queue enabled for better handling of GPU resources
66
+ iface.queue().launch()