Spaces:

AkhilTolani
/

vocals

Runtime error

App Files Files Community

AkhilTolani committed on Jul 25

Commit

2029e38

•

1 Parent(s): be0bc58

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -17

app.py CHANGED Viewed

@@ -1,24 +1,12 @@
 import gradio as gr
 from parler_tts import ParlerTTSForConditionalGeneration
-from transformers import AutoTokenizer
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
 import soundfile as sf
 import torch
 import os
 os.system("bash install.sh")
-# Set the seed for reproducibility
-seed = 456
-torch.manual_seed(seed)
-if torch.cuda.is_available():
-    torch.cuda.manual_seed_all(seed)
-if torch.backends.mps.is_available():
-    torch.backends.mps.manual_seed(seed)
-if torch.xpu.is_available():
-    torch.xpu.manual_seed(seed)
 device = "cpu"
 if torch.cuda.is_available():
     device = "cuda:0"
@@ -31,7 +19,17 @@ torch_dtype = torch.float16 if device != "cpu" else torch.float32
 model = ParlerTTSForConditionalGeneration.from_pretrained("AkhilTolani/parler-tts-music-200000").to(device, dtype=torch_dtype)
 tokenizer = AutoTokenizer.from_pretrained("AkhilTolani/parler-tts-music-200000")
-def generate_audio(prompt, description):
     input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
     prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
@@ -40,7 +38,7 @@ def generate_audio(prompt, description):
     # Set up generation arguments
     gen_kwargs = {
-        "do_sample": False,
         "temperature": 1.0,
         "max_length": 2580,
         "min_new_tokens": num_codebooks + 1,
@@ -60,13 +58,21 @@ def generate_audio(prompt, description):
     return "parler_tts_out.wav"
 interface = gr.Interface(
     fn=generate_audio,
-    inputs=[gr.Textbox(label="Prompt"), gr.Textbox(label="Description")],
     outputs=gr.Audio(label="Generated Audio"),
     title="Parler TTS Audio Generation",
-    description="Generate audio using the Parler TTS model. Provide a prompt and description to generate the corresponding audio."
 )
 if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
 from parler_tts import ParlerTTSForConditionalGeneration
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
 import soundfile as sf
 import torch
 import os
 os.system("bash install.sh")
 device = "cpu"
 if torch.cuda.is_available():
     device = "cuda:0"
 model = ParlerTTSForConditionalGeneration.from_pretrained("AkhilTolani/parler-tts-music-200000").to(device, dtype=torch_dtype)
 tokenizer = AutoTokenizer.from_pretrained("AkhilTolani/parler-tts-music-200000")
+def generate_audio(prompt, description, seed):
+    # Set the seed for reproducibility
+    seed = int(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    if torch.backends.mps.is_available():
+        torch.backends.mps.manual_seed(seed)
+    if torch.xpu.is_available():
+        torch.xpu.manual_seed(seed)
     input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
     prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     # Set up generation arguments
     gen_kwargs = {
+        "do_sample": True,
         "temperature": 1.0,
         "max_length": 2580,
         "min_new_tokens": num_codebooks + 1,
     return "parler_tts_out.wav"
+default_prompt = "free monster when you fucking hoes walk in the room chilettoes sitting up straight my fucking pasha been exposed somoomits start to hurt ive been shitting real gold doing dumb shit you never know or never seen they call me leopard i stay with the green"
+default_description = "Experience the vibrant energy of a hip hop track featuring a male rapper delivering smooth verses over a catchy synth lead melody, supported by punchy kicks, deep 808 bass, claps, and shimmering hi-hats. The song transitions seamlessly to a female vocalist singing melodically alongside a raw synth bass line. Perfect for setting the mood in a solo dance session at home or keeping the party going in a lively club environment."
+default_seed = "456"
 interface = gr.Interface(
     fn=generate_audio,
+    # The three inputs map positionally onto generate_audio(prompt, description, seed).
+    # Initial textbox content is passed as `value=`; `default=` was the keyword of the
+    # legacy gr.inputs.Textbox API and is not a parameter of the gr.Textbox component.
+    inputs=[
+        gr.Textbox(label="Prompt", value=default_prompt),
+        gr.Textbox(label="Description", value=default_description),
+        # The seed is collected as text; generate_audio converts it with int(seed).
+        gr.Textbox(label="Seed", value=default_seed)
+    ],
     outputs=gr.Audio(label="Generated Audio"),
     title="Parler TTS Audio Generation",
+    description="Generate audio using the Parler TTS model. Provide a prompt, description, and seed to generate the corresponding audio."
 )
 if __name__ == "__main__":
+    interface.launch()