Spaces:
Runtime error
Runtime error
AkhilTolani
committed on
Commit
•
2029e38
1
Parent(s):
be0bc58
Update app.py
Browse files
app.py
CHANGED
@@ -1,24 +1,12 @@
|
|
1 |
import gradio as gr
|
2 |
from parler_tts import ParlerTTSForConditionalGeneration
|
3 |
-
from transformers import AutoTokenizer
|
4 |
from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
|
5 |
-
|
6 |
import soundfile as sf
|
7 |
import torch
|
8 |
import os
|
9 |
|
10 |
os.system("bash install.sh")
|
11 |
|
12 |
-
# Set the seed for reproducibility
|
13 |
-
seed = 456
|
14 |
-
torch.manual_seed(seed)
|
15 |
-
if torch.cuda.is_available():
|
16 |
-
torch.cuda.manual_seed_all(seed)
|
17 |
-
if torch.backends.mps.is_available():
|
18 |
-
torch.backends.mps.manual_seed(seed)
|
19 |
-
if torch.xpu.is_available():
|
20 |
-
torch.xpu.manual_seed(seed)
|
21 |
-
|
22 |
device = "cpu"
|
23 |
if torch.cuda.is_available():
|
24 |
device = "cuda:0"
|
@@ -31,7 +19,17 @@ torch_dtype = torch.float16 if device != "cpu" else torch.float32
|
|
31 |
model = ParlerTTSForConditionalGeneration.from_pretrained("AkhilTolani/parler-tts-music-200000").to(device, dtype=torch_dtype)
|
32 |
tokenizer = AutoTokenizer.from_pretrained("AkhilTolani/parler-tts-music-200000")
|
33 |
|
34 |
-
def generate_audio(prompt, description):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
|
36 |
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
|
37 |
|
@@ -40,7 +38,7 @@ def generate_audio(prompt, description):
|
|
40 |
|
41 |
# Set up generation arguments
|
42 |
gen_kwargs = {
|
43 |
-
"do_sample":
|
44 |
"temperature": 1.0,
|
45 |
"max_length": 2580,
|
46 |
"min_new_tokens": num_codebooks + 1,
|
@@ -60,13 +58,21 @@ def generate_audio(prompt, description):
|
|
60 |
|
61 |
return "parler_tts_out.wav"
|
62 |
|
|
|
|
|
|
|
|
|
63 |
interface = gr.Interface(
|
64 |
fn=generate_audio,
|
65 |
-
inputs=[
|
|
|
|
|
|
|
|
|
66 |
outputs=gr.Audio(label="Generated Audio"),
|
67 |
title="Parler TTS Audio Generation",
|
68 |
-
description="Generate audio using the Parler TTS model. Provide a prompt and
|
69 |
)
|
70 |
|
71 |
if __name__ == "__main__":
|
72 |
-
interface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from parler_tts import ParlerTTSForConditionalGeneration
|
|
|
3 |
from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
|
|
|
4 |
import soundfile as sf
|
5 |
import torch
|
6 |
import os
|
7 |
|
8 |
os.system("bash install.sh")
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
device = "cpu"
|
11 |
if torch.cuda.is_available():
|
12 |
device = "cuda:0"
|
|
|
19 |
model = ParlerTTSForConditionalGeneration.from_pretrained("AkhilTolani/parler-tts-music-200000").to(device, dtype=torch_dtype)
|
20 |
tokenizer = AutoTokenizer.from_pretrained("AkhilTolani/parler-tts-music-200000")
|
21 |
|
22 |
+
def generate_audio(prompt, description, seed):
|
23 |
+
# Set the seed for reproducibility
|
24 |
+
seed = int(seed)
|
25 |
+
torch.manual_seed(seed)
|
26 |
+
if torch.cuda.is_available():
|
27 |
+
torch.cuda.manual_seed_all(seed)
|
28 |
+
if torch.backends.mps.is_available():
|
29 |
+
torch.backends.mps.manual_seed(seed)
|
30 |
+
if torch.xpu.is_available():
|
31 |
+
torch.xpu.manual_seed(seed)
|
32 |
+
|
33 |
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
|
34 |
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
|
35 |
|
|
|
38 |
|
39 |
# Set up generation arguments
|
40 |
gen_kwargs = {
|
41 |
+
"do_sample": True,
|
42 |
"temperature": 1.0,
|
43 |
"max_length": 2580,
|
44 |
"min_new_tokens": num_codebooks + 1,
|
|
|
58 |
|
59 |
return "parler_tts_out.wav"
|
60 |
|
61 |
+
default_prompt = "free monster when you fucking hoes walk in the room chilettoes sitting up straight my fucking pasha been exposed somoomits start to hurt ive been shitting real gold doing dumb shit you never know or never seen they call me leopard i stay with the green"
|
62 |
+
default_description = "Experience the vibrant energy of a hip hop track featuring a male rapper delivering smooth verses over a catchy synth lead melody, supported by punchy kicks, deep 808 bass, claps, and shimmering hi-hats. The song transitions seamlessly to a female vocalist singing melodically alongside a raw synth bass line. Perfect for setting the mood in a solo dance session at home or keeping the party going in a lively club environment."
|
63 |
+
default_seed = "456"
|
64 |
+
|
65 |
# Gradio UI: three text inputs (prompt, description, seed) are passed
# positionally to generate_audio, whose return value feeds the audio output.
interface = gr.Interface(
    fn=generate_audio,
    inputs=[
        # Pre-fill each field through `value`: that is the parameter
        # gr.Textbox uses for its initial content. The previous `default=`
        # keyword is not a Textbox argument, so the defaults never applied
        # (and newer Gradio releases reject the unknown keyword outright).
        gr.Textbox(label="Prompt", value=default_prompt),
        gr.Textbox(label="Description", value=default_description),
        # The seed is collected as text; generate_audio converts it with int().
        gr.Textbox(label="Seed", value=default_seed),
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="Parler TTS Audio Generation",
    description="Generate audio using the Parler TTS model. Provide a prompt, description, and seed to generate the corresponding audio.",
)

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()
|