AkhilTolani committed on
Commit
2029e38
1 Parent(s): be0bc58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -17
app.py CHANGED
@@ -1,24 +1,12 @@
1
  import gradio as gr
2
  from parler_tts import ParlerTTSForConditionalGeneration
3
- from transformers import AutoTokenizer
4
  from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
5
-
6
  import soundfile as sf
7
  import torch
8
  import os
9
 
10
  os.system("bash install.sh")
11
 
12
- # Set the seed for reproducibility
13
- seed = 456
14
- torch.manual_seed(seed)
15
- if torch.cuda.is_available():
16
- torch.cuda.manual_seed_all(seed)
17
- if torch.backends.mps.is_available():
18
- torch.backends.mps.manual_seed(seed)
19
- if torch.xpu.is_available():
20
- torch.xpu.manual_seed(seed)
21
-
22
  device = "cpu"
23
  if torch.cuda.is_available():
24
  device = "cuda:0"
@@ -31,7 +19,17 @@ torch_dtype = torch.float16 if device != "cpu" else torch.float32
31
  model = ParlerTTSForConditionalGeneration.from_pretrained("AkhilTolani/parler-tts-music-200000").to(device, dtype=torch_dtype)
32
  tokenizer = AutoTokenizer.from_pretrained("AkhilTolani/parler-tts-music-200000")
33
 
34
- def generate_audio(prompt, description):
 
 
 
 
 
 
 
 
 
 
35
  input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
36
  prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
37
 
@@ -40,7 +38,7 @@ def generate_audio(prompt, description):
40
 
41
  # Set up generation arguments
42
  gen_kwargs = {
43
- "do_sample": False,
44
  "temperature": 1.0,
45
  "max_length": 2580,
46
  "min_new_tokens": num_codebooks + 1,
@@ -60,13 +58,21 @@ def generate_audio(prompt, description):
60
 
61
  return "parler_tts_out.wav"
62
 
 
 
 
 
63
  interface = gr.Interface(
64
  fn=generate_audio,
65
- inputs=[gr.Textbox(label="Prompt"), gr.Textbox(label="Description")],
 
 
 
 
66
  outputs=gr.Audio(label="Generated Audio"),
67
  title="Parler TTS Audio Generation",
68
- description="Generate audio using the Parler TTS model. Provide a prompt and description to generate the corresponding audio."
69
  )
70
 
71
  if __name__ == "__main__":
72
- interface.launch()
 
1
  import gradio as gr
2
  from parler_tts import ParlerTTSForConditionalGeneration
 
3
  from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
 
4
  import soundfile as sf
5
  import torch
6
  import os
7
 
8
  os.system("bash install.sh")
9
 
 
 
 
 
 
 
 
 
 
 
10
  device = "cpu"
11
  if torch.cuda.is_available():
12
  device = "cuda:0"
 
19
  model = ParlerTTSForConditionalGeneration.from_pretrained("AkhilTolani/parler-tts-music-200000").to(device, dtype=torch_dtype)
20
  tokenizer = AutoTokenizer.from_pretrained("AkhilTolani/parler-tts-music-200000")
21
 
22
+ def generate_audio(prompt, description, seed):
23
+ # Set the seed for reproducibility
24
+ seed = int(seed)
25
+ torch.manual_seed(seed)
26
+ if torch.cuda.is_available():
27
+ torch.cuda.manual_seed_all(seed)
28
+ if torch.backends.mps.is_available():
29
+ torch.backends.mps.manual_seed(seed)
30
+ if torch.xpu.is_available():
31
+ torch.xpu.manual_seed(seed)
32
+
33
  input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
34
  prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
35
 
 
38
 
39
  # Set up generation arguments
40
  gen_kwargs = {
41
+ "do_sample": True,
42
  "temperature": 1.0,
43
  "max_length": 2580,
44
  "min_new_tokens": num_codebooks + 1,
 
58
 
59
  return "parler_tts_out.wav"
60
 
61
+ default_prompt = "free monster when you fucking hoes walk in the room chilettoes sitting up straight my fucking pasha been exposed somoomits start to hurt ive been shitting real gold doing dumb shit you never know or never seen they call me leopard i stay with the green"
62
+ default_description = "Experience the vibrant energy of a hip hop track featuring a male rapper delivering smooth verses over a catchy synth lead melody, supported by punchy kicks, deep 808 bass, claps, and shimmering hi-hats. The song transitions seamlessly to a female vocalist singing melodically alongside a raw synth bass line. Perfect for setting the mood in a solo dance session at home or keeping the party going in a lively club environment."
63
+ default_seed = "456"
64
+
65
  interface = gr.Interface(
66
  fn=generate_audio,
67
+ inputs=[
68
+ gr.Textbox(label="Prompt", default=default_prompt),
69
+ gr.Textbox(label="Description", default=default_description),
70
+ gr.Textbox(label="Seed", default=default_seed)
71
+ ],
72
  outputs=gr.Audio(label="Generated Audio"),
73
  title="Parler TTS Audio Generation",
74
+ description="Generate audio using the Parler TTS model. Provide a prompt, description, and seed to generate the corresponding audio."
75
  )
76
 
77
  if __name__ == "__main__":
78
+ interface.launch()