georg-suno committed on
Commit
1892f86
·
1 Parent(s): bbc001b

add new prompts

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -1,19 +1,27 @@
1
  import numpy as np
2
  import gradio as gr
3
  from bark import SAMPLE_RATE, generate_audio, preload_models
 
4
 
5
  DEBUG_MODE = False
6
 
7
  if not DEBUG_MODE:
8
  _ = preload_models()
9
 
 
 
 
 
 
 
 
 
 
 
10
  default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."
11
 
12
  def gen_tts(text, history_prompt, temp_semantic, temp_waveform):
13
- if history_prompt == "Unconditional":
14
- history_prompt = None
15
- else:
16
- history_prompt = history_prompt.lower().replace(" ", "_").replace("speaker", "speech")
17
  if DEBUG_MODE:
18
  audio_arr = np.zeros(SAMPLE_RATE)
19
  else:
@@ -26,10 +34,7 @@ iface = gr.Interface(
26
  fn=gen_tts,
27
  inputs=[
28
  gr.Textbox(label="Input Text", lines=3, value=default_text),
29
- gr.Dropdown(
30
- ["Unconditional"] + [f"Speaker {n}" for n in range(8)] + [f"Music {n}" for n in range(6)],
31
- value="None", label="Acoustic Prompt", info="This choice primes the model on how to condition the generated audio."
32
- ),
33
  gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 1", info="Gen. temperature of semantic tokens. (lower is more conservative, higher is more diverse)"),
34
  gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 2", info="Gen. temperature of waveform tokens. (lower is more conservative, higher is more diverse)"),
35
  ],
 
1
  import numpy as np
2
  import gradio as gr
3
  from bark import SAMPLE_RATE, generate_audio, preload_models
4
+ from bark.generation import SUPPORTED_LANGS
5
 
6
  DEBUG_MODE = False
7
 
8
  if not DEBUG_MODE:
9
  _ = preload_models()
10
 
11
+ AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
12
+ PROMPT_LOOKUP = {}
13
+ for _, lang in SUPPORTED_LANGS:
14
+ for n in range(10):
15
+ label = f"Speaker {n} ({lang})"
16
+ AVAILABLE_PROMPTS.append(label)
17
+ PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
18
+ PROMPT_LOOKUP["Unconditional"] = None
19
+ PROMPT_LOOKUP["Announcer"] = "announcer"
20
+
21
  default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."
22
 
23
  def gen_tts(text, history_prompt, temp_semantic, temp_waveform):
24
+ history_prompt = PROMPT_LOOKUP[history_prompt]
 
 
 
25
  if DEBUG_MODE:
26
  audio_arr = np.zeros(SAMPLE_RATE)
27
  else:
 
34
  fn=gen_tts,
35
  inputs=[
36
  gr.Textbox(label="Input Text", lines=3, value=default_text),
37
+ gr.Dropdown(AVAILABLE_PROMPTS, value="None", label="Acoustic Prompt", info="This choice primes the model on how to condition the generated audio."),
 
 
 
38
  gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 1", info="Gen. temperature of semantic tokens. (lower is more conservative, higher is more diverse)"),
39
  gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 2", info="Gen. temperature of waveform tokens. (lower is more conservative, higher is more diverse)"),
40
  ],