db committed on
Commit
a2df0c5
·
1 Parent(s): 5ad97cb
Files changed (2) hide show
  1. .idea/workspace.xml +72 -2
  2. app.py +213 -0
.idea/workspace.xml CHANGED
@@ -2,7 +2,7 @@
2
  <project version="4">
3
  <component name="ChangeListManager">
4
  <list default="true" id="6f3a79aa-e3bd-440d-b0d9-38be2ab06fa3" name="Changes" comment="init">
5
- <change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" />
6
  </list>
7
  <option name="SHOW_DIALOG" value="false" />
8
  <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -41,7 +41,77 @@
41
  <option name="project" value="LOCAL" />
42
  <updated>1683456502470</updated>
43
  </task>
44
- <option name="localTasksCounter" value="2" />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  <servers />
46
  </component>
47
  <component name="Vcs.Log.Tabs.Properties">
 
2
  <project version="4">
3
  <component name="ChangeListManager">
4
  <list default="true" id="6f3a79aa-e3bd-440d-b0d9-38be2ab06fa3" name="Changes" comment="init">
5
+ <change afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
6
  </list>
7
  <option name="SHOW_DIALOG" value="false" />
8
  <option name="HIGHLIGHT_CONFLICTS" value="true" />
 
41
  <option name="project" value="LOCAL" />
42
  <updated>1683456502470</updated>
43
  </task>
44
+ <task id="LOCAL-00002" summary="init">
45
+ <created>1683456698781</created>
46
+ <option name="number" value="00002" />
47
+ <option name="presentableId" value="LOCAL-00002" />
48
+ <option name="project" value="LOCAL" />
49
+ <updated>1683456698781</updated>
50
+ </task>
51
+ <task id="LOCAL-00003" summary="init">
52
+ <created>1683456708549</created>
53
+ <option name="number" value="00003" />
54
+ <option name="presentableId" value="LOCAL-00003" />
55
+ <option name="project" value="LOCAL" />
56
+ <updated>1683456708549</updated>
57
+ </task>
58
+ <task id="LOCAL-00004" summary="init">
59
+ <created>1683456749208</created>
60
+ <option name="number" value="00004" />
61
+ <option name="presentableId" value="LOCAL-00004" />
62
+ <option name="project" value="LOCAL" />
63
+ <updated>1683456749208</updated>
64
+ </task>
65
+ <task id="LOCAL-00005" summary="init">
66
+ <created>1683456785738</created>
67
+ <option name="number" value="00005" />
68
+ <option name="presentableId" value="LOCAL-00005" />
69
+ <option name="project" value="LOCAL" />
70
+ <updated>1683456785738</updated>
71
+ </task>
72
+ <task id="LOCAL-00006" summary="init">
73
+ <created>1683456838521</created>
74
+ <option name="number" value="00006" />
75
+ <option name="presentableId" value="LOCAL-00006" />
76
+ <option name="project" value="LOCAL" />
77
+ <updated>1683456838521</updated>
78
+ </task>
79
+ <task id="LOCAL-00007" summary="init">
80
+ <created>1683456856631</created>
81
+ <option name="number" value="00007" />
82
+ <option name="presentableId" value="LOCAL-00007" />
83
+ <option name="project" value="LOCAL" />
84
+ <updated>1683456856631</updated>
85
+ </task>
86
+ <task id="LOCAL-00008" summary="init">
87
+ <created>1683456878691</created>
88
+ <option name="number" value="00008" />
89
+ <option name="presentableId" value="LOCAL-00008" />
90
+ <option name="project" value="LOCAL" />
91
+ <updated>1683456878691</updated>
92
+ </task>
93
+ <task id="LOCAL-00009" summary="init">
94
+ <created>1683456940183</created>
95
+ <option name="number" value="00009" />
96
+ <option name="presentableId" value="LOCAL-00009" />
97
+ <option name="project" value="LOCAL" />
98
+ <updated>1683456940183</updated>
99
+ </task>
100
+ <task id="LOCAL-00010" summary="init">
101
+ <created>1683456950180</created>
102
+ <option name="number" value="00010" />
103
+ <option name="presentableId" value="LOCAL-00010" />
104
+ <option name="project" value="LOCAL" />
105
+ <updated>1683456950180</updated>
106
+ </task>
107
+ <task id="LOCAL-00011" summary="init">
108
+ <created>1683456957352</created>
109
+ <option name="number" value="00011" />
110
+ <option name="presentableId" value="LOCAL-00011" />
111
+ <option name="project" value="LOCAL" />
112
+ <updated>1683456957352</updated>
113
+ </task>
114
+ <option name="localTasksCounter" value="12" />
115
  <servers />
116
  </component>
117
  <component name="Vcs.Log.Tabs.Properties">
app.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Hugging Face's logo
2
+ Hugging Face
3
+ Search models, datasets, users...
4
+ Models
5
+ Datasets
6
+ Spaces
7
+ Docs
8
+ Solutions
9
+ Pricing
10
+
11
+
12
+
13
+ Spaces:
14
+
15
+ suno
16
+ /
17
+ bark Copied
18
+ like
19
+ 758
20
+ App
21
+ Files
22
+ Community
23
+ 54
24
+ bark
25
+ /
26
+ app.py
27
+ georg-suno's picture
28
+ georg-suno
29
+ fix-header-button (#11)
30
+ 589dfd0
31
+ 11 days ago
32
+ raw
33
+ history
34
+ blame
35
+ contribute
36
+ delete
37
+ 8.24 kB
38
+ import numpy as np
39
+ import gradio as gr
40
+ from bark import SAMPLE_RATE, generate_audio, preload_models
41
+ from bark.generation import SUPPORTED_LANGS
42
+ from share_btn import community_icon_html, loading_icon_html, share_js
43
+
44
# When True, the Bark models are not preloaded at import time.
DEBUG_MODE = False

if not DEBUG_MODE:
    _ = preload_models()

# (dropdown label, Bark preset name) pairs: ten numbered speakers for each
# entry of SUPPORTED_LANGS. Each entry is unpacked as a 2-tuple and only its
# second element (used as the language code) is needed here.
_speaker_prompts = [
    (f"Speaker {idx} ({code})", f"{code}_speaker_{idx}")
    for _name, code in SUPPORTED_LANGS
    for idx in range(10)
]

# Labels offered in the dropdown, in display order.
AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
AVAILABLE_PROMPTS.extend(text for text, _preset in _speaker_prompts)

# Maps every dropdown label to the history prompt passed to Bark.
# "Unconditional" maps to None.
PROMPT_LOOKUP = dict(_speaker_prompts)
PROMPT_LOOKUP["Unconditional"] = None
PROMPT_LOOKUP["Announcer"] = "announcer"
58
+
59
# Text pre-filled in the input box when the page loads.
default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."

# Markdown heading rendered at the top of the page. The original value ended
# with a stray "</div>" that had no matching opening tag.
title = "# 🐶 Bark"
62
+
63
# Badge row plus introductory Markdown rendered under the title.
# A trailing backslash inside the literal joins the following line, so the
# sentences below form a single Markdown paragraph.
# The Suno link carries an explicit https:// scheme: a bare "www.suno.ai"
# target is treated as a path relative to the current page.
description = """
<div>
<a style="display:inline-block" href='https://github.com/suno-ai/bark'><img src='https://img.shields.io/github/stars/suno-ai/bark?style=social' /></a>
<a style='display:inline-block' href='https://discord.gg/J2B2vsjKuE'><img src='https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat' /></a>
<a style="display:inline-block; margin-left: 1em" href="https://huggingface.co/spaces/suno/bark?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space%20to%20skip%20the%20queue-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a>
</div>
Bark is a universal text-to-audio model created by [Suno](https://suno.ai/), with code publicly available [here](https://github.com/suno-ai/bark). \
Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \
This demo should be used for research purposes only. Commercial use is strictly prohibited. \
The model output is not censored and the authors do not endorse the opinions in the generated content. \
Use at your own risk.
"""
75
+
76
# Long-form Markdown usage notes: language support, non-speech sound tags,
# music, voice cloning, speaker prompts and licensing. Rendered with
# gr.Markdown after the examples. A trailing backslash inside the literal
# joins the following line into the same paragraph.
article = """
## 🌎 Foreign Language
Bark supports various languages out-of-the-box and automatically determines language from input text. \
When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice.
Try the prompt:
```
Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.
```
## 🤭 Non-Speech Sounds
Below is a list of some known non-speech sounds, but we are finding more every day. \
Please let us know if you find patterns that work particularly well on Discord!
* [laughter]
* [laughs]
* [sighs]
* [music]
* [gasps]
* [clears throat]
* — or ... for hesitations
* ♪ for song lyrics
* capitalization for emphasis of a word
* MAN/WOMAN: for bias towards speaker
Try the prompt:
```
" [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪."
```
## 🎶 Music
Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. \
Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics.
Try the prompt:
```
♪ In the jungle, the mighty jungle, the lion barks tonight ♪
```
## 🧬 Voice Cloning
Bark has the capability to fully clone voices - including tone, pitch, emotion and prosody. \
The model also attempts to preserve music, ambient noise, etc. from input audio. \
However, to mitigate misuse of this technology, we limit the audio history prompts to a limited set of Suno-provided, fully synthetic options to choose from.
## 👥 Speaker Prompts
You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. \
Please note that these are not always respected, especially if a conflicting audio history prompt is given.
Try the prompt:
```
WOMAN: I would like an oatmilk latte please.
MAN: Wow, that's expensive!
```
## Details
Bark model by [Suno](https://suno.ai/), including official [code](https://github.com/suno-ai/bark) and model weights. \
Gradio demo supported by 🤗 Hugging Face. Bark is licensed under a non-commercial license: CC-BY 4.0 NC, see details on [GitHub](https://github.com/suno-ai/bark).
"""
124
+
125
# Example rows for gr.Examples. Each row is [input text, dropdown label],
# matching the order of the `inputs` list the examples are bound to.
examples = [
    [
        "Please surprise me and speak in whatever voice you enjoy. Vielen Dank und Gesundheit!",
        "Unconditional",
    ],
    [
        "Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.",
        "Speaker 1 (en)",
    ],
    [
        "Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.",
        "Speaker 0 (es)",
    ],
]
133
+
134
+
135
def gen_tts(text, history_prompt):
    """Generate audio for ``text`` and return it as a ``(rate, samples)`` tuple.

    Args:
        text: Prompt text passed to ``generate_audio``.
        history_prompt: Dropdown label selecting the voice; it is translated
            through ``PROMPT_LOOKUP`` before being passed to Bark.

    Returns:
        ``(SAMPLE_RATE, samples)`` where ``samples`` is a NumPy ``int16``
        array. In ``DEBUG_MODE`` the samples are ``SAMPLE_RATE`` zeros and
        the model is not called.

    Raises:
        KeyError: If ``history_prompt`` is not a key of ``PROMPT_LOOKUP``.
    """
    preset = PROMPT_LOOKUP[history_prompt]
    if DEBUG_MODE:
        audio_arr = np.zeros(SAMPLE_RATE)
    else:
        audio_arr = generate_audio(text, history_prompt=preset)
    # The 32767 scale below presumes samples in [-1.0, 1.0] (TODO confirm
    # against bark.generate_audio). Clip first so an out-of-range sample
    # saturates instead of overflowing the int16 conversion.
    audio_arr = np.clip(audio_arr, -1.0, 1.0)
    audio_arr = (audio_arr * 32767).astype(np.int16)
    return (SAMPLE_RATE, audio_arr)
144
+
145
+
146
# Custom stylesheet passed to gr.Blocks(css=...). All rules target the
# "share-btn-container" group and the "share-btn" button inside it.
css = """
#share-btn-container {
display: flex;
padding-left: 0.5rem !important;
padding-right: 0.5rem !important;
background-color: #000000;
justify-content: center;
align-items: center;
border-radius: 9999px !important;
width: 13rem;
margin-top: 10px;
margin-left: auto;
flex: unset !important;
}
#share-btn {
all: initial;
color: #ffffff;
font-weight: 600;
cursor: pointer;
font-family: 'IBM Plex Sans', sans-serif;
margin-left: 0.5rem !important;
padding-top: 0.25rem !important;
padding-bottom: 0.25rem !important;
right:0;
}
#share-btn * {
all: unset !important;
}
#share-btn-container div:nth-child(-n+2){
width: auto !important;
min-height: 0px !important;
}
#share-btn-container .wrap {
display: none !important;
}
"""
182
# Page layout and event wiring for the demo.
# NOTE(review): the nesting below is reconstructed; the pasted source had lost
# its indentation. In particular, confirm that the share row belongs inside
# the output column.
with gr.Blocks(css=css) as block:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text", lines=2, value=default_text, elem_id="input_text")
            options = gr.Dropdown(
                AVAILABLE_PROMPTS, value="Speaker 1 (en)", label="Acoustic Prompt", elem_id="speaker_option")
            # The label is the button's `value` (first positional argument);
            # the previous `text=` / `type=` keywords are not Button parameters.
            run_button = gr.Button("Generate Audio")
        with gr.Column():
            audio_out = gr.Audio(label="Generated Audio",
                                 type="numpy", elem_id="audio_out")
            # Created hidden; the click chain below toggles its visibility.
            with gr.Row(visible=False) as share_row:
                with gr.Group(elem_id="share-btn-container"):
                    community_icon = gr.HTML(community_icon_html)
                    loading_icon = gr.HTML(loading_icon_html)
                    share_button = gr.Button(
                        "Share to community", elem_id="share-btn")
                    # No Python callback: the click only runs share_js.
                    share_button.click(None, [], [], _js=share_js)
    inputs = [input_text, options]
    outputs = [audio_out]
    gr.Examples(examples=examples, fn=gen_tts, inputs=inputs,
                outputs=outputs, cache_examples=True)
    gr.Markdown(article)
    # Three chained steps: hide the share row, generate the audio (queued),
    # then show the share row again.
    run_button.click(fn=lambda: gr.update(visible=False), inputs=None, outputs=share_row, queue=False).then(
        fn=gen_tts, inputs=inputs, outputs=outputs, queue=True).then(
        fn=lambda: gr.update(visible=True), inputs=None, outputs=share_row, queue=False)

block.queue()
block.launch()
213
+