fffiloni committed on
Commit
274d8f8
1 Parent(s): e5daf13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -53
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import os
3
  import shutil
4
 
@@ -9,6 +10,18 @@ from pydub import AudioSegment
9
 
10
  file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
11
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  """
13
  model_ids = [
14
  'suno/bark',
@@ -52,6 +65,14 @@ def cut_wav(input_path, max_duration):
52
  cut_audio.export(output_path, format="wav")
53
 
54
  return output_path
 
 
 
 
 
 
 
 
55
 
56
  def infer(prompt, input_wav_file):
57
 
@@ -72,36 +93,6 @@ def infer(prompt, input_wav_file):
72
 
73
  # Move the WAV file to the new directory
74
  shutil.move(source_path, os.path.join(destination_path, f"{file_name}.wav"))
75
-
76
- """
77
- text = prompt
78
-
79
- print("SYNTHETIZING...")
80
- # with random speaker
81
- #output_dict = model.synthesize(text, config, speaker_id="random", voice_dirs=None)
82
-
83
- # cloning a speaker.
84
- # It assumes that you have a speaker file in `bark_voices/speaker_n/speaker.wav` or `bark_voices/speaker_n/speaker.npz`
85
- output_dict = model.synthesize(
86
- text,
87
- config,
88
- speaker_id=f"{file_name}",
89
- voice_dirs="bark_voices/",
90
- gpu=True
91
- )
92
-
93
- print(output_dict)
94
-
95
-
96
-
97
- sample_rate = 24000 # Replace with the actual sample rate
98
- print("WRITING WAVE FILE")
99
- wavfile.write(
100
- 'output.wav',
101
- sample_rate,
102
- output_dict['wav']
103
- )
104
- """
105
 
106
  tts.tts_to_file(text=prompt,
107
  file_path="output.wav",
@@ -117,11 +108,77 @@ def infer(prompt, input_wav_file):
117
 
118
  tts_video = gr.make_waveform(audio="output.wav")
119
 
120
- return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True)
 
 
 
 
 
 
 
 
 
 
 
121
 
122
 
123
  css = """
124
  #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  img[src*='#center'] {
126
  display: block;
127
  margin: auto;
@@ -171,39 +228,95 @@ with gr.Blocks(css=css) as demo:
171
  with gr.Row():
172
  with gr.Column():
173
  prompt = gr.Textbox(
174
- label="Text to speech prompt"
 
175
  )
176
 
177
- if file_upload_available == "True":
178
- audio_in = gr.Audio(
179
- label="WAV voice to clone",
180
- type="filepath",
181
- source="upload"
182
- )
183
- else:
184
- audio_in = gr.Audio(
185
- label="WAV voice to clone",
186
- type="filepath",
187
- source="upload",
188
- interactive = False
189
- )
190
-
191
- submit_btn = gr.Button("Submit")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  with gr.Column():
194
 
195
  cloned_out = gr.Audio(
196
- label="Text to speech output"
 
197
  )
198
 
199
  video_out = gr.Video(
200
- label = "Waveform video"
 
201
  )
202
 
203
  npz_file = gr.File(
204
  label = ".npz file",
205
  visible = False
206
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
 
209
 
@@ -226,9 +339,10 @@ with gr.Blocks(css=css) as demo:
226
  outputs = [
227
  cloned_out,
228
  video_out,
229
- npz_file
 
230
  ],
231
- cache_examples = True
232
  )
233
 
234
  gr.HTML("""
@@ -256,8 +370,37 @@ with gr.Blocks(css=css) as demo:
256
  outputs = [
257
  cloned_out,
258
  video_out,
259
- npz_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  ]
261
  )
262
 
263
- demo.queue(api_open=False, max_size=20).launch()
 
1
  import gradio as gr
2
+ from share_btn import community_icon_html, loading_icon_html, share_js
3
  import os
4
  import shutil
5
 
 
10
 
11
  file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
12
 
13
+ import json
14
# Load the voice-character catalogue from disk.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding.
with open("characters.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Keep only the three fields the app reads for each character; a missing key
# raises KeyError here, at start-up, rather than later inside a callback.
characters = [
    {
        "image": item["image"],
        "title": item["title"],
        "speaker": item["speaker"],
    }
    for item in data
]
24
+
25
  """
26
  model_ids = [
27
  'suno/bark',
 
65
  cut_audio.export(output_path, format="wav")
66
 
67
  return output_path
68
+
69
def update_selection(selected_state: gr.SelectData):
    """Handle a click on a gallery item.

    Args:
        selected_state: Selection event supplied by Gradio; its ``index``
            is used to look up the entry in the module-level ``characters``
            list.

    Returns:
        A ``(title, selected_state)`` pair: the selected character's title
        and the event object itself, passed through unchanged.

    Raises:
        IndexError: If ``selected_state.index`` is outside ``characters``.
    """
    # Only the title is needed here; the "image" and "speaker" fields were
    # previously read into locals that were never used.
    c_title = characters[selected_state.index]["title"]

    return c_title, selected_state
75
+
76
 
77
  def infer(prompt, input_wav_file):
78
 
 
93
 
94
  # Move the WAV file to the new directory
95
  shutil.move(source_path, os.path.join(destination_path, f"{file_name}.wav"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  tts.tts_to_file(text=prompt,
98
  file_path="output.wav",
 
108
 
109
  tts_video = gr.make_waveform(audio="output.wav")
110
 
111
+ return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True)
112
+
113
def infer_from_c(prompt, c_name):
    """Synthesize ``prompt`` with a voice from ``examples/library/``.

    Args:
        prompt: Text to be spoken.
        c_name: Voice name. It is passed to the TTS engine as the speaker
            and is also the sub-directory (and ``.npz`` base name) under
            ``examples/library/``.

    Returns:
        A 4-tuple: the path of the generated WAV file, the waveform video
        produced by ``gr.make_waveform``, an update that points the file
        component at the voice's ``.npz`` and makes it visible, and an
        update that makes the share group visible.

    Raises:
        gr.Error: If ``c_name`` is empty or is not a plain directory name.
    """
    # c_name is interpolated into a file path that is handed back to the
    # client, so refuse empty values and anything that could step outside
    # the library directory.
    if (
        not c_name
        or not c_name.strip()
        or c_name in (".", "..")
        or "/" in c_name
        or "\\" in c_name
    ):
        raise gr.Error("Please select a valid voice character first.")

    tts.tts_to_file(
        text=prompt,
        file_path="output.wav",
        voice_dir="examples/library/",
        speaker=c_name,
    )

    tts_video = gr.make_waveform(audio="output.wav")

    return (
        "output.wav",
        tts_video,
        gr.update(value=f"examples/library/{c_name}/{c_name}.npz", visible=True),
        gr.Group.update(visible=True),
    )
123
 
124
 
125
  css = """
126
  #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
127
+ a {text-decoration-line: underline; font-weight: 600;}
128
+ .animate-spin {
129
+ animation: spin 1s linear infinite;
130
+ }
131
+ @keyframes spin {
132
+ from {
133
+ transform: rotate(0deg);
134
+ }
135
+ to {
136
+ transform: rotate(360deg);
137
+ }
138
+ }
139
+ #share-btn-container {
140
+ display: flex;
141
+ padding-left: 0.5rem !important;
142
+ padding-right: 0.5rem !important;
143
+ background-color: #000000;
144
+ justify-content: center;
145
+ align-items: center;
146
+ border-radius: 9999px !important;
147
+ max-width: 15rem;
148
+ height: 36px;
149
+ }
150
+ div#share-btn-container > div {
151
+ flex-direction: row;
152
+ background: black;
153
+ align-items: center;
154
+ }
155
+ #share-btn-container:hover {
156
+ background-color: #060606;
157
+ }
158
+ #share-btn {
159
+ all: initial;
160
+ color: #ffffff;
161
+ font-weight: 600;
162
+ cursor:pointer;
163
+ font-family: 'IBM Plex Sans', sans-serif;
164
+ margin-left: 0.5rem !important;
165
+ padding-top: 0.5rem !important;
166
+ padding-bottom: 0.5rem !important;
167
+ right:0;
168
+ }
169
+ #share-btn * {
170
+ all: unset;
171
+ }
172
+ #share-btn-container div:nth-child(-n+2){
173
+ width: auto !important;
174
+ min-height: 0px !important;
175
+ }
176
+ #share-btn-container .wrap {
177
+ display: none !important;
178
+ }
179
+ #share-btn-container.hidden {
180
+ display: none!important;
181
+ }
182
  img[src*='#center'] {
183
  display: block;
184
  margin: auto;
 
228
  with gr.Row():
229
  with gr.Column():
230
  prompt = gr.Textbox(
231
+ label="Text to speech prompt",
232
+ elem_id = "tts-prompt"
233
  )
234
 
235
+ with gr.Tab("File upload"):
236
+
237
+ with gr.Column():
238
+
239
+ if file_upload_available == "True":
240
+ audio_in = gr.Audio(
241
+ label="WAV voice to clone",
242
+ type="filepath",
243
+ source="upload"
244
+ )
245
+ else:
246
+ audio_in = gr.Audio(
247
+ label="WAV voice to clone",
248
+ type="filepath",
249
+ source="upload",
250
+ interactive = False
251
+ )
252
+
253
+ submit_btn = gr.Button("Submit")
254
+
255
+ with gr.Tab("Microphone"):
256
+ micro_in = gr.Audio(
257
+ label="Record voice to clone",
258
+ type="filepath",
259
+ source="microphone",
260
+ interactive = True
261
+ )
262
+ micro_submit_btn = gr.Button("Submit")
263
+
264
+ with gr.Tab("Voices Characters"):
265
+ selected_state = gr.State()
266
+ gallery_in = gr.Gallery(
267
+ label="Character Gallery",
268
+ value=[(item["image"], item["title"]) for item in characters],
269
+ interactive = True,
270
+ allow_preview=False,
271
+ columns=2,
272
+ elem_id="gallery",
273
+ show_share_button=False
274
+ )
275
+ c_submit_btn = gr.Button("Submit")
276
+
277
 
278
  with gr.Column():
279
 
280
  cloned_out = gr.Audio(
281
+ label="Text to speech output",
282
+ visible = False
283
  )
284
 
285
  video_out = gr.Video(
286
+ label = "Waveform video",
287
+ elem_id = "voice-video-out"
288
  )
289
 
290
  npz_file = gr.File(
291
  label = ".npz file",
292
  visible = False
293
  )
294
+
295
+ character_name = gr.Textbox(
296
+ label="Character Name",
297
+ placeholder="Name that voice character",
298
+ elem_id = "character-name"
299
+ )
300
+
301
+ voice_description = gr.Textbox(
302
+ label="description",
303
+ placeholder="How would you describe that voice ? ",
304
+ elem_id = "voice-description"
305
+ )
306
+
307
+ with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
308
+ community_icon = gr.HTML(community_icon_html)
309
+ loading_icon = gr.HTML(loading_icon_html)
310
+ share_button = gr.Button("Share with Community", elem_id="share-btn")
311
+
312
+ share_button.click(None, [], [], _js=share_js)
313
+
314
+ gallery_in.select(
315
+ update_selection,
316
+ outputs=[character_name, selected_state],
317
+ queue=False,
318
+ show_progress=False,
319
+ )
320
 
321
 
322
 
 
339
  outputs = [
340
  cloned_out,
341
  video_out,
342
+ npz_file,
343
+ share_group
344
  ],
345
+ cache_examples = False
346
  )
347
 
348
  gr.HTML("""
 
370
  outputs = [
371
  cloned_out,
372
  video_out,
373
+ npz_file,
374
+ share_group
375
+ ]
376
+ )
377
+
378
+ micro_submit_btn.click(
379
+ fn = infer,
380
+ inputs = [
381
+ prompt,
382
+ micro_in
383
+ ],
384
+ outputs = [
385
+ cloned_out,
386
+ video_out,
387
+ npz_file,
388
+ share_group
389
+ ]
390
+ )
391
+
392
+ c_submit_btn.click(
393
+ fn = infer_from_c,
394
+ inputs = [
395
+ prompt,
396
+ character_name
397
+ ],
398
+ outputs = [
399
+ cloned_out,
400
+ video_out,
401
+ npz_file,
402
+ share_group
403
  ]
404
  )
405
 
406
+ demo.queue(api_open=False, max_size=10).launch()