kevinwang676 committed on
Commit
2907a7d
·
1 Parent(s): 6737785

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -82,14 +82,20 @@ def generate_text_to_speech(text, selected_speaker, text_temp, waveform_temp, qu
82
  use_last_generation_as_history = "Use last generation as history" in complete_settings
83
  progress(0, desc="Generating")
84
 
85
- silence = np.zeros(int(0.25 * SAMPLE_RATE), dtype=np.float32) # quarter second of silence
 
86
 
87
  all_parts = []
88
  text = text.lstrip()
89
  if is_ssml(text):
90
  list_speak = create_clips_from_ssml(text)
 
91
  for i, clip in tqdm(enumerate(list_speak), total=len(list_speak)):
92
  selected_speaker = clip[0]
 
 
 
 
93
  text = clip[1]
94
  text = saxutils.unescape(text)
95
  if selected_speaker == "None":
@@ -99,7 +105,7 @@ def generate_text_to_speech(text, selected_speaker, text_temp, waveform_temp, qu
99
  audio_array = generate_audio(text, selected_speaker, text_temp, waveform_temp)
100
  if len(list_speak) > 1:
101
  save_wav(audio_array, create_filename(OUTPUTFOLDER, "audioclip",".wav"))
102
- all_parts += [audio_array, silence.copy()]
103
  else:
104
  texts = split_and_recombine_text(text)
105
  for i, text in tqdm(enumerate(texts), total=len(texts)):
@@ -139,7 +145,10 @@ def generate_text_to_speech(text, selected_speaker, text_temp, waveform_temp, qu
139
  full_generation['fine_prompt'])
140
  # loading voice from custom folder needs to have extension
141
  voice_name = voice_name + ".npz"
142
- all_parts += [audio_array, silence.copy()]
 
 
 
143
 
144
  # save & play audio
145
  result = create_filename(OUTPUTFOLDER, "final",".wav")
@@ -234,18 +243,20 @@ for root, dirs, files in os.walk("./bark/assets/prompts/v2"):
234
  for file in files:
235
  if(file.endswith(".npz")):
236
  pathpart = root.replace("./bark/assets/prompts/v2", "")
237
- if len(pathpart) < 1:
238
- pathpart = ""
239
- speakers_list.append(os.path.join(pathpart, file[:-4]))
 
240
 
241
  speakers_list = sorted(speakers_list, key=lambda x: x.lower())
242
  speakers_list.insert(0, "nana.npz")
 
243
  #speakers_list.insert(0, 'None')
244
 
245
  # Create Gradio Blocks
246
 
247
  with gr.Blocks(title="Bark Enhanced Gradio GUI", mode="Bark Enhanced") as barkgui:
248
- gr.Markdown("### [Bark Enhanced](https://github.com/C0untFloyd/bark-gui)")
249
  with gr.Tab("TTS"):
250
  with gr.Row():
251
  with gr.Column():
@@ -282,13 +293,7 @@ with gr.Blocks(title="Bark Enhanced Gradio GUI", mode="Bark Enhanced") as barkgu
282
  gr.Markdown("[Voice Prompt Library](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c)")
283
  speaker = gr.Dropdown(speakers_list, value=speakers_list[0], label="Voice")
284
  with gr.Column():
285
- text_temp = gr.Slider(
286
- 0.1,
287
- 1.0,
288
- value=0.7,
289
- label="Generation Temperature",
290
- info="1.0 more diverse, 0.1 more conservative"
291
- )
292
  waveform_temp = gr.Slider(0.1, 1.0, value=0.7, label="Waveform temperature", info="1.0 more diverse, 0.1 more conservative")
293
 
294
  with gr.Row():
@@ -301,7 +306,7 @@ with gr.Blocks(title="Bark Enhanced Gradio GUI", mode="Bark Enhanced") as barkgu
301
 
302
  with gr.Row():
303
  with gr.Column():
304
- tts_create_button = gr.Button("Create")
305
  with gr.Column():
306
  hidden_checkbox = gr.Checkbox(visible=False)
307
  button_delete_files = gr.Button("Clear output folder", visible=False)
 
82
  use_last_generation_as_history = "Use last generation as history" in complete_settings
83
  progress(0, desc="Generating")
84
 
85
+ silenceshort = np.zeros(int(0.25 * SAMPLE_RATE), dtype=np.float32) # quarter second of silence
86
+ silencelong = np.zeros(int(0.50 * SAMPLE_RATE), dtype=np.float32) # half a second of silence
87
 
88
  all_parts = []
89
  text = text.lstrip()
90
  if is_ssml(text):
91
  list_speak = create_clips_from_ssml(text)
92
+ prev_speaker = None
93
  for i, clip in tqdm(enumerate(list_speak), total=len(list_speak)):
94
  selected_speaker = clip[0]
95
+ # Add pause break between speakers
96
+ if i > 0 and selected_speaker != prev_speaker:
97
+ all_parts += [silencelong.copy()]
98
+ prev_speaker = selected_speaker
99
  text = clip[1]
100
  text = saxutils.unescape(text)
101
  if selected_speaker == "None":
 
105
  audio_array = generate_audio(text, selected_speaker, text_temp, waveform_temp)
106
  if len(list_speak) > 1:
107
  save_wav(audio_array, create_filename(OUTPUTFOLDER, "audioclip",".wav"))
108
+ all_parts += [audio_array]
109
  else:
110
  texts = split_and_recombine_text(text)
111
  for i, text in tqdm(enumerate(texts), total=len(texts)):
 
145
  full_generation['fine_prompt'])
146
  # loading voice from custom folder needs to have extension
147
  voice_name = voice_name + ".npz"
148
+ all_parts += [audio_array]
149
+ # Add short pause between sentences
150
+ if text[-1] in "!?.\n" and i > 1:
151
+ all_parts += [silenceshort.copy()]
152
 
153
  # save & play audio
154
  result = create_filename(OUTPUTFOLDER, "final",".wav")
 
243
  for file in files:
244
  if(file.endswith(".npz")):
245
  pathpart = root.replace("./bark/assets/prompts/v2", "")
246
+ name = os.path.join(pathpart, file[:-4])
247
+ if name.startswith("/") or name.startswith("\\"):
248
+ name = name[1:]
249
+ speakers_list.append(name)
250
 
251
  speakers_list = sorted(speakers_list, key=lambda x: x.lower())
252
  speakers_list.insert(0, "nana.npz")
253
+
254
  #speakers_list.insert(0, 'None')
255
 
256
  # Create Gradio Blocks
257
 
258
  with gr.Blocks(title="Bark Enhanced Gradio GUI", mode="Bark Enhanced") as barkgui:
259
+ gr.Markdown("### [Bark Enhanced v0.4.0](https://github.com/C0untFloyd/bark-gui)")
260
  with gr.Tab("TTS"):
261
  with gr.Row():
262
  with gr.Column():
 
293
  gr.Markdown("[Voice Prompt Library](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c)")
294
  speaker = gr.Dropdown(speakers_list, value=speakers_list[0], label="Voice")
295
  with gr.Column():
296
+ text_temp = gr.Slider(0.1, 1.0, value=0.6, label="Generation Temperature", info="1.0 more diverse, 0.1 more conservative")
 
 
 
 
 
 
297
  waveform_temp = gr.Slider(0.1, 1.0, value=0.7, label="Waveform temperature", info="1.0 more diverse, 0.1 more conservative")
298
 
299
  with gr.Row():
 
306
 
307
  with gr.Row():
308
  with gr.Column():
309
+ tts_create_button = gr.Button("Generate")
310
  with gr.Column():
311
  hidden_checkbox = gr.Checkbox(visible=False)
312
  button_delete_files = gr.Button("Clear output folder", visible=False)