Kit-Lemonfoot committed on
Commit
f2a3f8d
1 Parent(s): 587b3fe

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +10 -26
  2. voicelist.json +14 -0
app.py CHANGED
@@ -230,8 +230,8 @@ def tts_fn(
230
  emotion_weight,
231
  speaker,
232
  ):
233
- if not text:
234
- return "Please enter some text.", (44100, None)
235
  #logger.info(f"Start TTS with {language}:\n{text}")
236
  #logger.info(f"Model: {model_holder.current_model.model_path}")
237
  #logger.info(f"SDP: {sdp_ratio}, Noise: {noise_scale}, Noise_W: {noise_scale_w}, Length: {length_scale}")
@@ -239,7 +239,7 @@ def tts_fn(
239
  #logger.info(f"Style: {emotion}, Style weight: {emotion_weight}")
240
 
241
  if is_hf_spaces and len(text) > limit:
242
- return f"Too long! There is a character limit of {limit} characters.", (44100, None)
243
 
244
  if(not model_holder.current_model):
245
  model_holder.load_model(model_name, model_path)
@@ -275,7 +275,7 @@ def tts_fn(
275
  return f"Success, time: {duration} seconds.", (sr, audio)
276
 
277
  def load_voicedata():
278
- logger.info("Loading voice data...")
279
  voices = []
280
  styledict = {}
281
  with open("voicelist.json", "r", encoding="utf-8") as f:
@@ -292,21 +292,21 @@ def load_voicedata():
292
  hps = utils.get_hparams_from_file(conf)
293
  s2id = hps.data.style2id
294
  styledict[model_path] = s2id.keys()
 
295
  voices.append((name, model_path, voice_name, speakerid, image))
296
  return voices, styledict
297
 
298
 
299
- initial_text = "Hello there! This is test audio of Lemonfoot S B V 2."
300
 
301
  initial_md = """
302
- # LemonfootSBV2 😊🍋
303
  ### Space by [Kit Lemonfoot](https://huggingface.co/Kit-Lemonfoot)/[Noel Shirogane's High Flying Birds](https://www.youtube.com/channel/UCG9A0OJsJTluLOXfMZjJ9xA)
304
  ### Based on code originally by [fishaudio](https://github.com/fishaudio) and [litagin02](https://github.com/litagin02)
305
- This HuggingFace space is designed to demonstrate multiple experimental [Style-Bert-VITS2](https://github.com/litagin02/Style-Bert-VITS2) models made by Kit Lemonfoot.
306
 
307
  Do no evil.
308
 
309
- **Note:** Most of my models are a *work in progress.* They may not sound fully correct.
310
  """
311
 
312
  style_md = """
@@ -317,22 +317,6 @@ style_md = """
317
  - If you're using preexisting audio data to style the output, try to use a voice that is similar to the desired speaker.
318
  """
319
 
320
-
321
- def make_interactive():
322
- return gr.update(interactive=True, value="Synthesize")
323
-
324
-
325
- def make_non_interactive():
326
- return gr.update(interactive=False, value="Synthesize (Please load a model!)")
327
-
328
-
329
- def gr_util(item):
330
- if item == "Select from presets":
331
- return (gr.update(visible=True), gr.Audio(visible=False, value=None))
332
- else:
333
- return (gr.update(visible=False), gr.update(visible=True))
334
-
335
-
336
  if __name__ == "__main__":
337
  parser = argparse.ArgumentParser()
338
  parser.add_argument("--cpu", action="store_true", help="Use CPU instead of GPU")
@@ -357,7 +341,7 @@ if __name__ == "__main__":
357
  sys.exit(1)
358
  initial_id = 0
359
  initial_pth_files = model_holder.model_files_dict[model_names[initial_id]]
360
- print(initial_pth_files)
361
 
362
  voicedata, styledict = load_voicedata()
363
 
@@ -401,7 +385,7 @@ if __name__ == "__main__":
401
  )
402
 
403
 
404
- with gr.Blocks(theme=gr.themes.Base(primary_hue="emerald", secondary_hue="green"), title="LemonfootSBV2") as app:
405
  gr.Markdown(initial_md)
406
 
407
  for (name, model_path, voice_name, speakerid, image) in voicedata:
 
230
  emotion_weight,
231
  speaker,
232
  ):
233
+ if len(text)<2:
234
+ return "Please enter some text.", None
235
  #logger.info(f"Start TTS with {language}:\n{text}")
236
  #logger.info(f"Model: {model_holder.current_model.model_path}")
237
  #logger.info(f"SDP: {sdp_ratio}, Noise: {noise_scale}, Noise_W: {noise_scale_w}, Length: {length_scale}")
 
239
  #logger.info(f"Style: {emotion}, Style weight: {emotion_weight}")
240
 
241
  if is_hf_spaces and len(text) > limit:
242
+ return f"Too long! There is a character limit of {limit} characters.", None
243
 
244
  if(not model_holder.current_model):
245
  model_holder.load_model(model_name, model_path)
 
275
  return f"Success, time: {duration} seconds.", (sr, audio)
276
 
277
  def load_voicedata():
278
+ print("Loading voice data...")
279
  voices = []
280
  styledict = {}
281
  with open("voicelist.json", "r", encoding="utf-8") as f:
 
292
  hps = utils.get_hparams_from_file(conf)
293
  s2id = hps.data.style2id
294
  styledict[model_path] = s2id.keys()
295
+ print(f"Indexed voice {voice_name}")
296
  voices.append((name, model_path, voice_name, speakerid, image))
297
  return voices, styledict
298
 
299
 
300
+ initial_text = "Hello there! This is test audio of Hololive Style Bert Vits 2."
301
 
302
  initial_md = """
303
+ # Hololive [Style-Bert-VITS2](https://github.com/litagin02/Style-Bert-VITS2)
304
  ### Space by [Kit Lemonfoot](https://huggingface.co/Kit-Lemonfoot)/[Noel Shirogane's High Flying Birds](https://www.youtube.com/channel/UCG9A0OJsJTluLOXfMZjJ9xA)
305
  ### Based on code originally by [fishaudio](https://github.com/fishaudio) and [litagin02](https://github.com/litagin02)
 
306
 
307
  Do no evil.
308
 
309
+ **Note:** Most of the models are a *work in progress.* They may not sound fully correct.
310
  """
311
 
312
  style_md = """
 
317
  - If you're using preexisting audio data to style the output, try to use a voice that is similar to the desired speaker.
318
  """
319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  if __name__ == "__main__":
321
  parser = argparse.ArgumentParser()
322
  parser.add_argument("--cpu", action="store_true", help="Use CPU instead of GPU")
 
341
  sys.exit(1)
342
  initial_id = 0
343
  initial_pth_files = model_holder.model_files_dict[model_names[initial_id]]
344
+ #print(initial_pth_files)
345
 
346
  voicedata, styledict = load_voicedata()
347
 
 
385
  )
386
 
387
 
388
+ with gr.Blocks(theme=gr.themes.Base(primary_hue="emerald", secondary_hue="green"), title="Hololive Style-Bert-VITS2") as app:
389
  gr.Markdown(initial_md)
390
 
391
  for (name, model_path, voice_name, speakerid, image) in voicedata:
voicelist.json CHANGED
@@ -97,11 +97,25 @@
97
  "speakerid": "AiraniIofifteen",
98
  "cover": "iofi.png"
99
  },
 
 
 
 
 
 
 
100
  "Anya": {
101
  "enable": true,
102
  "model_path": "SBV2_HoloESL",
103
  "title": "Anya Melfissa",
104
  "speakerid": "AnyaMelfissa",
105
  "cover": "anya.png"
 
 
 
 
 
 
 
106
  }
107
  }
 
97
  "speakerid": "AiraniIofifteen",
98
  "cover": "iofi.png"
99
  },
100
+ "Ollie": {
101
+ "enable": true,
102
+ "model_path": "SBV2_HoloIDFlu",
103
+ "title": "Kureiji Ollie",
104
+ "speakerid": "KureijiOllie",
105
+ "cover": "ollie.png"
106
+ },
107
  "Anya": {
108
  "enable": true,
109
  "model_path": "SBV2_HoloESL",
110
  "title": "Anya Melfissa",
111
  "speakerid": "AnyaMelfissa",
112
  "cover": "anya.png"
113
+ },
114
+ "Zeta": {
115
+ "enable": true,
116
+ "model_path": "SBV2_HoloIDFlu",
117
+ "title": "Vestia Zeta",
118
+ "speakerid": "VestiaZeta",
119
+ "cover": "zeta.png"
120
  }
121
  }