Hololive-Style-Bert-VITS2

Running

App Files Community

Kit-Lemonfoot committed on Feb 7

Commit

f2a3f8d

•

1 Parent(s): 587b3fe

Upload 2 files

Browse files

Files changed (2) hide show

app.py +10 -26
voicelist.json +14 -0

app.py CHANGED Viewed

@@ -230,8 +230,8 @@ def tts_fn(
     emotion_weight,
     speaker,
 ):
-    if not text:
-        return "Please enter some text.", (44100, None)
     #logger.info(f"Start TTS with {language}:\n{text}")
     #logger.info(f"Model: {model_holder.current_model.model_path}")
     #logger.info(f"SDP: {sdp_ratio}, Noise: {noise_scale}, Noise_W: {noise_scale_w}, Length: {length_scale}")
@@ -239,7 +239,7 @@ def tts_fn(
     #logger.info(f"Style: {emotion}, Style weight: {emotion_weight}")
     if is_hf_spaces and len(text) > limit:
-        return f"Too long! There is a character limit of {limit} characters.", (44100, None)
     if(not model_holder.current_model):
         model_holder.load_model(model_name, model_path)
@@ -275,7 +275,7 @@ def tts_fn(
     return f"Success, time: {duration} seconds.", (sr, audio)
 def load_voicedata():
-    logger.info("Loading voice data...")
     voices = []
     styledict = {}
     with open("voicelist.json", "r", encoding="utf-8") as f:
@@ -292,21 +292,21 @@ def load_voicedata():
            hps = utils.get_hparams_from_file(conf)
            s2id = hps.data.style2id
            styledict[model_path] = s2id.keys()
         voices.append((name, model_path, voice_name, speakerid, image))
     return voices, styledict
-initial_text = "Hello there! This is test audio of Lemonfoot S B V 2."
 initial_md = """
-# LemonfootSBV2 😊🍋
 ### Space by [Kit Lemonfoot](https://huggingface.co/Kit-Lemonfoot)/[Noel Shirogane's High Flying Birds](https://www.youtube.com/channel/UCG9A0OJsJTluLOXfMZjJ9xA)
 ### Based on code originally by [fishaudio](https://github.com/fishaudio) and [litagin02](https://github.com/litagin02)
-This HuggingFace space is designed to demonstrate multiple experimental [Style-Bert-VITS2](https://github.com/litagin02/Style-Bert-VITS2) models made by Kit Lemonfoot.
 Do no evil.
-**Note:** Most of my models are a *work in progress.* They may not sound fully correct.
 """
 style_md = """
@@ -317,22 +317,6 @@ style_md = """
 - If you're using preexisting audio data to style the output, try to use a voice that is similar to the desired speaker.
 """
-def make_interactive():
-    return gr.update(interactive=True, value="Synthesize")
-def make_non_interactive():
-    return gr.update(interactive=False, value="Synthesize (Please load a model!)")
-def gr_util(item):
-    if item == "Select from presets":
-        return (gr.update(visible=True), gr.Audio(visible=False, value=None))
-    else:
-        return (gr.update(visible=False), gr.update(visible=True))
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--cpu", action="store_true", help="Use CPU instead of GPU")
@@ -357,7 +341,7 @@ if __name__ == "__main__":
         sys.exit(1)
     initial_id = 0
     initial_pth_files = model_holder.model_files_dict[model_names[initial_id]]
-    print(initial_pth_files)
     voicedata, styledict = load_voicedata()
@@ -401,7 +385,7 @@ if __name__ == "__main__":
     )
-    with gr.Blocks(theme=gr.themes.Base(primary_hue="emerald", secondary_hue="green"), title="LemonfootSBV2") as app:
         gr.Markdown(initial_md)
         for (name, model_path, voice_name, speakerid, image) in voicedata:

     emotion_weight,
     speaker,
 ):
+    if len(text)<2:
+        return "Please enter some text.", None
     #logger.info(f"Start TTS with {language}:\n{text}")
     #logger.info(f"Model: {model_holder.current_model.model_path}")
     #logger.info(f"SDP: {sdp_ratio}, Noise: {noise_scale}, Noise_W: {noise_scale_w}, Length: {length_scale}")
     #logger.info(f"Style: {emotion}, Style weight: {emotion_weight}")
     if is_hf_spaces and len(text) > limit:
+        return f"Too long! There is a character limit of {limit} characters.", None
     if(not model_holder.current_model):
         model_holder.load_model(model_name, model_path)
     return f"Success, time: {duration} seconds.", (sr, audio)
 def load_voicedata():
+    print("Loading voice data...")
     voices = []
     styledict = {}
     with open("voicelist.json", "r", encoding="utf-8") as f:
            hps = utils.get_hparams_from_file(conf)
            s2id = hps.data.style2id
            styledict[model_path] = s2id.keys()
+        print(f"Indexed voice {voice_name}")
         voices.append((name, model_path, voice_name, speakerid, image))
     return voices, styledict
+initial_text = "Hello there! This is test audio of Hololive Style Bert Vits 2."
 initial_md = """
+# Hololive [Style-Bert-VITS2](https://github.com/litagin02/Style-Bert-VITS2)
 ### Space by [Kit Lemonfoot](https://huggingface.co/Kit-Lemonfoot)/[Noel Shirogane's High Flying Birds](https://www.youtube.com/channel/UCG9A0OJsJTluLOXfMZjJ9xA)
 ### Based on code originally by [fishaudio](https://github.com/fishaudio) and [litagin02](https://github.com/litagin02)
 Do no evil.
+**Note:** Most of the models are a *work in progress.* They may not sound fully correct.
 """
 style_md = """
 - If you're using preexisting audio data to style the output, try to use a voice that is similar to the desired speaker.
 """
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--cpu", action="store_true", help="Use CPU instead of GPU")
         sys.exit(1)
     initial_id = 0
     initial_pth_files = model_holder.model_files_dict[model_names[initial_id]]
+    #print(initial_pth_files)
     voicedata, styledict = load_voicedata()
     )
+    with gr.Blocks(theme=gr.themes.Base(primary_hue="emerald", secondary_hue="green"), title="Hololive Style-Bert-VITS2") as app:
         gr.Markdown(initial_md)
         for (name, model_path, voice_name, speakerid, image) in voicedata:

voicelist.json CHANGED Viewed

@@ -97,11 +97,25 @@
 	"speakerid": "AiraniIofifteen",
     "cover": "iofi.png"
   },
   "Anya": {
     "enable": true,
     "model_path": "SBV2_HoloESL",
     "title": "Anya Melfissa",
 	"speakerid": "AnyaMelfissa",
     "cover": "anya.png"
   }
 }

 	"speakerid": "AiraniIofifteen",
     "cover": "iofi.png"
   },
+  "Ollie": {
+    "enable": true,
+    "model_path": "SBV2_HoloIDFlu",
+    "title": "Kureiji Ollie",
+	"speakerid": "KureijiOllie",
+    "cover": "ollie.png"
+  },
   "Anya": {
     "enable": true,
     "model_path": "SBV2_HoloESL",
     "title": "Anya Melfissa",
 	"speakerid": "AnyaMelfissa",
     "cover": "anya.png"
+  },
+  "Zeta": {
+    "enable": true,
+    "model_path": "SBV2_HoloIDFlu",
+    "title": "Vestia Zeta",
+	"speakerid": "VestiaZeta",
+    "cover": "zeta.png"
   }
 }