Hololive-Style-Bert-VITS2

Running

App Files Files Community

litagin committed on Dec 27, 2023

Commit

ef0e26b

•

1 Parent(s): 78b6bf0

Fix app.py for demo

Browse files

Files changed (1) hide show

app.py +25 -26

app.py CHANGED Viewed

@@ -222,7 +222,23 @@ def tts_fn(
     emotion,
     emotion_weight,
 ):
     if is_hf_spaces and len(text) > limit:
         raise Exception(f"文字数が{limit}文字を超えています")
     assert model_holder.current_model is not None
@@ -248,6 +264,7 @@ def tts_fn(
     end_time = datetime.datetime.now()
     duration = (end_time - start_time).total_seconds()
     return f"Success, time: {duration} seconds.", (sr, audio)
@@ -317,35 +334,19 @@ example_hf_spaces = [
     ["あはははっ！この漫画めっちゃ笑える、見てよこれ、ふふふ、あはは。", "JP"],
     ["あなたがいなくなって、私は一人になっちゃって、泣いちゃいそうなほど悲しい。", "JP"],
     ["深層学習の応用により、感情やアクセントを含む声質の微妙な変化も再現されている。", "JP"],
 ]
 initial_md = """
-# Style-Bert-VITS2 音声合成
-注意: 初期からある[jvnvのモデル](https://huggingface.co/litagin/style_bert_vits2_jvnv)は、[JVNVコーパス（言語音声と非言語音声を持つ日本語感情音声コーパス）](https://sites.google.com/site/shinnosuketakamichi/research-topics/jvnv_corpus)で学習されたモデルです。ライセンスは[CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/deed.ja)です。
-"""
-how_to_md = """
-下のように`model_assets`ディレクトリの中にモデルファイルたちを置いてください。
-```
-model_assets
-├── your_model
-│   ├── config.json
-│   ├── your_model_file1.safetensors
-│   ├── your_model_file2.safetensors
-│   ├── ...
-│   └── style_vectors.npy
-└── another_model
-    ├── ...
-```
-各モデルにはファイルたちが必要です：
-- `config.json`：学習時の設定ファイル
-- `*.safetensors`：学習済みモデルファイル（1つ以上が必要、複数可）
-- `style_vectors.npy`：スタイルベクトルファイル
-上2つは`Train.bat`による学習で自動的に正しい位置に保存されます。`style_vectors.npy`は`Style.bat`を実行して指示に従って生成してください。
-TODO: 現在のところはspeaker_id = 0に固定しており複数話者の合成には対応していません。
 """
 style_md = """
@@ -400,8 +401,6 @@ if __name__ == "__main__":
     with gr.Blocks(theme="NoCrypt/miku") as app:
         gr.Markdown(initial_md)
-        with gr.Accordion(label="使い方", open=False):
-            gr.Markdown(how_to_md)
         with gr.Row():
             with gr.Column():
                 with gr.Row():

     emotion,
     emotion_weight,
 ):
+    logger.info(f"Start TTS:\n{text}")
+    logger.info(f"Language: {language}")
+    logger.info(f"Reference audio path: {reference_audio_path}")
+    logger.info(f"SDP ratio: {sdp_ratio}")
+    logger.info(f"Noise scale: {noise_scale}")
+    logger.info(f"Noise scale W: {noise_scale_w}")
+    logger.info(f"Length scale: {length_scale}")
+    logger.info(f"Line split: {line_split}")
+    logger.info(f"Split interval: {split_interval}")
+    logger.info(f"Style text: {style_text}")
+    logger.info(f"Style weight: {style_weight}")
+    logger.info(f"Use style text: {use_style_text}")
+    logger.info(f"Emotion: {emotion}")
+    logger.info(f"Emotion weight: {emotion_weight}")
     if is_hf_spaces and len(text) > limit:
+        logger.error(f"文字数が{limit}文字を超えています")
         raise Exception(f"文字数が{limit}文字を超えています")
     assert model_holder.current_model is not None
     end_time = datetime.datetime.now()
     duration = (end_time - start_time).total_seconds()
+    logger.info(f"End TTS, duration: {duration} seconds")
     return f"Success, time: {duration} seconds.", (sr, audio)
     ["あはははっ！この漫画めっちゃ笑える、見てよこれ、ふふふ、あはは。", "JP"],
     ["あなたがいなくなって、私は一人になっちゃって、泣いちゃいそうなほど悲しい。", "JP"],
     ["深層学習の応用により、感情やアクセントを含む声質の微妙な変化も再現されている。", "JP"],
+    [
+        "Speech synthesis is the artificial production of human speech. A computer system used for this purpose is called a speech synthesizer, and can be implemented in software or hardware products.",
+        "EN",
+    ],
+    ["语音合成是人工制造人类语音。用于此目的的计算机系统称为语音合成器，可以通过软件或硬件产品实现。", "ZH"],
 ]
 initial_md = """
+# Style-Bert-VITS2 JVNVコーパスデモ
+怒り・悲しみ・喜び等の感情スタイルを強弱付きで制御できる、[Style-Bert-VITS2](https://github.com/litagin02/Style-Bert-VITS2)のデモです。
+このデモでは[jvnvのモデル](https://huggingface.co/litagin/style_bert_vits2_jvnv)を使っており、[JVNVコーパス（言語音声と非言語音声を持つ日本語感情音声コーパス）](https://sites.google.com/site/shinnosuketakamichi/research-topics/jvnv_corpus)で学習されたモデルです。
 """
 style_md = """
     with gr.Blocks(theme="NoCrypt/miku") as app:
         gr.Markdown(initial_md)
         with gr.Row():
             with gr.Column():
                 with gr.Row():