Flux9665 committed
Commit e208c87
2 Parent(s): 9fc040d 9ad08b6

Merge remote-tracking branch 'origin/main'

InferenceInterfaces/ControllableInterface.py CHANGED
@@ -92,8 +92,10 @@ class ControllableInterface:
         if self.current_accent != "eng":
             self.model.set_accent_language("eng")
             self.current_accent = "eng"
-
+        print("\n\n")
         print(prompt)
+        print(language)
+        print("\n\n")
         wav, sr, fig = self.model(prompt,
                                   input_is_phones=False,
                                   duration_scaling_factor=duration_scaling_factor,
Models/Embedding/embedding_gan.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8e9cdad432cc4ee0301754cce730d000c37b26a4cf3baab21a5c3447b725fc9
+size 1261865
Models/Embedding/init ADDED
File without changes
Models/ToucanTTS_Meta/best.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a875446cd80a7b27f8a7a65b5fd1df6bd2f11c89409ab2e4342d6b7e6dff1755
+size 366674077
Models/ToucanTTS_Meta/init ADDED
File without changes
Models/Vocoder/best.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e3e4b03bd4ec1665706bcb78c21f47386439774cc53010e1f7c8d3b82d6006d
+size 56113099
Models/Vocoder/init ADDED
File without changes
Models/__init__.py ADDED
File without changes
Models/audioseal/generator.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a845b5fbe9364a63a3909d8ab3fe064d13a76ae4c2e983573e08c69b7b51748
+size 58805980
Models/audioseal/init ADDED
File without changes
app.py CHANGED
@@ -2,8 +2,8 @@ import os
 
 from run_model_downloader import download_models
 
-if not os.path.exists("Models/ToucanTTS_Meta/best.pt"):
-    download_models()
+#if not os.path.exists("Models/ToucanTTS_Meta/best.pt"):
+#    download_models()
 import gradio as gr
 from Preprocessing.multilinguality.SimilaritySolver import load_json_from_path
 from Utility.utils import float2pcm
@@ -46,7 +46,7 @@ class ControllableInterface(torch.nn.Module):
              loudness_in_db
              ):
         if self.current_language != language:
-            self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta", language=language)
+            self.model.set_language(language)
             self.current_language = language
 
         self.wgan.set_latent(voice_seed)
@@ -59,11 +59,17 @@ class ControllableInterface(torch.nn.Module):
         embedding = self.wgan.modify_embed(controllability_vector)
         self.model.set_utterance_embedding(embedding=embedding)
 
+        if len(prompt) > 1800:
+            raise AssertionError("The input is too long!")
         phones = self.model.text2phone.get_phone_string(prompt)
         if len(phones) > 1800:
-            return
+            raise AssertionError("The input is too long!")
 
+        print("\n\n")
         print(prompt)
+        print(language)
+        print("\n\n")
+
         wav, sr, fig = self.model(prompt,
                                   input_is_phones=False,
                                   duration_scaling_factor=duration_scaling_factor,
@@ -114,7 +120,7 @@ def read(prompt,
 iface = gr.Interface(fn=read,
                      inputs=[gr.Textbox(lines=2,
                                         placeholder="write what you want the synthesis to read here...",
-                                        value="The woods are lovely, dark and deep, but I have promises to keep, and miles to go, before I sleep.",
+                                        value="What I cannot create, I do not understand.",
                                         label="Text input"),
                              gr.Dropdown(text_selection,
                                          type="value",