Flux9665 committed
Commit e208c87
2 Parent(s): 9fc040d 9ad08b6

Merge remote-tracking branch 'origin/main'

InferenceInterfaces/ControllableInterface.py CHANGED
@@ -92,8 +92,10 @@ class ControllableInterface:
         if self.current_accent != "eng":
             self.model.set_accent_language("eng")
             self.current_accent = "eng"
-
+        print("\n\n")
         print(prompt)
+        print(language)
+        print("\n\n")
         wav, sr, fig = self.model(prompt,
                                   input_is_phones=False,
                                   duration_scaling_factor=duration_scaling_factor,
Models/Embedding/embedding_gan.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8e9cdad432cc4ee0301754cce730d000c37b26a4cf3baab21a5c3447b725fc9
+size 1261865
Models/Embedding/init ADDED
File without changes
Models/ToucanTTS_Meta/best.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a875446cd80a7b27f8a7a65b5fd1df6bd2f11c89409ab2e4342d6b7e6dff1755
+size 366674077
Models/ToucanTTS_Meta/init ADDED
File without changes
Models/Vocoder/best.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e3e4b03bd4ec1665706bcb78c21f47386439774cc53010e1f7c8d3b82d6006d
+size 56113099
Models/Vocoder/init ADDED
File without changes
Models/__init__.py ADDED
File without changes
Models/audioseal/generator.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a845b5fbe9364a63a3909d8ab3fe064d13a76ae4c2e983573e08c69b7b51748
+size 58805980
Models/audioseal/init ADDED
File without changes
app.py CHANGED
@@ -2,8 +2,8 @@ import os
 
 from run_model_downloader import download_models
 
-if not os.path.exists("Models/ToucanTTS_Meta/best.pt"):
-    download_models()
+#if not os.path.exists("Models/ToucanTTS_Meta/best.pt"):
+#    download_models()
 import gradio as gr
 from Preprocessing.multilinguality.SimilaritySolver import load_json_from_path
 from Utility.utils import float2pcm
@@ -46,7 +46,7 @@ class ControllableInterface(torch.nn.Module):
              loudness_in_db
              ):
         if self.current_language != language:
-            self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta", language=language)
+            self.model.set_language(language)
             self.current_language = language
 
         self.wgan.set_latent(voice_seed)
@@ -59,11 +59,17 @@ class ControllableInterface(torch.nn.Module):
         embedding = self.wgan.modify_embed(controllability_vector)
         self.model.set_utterance_embedding(embedding=embedding)
 
+        if len(prompt) > 1800:
+            raise AssertionError("The input is too long!")
         phones = self.model.text2phone.get_phone_string(prompt)
         if len(phones) > 1800:
-            return
+            raise AssertionError("The input is too long!")
 
+        print("\n\n")
         print(prompt)
+        print(language)
+        print("\n\n")
+
         wav, sr, fig = self.model(prompt,
                                   input_is_phones=False,
                                   duration_scaling_factor=duration_scaling_factor,
@@ -114,7 +120,7 @@ def read(prompt,
 iface = gr.Interface(fn=read,
                      inputs=[gr.Textbox(lines=2,
                                         placeholder="write what you want the synthesis to read here...",
-                                        value="The woods are lovely, dark and deep, but I have promises to keep, and miles to go, before I sleep.",
+                                        value="What I cannot create, I do not understand.",
                                         label="Text input"),
                              gr.Dropdown(text_selection,
                                          type="value",