Kit-Lemonfoot committed on
Commit
2a50cb7
1 Parent(s): 09353f1

Added Pekora, updated images. Adjusted model loading for buggy test voices.

Browse files
app.py CHANGED
@@ -150,7 +150,6 @@ def tts_fn(
150
 
151
  def load_voicedata():
152
  print("Loading voice data...")
153
- #voices = []
154
  envoices = []
155
  jpvoices = []
156
  styledict = {}
@@ -169,17 +168,20 @@ def load_voicedata():
169
  image = info['cover']
170
  if not os.path.exists(f"images/{image}"):
171
  image="none.png"
 
 
 
 
172
  if not model_path in styledict.keys():
173
  conf=f"{model_dir}/{model_path}/config.json"
174
  hps = utils.get_hparams_from_file(conf)
175
  s2id = hps.data.style2id
176
  styledict[model_path] = s2id.keys()
177
  print(f"Set up hyperparameters for model {model_path}")
178
- #print(f"Indexed voice {voice_name}")
179
  if(info['primarylang']=="JP"):
180
- jpvoices.append((name, model_path, model_path_full, voice_name, speakerid, datasetauthor, image))
181
  else:
182
- envoices.append((name, model_path, model_path_full, voice_name, speakerid, datasetauthor, image))
183
  return [envoices, jpvoices], styledict
184
 
185
 
@@ -301,7 +303,9 @@ if __name__ == "__main__":
301
  #for (name, model_path, voice_name, speakerid, datasetauthor, image) in voicedata:
302
  for vi in range(len(voicedata)):
303
  with gr.TabItem(langnames[vi]):
304
- for (name, model_path, model_path_full, voice_name, speakerid, datasetauthor, image) in voicedata[vi]:
 
 
305
  with gr.TabItem(name):
306
  mn = gr.Textbox(value=model_path, visible=False, interactive=False)
307
  mp = gr.Textbox(value=model_path_full, visible=False, interactive=False)
 
150
 
151
  def load_voicedata():
152
  print("Loading voice data...")
 
153
  envoices = []
154
  jpvoices = []
155
  styledict = {}
 
168
  image = info['cover']
169
  if not os.path.exists(f"images/{image}"):
170
  image="none.png"
171
+ # for voices that are either known buggy or abnormal
172
+ nospace=False
173
+ if 'disableonspace' in info:
174
+ nospace=info['disableonspace']
175
  if not model_path in styledict.keys():
176
  conf=f"{model_dir}/{model_path}/config.json"
177
  hps = utils.get_hparams_from_file(conf)
178
  s2id = hps.data.style2id
179
  styledict[model_path] = s2id.keys()
180
  print(f"Set up hyperparameters for model {model_path}")
 
181
  if(info['primarylang']=="JP"):
182
+ jpvoices.append((name, model_path, model_path_full, voice_name, speakerid, datasetauthor, image, nospace))
183
  else:
184
+ envoices.append((name, model_path, model_path_full, voice_name, speakerid, datasetauthor, image, nospace))
185
  return [envoices, jpvoices], styledict
186
 
187
 
 
303
  #for (name, model_path, voice_name, speakerid, datasetauthor, image) in voicedata:
304
  for vi in range(len(voicedata)):
305
  with gr.TabItem(langnames[vi]):
306
+ for (name, model_path, model_path_full, voice_name, speakerid, datasetauthor, image, nospace) in voicedata[vi]:
307
+ if(nospace and is_hf_spaces):
308
+ continue
309
  with gr.TabItem(name):
310
  mn = gr.Textbox(value=model_path, visible=False, interactive=False)
311
  mp = gr.Textbox(value=model_path_full, visible=False, interactive=False)
images/bijou.png CHANGED
images/lui.png CHANGED
images/nerissa.png CHANGED
images/pekora.png ADDED
images/raden.png CHANGED
images/ririka.png CHANGED
images/shiori.png CHANGED
model_assets/SBV2_UsadaPekora/SBV2_UsadaPekora.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb16c8c20d9dcddb1dd7f871998730b6b115306a317a21ada4d64da0cb55c382
3
+ size 198769212
model_assets/SBV2_UsadaPekora/config.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "SBV2_UsadaPekora",
3
+ "train": {
4
+ "log_interval": 200,
5
+ "eval_interval": 1000,
6
+ "seed": 42,
7
+ "epochs": 100,
8
+ "learning_rate": 0.0002,
9
+ "betas": [
10
+ 0.8,
11
+ 0.99
12
+ ],
13
+ "eps": 1e-09,
14
+ "batch_size": 2,
15
+ "bf16_run": false,
16
+ "lr_decay": 0.99995,
17
+ "segment_size": 16384,
18
+ "init_lr_ratio": 1,
19
+ "warmup_epochs": 0,
20
+ "c_mel": 45,
21
+ "c_kl": 1.0,
22
+ "skip_optimizer": false,
23
+ "freeze_ZH_bert": false,
24
+ "freeze_JP_bert": false,
25
+ "freeze_EN_bert": false,
26
+ "freeze_style": false,
27
+ "freeze_encoder": false,
28
+ "freeze_decoder": false
29
+ },
30
+ "data": {
31
+ "use_jp_extra": false,
32
+ "training_files": "Data\\SBV2_UsadaPekora\\train.list",
33
+ "validation_files": "Data\\SBV2_UsadaPekora\\val.list",
34
+ "max_wav_value": 32768.0,
35
+ "sampling_rate": 44100,
36
+ "filter_length": 2048,
37
+ "hop_length": 512,
38
+ "win_length": 2048,
39
+ "n_mel_channels": 128,
40
+ "mel_fmin": 0.0,
41
+ "mel_fmax": null,
42
+ "add_blank": true,
43
+ "n_speakers": 2,
44
+ "cleaned_text": true,
45
+ "num_styles": 6,
46
+ "style2id": {
47
+ "Neutral": 0,
48
+ "Excited": 1,
49
+ "Thoughtful": 2,
50
+ "Explaining": 3,
51
+ "Mama": 4,
52
+ "Sad": 5
53
+ },
54
+ "spk2id": {
55
+ "UsadaPekora": 0,
56
+ "Pekomama": 1
57
+ }
58
+ },
59
+ "model": {
60
+ "use_spk_conditioned_encoder": true,
61
+ "use_noise_scaled_mas": true,
62
+ "use_mel_posterior_encoder": false,
63
+ "use_duration_discriminator": true,
64
+ "inter_channels": 192,
65
+ "hidden_channels": 192,
66
+ "filter_channels": 768,
67
+ "n_heads": 2,
68
+ "n_layers": 6,
69
+ "kernel_size": 3,
70
+ "p_dropout": 0.1,
71
+ "resblock": "1",
72
+ "resblock_kernel_sizes": [
73
+ 3,
74
+ 7,
75
+ 11
76
+ ],
77
+ "resblock_dilation_sizes": [
78
+ [
79
+ 1,
80
+ 3,
81
+ 5
82
+ ],
83
+ [
84
+ 1,
85
+ 3,
86
+ 5
87
+ ],
88
+ [
89
+ 1,
90
+ 3,
91
+ 5
92
+ ]
93
+ ],
94
+ "upsample_rates": [
95
+ 8,
96
+ 8,
97
+ 2,
98
+ 2,
99
+ 2
100
+ ],
101
+ "upsample_initial_channel": 512,
102
+ "upsample_kernel_sizes": [
103
+ 16,
104
+ 16,
105
+ 8,
106
+ 2,
107
+ 2
108
+ ],
109
+ "n_layers_q": 3,
110
+ "use_spectral_norm": false,
111
+ "gin_channels": 256
112
+ },
113
+ "version": "2.5.0"
114
+ }
model_assets/SBV2_UsadaPekora/style_vectors.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12b73c03f08086906421f0942df5021672f6d49a0c5b7bea55455a3a3583de01
3
+ size 6272
voicelist.json CHANGED
@@ -179,6 +179,16 @@
179
  "primarylang": "EN",
180
  "cover": "zeta.png"
181
  },
 
 
 
 
 
 
 
 
 
 
182
  "Sora": {
183
  "enable": true,
184
  "model_path": "SBV2_HoloJPTest2",
@@ -296,6 +306,25 @@
296
  "primarylang": "JP",
297
  "cover": "okayu.png"
298
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  "Rushia": {
300
  "enable": true,
301
  "model_path": "SBV2_HoloJPTest3",
 
179
  "primarylang": "EN",
180
  "cover": "zeta.png"
181
  },
182
+ "Kobo": {
183
+ "enable": true,
184
+ "model_path": "SBV2_HoloESL",
185
+ "title": "Kobo Kanaeru",
186
+ "speakerid": "KoboKanaeru",
187
+ "datasetauthor": "megaaziib",
188
+ "primarylang": "EN",
189
+ "cover": "kobo.png",
190
+ "disableonspace": true
191
+ },
192
  "Sora": {
193
  "enable": true,
194
  "model_path": "SBV2_HoloJPTest2",
 
306
  "primarylang": "JP",
307
  "cover": "okayu.png"
308
  },
309
+ "Pekora": {
310
+ "enable": true,
311
+ "model_path": "SBV2_UsadaPekora",
312
+ "title": "Usada Pekora",
313
+ "speakerid": "UsadaPekora",
314
+ "datasetauthor": "Kit Lemonfoot",
315
+ "primarylang": "JP",
316
+ "cover": "pekora.png"
317
+ },
318
+ "Pekomama": {
319
+ "enable": true,
320
+ "model_path": "SBV2_UsadaPekora",
321
+ "title": "Pekomama",
322
+ "speakerid": "Pekomama",
323
+ "datasetauthor": "dacoolkid44",
324
+ "primarylang": "JP",
325
+ "cover": "pekomama.png",
326
+ "disableonspace": true
327
+ },
328
  "Rushia": {
329
  "enable": true,
330
  "model_path": "SBV2_HoloJPTest3",