TangRain commited on
Commit
491133e
·
1 Parent(s): 26379a2

add svs demo

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +2 -0
  2. app.py +316 -0
  3. requirements.txt +10 -0
  4. resource/Muskits_ESPnet_logo.png +0 -0
  5. resource/__init__.py +0 -0
  6. resource/all_plans.json +0 -0
  7. resource/midi-note.scp +152 -0
  8. resource/pinyin_dict.py +423 -0
  9. resource/singer/singer_embedding_ace-1.npy +3 -0
  10. resource/singer/singer_embedding_ace-10.npy +3 -0
  11. resource/singer/singer_embedding_ace-11.npy +3 -0
  12. resource/singer/singer_embedding_ace-12.npy +3 -0
  13. resource/singer/singer_embedding_ace-13.npy +3 -0
  14. resource/singer/singer_embedding_ace-14.npy +3 -0
  15. resource/singer/singer_embedding_ace-15.npy +3 -0
  16. resource/singer/singer_embedding_ace-16.npy +3 -0
  17. resource/singer/singer_embedding_ace-17.npy +3 -0
  18. resource/singer/singer_embedding_ace-18.npy +3 -0
  19. resource/singer/singer_embedding_ace-19.npy +3 -0
  20. resource/singer/singer_embedding_ace-2.npy +3 -0
  21. resource/singer/singer_embedding_ace-20.npy +3 -0
  22. resource/singer/singer_embedding_ace-21.npy +3 -0
  23. resource/singer/singer_embedding_ace-22.npy +3 -0
  24. resource/singer/singer_embedding_ace-23.npy +3 -0
  25. resource/singer/singer_embedding_ace-24.npy +3 -0
  26. resource/singer/singer_embedding_ace-25.npy +3 -0
  27. resource/singer/singer_embedding_ace-26.npy +3 -0
  28. resource/singer/singer_embedding_ace-27.npy +3 -0
  29. resource/singer/singer_embedding_ace-28.npy +3 -0
  30. resource/singer/singer_embedding_ace-29.npy +3 -0
  31. resource/singer/singer_embedding_ace-3.npy +3 -0
  32. resource/singer/singer_embedding_ace-30.npy +3 -0
  33. resource/singer/singer_embedding_ace-4.npy +3 -0
  34. resource/singer/singer_embedding_ace-5.npy +3 -0
  35. resource/singer/singer_embedding_ace-6.npy +3 -0
  36. resource/singer/singer_embedding_ace-7.npy +3 -0
  37. resource/singer/singer_embedding_ace-8.npy +3 -0
  38. resource/singer/singer_embedding_ace-9.npy +3 -0
  39. resource/singer/singer_embedding_ameboshi.npy +3 -0
  40. resource/singer/singer_embedding_itako.npy +3 -0
  41. resource/singer/singer_embedding_kiritan.npy +3 -0
  42. resource/singer/singer_embedding_kising_barber.npy +3 -0
  43. resource/singer/singer_embedding_kising_blanca.npy +3 -0
  44. resource/singer/singer_embedding_kising_changge.npy +3 -0
  45. resource/singer/singer_embedding_kising_chuci.npy +3 -0
  46. resource/singer/singer_embedding_kising_chuming.npy +3 -0
  47. resource/singer/singer_embedding_kising_crimson.npy +3 -0
  48. resource/singer/singer_embedding_kising_david.npy +3 -0
  49. resource/singer/singer_embedding_kising_dvaid.npy +3 -0
  50. resource/singer/singer_embedding_kising_ghost.npy +3 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__
2
+ .gradio
app.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import librosa
4
+ import numpy as np
5
+ import gradio as gr
6
+ import pyopenjtalk
7
+ from util import preprocess_input, postprocess_phn, get_tokenizer, load_pitch_dict, get_pinyin
8
+
9
+ from espnet_model_zoo.downloader import ModelDownloader
10
+ from espnet2.bin.svs_inference import SingingGenerate
11
+
12
+
13
+ singer_embeddings = {
14
+ "Model①(Chinese)-zh": {
15
+ "singer1 (male)": 1,
16
+ "singer2 (female)": 12,
17
+ "singer3 (male)": 23,
18
+ "singer4 (female)": 29,
19
+ "singer5 (male)": 18,
20
+ "singer6 (female)": 8,
21
+ "singer7 (male)": 25,
22
+ "singer8 (female)": 5,
23
+ "singer9 (male)": 10,
24
+ "singer10 (female)": 15,
25
+ },
26
+ "Model②(Multilingual)-zh": {
27
+ "singer1 (male)": "resource/singer/singer_embedding_ace-1.npy",
28
+ "singer2 (female)": "resource/singer/singer_embedding_ace-2.npy",
29
+ "singer3 (male)": "resource/singer/singer_embedding_ace-3.npy",
30
+ "singer4 (female)": "resource/singer/singer_embedding_ace-8.npy",
31
+ "singer5 (male)": "resource/singer/singer_embedding_ace-7.npy",
32
+ "singer6 (female)": "resource/singer/singer_embedding_itako.npy",
33
+ "singer7 (male)": "resource/singer/singer_embedding_ofuton.npy",
34
+ "singer8 (female)": "resource/singer/singer_embedding_kising_orange.npy",
35
+ "singer9 (male)": "resource/singer/singer_embedding_m4singer_Tenor-1.npy",
36
+ "singer10 (female)": "resource/singer/singer_embedding_m4singer_Alto-4.npy",
37
+ },
38
+ "Model②(Multilingual)-jp": {
39
+ "singer1 (male)": "resource/singer/singer_embedding_ace-1.npy",
40
+ "singer2 (female)": "resource/singer/singer_embedding_ace-2.npy",
41
+ "singer3 (male)": "resource/singer/singer_embedding_ace-3.npy",
42
+ "singer4 (female)": "resource/singer/singer_embedding_ace-8.npy",
43
+ "singer5 (male)": "resource/singer/singer_embedding_ace-7.npy",
44
+ "singer6 (female)": "resource/singer/singer_embedding_itako.npy",
45
+ "singer7 (male)": "resource/singer/singer_embedding_ofuton.npy",
46
+ "singer8 (female)": "resource/singer/singer_embedding_kising_orange.npy",
47
+ "singer9 (male)": "resource/singer/singer_embedding_m4singer_Tenor-1.npy",
48
+ "singer10 (female)": "resource/singer/singer_embedding_m4singer_Alto-4.npy",
49
+ }
50
+ }
51
+
52
+ model_dict = {
53
+ "Model①(Chinese)-zh": "espnet/aceopencpop_svs_visinger2_40singer_pretrain",
54
+ "Model②(Multilingual)-zh": "espnet/mixdata_svs_visinger2_spkembed_lang_pretrained",
55
+ "Model②(Multilingual)-jp": "espnet/mixdata_svs_visinger2_spkembed_lang_pretrained",
56
+ }
57
+
58
+ total_singers = list(singer_embeddings["Model②(Multilingual)-zh"].keys())
59
+
60
+ langs = {
61
+ "zh": 2,
62
+ "jp": 1,
63
+ }
64
+
65
def _svs_error(fs, message):
    """Build the failure result returned by `gen_song`.

    The "Generate" button binds three output components (audio, running
    status, pseudo MOS), so every return path has to supply three values.

    Args:
        fs: Sample rate reported alongside the placeholder audio.
        message: Text shown in the "Running Status" box.

    Returns:
        A 3-tuple ``((fs, placeholder_audio), message, None)``; ``None`` is
        used for the MOS slot because no score exists on failure.
    """
    return (fs, np.array([0.0])), message, None


def gen_song(model_name, spk, texts, durs, pitchs):
    """Synthesize singing from lyrics, durations and pitches.

    Args:
        model_name: Key of `model_dict`; the part after the last "-" is used
            as the language code ("zh" or "jp").
        spk: Singer label, a key of `singer_embeddings[model_name]`.
        texts: Lyrics as one string; tokens are separated by whitespace.
        durs: Whitespace-separated note durations (parsed with `float`).
        pitchs: Whitespace-separated pitches; each one must be a key of the
            dictionary returned by `load_pitch_dict()`.

    Returns:
        A 3-tuple ``((sample_rate, waveform), status, mos)``. On success
        ``status`` is ``"success!"`` and ``mos`` is the predicted score
        rounded to two decimals. On any input error the waveform is a
        one-sample placeholder, ``status`` starts with ``"Error:"`` and
        ``mos`` is ``None``.
    """
    fs = 44100
    tempo = 120

    # An untouched text field may arrive as "" rather than None, so treat
    # blank strings as missing input too.
    if texts is None or not str(texts).strip():
        return _svs_error(fs, "Error: No Text provided!")
    if durs is None or not str(durs).strip():
        return _svs_error(fs, "Error: No Dur provided!")
    if pitchs is None or not str(pitchs).strip():
        return _svs_error(fs, "Error: No Pitch provided!")

    # The model radio and singer dropdown have no default selection, so both
    # may still be None when "Generate" is pressed.
    if model_name not in model_dict:
        return _svs_error(fs, f"Error: model `{model_name}` is invalid!")
    if spk not in singer_embeddings.get(model_name, {}):
        return _svs_error(fs, f"Error: singer `{spk}` is invalid!")

    lang = model_name.split("-")[-1]
    pretrain_model = model_dict[model_name]

    # preprocess
    if lang == "zh":
        texts = preprocess_input(texts, "")
        text_list = get_pinyin(texts)
    elif lang == "jp":
        texts = preprocess_input(texts, " ")
        text_list = texts.strip().split()
    else:
        return _svs_error(fs, f"Error: language `{lang}` is not supported!")
    durs = preprocess_input(durs, " ")
    dur_list = durs.strip().split()
    pitchs = preprocess_input(pitchs, " ")
    pitch_list = pitchs.strip().split()

    if len(text_list) != len(dur_list):
        return _svs_error(
            fs,
            f"Error: len in text({len(text_list)}) mismatch with duration({len(dur_list)})!",
        )
    if len(text_list) != len(pitch_list):
        return _svs_error(
            fs,
            f"Error: len in text({len(text_list)}) mismatch with pitch({len(pitch_list)})!",
        )

    # text to phoneme
    tokenizer = get_tokenizer(model_name, lang)
    sybs = []
    for text in text_list:
        if text in ("AP", "SP", "-", "——"):
            # Breath / silence / slur markers are passed through untouched.
            rev = [text]
        else:
            rev = tokenizer(text)
            # Kept as an equality test on purpose: the tokenizer lives in
            # another module and signals failure with a falsy sentinel.
            if rev == False:  # noqa: E712
                return _svs_error(fs, f"Error: text `{text}` is invalid!")
        rev = postprocess_phn(rev, model_name, lang)
        sybs.append("_".join(rev))

    pitch_dict = load_pitch_dict()

    labels = []
    notes = []
    st = 0
    pre_phn = ""
    for phns, dur, pitch in zip(sybs, dur_list, pitch_list):
        if phns in ("-", "——"):
            # A slur repeats the last phoneme of the previous note.
            phns = pre_phn
        if pitch not in pitch_dict:
            return _svs_error(fs, f"Error: pitch `{pitch}` is invalid!")
        pitch = pitch_dict[pitch]
        try:
            dur = float(dur)
        except ValueError:
            return _svs_error(fs, f"Error: duration `{dur}` is invalid!")
        phn_list = phns.split("_")
        lyric = "".join(phn_list)
        notes.append([st, st + dur, lyric, pitch, phns])
        st += dur
        labels.extend(phn_list)
        pre_phn = labels[-1]

    batch = {
        "score": (int(tempo), notes),
        "text": " ".join(labels),
    }
    print(batch)

    # Infer
    device = "cpu"
    # device = "cuda" if torch.cuda.is_available() else "cpu"
    d = ModelDownloader()
    pretrain_downloaded = d.download_and_unpack(pretrain_model)
    svs = SingingGenerate(
        train_config=pretrain_downloaded["train_config"],
        model_file=pretrain_downloaded["model_file"],
        device=device,
    )
    if model_name == "Model①(Chinese)-zh":
        # This model is conditioned on an integer singer id.
        sid = np.array([singer_embeddings[model_name][spk]])
        output_dict = svs(batch, sids=sid)
    else:
        # The multilingual model takes a language id plus a singer embedding
        # loaded from a .npy file.
        lid = np.array([langs[lang]])
        spk_embed = np.load(singer_embeddings[model_name][spk])
        output_dict = svs(batch, lids=lid, spembs=spk_embed)
    wav_info = output_dict["wav"].cpu().numpy()

    # mos prediction with sr=16k
    predictor = torch.hub.load(
        "South-Twilight/SingMOS:v0.2.0", "singing_ssl_mos", trust_repo=True
    )
    wav_mos = librosa.resample(wav_info, orig_sr=fs, target_sr=16000)
    wav_mos = torch.from_numpy(wav_mos).unsqueeze(0)
    len_mos = torch.tensor([wav_mos.shape[1]])
    score = predictor(wav_mos, len_mos)
    return (fs, wav_info), "success!", round(score.item(), 2)
169
+
170
+
171
+ # SP: silence, AP: aspirate.
172
+ examples = [
173
+ ["Model①(Chinese)-zh", "singer1 (male)", "雨 淋 湿 了 SP 天 空 AP\n毁 的 SP 很 讲 究 AP", "0.23 0.16 0.36 0.16 0.07 0.28 0.5 0.21\n0.3 0.12 0.12 0.25 0.5 0.48 0.34", "60 62 62 62 0 62 58 0\n58 58 0 58 58 63 0"],
174
+ ["Model①(Chinese)-zh", "singer3 (male)", "雨 淋 湿 了 SP 天 空 AP\n毁 的 SP 很 讲 究 AP", "0.23 0.16 0.36 0.16 0.07 0.28 0.5 0.21\n0.3 0.12 0.12 0.25 0.5 0.48 0.34", "C4 D4 D4 D4 rest D4 A#3 rest\nA#3 A#3 rest A#3 A#3 D#4 rest"], # midi note
175
+ ["Model①(Chinese)-zh", "singer3 (male)", "雨 淋 湿 了 SP 天 空 AP\n毁 的 SP 很 讲 究 AP", "0.23 0.16 0.36 0.16 0.07 0.28 0.5 0.21\n0.3 0.12 0.12 0.25 0.5 0.48 0.34", "C#4 D#4 D#4 D#4 rest D#4 B3 rest\nB3 B3 rest B3 B3 E4 rest"], # up 1 key
176
+ ["Model①(Chinese)-zh", "singer3 (male)", "雨 淋 湿 了 SP 大 地 AP\n毁 的 SP 很 讲 究 AP", "0.23 0.16 0.36 0.16 0.07 0.28 0.5 0.21\n0.3 0.12 0.12 0.25 0.5 0.48 0.34", "C4 D4 D4 D4 rest D4 A#3 rest\nA#3 A#3 rest A#3 A#3 D#4 rest"], # lyrics
177
+ ["Model②(Multilingual)-zh", "singer3 (male)", "你 说 你 不 SP 懂\n 为 何 在 这 时 牵 手 AP", "0.11 0.33 0.29 0.13 0.15 0.48\n0.24 0.18 0.34 0.15 0.27 0.28 0.63 0.44", "63 63 63 63 0 63\n62 62 62 63 65 63 62 0"],
178
+ ["Model②(Multilingual)-zh", "singer3 (male)", "你 说 你 不 SP 懂\n 为 何 在 这 时 牵 手 AP", "0.23 0.66 0.58 0.27 0.3 0.97\n0.48 0.36 0.69 0.3 0.53 0.56 1.27 0.89", "63 63 63 63 0 63\n62 62 62 63 65 63 62 0"], # double duration
179
+ ["Model①(Chinese)-zh", "singer3 (male)", "雨 淋 湿 了 SP 天 空 AP\n毁 的 SP 很 讲 究 AP\n你 说 你 不 SP 懂\n 为 何 在 这 时 牵 手 AP", "0.23 0.16 0.36 0.16 0.07 0.28 0.5 0.21\n0.3 0.12 0.12 0.25 0.5 0.48 0.34\n0.11 0.33 0.29 0.13 0.15 0.48\n0.24 0.18 0.34 0.15 0.27 0.28 0.63 0.44", "60 62 62 62 0 62 58 0\n58 58 0 58 58 63 0\n63 63 63 63 0 63\n62 62 62 63 65 63 62 0"], # long
180
+ ["Model①(Chinese)-zh", "singer3 (male)", "修 炼 爱 情 的 心 酸 SP AP", "0.42 0.21 0.19 0.28 0.22 0.33 1.53 0.1 0.29", "68 70 68 66 63 68 68 0 0"],
181
+ ["Model①(Chinese)-zh", "singer3 (male)", "学 会 放 好 以 前 的 渴 望 SP AP", "0.3 0.22 0.29 0.27 0.25 0.44 0.54 0.29 1.03 0.08 0.39", "68 70 68 66 61 68 68 65 66 0 0"],
182
+ ["Model①(Chinese)-zh", "singer3 (male)", "SP 我 不 - 是 一 定 要 你 回 - 来 SP", "0.37 0.45 0.47 0.17 0.52 0.28 0.46 0.31 0.44 0.45 0.2 2.54 0.19", "0 51 60 61 59 59 57 57 59 60 61 59 0"], # slur
183
+ ["Model①(Chinese)-zh", "singer4 (female)", "AP 我 多 想 再 见 你\n哪 怕 匆 - 匆 一 AP 眼 就 别 离 AP", "0.13 0.24 0.68 0.78 0.86 0.4 0.94 0.54 0.3 0.56 0.16 0.86 0.26 0.22 0.28 0.78 0.68 1.5 0.32", "0 57 66 63 63 63 63 60 61 61 63 66 66 0 61 61 59 58 0"],
184
+ ["Model②(Multilingual)-jp", "singer8 (female)", "い じ ん さ ん に つ れ ら れ て", "0.6 0.3 0.3 0.3 0.3 0.6 0.6 0.3 0.3 0.6 0.23", "60 60 60 56 56 56 55 55 55 53 56"],
185
+ ["Model②(Multilingual)-jp", "singer8 (female)", "い じ ん さ ん に つ れ ら れ て", "0.6 0.3 0.3 0.3 0.3 0.6 0.6 0.3 0.3 0.6 0.23", "62 62 62 58 58 58 57 57 57 55 58"], # pitch
186
+ ["Model②(Multilingual)-jp", "singer8 (female)", "い じ ん さ ん に つ れ ら れ て", "1.2 0.6 0.6 0.6 0.6 1.2 1.2 0.6 0.6 1.2 0.45", "60 60 60 56 56 56 55 55 55 53 56"], # double dur
187
+ ["Model②(Multilingual)-jp", "singer8 (female)", "い じ ん さ ん に つ れ ら れ て", "0.3 0.15 0.15 0.15 0.15 0.3 0.3 0.15 0.15 0.3 0.11", "60 60 60 56 56 56 55 55 55 53 56"], # half dur
188
+ ["Model②(Multilingual)-jp", "singer8 (female)", "きっ と と べ ば そ ら ま で と ど く AP", "0.39 2.76 0.2 0.2 0.39 0.39 0.2 0.2 0.39 0.2 0.2 0.59 1.08", "64 71 68 69 71 71 69 68 66 68 69 68 0"],
189
+ ["Model②(Multilingual)-jp", "singer8 (female)", "じゃ の め で お む か え う れ し い な", "0.43 0.14 0.43 0.14 0.43 0.14 0.43 0.14 0.43 0.14 0.43 0.14 0.65", "60 60 60 62 64 67 69 69 64 64 64 62 60"],
190
+ ["Model②(Multilingual)-jp", "singer10 (female)", "お と め わ ら い か ふぁ い や ら い か ん な い す ぶ ろ うぃ ん ぶ ろ うぃ ん い ん ざ うぃ ん", "0.15 0.15 0.15 0.15 0.3 0.15 0.3 0.15 0.15 0.3 0.07 0.07 0.15 0.15 0.15 0.15 0.15 0.15 0.45 0.07 0.07 0.07 0.38 0.07 0.07 0.15 0.15 0.3 0.15 0.15", "67 67 67 67 67 67 69 67 67 69 67 67 64 64 64 64 64 64 62 64 64 62 62 64 64 62 62 59 59 59"],
191
+ ]
192
+
193
+ with gr.Blocks() as demo:
194
+ gr.Markdown(
195
+ """
196
+ <h1 align="center"> Demo of Singing Voice Synthesis in Muskits-ESPnet </h1>
197
+
198
+ <div style="font-size: 20px;">
199
+ This is the demo page of our toolkit <a href="https://arxiv.org/abs/2409.07226"><b>Muskits-ESPnet: A Comprehensive Toolkit for Singing Voice Synthesis in New Paradigm</b></a>.
200
+
201
+ Singing Voice Synthesis (SVS) takes a music score as input and generates singing vocal with the voice of a specific singer.
202
+
203
+ A music score usually includes the lyrics, as well as the duration and pitch of each word in the lyrics.
204
+
205
+ <h2>How to use:</h2>
206
+ <ol>
207
+ <li><b>Choose Model-Language</b>:
208
+ <ul>
209
+ <li>"zh" indicates lyrics input in Chinese, and "jp" indicates lyrics input in Japanese.</li>
210
+ <li>For example, "Model②(Multilingual)-zh" means model "Model②(Multilingual)" with lyrics input in Chinese.</li>
211
+ </ul>
212
+ </li>
213
+ <li><b>[Optional] Choose Singer</b>: Choose one singer you like from the drop-down list.</li>
214
+ <li><b>Input lyrics</b>:
215
+ <ul>
216
+ <li>Lyrics use Chinese characters when the language is 'zh' and hiragana when the language is 'jp'.</li>
217
+ <li>Special characters such as 'AP' (breath), 'SP' (silence), and '-' (slur, only for Chinese lyrics) can also be used.</li>
218
+ <li>Lyrics sequence should be separated by either a space (' ') or a newline ('\\n'), without the quotation marks.</li>
219
+ </ul>
220
+ </li>
221
+ <li><b>Input durations</b>:
222
+ <ul>
223
+ <li>Durations use float number as input.</li>
224
+ <li>Length of duration sequence should <b>be same as lyric sequence</b>, with each duration corresponding to the respective lyric.</li>
225
+ <li>Durations sequence should be separated by either a space (' ') or a newline ('\\n'), without the quotation marks.</li>
226
+ </ul>
227
+ </li>
228
+ <li><b>Input pitches</b>:
229
+ <ul>
230
+ <li>Pitches use MIDI note or MIDI note number as input. For example, "69" in MIDI note number represents "A4" in MIDI note.</li>
231
+ <li>Length of pitch sequence should <b>be same as lyric sequence</b>, with each pitch corresponding to the respective lyric.</li>
232
+ <li>Pitches sequence should be separated by either a space (' ') or a newline ('\\n'), without the quotation marks.</li>
233
+ </ul>
234
+ </li>
235
+ <li><b>Hit "Generate" and listen to the result!</b></li>
236
+ </ol>
237
+ </div>
238
+
239
+ <h2>Notice:</h2>
240
+ <ul>
241
+ <li> Plenty of examples are provided. </li>
242
+ <li> Extreme values may result in suboptimal generation quality! </li>
243
+ </ul>
244
+ """
245
+ )
246
+ # Row-1
247
+ with gr.Row():
248
+ with gr.Column(variant="panel"):
249
+ model_name = gr.Radio(
250
+ label="Model-Language",
251
+ choices=[
252
+ "Model①(Chinese)-zh",
253
+ "Model②(Multilingual)-zh",
254
+ "Model②(Multilingual)-jp",
255
+ ],
256
+ )
257
+
258
+ with gr.Column(variant="panel"):
259
+ singer = gr.Dropdown(
260
+ label="Singer",
261
+ choices=total_singers,
262
+ )
263
+
264
+ # def set_model(model_name_str: str):
265
+ # """
266
+ # gets value from `model_name`. either
267
+ # uses cached list of speakers for the given model name
268
+ # or loads the addon and checks what are the speakers.
269
+ # """
270
+ # speakers = list(singer_embeddings[model_name_str].keys())
271
+ # value = speakers[0]
272
+ # return gr.update(
273
+ # choices=speakers, value=value, visible=True, interactive=True
274
+ # )
275
+
276
+ # model_name.change(set_model, inputs=model_name, outputs=singer)
277
+
278
+ # Row-2
279
+ with gr.Row():
280
+ with gr.Column(variant="panel"):
281
+ lyrics = gr.Textbox(label="Lyrics")
282
+ duration = gr.Textbox(label="Duration")
283
+ pitch = gr.Textbox(label="Pitch")
284
+ generate = gr.Button("Generate")
285
+ with gr.Column(variant="panel"):
286
+ gened_song = gr.Audio(label="Generated Song", type="numpy")
287
+ run_status = gr.Textbox(label="Running Status")
288
+ pred_mos = gr.Textbox(label=" Pseudo MOS")
289
+
290
+ gr.Examples(
291
+ examples=examples,
292
+ inputs=[model_name, singer, lyrics, duration, pitch],
293
+ outputs=[singer],
294
+ label="Examples",
295
+ examples_per_page=20,
296
+ )
297
+
298
+ gr.Markdown("""
299
+ <div style='margin:20px auto;'>
300
+
301
+ <p>References: <a href="https://arxiv.org/abs/2409.07226">Muskits-ESPnet paper</a> |
302
+ <a href="https://github.com/espnet/espnet">espnet</a> |
303
+ <a href="https://huggingface.co/espnet/aceopencpop_svs_visinger2_40singer_pretrain">Model①(Chinese)</a> |
304
+ <a href="https://huggingface.co/espnet/mixdata_svs_visinger2_spkembed_lang_pretrained">Model②(Multilingual)</a></p>
305
+
306
+ </div>
307
+ """
308
+ )
309
+
310
+ generate.click(
311
+ fn=gen_song,
312
+ inputs=[model_name, singer, lyrics, duration, pitch],
313
+ outputs=[gened_song, run_status, pred_mos],
314
+ )
315
+
316
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/South-Twilight/espnet
2
+ torch
3
+ numpy
4
+ librosa
5
+ espnet_model_zoo
6
+ importlib
7
+ pathlib
8
+ pypinyin
9
+ torchaudio
10
+ pyopenjtalk
resource/Muskits_ESPnet_logo.png ADDED
resource/__init__.py ADDED
File without changes
resource/all_plans.json ADDED
The diff for this file is too large to render. See raw diff
 
resource/midi-note.scp ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ G9 127
2
+ F#9 126
3
+ Gb9 126
4
+ F9 125
5
+ E9 124
6
+ D#9 123
7
+ Eb9 123
8
+ D9 122
9
+ C#9 121
10
+ Db9 121
11
+ C9 120
12
+ B8 119
13
+ A#8 118
14
+ Bb8 118
15
+ A8 117
16
+ G#8 116
17
+ Ab8 116
18
+ G8 115
19
+ F#8 114
20
+ Gb8 114
21
+ F8 113
22
+ E8 112
23
+ D#8 111
24
+ Eb8 111
25
+ D8 110
26
+ C#8 109
27
+ Db8 109
28
+ C8 108
29
+ B7 107
30
+ A#7 106
31
+ Bb7 106
32
+ A7 105
33
+ G#7 104
34
+ Ab7 104
35
+ G7 103
36
+ F#7 102
37
+ Gb7 102
38
+ F7 101
39
+ E7 100
40
+ D#7 99
41
+ Eb7 99
42
+ D7 98
43
+ C#7 97
44
+ Db7 97
45
+ C7 96
46
+ B6 95
47
+ A#6 94
48
+ Bb6 94
49
+ A6 93
50
+ G#6 92
51
+ Ab6 92
52
+ G6 91
53
+ F#6 90
54
+ Gb6 90
55
+ F6 89
56
+ E6 88
57
+ D#6 87
58
+ Eb6 87
59
+ D6 86
60
+ C#6 85
61
+ Db6 85
62
+ C6 84
63
+ B5 83
64
+ A#5 82
65
+ Bb5 82
66
+ A5 81
67
+ G#5 80
68
+ Ab5 80
69
+ G5 79
70
+ F#5 78
71
+ Gb5 78
72
+ F5 77
73
+ E5 76
74
+ D#5 75
75
+ Eb5 75
76
+ D5 74
77
+ C#5 73
78
+ Db5 73
79
+ C5 72
80
+ B4 71
81
+ A#4 70
82
+ Bb4 70
83
+ A4 69
84
+ G#4 68
85
+ Ab4 68
86
+ G4 67
87
+ F#4 66
88
+ Gb4 66
89
+ F4 65
90
+ E4 64
91
+ D#4 63
92
+ Eb4 63
93
+ D4 62
94
+ C#4 61
95
+ Db4 61
96
+ C4 60
97
+ B3 59
98
+ A#3 58
99
+ Bb3 58
100
+ A3 57
101
+ G#3 56
102
+ Ab3 56
103
+ G3 55
104
+ F#3 54
105
+ Gb3 54
106
+ F3 53
107
+ E3 52
108
+ D#3 51
109
+ Eb3 51
110
+ D3 50
111
+ C#3 49
112
+ Db3 49
113
+ C3 48
114
+ B2 47
115
+ A#2 46
116
+ Bb2 46
117
+ A2 45
118
+ G#2 44
119
+ Ab2 44
120
+ G2 43
121
+ F#2 42
122
+ Gb2 42
123
+ F2 41
124
+ E2 40
125
+ D#2 39
126
+ Eb2 39
127
+ D2 38
128
+ C#2 37
129
+ Db2 37
130
+ C2 36
131
+ B1 35
132
+ A#1 34
133
+ Bb1 34
134
+ A1 33
135
+ G#1 32
136
+ Ab1 32
137
+ G1 31
138
+ F#1 30
139
+ Gb1 30
140
+ F1 29
141
+ E1 28
142
+ D#1 27
143
+ Eb1 27
144
+ D1 26
145
+ C#1 25
146
+ Db1 25
147
+ C1 24
148
+ B0 23
149
+ A#0 22
150
+ Bb0 22
151
+ A0 21
152
+ rest 0
resource/pinyin_dict.py ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adapted from Opencpop's pinyin to phoneme mapping table:
2
+ # https://wenet.org.cn/opencpop/resources/annotationformat/
3
+ PINYIN_DICT = {
4
+ "a": ("a",),
5
+ "ai": ("ai",),
6
+ "an": ("an",),
7
+ "ang": ("ang",),
8
+ "ao": ("ao",),
9
+ "ba": ("b", "a"),
10
+ "bai": ("b", "ai"),
11
+ "ban": ("b", "an"),
12
+ "bang": ("b", "ang"),
13
+ "bao": ("b", "ao"),
14
+ "bei": ("b", "ei"),
15
+ "ben": ("b", "en"),
16
+ "beng": ("b", "eng"),
17
+ "bi": ("b", "i"),
18
+ "bian": ("b", "ian"),
19
+ "biao": ("b", "iao"),
20
+ "bie": ("b", "ie"),
21
+ "bin": ("b", "in"),
22
+ "bing": ("b", "ing"),
23
+ "bo": ("b", "o"),
24
+ "bu": ("b", "u"),
25
+ "ca": ("c", "a"),
26
+ "cai": ("c", "ai"),
27
+ "can": ("c", "an"),
28
+ "cang": ("c", "ang"),
29
+ "cao": ("c", "ao"),
30
+ "ce": ("c", "e"),
31
+ "cei": ("c", "ei"),
32
+ "cen": ("c", "en"),
33
+ "ceng": ("c", "eng"),
34
+ "cha": ("ch", "a"),
35
+ "chai": ("ch", "ai"),
36
+ "chan": ("ch", "an"),
37
+ "chang": ("ch", "ang"),
38
+ "chao": ("ch", "ao"),
39
+ "che": ("ch", "e"),
40
+ "chen": ("ch", "en"),
41
+ "cheng": ("ch", "eng"),
42
+ "chi": ("ch", "i"),
43
+ "chong": ("ch", "ong"),
44
+ "chou": ("ch", "ou"),
45
+ "chu": ("ch", "u"),
46
+ "chua": ("ch", "ua"),
47
+ "chuai": ("ch", "uai"),
48
+ "chuan": ("ch", "uan"),
49
+ "chuang": ("ch", "uang"),
50
+ "chui": ("ch", "ui"),
51
+ "chun": ("ch", "un"),
52
+ "chuo": ("ch", "uo"),
53
+ "ci": ("c", "i"),
54
+ "cong": ("c", "ong"),
55
+ "cou": ("c", "ou"),
56
+ "cu": ("c", "u"),
57
+ "cuan": ("c", "uan"),
58
+ "cui": ("c", "ui"),
59
+ "cun": ("c", "un"),
60
+ "cuo": ("c", "uo"),
61
+ "da": ("d", "a"),
62
+ "dai": ("d", "ai"),
63
+ "dan": ("d", "an"),
64
+ "dang": ("d", "ang"),
65
+ "dao": ("d", "ao"),
66
+ "de": ("d", "e"),
67
+ "dei": ("d", "ei"),
68
+ "den": ("d", "en"),
69
+ "deng": ("d", "eng"),
70
+ "di": ("d", "i"),
71
+ "dia": ("d", "ia"),
72
+ "dian": ("d", "ian"),
73
+ "diao": ("d", "iao"),
74
+ "die": ("d", "ie"),
75
+ "ding": ("d", "ing"),
76
+ "diu": ("d", "iu"),
77
+ "dong": ("d", "ong"),
78
+ "dou": ("d", "ou"),
79
+ "du": ("d", "u"),
80
+ "duan": ("d", "uan"),
81
+ "dui": ("d", "ui"),
82
+ "dun": ("d", "un"),
83
+ "duo": ("d", "uo"),
84
+ "e": ("e",),
85
+ "ei": ("ei",),
86
+ "en": ("en",),
87
+ "eng": ("eng",),
88
+ "er": ("er",),
89
+ "fa": ("f", "a"),
90
+ "fan": ("f", "an"),
91
+ "fang": ("f", "ang"),
92
+ "fei": ("f", "ei"),
93
+ "fen": ("f", "en"),
94
+ "feng": ("f", "eng"),
95
+ "fo": ("f", "o"),
96
+ "fou": ("f", "ou"),
97
+ "fu": ("f", "u"),
98
+ "ga": ("g", "a"),
99
+ "gai": ("g", "ai"),
100
+ "gan": ("g", "an"),
101
+ "gang": ("g", "ang"),
102
+ "gao": ("g", "ao"),
103
+ "ge": ("g", "e"),
104
+ "gei": ("g", "ei"),
105
+ "gen": ("g", "en"),
106
+ "geng": ("g", "eng"),
107
+ "gong": ("g", "ong"),
108
+ "gou": ("g", "ou"),
109
+ "gu": ("g", "u"),
110
+ "gua": ("g", "ua"),
111
+ "guai": ("g", "uai"),
112
+ "guan": ("g", "uan"),
113
+ "guang": ("g", "uang"),
114
+ "gui": ("g", "ui"),
115
+ "gun": ("g", "un"),
116
+ "guo": ("g", "uo"),
117
+ "ha": ("h", "a"),
118
+ "hai": ("h", "ai"),
119
+ "han": ("h", "an"),
120
+ "hang": ("h", "ang"),
121
+ "hao": ("h", "ao"),
122
+ "he": ("h", "e"),
123
+ "hei": ("h", "ei"),
124
+ "hen": ("h", "en"),
125
+ "heng": ("h", "eng"),
126
+ "hm": ("h", "m"),
127
+ "hng": ("h", "ng"),
128
+ "hong": ("h", "ong"),
129
+ "hou": ("h", "ou"),
130
+ "hu": ("h", "u"),
131
+ "hua": ("h", "ua"),
132
+ "huai": ("h", "uai"),
133
+ "huan": ("h", "uan"),
134
+ "huang": ("h", "uang"),
135
+ "hui": ("h", "ui"),
136
+ "hun": ("h", "un"),
137
+ "huo": ("h", "uo"),
138
+ "ji": ("j", "i"),
139
+ "jia": ("j", "ia"),
140
+ "jian": ("j", "ian"),
141
+ "jiang": ("j", "iang"),
142
+ "jiao": ("j", "iao"),
143
+ "jie": ("j", "ie"),
144
+ "jin": ("j", "in"),
145
+ "jing": ("j", "ing"),
146
+ "jiong": ("j", "iong"),
147
+ "jiu": ("j", "iu"),
148
+ "ju": ("j", "v"),
149
+ "juan": ("j", "van"),
150
+ "jue": ("j", "ve"),
151
+ "jun": ("j", "vn"),
152
+ "ka": ("k", "a"),
153
+ "kai": ("k", "ai"),
154
+ "kan": ("k", "an"),
155
+ "kang": ("k", "ang"),
156
+ "kao": ("k", "ao"),
157
+ "ke": ("k", "e"),
158
+ "kei": ("k", "ei"),
159
+ "ken": ("k", "en"),
160
+ "keng": ("k", "eng"),
161
+ "kong": ("k", "ong"),
162
+ "kou": ("k", "ou"),
163
+ "ku": ("k", "u"),
164
+ "kua": ("k", "ua"),
165
+ "kuai": ("k", "uai"),
166
+ "kuan": ("k", "uan"),
167
+ "kuang": ("k", "uang"),
168
+ "kui": ("k", "ui"),
169
+ "kun": ("k", "un"),
170
+ "kuo": ("k", "uo"),
171
+ "la": ("l", "a"),
172
+ "lai": ("l", "ai"),
173
+ "lan": ("l", "an"),
174
+ "lang": ("l", "ang"),
175
+ "lao": ("l", "ao"),
176
+ "le": ("l", "e"),
177
+ "lei": ("l", "ei"),
178
+ "leng": ("l", "eng"),
179
+ "li": ("l", "i"),
180
+ "lia": ("l", "ia"),
181
+ "lian": ("l", "ian"),
182
+ "liang": ("l", "iang"),
183
+ "liao": ("l", "iao"),
184
+ "lie": ("l", "ie"),
185
+ "lin": ("l", "in"),
186
+ "ling": ("l", "ing"),
187
+ "liu": ("l", "iu"),
188
+ "lo": ("l", "o"),
189
+ "long": ("l", "ong"),
190
+ "lou": ("l", "ou"),
191
+ "lu": ("l", "u"),
192
+ "luan": ("l", "uan"),
193
+ "lun": ("l", "un"),
194
+ "luo": ("l", "uo"),
195
+ "lv": ("l", "v"),
196
+ "lve": ("l", "ve"),
197
+ "m": ("m",),
198
+ "ma": ("m", "a"),
199
+ "mai": ("m", "ai"),
200
+ "man": ("m", "an"),
201
+ "mang": ("m", "ang"),
202
+ "mao": ("m", "ao"),
203
+ "me": ("m", "e"),
204
+ "mei": ("m", "ei"),
205
+ "men": ("m", "en"),
206
+ "meng": ("m", "eng"),
207
+ "mi": ("m", "i"),
208
+ "mian": ("m", "ian"),
209
+ "miao": ("m", "iao"),
210
+ "mie": ("m", "ie"),
211
+ "min": ("m", "in"),
212
+ "ming": ("m", "ing"),
213
+ "miu": ("m", "iu"),
214
+ "mo": ("m", "o"),
215
+ "mou": ("m", "ou"),
216
+ "mu": ("m", "u"),
217
+ "n": ("n",),
218
+ "na": ("n", "a"),
219
+ "nai": ("n", "ai"),
220
+ "nan": ("n", "an"),
221
+ "nang": ("n", "ang"),
222
+ "nao": ("n", "ao"),
223
+ "ne": ("n", "e"),
224
+ "nei": ("n", "ei"),
225
+ "nen": ("n", "en"),
226
+ "neng": ("n", "eng"),
227
+ "ng": ("n", "g"),
228
+ "ni": ("n", "i"),
229
+ "nian": ("n", "ian"),
230
+ "niang": ("n", "iang"),
231
+ "niao": ("n", "iao"),
232
+ "nie": ("n", "ie"),
233
+ "nin": ("n", "in"),
234
+ "ning": ("n", "ing"),
235
+ "niu": ("n", "iu"),
236
+ "nong": ("n", "ong"),
237
+ "nou": ("n", "ou"),
238
+ "nu": ("n", "u"),
239
+ "nuan": ("n", "uan"),
240
+ "nun": ("n", "un"),
241
+ "nuo": ("n", "uo"),
242
+ "nv": ("n", "v"),
243
+ "nve": ("n", "ve"),
244
+ "o": ("o",),
245
+ "ou": ("ou",),
246
+ "pa": ("p", "a"),
247
+ "pai": ("p", "ai"),
248
+ "pan": ("p", "an"),
249
+ "pang": ("p", "ang"),
250
+ "pao": ("p", "ao"),
251
+ "pei": ("p", "ei"),
252
+ "pen": ("p", "en"),
253
+ "peng": ("p", "eng"),
254
+ "pi": ("p", "i"),
255
+ "pian": ("p", "ian"),
256
+ "piao": ("p", "iao"),
257
+ "pie": ("p", "ie"),
258
+ "pin": ("p", "in"),
259
+ "ping": ("p", "ing"),
260
+ "po": ("p", "o"),
261
+ "pou": ("p", "ou"),
262
+ "pu": ("p", "u"),
263
+ "qi": ("q", "i"),
264
+ "qia": ("q", "ia"),
265
+ "qian": ("q", "ian"),
266
+ "qiang": ("q", "iang"),
267
+ "qiao": ("q", "iao"),
268
+ "qie": ("q", "ie"),
269
+ "qin": ("q", "in"),
270
+ "qing": ("q", "ing"),
271
+ "qiong": ("q", "iong"),
272
+ "qiu": ("q", "iu"),
273
+ "qu": ("q", "v"),
274
+ "quan": ("q", "van"),
275
+ "que": ("q", "ve"),
276
+ "qun": ("q", "vn"),
277
+ "ran": ("r", "an"),
278
+ "rang": ("r", "ang"),
279
+ "rao": ("r", "ao"),
280
+ "re": ("r", "e"),
281
+ "ren": ("r", "en"),
282
+ "reng": ("r", "eng"),
283
+ "ri": ("r", "i"),
284
+ "rong": ("r", "ong"),
285
+ "rou": ("r", "ou"),
286
+ "ru": ("r", "u"),
287
+ "rua": ("r", "ua"),
288
+ "ruan": ("r", "uan"),
289
+ "rui": ("r", "ui"),
290
+ "run": ("r", "un"),
291
+ "ruo": ("r", "uo"),
292
+ "sa": ("s", "a"),
293
+ "sai": ("s", "ai"),
294
+ "san": ("s", "an"),
295
+ "sang": ("s", "ang"),
296
+ "sao": ("s", "ao"),
297
+ "se": ("s", "e"),
298
+ "sen": ("s", "en"),
299
+ "seng": ("s", "eng"),
300
+ "sha": ("sh", "a"),
301
+ "shai": ("sh", "ai"),
302
+ "shan": ("sh", "an"),
303
+ "shang": ("sh", "ang"),
304
+ "shao": ("sh", "ao"),
305
+ "she": ("sh", "e"),
306
+ "shei": ("sh", "ei"),
307
+ "shen": ("sh", "en"),
308
+ "sheng": ("sh", "eng"),
309
+ "shi": ("sh", "i"),
310
+ "shou": ("sh", "ou"),
311
+ "shu": ("sh", "u"),
312
+ "shua": ("sh", "ua"),
313
+ "shuai": ("sh", "uai"),
314
+ "shuan": ("sh", "uan"),
315
+ "shuang": ("sh", "uang"),
316
+ "shui": ("sh", "ui"),
317
+ "shun": ("sh", "un"),
318
+ "shuo": ("sh", "uo"),
319
+ "si": ("s", "i"),
320
+ "song": ("s", "ong"),
321
+ "sou": ("s", "ou"),
322
+ "su": ("s", "u"),
323
+ "suan": ("s", "uan"),
324
+ "sui": ("s", "ui"),
325
+ "sun": ("s", "un"),
326
+ "suo": ("s", "uo"),
327
+ "ta": ("t", "a"),
328
+ "tai": ("t", "ai"),
329
+ "tan": ("t", "an"),
330
+ "tang": ("t", "ang"),
331
+ "tao": ("t", "ao"),
332
+ "te": ("t", "e"),
333
+ "tei": ("t", "ei"),
334
+ "teng": ("t", "eng"),
335
+ "ti": ("t", "i"),
336
+ "tian": ("t", "ian"),
337
+ "tiao": ("t", "iao"),
338
+ "tie": ("t", "ie"),
339
+ "ting": ("t", "ing"),
340
+ "tong": ("t", "ong"),
341
+ "tou": ("t", "ou"),
342
+ "tu": ("t", "u"),
343
+ "tuan": ("t", "uan"),
344
+ "tui": ("t", "ui"),
345
+ "tun": ("t", "un"),
346
+ "tuo": ("t", "uo"),
347
+ "wa": ("w", "a"),
348
+ "wai": ("w", "ai"),
349
+ "wan": ("w", "an"),
350
+ "wang": ("w", "ang"),
351
+ "wei": ("w", "ei"),
352
+ "wen": ("w", "en"),
353
+ "weng": ("w", "eng"),
354
+ "wo": ("w", "o"),
355
+ "wu": ("w", "u"),
356
+ "xi": ("x", "i"),
357
+ "xia": ("x", "ia"),
358
+ "xian": ("x", "ian"),
359
+ "xiang": ("x", "iang"),
360
+ "xiao": ("x", "iao"),
361
+ "xie": ("x", "ie"),
362
+ "xin": ("x", "in"),
363
+ "xing": ("x", "ing"),
364
+ "xiong": ("x", "iong"),
365
+ "xiu": ("x", "iu"),
366
+ "xu": ("x", "v"),
367
+ "xuan": ("x", "van"),
368
+ "xue": ("x", "ve"),
369
+ "xun": ("x", "vn"),
370
+ "ya": ("y", "a"),
371
+ "yan": ("y", "an"),
372
+ "yang": ("y", "ang"),
373
+ "yao": ("y", "ao"),
374
+ "ye": ("y", "e"),
375
+ "yi": ("y", "i"),
376
+ "yin": ("y", "in"),
377
+ "ying": ("y", "ing"),
378
+ "yo": ("y", "o"),
379
+ "yong": ("y", "ong"),
380
+ "you": ("y", "ou"),
381
+ "yu": ("y", "v"),
382
+ "yuan": ("y", "van"),
383
+ "yue": ("y", "ve"),
384
+ "yun": ("y", "vn"),
385
+ "za": ("z", "a"),
386
+ "zai": ("z", "ai"),
387
+ "zan": ("z", "an"),
388
+ "zang": ("z", "ang"),
389
+ "zao": ("z", "ao"),
390
+ "ze": ("z", "e"),
391
+ "zei": ("z", "ei"),
392
+ "zen": ("z", "en"),
393
+ "zeng": ("z", "eng"),
394
+ "zha": ("zh", "a"),
395
+ "zhai": ("zh", "ai"),
396
+ "zhan": ("zh", "an"),
397
+ "zhang": ("zh", "ang"),
398
+ "zhao": ("zh", "ao"),
399
+ "zhe": ("zh", "e"),
400
+ "zhei": ("zh", "ei"),
401
+ "zhen": ("zh", "en"),
402
+ "zheng": ("zh", "eng"),
403
+ "zhi": ("zh", "i"),
404
+ "zhong": ("zh", "ong"),
405
+ "zhou": ("zh", "ou"),
406
+ "zhu": ("zh", "u"),
407
+ "zhua": ("zh", "ua"),
408
+ "zhuai": ("zh", "uai"),
409
+ "zhuan": ("zh", "uan"),
410
+ "zhuang": ("zh", "uang"),
411
+ "zhui": ("zh", "ui"),
412
+ "zhun": ("zh", "un"),
413
+ "zhuo": ("zh", "uo"),
414
+ "zi": ("z", "i"),
415
+ "zong": ("z", "ong"),
416
+ "zou": ("z", "ou"),
417
+ "zu": ("z", "u"),
418
+ "zuan": ("z", "uan"),
419
+ "zui": ("z", "ui"),
420
+ "zun": ("z", "un"),
421
+ "zuo": ("z", "uo"),
422
+ }
423
+
resource/singer/singer_embedding_ace-1.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac07d73eb172609c87329b35af91f0d27c1c20c30dcd90f23f1d4eb8c6450ca6
3
+ size 896
resource/singer/singer_embedding_ace-10.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6b4da3cb369b0d4f2bcdb76986e6fbfb3ebba727a8e5a899227d6b70f73769d
3
+ size 896
resource/singer/singer_embedding_ace-11.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf65222a416d9786feea3ac91672683727973aa7f7a89b533117a2bdf5dd706b
3
+ size 896
resource/singer/singer_embedding_ace-12.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f07e7da7629c222318beaafee2b43f913be6f5137842f7d771225bb76268c7f
3
+ size 896
resource/singer/singer_embedding_ace-13.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edfc4aba22a1adb4680a70ab872d2522a12587180d91176d64b03bdb4f181dfd
3
+ size 896
resource/singer/singer_embedding_ace-14.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f43b58502afc550af2eff7c358e370b72433163b319b97b4c3b3d3eff9557ce
3
+ size 896
resource/singer/singer_embedding_ace-15.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee8771b428b1f2bd5f680d54287eff436b3a9b84c6dd15797502942fe22ad18b
3
+ size 896
resource/singer/singer_embedding_ace-16.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edea4a167679d619d66fc972a273b310290f9e5eac75a7857e81a7f25286b67b
3
+ size 896
resource/singer/singer_embedding_ace-17.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bffeb7f3364168310939320f885d3ccf1502915365873050602eac7b557b12f
3
+ size 896
resource/singer/singer_embedding_ace-18.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e3ffe2c7f0e583f97d58c13bc7318e64f1a33c0549e171f01d90c1fc0659a10
3
+ size 896
resource/singer/singer_embedding_ace-19.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d8abcf1ce64bdbe38c959a43103da2f08b7e0399de1de44ab0ba64caf63deb
3
+ size 896
resource/singer/singer_embedding_ace-2.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11410fa00026e13862d6636e77e3f8932b6f1986ceec3039dc5ebfcfb865cd8e
3
+ size 896
resource/singer/singer_embedding_ace-20.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:911d7f112b52b8644742d18c97346a5d10731dc2f001c3523297e01641e22b21
3
+ size 896
resource/singer/singer_embedding_ace-21.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c62c1b06d0448bdc3709d994bb54593a41d27356f1cabd6ec5818cf523ee66f
3
+ size 896
resource/singer/singer_embedding_ace-22.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db4b2821e3a955da34a3369f36f7eac366fd7c7e742f43676d753ca5f60ba7fa
3
+ size 896
resource/singer/singer_embedding_ace-23.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9e98c6885c860bd9393c6829a430c9956dd6d0a7e286dc0e19e58a8cdb26ddc
3
+ size 896
resource/singer/singer_embedding_ace-24.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5753ebb8bed09c7d03565166282c38def512c9a686f8cc6ce672b0ad71d7fd7c
3
+ size 896
resource/singer/singer_embedding_ace-25.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfdeb67333e1c316bc50ef9ce05cd4e894f9ad3af307a5c41e5cf2314a021fc3
3
+ size 896
resource/singer/singer_embedding_ace-26.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea4a02e11fa0287f247f057396e70bde203883c00f0fbb266bc28ecff44a1533
3
+ size 896
resource/singer/singer_embedding_ace-27.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eecbf3c017c1b6cffbc5ae393a6f78544154ab1e88f714904f56889a1065a74f
3
+ size 896
resource/singer/singer_embedding_ace-28.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f23bf949e26a7486aa0d8905b79bd80af6ab77afa1f7f5e6c8d6ac074558ff
3
+ size 896
resource/singer/singer_embedding_ace-29.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b6807858aff6652a354e2945ca36de2cce5f766d09e0a14efcca6cc5576a2bd
3
+ size 896
resource/singer/singer_embedding_ace-3.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab0dd74df2d47ee5de50eab7adc0a771b52e1cb10ebabdf47620001027a6f113
3
+ size 896
resource/singer/singer_embedding_ace-30.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d0f99a41d7b459e5e96bea65ac793d68d62e92c75c42e0bfb0e04d8c0313ad3
3
+ size 896
resource/singer/singer_embedding_ace-4.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c0361d54e0df795e1d60b8d10d32e459aadcd0a867ba1888c36a9c6f09b2eb2
3
+ size 896
resource/singer/singer_embedding_ace-5.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50a9b21470c213422954ce1ce3f1cc8d5c295862896acc9fb772fa5c54f8bb57
3
+ size 896
resource/singer/singer_embedding_ace-6.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3a7de10b1f824cc6ed4f4c203a434fa0990bc6cf58e9e1c73c3103b92041a0c
3
+ size 896
resource/singer/singer_embedding_ace-7.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce7059ef8ae835777f53e3c89af722773f0e5792eeced0a4c131c1aeeeb342f1
3
+ size 896
resource/singer/singer_embedding_ace-8.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27ddeda52d4e2298d95c77ae85d22d35ac83393f3b00417d21041be38ebacf3
3
+ size 896
resource/singer/singer_embedding_ace-9.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17547452ee7b536dbf3ccda567cd0230bf53f103c27d66687a1ca8959782b45c
3
+ size 896
resource/singer/singer_embedding_ameboshi.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58f65d89d76060bb5406cdde4001bde87c29b0ef3fefd3a2072ab937a379cc9c
3
+ size 896
resource/singer/singer_embedding_itako.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c94e137f090bb8376114a3aadf42423db4e59d0236a46049f8cee9c387fd5fd6
3
+ size 896
resource/singer/singer_embedding_kiritan.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68583154587392d29729c270bc82778c203bbfe2f2101f5ec5bc68c8aa40bd22
3
+ size 896
resource/singer/singer_embedding_kising_barber.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5643c312089790dc95d97e4a8b24f152d7e4caa703170143f1c1f9ecb0179564
3
+ size 896
resource/singer/singer_embedding_kising_blanca.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec31eacdb733f13185daf155f75d73f0fdcf1eda1cce92d02b93d02878da3e0
3
+ size 896
resource/singer/singer_embedding_kising_changge.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0ad51d26b771f2460b146a8984bab73f929e943ce15932c287f6252a241026a
3
+ size 896
resource/singer/singer_embedding_kising_chuci.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:764e6424b167dd4ff798f7c608985de31e6dadea68ee578b5dd66eba938cc835
3
+ size 896
resource/singer/singer_embedding_kising_chuming.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4d1847080412e765208bdc09db64cf1e117e998e410e3afe6c061171afcab92
3
+ size 896
resource/singer/singer_embedding_kising_crimson.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c6a67b36899aa0f4c62144fa63fc719e2448618fc730a0073ad66b16ee59593
3
+ size 896
resource/singer/singer_embedding_kising_david.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eef8cd6416850059b5e8fc7c11a8fc2fb2165ea59177623133e33a30497a41b0
3
+ size 896
resource/singer/singer_embedding_kising_dvaid.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ae99f524321f6836c71e83115308ba1117f7235b352f8c2135489bcd54dde5f
3
+ size 896
resource/singer/singer_embedding_kising_ghost.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8b0f26a4bf60b5dea57c2bad28c1757052d04fcb131b64515da95cd2addcfa
3
+ size 896