Upload 25 files
Browse files
- .gitattributes +3 -0
- config.json +207 -0
- kokoro-v1_1-zh.pth +3 -0
- samples/HEARME_en.wav +3 -0
- samples/HEARME_zf_001.wav +3 -0
- samples/HEARME_zm_010.wav +3 -0
- samples/make_en.py +71 -0
- samples/make_zh.py +86 -0
- voices/af_maple.pt +3 -0
- voices/af_sol.pt +3 -0
- voices/bf_vale.pt +3 -0
- voices/zf_001.pt +3 -0
- voices/zf_002.pt +3 -0
- voices/zf_003.pt +3 -0
- voices/zf_004.pt +3 -0
- voices/zf_005.pt +3 -0
- voices/zf_006.pt +3 -0
- voices/zf_007.pt +3 -0
- voices/zf_008.pt +3 -0
- voices/zm_009.pt +3 -0
- voices/zm_010.pt +3 -0
- voices/zm_011.pt +3 -0
- voices/zm_012.pt +3 -0
- voices/zm_013.pt +3 -0
- voices/zm_014.pt +3 -0
- voices/zm_015.pt +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+samples/HEARME_en.wav filter=lfs diff=lfs merge=lfs -text
+samples/HEARME_zf_001.wav filter=lfs diff=lfs merge=lfs -text
+samples/HEARME_zm_010.wav filter=lfs diff=lfs merge=lfs -text
config.json
ADDED
@@ -0,0 +1,207 @@
{
    "istftnet": {
        "upsample_kernel_sizes": [20, 12],
        "upsample_rates": [10, 6],
        "gen_istft_hop_size": 5,
        "gen_istft_n_fft": 20,
        "resblock_dilation_sizes": [
            [1, 3, 5],
            [1, 3, 5],
            [1, 3, 5]
        ],
        "resblock_kernel_sizes": [3, 7, 11],
        "upsample_initial_channel": 512
    },
    "dim_in": 64,
    "dropout": 0.2,
    "hidden_dim": 512,
    "max_conv_dim": 512,
    "max_dur": 50,
    "multispeaker": true,
    "n_layer": 3,
    "n_mels": 80,
    "n_token": 178,
    "style_dim": 128,
    "text_encoder_kernel_size": 5,
    "plbert": {
        "hidden_size": 768,
        "num_attention_heads": 12,
        "intermediate_size": 2048,
        "max_position_embeddings": 512,
        "num_hidden_layers": 12,
        "dropout": 0.1
    },
    "vocab": {
        ";": 1,
        ":": 2,
        ",": 3,
        ".": 4,
        "!": 5,
        "?": 6,
        "/": 7,
        "—": 9,
        "…": 10,
        "\"": 11,
        "(": 12,
        ")": 13,
        "“": 14,
        "”": 15,
        " ": 16,
        "\u0303": 17,
        "ʣ": 18,
        "ʥ": 19,
        "ʦ": 20,
        "ʨ": 21,
        "ᵝ": 22,
        "ㄓ": 23,
        "A": 24,
        "I": 25,
        "ㄅ": 30,
        "O": 31,
        "ㄆ": 32,
        "Q": 33,
        "R": 34,
        "S": 35,
        "T": 36,
        "ㄇ": 37,
        "ㄈ": 38,
        "W": 39,
        "ㄉ": 40,
        "Y": 41,
        "ᵊ": 42,
        "a": 43,
        "b": 44,
        "c": 45,
        "d": 46,
        "e": 47,
        "f": 48,
        "ㄊ": 49,
        "h": 50,
        "i": 51,
        "j": 52,
        "k": 53,
        "l": 54,
        "m": 55,
        "n": 56,
        "o": 57,
        "p": 58,
        "q": 59,
        "r": 60,
        "s": 61,
        "t": 62,
        "u": 63,
        "v": 64,
        "w": 65,
        "x": 66,
        "y": 67,
        "z": 68,
        "ɑ": 69,
        "ɐ": 70,
        "ɒ": 71,
        "æ": 72,
        "ㄋ": 73,
        "ㄌ": 74,
        "β": 75,
        "ɔ": 76,
        "ɕ": 77,
        "ç": 78,
        "ㄍ": 79,
        "ɖ": 80,
        "ð": 81,
        "ʤ": 82,
        "ə": 83,
        "ㄎ": 84,
        "ㄦ": 85,
        "ɛ": 86,
        "ɜ": 87,
        "ㄏ": 88,
        "ㄐ": 89,
        "ɟ": 90,
        "ㄑ": 91,
        "ɡ": 92,
        "ㄒ": 93,
        "ㄔ": 94,
        "ㄕ": 95,
        "ㄗ": 96,
        "ㄘ": 97,
        "ㄙ": 98,
        "月": 99,
        "ㄚ": 100,
        "ɨ": 101,
        "ɪ": 102,
        "ʝ": 103,
        "ㄛ": 104,
        "ㄝ": 105,
        "ㄞ": 106,
        "ㄟ": 107,
        "ㄠ": 108,
        "ㄡ": 109,
        "ɯ": 110,
        "ɰ": 111,
        "ŋ": 112,
        "ɳ": 113,
        "ɲ": 114,
        "ɴ": 115,
        "ø": 116,
        "ㄢ": 117,
        "ɸ": 118,
        "θ": 119,
        "œ": 120,
        "ㄣ": 121,
        "ㄤ": 122,
        "ɹ": 123,
        "ㄥ": 124,
        "ɾ": 125,
        "ㄖ": 126,
        "ㄧ": 127,
        "ʁ": 128,
        "ɽ": 129,
        "ʂ": 130,
        "ʃ": 131,
        "ʈ": 132,
        "ʧ": 133,
        "ㄨ": 134,
        "ʊ": 135,
        "ʋ": 136,
        "ㄩ": 137,
        "ʌ": 138,
        "ɣ": 139,
        "ㄜ": 140,
        "ㄭ": 141,
        "χ": 142,
        "ʎ": 143,
        "十": 144,
        "压": 145,
        "言": 146,
        "ʒ": 147,
        "ʔ": 148,
        "阳": 149,
        "要": 150,
        "阴": 151,
        "应": 152,
        "用": 153,
        "又": 154,
        "中": 155,
        "ˈ": 156,
        "ˌ": 157,
        "ː": 158,
        "穵": 159,
        "外": 160,
        "万": 161,
        "ʰ": 162,
        "王": 163,
        "ʲ": 164,
        "为": 165,
        "文": 166,
        "瓮": 167,
        "我": 168,
        "3": 169,
        "5": 170,
        "1": 171,
        "2": 172,
        "4": 173,
        "元": 175,
        "云": 176,
        "ᵻ": 177
    }
}
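For orientation, here is a minimal illustrative sketch (not part of this upload) that loads config.json from a local clone with the standard library and cross-checks the vocab against n_token; the local path and the example phoneme string are assumptions for the demo.

# Minimal sketch: inspect config.json from a local clone of this repo.
# The relative path 'config.json' is an assumption; adjust as needed.
import json

with open('config.json', encoding='utf-8') as f:
    config = json.load(f)

vocab = config['vocab']
print(f"n_token: {config['n_token']}, vocab entries: {len(vocab)}")
print(f"style_dim: {config['style_dim']}, n_mels: {config['n_mels']}")

# Map an IPA phoneme string to token ids, skipping symbols absent from the vocab.
ids = [vocab[p] for p in 'kˈOkəɹO' if p in vocab]
print(ids)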
kokoro-v1_1-zh.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1d8410fa44dfb5c15471fd6c4225ea6b4e9ac7fa03c98e8bea47a9928476e2b
|
3 |
+
size 327247856
|
samples/HEARME_en.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b759a65788991932d031d6fc8440f7a8efc402273fc1c2ca9d52ffd8a16a6666
|
3 |
+
size 4528044
|
samples/HEARME_zf_001.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c8685f06fd809ca2e892f8b71f3549d0640ab992b37648781f9138be33ef035
|
3 |
+
size 4267644
|
samples/HEARME_zm_010.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:915d93163e2e5319370b539b72a90c69c214c143206024c086c57e5fbdd67484
|
3 |
+
size 4253244
|
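These entries are Git LFS pointers, so a clone without LFS support yields only the stubs above. As an illustrative sketch, individual binaries can also be fetched with huggingface_hub (filenames taken from this commit; treat the snippet as a sketch rather than the canonical download path):

# Minimal sketch: fetch the checkpoint and one voice without a full clone.
# pip install huggingface_hub
from huggingface_hub import hf_hub_download

REPO_ID = 'hexgrad/Kokoro-82M-v1.1-zh'

ckpt_path = hf_hub_download(repo_id=REPO_ID, filename='kokoro-v1_1-zh.pth')
voice_path = hf_hub_download(repo_id=REPO_ID, filename='voices/zf_001.pt')
print(ckpt_path, voice_path)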
samples/make_en.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file is hardcoded to transparently reproduce HEARME_en.wav
|
2 |
+
# Therefore it may NOT generalize gracefully to other texts
|
3 |
+
# Refer to Usage in README.md for more general usage patterns
|
4 |
+
|
5 |
+
# pip install kokoro>=0.8.1
|
6 |
+
from kokoro import KModel, KPipeline
|
7 |
+
from pathlib import Path
|
8 |
+
import numpy as np
|
9 |
+
import soundfile as sf
|
10 |
+
import torch
|
11 |
+
import tqdm
|
12 |
+
|
13 |
+
REPO_ID = 'hexgrad/Kokoro-82M-v1.1-zh'
|
14 |
+
SAMPLE_RATE = 24000
|
15 |
+
|
16 |
+
# How much silence to insert between paragraphs: 5000 is about 0.2 seconds
|
17 |
+
N_ZEROS = 5000
|
18 |
+
|
19 |
+
# Whether to join sentences in paragraphs 1 and 3
|
20 |
+
JOIN_SENTENCES = True
|
21 |
+
|
22 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
23 |
+
|
24 |
+
texts = [(
|
25 |
+
"[Kokoro](/kˈQkəɹQ/) is an open-weight series of small but powerful TTS models.",
|
26 |
+
), (
|
27 |
+
"This model is the result of a short training run that added 100 Chinese speakers from a professional dataset.",
|
28 |
+
"The Chinese data was freely and permissively granted to us by LongMaoData, a professional dataset company. Thank you for making this model possible.",
|
29 |
+
), (
|
30 |
+
"Separately, some crowdsourced synthetic English data also entered the training mix:",
|
31 |
+
"1 hour of Maple, an American female.",
|
32 |
+
"1 hour of [Sol](/sˈOl/), another American female.",
|
33 |
+
"And 1 hour of Vale, an older British female.",
|
34 |
+
), (
|
35 |
+
"This model is not a strict upgrade over its predecessor since it drops many voices, but it is released early to gather feedback on new voices and tokenization.",
|
36 |
+
"Aside from the Chinese dataset and the 3 hours of English, the rest of the data was left behind for this training run.",
|
37 |
+
"The goal is to push the model series forward and ultimately restore some of the voices that were left behind.",
|
38 |
+
), (
|
39 |
+
"Current guidance from the U.S. Copyright Office indicates that synthetic data generally does not qualify for copyright protection.",
|
40 |
+
"Since this synthetic data is crowdsourced, the model trainer is not bound by any Terms of Service.",
|
41 |
+
"This Apache licensed model also aligns with OpenAI's stated mission of broadly distributing the benefits of AI.",
|
42 |
+
"If you would like to help further that mission, consider contributing permissive audio data to the cause.",
|
43 |
+
)]
|
44 |
+
|
45 |
+
if JOIN_SENTENCES:
|
46 |
+
for i in (1, 3):
|
47 |
+
texts[i] = [' '.join(texts[i])]
|
48 |
+
|
49 |
+
model = KModel(repo_id=REPO_ID).to(device).eval()
|
50 |
+
en_pipelines = [KPipeline(lang_code='b' if british else 'a', repo_id=REPO_ID, model=model) for british in (False, True)]
|
51 |
+
|
52 |
+
path = Path(__file__).parent
|
53 |
+
|
54 |
+
wavs = []
|
55 |
+
for paragraph in tqdm.tqdm(texts):
|
56 |
+
for i, sentence in enumerate(paragraph):
|
57 |
+
voice, british = 'bf_vale', True
|
58 |
+
if 'Maple' in sentence:
|
59 |
+
voice, british = 'af_maple', False
|
60 |
+
elif 'Sol' in sentence:
|
61 |
+
voice, british = 'af_sol', False
|
62 |
+
generator = en_pipelines[british](sentence, voice=voice)
|
63 |
+
f = path / f'en{len(wavs):02}.wav'
|
64 |
+
result = next(generator)
|
65 |
+
wav = result.audio
|
66 |
+
sf.write(f, wav, SAMPLE_RATE)
|
67 |
+
if i == 0 and wavs and N_ZEROS > 0:
|
68 |
+
wav = np.concatenate([np.zeros(N_ZEROS), wav])
|
69 |
+
wavs.append(wav)
|
70 |
+
|
71 |
+
sf.write(path / 'HEARME_en.wav', np.concatenate(wavs), SAMPLE_RATE)
|
samples/make_zh.py
ADDED
@@ -0,0 +1,86 @@
# This file is hardcoded to transparently reproduce HEARME_zh.wav
# Therefore it may NOT generalize gracefully to other texts
# Refer to Usage in README.md for more general usage patterns

# pip install kokoro>=0.8.1 "misaki[zh]>=0.8.1"
from kokoro import KModel, KPipeline
from pathlib import Path
import numpy as np
import soundfile as sf
import torch
import tqdm

REPO_ID = 'hexgrad/Kokoro-82M-v1.1-zh'
SAMPLE_RATE = 24000

# How much silence to insert between paragraphs: 5000 is about 0.2 seconds
N_ZEROS = 5000

# Whether to join sentences in paragraphs 1 and 3
JOIN_SENTENCES = True

VOICE = 'zf_001' if True else 'zm_010'

device = 'cuda' if torch.cuda.is_available() else 'cpu'

texts = [(
    "Kokoro 是一系列体积虽小但功能强大的 TTS 模型。",
), (
    "该模型是经过短期训练的结果,从专业数据集中添加了100名中文使用者。",
    "中文数据由专业数据集公司「龙猫数据」免费且无偿地提供给我们。感谢你们让这个模型成为可能。",
), (
    "另外,一些众包合成英语数据也进入了训练组合:",
    "1小时的 Maple,美国女性。",
    "1小时的 Sol,另一位美国女性。",
    "和1小时的 Vale,一位年长的英国女性。",
), (
    "由于该模型删除了许多声音,因此它并不是对其前身的严格升级,但它提前发布以收集有关新声音和标记化的反馈。",
    "除了中文数据集和3小时的英语之外,其余数据都留在本次训练中。",
    "目标是推动模型系列的发展,并最终恢复一些被遗留的声音。",
), (
    "美国版权局目前的指导表明,合成数据通常不符合版权保护的资格。",
    "由于这些合成数据是众包的,因此模型训练师不受任何服务条款的约束。",
    "该 Apache 许可模式也符合 OpenAI 所宣称的广泛传播 AI 优势的使命。",
    "如果您愿意帮助进一步完成这一使命,请考虑为此贡献许可的音频数据。",
)]

if JOIN_SENTENCES:
    for i in (1, 3):
        texts[i] = [''.join(texts[i])]

# English fallback used for Latin-script words embedded in the Chinese text
en_pipeline = KPipeline(lang_code='a', repo_id=REPO_ID, model=False)
def en_callable(text):
    if text == 'Kokoro':
        return 'kˈOkəɹO'
    elif text == 'Sol':
        return 'sˈOl'
    return next(en_pipeline(text)).phonemes

# HACK: Mitigate rushing caused by lack of training data beyond ~100 tokens
# Simple piecewise linear fn that decreases speed as len_ps increases
def speed_callable(len_ps):
    speed = 0.8
    if len_ps <= 83:
        speed = 1
    elif len_ps < 183:
        speed = 1 - (len_ps - 83) / 500
    return speed * 1.1

model = KModel(repo_id=REPO_ID).to(device).eval()
zh_pipeline = KPipeline(lang_code='z', repo_id=REPO_ID, model=model, en_callable=en_callable)

path = Path(__file__).parent

wavs = []
for paragraph in tqdm.tqdm(texts):
    for i, sentence in enumerate(paragraph):
        generator = zh_pipeline(sentence, voice=VOICE, speed=speed_callable)
        f = path / f'zh{len(wavs):02}.wav'
        result = next(generator)
        wav = result.audio
        sf.write(f, wav, SAMPLE_RATE)
        if i == 0 and wavs and N_ZEROS > 0:
            wav = np.concatenate([np.zeros(N_ZEROS), wav])
        wavs.append(wav)

sf.write(path / f'HEARME_{VOICE}.wav', np.concatenate(wavs), SAMPLE_RATE)
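Both sample scripts are deliberately hardcoded to reproduce the HEARME files and point to the README for general usage. As a minimal sketch of that more general pattern, assuming the same kokoro>=0.8.1 API used above (the demo sentence and output filename are illustrative choices):

# Minimal single-utterance sketch mirroring the pipeline setup in the scripts above.
# pip install kokoro>=0.8.1 "misaki[zh]>=0.8.1"
from kokoro import KModel, KPipeline
import soundfile as sf
import torch

REPO_ID = 'hexgrad/Kokoro-82M-v1.1-zh'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = KModel(repo_id=REPO_ID).to(device).eval()
# lang_code 'z' selects the Chinese pipeline; 'a'/'b' select American/British English.
zh_pipeline = KPipeline(lang_code='z', repo_id=REPO_ID, model=model)

# One Chinese sentence, one voice from this repo's voices/ directory.
result = next(zh_pipeline("千里之行,始于足下。", voice='zf_001'))
sf.write('demo.wav', result.audio, 24000)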
voices/af_maple.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f1211a6b94795d843cb7957568ccf2208e6ce76d2fbb36c7279b24e1be9b862f
size 523425
voices/af_sol.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7d24aad751d7f62618506264c1cf3436276901447d85f1209231e9be29da4261
size 523351
voices/bf_vale.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e66bc4578345d490985ce73c49464e6f6a9e7c58586b99a9ae14c988ae14e01f
size 523420
voices/zf_001.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9bdc9a87e13e9bb1ea3e7803259c2ecbfebaeeb2ff80b5d0c76df1a464c1c962
size 523331
voices/zf_002.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2c8cf221ff2e0915fc807cac5f233f42798ee8e2bd58bc5ad0259fd95e405a26
size 523331
voices/zf_003.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ac28a59eefaa7e37b2aabffc792d40081392aa89d679b579859debf5209441a1
size 523331
voices/zf_004.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6d50c3a87071a11d703d9d4ff7dd1f77fe6b8c5c3a9e60e81bc848816c0e959f
size 523331
voices/zf_005.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:64d656103a908954496676529f4e8dee783afd4c8dccd1a9042cd8dbe05e39f4
size 523331
voices/zf_006.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef341ad2c4ec5dab3bf32daa0a70b8779c5aba10a9e18f57e5b6b29c7ec93d37
size 523331
voices/zf_007.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:52a966710a29b50d9d11df15b5572c28062d2edf89585fe2c14abe281e2e49a8
size 523331
voices/zf_008.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:361c1da6b087284a66c803d413225a09d57334ab515a93d5e16a2d553d9941f6
size 523331
voices/zm_009.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eff75f26089f9f986b547985a420f901661057a951088ac7c7d8473a8d6327bd
size 523331
voices/zm_010.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d2eeba86192eee269f600ca6821038034abd017532a1fe68ff7b0e86c2983b2a
size 523331
voices/zm_011.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:978f00b0485869b2461249235429352767c661b0eeef65c37ae393a5c1531f46
size 523331
voices/zm_012.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3a94386850fc6e115e298c50583ea8b38eabe415a138fca756cf4f14ca63c1b4
size 523331
voices/zm_013.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef528a5444ab0001bb32c8149ddf44a53412af615f81e050f95d5b05fd10c34a
size 523331
voices/zm_014.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9eacf15e6b9b073e44f7e62f7ed8582ad774da6cbaaa5dd707839e8af3ba6855
size 523331
voices/zm_015.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ad69ea6143d656dec64997412c7cc60d3fd9e6f7bc27b5bdafebc6bcf3c70a68
size 523331
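Each voices/*.pt entry is a small speaker-style tensor (all roughly 523 KB here). A minimal sketch for inspecting one locally (the path assumes the file has already been fetched; the shape and dtype are simply whatever torch reports, not asserted here):

# Minimal sketch: peek at a downloaded voice tensor.
# Assumes voices/zf_001.pt was fetched via git lfs or hf_hub_download.
import torch

voice = torch.load('voices/zf_001.pt', map_location='cpu')
print(type(voice), getattr(voice, 'shape', None), getattr(voice, 'dtype', None))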