Blane187 committed on
Commit
16aea9f
·
verified ·
1 Parent(s): 09eb127

Delete configs

Browse files
configs/32k.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 50,
4
- "seed": 1234,
5
- "epochs": 20000,
6
- "learning_rate": 1e-4,
7
- "betas": [0.8, 0.99],
8
- "eps": 1e-9,
9
- "batch_size": 4,
10
- "fp16_run": true,
11
- "lr_decay": 0.99975,
12
- "segment_size": 12800,
13
- "init_lr_ratio": 1,
14
- "warmup_epochs": 100,
15
- "c_mel": 45,
16
- "c_kl": 1.0
17
- },
18
- "data": {
19
- "max_wav_value": 32768.0,
20
- "sampling_rate": 32000,
21
- "filter_length": 1024,
22
- "hop_length": 320,
23
- "win_length": 1024,
24
- "n_mel_channels": 80,
25
- "mel_fmin": 0.0,
26
- "mel_fmax": null
27
- },
28
- "model": {
29
- "inter_channels": 192,
30
- "hidden_channels": 192,
31
- "filter_channels": 768,
32
- "n_heads": 2,
33
- "n_layers": 6,
34
- "kernel_size": 3,
35
- "p_dropout": 0,
36
- "resblock": "1",
37
- "resblock_kernel_sizes": [3,7,11],
38
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
- "upsample_rates": [10,8,2,2],
40
- "upsample_initial_channel": 512,
41
- "upsample_kernel_sizes": [20,16,4,4],
42
- "use_spectral_norm": false,
43
- "gin_channels": 256,
44
- "spk_embed_dim": 109
45
- }
46
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/40k.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 50,
4
- "seed": 1234,
5
- "epochs": 20000,
6
- "learning_rate": 1e-4,
7
- "betas": [0.8, 0.99],
8
- "eps": 1e-9,
9
- "batch_size": 4,
10
- "fp16_run": true,
11
- "lr_decay": 0.99975,
12
- "segment_size": 12800,
13
- "init_lr_ratio": 1,
14
- "warmup_epochs": 100,
15
- "c_mel": 45,
16
- "c_kl": 1.0
17
- },
18
- "data": {
19
- "max_wav_value": 32768.0,
20
- "sampling_rate": 40000,
21
- "filter_length": 2048,
22
- "hop_length": 400,
23
- "win_length": 2048,
24
- "n_mel_channels": 125,
25
- "mel_fmin": 0.0,
26
- "mel_fmax": null
27
- },
28
- "model": {
29
- "inter_channels": 192,
30
- "hidden_channels": 192,
31
- "filter_channels": 768,
32
- "n_heads": 2,
33
- "n_layers": 6,
34
- "kernel_size": 3,
35
- "p_dropout": 0,
36
- "resblock": "1",
37
- "resblock_kernel_sizes": [3,7,11],
38
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
- "upsample_rates": [10,10,2,2],
40
- "upsample_initial_channel": 512,
41
- "upsample_kernel_sizes": [16,16,4,4],
42
- "use_spectral_norm": false,
43
- "gin_channels": 256,
44
- "spk_embed_dim": 109
45
- }
46
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/48k.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 50,
4
- "seed": 1234,
5
- "epochs": 20000,
6
- "learning_rate": 1e-4,
7
- "betas": [0.8, 0.99],
8
- "eps": 1e-9,
9
- "batch_size": 4,
10
- "fp16_run": true,
11
- "lr_decay": 0.99975,
12
- "segment_size": 17280,
13
- "init_lr_ratio": 2,
14
- "warmup_epochs": 100,
15
- "c_mel": 45,
16
- "c_kl": 1.0
17
- },
18
- "data": {
19
- "max_wav_value": 32768.0,
20
- "sampling_rate": 48000,
21
- "filter_length": 2048,
22
- "hop_length": 480,
23
- "win_length": 2048,
24
- "n_mel_channels": 128,
25
- "mel_fmin": 0.0,
26
- "mel_fmax": null
27
- },
28
- "model": {
29
- "inter_channels": 192,
30
- "hidden_channels": 192,
31
- "filter_channels": 768,
32
- "n_heads": 2,
33
- "n_layers": 6,
34
- "kernel_size": 3,
35
- "p_dropout": 0,
36
- "resblock": "1",
37
- "resblock_kernel_sizes": [3,7,11],
38
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
- "upsample_rates": [12,10,2,2],
40
- "upsample_initial_channel": 512,
41
- "upsample_kernel_sizes": [24,20,4,4],
42
- "use_spectral_norm": false,
43
- "gin_channels": 256,
44
- "spk_embed_dim": 109
45
- }
46
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/__pycache__/config.cpython-39.pyc DELETED
Binary file (5.69 kB)
 
configs/config.json DELETED
@@ -1 +0,0 @@
1
- {"pth_path": "assets/weights/kikiV1.pth", "index_path": "logs/kikiV1.index", "sg_input_device": "VoiceMeeter Output (VB-Audio Vo (MME)", "sg_output_device": "VoiceMeeter Input (VB-Audio Voi (MME)", "threhold": -45.0, "pitch": 2.0, "rms_mix_rate": 0.0, "index_rate": 0.0, "block_time": 0.52, "crossfade_length": 0.15, "extra_time": 2.46, "n_cpu": 6.0, "use_jit": false, "f0method": "rmvpe"}
 
 
configs/config.py DELETED
@@ -1,251 +0,0 @@
1
- import argparse
2
- import os
3
- import sys
4
- import json
5
- from multiprocessing import cpu_count
6
-
7
- import torch
8
-
9
- try:
10
- import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
11
-
12
- if torch.xpu.is_available():
13
- from infer.modules.ipex import ipex_init
14
-
15
- ipex_init()
16
- except Exception: # pylint: disable=broad-exception-caught
17
- pass
18
- import logging
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
-
23
# Training presets, as paths relative to ``configs/``. ``Config`` loads each
# of them at start-up and rewrites them when fp16 has to be disabled.
# NOTE(review): "v2/40k.json" is not listed here although a file of that name
# exists under configs/ -- confirm the omission is intentional.
version_config_list = [
    # v1 presets
    "v1/32k.json",
    "v1/40k.json",
    "v1/48k.json",
    # v2 presets
    "v2/48k.json",
    "v2/32k.json",
]
30
-
31
-
32
def singleton_variable(func):
    """Wrap ``func`` so that it is effectively called only once.

    The first call forwards its arguments to ``func`` and stores the result
    on ``wrapper.instance``. Every later call returns that stored object and
    ignores whatever arguments it was given.

    Args:
        func: Callable (a function or a class) whose first result is cached.

    Returns:
        The wrapping function. The cached object is exposed as its
        ``instance`` attribute, which is ``None`` until the first call.
        A ``func`` that returns ``None`` is therefore never cached and is
        called again on every invocation.
    """
    # Imported locally so this block does not depend on the module's
    # top-level import list.
    import functools

    # ``updated=()`` copies only the naming metadata (__name__, __doc__, ...);
    # it avoids merging the wrapped object's whole ``__dict__`` (for a class,
    # all of its members) into the wrapper function.
    @functools.wraps(func, updated=())
    def wrapper(*args, **kwargs):
        # Compare with None instead of relying on truthiness: a falsy result
        # (0, "", an empty container, ...) must still count as "created".
        if wrapper.instance is None:
            wrapper.instance = func(*args, **kwargs)
        return wrapper.instance

    wrapper.instance = None
    return wrapper
40
-
41
-
42
@singleton_variable
class Config:
    """Runtime settings shared by the whole application.

    Creating the object loads the JSON presets named in
    ``version_config_list``, parses the command line, selects a compute
    device and derives the ``x_pad`` / ``x_query`` / ``x_center`` / ``x_max``
    values. The class is wrapped by ``singleton_variable``, so calling
    ``Config()`` again returns the instance built by the first call.
    """

    # Script whose "3.7" literals are rewritten to "3.0" when fp32 is forced
    # or the GPU reports 4 GB of memory or less.
    _PREPROCESS_SCRIPT = "infer/modules/train/preprocess.py"
    # Raw string: the Windows separators must stay literal backslashes
    # ("\L", "\s", "\o" are not valid escape sequences).
    _ORT_DIR = r"runtime\Lib\site-packages\onnxruntime"

    def __init__(self):
        self.device = "cuda:0"
        self.is_half = True
        self.use_jit = False
        self.n_cpu = 0
        self.gpu_name = None
        self.json_config = self.load_config_json()
        self.gpu_mem = None
        (
            self.python_cmd,
            self.listen_port,
            self.iscolab,
            self.noparallel,
            self.noautoopen,
            self.dml,
        ) = self.arg_parse()
        self.instead = ""
        # device_config() reads self.dml and self.instead, so it runs last.
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    @staticmethod
    def load_config_json() -> dict:
        """Load every preset in ``version_config_list`` from ``configs/``.

        Returns:
            Mapping from the preset's relative path (e.g. ``"v1/32k.json"``)
            to its parsed JSON content.
        """
        presets = {}
        for config_file in version_config_list:
            with open(f"configs/{config_file}", "r", encoding="utf-8") as f:
                presets[config_file] = json.load(f)
        return presets

    @staticmethod
    def arg_parse() -> tuple:
        """Parse the process command line.

        Returns:
            ``(pycmd, port, colab, noparallel, noautoopen, dml)``. A port
            outside ``0..65535`` is replaced by the default ``7865``.
        """
        exe = sys.executable or "python"
        parser = argparse.ArgumentParser()
        parser.add_argument("--port", type=int, default=7865, help="Listen port")
        parser.add_argument("--pycmd", type=str, default=exe, help="Python command")
        parser.add_argument("--colab", action="store_true", help="Launch in colab")
        parser.add_argument(
            "--noparallel", action="store_true", help="Disable parallel processing"
        )
        parser.add_argument(
            "--noautoopen",
            action="store_true",
            help="Do not open in browser automatically",
        )
        parser.add_argument(
            "--dml",
            action="store_true",
            help="torch_dml",
        )
        cmd_opts = parser.parse_args()

        cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865

        return (
            cmd_opts.pycmd,
            cmd_opts.port,
            cmd_opts.colab,
            cmd_opts.noparallel,
            cmd_opts.noautoopen,
            cmd_opts.dml,
        )

    @staticmethod
    def has_mps() -> bool:
        """Return True when a tensor can actually be moved to the MPS device.

        ``torch.backends.mps`` does not exist in every PyTorch build, so it
        is looked up with ``getattr``; availability is then confirmed by
        really placing a tensor on the device.
        """
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is None or not mps_backend.is_available():
            return False
        try:
            torch.zeros(1).to(torch.device("mps"))
            return True
        except Exception:
            # Reported as available but unusable: treat as absent.
            return False

    @staticmethod
    def has_xpu() -> bool:
        """Return True when ``torch.xpu`` exists and reports a device."""
        return bool(hasattr(torch, "xpu") and torch.xpu.is_available())

    def _patch_preprocess(self):
        """Rewrite every "3.7" in the preprocess script to "3.0", in place."""
        with open(self._PREPROCESS_SCRIPT, "r", encoding="utf-8") as f:
            patched = f.read().replace("3.7", "3.0")
        with open(self._PREPROCESS_SCRIPT, "w", encoding="utf-8") as f:
            f.write(patched)

    def use_fp32_config(self):
        """Disable fp16 in the loaded presets and in their files on disk.

        Also patches the preprocess script (see ``_patch_preprocess``).
        """
        import re  # local import: keeps the module's import block untouched

        # Only flip the fp16_run flag; a blanket "true" -> "false" replace
        # would also rewrite any other boolean or string containing "true".
        fp16_flag = re.compile(r'("fp16_run"\s*:\s*)true')
        for config_file in version_config_list:
            self.json_config[config_file]["train"]["fp16_run"] = False
            config_path = f"configs/{config_file}"
            with open(config_path, "r", encoding="utf-8") as f:
                text = f.read()
            with open(config_path, "w", encoding="utf-8") as f:
                f.write(fp16_flag.sub(r"\1false", text))
        self._patch_preprocess()
        print("overwrite preprocess and configs.json")

    def _swap_onnxruntime(self, marker_dll, park_suffix, restore_suffix):
        """Make the onnxruntime build that ships ``marker_dll`` the active one.

        Nothing happens when ``<onnxruntime>\\capi\\<marker_dll>`` already
        exists. Otherwise the active package directory is renamed to
        ``onnxruntime<park_suffix>`` and ``onnxruntime<restore_suffix>`` is
        renamed to the active name. Both renames are best effort: a failure
        is logged at debug level and otherwise ignored.
        """
        if os.path.exists(self._ORT_DIR + "\\capi\\" + marker_dll):
            return
        renames = (
            (self._ORT_DIR, self._ORT_DIR + park_suffix),
            (self._ORT_DIR + restore_suffix, self._ORT_DIR),
        )
        for src, dst in renames:
            try:
                os.rename(src, dst)
            except OSError as err:
                logger.debug("Could not rename %s to %s: %s", src, dst, err)

    def device_config(self) -> tuple:
        """Pick the compute device and precision, then derive the x_* values.

        Side effects: sets ``device``, ``instead``, ``is_half``, ``gpu_name``,
        ``gpu_mem`` and ``n_cpu``; may rewrite the preset files and the
        preprocess script; may rename the bundled onnxruntime directories.

        Returns:
            ``(x_pad, x_query, x_center, x_max)``.
        """
        if torch.cuda.is_available():
            if self.has_xpu():
                self.device = self.instead = "xpu:0"
                self.is_half = True
            i_device = int(self.device.split(":")[-1])
            self.gpu_name = torch.cuda.get_device_name(i_device)
            gpu_name_upper = self.gpu_name.upper()
            if (
                ("16" in self.gpu_name and "V100" not in gpu_name_upper)
                or "P40" in gpu_name_upper
                or "P10" in gpu_name_upper
                or "1060" in self.gpu_name
                or "1070" in self.gpu_name
                or "1080" in self.gpu_name
            ):
                logger.info("Found GPU %s, force to fp32", self.gpu_name)
                self.is_half = False
                self.use_fp32_config()
            else:
                logger.info("Found GPU %s", self.gpu_name)
            # Total device memory in GiB; the +0.4 rounds values just below a
            # whole number up before truncation.
            self.gpu_mem = int(
                torch.cuda.get_device_properties(i_device).total_memory
                / 1024
                / 1024
                / 1024
                + 0.4
            )
            if self.gpu_mem <= 4:
                self._patch_preprocess()
        elif self.has_mps():
            logger.info("No supported Nvidia GPU found")
            self.device = self.instead = "mps"
            self.is_half = False
            self.use_fp32_config()
        else:
            logger.info("No supported Nvidia GPU found")
            self.device = self.instead = "cpu"
            self.is_half = False
            self.use_fp32_config()

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()

        if self.is_half:
            # Settings for 6 GB of video memory.
            x_pad = 3
            x_query = 10
            x_center = 60
            x_max = 65
        else:
            # Settings for 5 GB of video memory.
            x_pad = 1
            x_query = 6
            x_center = 38
            x_max = 41

        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad = 1
            x_query = 5
            x_center = 30
            x_max = 32

        if self.dml:
            logger.info("Use DirectML instead")
            self._swap_onnxruntime("DirectML.dll", "-cuda", "-dml")
            # Third-party package, imported only when --dml is requested.
            import torch_directml

            self.device = torch_directml.device(torch_directml.default_device())
            self.is_half = False
        else:
            if self.instead:
                logger.info("Use %s instead", self.instead)
            self._swap_onnxruntime("onnxruntime_providers_cuda.dll", "-dml", "-cuda")
        print("is_half:%s, device:%s" % (self.is_half, self.device))
        return x_pad, x_query, x_center, x_max
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/v1/32k.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 200,
4
- "seed": 1234,
5
- "epochs": 20000,
6
- "learning_rate": 1e-4,
7
- "betas": [0.8, 0.99],
8
- "eps": 1e-9,
9
- "batch_size": 4,
10
- "fp16_run": true,
11
- "lr_decay": 0.999875,
12
- "segment_size": 12800,
13
- "init_lr_ratio": 1,
14
- "warmup_epochs": 0,
15
- "c_mel": 45,
16
- "c_kl": 1.0
17
- },
18
- "data": {
19
- "max_wav_value": 32768.0,
20
- "sampling_rate": 32000,
21
- "filter_length": 1024,
22
- "hop_length": 320,
23
- "win_length": 1024,
24
- "n_mel_channels": 80,
25
- "mel_fmin": 0.0,
26
- "mel_fmax": null
27
- },
28
- "model": {
29
- "inter_channels": 192,
30
- "hidden_channels": 192,
31
- "filter_channels": 768,
32
- "n_heads": 2,
33
- "n_layers": 6,
34
- "kernel_size": 3,
35
- "p_dropout": 0,
36
- "resblock": "1",
37
- "resblock_kernel_sizes": [3,7,11],
38
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
- "upsample_rates": [10,4,2,2,2],
40
- "upsample_initial_channel": 512,
41
- "upsample_kernel_sizes": [16,16,4,4,4],
42
- "use_spectral_norm": false,
43
- "gin_channels": 256,
44
- "spk_embed_dim": 109
45
- }
46
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/v1/40k.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 200,
4
- "seed": 1234,
5
- "epochs": 20000,
6
- "learning_rate": 1e-4,
7
- "betas": [0.8, 0.99],
8
- "eps": 1e-9,
9
- "batch_size": 4,
10
- "fp16_run": true,
11
- "lr_decay": 0.999875,
12
- "segment_size": 12800,
13
- "init_lr_ratio": 1,
14
- "warmup_epochs": 0,
15
- "c_mel": 45,
16
- "c_kl": 1.0
17
- },
18
- "data": {
19
- "max_wav_value": 32768.0,
20
- "sampling_rate": 40000,
21
- "filter_length": 2048,
22
- "hop_length": 400,
23
- "win_length": 2048,
24
- "n_mel_channels": 125,
25
- "mel_fmin": 0.0,
26
- "mel_fmax": null
27
- },
28
- "model": {
29
- "inter_channels": 192,
30
- "hidden_channels": 192,
31
- "filter_channels": 768,
32
- "n_heads": 2,
33
- "n_layers": 6,
34
- "kernel_size": 3,
35
- "p_dropout": 0,
36
- "resblock": "1",
37
- "resblock_kernel_sizes": [3,7,11],
38
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
- "upsample_rates": [10,10,2,2],
40
- "upsample_initial_channel": 512,
41
- "upsample_kernel_sizes": [16,16,4,4],
42
- "use_spectral_norm": false,
43
- "gin_channels": 256,
44
- "spk_embed_dim": 109
45
- }
46
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/v1/48k.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 200,
4
- "seed": 1234,
5
- "epochs": 20000,
6
- "learning_rate": 1e-4,
7
- "betas": [0.8, 0.99],
8
- "eps": 1e-9,
9
- "batch_size": 4,
10
- "fp16_run": true,
11
- "lr_decay": 0.999875,
12
- "segment_size": 11520,
13
- "init_lr_ratio": 1,
14
- "warmup_epochs": 0,
15
- "c_mel": 45,
16
- "c_kl": 1.0
17
- },
18
- "data": {
19
- "max_wav_value": 32768.0,
20
- "sampling_rate": 48000,
21
- "filter_length": 2048,
22
- "hop_length": 480,
23
- "win_length": 2048,
24
- "n_mel_channels": 128,
25
- "mel_fmin": 0.0,
26
- "mel_fmax": null
27
- },
28
- "model": {
29
- "inter_channels": 192,
30
- "hidden_channels": 192,
31
- "filter_channels": 768,
32
- "n_heads": 2,
33
- "n_layers": 6,
34
- "kernel_size": 3,
35
- "p_dropout": 0,
36
- "resblock": "1",
37
- "resblock_kernel_sizes": [3,7,11],
38
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
- "upsample_rates": [10,6,2,2,2],
40
- "upsample_initial_channel": 512,
41
- "upsample_kernel_sizes": [16,16,4,4,4],
42
- "use_spectral_norm": false,
43
- "gin_channels": 256,
44
- "spk_embed_dim": 109
45
- }
46
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/v2/32k.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 50,
4
- "seed": 1234,
5
- "epochs": 20000,
6
- "learning_rate": 1e-4,
7
- "betas": [0.8, 0.99],
8
- "eps": 1e-9,
9
- "batch_size": 4,
10
- "fp16_run": true,
11
- "lr_decay": 0.99975,
12
- "segment_size": 12800,
13
- "init_lr_ratio": 1,
14
- "warmup_epochs": 100,
15
- "c_mel": 45,
16
- "c_kl": 1.0
17
- },
18
- "data": {
19
- "max_wav_value": 32768.0,
20
- "sampling_rate": 32000,
21
- "filter_length": 1024,
22
- "hop_length": 320,
23
- "win_length": 1024,
24
- "n_mel_channels": 80,
25
- "mel_fmin": 0.0,
26
- "mel_fmax": null
27
- },
28
- "model": {
29
- "inter_channels": 192,
30
- "hidden_channels": 192,
31
- "filter_channels": 768,
32
- "n_heads": 2,
33
- "n_layers": 6,
34
- "kernel_size": 3,
35
- "p_dropout": 0,
36
- "resblock": "1",
37
- "resblock_kernel_sizes": [3,7,11],
38
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
- "upsample_rates": [10,8,2,2],
40
- "upsample_initial_channel": 512,
41
- "upsample_kernel_sizes": [20,16,4,4],
42
- "use_spectral_norm": false,
43
- "gin_channels": 256,
44
- "spk_embed_dim": 109
45
- }
46
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/v2/40k.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 50,
4
- "seed": 1234,
5
- "epochs": 20000,
6
- "learning_rate": 1e-4,
7
- "betas": [0.8, 0.99],
8
- "eps": 1e-9,
9
- "batch_size": 4,
10
- "fp16_run": true,
11
- "lr_decay": 0.99975,
12
- "segment_size": 12800,
13
- "init_lr_ratio": 1,
14
- "warmup_epochs": 100,
15
- "c_mel": 45,
16
- "c_kl": 1.0
17
- },
18
- "data": {
19
- "max_wav_value": 32768.0,
20
- "sampling_rate": 40000,
21
- "filter_length": 2048,
22
- "hop_length": 400,
23
- "win_length": 2048,
24
- "n_mel_channels": 125,
25
- "mel_fmin": 0.0,
26
- "mel_fmax": null
27
- },
28
- "model": {
29
- "inter_channels": 192,
30
- "hidden_channels": 192,
31
- "filter_channels": 768,
32
- "n_heads": 2,
33
- "n_layers": 6,
34
- "kernel_size": 3,
35
- "p_dropout": 0,
36
- "resblock": "1",
37
- "resblock_kernel_sizes": [3,7,11],
38
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
- "upsample_rates": [10,10,2,2],
40
- "upsample_initial_channel": 512,
41
- "upsample_kernel_sizes": [16,16,4,4],
42
- "use_spectral_norm": false,
43
- "gin_channels": 256,
44
- "spk_embed_dim": 109
45
- }
46
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/v2/48k.json DELETED
@@ -1,46 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 50,
4
- "seed": 1234,
5
- "epochs": 20000,
6
- "learning_rate": 1e-4,
7
- "betas": [0.8, 0.99],
8
- "eps": 1e-9,
9
- "batch_size": 4,
10
- "fp16_run": true,
11
- "lr_decay": 0.99975,
12
- "segment_size": 17280,
13
- "init_lr_ratio": 2,
14
- "warmup_epochs": 100,
15
- "c_mel": 45,
16
- "c_kl": 1.0
17
- },
18
- "data": {
19
- "max_wav_value": 32768.0,
20
- "sampling_rate": 48000,
21
- "filter_length": 2048,
22
- "hop_length": 480,
23
- "win_length": 2048,
24
- "n_mel_channels": 128,
25
- "mel_fmin": 0.0,
26
- "mel_fmax": null
27
- },
28
- "model": {
29
- "inter_channels": 192,
30
- "hidden_channels": 192,
31
- "filter_channels": 768,
32
- "n_heads": 2,
33
- "n_layers": 6,
34
- "kernel_size": 3,
35
- "p_dropout": 0,
36
- "resblock": "1",
37
- "resblock_kernel_sizes": [3,7,11],
38
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
- "upsample_rates": [12,10,2,2],
40
- "upsample_initial_channel": 512,
41
- "upsample_kernel_sizes": [24,20,4,4],
42
- "use_spectral_norm": false,
43
- "gin_channels": 256,
44
- "spk_embed_dim": 109
45
- }
46
- }