Spaces:
Paused
Paused
JacobLinCool
committed on
Commit
•
3a010aa
1
Parent(s):
4b56fbf
feat: infer
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +1 -0
- README.md +4 -2
- app.py +114 -27
- assets/pretrained_v2/D40k.pth +0 -3
- assets/pretrained_v2/G40k.pth +0 -3
- config.json +1 -1
- configs/config.py +245 -0
- infer/lib/audio.py +1 -1
- infer/lib/rmvpe.py +2 -12
- infer/lib/train/process_ckpt.py +2 -2
- infer/lib/uvr5_pack/lib_v5/dataset.py +0 -183
- infer/lib/uvr5_pack/lib_v5/layers.py +0 -118
- infer/lib/uvr5_pack/lib_v5/layers_123812KB .py +0 -118
- infer/lib/uvr5_pack/lib_v5/layers_123821KB.py +0 -118
- infer/lib/uvr5_pack/lib_v5/layers_33966KB.py +0 -126
- infer/lib/uvr5_pack/lib_v5/layers_537227KB.py +0 -126
- infer/lib/uvr5_pack/lib_v5/layers_537238KB.py +0 -126
- infer/lib/uvr5_pack/lib_v5/layers_new.py +0 -125
- infer/lib/uvr5_pack/lib_v5/model_param_init.py +0 -69
- infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json +0 -19
- infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json +0 -19
- infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json +0 -19
- infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json +0 -19
- infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json +0 -19
- infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json +0 -19
- infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512_cut.json +0 -19
- infer/lib/uvr5_pack/lib_v5/modelparams/2band_32000.json +0 -30
- infer/lib/uvr5_pack/lib_v5/modelparams/2band_44100_lofi.json +0 -30
- infer/lib/uvr5_pack/lib_v5/modelparams/2band_48000.json +0 -30
- infer/lib/uvr5_pack/lib_v5/modelparams/3band_44100.json +0 -42
- infer/lib/uvr5_pack/lib_v5/modelparams/3band_44100_mid.json +0 -43
- infer/lib/uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json +0 -43
- infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100.json +0 -54
- infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_mid.json +0 -55
- infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_msb.json +0 -55
- infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_msb2.json +0 -55
- infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_reverse.json +0 -55
- infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_sw.json +0 -55
- infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json +0 -54
- infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2_sn.json +0 -55
- infer/lib/uvr5_pack/lib_v5/modelparams/4band_v3.json +0 -54
- infer/lib/uvr5_pack/lib_v5/modelparams/ensemble.json +0 -43
- infer/lib/uvr5_pack/lib_v5/nets.py +0 -123
- infer/lib/uvr5_pack/lib_v5/nets_123812KB.py +0 -122
- infer/lib/uvr5_pack/lib_v5/nets_123821KB.py +0 -122
- infer/lib/uvr5_pack/lib_v5/nets_33966KB.py +0 -122
- infer/lib/uvr5_pack/lib_v5/nets_537227KB.py +0 -123
- infer/lib/uvr5_pack/lib_v5/nets_537238KB.py +0 -123
- infer/lib/uvr5_pack/lib_v5/nets_61968KB.py +0 -122
- infer/lib/uvr5_pack/lib_v5/nets_new.py +0 -133
.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
.DS_Store
|
2 |
*.pyc
|
|
|
|
1 |
.DS_Store
|
2 |
*.pyc
|
3 |
+
__pycache__
|
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 🦀
|
4 |
colorFrom: gray
|
5 |
colorTo: gray
|
@@ -9,4 +9,6 @@ app_file: app.py
|
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
1 |
---
|
2 |
+
title: ZeroRVC
|
3 |
emoji: 🦀
|
4 |
colorFrom: gray
|
5 |
colorTo: gray
|
|
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
+
# ZeroRVC
|
13 |
+
|
14 |
+
Run Retrieval-based Voice Conversion training and inference on HuggingFace ZeroGPU.
|
app.py
CHANGED
@@ -1,11 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import traceback
|
3 |
-
|
4 |
import numpy as np
|
5 |
from sklearn.cluster import MiniBatchKMeans
|
6 |
-
|
7 |
-
os.environ["PYTORCH_JIT"] = "0v"
|
8 |
-
|
9 |
from random import shuffle
|
10 |
import gradio as gr
|
11 |
import zipfile
|
@@ -18,23 +19,12 @@ from infer.modules.train.extract.extract_f0_rmvpe import FeatureInput
|
|
18 |
from infer.modules.train.extract_feature_print import HubertFeatureExtractor
|
19 |
from infer.modules.train.train import train
|
20 |
from infer.lib.train.process_ckpt import extract_small_model
|
|
|
|
|
|
|
|
|
21 |
from zero import zero
|
22 |
-
|
23 |
-
# Patch fairseq so that TorchScript can compile `expand_2d_or_3d_tensor`:
# the installed signature lacks type annotations, so the matching `def` line
# is swapped for an annotated one. Every other line is written back unchanged.
FAIRSEQ_CODE = "/usr/local/lib/python3.10/site-packages/fairseq/models/model_utils.py"
if os.path.exists(FAIRSEQ_CODE):
    with open(FAIRSEQ_CODE, "r") as f:
        source_lines = f.readlines()
    patched_lines = [
        "def expand_2d_or_3d_tensor(x: Tensor, trg_dim: int, padding_idx: int) -> Tensor:\n"
        if "def expand_2d_or_3d_tensor(x, trg_dim: int, padding_idx: int):" in text
        else text
        for text in source_lines
    ]
    with open(FAIRSEQ_CODE, "w") as f:
        for text in patched_lines:
            f.write(text)
|
38 |
|
39 |
|
40 |
def extract_audio_files(zip_file: str, target_dir: str) -> list[str]:
|
@@ -189,13 +179,15 @@ def download_weight(exp_dir: str) -> str:
|
|
189 |
raise gr.Error("No model found")
|
190 |
|
191 |
latest_model = max(models, key=os.path.getctime)
|
|
|
192 |
|
193 |
name = os.path.basename(exp_dir)
|
|
|
194 |
extract_small_model(
|
195 |
-
latest_model,
|
196 |
)
|
197 |
|
198 |
-
return
|
199 |
|
200 |
|
201 |
def train_index(exp_dir: str) -> str:
|
@@ -269,9 +261,70 @@ def restore_expdir(zip: str) -> str:
|
|
269 |
return exp_dir
|
270 |
|
271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
with gr.Blocks() as app:
|
273 |
# allow user to manually select the experiment directory
|
274 |
-
exp_dir = gr.Textbox(
|
|
|
|
|
|
|
|
|
275 |
|
276 |
with gr.Tabs():
|
277 |
with gr.Tab(label="New / Restore"):
|
@@ -284,10 +337,10 @@ with gr.Blocks() as app:
|
|
284 |
preprocess_output = gr.Textbox(
|
285 |
label="Preprocessing output", lines=5
|
286 |
)
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
|
292 |
with gr.Row():
|
293 |
restore_zip_file = gr.File(
|
@@ -327,6 +380,26 @@ with gr.Blocks() as app:
|
|
327 |
)
|
328 |
download_expdir_output = gr.File(label="Download experiment directory")
|
329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
330 |
preprocess_btn.click(
|
331 |
fn=preprocess,
|
332 |
inputs=[zip_file],
|
@@ -343,6 +416,10 @@ with gr.Blocks() as app:
|
|
343 |
fn=train_model,
|
344 |
inputs=[exp_dir],
|
345 |
outputs=[latest_model],
|
|
|
|
|
|
|
|
|
346 |
)
|
347 |
|
348 |
train_index_btn.click(
|
@@ -369,4 +446,14 @@ with gr.Blocks() as app:
|
|
369 |
outputs=[exp_dir],
|
370 |
)
|
371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
app.launch()
|
|
|
1 |
+
from typing import Tuple
|
2 |
+
from prelude import prelude
|
3 |
+
|
4 |
+
prelude()
|
5 |
+
|
6 |
import os
|
7 |
import traceback
|
|
|
8 |
import numpy as np
|
9 |
from sklearn.cluster import MiniBatchKMeans
|
|
|
|
|
|
|
10 |
from random import shuffle
|
11 |
import gradio as gr
|
12 |
import zipfile
|
|
|
19 |
from infer.modules.train.extract_feature_print import HubertFeatureExtractor
|
20 |
from infer.modules.train.train import train
|
21 |
from infer.lib.train.process_ckpt import extract_small_model
|
22 |
+
from infer.modules.vc.modules import VC
|
23 |
+
from configs.config import Config
|
24 |
+
import demucs.separate
|
25 |
+
import soundfile as sf
|
26 |
from zero import zero
|
27 |
+
from model import device
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
|
30 |
def extract_audio_files(zip_file: str, target_dir: str) -> list[str]:
|
|
|
179 |
raise gr.Error("No model found")
|
180 |
|
181 |
latest_model = max(models, key=os.path.getctime)
|
182 |
+
print(f"Latest model: {latest_model}")
|
183 |
|
184 |
name = os.path.basename(exp_dir)
|
185 |
+
out = os.path.join(exp_dir, f"{name}.pth")
|
186 |
extract_small_model(
|
187 |
+
latest_model, out, "40k", True, "Model trained by ZeroGPU.", "v2"
|
188 |
)
|
189 |
|
190 |
+
return out
|
191 |
|
192 |
|
193 |
def train_index(exp_dir: str) -> str:
|
|
|
261 |
return exp_dir
|
262 |
|
263 |
|
264 |
+
@zero(duration=120)
def infer(exp_dir: str, original_audio: str, f0add: int) -> Tuple[int, np.ndarray]:
    """Convert the vocals of ``original_audio`` with the model stored in ``exp_dir``.

    The vocals are first isolated with Demucs (``htdemucs``, two-stem mode) and
    the resulting ``vocals.wav`` is passed to the voice-conversion pipeline.

    Args:
        exp_dir: Experiment directory; must contain ``<basename>.pth`` and at
            least one ``added_*.index`` file.
        original_audio: Path of the uploaded audio file.
        f0add: Pitch offset forwarded to ``vc_single``.

    Returns:
        ``(sample_rate, samples)`` as produced by ``vc_single``.

    Raises:
        gr.Error: If no audio was supplied, the model or index is missing, or
            the conversion returned no audio.
    """
    # An empty audio component yields no path; fail with a readable message
    # instead of a TypeError from os.path.basename(None).
    if not original_audio:
        raise gr.Error("No audio uploaded")

    name = os.path.basename(exp_dir)
    model = os.path.join(exp_dir, f"{name}.pth")
    if not os.path.exists(model):
        raise gr.Error("Model not found")

    index = glob(f"{exp_dir}/added_*.index")
    if not index:
        raise gr.Error("Index not found")

    base = os.path.splitext(os.path.basename(original_audio))[0]
    demucs.separate.main(
        ["--two-stems", "vocals", "-d", str(device), "-n", "htdemucs", original_audio]
    )
    # Demucs writes its stems below ./separated/<model>/<track name>/.
    out = os.path.join("separated", "htdemucs", base, "vocals.wav")

    cfg = Config()
    vc = VC(cfg)
    vc.get_vc(model)
    # NOTE(review): `index` is the list returned by glob(); confirm that
    # VC.vc_single accepts a list here rather than a single index path.
    info, wav_opt = vc.vc_single(
        0,
        out,
        f0add,
        None,
        "rmvpe",
        index,
        None,
        0.5,
        3,
        0,
        1,
        0.33,
    )

    # The first return value was previously discarded; surface it when the
    # conversion produced no audio so the failure is not silently passed on.
    if wav_opt is None or wav_opt[0] is None or wav_opt[1] is None:
        raise gr.Error(f"Inference failed: {info}")

    sr = wav_opt[0]
    data = wav_opt[1]

    return sr, data
|
304 |
+
|
305 |
+
|
306 |
+
def merge(exp_dir: str, original_audio: str, vocal: Tuple[int, np.ndarray]) -> str:
    """Mix the converted vocal back over the separated accompaniment.

    Args:
        exp_dir: Experiment directory; temporary and merged files are written here.
        original_audio: Path of the originally uploaded audio; its base name
            selects the Demucs output folder.
        vocal: ``(sample_rate, samples)`` of the converted vocal.

    Returns:
        Path of the merged MP3 file.

    Raises:
        gr.Error: If ffmpeg exits with a non-zero status.
    """
    import subprocess

    base = os.path.splitext(os.path.basename(original_audio))[0]
    # Demucs names the complementary two-stem output "no_<stem>.wav"
    # (underscore), so the accompaniment is "no_vocals.wav".
    music = os.path.join("separated", "htdemucs", base, "no_vocals.wav")

    tmp = os.path.join(exp_dir, "tmp.wav")
    sf.write(tmp, vocal[1], vocal[0])

    merged = f"{tmp}.merged.mp3"
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; "-y" overwrites the result of a previous run
    # instead of waiting on an interactive prompt.
    completed = subprocess.run(
        [
            "ffmpeg",
            "-y",
            "-i",
            music,
            "-i",
            tmp,
            "-filter_complex",
            "[1]volume=2[a];[0][a]amix=inputs=2:duration=first:dropout_transition=2",
            merged,
        ],
        check=False,
    )
    if completed.returncode != 0:
        raise gr.Error("Failed to merge the converted vocal with the accompaniment")

    return merged
|
319 |
+
|
320 |
+
|
321 |
with gr.Blocks() as app:
|
322 |
# allow user to manually select the experiment directory
|
323 |
+
exp_dir = gr.Textbox(
|
324 |
+
label="Experiment directory (don't touch it unless you know what you are doing)",
|
325 |
+
visible=True,
|
326 |
+
interactive=True,
|
327 |
+
)
|
328 |
|
329 |
with gr.Tabs():
|
330 |
with gr.Tab(label="New / Restore"):
|
|
|
337 |
preprocess_output = gr.Textbox(
|
338 |
label="Preprocessing output", lines=5
|
339 |
)
|
340 |
+
|
341 |
+
preprocess_btn = gr.Button(
|
342 |
+
value="Start New Experiment", variant="primary"
|
343 |
+
)
|
344 |
|
345 |
with gr.Row():
|
346 |
restore_zip_file = gr.File(
|
|
|
380 |
)
|
381 |
download_expdir_output = gr.File(label="Download experiment directory")
|
382 |
|
383 |
+
with gr.Tab(label="Inference"):
|
384 |
+
with gr.Row():
|
385 |
+
original_audio = gr.Audio(
|
386 |
+
label="Upload original audio",
|
387 |
+
type="filepath",
|
388 |
+
show_download_button=True,
|
389 |
+
)
|
390 |
+
f0add = gr.Slider(
|
391 |
+
label="F0 add",
|
392 |
+
minimum=-16,
|
393 |
+
maximum=16,
|
394 |
+
step=1,
|
395 |
+
value=0,
|
396 |
+
)
|
397 |
+
infer_btn = gr.Button(value="Infer", variant="primary")
|
398 |
+
with gr.Row():
|
399 |
+
infer_output = gr.Audio(label="Inferred audio")
|
400 |
+
with gr.Row():
|
401 |
+
merge_output = gr.Audio(label="Merged audio")
|
402 |
+
|
403 |
preprocess_btn.click(
|
404 |
fn=preprocess,
|
405 |
inputs=[zip_file],
|
|
|
416 |
fn=train_model,
|
417 |
inputs=[exp_dir],
|
418 |
outputs=[latest_model],
|
419 |
+
).success(
|
420 |
+
fn=train_model,
|
421 |
+
inputs=[exp_dir],
|
422 |
+
outputs=[latest_model],
|
423 |
)
|
424 |
|
425 |
train_index_btn.click(
|
|
|
446 |
outputs=[exp_dir],
|
447 |
)
|
448 |
|
449 |
+
infer_btn.click(
|
450 |
+
fn=infer,
|
451 |
+
inputs=[exp_dir, original_audio, f0add],
|
452 |
+
outputs=[infer_output],
|
453 |
+
).success(
|
454 |
+
fn=merge,
|
455 |
+
inputs=[exp_dir, original_audio, infer_output],
|
456 |
+
outputs=[merge_output],
|
457 |
+
)
|
458 |
+
|
459 |
app.launch()
|
assets/pretrained_v2/D40k.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:471378e894e7191f89a94eda8288c5947b16bbe0b10c3f1f17efdb7a1d998242
|
3 |
-
size 142875703
|
|
|
|
|
|
|
|
assets/pretrained_v2/G40k.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a3843da7fde33db1dab176146c70d6c2df06eafe9457f4e3aa10024e9c6a4b69
|
3 |
-
size 72959671
|
|
|
|
|
|
|
|
config.json
CHANGED
@@ -67,7 +67,7 @@
|
|
67 |
"c_mel": 45,
|
68 |
"epochs": 20000,
|
69 |
"eps": 1e-09,
|
70 |
-
"fp16_run":
|
71 |
"init_lr_ratio": 1,
|
72 |
"learning_rate": 0.0001,
|
73 |
"log_interval": 200,
|
|
|
67 |
"c_mel": 45,
|
68 |
"epochs": 20000,
|
69 |
"eps": 1e-09,
|
70 |
+
"fp16_run": true,
|
71 |
"init_lr_ratio": 1,
|
72 |
"learning_rate": 0.0001,
|
73 |
"log_interval": 200,
|
configs/config.py
ADDED
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import json
|
5 |
+
import shutil
|
6 |
+
from multiprocessing import cpu_count
|
7 |
+
|
8 |
+
import torch
|
9 |
+
import logging
|
10 |
+
from model import device, fp16
|
11 |
+
|
12 |
+
logger = logging.getLogger(__name__)
|
13 |
+
|
14 |
+
|
15 |
+
# Relative names ("<version>/<sample rate>.json") of the per-version training
# configs that Config.use_fp32_config iterates over.
version_config_list = [
    "v1/32k.json", "v1/40k.json", "v1/48k.json",
    "v2/48k.json", "v2/32k.json",
]
|
22 |
+
|
23 |
+
|
24 |
+
def singleton_variable(func):
    """Wrap ``func`` so that its first truthy result is cached and reused.

    The cached value lives on the wrapper as ``wrapper.instance`` (initially
    ``None``). While that attribute is falsy, each call invokes ``func`` again
    with the given arguments; once it is truthy, later calls return it and
    ignore their arguments.
    """

    def wrapper(*args, **kwargs):
        cached = wrapper.instance
        if cached:
            return cached
        wrapper.instance = func(*args, **kwargs)
        return wrapper.instance

    wrapper.instance = None
    return wrapper
|
32 |
+
|
33 |
+
|
34 |
+
@singleton_variable
class Config:
    """Runtime configuration: device, precision, CLI options and padding sizes.

    The class is wrapped by ``singleton_variable``, so ``Config()`` builds the
    object on the first call and hands back that same object afterwards.
    """

    def __init__(self):
        self.device = str(device)
        self.is_half = fp16
        self.use_jit = False
        self.n_cpu = 0  # 0 means "not decided yet"; filled in by device_config()
        self.gpu_name = None
        self.json_config = self.load_config_json()
        self.gpu_mem = None  # total GPU memory in GiB, set when CUDA is available
        (
            self.python_cmd,
            self.listen_port,
            self.iscolab,
            self.noparallel,
            self.noautoopen,
            self.dml,
        ) = self.arg_parse()
        self.instead = ""  # name of the fallback device, if one was selected
        self.preprocess_per = 3.7
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    @staticmethod
    def load_config_json() -> dict:
        """Return the loaded per-version JSON configs.

        Loading from ``configs/inuse/`` is currently disabled, so the result is
        always an empty dict.
        """
        d = {}
        # for config_file in version_config_list:
        #     p = f"configs/inuse/{config_file}"
        #     if not os.path.exists(p):
        #         shutil.copy(f"configs/{config_file}", p)
        #     with open(f"configs/inuse/{config_file}", "r") as f:
        #         d[config_file] = json.load(f)
        return d

    @staticmethod
    def arg_parse() -> tuple:
        """Parse command-line options.

        Returns:
            ``(pycmd, port, colab, noparallel, noautoopen, dml)``. A port
            outside ``0..65535`` is replaced by the default ``7865``.
        """
        exe = sys.executable or "python"
        parser = argparse.ArgumentParser()
        parser.add_argument("--port", type=int, default=7865, help="Listen port")
        parser.add_argument("--pycmd", type=str, default=exe, help="Python command")
        parser.add_argument("--colab", action="store_true", help="Launch in colab")
        parser.add_argument(
            "--noparallel", action="store_true", help="Disable parallel processing"
        )
        parser.add_argument(
            "--noautoopen",
            action="store_true",
            help="Do not open in browser automatically",
        )
        parser.add_argument(
            "--dml",
            action="store_true",
            help="torch_dml",
        )
        cmd_opts = parser.parse_args()

        cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865

        return (
            cmd_opts.pycmd,
            cmd_opts.port,
            cmd_opts.colab,
            cmd_opts.noparallel,
            cmd_opts.noautoopen,
            cmd_opts.dml,
        )

    # has_mps is only available in nightly pytorch (for now) and macOS 12.3+.
    # check `getattr` and try it for compatibility
    @staticmethod
    def has_mps() -> bool:
        """Return True when a tensor can actually be moved to the MPS device."""
        # Builds of torch without an MPS backend have no `torch.backends.mps`.
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is None or not mps_backend.is_available():
            return False
        try:
            torch.zeros(1).to(torch.device("mps"))
            return True
        except Exception:
            return False

    @staticmethod
    def has_xpu() -> bool:
        """Return True when ``torch.xpu`` exists and reports an available device."""
        return bool(hasattr(torch, "xpu") and torch.xpu.is_available())

    @staticmethod
    def _rename_quietly(src: str, dst: str) -> None:
        """Rename ``src`` to ``dst``; a failing rename is deliberately ignored."""
        try:
            os.rename(src, dst)
        except OSError:
            # Best effort: the bundled runtime folder may simply not exist.
            pass

    def use_fp32_config(self):
        """Switch the training configs and preprocessing to full precision."""
        for config_file in version_config_list:
            # load_config_json() may not have loaded this entry (it currently
            # returns an empty dict), so do not assume the key exists.
            loaded = self.json_config.get(config_file)
            if loaded is not None:
                loaded["train"]["fp16_run"] = False

            path = f"configs/inuse/{config_file}"
            if not os.path.exists(path):
                continue
            with open(path, "r") as f:
                strr = f.read().replace("true", "false")
            with open(path, "w") as f:
                f.write(strr)
            logger.info("overwrite %s", config_file)
        self.preprocess_per = 3.0
        logger.info("overwrite preprocess_per to %d", self.preprocess_per)

    def device_config(self) -> tuple:
        """Select device and precision, then derive the padding parameters.

        Updates ``device``, ``is_half``, ``gpu_name``, ``gpu_mem``, ``n_cpu``,
        ``instead`` and ``preprocess_per`` as a side effect.

        Returns:
            ``(x_pad, x_query, x_center, x_max)``.
        """
        if torch.cuda.is_available():
            if self.has_xpu():
                self.device = self.instead = "xpu:0"
                self.is_half = True
            # The device string may carry no ":<index>" suffix (e.g. "cuda");
            # fall back to index 0 instead of failing on int("cuda").
            index_text = self.device.split(":")[-1]
            i_device = int(index_text) if index_text.isdigit() else 0
            self.gpu_name = torch.cuda.get_device_name(i_device)
            gpu_upper = self.gpu_name.upper()
            if (
                ("16" in self.gpu_name and "V100" not in gpu_upper)
                or "P40" in gpu_upper
                or "P10" in gpu_upper
                or "1060" in self.gpu_name
                or "1070" in self.gpu_name
                or "1080" in self.gpu_name
            ):
                logger.info("Found GPU %s, force to fp32", self.gpu_name)
                self.is_half = False
                self.use_fp32_config()
            else:
                logger.info("Found GPU %s", self.gpu_name)
            self.gpu_mem = int(
                torch.cuda.get_device_properties(i_device).total_memory
                / 1024
                / 1024
                / 1024
                + 0.4
            )
            if self.gpu_mem <= 4:
                self.preprocess_per = 3.0
        elif self.has_mps():
            logger.info("No supported Nvidia GPU found")
            self.device = self.instead = "mps"
            self.is_half = False
            self.use_fp32_config()
        else:
            logger.info("No supported Nvidia GPU found")
            self.device = self.instead = "cpu"
            self.is_half = False
            self.use_fp32_config()

        if self.n_cpu == 0:
            self.n_cpu = cpu_count()

        if self.is_half:
            # configuration for 6 GB of GPU memory
            x_pad = 3
            x_query = 10
            x_center = 60
            x_max = 65
        else:
            # configuration for 5 GB of GPU memory
            x_pad = 1
            x_query = 6
            x_center = 38
            x_max = 41

        if self.gpu_mem is not None and self.gpu_mem <= 4:
            x_pad = 1
            x_query = 5
            x_center = 30
            x_max = 32

        # Raw strings: same characters as before, without relying on invalid
        # escape sequences such as "\L" or "\s".
        ort_dir = r"runtime\Lib\site-packages\onnxruntime"
        if self.dml:
            logger.info("Use DirectML instead")
            if not os.path.exists(ort_dir + r"\capi\DirectML.dll"):
                # Park the current build as "-cuda" and activate the "-dml" one.
                self._rename_quietly(ort_dir, ort_dir + "-cuda")
                self._rename_quietly(ort_dir + "-dml", ort_dir)
            # if self.device != "cpu":
            import torch_directml

            self.device = torch_directml.device(torch_directml.default_device())
            self.is_half = False
        else:
            if self.instead:
                logger.info("Use %s instead", self.instead)
            if not os.path.exists(ort_dir + r"\capi\onnxruntime_providers_cuda.dll"):
                # Park the current build as "-dml" and activate the "-cuda" one.
                self._rename_quietly(ort_dir, ort_dir + "-dml")
                self._rename_quietly(ort_dir + "-cuda", ort_dir)
        logger.info(
            "Half-precision floating-point: %s, device: %s",
            self.is_half,
            self.device,
        )
        return x_pad, x_query, x_center, x_max
|
infer/lib/audio.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
import platform, os
|
|
|
2 |
import ffmpeg
|
3 |
import numpy as np
|
4 |
import av
|
5 |
-
from io import BytesIO
|
6 |
|
7 |
|
8 |
def wav2(i, o, format):
|
|
|
1 |
import platform, os
|
2 |
+
import traceback
|
3 |
import ffmpeg
|
4 |
import numpy as np
|
5 |
import av
|
|
|
6 |
|
7 |
|
8 |
def wav2(i, o, format):
|
infer/lib/rmvpe.py
CHANGED
@@ -1,24 +1,14 @@
|
|
1 |
from io import BytesIO
|
2 |
import os
|
3 |
-
from typing import List
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
|
7 |
from infer.lib import jit
|
8 |
|
9 |
-
try:
|
10 |
-
# Fix "Torch not compiled with CUDA enabled"
|
11 |
-
import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
|
12 |
-
|
13 |
-
if torch.xpu.is_available():
|
14 |
-
from infer.modules.ipex import ipex_init
|
15 |
-
|
16 |
-
ipex_init()
|
17 |
-
except Exception: # pylint: disable=broad-exception-caught
|
18 |
-
pass
|
19 |
import torch.nn as nn
|
20 |
import torch.nn.functional as F
|
21 |
-
from librosa.util import
|
22 |
from scipy.signal import get_window
|
23 |
|
24 |
import logging
|
|
|
1 |
from io import BytesIO
|
2 |
import os
|
3 |
+
from typing import List
|
4 |
import numpy as np
|
5 |
import torch
|
6 |
|
7 |
from infer.lib import jit
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
import torch.nn as nn
|
10 |
import torch.nn.functional as F
|
11 |
+
from librosa.util import pad_center
|
12 |
from scipy.signal import get_window
|
13 |
|
14 |
import logging
|
infer/lib/train/process_ckpt.py
CHANGED
@@ -61,7 +61,7 @@ def show_info(path):
|
|
61 |
return traceback.format_exc()
|
62 |
|
63 |
|
64 |
-
def extract_small_model(path,
|
65 |
try:
|
66 |
ckpt = torch.load(path, map_location="cpu")
|
67 |
if "model" in ckpt:
|
@@ -185,7 +185,7 @@ def extract_small_model(path, name, sr, if_f0, info, version):
|
|
185 |
opt["version"] = version
|
186 |
opt["sr"] = sr
|
187 |
opt["f0"] = int(if_f0)
|
188 |
-
torch.save(opt,
|
189 |
return "Success."
|
190 |
except:
|
191 |
return traceback.format_exc()
|
|
|
61 |
return traceback.format_exc()
|
62 |
|
63 |
|
64 |
+
def extract_small_model(path, out, sr, if_f0, info, version):
|
65 |
try:
|
66 |
ckpt = torch.load(path, map_location="cpu")
|
67 |
if "model" in ckpt:
|
|
|
185 |
opt["version"] = version
|
186 |
opt["sr"] = sr
|
187 |
opt["f0"] = int(if_f0)
|
188 |
+
torch.save(opt, out)
|
189 |
return "Success."
|
190 |
except:
|
191 |
return traceback.format_exc()
|
infer/lib/uvr5_pack/lib_v5/dataset.py
DELETED
@@ -1,183 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import random
|
3 |
-
|
4 |
-
import numpy as np
|
5 |
-
import torch
|
6 |
-
import torch.utils.data
|
7 |
-
from tqdm import tqdm
|
8 |
-
|
9 |
-
from . import spec_utils
|
10 |
-
|
11 |
-
|
12 |
-
class VocalRemoverValidationSet(torch.utils.data.Dataset):
    """Dataset over saved ``.npz`` patches holding arrays ``X`` and ``y``.

    Each item is the pair ``(abs(X), abs(y))`` of the patch at that index.
    """

    def __init__(self, patch_list):
        # Sequence of paths to the .npz patch files.
        self.patch_list = patch_list

    def __len__(self):
        return len(self.patch_list)

    def __getitem__(self, idx):
        patch = np.load(self.patch_list[idx])
        mixture, target = patch["X"], patch["y"]
        return np.abs(mixture), np.abs(target)
|
29 |
-
|
30 |
-
|
31 |
-
def make_pair(mix_dir, inst_dir):
    """Pair up the audio files of two directories by sorted position.

    Only files whose extension is one of ``.wav``, ``.m4a``, ``.mp3``,
    ``.mp4`` or ``.flac`` (case-sensitive) are considered. Returns a list of
    ``(mixture_path, instrument_path)`` tuples; pairing stops at the shorter
    of the two listings.
    """
    audio_exts = [".wav", ".m4a", ".mp3", ".mp4", ".flac"]

    def _audio_paths(folder):
        found = []
        for entry in os.listdir(folder):
            _, ext = os.path.splitext(entry)
            if ext in audio_exts:
                found.append(os.path.join(folder, entry))
        found.sort()
        return found

    mixtures = _audio_paths(mix_dir)
    instruments = _audio_paths(inst_dir)
    return list(zip(mixtures, instruments))
|
52 |
-
|
53 |
-
|
54 |
-
def train_val_split(dataset_dir, split_mode, val_rate, val_filelist):
    """Build the training and validation file lists for ``dataset_dir``.

    Args:
        dataset_dir: Root directory of the dataset.
        split_mode: ``"random"`` shuffles the pairs found under ``mixtures`` /
            ``instruments`` and splits them; ``"subdirs"`` reads the
            ``training/`` and ``validation/`` sub-directories.
        val_rate: Fraction of pairs used for validation in ``"random"`` mode
            when ``val_filelist`` is empty.
        val_filelist: Pre-defined validation pairs; only allowed in
            ``"random"`` mode.

    Returns:
        ``(train_filelist, val_filelist)``.

    Raises:
        ValueError: If ``val_filelist`` is given in ``"subdirs"`` mode, or
            ``split_mode`` is not one of the two supported values.
    """
    if split_mode == "random":
        filelist = make_pair(
            os.path.join(dataset_dir, "mixtures"),
            os.path.join(dataset_dir, "instruments"),
        )

        random.shuffle(filelist)

        if len(val_filelist) == 0:
            val_size = int(len(filelist) * val_rate)
            if val_size == 0:
                # filelist[:-0] is empty and filelist[-0:] is the whole list,
                # which would put every pair into validation.
                train_filelist = filelist
                val_filelist = []
            else:
                train_filelist = filelist[:-val_size]
                val_filelist = filelist[-val_size:]
        else:
            train_filelist = [
                pair for pair in filelist if list(pair) not in val_filelist
            ]
    elif split_mode == "subdirs":
        if len(val_filelist) != 0:
            raise ValueError(
                "The `val_filelist` option is not available in `subdirs` mode"
            )

        train_filelist = make_pair(
            os.path.join(dataset_dir, "training/mixtures"),
            os.path.join(dataset_dir, "training/instruments"),
        )

        val_filelist = make_pair(
            os.path.join(dataset_dir, "validation/mixtures"),
            os.path.join(dataset_dir, "validation/instruments"),
        )
    else:
        # Previously this fell through to an UnboundLocalError at the return.
        raise ValueError("Unknown split_mode: {!r}".format(split_mode))

    return train_filelist, val_filelist
|
88 |
-
|
89 |
-
|
90 |
-
def augment(X, y, reduction_rate, reduction_mask, mixup_rate, mixup_alpha):
    """Randomly augment the paired arrays ``X`` and ``y`` in place.

    Items are visited in a random permutation. For each item, independent
    random draws decide whether to apply vocal reduction to ``y``, reverse
    the first axis of both items, replace both by their mean over that axis,
    copy ``y`` into ``X``, and mix the item with the next one in the
    permutation. Returns the (mutated) ``X`` and ``y``.
    """
    order = np.random.permutation(len(X))
    last_pos = len(order) - 1
    for pos, idx in enumerate(tqdm(order)):
        if np.random.uniform() < reduction_rate:
            y[idx] = spec_utils.reduce_vocal_aggressively(
                X[idx], y[idx], reduction_mask
            )

        # swap channel
        if np.random.uniform() < 0.5:
            X[idx] = X[idx, ::-1]
            y[idx] = y[idx, ::-1]
        # mono
        if np.random.uniform() < 0.02:
            X[idx] = X[idx].mean(axis=0, keepdims=True)
            y[idx] = y[idx].mean(axis=0, keepdims=True)
        # inst
        if np.random.uniform() < 0.02:
            X[idx] = y[idx]

        # The random draw happens before the position check, as before.
        if np.random.uniform() < mixup_rate and pos < last_pos:
            lam = np.random.beta(mixup_alpha, mixup_alpha)
            partner = order[pos + 1]
            X[idx] = lam * X[idx] + (1 - lam) * X[partner]
            y[idx] = lam * y[idx] + (1 - lam) * y[partner]

    return X, y
|
116 |
-
|
117 |
-
|
118 |
-
def make_padding(width, cropsize, offset):
    """Compute left/right padding and the region-of-interest size.

    ``roi_size`` is ``cropsize - 2 * offset``, or ``cropsize`` when that is
    zero. The left pad equals ``offset``; the right pad is
    ``roi_size - (width % roi_size) + offset``.

    Returns:
        ``(left, right, roi_size)``.
    """
    left = offset
    roi_size = cropsize - left * 2
    roi_size = cropsize if roi_size == 0 else roi_size
    remainder = width % roi_size
    right = roi_size - remainder + left
    return left, right, roi_size
|
126 |
-
|
127 |
-
|
128 |
-
def make_training_set(filelist, cropsize, patches, sr, hop_length, n_fft, offset):
    """Cut ``patches`` random crops of width ``cropsize`` from every file pair.

    Each pair is loaded through ``spec_utils.cache_or_load``, scaled by the
    larger of the two peak magnitudes, zero-padded along the last axis and
    sampled at random start positions.

    Returns:
        ``(X_dataset, y_dataset)``, two ``complex64`` arrays of shape
        ``(patches * len(filelist), 2, n_fft // 2 + 1, cropsize)``.
    """
    shape = (patches * len(filelist), 2, n_fft // 2 + 1, cropsize)
    X_dataset = np.zeros(shape, dtype=np.complex64)
    y_dataset = np.zeros(shape, dtype=np.complex64)

    for file_idx, (X_path, y_path) in enumerate(tqdm(filelist)):
        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        peak = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / peak, y / peak

        left, right, _ = make_padding(X.shape[2], cropsize, offset)
        pad_widths = ((0, 0), (0, 0), (left, right))
        X_pad = np.pad(X, pad_widths, mode="constant")
        y_pad = np.pad(y, pad_widths, mode="constant")

        starts = np.random.randint(0, X_pad.shape[2] - cropsize, patches)
        first_slot = file_idx * patches
        for j, start in enumerate(starts):
            stop = start + cropsize
            X_dataset[first_slot + j] = X_pad[:, :, start:stop]
            y_dataset[first_slot + j] = y_pad[:, :, start:stop]

    return X_dataset, y_dataset
|
151 |
-
|
152 |
-
|
153 |
-
def make_validation_set(filelist, cropsize, sr, hop_length, n_fft, offset):
    """Write validation patches to disk and wrap them in a dataset.

    Patches go into a directory named after the parameters
    (``cs.._sr.._hl.._nf.._of..``) in the current working directory. A patch
    file that already exists is reused rather than rewritten.

    Returns:
        A ``VocalRemoverValidationSet`` over all patch paths.
    """
    patch_dir = "cs{}_sr{}_hl{}_nf{}_of{}".format(
        cropsize, sr, hop_length, n_fft, offset
    )
    os.makedirs(patch_dir, exist_ok=True)

    patch_list = []
    for X_path, y_path in tqdm(filelist):
        basename = os.path.splitext(os.path.basename(X_path))[0]

        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        peak = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / peak, y / peak

        left, right, roi_size = make_padding(X.shape[2], cropsize, offset)
        pad_widths = ((0, 0), (0, 0), (left, right))
        X_pad = np.pad(X, pad_widths, mode="constant")
        y_pad = np.pad(y, pad_widths, mode="constant")

        n_patches = int(np.ceil(X.shape[2] / roi_size))
        for j in range(n_patches):
            outpath = os.path.join(patch_dir, "{}_p{}.npz".format(basename, j))
            start = j * roi_size
            if not os.path.exists(outpath):
                stop = start + cropsize
                np.savez(
                    outpath,
                    X=X_pad[:, :, start:stop],
                    y=y_pad[:, :, start:stop],
                )
            patch_list.append(outpath)

    return VocalRemoverValidationSet(patch_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/layers.py
DELETED
@@ -1,118 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import spec_utils
|
6 |
-
|
7 |
-
|
8 |
-
class Conv2DBNActiv(nn.Module):
    """Bias-free 2-D convolution followed by batch norm and an activation."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        convolution = nn.Conv2d(
            nin,
            nout,
            kernel_size=ksize,
            stride=stride,
            padding=pad,
            dilation=dilation,
            bias=False,
        )
        # `activ` is a module class; it is instantiated here without arguments.
        self.conv = nn.Sequential(convolution, nn.BatchNorm2d(nout), activ())

    def __call__(self, x):
        return self.conv(x)
|
27 |
-
|
28 |
-
|
29 |
-
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise conv -> 1x1 pointwise conv -> BatchNorm2d -> activation.

    Args:
        nin: Number of input channels (also the depthwise group count).
        nout: Number of output channels of the pointwise convolution.
        ksize: Depthwise kernel size.
        stride: Depthwise stride.
        pad: Depthwise padding.
        dilation: Depthwise dilation.
        activ: Activation *class*; instantiated with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        self.conv = nn.Sequential(
            # Depthwise stage: groups=nin gives every input channel its
            # own spatial filter.
            nn.Conv2d(
                nin,
                nin,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                groups=nin,
                bias=False,
            ),
            # Pointwise stage: 1x1 convolution mixes channels, nin -> nout.
            nn.Conv2d(nin, nout, kernel_size=1, bias=False),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Apply the separable convolution stack to ``x``.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        return self.conv(x)
|
50 |
-
|
51 |
-
|
52 |
-
class Encoder(nn.Module):
    """Two stacked ``Conv2DBNActiv`` layers that also expose a skip tensor.

    The first convolution always runs with stride 1; ``stride`` is applied
    by the second convolution only.

    Args:
        nin: Number of input channels.
        nout: Number of output channels of both convolutions.
        ksize: Kernel size of both convolutions.
        stride: Stride of the second convolution.
        pad: Padding of both convolutions.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super().__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def forward(self, x):
        """Return ``(h, skip)``.

        ``skip`` is the output of the first convolution and ``h`` is the
        output of the second. Implemented as ``forward`` rather than by
        overriding ``__call__`` so that forward hooks are honoured.
        """
        skip = self.conv1(x)
        h = self.conv2(skip)

        return h, skip
|
63 |
-
|
64 |
-
|
65 |
-
class Decoder(nn.Module):
    """Upsample by 2x, optionally merge a skip tensor, then convolve.

    Args:
        nin: Input channels of the convolution, i.e. the channel count
            after the optional concatenation with ``skip``.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Accepted for signature compatibility but not used; the
            convolution always runs with stride 1.
        pad: Convolution padding.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
        dropout: When true, ``nn.Dropout2d(0.1)`` is applied to the output.
    """

    def __init__(
        self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
    ):
        super().__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        """Decode ``x``, concatenating ``skip`` along dim 1 when it is given.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)
        if skip is not None:
            # NOTE(review): presumably crops ``skip`` around its centre to
            # the size of ``x`` -- confirm against spec_utils.crop_center.
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)
        h = self.conv(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
|
84 |
-
|
85 |
-
|
86 |
-
class ASPPModule(nn.Module):
    """Five parallel branches over the same input, fused by a 1x1 bottleneck.

    Branches: a pooled branch (``conv1``), a 1x1 convolution (``conv2``)
    and three dilated separable convolutions (``conv3``..``conv5``) using
    ``dilations[0]``..``dilations[2]`` as both padding and dilation.

    Args:
        nin: Channels of the input and of every branch output.
        nout: Channels produced by the bottleneck.
        dilations: Sequence whose first three entries are used.
        activ: Activation class forwarded to the convolution blocks.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
        super().__init__()

        def separable(rate):
            # padding == dilation, both taken from the same entry.
            return SeperableConv2DBNActiv(nin, nin, 3, 1, rate, rate, activ=activ)

        # Output size (1, None): height is pooled to 1, width is kept.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = separable(dilations[0])
        self.conv4 = separable(dilations[1])
        self.conv5 = separable(dilations[2])
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Concatenate all branch outputs on dim 1 and apply the bottleneck."""
        _, _, height, width = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(height, width), mode="bilinear", align_corners=True
        )
        branches = [pooled]
        for branch in (self.conv2, self.conv3, self.conv4, self.conv5):
            branches.append(branch(x))
        return self.bottleneck(torch.cat(branches, dim=1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/layers_123812KB .py
DELETED
@@ -1,118 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import spec_utils
|
6 |
-
|
7 |
-
|
8 |
-
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, applied as one unit.

    Args:
        nin: Number of input channels.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Convolution stride.
        pad: Padding passed to ``nn.Conv2d``.
        dilation: Convolution dilation.
        activ: Activation *class* (not an instance); it is instantiated
            with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        # bias=False: the BatchNorm2d that follows has its own learnable
        # shift with the default affine settings.
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin,
                nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False,
            ),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / activation stack.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that ``nn.Module.__call__`` dispatches here and forward hooks
        registered on this module are honoured.
        """
        return self.conv(x)
|
27 |
-
|
28 |
-
|
29 |
-
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise conv -> 1x1 pointwise conv -> BatchNorm2d -> activation.

    Args:
        nin: Number of input channels (also the depthwise group count).
        nout: Number of output channels of the pointwise convolution.
        ksize: Depthwise kernel size.
        stride: Depthwise stride.
        pad: Depthwise padding.
        dilation: Depthwise dilation.
        activ: Activation *class*; instantiated with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        self.conv = nn.Sequential(
            # Depthwise stage: groups=nin gives every input channel its
            # own spatial filter.
            nn.Conv2d(
                nin,
                nin,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                groups=nin,
                bias=False,
            ),
            # Pointwise stage: 1x1 convolution mixes channels, nin -> nout.
            nn.Conv2d(nin, nout, kernel_size=1, bias=False),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Apply the separable convolution stack to ``x``.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        return self.conv(x)
|
50 |
-
|
51 |
-
|
52 |
-
class Encoder(nn.Module):
    """Two stacked ``Conv2DBNActiv`` layers that also expose a skip tensor.

    The first convolution always runs with stride 1; ``stride`` is applied
    by the second convolution only.

    Args:
        nin: Number of input channels.
        nout: Number of output channels of both convolutions.
        ksize: Kernel size of both convolutions.
        stride: Stride of the second convolution.
        pad: Padding of both convolutions.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super().__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def forward(self, x):
        """Return ``(h, skip)``.

        ``skip`` is the output of the first convolution and ``h`` is the
        output of the second. Implemented as ``forward`` rather than by
        overriding ``__call__`` so that forward hooks are honoured.
        """
        skip = self.conv1(x)
        h = self.conv2(skip)

        return h, skip
|
63 |
-
|
64 |
-
|
65 |
-
class Decoder(nn.Module):
    """Upsample by 2x, optionally merge a skip tensor, then convolve.

    Args:
        nin: Input channels of the convolution, i.e. the channel count
            after the optional concatenation with ``skip``.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Accepted for signature compatibility but not used; the
            convolution always runs with stride 1.
        pad: Convolution padding.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
        dropout: When true, ``nn.Dropout2d(0.1)`` is applied to the output.
    """

    def __init__(
        self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
    ):
        super().__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        """Decode ``x``, concatenating ``skip`` along dim 1 when it is given.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)
        if skip is not None:
            # NOTE(review): presumably crops ``skip`` around its centre to
            # the size of ``x`` -- confirm against spec_utils.crop_center.
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)
        h = self.conv(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
|
84 |
-
|
85 |
-
|
86 |
-
class ASPPModule(nn.Module):
    """Five parallel branches over the same input, fused by a 1x1 bottleneck.

    Branches: a pooled branch (``conv1``), a 1x1 convolution (``conv2``)
    and three dilated separable convolutions (``conv3``..``conv5``) using
    ``dilations[0]``..``dilations[2]`` as both padding and dilation.

    Args:
        nin: Channels of the input and of every branch output.
        nout: Channels produced by the bottleneck.
        dilations: Sequence whose first three entries are used.
        activ: Activation class forwarded to the convolution blocks.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
        super().__init__()

        def separable(rate):
            # padding == dilation, both taken from the same entry.
            return SeperableConv2DBNActiv(nin, nin, 3, 1, rate, rate, activ=activ)

        # Output size (1, None): height is pooled to 1, width is kept.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = separable(dilations[0])
        self.conv4 = separable(dilations[1])
        self.conv5 = separable(dilations[2])
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Concatenate all branch outputs on dim 1 and apply the bottleneck."""
        _, _, height, width = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(height, width), mode="bilinear", align_corners=True
        )
        branches = [pooled]
        for branch in (self.conv2, self.conv3, self.conv4, self.conv5):
            branches.append(branch(x))
        return self.bottleneck(torch.cat(branches, dim=1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/layers_123821KB.py
DELETED
@@ -1,118 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import spec_utils
|
6 |
-
|
7 |
-
|
8 |
-
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, applied as one unit.

    Args:
        nin: Number of input channels.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Convolution stride.
        pad: Padding passed to ``nn.Conv2d``.
        dilation: Convolution dilation.
        activ: Activation *class* (not an instance); it is instantiated
            with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        # bias=False: the BatchNorm2d that follows has its own learnable
        # shift with the default affine settings.
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin,
                nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False,
            ),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / activation stack.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that ``nn.Module.__call__`` dispatches here and forward hooks
        registered on this module are honoured.
        """
        return self.conv(x)
|
27 |
-
|
28 |
-
|
29 |
-
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise conv -> 1x1 pointwise conv -> BatchNorm2d -> activation.

    Args:
        nin: Number of input channels (also the depthwise group count).
        nout: Number of output channels of the pointwise convolution.
        ksize: Depthwise kernel size.
        stride: Depthwise stride.
        pad: Depthwise padding.
        dilation: Depthwise dilation.
        activ: Activation *class*; instantiated with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        self.conv = nn.Sequential(
            # Depthwise stage: groups=nin gives every input channel its
            # own spatial filter.
            nn.Conv2d(
                nin,
                nin,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                groups=nin,
                bias=False,
            ),
            # Pointwise stage: 1x1 convolution mixes channels, nin -> nout.
            nn.Conv2d(nin, nout, kernel_size=1, bias=False),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Apply the separable convolution stack to ``x``.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        return self.conv(x)
|
50 |
-
|
51 |
-
|
52 |
-
class Encoder(nn.Module):
    """Two stacked ``Conv2DBNActiv`` layers that also expose a skip tensor.

    The first convolution always runs with stride 1; ``stride`` is applied
    by the second convolution only.

    Args:
        nin: Number of input channels.
        nout: Number of output channels of both convolutions.
        ksize: Kernel size of both convolutions.
        stride: Stride of the second convolution.
        pad: Padding of both convolutions.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super().__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def forward(self, x):
        """Return ``(h, skip)``.

        ``skip`` is the output of the first convolution and ``h`` is the
        output of the second. Implemented as ``forward`` rather than by
        overriding ``__call__`` so that forward hooks are honoured.
        """
        skip = self.conv1(x)
        h = self.conv2(skip)

        return h, skip
|
63 |
-
|
64 |
-
|
65 |
-
class Decoder(nn.Module):
    """Upsample by 2x, optionally merge a skip tensor, then convolve.

    Args:
        nin: Input channels of the convolution, i.e. the channel count
            after the optional concatenation with ``skip``.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Accepted for signature compatibility but not used; the
            convolution always runs with stride 1.
        pad: Convolution padding.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
        dropout: When true, ``nn.Dropout2d(0.1)`` is applied to the output.
    """

    def __init__(
        self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
    ):
        super().__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        """Decode ``x``, concatenating ``skip`` along dim 1 when it is given.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)
        if skip is not None:
            # NOTE(review): presumably crops ``skip`` around its centre to
            # the size of ``x`` -- confirm against spec_utils.crop_center.
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)
        h = self.conv(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
|
84 |
-
|
85 |
-
|
86 |
-
class ASPPModule(nn.Module):
    """Five parallel branches over the same input, fused by a 1x1 bottleneck.

    Branches: a pooled branch (``conv1``), a 1x1 convolution (``conv2``)
    and three dilated separable convolutions (``conv3``..``conv5``) using
    ``dilations[0]``..``dilations[2]`` as both padding and dilation.

    Args:
        nin: Channels of the input and of every branch output.
        nout: Channels produced by the bottleneck.
        dilations: Sequence whose first three entries are used.
        activ: Activation class forwarded to the convolution blocks.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
        super().__init__()

        def separable(rate):
            # padding == dilation, both taken from the same entry.
            return SeperableConv2DBNActiv(nin, nin, 3, 1, rate, rate, activ=activ)

        # Output size (1, None): height is pooled to 1, width is kept.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = separable(dilations[0])
        self.conv4 = separable(dilations[1])
        self.conv5 = separable(dilations[2])
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Concatenate all branch outputs on dim 1 and apply the bottleneck."""
        _, _, height, width = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(height, width), mode="bilinear", align_corners=True
        )
        branches = [pooled]
        for branch in (self.conv2, self.conv3, self.conv4, self.conv5):
            branches.append(branch(x))
        return self.bottleneck(torch.cat(branches, dim=1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/layers_33966KB.py
DELETED
@@ -1,126 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import spec_utils
|
6 |
-
|
7 |
-
|
8 |
-
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, applied as one unit.

    Args:
        nin: Number of input channels.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Convolution stride.
        pad: Padding passed to ``nn.Conv2d``.
        dilation: Convolution dilation.
        activ: Activation *class* (not an instance); it is instantiated
            with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        # bias=False: the BatchNorm2d that follows has its own learnable
        # shift with the default affine settings.
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin,
                nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False,
            ),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / activation stack.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that ``nn.Module.__call__`` dispatches here and forward hooks
        registered on this module are honoured.
        """
        return self.conv(x)
|
27 |
-
|
28 |
-
|
29 |
-
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise conv -> 1x1 pointwise conv -> BatchNorm2d -> activation.

    Args:
        nin: Number of input channels (also the depthwise group count).
        nout: Number of output channels of the pointwise convolution.
        ksize: Depthwise kernel size.
        stride: Depthwise stride.
        pad: Depthwise padding.
        dilation: Depthwise dilation.
        activ: Activation *class*; instantiated with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        self.conv = nn.Sequential(
            # Depthwise stage: groups=nin gives every input channel its
            # own spatial filter.
            nn.Conv2d(
                nin,
                nin,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                groups=nin,
                bias=False,
            ),
            # Pointwise stage: 1x1 convolution mixes channels, nin -> nout.
            nn.Conv2d(nin, nout, kernel_size=1, bias=False),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Apply the separable convolution stack to ``x``.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        return self.conv(x)
|
50 |
-
|
51 |
-
|
52 |
-
class Encoder(nn.Module):
    """Two stacked ``Conv2DBNActiv`` layers that also expose a skip tensor.

    The first convolution always runs with stride 1; ``stride`` is applied
    by the second convolution only.

    Args:
        nin: Number of input channels.
        nout: Number of output channels of both convolutions.
        ksize: Kernel size of both convolutions.
        stride: Stride of the second convolution.
        pad: Padding of both convolutions.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super().__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def forward(self, x):
        """Return ``(h, skip)``.

        ``skip`` is the output of the first convolution and ``h`` is the
        output of the second. Implemented as ``forward`` rather than by
        overriding ``__call__`` so that forward hooks are honoured.
        """
        skip = self.conv1(x)
        h = self.conv2(skip)

        return h, skip
|
63 |
-
|
64 |
-
|
65 |
-
class Decoder(nn.Module):
    """Upsample by 2x, optionally merge a skip tensor, then convolve.

    Args:
        nin: Input channels of the convolution, i.e. the channel count
            after the optional concatenation with ``skip``.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Accepted for signature compatibility but not used; the
            convolution always runs with stride 1.
        pad: Convolution padding.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
        dropout: When true, ``nn.Dropout2d(0.1)`` is applied to the output.
    """

    def __init__(
        self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
    ):
        super().__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        """Decode ``x``, concatenating ``skip`` along dim 1 when it is given.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)
        if skip is not None:
            # NOTE(review): presumably crops ``skip`` around its centre to
            # the size of ``x`` -- confirm against spec_utils.crop_center.
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)
        h = self.conv(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
|
84 |
-
|
85 |
-
|
86 |
-
class ASPPModule(nn.Module):
    """Seven parallel branches over the same input, fused by a 1x1 bottleneck.

    Branches: a pooled branch (``conv1``), a 1x1 convolution (``conv2``)
    and five dilated separable convolutions (``conv3``..``conv7``).

    NOTE(review): ``dilations`` defaults to five values but only indices
    0-2 are read; ``conv5``, ``conv6`` and ``conv7`` all use
    ``dilations[2]``. This is preserved as found. Confirm whether indices
    3 and 4 were intended before changing it, because a different dilation
    changes what already-trained weights compute.

    Args:
        nin: Channels of the input and of every branch output.
        nout: Channels produced by the bottleneck.
        dilations: Sequence whose first three entries are used.
        activ: Activation class forwarded to the convolution blocks.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
        super().__init__()

        def separable(rate):
            # padding == dilation, both taken from the same entry.
            return SeperableConv2DBNActiv(nin, nin, 3, 1, rate, rate, activ=activ)

        # Output size (1, None): height is pooled to 1, width is kept.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = separable(dilations[0])
        self.conv4 = separable(dilations[1])
        self.conv5 = separable(dilations[2])
        self.conv6 = separable(dilations[2])
        self.conv7 = separable(dilations[2])
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Concatenate all branch outputs on dim 1 and apply the bottleneck."""
        _, _, height, width = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(height, width), mode="bilinear", align_corners=True
        )
        branches = [pooled]
        for branch in (
            self.conv2,
            self.conv3,
            self.conv4,
            self.conv5,
            self.conv6,
            self.conv7,
        ):
            branches.append(branch(x))
        return self.bottleneck(torch.cat(branches, dim=1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/layers_537227KB.py
DELETED
@@ -1,126 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import spec_utils
|
6 |
-
|
7 |
-
|
8 |
-
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, applied as one unit.

    Args:
        nin: Number of input channels.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Convolution stride.
        pad: Padding passed to ``nn.Conv2d``.
        dilation: Convolution dilation.
        activ: Activation *class* (not an instance); it is instantiated
            with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        # bias=False: the BatchNorm2d that follows has its own learnable
        # shift with the default affine settings.
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin,
                nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False,
            ),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / activation stack.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that ``nn.Module.__call__`` dispatches here and forward hooks
        registered on this module are honoured.
        """
        return self.conv(x)
|
27 |
-
|
28 |
-
|
29 |
-
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise conv -> 1x1 pointwise conv -> BatchNorm2d -> activation.

    Args:
        nin: Number of input channels (also the depthwise group count).
        nout: Number of output channels of the pointwise convolution.
        ksize: Depthwise kernel size.
        stride: Depthwise stride.
        pad: Depthwise padding.
        dilation: Depthwise dilation.
        activ: Activation *class*; instantiated with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        self.conv = nn.Sequential(
            # Depthwise stage: groups=nin gives every input channel its
            # own spatial filter.
            nn.Conv2d(
                nin,
                nin,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                groups=nin,
                bias=False,
            ),
            # Pointwise stage: 1x1 convolution mixes channels, nin -> nout.
            nn.Conv2d(nin, nout, kernel_size=1, bias=False),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Apply the separable convolution stack to ``x``.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        return self.conv(x)
|
50 |
-
|
51 |
-
|
52 |
-
class Encoder(nn.Module):
    """Two stacked ``Conv2DBNActiv`` layers that also expose a skip tensor.

    The first convolution always runs with stride 1; ``stride`` is applied
    by the second convolution only.

    Args:
        nin: Number of input channels.
        nout: Number of output channels of both convolutions.
        ksize: Kernel size of both convolutions.
        stride: Stride of the second convolution.
        pad: Padding of both convolutions.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super().__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def forward(self, x):
        """Return ``(h, skip)``.

        ``skip`` is the output of the first convolution and ``h`` is the
        output of the second. Implemented as ``forward`` rather than by
        overriding ``__call__`` so that forward hooks are honoured.
        """
        skip = self.conv1(x)
        h = self.conv2(skip)

        return h, skip
|
63 |
-
|
64 |
-
|
65 |
-
class Decoder(nn.Module):
    """Upsample by 2x, optionally merge a skip tensor, then convolve.

    Args:
        nin: Input channels of the convolution, i.e. the channel count
            after the optional concatenation with ``skip``.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Accepted for signature compatibility but not used; the
            convolution always runs with stride 1.
        pad: Convolution padding.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
        dropout: When true, ``nn.Dropout2d(0.1)`` is applied to the output.
    """

    def __init__(
        self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
    ):
        super().__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        """Decode ``x``, concatenating ``skip`` along dim 1 when it is given.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)
        if skip is not None:
            # NOTE(review): presumably crops ``skip`` around its centre to
            # the size of ``x`` -- confirm against spec_utils.crop_center.
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)
        h = self.conv(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
|
84 |
-
|
85 |
-
|
86 |
-
class ASPPModule(nn.Module):
    """Seven parallel branches over the same input, fused by a 1x1 bottleneck.

    Branches: a pooled branch (``conv1``), a 1x1 convolution (``conv2``)
    and five dilated separable convolutions (``conv3``..``conv7``).

    NOTE(review): ``dilations`` defaults to five values but only indices
    0-2 are read; ``conv5``, ``conv6`` and ``conv7`` all use
    ``dilations[2]``. This is preserved as found. Confirm whether indices
    3 and 4 were intended before changing it, because a different dilation
    changes what already-trained weights compute.

    Args:
        nin: Channels of the input and of every branch output.
        nout: Channels produced by the bottleneck.
        dilations: Sequence whose first three entries are used.
        activ: Activation class forwarded to the convolution blocks.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
        super().__init__()

        def separable(rate):
            # padding == dilation, both taken from the same entry.
            return SeperableConv2DBNActiv(nin, nin, 3, 1, rate, rate, activ=activ)

        # Output size (1, None): height is pooled to 1, width is kept.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = separable(dilations[0])
        self.conv4 = separable(dilations[1])
        self.conv5 = separable(dilations[2])
        self.conv6 = separable(dilations[2])
        self.conv7 = separable(dilations[2])
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Concatenate all branch outputs on dim 1 and apply the bottleneck."""
        _, _, height, width = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(height, width), mode="bilinear", align_corners=True
        )
        branches = [pooled]
        for branch in (
            self.conv2,
            self.conv3,
            self.conv4,
            self.conv5,
            self.conv6,
            self.conv7,
        ):
            branches.append(branch(x))
        return self.bottleneck(torch.cat(branches, dim=1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/layers_537238KB.py
DELETED
@@ -1,126 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import spec_utils
|
6 |
-
|
7 |
-
|
8 |
-
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, applied as one unit.

    Args:
        nin: Number of input channels.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Convolution stride.
        pad: Padding passed to ``nn.Conv2d``.
        dilation: Convolution dilation.
        activ: Activation *class* (not an instance); it is instantiated
            with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        # bias=False: the BatchNorm2d that follows has its own learnable
        # shift with the default affine settings.
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin,
                nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False,
            ),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / activation stack.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that ``nn.Module.__call__`` dispatches here and forward hooks
        registered on this module are honoured.
        """
        return self.conv(x)
|
27 |
-
|
28 |
-
|
29 |
-
class SeperableConv2DBNActiv(nn.Module):
    """Depthwise conv -> 1x1 pointwise conv -> BatchNorm2d -> activation.

    Args:
        nin: Number of input channels (also the depthwise group count).
        nout: Number of output channels of the pointwise convolution.
        ksize: Depthwise kernel size.
        stride: Depthwise stride.
        pad: Depthwise padding.
        dilation: Depthwise dilation.
        activ: Activation *class*; instantiated with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        self.conv = nn.Sequential(
            # Depthwise stage: groups=nin gives every input channel its
            # own spatial filter.
            nn.Conv2d(
                nin,
                nin,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                groups=nin,
                bias=False,
            ),
            # Pointwise stage: 1x1 convolution mixes channels, nin -> nout.
            nn.Conv2d(nin, nout, kernel_size=1, bias=False),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Apply the separable convolution stack to ``x``.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        return self.conv(x)
|
50 |
-
|
51 |
-
|
52 |
-
class Encoder(nn.Module):
    """Two stacked ``Conv2DBNActiv`` layers that also expose a skip tensor.

    The first convolution always runs with stride 1; ``stride`` is applied
    by the second convolution only.

    Args:
        nin: Number of input channels.
        nout: Number of output channels of both convolutions.
        ksize: Kernel size of both convolutions.
        stride: Stride of the second convolution.
        pad: Padding of both convolutions.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super().__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def forward(self, x):
        """Return ``(h, skip)``.

        ``skip`` is the output of the first convolution and ``h`` is the
        output of the second. Implemented as ``forward`` rather than by
        overriding ``__call__`` so that forward hooks are honoured.
        """
        skip = self.conv1(x)
        h = self.conv2(skip)

        return h, skip
|
63 |
-
|
64 |
-
|
65 |
-
class Decoder(nn.Module):
    """Upsample by 2x, optionally merge a skip tensor, then convolve.

    Args:
        nin: Input channels of the convolution, i.e. the channel count
            after the optional concatenation with ``skip``.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Accepted for signature compatibility but not used; the
            convolution always runs with stride 1.
        pad: Convolution padding.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
        dropout: When true, ``nn.Dropout2d(0.1)`` is applied to the output.
    """

    def __init__(
        self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
    ):
        super().__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        """Decode ``x``, concatenating ``skip`` along dim 1 when it is given.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)
        if skip is not None:
            # NOTE(review): presumably crops ``skip`` around its centre to
            # the size of ``x`` -- confirm against spec_utils.crop_center.
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)
        h = self.conv(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
|
84 |
-
|
85 |
-
|
86 |
-
class ASPPModule(nn.Module):
    """Seven parallel branches over the same input, fused by a 1x1 bottleneck.

    Branches: a pooled branch (``conv1``), a 1x1 convolution (``conv2``)
    and five dilated separable convolutions (``conv3``..``conv7``).

    NOTE(review): ``dilations`` defaults to five values but only indices
    0-2 are read; ``conv5``, ``conv6`` and ``conv7`` all use
    ``dilations[2]``. This is preserved as found. Confirm whether indices
    3 and 4 were intended before changing it, because a different dilation
    changes what already-trained weights compute.

    Args:
        nin: Channels of the input and of every branch output.
        nout: Channels produced by the bottleneck.
        dilations: Sequence whose first three entries are used.
        activ: Activation class forwarded to the convolution blocks.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
        super().__init__()

        def separable(rate):
            # padding == dilation, both taken from the same entry.
            return SeperableConv2DBNActiv(nin, nin, 3, 1, rate, rate, activ=activ)

        # Output size (1, None): height is pooled to 1, width is kept.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = separable(dilations[0])
        self.conv4 = separable(dilations[1])
        self.conv5 = separable(dilations[2])
        self.conv6 = separable(dilations[2])
        self.conv7 = separable(dilations[2])
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ), nn.Dropout2d(0.1)
        )

    def forward(self, x):
        """Concatenate all branch outputs on dim 1 and apply the bottleneck."""
        _, _, height, width = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(height, width), mode="bilinear", align_corners=True
        )
        branches = [pooled]
        for branch in (
            self.conv2,
            self.conv3,
            self.conv4,
            self.conv5,
            self.conv6,
            self.conv7,
        ):
            branches.append(branch(x))
        return self.bottleneck(torch.cat(branches, dim=1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/layers_new.py
DELETED
@@ -1,125 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import spec_utils
|
6 |
-
|
7 |
-
|
8 |
-
class Conv2DBNActiv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation, applied as one unit.

    Args:
        nin: Number of input channels.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Convolution stride.
        pad: Padding passed to ``nn.Conv2d``.
        dilation: Convolution dilation.
        activ: Activation *class* (not an instance); it is instantiated
            with no arguments.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super().__init__()
        # bias=False: the BatchNorm2d that follows has its own learnable
        # shift with the default affine settings.
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin,
                nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False,
            ),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        """Run ``x`` through the conv / batch-norm / activation stack.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that ``nn.Module.__call__`` dispatches here and forward hooks
        registered on this module are honoured.
        """
        return self.conv(x)
|
27 |
-
|
28 |
-
|
29 |
-
class Encoder(nn.Module):
    """Two stacked ``Conv2DBNActiv`` layers.

    ``stride`` is applied by the first convolution; the second always runs
    with stride 1. Only the final tensor is returned (no skip output).

    Args:
        nin: Number of input channels.
        nout: Number of output channels of both convolutions.
        ksize: Kernel size of both convolutions.
        stride: Stride of the first convolution.
        pad: Padding of both convolutions.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super().__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, stride, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, 1, pad, activ=activ)

    def forward(self, x):
        """Return the output of ``conv2(conv1(x))``.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        h = self.conv1(x)
        h = self.conv2(h)

        return h
|
40 |
-
|
41 |
-
|
42 |
-
class Decoder(nn.Module):
    """Upsample by 2x, optionally merge a skip tensor, then convolve.

    Args:
        nin: Input channels of the convolution, i.e. the channel count
            after the optional concatenation with ``skip``.
        nout: Number of output channels.
        ksize: Convolution kernel size.
        stride: Accepted for signature compatibility but not used; the
            convolution always runs with stride 1.
        pad: Convolution padding.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
        dropout: When true, ``nn.Dropout2d(0.1)`` is applied to the output.
    """

    def __init__(
        self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False
    ):
        super().__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        # NOTE: a second convolution (``conv2``) was commented out in the
        # original code and stays disabled here.
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        """Decode ``x``, concatenating ``skip`` along dim 1 when it is given.

        Implemented as ``forward`` rather than by overriding ``__call__``
        so that forward hooks registered on this module are honoured.
        """
        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)

        if skip is not None:
            # NOTE(review): presumably crops ``skip`` around its centre to
            # the size of ``x`` -- confirm against spec_utils.crop_center.
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)

        h = self.conv1(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
|
65 |
-
|
66 |
-
|
67 |
-
class ASPPModule(nn.Module):
    """Five parallel branches over the same input, fused by a 1x1 bottleneck.

    Branches: a pooled branch (``conv1``), a 1x1 convolution (``conv2``)
    and three dilated 3x3 convolutions (``conv3``..``conv5``) using
    ``dilations[0]``..``dilations[2]`` as both padding and dilation. Every
    branch outputs ``nout`` channels.

    Args:
        nin: Number of input channels.
        nout: Channels of each branch and of the bottleneck output.
        dilations: Sequence whose first three entries are used.
        activ: Activation class forwarded to ``Conv2DBNActiv``.
        dropout: When true, ``nn.Dropout2d(0.1)`` follows the bottleneck.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 12), activ=nn.ReLU, dropout=False):
        super().__init__()

        def dilated(rate):
            # padding == dilation, both taken from the same entry.
            return Conv2DBNActiv(nin, nout, 3, 1, rate, rate, activ=activ)

        # Output size (1, None): height is pooled to 1, width is kept.
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ)
        self.conv3 = dilated(dilations[0])
        self.conv4 = dilated(dilations[1])
        self.conv5 = dilated(dilations[2])
        self.bottleneck = Conv2DBNActiv(nout * 5, nout, 1, 1, 0, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x):
        """Concatenate all branch outputs on dim 1, then bottleneck (+ dropout)."""
        _, _, height, width = x.size()
        # The pooled branch is resized back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(height, width), mode="bilinear", align_corners=True
        )
        branches = [pooled]
        for branch in (self.conv2, self.conv3, self.conv4, self.conv5):
            branches.append(branch(x))
        fused = self.bottleneck(torch.cat(branches, dim=1))

        if self.dropout is None:
            return fused
        return self.dropout(fused)
|
103 |
-
|
104 |
-
|
105 |
-
class LSTMModule(nn.Module):
    """1x1 conv to one channel -> bidirectional LSTM -> dense projection.

    Args:
        nin_conv: Channels of the input fed to the 1x1 convolution.
        nin_lstm: LSTM input size and output size of the dense layer.
            NOTE(review): the final reshape in ``forward`` appears to
            require this to equal the input's third dimension -- confirm
            against callers.
        nout_lstm: Total LSTM output width; each direction gets
            ``nout_lstm // 2`` hidden units.
    """

    def __init__(self, nin_conv, nin_lstm, nout_lstm):
        super().__init__()
        self.conv = Conv2DBNActiv(nin_conv, 1, 1, 1, 0)
        self.lstm = nn.LSTM(
            input_size=nin_lstm, hidden_size=nout_lstm // 2, bidirectional=True
        )
        self.dense = nn.Sequential(
            nn.Linear(nout_lstm, nin_lstm), nn.BatchNorm1d(nin_lstm), nn.ReLU()
        )

    def forward(self, x):
        """Return a tensor reshaped and permuted to (N, 1, nbins, nframes)."""
        batch, _, nbins, nframes = x.size()
        single = self.conv(x)[:, 0]  # N, nbins, nframes
        seq = single.permute(2, 0, 1)  # nframes, N, nbins
        seq, _ = self.lstm(seq)
        flat = seq.reshape(-1, seq.size()[-1])
        projected = self.dense(flat)  # nframes * N, nbins
        stacked = projected.reshape(nframes, batch, 1, nbins)

        return stacked.permute(1, 2, 3, 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/model_param_init.py
DELETED
@@ -1,69 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import os
|
3 |
-
import pathlib
|
4 |
-
|
5 |
-
# Fallback model parameters, used when no ".pth" or ".json" config is given.
# Band keys are integers, matching what int_keys() produces for loaded JSON.
default_param = {
    "bins": 768,
    "unstable_bins": 9,  # training only
    "reduction_bins": 762,  # training only
    "sr": 44100,
    "pre_filter_start": 757,
    "pre_filter_stop": 768,
    "band": {
        1: {
            "sr": 11025,
            "hl": 128,
            "n_fft": 960,
            "crop_start": 0,
            "crop_stop": 245,
            "lpf_start": 61,  # inference only
            "res_type": "polyphase",
        },
        2: {
            "sr": 44100,
            "hl": 512,
            "n_fft": 1536,
            "crop_start": 24,
            "crop_stop": 547,
            "hpf_start": 81,  # inference only
            "res_type": "sinc_best",
        },
    },
}
|
34 |
-
|
35 |
-
|
36 |
-
def int_keys(d):
    """Build a dict from ``(key, value)`` pairs, turning digit-only keys into ints.

    Intended as a ``json.loads(..., object_pairs_hook=int_keys)`` hook: keys
    for which ``str.isdigit()`` is true are converted with ``int()``, all
    other keys are kept unchanged.
    """
    return {
        (int(key) if key.isdigit() else key): value
        for key, value in d
    }
|
43 |
-
|
44 |
-
|
45 |
-
class ModelParameters(object):
    """Model parameter set, exposed as the ``param`` dict.

    The source of the parameters depends on the suffix of ``config_path``:

    * ``.pth``  - read ``param.json`` from inside the file, opened as a zip archive;
    * ``.json`` - read the JSON file directly;
    * anything else (including the default ``""``) - use a private copy of
      the module-level ``default_param``.

    Loaded JSON is passed through ``int_keys`` so digit-only keys become ints.
    The flags ``mid_side``, ``mid_side_b``, ``mid_side_b2``, ``stereo_w``,
    ``stereo_n`` and ``reverse`` are set to ``False`` when absent.
    """

    def __init__(self, config_path=""):
        suffix = pathlib.Path(config_path).suffix

        if suffix == ".pth":
            import zipfile

            # Named "archive" rather than "zip" to avoid shadowing the builtin.
            with zipfile.ZipFile(config_path, "r") as archive:
                self.param = json.loads(
                    archive.read("param.json"), object_pairs_hook=int_keys
                )
        elif suffix == ".json":
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(config_path, "r", encoding="utf-8") as f:
                self.param = json.loads(f.read(), object_pairs_hook=int_keys)
        else:
            import copy

            # Deep-copy so that the flag defaults written below (and any later
            # edits by callers) never leak into the shared module-level
            # default_param or into other ModelParameters instances.
            self.param = copy.deepcopy(default_param)

        for flag in (
            "mid_side",
            "mid_side_b",
            "mid_side_b2",
            "stereo_w",
            "stereo_n",
            "reverse",
        ):
            self.param.setdefault(flag, False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 1024,
|
3 |
-
"unstable_bins": 0,
|
4 |
-
"reduction_bins": 0,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 16000,
|
8 |
-
"hl": 512,
|
9 |
-
"n_fft": 2048,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 1024,
|
12 |
-
"hpf_start": -1,
|
13 |
-
"res_type": "sinc_best"
|
14 |
-
}
|
15 |
-
},
|
16 |
-
"sr": 16000,
|
17 |
-
"pre_filter_start": 1023,
|
18 |
-
"pre_filter_stop": 1024
|
19 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 1024,
|
3 |
-
"unstable_bins": 0,
|
4 |
-
"reduction_bins": 0,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 32000,
|
8 |
-
"hl": 512,
|
9 |
-
"n_fft": 2048,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 1024,
|
12 |
-
"hpf_start": -1,
|
13 |
-
"res_type": "kaiser_fast"
|
14 |
-
}
|
15 |
-
},
|
16 |
-
"sr": 32000,
|
17 |
-
"pre_filter_start": 1000,
|
18 |
-
"pre_filter_stop": 1021
|
19 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 1024,
|
3 |
-
"unstable_bins": 0,
|
4 |
-
"reduction_bins": 0,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 33075,
|
8 |
-
"hl": 384,
|
9 |
-
"n_fft": 2048,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 1024,
|
12 |
-
"hpf_start": -1,
|
13 |
-
"res_type": "sinc_best"
|
14 |
-
}
|
15 |
-
},
|
16 |
-
"sr": 33075,
|
17 |
-
"pre_filter_start": 1000,
|
18 |
-
"pre_filter_stop": 1021
|
19 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 1024,
|
3 |
-
"unstable_bins": 0,
|
4 |
-
"reduction_bins": 0,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 44100,
|
8 |
-
"hl": 1024,
|
9 |
-
"n_fft": 2048,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 1024,
|
12 |
-
"hpf_start": -1,
|
13 |
-
"res_type": "sinc_best"
|
14 |
-
}
|
15 |
-
},
|
16 |
-
"sr": 44100,
|
17 |
-
"pre_filter_start": 1023,
|
18 |
-
"pre_filter_stop": 1024
|
19 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 256,
|
3 |
-
"unstable_bins": 0,
|
4 |
-
"reduction_bins": 0,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 44100,
|
8 |
-
"hl": 256,
|
9 |
-
"n_fft": 512,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 256,
|
12 |
-
"hpf_start": -1,
|
13 |
-
"res_type": "sinc_best"
|
14 |
-
}
|
15 |
-
},
|
16 |
-
"sr": 44100,
|
17 |
-
"pre_filter_start": 256,
|
18 |
-
"pre_filter_stop": 256
|
19 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 1024,
|
3 |
-
"unstable_bins": 0,
|
4 |
-
"reduction_bins": 0,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 44100,
|
8 |
-
"hl": 512,
|
9 |
-
"n_fft": 2048,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 1024,
|
12 |
-
"hpf_start": -1,
|
13 |
-
"res_type": "sinc_best"
|
14 |
-
}
|
15 |
-
},
|
16 |
-
"sr": 44100,
|
17 |
-
"pre_filter_start": 1023,
|
18 |
-
"pre_filter_stop": 1024
|
19 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512_cut.json
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 1024,
|
3 |
-
"unstable_bins": 0,
|
4 |
-
"reduction_bins": 0,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 44100,
|
8 |
-
"hl": 512,
|
9 |
-
"n_fft": 2048,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 700,
|
12 |
-
"hpf_start": -1,
|
13 |
-
"res_type": "sinc_best"
|
14 |
-
}
|
15 |
-
},
|
16 |
-
"sr": 44100,
|
17 |
-
"pre_filter_start": 1023,
|
18 |
-
"pre_filter_stop": 700
|
19 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/2band_32000.json
DELETED
@@ -1,30 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 768,
|
3 |
-
"unstable_bins": 7,
|
4 |
-
"reduction_bins": 705,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 6000,
|
8 |
-
"hl": 66,
|
9 |
-
"n_fft": 512,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 240,
|
12 |
-
"lpf_start": 60,
|
13 |
-
"lpf_stop": 118,
|
14 |
-
"res_type": "sinc_fastest"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"sr": 32000,
|
18 |
-
"hl": 352,
|
19 |
-
"n_fft": 1024,
|
20 |
-
"crop_start": 22,
|
21 |
-
"crop_stop": 505,
|
22 |
-
"hpf_start": 44,
|
23 |
-
"hpf_stop": 23,
|
24 |
-
"res_type": "sinc_medium"
|
25 |
-
}
|
26 |
-
},
|
27 |
-
"sr": 32000,
|
28 |
-
"pre_filter_start": 710,
|
29 |
-
"pre_filter_stop": 731
|
30 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/2band_44100_lofi.json
DELETED
@@ -1,30 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 512,
|
3 |
-
"unstable_bins": 7,
|
4 |
-
"reduction_bins": 510,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 11025,
|
8 |
-
"hl": 160,
|
9 |
-
"n_fft": 768,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 192,
|
12 |
-
"lpf_start": 41,
|
13 |
-
"lpf_stop": 139,
|
14 |
-
"res_type": "sinc_fastest"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"sr": 44100,
|
18 |
-
"hl": 640,
|
19 |
-
"n_fft": 1024,
|
20 |
-
"crop_start": 10,
|
21 |
-
"crop_stop": 320,
|
22 |
-
"hpf_start": 47,
|
23 |
-
"hpf_stop": 15,
|
24 |
-
"res_type": "sinc_medium"
|
25 |
-
}
|
26 |
-
},
|
27 |
-
"sr": 44100,
|
28 |
-
"pre_filter_start": 510,
|
29 |
-
"pre_filter_stop": 512
|
30 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/2band_48000.json
DELETED
@@ -1,30 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 768,
|
3 |
-
"unstable_bins": 7,
|
4 |
-
"reduction_bins": 705,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 6000,
|
8 |
-
"hl": 66,
|
9 |
-
"n_fft": 512,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 240,
|
12 |
-
"lpf_start": 60,
|
13 |
-
"lpf_stop": 240,
|
14 |
-
"res_type": "sinc_fastest"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"sr": 48000,
|
18 |
-
"hl": 528,
|
19 |
-
"n_fft": 1536,
|
20 |
-
"crop_start": 22,
|
21 |
-
"crop_stop": 505,
|
22 |
-
"hpf_start": 82,
|
23 |
-
"hpf_stop": 22,
|
24 |
-
"res_type": "sinc_medium"
|
25 |
-
}
|
26 |
-
},
|
27 |
-
"sr": 48000,
|
28 |
-
"pre_filter_start": 710,
|
29 |
-
"pre_filter_stop": 731
|
30 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/3band_44100.json
DELETED
@@ -1,42 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 768,
|
3 |
-
"unstable_bins": 5,
|
4 |
-
"reduction_bins": 733,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 11025,
|
8 |
-
"hl": 128,
|
9 |
-
"n_fft": 768,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 278,
|
12 |
-
"lpf_start": 28,
|
13 |
-
"lpf_stop": 140,
|
14 |
-
"res_type": "polyphase"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"sr": 22050,
|
18 |
-
"hl": 256,
|
19 |
-
"n_fft": 768,
|
20 |
-
"crop_start": 14,
|
21 |
-
"crop_stop": 322,
|
22 |
-
"hpf_start": 70,
|
23 |
-
"hpf_stop": 14,
|
24 |
-
"lpf_start": 283,
|
25 |
-
"lpf_stop": 314,
|
26 |
-
"res_type": "polyphase"
|
27 |
-
},
|
28 |
-
"3": {
|
29 |
-
"sr": 44100,
|
30 |
-
"hl": 512,
|
31 |
-
"n_fft": 768,
|
32 |
-
"crop_start": 131,
|
33 |
-
"crop_stop": 313,
|
34 |
-
"hpf_start": 154,
|
35 |
-
"hpf_stop": 141,
|
36 |
-
"res_type": "sinc_medium"
|
37 |
-
}
|
38 |
-
},
|
39 |
-
"sr": 44100,
|
40 |
-
"pre_filter_start": 757,
|
41 |
-
"pre_filter_stop": 768
|
42 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/3band_44100_mid.json
DELETED
@@ -1,43 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"mid_side": true,
|
3 |
-
"bins": 768,
|
4 |
-
"unstable_bins": 5,
|
5 |
-
"reduction_bins": 733,
|
6 |
-
"band": {
|
7 |
-
"1": {
|
8 |
-
"sr": 11025,
|
9 |
-
"hl": 128,
|
10 |
-
"n_fft": 768,
|
11 |
-
"crop_start": 0,
|
12 |
-
"crop_stop": 278,
|
13 |
-
"lpf_start": 28,
|
14 |
-
"lpf_stop": 140,
|
15 |
-
"res_type": "polyphase"
|
16 |
-
},
|
17 |
-
"2": {
|
18 |
-
"sr": 22050,
|
19 |
-
"hl": 256,
|
20 |
-
"n_fft": 768,
|
21 |
-
"crop_start": 14,
|
22 |
-
"crop_stop": 322,
|
23 |
-
"hpf_start": 70,
|
24 |
-
"hpf_stop": 14,
|
25 |
-
"lpf_start": 283,
|
26 |
-
"lpf_stop": 314,
|
27 |
-
"res_type": "polyphase"
|
28 |
-
},
|
29 |
-
"3": {
|
30 |
-
"sr": 44100,
|
31 |
-
"hl": 512,
|
32 |
-
"n_fft": 768,
|
33 |
-
"crop_start": 131,
|
34 |
-
"crop_stop": 313,
|
35 |
-
"hpf_start": 154,
|
36 |
-
"hpf_stop": 141,
|
37 |
-
"res_type": "sinc_medium"
|
38 |
-
}
|
39 |
-
},
|
40 |
-
"sr": 44100,
|
41 |
-
"pre_filter_start": 757,
|
42 |
-
"pre_filter_stop": 768
|
43 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json
DELETED
@@ -1,43 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"mid_side_b2": true,
|
3 |
-
"bins": 640,
|
4 |
-
"unstable_bins": 7,
|
5 |
-
"reduction_bins": 565,
|
6 |
-
"band": {
|
7 |
-
"1": {
|
8 |
-
"sr": 11025,
|
9 |
-
"hl": 108,
|
10 |
-
"n_fft": 1024,
|
11 |
-
"crop_start": 0,
|
12 |
-
"crop_stop": 187,
|
13 |
-
"lpf_start": 92,
|
14 |
-
"lpf_stop": 186,
|
15 |
-
"res_type": "polyphase"
|
16 |
-
},
|
17 |
-
"2": {
|
18 |
-
"sr": 22050,
|
19 |
-
"hl": 216,
|
20 |
-
"n_fft": 768,
|
21 |
-
"crop_start": 0,
|
22 |
-
"crop_stop": 212,
|
23 |
-
"hpf_start": 68,
|
24 |
-
"hpf_stop": 34,
|
25 |
-
"lpf_start": 174,
|
26 |
-
"lpf_stop": 209,
|
27 |
-
"res_type": "polyphase"
|
28 |
-
},
|
29 |
-
"3": {
|
30 |
-
"sr": 44100,
|
31 |
-
"hl": 432,
|
32 |
-
"n_fft": 640,
|
33 |
-
"crop_start": 66,
|
34 |
-
"crop_stop": 307,
|
35 |
-
"hpf_start": 86,
|
36 |
-
"hpf_stop": 72,
|
37 |
-
"res_type": "kaiser_fast"
|
38 |
-
}
|
39 |
-
},
|
40 |
-
"sr": 44100,
|
41 |
-
"pre_filter_start": 639,
|
42 |
-
"pre_filter_stop": 640
|
43 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100.json
DELETED
@@ -1,54 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 768,
|
3 |
-
"unstable_bins": 7,
|
4 |
-
"reduction_bins": 668,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 11025,
|
8 |
-
"hl": 128,
|
9 |
-
"n_fft": 1024,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 186,
|
12 |
-
"lpf_start": 37,
|
13 |
-
"lpf_stop": 73,
|
14 |
-
"res_type": "polyphase"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"sr": 11025,
|
18 |
-
"hl": 128,
|
19 |
-
"n_fft": 512,
|
20 |
-
"crop_start": 4,
|
21 |
-
"crop_stop": 185,
|
22 |
-
"hpf_start": 36,
|
23 |
-
"hpf_stop": 18,
|
24 |
-
"lpf_start": 93,
|
25 |
-
"lpf_stop": 185,
|
26 |
-
"res_type": "polyphase"
|
27 |
-
},
|
28 |
-
"3": {
|
29 |
-
"sr": 22050,
|
30 |
-
"hl": 256,
|
31 |
-
"n_fft": 512,
|
32 |
-
"crop_start": 46,
|
33 |
-
"crop_stop": 186,
|
34 |
-
"hpf_start": 93,
|
35 |
-
"hpf_stop": 46,
|
36 |
-
"lpf_start": 164,
|
37 |
-
"lpf_stop": 186,
|
38 |
-
"res_type": "polyphase"
|
39 |
-
},
|
40 |
-
"4": {
|
41 |
-
"sr": 44100,
|
42 |
-
"hl": 512,
|
43 |
-
"n_fft": 768,
|
44 |
-
"crop_start": 121,
|
45 |
-
"crop_stop": 382,
|
46 |
-
"hpf_start": 138,
|
47 |
-
"hpf_stop": 123,
|
48 |
-
"res_type": "sinc_medium"
|
49 |
-
}
|
50 |
-
},
|
51 |
-
"sr": 44100,
|
52 |
-
"pre_filter_start": 740,
|
53 |
-
"pre_filter_stop": 768
|
54 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_mid.json
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 768,
|
3 |
-
"unstable_bins": 7,
|
4 |
-
"mid_side": true,
|
5 |
-
"reduction_bins": 668,
|
6 |
-
"band": {
|
7 |
-
"1": {
|
8 |
-
"sr": 11025,
|
9 |
-
"hl": 128,
|
10 |
-
"n_fft": 1024,
|
11 |
-
"crop_start": 0,
|
12 |
-
"crop_stop": 186,
|
13 |
-
"lpf_start": 37,
|
14 |
-
"lpf_stop": 73,
|
15 |
-
"res_type": "polyphase"
|
16 |
-
},
|
17 |
-
"2": {
|
18 |
-
"sr": 11025,
|
19 |
-
"hl": 128,
|
20 |
-
"n_fft": 512,
|
21 |
-
"crop_start": 4,
|
22 |
-
"crop_stop": 185,
|
23 |
-
"hpf_start": 36,
|
24 |
-
"hpf_stop": 18,
|
25 |
-
"lpf_start": 93,
|
26 |
-
"lpf_stop": 185,
|
27 |
-
"res_type": "polyphase"
|
28 |
-
},
|
29 |
-
"3": {
|
30 |
-
"sr": 22050,
|
31 |
-
"hl": 256,
|
32 |
-
"n_fft": 512,
|
33 |
-
"crop_start": 46,
|
34 |
-
"crop_stop": 186,
|
35 |
-
"hpf_start": 93,
|
36 |
-
"hpf_stop": 46,
|
37 |
-
"lpf_start": 164,
|
38 |
-
"lpf_stop": 186,
|
39 |
-
"res_type": "polyphase"
|
40 |
-
},
|
41 |
-
"4": {
|
42 |
-
"sr": 44100,
|
43 |
-
"hl": 512,
|
44 |
-
"n_fft": 768,
|
45 |
-
"crop_start": 121,
|
46 |
-
"crop_stop": 382,
|
47 |
-
"hpf_start": 138,
|
48 |
-
"hpf_stop": 123,
|
49 |
-
"res_type": "sinc_medium"
|
50 |
-
}
|
51 |
-
},
|
52 |
-
"sr": 44100,
|
53 |
-
"pre_filter_start": 740,
|
54 |
-
"pre_filter_stop": 768
|
55 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_msb.json
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"mid_side_b": true,
|
3 |
-
"bins": 768,
|
4 |
-
"unstable_bins": 7,
|
5 |
-
"reduction_bins": 668,
|
6 |
-
"band": {
|
7 |
-
"1": {
|
8 |
-
"sr": 11025,
|
9 |
-
"hl": 128,
|
10 |
-
"n_fft": 1024,
|
11 |
-
"crop_start": 0,
|
12 |
-
"crop_stop": 186,
|
13 |
-
"lpf_start": 37,
|
14 |
-
"lpf_stop": 73,
|
15 |
-
"res_type": "polyphase"
|
16 |
-
},
|
17 |
-
"2": {
|
18 |
-
"sr": 11025,
|
19 |
-
"hl": 128,
|
20 |
-
"n_fft": 512,
|
21 |
-
"crop_start": 4,
|
22 |
-
"crop_stop": 185,
|
23 |
-
"hpf_start": 36,
|
24 |
-
"hpf_stop": 18,
|
25 |
-
"lpf_start": 93,
|
26 |
-
"lpf_stop": 185,
|
27 |
-
"res_type": "polyphase"
|
28 |
-
},
|
29 |
-
"3": {
|
30 |
-
"sr": 22050,
|
31 |
-
"hl": 256,
|
32 |
-
"n_fft": 512,
|
33 |
-
"crop_start": 46,
|
34 |
-
"crop_stop": 186,
|
35 |
-
"hpf_start": 93,
|
36 |
-
"hpf_stop": 46,
|
37 |
-
"lpf_start": 164,
|
38 |
-
"lpf_stop": 186,
|
39 |
-
"res_type": "polyphase"
|
40 |
-
},
|
41 |
-
"4": {
|
42 |
-
"sr": 44100,
|
43 |
-
"hl": 512,
|
44 |
-
"n_fft": 768,
|
45 |
-
"crop_start": 121,
|
46 |
-
"crop_stop": 382,
|
47 |
-
"hpf_start": 138,
|
48 |
-
"hpf_stop": 123,
|
49 |
-
"res_type": "sinc_medium"
|
50 |
-
}
|
51 |
-
},
|
52 |
-
"sr": 44100,
|
53 |
-
"pre_filter_start": 740,
|
54 |
-
"pre_filter_stop": 768
|
55 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_msb2.json
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"mid_side_b": true,
|
3 |
-
"bins": 768,
|
4 |
-
"unstable_bins": 7,
|
5 |
-
"reduction_bins": 668,
|
6 |
-
"band": {
|
7 |
-
"1": {
|
8 |
-
"sr": 11025,
|
9 |
-
"hl": 128,
|
10 |
-
"n_fft": 1024,
|
11 |
-
"crop_start": 0,
|
12 |
-
"crop_stop": 186,
|
13 |
-
"lpf_start": 37,
|
14 |
-
"lpf_stop": 73,
|
15 |
-
"res_type": "polyphase"
|
16 |
-
},
|
17 |
-
"2": {
|
18 |
-
"sr": 11025,
|
19 |
-
"hl": 128,
|
20 |
-
"n_fft": 512,
|
21 |
-
"crop_start": 4,
|
22 |
-
"crop_stop": 185,
|
23 |
-
"hpf_start": 36,
|
24 |
-
"hpf_stop": 18,
|
25 |
-
"lpf_start": 93,
|
26 |
-
"lpf_stop": 185,
|
27 |
-
"res_type": "polyphase"
|
28 |
-
},
|
29 |
-
"3": {
|
30 |
-
"sr": 22050,
|
31 |
-
"hl": 256,
|
32 |
-
"n_fft": 512,
|
33 |
-
"crop_start": 46,
|
34 |
-
"crop_stop": 186,
|
35 |
-
"hpf_start": 93,
|
36 |
-
"hpf_stop": 46,
|
37 |
-
"lpf_start": 164,
|
38 |
-
"lpf_stop": 186,
|
39 |
-
"res_type": "polyphase"
|
40 |
-
},
|
41 |
-
"4": {
|
42 |
-
"sr": 44100,
|
43 |
-
"hl": 512,
|
44 |
-
"n_fft": 768,
|
45 |
-
"crop_start": 121,
|
46 |
-
"crop_stop": 382,
|
47 |
-
"hpf_start": 138,
|
48 |
-
"hpf_stop": 123,
|
49 |
-
"res_type": "sinc_medium"
|
50 |
-
}
|
51 |
-
},
|
52 |
-
"sr": 44100,
|
53 |
-
"pre_filter_start": 740,
|
54 |
-
"pre_filter_stop": 768
|
55 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_reverse.json
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"reverse": true,
|
3 |
-
"bins": 768,
|
4 |
-
"unstable_bins": 7,
|
5 |
-
"reduction_bins": 668,
|
6 |
-
"band": {
|
7 |
-
"1": {
|
8 |
-
"sr": 11025,
|
9 |
-
"hl": 128,
|
10 |
-
"n_fft": 1024,
|
11 |
-
"crop_start": 0,
|
12 |
-
"crop_stop": 186,
|
13 |
-
"lpf_start": 37,
|
14 |
-
"lpf_stop": 73,
|
15 |
-
"res_type": "polyphase"
|
16 |
-
},
|
17 |
-
"2": {
|
18 |
-
"sr": 11025,
|
19 |
-
"hl": 128,
|
20 |
-
"n_fft": 512,
|
21 |
-
"crop_start": 4,
|
22 |
-
"crop_stop": 185,
|
23 |
-
"hpf_start": 36,
|
24 |
-
"hpf_stop": 18,
|
25 |
-
"lpf_start": 93,
|
26 |
-
"lpf_stop": 185,
|
27 |
-
"res_type": "polyphase"
|
28 |
-
},
|
29 |
-
"3": {
|
30 |
-
"sr": 22050,
|
31 |
-
"hl": 256,
|
32 |
-
"n_fft": 512,
|
33 |
-
"crop_start": 46,
|
34 |
-
"crop_stop": 186,
|
35 |
-
"hpf_start": 93,
|
36 |
-
"hpf_stop": 46,
|
37 |
-
"lpf_start": 164,
|
38 |
-
"lpf_stop": 186,
|
39 |
-
"res_type": "polyphase"
|
40 |
-
},
|
41 |
-
"4": {
|
42 |
-
"sr": 44100,
|
43 |
-
"hl": 512,
|
44 |
-
"n_fft": 768,
|
45 |
-
"crop_start": 121,
|
46 |
-
"crop_stop": 382,
|
47 |
-
"hpf_start": 138,
|
48 |
-
"hpf_stop": 123,
|
49 |
-
"res_type": "sinc_medium"
|
50 |
-
}
|
51 |
-
},
|
52 |
-
"sr": 44100,
|
53 |
-
"pre_filter_start": 740,
|
54 |
-
"pre_filter_stop": 768
|
55 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/4band_44100_sw.json
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"stereo_w": true,
|
3 |
-
"bins": 768,
|
4 |
-
"unstable_bins": 7,
|
5 |
-
"reduction_bins": 668,
|
6 |
-
"band": {
|
7 |
-
"1": {
|
8 |
-
"sr": 11025,
|
9 |
-
"hl": 128,
|
10 |
-
"n_fft": 1024,
|
11 |
-
"crop_start": 0,
|
12 |
-
"crop_stop": 186,
|
13 |
-
"lpf_start": 37,
|
14 |
-
"lpf_stop": 73,
|
15 |
-
"res_type": "polyphase"
|
16 |
-
},
|
17 |
-
"2": {
|
18 |
-
"sr": 11025,
|
19 |
-
"hl": 128,
|
20 |
-
"n_fft": 512,
|
21 |
-
"crop_start": 4,
|
22 |
-
"crop_stop": 185,
|
23 |
-
"hpf_start": 36,
|
24 |
-
"hpf_stop": 18,
|
25 |
-
"lpf_start": 93,
|
26 |
-
"lpf_stop": 185,
|
27 |
-
"res_type": "polyphase"
|
28 |
-
},
|
29 |
-
"3": {
|
30 |
-
"sr": 22050,
|
31 |
-
"hl": 256,
|
32 |
-
"n_fft": 512,
|
33 |
-
"crop_start": 46,
|
34 |
-
"crop_stop": 186,
|
35 |
-
"hpf_start": 93,
|
36 |
-
"hpf_stop": 46,
|
37 |
-
"lpf_start": 164,
|
38 |
-
"lpf_stop": 186,
|
39 |
-
"res_type": "polyphase"
|
40 |
-
},
|
41 |
-
"4": {
|
42 |
-
"sr": 44100,
|
43 |
-
"hl": 512,
|
44 |
-
"n_fft": 768,
|
45 |
-
"crop_start": 121,
|
46 |
-
"crop_stop": 382,
|
47 |
-
"hpf_start": 138,
|
48 |
-
"hpf_stop": 123,
|
49 |
-
"res_type": "sinc_medium"
|
50 |
-
}
|
51 |
-
},
|
52 |
-
"sr": 44100,
|
53 |
-
"pre_filter_start": 740,
|
54 |
-
"pre_filter_stop": 768
|
55 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json
DELETED
@@ -1,54 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 672,
|
3 |
-
"unstable_bins": 8,
|
4 |
-
"reduction_bins": 637,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 7350,
|
8 |
-
"hl": 80,
|
9 |
-
"n_fft": 640,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 85,
|
12 |
-
"lpf_start": 25,
|
13 |
-
"lpf_stop": 53,
|
14 |
-
"res_type": "polyphase"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"sr": 7350,
|
18 |
-
"hl": 80,
|
19 |
-
"n_fft": 320,
|
20 |
-
"crop_start": 4,
|
21 |
-
"crop_stop": 87,
|
22 |
-
"hpf_start": 25,
|
23 |
-
"hpf_stop": 12,
|
24 |
-
"lpf_start": 31,
|
25 |
-
"lpf_stop": 62,
|
26 |
-
"res_type": "polyphase"
|
27 |
-
},
|
28 |
-
"3": {
|
29 |
-
"sr": 14700,
|
30 |
-
"hl": 160,
|
31 |
-
"n_fft": 512,
|
32 |
-
"crop_start": 17,
|
33 |
-
"crop_stop": 216,
|
34 |
-
"hpf_start": 48,
|
35 |
-
"hpf_stop": 24,
|
36 |
-
"lpf_start": 139,
|
37 |
-
"lpf_stop": 210,
|
38 |
-
"res_type": "polyphase"
|
39 |
-
},
|
40 |
-
"4": {
|
41 |
-
"sr": 44100,
|
42 |
-
"hl": 480,
|
43 |
-
"n_fft": 960,
|
44 |
-
"crop_start": 78,
|
45 |
-
"crop_stop": 383,
|
46 |
-
"hpf_start": 130,
|
47 |
-
"hpf_stop": 86,
|
48 |
-
"res_type": "kaiser_fast"
|
49 |
-
}
|
50 |
-
},
|
51 |
-
"sr": 44100,
|
52 |
-
"pre_filter_start": 668,
|
53 |
-
"pre_filter_stop": 672
|
54 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2_sn.json
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 672,
|
3 |
-
"unstable_bins": 8,
|
4 |
-
"reduction_bins": 637,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 7350,
|
8 |
-
"hl": 80,
|
9 |
-
"n_fft": 640,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 85,
|
12 |
-
"lpf_start": 25,
|
13 |
-
"lpf_stop": 53,
|
14 |
-
"res_type": "polyphase"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"sr": 7350,
|
18 |
-
"hl": 80,
|
19 |
-
"n_fft": 320,
|
20 |
-
"crop_start": 4,
|
21 |
-
"crop_stop": 87,
|
22 |
-
"hpf_start": 25,
|
23 |
-
"hpf_stop": 12,
|
24 |
-
"lpf_start": 31,
|
25 |
-
"lpf_stop": 62,
|
26 |
-
"res_type": "polyphase"
|
27 |
-
},
|
28 |
-
"3": {
|
29 |
-
"sr": 14700,
|
30 |
-
"hl": 160,
|
31 |
-
"n_fft": 512,
|
32 |
-
"crop_start": 17,
|
33 |
-
"crop_stop": 216,
|
34 |
-
"hpf_start": 48,
|
35 |
-
"hpf_stop": 24,
|
36 |
-
"lpf_start": 139,
|
37 |
-
"lpf_stop": 210,
|
38 |
-
"res_type": "polyphase"
|
39 |
-
},
|
40 |
-
"4": {
|
41 |
-
"sr": 44100,
|
42 |
-
"hl": 480,
|
43 |
-
"n_fft": 960,
|
44 |
-
"crop_start": 78,
|
45 |
-
"crop_stop": 383,
|
46 |
-
"hpf_start": 130,
|
47 |
-
"hpf_stop": 86,
|
48 |
-
"convert_channels": "stereo_n",
|
49 |
-
"res_type": "kaiser_fast"
|
50 |
-
}
|
51 |
-
},
|
52 |
-
"sr": 44100,
|
53 |
-
"pre_filter_start": 668,
|
54 |
-
"pre_filter_stop": 672
|
55 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/4band_v3.json
DELETED
@@ -1,54 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bins": 672,
|
3 |
-
"unstable_bins": 8,
|
4 |
-
"reduction_bins": 530,
|
5 |
-
"band": {
|
6 |
-
"1": {
|
7 |
-
"sr": 7350,
|
8 |
-
"hl": 80,
|
9 |
-
"n_fft": 640,
|
10 |
-
"crop_start": 0,
|
11 |
-
"crop_stop": 85,
|
12 |
-
"lpf_start": 25,
|
13 |
-
"lpf_stop": 53,
|
14 |
-
"res_type": "polyphase"
|
15 |
-
},
|
16 |
-
"2": {
|
17 |
-
"sr": 7350,
|
18 |
-
"hl": 80,
|
19 |
-
"n_fft": 320,
|
20 |
-
"crop_start": 4,
|
21 |
-
"crop_stop": 87,
|
22 |
-
"hpf_start": 25,
|
23 |
-
"hpf_stop": 12,
|
24 |
-
"lpf_start": 31,
|
25 |
-
"lpf_stop": 62,
|
26 |
-
"res_type": "polyphase"
|
27 |
-
},
|
28 |
-
"3": {
|
29 |
-
"sr": 14700,
|
30 |
-
"hl": 160,
|
31 |
-
"n_fft": 512,
|
32 |
-
"crop_start": 17,
|
33 |
-
"crop_stop": 216,
|
34 |
-
"hpf_start": 48,
|
35 |
-
"hpf_stop": 24,
|
36 |
-
"lpf_start": 139,
|
37 |
-
"lpf_stop": 210,
|
38 |
-
"res_type": "polyphase"
|
39 |
-
},
|
40 |
-
"4": {
|
41 |
-
"sr": 44100,
|
42 |
-
"hl": 480,
|
43 |
-
"n_fft": 960,
|
44 |
-
"crop_start": 78,
|
45 |
-
"crop_stop": 383,
|
46 |
-
"hpf_start": 130,
|
47 |
-
"hpf_stop": 86,
|
48 |
-
"res_type": "kaiser_fast"
|
49 |
-
}
|
50 |
-
},
|
51 |
-
"sr": 44100,
|
52 |
-
"pre_filter_start": 668,
|
53 |
-
"pre_filter_stop": 672
|
54 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/modelparams/ensemble.json
DELETED
@@ -1,43 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"mid_side_b2": true,
|
3 |
-
"bins": 1280,
|
4 |
-
"unstable_bins": 7,
|
5 |
-
"reduction_bins": 565,
|
6 |
-
"band": {
|
7 |
-
"1": {
|
8 |
-
"sr": 11025,
|
9 |
-
"hl": 108,
|
10 |
-
"n_fft": 2048,
|
11 |
-
"crop_start": 0,
|
12 |
-
"crop_stop": 374,
|
13 |
-
"lpf_start": 92,
|
14 |
-
"lpf_stop": 186,
|
15 |
-
"res_type": "polyphase"
|
16 |
-
},
|
17 |
-
"2": {
|
18 |
-
"sr": 22050,
|
19 |
-
"hl": 216,
|
20 |
-
"n_fft": 1536,
|
21 |
-
"crop_start": 0,
|
22 |
-
"crop_stop": 424,
|
23 |
-
"hpf_start": 68,
|
24 |
-
"hpf_stop": 34,
|
25 |
-
"lpf_start": 348,
|
26 |
-
"lpf_stop": 418,
|
27 |
-
"res_type": "polyphase"
|
28 |
-
},
|
29 |
-
"3": {
|
30 |
-
"sr": 44100,
|
31 |
-
"hl": 432,
|
32 |
-
"n_fft": 1280,
|
33 |
-
"crop_start": 132,
|
34 |
-
"crop_stop": 614,
|
35 |
-
"hpf_start": 172,
|
36 |
-
"hpf_stop": 144,
|
37 |
-
"res_type": "polyphase"
|
38 |
-
}
|
39 |
-
},
|
40 |
-
"sr": 44100,
|
41 |
-
"pre_filter_start": 1280,
|
42 |
-
"pre_filter_stop": 1280
|
43 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/nets.py
DELETED
@@ -1,123 +0,0 @@
|
|
1 |
-
import layers
|
2 |
-
import torch
|
3 |
-
import torch.nn.functional as F
|
4 |
-
from torch import nn
|
5 |
-
|
6 |
-
from . import spec_utils
|
7 |
-
|
8 |
-
|
9 |
-
class BaseASPPNet(nn.Module):
    """Four-stage encoder, ASPP bottleneck and four-stage decoder with skips.

    Each encoder returns a pair: the tensor passed to the next stage and a
    second tensor that is handed to the decoder of the same level. Channel
    widths double per encoder stage (``ch`` .. ``ch * 8``); the ASPP module
    outputs ``ch * 16`` channels.

    Args:
        nin: number of input channels.
        ch: base channel width.
        dilations: passed through to ``layers.ASPPModule``.
    """

    def __init__(self, nin, ch, dilations=(4, 8, 16)):
        super(BaseASPPNet, self).__init__()
        self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
        self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)

        # Decoder input widths are the sum of the incoming tensor's channels
        # and the matching encoder skip's channels.
        self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
        self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
        self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)

    # Defined as forward() rather than __call__(): overriding __call__ on an
    # nn.Module bypasses nn.Module's own call path (e.g. registered hooks) and
    # leaves forward() unimplemented. Calling the instance, net(x), works
    # exactly as before.
    def forward(self, x):
        """Encode ``x``, apply ASPP, then decode using the encoder skips."""
        h, e1 = self.enc1(x)
        h, e2 = self.enc2(h)
        h, e3 = self.enc3(h)
        h, e4 = self.enc4(h)

        h = self.aspp(h)

        h = self.dec4(h, e4)
        h = self.dec3(h, e3)
        h = self.dec2(h, e2)
        h = self.dec1(h, e1)

        return h
|
38 |
-
|
39 |
-
|
40 |
-
class CascadedASPPNet(nn.Module):
|
41 |
-
def __init__(self, n_fft):
|
42 |
-
super(CascadedASPPNet, self).__init__()
|
43 |
-
self.stg1_low_band_net = BaseASPPNet(2, 16)
|
44 |
-
self.stg1_high_band_net = BaseASPPNet(2, 16)
|
45 |
-
|
46 |
-
self.stg2_bridge = layers.Conv2DBNActiv(18, 8, 1, 1, 0)
|
47 |
-
self.stg2_full_band_net = BaseASPPNet(8, 16)
|
48 |
-
|
49 |
-
self.stg3_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
|
50 |
-
self.stg3_full_band_net = BaseASPPNet(16, 32)
|
51 |
-
|
52 |
-
self.out = nn.Conv2d(32, 2, 1, bias=False)
|
53 |
-
self.aux1_out = nn.Conv2d(16, 2, 1, bias=False)
|
54 |
-
self.aux2_out = nn.Conv2d(16, 2, 1, bias=False)
|
55 |
-
|
56 |
-
self.max_bin = n_fft // 2
|
57 |
-
self.output_bin = n_fft // 2 + 1
|
58 |
-
|
59 |
-
self.offset = 128
|
60 |
-
|
61 |
-
def forward(self, x, aggressiveness=None):
|
62 |
-
mix = x.detach()
|
63 |
-
x = x.clone()
|
64 |
-
|
65 |
-
x = x[:, :, : self.max_bin]
|
66 |
-
|
67 |
-
bandw = x.size()[2] // 2
|
68 |
-
aux1 = torch.cat(
|
69 |
-
[
|
70 |
-
self.stg1_low_band_net(x[:, :, :bandw]),
|
71 |
-
self.stg1_high_band_net(x[:, :, bandw:]),
|
72 |
-
],
|
73 |
-
dim=2,
|
74 |
-
)
|
75 |
-
|
76 |
-
h = torch.cat([x, aux1], dim=1)
|
77 |
-
aux2 = self.stg2_full_band_net(self.stg2_bridge(h))
|
78 |
-
|
79 |
-
h = torch.cat([x, aux1, aux2], dim=1)
|
80 |
-
h = self.stg3_full_band_net(self.stg3_bridge(h))
|
81 |
-
|
82 |
-
mask = torch.sigmoid(self.out(h))
|
83 |
-
mask = F.pad(
|
84 |
-
input=mask,
|
85 |
-
pad=(0, 0, 0, self.output_bin - mask.size()[2]),
|
86 |
-
mode="replicate",
|
87 |
-
)
|
88 |
-
|
89 |
-
if self.training:
|
90 |
-
aux1 = torch.sigmoid(self.aux1_out(aux1))
|
91 |
-
aux1 = F.pad(
|
92 |
-
input=aux1,
|
93 |
-
pad=(0, 0, 0, self.output_bin - aux1.size()[2]),
|
94 |
-
mode="replicate",
|
95 |
-
)
|
96 |
-
aux2 = torch.sigmoid(self.aux2_out(aux2))
|
97 |
-
aux2 = F.pad(
|
98 |
-
input=aux2,
|
99 |
-
pad=(0, 0, 0, self.output_bin - aux2.size()[2]),
|
100 |
-
mode="replicate",
|
101 |
-
)
|
102 |
-
return mask * mix, aux1 * mix, aux2 * mix
|
103 |
-
else:
|
104 |
-
if aggressiveness:
|
105 |
-
mask[:, :, : aggressiveness["split_bin"]] = torch.pow(
|
106 |
-
mask[:, :, : aggressiveness["split_bin"]],
|
107 |
-
1 + aggressiveness["value"] / 3,
|
108 |
-
)
|
109 |
-
mask[:, :, aggressiveness["split_bin"] :] = torch.pow(
|
110 |
-
mask[:, :, aggressiveness["split_bin"] :],
|
111 |
-
1 + aggressiveness["value"],
|
112 |
-
)
|
113 |
-
|
114 |
-
return mask * mix
|
115 |
-
|
116 |
-
def predict(self, x_mag, aggressiveness=None):
    """Run ``forward`` and drop ``self.offset`` entries from both ends of dim 3.

    Args:
        x_mag: Input tensor handed to ``forward`` unchanged.
        aggressiveness: Passed through to ``forward``.

    Returns:
        The ``forward`` result, cropped along dim 3 when ``self.offset`` is
        positive and returned as-is otherwise.
    """
    out = self.forward(x_mag, aggressiveness)

    if self.offset > 0:
        out = out[:, :, :, self.offset : -self.offset]
        # The crop must leave at least one entry along dim 3.
        assert out.size()[3] > 0

    return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/nets_123812KB.py
DELETED
@@ -1,122 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import layers_123821KB as layers
|
6 |
-
|
7 |
-
|
8 |
-
class BaseASPPNet(nn.Module):
    """Encoder / ASPP / decoder network with skip connections.

    Four ``layers.Encoder`` stages feed a ``layers.ASPPModule``; four
    ``layers.Decoder`` stages then combine the running feature map with the
    skip tensor returned by the matching encoder.

    Args:
        nin: Channel count handed to the first encoder.
        ch: Base channel width; deeper stages use multiples of it.
        dilations: Dilation settings forwarded to ``layers.ASPPModule``.
    """

    def __init__(self, nin, ch, dilations=(4, 8, 16)):
        super().__init__()
        # Trailing (3, 2, 1) / (3, 1, 1) are presumably kernel size, stride
        # and padding -- confirm against the ``layers`` module.
        self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
        self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)

        # Each decoder's input width is the previous output plus its skip.
        self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
        self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
        self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)

    def forward(self, x):
        """Encode ``x``, apply the ASPP module, then decode with the skips.

        Implemented as ``forward`` (not ``__call__``) so that
        ``nn.Module.__call__`` keeps running registered hooks; calling the
        instance directly behaves as before.
        """
        h, e1 = self.enc1(x)
        h, e2 = self.enc2(h)
        h, e3 = self.enc3(h)
        h, e4 = self.enc4(h)

        h = self.aspp(h)

        # Decoders consume the skip tensors in reverse encoder order.
        for decoder, skip in (
            (self.dec4, e4),
            (self.dec3, e3),
            (self.dec2, e2),
            (self.dec1, e1),
        ):
            h = decoder(h, skip)

        return h
|
37 |
-
|
38 |
-
|
39 |
-
class CascadedASPPNet(nn.Module):
    """Three-stage cascade of ``BaseASPPNet`` models that masks its input.

    Stage 1 runs separate networks on the lower and upper halves of dim 2,
    stage 2 refines the input joined with the stage-1 features, and stage 3
    refines the input joined with both earlier feature maps. A sigmoid mask
    derived from stage 3 is multiplied with the detached input.

    Args:
        n_fft: Sets ``max_bin = n_fft // 2`` (entries of dim 2 fed to the
            networks) and ``output_bin = n_fft // 2 + 1`` (size the masks are
            padded back to). Presumably the FFT size used upstream -- confirm
            against the caller.
    """

    def __init__(self, n_fft):
        super().__init__()

        # Stage 1: one network per half of dim 2.
        self.stg1_low_band_net = BaseASPPNet(2, 32)
        self.stg1_high_band_net = BaseASPPNet(2, 32)

        # Stage 2: 34 = 2 input channels + 32 stage-1 channels.
        self.stg2_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
        self.stg2_full_band_net = BaseASPPNet(16, 32)

        # Stage 3: 66 = 2 input + 32 stage-1 + 32 stage-2 channels.
        self.stg3_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
        self.stg3_full_band_net = BaseASPPNet(32, 64)

        # 1x1 convolutions turning features into 2-channel mask logits.
        self.out = nn.Conv2d(64, 2, 1, bias=False)
        self.aux1_out = nn.Conv2d(32, 2, 1, bias=False)
        self.aux2_out = nn.Conv2d(32, 2, 1, bias=False)

        half_fft = n_fft // 2
        self.max_bin = half_fft
        self.output_bin = half_fft + 1

        self.offset = 128

    def _pad_to_output_bin(self, t):
        """Replicate-pad the end of dim 2 of ``t`` up to ``self.output_bin``."""
        missing = self.output_bin - t.size()[2]
        return F.pad(input=t, pad=(0, 0, 0, missing), mode="replicate")

    def forward(self, x, aggressiveness=None):
        """Return the masked input (plus auxiliary outputs while training).

        Args:
            x: Input tensor; only its first ``self.max_bin`` entries along
                dim 2 are fed to the networks.
            aggressiveness: Optional mapping with ``"split_bin"`` and
                ``"value"`` keys, only consulted outside training mode.

        Returns:
            In training mode ``(mask * mix, aux1 * mix, aux2 * mix)``;
            otherwise ``mask * mix``, where ``mix`` is the detached input.
        """
        mix = x.detach()
        x = x.clone()[:, :, : self.max_bin]

        half = x.size()[2] // 2
        low_feat = self.stg1_low_band_net(x[:, :, :half])
        high_feat = self.stg1_high_band_net(x[:, :, half:])
        aux1 = torch.cat([low_feat, high_feat], dim=2)

        stage2_in = self.stg2_bridge(torch.cat([x, aux1], dim=1))
        aux2 = self.stg2_full_band_net(stage2_in)

        stage3_in = self.stg3_bridge(torch.cat([x, aux1, aux2], dim=1))
        feats = self.stg3_full_band_net(stage3_in)

        mask = self._pad_to_output_bin(torch.sigmoid(self.out(feats)))

        if self.training:
            aux1_mask = self._pad_to_output_bin(torch.sigmoid(self.aux1_out(aux1)))
            aux2_mask = self._pad_to_output_bin(torch.sigmoid(self.aux2_out(aux2)))
            return mask * mix, aux1_mask * mix, aux2_mask * mix

        if aggressiveness:
            split = aggressiveness["split_bin"]
            strength = aggressiveness["value"]
            # Entries below ``split`` get the milder exponent.
            mask[:, :, :split] = torch.pow(mask[:, :, :split], 1 + strength / 3)
            mask[:, :, split:] = torch.pow(mask[:, :, split:], 1 + strength)

        return mask * mix

    def predict(self, x_mag, aggressiveness=None):
        """Run ``forward`` and crop ``self.offset`` entries off both ends of dim 3."""
        out = self.forward(x_mag, aggressiveness)

        if self.offset > 0:
            out = out[:, :, :, self.offset : -self.offset]
            # The crop must leave at least one entry along dim 3.
            assert out.size()[3] > 0

        return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/nets_123821KB.py
DELETED
@@ -1,122 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import layers_123821KB as layers
|
6 |
-
|
7 |
-
|
8 |
-
class BaseASPPNet(nn.Module):
    """Encoder / ASPP / decoder network with skip connections.

    Four ``layers.Encoder`` stages feed a ``layers.ASPPModule``; four
    ``layers.Decoder`` stages then combine the running feature map with the
    skip tensor returned by the matching encoder.

    Args:
        nin: Channel count handed to the first encoder.
        ch: Base channel width; deeper stages use multiples of it.
        dilations: Dilation settings forwarded to ``layers.ASPPModule``.
    """

    def __init__(self, nin, ch, dilations=(4, 8, 16)):
        super().__init__()
        # Trailing (3, 2, 1) / (3, 1, 1) are presumably kernel size, stride
        # and padding -- confirm against the ``layers`` module.
        self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
        self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)

        # Each decoder's input width is the previous output plus its skip.
        self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
        self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
        self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)

    def forward(self, x):
        """Encode ``x``, apply the ASPP module, then decode with the skips.

        Implemented as ``forward`` (not ``__call__``) so that
        ``nn.Module.__call__`` keeps running registered hooks; calling the
        instance directly behaves as before.
        """
        h, e1 = self.enc1(x)
        h, e2 = self.enc2(h)
        h, e3 = self.enc3(h)
        h, e4 = self.enc4(h)

        h = self.aspp(h)

        # Decoders consume the skip tensors in reverse encoder order.
        for decoder, skip in (
            (self.dec4, e4),
            (self.dec3, e3),
            (self.dec2, e2),
            (self.dec1, e1),
        ):
            h = decoder(h, skip)

        return h
|
37 |
-
|
38 |
-
|
39 |
-
class CascadedASPPNet(nn.Module):
    """Three-stage cascade of ``BaseASPPNet`` models that masks its input.

    Stage 1 runs separate networks on the lower and upper halves of dim 2,
    stage 2 refines the input joined with the stage-1 features, and stage 3
    refines the input joined with both earlier feature maps. A sigmoid mask
    derived from stage 3 is multiplied with the detached input.

    Args:
        n_fft: Sets ``max_bin = n_fft // 2`` (entries of dim 2 fed to the
            networks) and ``output_bin = n_fft // 2 + 1`` (size the masks are
            padded back to). Presumably the FFT size used upstream -- confirm
            against the caller.
    """

    def __init__(self, n_fft):
        super().__init__()

        # Stage 1: one network per half of dim 2.
        self.stg1_low_band_net = BaseASPPNet(2, 32)
        self.stg1_high_band_net = BaseASPPNet(2, 32)

        # Stage 2: 34 = 2 input channels + 32 stage-1 channels.
        self.stg2_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
        self.stg2_full_band_net = BaseASPPNet(16, 32)

        # Stage 3: 66 = 2 input + 32 stage-1 + 32 stage-2 channels.
        self.stg3_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
        self.stg3_full_band_net = BaseASPPNet(32, 64)

        # 1x1 convolutions turning features into 2-channel mask logits.
        self.out = nn.Conv2d(64, 2, 1, bias=False)
        self.aux1_out = nn.Conv2d(32, 2, 1, bias=False)
        self.aux2_out = nn.Conv2d(32, 2, 1, bias=False)

        half_fft = n_fft // 2
        self.max_bin = half_fft
        self.output_bin = half_fft + 1

        self.offset = 128

    def _pad_to_output_bin(self, t):
        """Replicate-pad the end of dim 2 of ``t`` up to ``self.output_bin``."""
        missing = self.output_bin - t.size()[2]
        return F.pad(input=t, pad=(0, 0, 0, missing), mode="replicate")

    def forward(self, x, aggressiveness=None):
        """Return the masked input (plus auxiliary outputs while training).

        Args:
            x: Input tensor; only its first ``self.max_bin`` entries along
                dim 2 are fed to the networks.
            aggressiveness: Optional mapping with ``"split_bin"`` and
                ``"value"`` keys, only consulted outside training mode.

        Returns:
            In training mode ``(mask * mix, aux1 * mix, aux2 * mix)``;
            otherwise ``mask * mix``, where ``mix`` is the detached input.
        """
        mix = x.detach()
        x = x.clone()[:, :, : self.max_bin]

        half = x.size()[2] // 2
        low_feat = self.stg1_low_band_net(x[:, :, :half])
        high_feat = self.stg1_high_band_net(x[:, :, half:])
        aux1 = torch.cat([low_feat, high_feat], dim=2)

        stage2_in = self.stg2_bridge(torch.cat([x, aux1], dim=1))
        aux2 = self.stg2_full_band_net(stage2_in)

        stage3_in = self.stg3_bridge(torch.cat([x, aux1, aux2], dim=1))
        feats = self.stg3_full_band_net(stage3_in)

        mask = self._pad_to_output_bin(torch.sigmoid(self.out(feats)))

        if self.training:
            aux1_mask = self._pad_to_output_bin(torch.sigmoid(self.aux1_out(aux1)))
            aux2_mask = self._pad_to_output_bin(torch.sigmoid(self.aux2_out(aux2)))
            return mask * mix, aux1_mask * mix, aux2_mask * mix

        if aggressiveness:
            split = aggressiveness["split_bin"]
            strength = aggressiveness["value"]
            # Entries below ``split`` get the milder exponent.
            mask[:, :, :split] = torch.pow(mask[:, :, :split], 1 + strength / 3)
            mask[:, :, split:] = torch.pow(mask[:, :, split:], 1 + strength)

        return mask * mix

    def predict(self, x_mag, aggressiveness=None):
        """Run ``forward`` and crop ``self.offset`` entries off both ends of dim 3."""
        out = self.forward(x_mag, aggressiveness)

        if self.offset > 0:
            out = out[:, :, :, self.offset : -self.offset]
            # The crop must leave at least one entry along dim 3.
            assert out.size()[3] > 0

        return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/nets_33966KB.py
DELETED
@@ -1,122 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import layers_33966KB as layers
|
6 |
-
|
7 |
-
|
8 |
-
class BaseASPPNet(nn.Module):
    """Encoder / ASPP / decoder network with skip connections.

    Four ``layers.Encoder`` stages feed a ``layers.ASPPModule``; four
    ``layers.Decoder`` stages then combine the running feature map with the
    skip tensor returned by the matching encoder.

    Args:
        nin: Channel count handed to the first encoder.
        ch: Base channel width; deeper stages use multiples of it.
        dilations: Dilation settings forwarded to ``layers.ASPPModule``.
    """

    def __init__(self, nin, ch, dilations=(4, 8, 16, 32)):
        super().__init__()
        # Trailing (3, 2, 1) / (3, 1, 1) are presumably kernel size, stride
        # and padding -- confirm against the ``layers`` module.
        self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
        self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)

        # Each decoder's input width is the previous output plus its skip.
        self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
        self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
        self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)

    def forward(self, x):
        """Encode ``x``, apply the ASPP module, then decode with the skips.

        Implemented as ``forward`` (not ``__call__``) so that
        ``nn.Module.__call__`` keeps running registered hooks; calling the
        instance directly behaves as before.
        """
        h, e1 = self.enc1(x)
        h, e2 = self.enc2(h)
        h, e3 = self.enc3(h)
        h, e4 = self.enc4(h)

        h = self.aspp(h)

        # Decoders consume the skip tensors in reverse encoder order.
        for decoder, skip in (
            (self.dec4, e4),
            (self.dec3, e3),
            (self.dec2, e2),
            (self.dec1, e1),
        ):
            h = decoder(h, skip)

        return h
|
37 |
-
|
38 |
-
|
39 |
-
class CascadedASPPNet(nn.Module):
    """Three-stage cascade of ``BaseASPPNet`` models that masks its input.

    Stage 1 runs separate networks on the lower and upper halves of dim 2,
    stage 2 refines the input joined with the stage-1 features, and stage 3
    refines the input joined with both earlier feature maps. A sigmoid mask
    derived from stage 3 is multiplied with the detached input.

    Args:
        n_fft: Sets ``max_bin = n_fft // 2`` (entries of dim 2 fed to the
            networks) and ``output_bin = n_fft // 2 + 1`` (size the masks are
            padded back to). Presumably the FFT size used upstream -- confirm
            against the caller.
    """

    def __init__(self, n_fft):
        super().__init__()

        # Stage 1: one network per half of dim 2.
        self.stg1_low_band_net = BaseASPPNet(2, 16)
        self.stg1_high_band_net = BaseASPPNet(2, 16)

        # Stage 2: 18 = 2 input channels + 16 stage-1 channels.
        self.stg2_bridge = layers.Conv2DBNActiv(18, 8, 1, 1, 0)
        self.stg2_full_band_net = BaseASPPNet(8, 16)

        # Stage 3: 34 = 2 input + 16 stage-1 + 16 stage-2 channels.
        self.stg3_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
        self.stg3_full_band_net = BaseASPPNet(16, 32)

        # 1x1 convolutions turning features into 2-channel mask logits.
        self.out = nn.Conv2d(32, 2, 1, bias=False)
        self.aux1_out = nn.Conv2d(16, 2, 1, bias=False)
        self.aux2_out = nn.Conv2d(16, 2, 1, bias=False)

        half_fft = n_fft // 2
        self.max_bin = half_fft
        self.output_bin = half_fft + 1

        self.offset = 128

    def _pad_to_output_bin(self, t):
        """Replicate-pad the end of dim 2 of ``t`` up to ``self.output_bin``."""
        missing = self.output_bin - t.size()[2]
        return F.pad(input=t, pad=(0, 0, 0, missing), mode="replicate")

    def forward(self, x, aggressiveness=None):
        """Return the masked input (plus auxiliary outputs while training).

        Args:
            x: Input tensor; only its first ``self.max_bin`` entries along
                dim 2 are fed to the networks.
            aggressiveness: Optional mapping with ``"split_bin"`` and
                ``"value"`` keys, only consulted outside training mode.

        Returns:
            In training mode ``(mask * mix, aux1 * mix, aux2 * mix)``;
            otherwise ``mask * mix``, where ``mix`` is the detached input.
        """
        mix = x.detach()
        x = x.clone()[:, :, : self.max_bin]

        half = x.size()[2] // 2
        low_feat = self.stg1_low_band_net(x[:, :, :half])
        high_feat = self.stg1_high_band_net(x[:, :, half:])
        aux1 = torch.cat([low_feat, high_feat], dim=2)

        stage2_in = self.stg2_bridge(torch.cat([x, aux1], dim=1))
        aux2 = self.stg2_full_band_net(stage2_in)

        stage3_in = self.stg3_bridge(torch.cat([x, aux1, aux2], dim=1))
        feats = self.stg3_full_band_net(stage3_in)

        mask = self._pad_to_output_bin(torch.sigmoid(self.out(feats)))

        if self.training:
            aux1_mask = self._pad_to_output_bin(torch.sigmoid(self.aux1_out(aux1)))
            aux2_mask = self._pad_to_output_bin(torch.sigmoid(self.aux2_out(aux2)))
            return mask * mix, aux1_mask * mix, aux2_mask * mix

        if aggressiveness:
            split = aggressiveness["split_bin"]
            strength = aggressiveness["value"]
            # Entries below ``split`` get the milder exponent.
            mask[:, :, :split] = torch.pow(mask[:, :, :split], 1 + strength / 3)
            mask[:, :, split:] = torch.pow(mask[:, :, split:], 1 + strength)

        return mask * mix

    def predict(self, x_mag, aggressiveness=None):
        """Run ``forward`` and crop ``self.offset`` entries off both ends of dim 3."""
        out = self.forward(x_mag, aggressiveness)

        if self.offset > 0:
            out = out[:, :, :, self.offset : -self.offset]
            # The crop must leave at least one entry along dim 3.
            assert out.size()[3] > 0

        return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/nets_537227KB.py
DELETED
@@ -1,123 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import torch
|
3 |
-
import torch.nn.functional as F
|
4 |
-
from torch import nn
|
5 |
-
|
6 |
-
from . import layers_537238KB as layers
|
7 |
-
|
8 |
-
|
9 |
-
class BaseASPPNet(nn.Module):
    """Encoder / ASPP / decoder network with skip connections.

    Four ``layers.Encoder`` stages feed a ``layers.ASPPModule``; four
    ``layers.Decoder`` stages then combine the running feature map with the
    skip tensor returned by the matching encoder.

    Args:
        nin: Channel count handed to the first encoder.
        ch: Base channel width; deeper stages use multiples of it.
        dilations: Dilation settings forwarded to ``layers.ASPPModule``.
    """

    def __init__(self, nin, ch, dilations=(4, 8, 16)):
        super().__init__()
        # Trailing (3, 2, 1) / (3, 1, 1) are presumably kernel size, stride
        # and padding -- confirm against the ``layers`` module.
        self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
        self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)

        # Each decoder's input width is the previous output plus its skip.
        self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
        self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
        self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)

    def forward(self, x):
        """Encode ``x``, apply the ASPP module, then decode with the skips.

        Implemented as ``forward`` (not ``__call__``) so that
        ``nn.Module.__call__`` keeps running registered hooks; calling the
        instance directly behaves as before.
        """
        h, e1 = self.enc1(x)
        h, e2 = self.enc2(h)
        h, e3 = self.enc3(h)
        h, e4 = self.enc4(h)

        h = self.aspp(h)

        # Decoders consume the skip tensors in reverse encoder order.
        for decoder, skip in (
            (self.dec4, e4),
            (self.dec3, e3),
            (self.dec2, e2),
            (self.dec1, e1),
        ):
            h = decoder(h, skip)

        return h
|
38 |
-
|
39 |
-
|
40 |
-
class CascadedASPPNet(nn.Module):
    """Three-stage cascade of ``BaseASPPNet`` models that masks its input.

    Stage 1 runs separate networks on the lower and upper halves of dim 2,
    stage 2 refines the input joined with the stage-1 features, and stage 3
    refines the input joined with both earlier feature maps. A sigmoid mask
    derived from stage 3 is multiplied with the detached input.

    Args:
        n_fft: Sets ``max_bin = n_fft // 2`` (entries of dim 2 fed to the
            networks) and ``output_bin = n_fft // 2 + 1`` (size the masks are
            padded back to). Presumably the FFT size used upstream -- confirm
            against the caller.
    """

    def __init__(self, n_fft):
        super().__init__()

        # Stage 1: one network per half of dim 2.
        self.stg1_low_band_net = BaseASPPNet(2, 64)
        self.stg1_high_band_net = BaseASPPNet(2, 64)

        # Stage 2: 66 = 2 input channels + 64 stage-1 channels.
        self.stg2_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
        self.stg2_full_band_net = BaseASPPNet(32, 64)

        # Stage 3: 130 = 2 input + 64 stage-1 + 64 stage-2 channels.
        self.stg3_bridge = layers.Conv2DBNActiv(130, 64, 1, 1, 0)
        self.stg3_full_band_net = BaseASPPNet(64, 128)

        # 1x1 convolutions turning features into 2-channel mask logits.
        self.out = nn.Conv2d(128, 2, 1, bias=False)
        self.aux1_out = nn.Conv2d(64, 2, 1, bias=False)
        self.aux2_out = nn.Conv2d(64, 2, 1, bias=False)

        half_fft = n_fft // 2
        self.max_bin = half_fft
        self.output_bin = half_fft + 1

        self.offset = 128

    def _pad_to_output_bin(self, t):
        """Replicate-pad the end of dim 2 of ``t`` up to ``self.output_bin``."""
        missing = self.output_bin - t.size()[2]
        return F.pad(input=t, pad=(0, 0, 0, missing), mode="replicate")

    def forward(self, x, aggressiveness=None):
        """Return the masked input (plus auxiliary outputs while training).

        Args:
            x: Input tensor; only its first ``self.max_bin`` entries along
                dim 2 are fed to the networks.
            aggressiveness: Optional mapping with ``"split_bin"`` and
                ``"value"`` keys, only consulted outside training mode.

        Returns:
            In training mode ``(mask * mix, aux1 * mix, aux2 * mix)``;
            otherwise ``mask * mix``, where ``mix`` is the detached input.
        """
        mix = x.detach()
        x = x.clone()[:, :, : self.max_bin]

        half = x.size()[2] // 2
        low_feat = self.stg1_low_band_net(x[:, :, :half])
        high_feat = self.stg1_high_band_net(x[:, :, half:])
        aux1 = torch.cat([low_feat, high_feat], dim=2)

        stage2_in = self.stg2_bridge(torch.cat([x, aux1], dim=1))
        aux2 = self.stg2_full_band_net(stage2_in)

        stage3_in = self.stg3_bridge(torch.cat([x, aux1, aux2], dim=1))
        feats = self.stg3_full_band_net(stage3_in)

        mask = self._pad_to_output_bin(torch.sigmoid(self.out(feats)))

        if self.training:
            aux1_mask = self._pad_to_output_bin(torch.sigmoid(self.aux1_out(aux1)))
            aux2_mask = self._pad_to_output_bin(torch.sigmoid(self.aux2_out(aux2)))
            return mask * mix, aux1_mask * mix, aux2_mask * mix

        if aggressiveness:
            split = aggressiveness["split_bin"]
            strength = aggressiveness["value"]
            # Entries below ``split`` get the milder exponent.
            mask[:, :, :split] = torch.pow(mask[:, :, :split], 1 + strength / 3)
            mask[:, :, split:] = torch.pow(mask[:, :, split:], 1 + strength)

        return mask * mix

    def predict(self, x_mag, aggressiveness=None):
        """Run ``forward`` and crop ``self.offset`` entries off both ends of dim 3."""
        out = self.forward(x_mag, aggressiveness)

        if self.offset > 0:
            out = out[:, :, :, self.offset : -self.offset]
            # The crop must leave at least one entry along dim 3.
            assert out.size()[3] > 0

        return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/nets_537238KB.py
DELETED
@@ -1,123 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import torch
|
3 |
-
import torch.nn.functional as F
|
4 |
-
from torch import nn
|
5 |
-
|
6 |
-
from . import layers_537238KB as layers
|
7 |
-
|
8 |
-
|
9 |
-
class BaseASPPNet(nn.Module):
    """Encoder / ASPP / decoder network with skip connections.

    Four ``layers.Encoder`` stages feed a ``layers.ASPPModule``; four
    ``layers.Decoder`` stages then combine the running feature map with the
    skip tensor returned by the matching encoder.

    Args:
        nin: Channel count handed to the first encoder.
        ch: Base channel width; deeper stages use multiples of it.
        dilations: Dilation settings forwarded to ``layers.ASPPModule``.
    """

    def __init__(self, nin, ch, dilations=(4, 8, 16)):
        super().__init__()
        # Trailing (3, 2, 1) / (3, 1, 1) are presumably kernel size, stride
        # and padding -- confirm against the ``layers`` module.
        self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
        self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)

        # Each decoder's input width is the previous output plus its skip.
        self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
        self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
        self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)

    def forward(self, x):
        """Encode ``x``, apply the ASPP module, then decode with the skips.

        Implemented as ``forward`` (not ``__call__``) so that
        ``nn.Module.__call__`` keeps running registered hooks; calling the
        instance directly behaves as before.
        """
        h, e1 = self.enc1(x)
        h, e2 = self.enc2(h)
        h, e3 = self.enc3(h)
        h, e4 = self.enc4(h)

        h = self.aspp(h)

        # Decoders consume the skip tensors in reverse encoder order.
        for decoder, skip in (
            (self.dec4, e4),
            (self.dec3, e3),
            (self.dec2, e2),
            (self.dec1, e1),
        ):
            h = decoder(h, skip)

        return h
|
38 |
-
|
39 |
-
|
40 |
-
class CascadedASPPNet(nn.Module):
    """Three-stage cascade of ``BaseASPPNet`` models that masks its input.

    Stage 1 runs separate networks on the lower and upper halves of dim 2,
    stage 2 refines the input joined with the stage-1 features, and stage 3
    refines the input joined with both earlier feature maps. A sigmoid mask
    derived from stage 3 is multiplied with the detached input.

    Args:
        n_fft: Sets ``max_bin = n_fft // 2`` (entries of dim 2 fed to the
            networks) and ``output_bin = n_fft // 2 + 1`` (size the masks are
            padded back to). Presumably the FFT size used upstream -- confirm
            against the caller.
    """

    def __init__(self, n_fft):
        super().__init__()

        # Stage 1: one network per half of dim 2.
        self.stg1_low_band_net = BaseASPPNet(2, 64)
        self.stg1_high_band_net = BaseASPPNet(2, 64)

        # Stage 2: 66 = 2 input channels + 64 stage-1 channels.
        self.stg2_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
        self.stg2_full_band_net = BaseASPPNet(32, 64)

        # Stage 3: 130 = 2 input + 64 stage-1 + 64 stage-2 channels.
        self.stg3_bridge = layers.Conv2DBNActiv(130, 64, 1, 1, 0)
        self.stg3_full_band_net = BaseASPPNet(64, 128)

        # 1x1 convolutions turning features into 2-channel mask logits.
        self.out = nn.Conv2d(128, 2, 1, bias=False)
        self.aux1_out = nn.Conv2d(64, 2, 1, bias=False)
        self.aux2_out = nn.Conv2d(64, 2, 1, bias=False)

        half_fft = n_fft // 2
        self.max_bin = half_fft
        self.output_bin = half_fft + 1

        self.offset = 128

    def _pad_to_output_bin(self, t):
        """Replicate-pad the end of dim 2 of ``t`` up to ``self.output_bin``."""
        missing = self.output_bin - t.size()[2]
        return F.pad(input=t, pad=(0, 0, 0, missing), mode="replicate")

    def forward(self, x, aggressiveness=None):
        """Return the masked input (plus auxiliary outputs while training).

        Args:
            x: Input tensor; only its first ``self.max_bin`` entries along
                dim 2 are fed to the networks.
            aggressiveness: Optional mapping with ``"split_bin"`` and
                ``"value"`` keys, only consulted outside training mode.

        Returns:
            In training mode ``(mask * mix, aux1 * mix, aux2 * mix)``;
            otherwise ``mask * mix``, where ``mix`` is the detached input.
        """
        mix = x.detach()
        x = x.clone()[:, :, : self.max_bin]

        half = x.size()[2] // 2
        low_feat = self.stg1_low_band_net(x[:, :, :half])
        high_feat = self.stg1_high_band_net(x[:, :, half:])
        aux1 = torch.cat([low_feat, high_feat], dim=2)

        stage2_in = self.stg2_bridge(torch.cat([x, aux1], dim=1))
        aux2 = self.stg2_full_band_net(stage2_in)

        stage3_in = self.stg3_bridge(torch.cat([x, aux1, aux2], dim=1))
        feats = self.stg3_full_band_net(stage3_in)

        mask = self._pad_to_output_bin(torch.sigmoid(self.out(feats)))

        if self.training:
            aux1_mask = self._pad_to_output_bin(torch.sigmoid(self.aux1_out(aux1)))
            aux2_mask = self._pad_to_output_bin(torch.sigmoid(self.aux2_out(aux2)))
            return mask * mix, aux1_mask * mix, aux2_mask * mix

        if aggressiveness:
            split = aggressiveness["split_bin"]
            strength = aggressiveness["value"]
            # Entries below ``split`` get the milder exponent.
            mask[:, :, :split] = torch.pow(mask[:, :, :split], 1 + strength / 3)
            mask[:, :, split:] = torch.pow(mask[:, :, split:], 1 + strength)

        return mask * mix

    def predict(self, x_mag, aggressiveness=None):
        """Run ``forward`` and crop ``self.offset`` entries off both ends of dim 3."""
        out = self.forward(x_mag, aggressiveness)

        if self.offset > 0:
            out = out[:, :, :, self.offset : -self.offset]
            # The crop must leave at least one entry along dim 3.
            assert out.size()[3] > 0

        return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/nets_61968KB.py
DELETED
@@ -1,122 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import layers_123821KB as layers
|
6 |
-
|
7 |
-
|
8 |
-
class BaseASPPNet(nn.Module):
    """Encoder / ASPP / decoder network with skip connections.

    Four ``layers.Encoder`` stages feed a ``layers.ASPPModule``; four
    ``layers.Decoder`` stages then combine the running feature map with the
    skip tensor returned by the matching encoder.

    Args:
        nin: Channel count handed to the first encoder.
        ch: Base channel width; deeper stages use multiples of it.
        dilations: Dilation settings forwarded to ``layers.ASPPModule``.
    """

    def __init__(self, nin, ch, dilations=(4, 8, 16)):
        super().__init__()
        # Trailing (3, 2, 1) / (3, 1, 1) are presumably kernel size, stride
        # and padding -- confirm against the ``layers`` module.
        self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
        self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)

        # Each decoder's input width is the previous output plus its skip.
        self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
        self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
        self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)

    def forward(self, x):
        """Encode ``x``, apply the ASPP module, then decode with the skips.

        Implemented as ``forward`` (not ``__call__``) so that
        ``nn.Module.__call__`` keeps running registered hooks; calling the
        instance directly behaves as before.
        """
        h, e1 = self.enc1(x)
        h, e2 = self.enc2(h)
        h, e3 = self.enc3(h)
        h, e4 = self.enc4(h)

        h = self.aspp(h)

        # Decoders consume the skip tensors in reverse encoder order.
        for decoder, skip in (
            (self.dec4, e4),
            (self.dec3, e3),
            (self.dec2, e2),
            (self.dec1, e1),
        ):
            h = decoder(h, skip)

        return h
|
37 |
-
|
38 |
-
|
39 |
-
class CascadedASPPNet(nn.Module):
    """Three-stage cascade of ``BaseASPPNet`` models that masks its input.

    Stage 1 runs separate networks on the lower and upper halves of dim 2,
    stage 2 refines the input joined with the stage-1 features, and stage 3
    refines the input joined with both earlier feature maps. A sigmoid mask
    derived from stage 3 is multiplied with the detached input.

    Args:
        n_fft: Sets ``max_bin = n_fft // 2`` (entries of dim 2 fed to the
            networks) and ``output_bin = n_fft // 2 + 1`` (size the masks are
            padded back to). Presumably the FFT size used upstream -- confirm
            against the caller.
    """

    def __init__(self, n_fft):
        super().__init__()

        # Stage 1: one network per half of dim 2.
        self.stg1_low_band_net = BaseASPPNet(2, 32)
        self.stg1_high_band_net = BaseASPPNet(2, 32)

        # Stage 2: 34 = 2 input channels + 32 stage-1 channels.
        self.stg2_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
        self.stg2_full_band_net = BaseASPPNet(16, 32)

        # Stage 3: 66 = 2 input + 32 stage-1 + 32 stage-2 channels.
        self.stg3_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
        self.stg3_full_band_net = BaseASPPNet(32, 64)

        # 1x1 convolutions turning features into 2-channel mask logits.
        self.out = nn.Conv2d(64, 2, 1, bias=False)
        self.aux1_out = nn.Conv2d(32, 2, 1, bias=False)
        self.aux2_out = nn.Conv2d(32, 2, 1, bias=False)

        half_fft = n_fft // 2
        self.max_bin = half_fft
        self.output_bin = half_fft + 1

        self.offset = 128

    def _pad_to_output_bin(self, t):
        """Replicate-pad the end of dim 2 of ``t`` up to ``self.output_bin``."""
        missing = self.output_bin - t.size()[2]
        return F.pad(input=t, pad=(0, 0, 0, missing), mode="replicate")

    def forward(self, x, aggressiveness=None):
        """Return the masked input (plus auxiliary outputs while training).

        Args:
            x: Input tensor; only its first ``self.max_bin`` entries along
                dim 2 are fed to the networks.
            aggressiveness: Optional mapping with ``"split_bin"`` and
                ``"value"`` keys, only consulted outside training mode.

        Returns:
            In training mode ``(mask * mix, aux1 * mix, aux2 * mix)``;
            otherwise ``mask * mix``, where ``mix`` is the detached input.
        """
        mix = x.detach()
        x = x.clone()[:, :, : self.max_bin]

        half = x.size()[2] // 2
        low_feat = self.stg1_low_band_net(x[:, :, :half])
        high_feat = self.stg1_high_band_net(x[:, :, half:])
        aux1 = torch.cat([low_feat, high_feat], dim=2)

        stage2_in = self.stg2_bridge(torch.cat([x, aux1], dim=1))
        aux2 = self.stg2_full_band_net(stage2_in)

        stage3_in = self.stg3_bridge(torch.cat([x, aux1, aux2], dim=1))
        feats = self.stg3_full_band_net(stage3_in)

        mask = self._pad_to_output_bin(torch.sigmoid(self.out(feats)))

        if self.training:
            aux1_mask = self._pad_to_output_bin(torch.sigmoid(self.aux1_out(aux1)))
            aux2_mask = self._pad_to_output_bin(torch.sigmoid(self.aux2_out(aux2)))
            return mask * mix, aux1_mask * mix, aux2_mask * mix

        if aggressiveness:
            split = aggressiveness["split_bin"]
            strength = aggressiveness["value"]
            # Entries below ``split`` get the milder exponent.
            mask[:, :, :split] = torch.pow(mask[:, :, :split], 1 + strength / 3)
            mask[:, :, split:] = torch.pow(mask[:, :, split:], 1 + strength)

        return mask * mix

    def predict(self, x_mag, aggressiveness=None):
        """Run ``forward`` and crop ``self.offset`` entries off both ends of dim 3."""
        out = self.forward(x_mag, aggressiveness)

        if self.offset > 0:
            out = out[:, :, :, self.offset : -self.offset]
            # The crop must leave at least one entry along dim 3.
            assert out.size()[3] > 0

        return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/lib/uvr5_pack/lib_v5/nets_new.py
DELETED
@@ -1,133 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn.functional as F
|
3 |
-
from torch import nn
|
4 |
-
|
5 |
-
from . import layers_new
|
6 |
-
|
7 |
-
|
8 |
-
class BaseNet(nn.Module):
    """Encoder / ASPP / decoder network with an LSTM branch before the last decoder.

    One ``layers_new.Conv2DBNActiv`` and four ``layers_new.Encoder`` stages
    feed a ``layers_new.ASPPModule``. Three decoders then consume the skips
    from ``enc4``..``enc2``; the output of ``layers_new.LSTMModule`` is
    concatenated along dim 1 before the final decoder, which uses the
    ``enc1`` skip.

    Args:
        nin: Channel count handed to the first convolution.
        nout: Base channel width; deeper stages use multiples of it.
        nin_lstm: Forwarded to ``layers_new.LSTMModule`` as its second argument.
        nout_lstm: Forwarded to ``layers_new.LSTMModule`` as its third argument.
        dilations: Dilation settings forwarded to ``layers_new.ASPPModule``.
    """

    def __init__(
        self, nin, nout, nin_lstm, nout_lstm, dilations=((4, 2), (8, 4), (12, 6))
    ):
        super().__init__()
        self.enc1 = layers_new.Conv2DBNActiv(nin, nout, 3, 1, 1)
        self.enc2 = layers_new.Encoder(nout, nout * 2, 3, 2, 1)
        self.enc3 = layers_new.Encoder(nout * 2, nout * 4, 3, 2, 1)
        self.enc4 = layers_new.Encoder(nout * 4, nout * 6, 3, 2, 1)
        self.enc5 = layers_new.Encoder(nout * 6, nout * 8, 3, 2, 1)

        self.aspp = layers_new.ASPPModule(nout * 8, nout * 8, dilations, dropout=True)

        self.dec4 = layers_new.Decoder(nout * (6 + 8), nout * 6, 3, 1, 1)
        self.dec3 = layers_new.Decoder(nout * (4 + 6), nout * 4, 3, 1, 1)
        self.dec2 = layers_new.Decoder(nout * (2 + 4), nout * 2, 3, 1, 1)
        self.lstm_dec2 = layers_new.LSTMModule(nout * 2, nin_lstm, nout_lstm)
        # The "+ 1" presumably accounts for a single channel contributed by
        # the LSTM branch -- confirm against ``layers_new.LSTMModule``.
        self.dec1 = layers_new.Decoder(nout * (1 + 2) + 1, nout * 1, 3, 1, 1)

    def forward(self, x):
        """Encode ``x``, apply the ASPP module, then decode with the skips.

        Implemented as ``forward`` (not ``__call__``) so that
        ``nn.Module.__call__`` keeps running registered hooks; calling the
        instance directly, including from ``nn.Sequential``, behaves as
        before.
        """
        e1 = self.enc1(x)
        e2 = self.enc2(e1)
        e3 = self.enc3(e2)
        e4 = self.enc4(e3)
        e5 = self.enc5(e4)

        h = self.aspp(e5)

        h = self.dec4(h, e4)
        h = self.dec3(h, e3)
        h = self.dec2(h, e2)
        # Append the LSTM branch output as extra channels before the last decoder.
        h = torch.cat([h, self.lstm_dec2(h)], dim=1)
        h = self.dec1(h, e1)

        return h
|
43 |
-
|
44 |
-
|
45 |
-
class CascadedNet(nn.Module):
    """Three-stage cascade of ``BaseNet`` models that predicts a sigmoid mask.

    Stages 1 and 2 each process the lower and upper halves of dim 2 with
    separate networks; stage 3 processes the input joined along dim 1 with
    the features of both earlier stages.

    Args:
        n_fft: Sets ``max_bin = n_fft // 2`` (entries of dim 2 fed to the
            networks) and ``output_bin = n_fft // 2 + 1`` (size the masks are
            padded back to). Presumably the FFT size used upstream -- confirm
            against the caller.
        nout: Base channel width of the sub-networks.
        nout_lstm: LSTM width argument forwarded to the ``BaseNet`` instances
            (halved for the high-band networks).
    """

    def __init__(self, n_fft, nout=32, nout_lstm=128):
        super().__init__()

        self.max_bin = n_fft // 2
        self.output_bin = n_fft // 2 + 1
        self.nin_lstm = self.max_bin // 2
        self.offset = 64

        # Stage 1: per-half networks; the low-band one gets an extra 1x1 conv.
        self.stg1_low_band_net = nn.Sequential(
            BaseNet(2, nout // 2, self.nin_lstm // 2, nout_lstm),
            layers_new.Conv2DBNActiv(nout // 2, nout // 4, 1, 1, 0),
        )
        self.stg1_high_band_net = BaseNet(
            2, nout // 4, self.nin_lstm // 2, nout_lstm // 2
        )

        # Stage 2: input channels are the 2 raw channels plus stage-1 features.
        self.stg2_low_band_net = nn.Sequential(
            BaseNet(nout // 4 + 2, nout, self.nin_lstm // 2, nout_lstm),
            layers_new.Conv2DBNActiv(nout, nout // 2, 1, 1, 0),
        )
        self.stg2_high_band_net = BaseNet(
            nout // 4 + 2, nout // 2, self.nin_lstm // 2, nout_lstm // 2
        )

        # Stage 3: raw channels plus stage-1 and stage-2 features.
        self.stg3_full_band_net = BaseNet(
            3 * nout // 4 + 2, nout, self.nin_lstm, nout_lstm
        )

        # 1x1 convolutions turning features into 2-channel mask logits.
        self.out = nn.Conv2d(nout, 2, 1, bias=False)
        self.aux_out = nn.Conv2d(3 * nout // 4, 2, 1, bias=False)

    def _pad_to_output_bin(self, t):
        """Replicate-pad the end of dim 2 of ``t`` up to ``self.output_bin``."""
        missing = self.output_bin - t.size()[2]
        return F.pad(input=t, pad=(0, 0, 0, missing), mode="replicate")

    def _trim_offset(self, t):
        """Crop ``self.offset`` entries off both ends of dim 3 when it is positive."""
        if self.offset > 0:
            t = t[:, :, :, self.offset : -self.offset]
            # The crop must leave at least one entry along dim 3.
            assert t.size()[3] > 0
        return t

    def forward(self, x):
        """Return the mask, plus an auxiliary mask while in training mode.

        Args:
            x: Input tensor; only its first ``self.max_bin`` entries along
                dim 2 are fed to the networks.

        Returns:
            ``(mask, aux)`` in training mode, otherwise ``mask`` alone. Both
            are padded along dim 2 to ``self.output_bin``.
        """
        x = x[:, :, : self.max_bin]

        half = x.size()[2] // 2
        low_in, high_in = x[:, :, :half], x[:, :, half:]

        low1 = self.stg1_low_band_net(low_in)
        high1 = self.stg1_high_band_net(high_in)
        aux1 = torch.cat([low1, high1], dim=2)

        low2 = self.stg2_low_band_net(torch.cat([low_in, low1], dim=1))
        high2 = self.stg2_high_band_net(torch.cat([high_in, high1], dim=1))
        aux2 = torch.cat([low2, high2], dim=2)

        full = self.stg3_full_band_net(torch.cat([x, aux1, aux2], dim=1))

        mask = self._pad_to_output_bin(torch.sigmoid(self.out(full)))

        if not self.training:
            return mask

        aux_logits = self.aux_out(torch.cat([aux1, aux2], dim=1))
        aux = self._pad_to_output_bin(torch.sigmoid(aux_logits))
        return mask, aux

    def predict_mask(self, x):
        """Return the ``forward`` mask cropped by ``self.offset`` along dim 3."""
        return self._trim_offset(self.forward(x))

    def predict(self, x, aggressiveness=None):
        """Return ``x`` multiplied by the mask, cropped by ``self.offset`` along dim 3.

        ``aggressiveness`` is accepted but not used by this method.
        """
        mask = self.forward(x)
        return self._trim_offset(x * mask)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|