Spaces:

insiderakash
/

train_svc2

Runtime error

App Files Files Community

Akash Chavda committed on Sep 15, 2023

Commit

ab2e354

1 Parent(s): 547826f

feat: update modal with new code

Browse files

Files changed (28) hide show

.DS_Store +0 -0
Dockerfile +3 -21
GUI.py +1410 -0
a.png +0 -0
app.py +0 -0
audios/somegirl.mp3 +0 -0
audios/someguy.mp3 +0 -0
audios/unachica.mp3 +0 -0
audios/unchico.mp3 +0 -0
configs/config.py +1 -4
docker-compose.yml +1 -8
docs/en/README.en.md +1 -30
download_files.py +19 -0
gui_v1.py +44 -108
infer-web.py +0 -1
infer/lib/audio.py +12 -23
infer/lib/infer_pack/models.py +1 -4
infer/lib/rmvpe.py +2 -5
infer/modules/ipex/__init__.py +16 -28
infer/modules/ipex/attention.py +33 -94
infer/modules/ipex/gradscaler.py +11 -19
infer/modules/ipex/hijacks.py +92 -253
infer/modules/train/train.py +1 -4
infer/modules/vc/modules.py +9 -10
requirements-dml.txt +2 -0
requirements.txt +8 -2
tools/rvc_for_realtime.py +12 -6
tools/torchgate/utils.py +2 -6

.DS_Store ADDED Viewed

Binary file (10.2 kB). View file

Dockerfile CHANGED Viewed

@@ -1,6 +1,6 @@
 # syntax=docker/dockerfile:1
-FROM nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu20.04
 EXPOSE 7865
@@ -8,27 +8,9 @@ WORKDIR /app
 COPY . .
-# Install dependencies to add PPAs
-RUN apt-get update && \
-    apt-get install -y -qq ffmpeg aria2 && apt clean && \
-    apt-get install -y software-properties-common && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-# Add the deadsnakes PPA to get Python 3.9
-RUN add-apt-repository ppa:deadsnakes/ppa
-# Install Python 3.9 and pip
-RUN apt-get update && \
-    apt-get install -y build-essential python-dev python3-dev python3.9-distutils python3.9-dev python3.9 curl && \
-    apt-get clean && \
-    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 && \
-    curl https://bootstrap.pypa.io/get-pip.py | python3.9
-# Set Python 3.9 as the default
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1
-RUN python3 -m pip install --no-cache-dir -r requirements.txt
 RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d assets/pretrained_v2/ -o D40k.pth
 RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d assets/pretrained_v2/ -o G40k.pth

 # syntax=docker/dockerfile:1
+FROM python:3.10-bullseye
 EXPOSE 7865
 COPY . .
+RUN apt update && apt install -y -qq ffmpeg aria2 && apt clean
+RUN pip3 install --no-cache-dir -r requirements.txt
 RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d assets/pretrained_v2/ -o D40k.pth
 RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d assets/pretrained_v2/ -o G40k.pth

GUI.py ADDED Viewed

	@@ -0,0 +1,1410 @@

+import os, sys
+import datetime, subprocess
+from mega import Mega
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+import logging
+import shutil
+import threading
+import traceback
+import warnings
+from random import shuffle
+from subprocess import Popen
+from time import sleep
+import json
+import pathlib
+import fairseq
+import faiss
+import gradio as gr
+import numpy as np
+import torch
+from dotenv import load_dotenv
+from sklearn.cluster import MiniBatchKMeans
+from configs.config import Config
+from i18n.i18n import I18nAuto
+from infer.lib.train.process_ckpt import (
+    change_info,
+    extract_small_model,
+    merge,
+    show_info,
+)
+from infer.modules.uvr5.modules import uvr
+from infer.modules.vc.modules import VC
+# Module start-up: logging, scratch directories, RNG seed, configuration and
+# the shared voice-conversion object. These statements run at import time and
+# their order matters (e.g. TEMP is recreated before it is exported below).
+# Quieten numba's logger; this module logs through its own logger.
+logging.getLogger("numba").setLevel(logging.WARNING)
+logger = logging.getLogger(__name__)
+# Recreate an empty TEMP directory under the working directory on every start.
+tmp = os.path.join(now_dir, "TEMP")
+shutil.rmtree(tmp, ignore_errors=True)
+# Delete infer_pack / uvr5_pack from the bundled runtime's site-packages if present
+# (presumably so the in-repo copies are the ones imported - TODO confirm).
+shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
+shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
+os.makedirs(tmp, exist_ok=True)
+os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
+os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)
+# Point the TEMP environment variable at the directory created above.
+os.environ["TEMP"] = tmp
+# NOTE: this silences every Python warning for the whole process.
+warnings.filterwarnings("ignore")
+torch.manual_seed(114514)
+# Load variables from a .env file; weight_root / index_root are read from the
+# environment further down in this file.
+load_dotenv()
+config = Config()
+vc = VC(config)
+if config.dml == True:
+    # When the DML flag is set, replace fairseq's GradMultiply.forward with a
+    # version that records the scale on ctx and returns a detached clone of x.
+    def forward_dml(ctx, x, scale):
+        ctx.scale = scale
+        res = x.clone().detach()
+        return res
+    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
+i18n = I18nAuto()
+logger.info(i18n)
+# 判断是否有能用来训练和加速推理的N卡
+ngpu = torch.cuda.device_count()
+gpu_infos = []
+mem = []
+if_gpu_ok = False
+if torch.cuda.is_available() or ngpu != 0:
+    for i in range(ngpu):
+        gpu_name = torch.cuda.get_device_name(i)
+        if any(
+            value in gpu_name.upper()
+            for value in [
+                "10",
+                "16",
+                "20",
+                "30",
+                "40",
+                "A2",
+                "A3",
+                "A4",
+                "P4",
+                "A50",
+                "500",
+                "A60",
+                "70",
+                "80",
+                "90",
+                "M4",
+                "T4",
+                "TITAN",
+            ]
+        ):
+            # A10#A100#V100#A40#P40#M40#K80#A4500
+            if_gpu_ok = True  # 至少有一张能用的N卡
+            gpu_infos.append("%s\t%s" % (i, gpu_name))
+            mem.append(
+                int(
+                    torch.cuda.get_device_properties(i).total_memory
+                    / 1024
+                    / 1024
+                    / 1024
+                    + 0.4
+                )
+            )
+if if_gpu_ok and len(gpu_infos) > 0:
+    gpu_info = "\n".join(gpu_infos)
+    default_batch_size = min(mem) // 2
+else:
+    gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
+    default_batch_size = 1
+gpus = "-".join([i[0] for i in gpu_infos])
+class ToolButton(gr.Button, gr.components.FormComponent):
+    """Small button with single emoji as text, fits inside gradio forms"""
+    def __init__(self, **kwargs):
+        # Always created with variant="tool"; every other keyword argument is
+        # forwarded unchanged to the parent constructor.
+        super().__init__(variant="tool", **kwargs)
+    def get_block_name(self):
+        # Report the block name "button" rather than a name derived from this subclass.
+        return "button"
+weight_root = os.getenv("weight_root")
+weight_uvr5_root = os.getenv("weight_uvr5_root")
+index_root = os.getenv("index_root")
+names = []
+for name in os.listdir(weight_root):
+    if name.endswith(".pth"):
+        names.append(name)
+index_paths = []
+for root, dirs, files in os.walk(index_root, topdown=False):
+    for name in files:
+        if name.endswith(".index") and "trained" not in name:
+            index_paths.append("%s/%s" % (root, name))
+uvr5_names = []
+for name in os.listdir(weight_uvr5_root):
+    if name.endswith(".pth") or "onnx" in name:
+        uvr5_names.append(name.replace(".pth", ""))
+def change_choices():
+    names = []
+    for name in os.listdir(weight_root):
+        if name.endswith(".pth"):
+            names.append(name)
+    index_paths = []
+    for root, dirs, files in os.walk(index_root, topdown=False):
+        for name in files:
+            if name.endswith(".index") and "trained" not in name:
+                index_paths.append("%s/%s" % (root, name))
+    audio_files=[]
+    for filename in os.listdir("./audios"):
+        if filename.endswith(('.wav','.mp3','.ogg')):
+            audio_files.append('./audios/'+filename)
+    return {"choices": sorted(names), "__type__": "update"}, {
+        "choices": sorted(index_paths),
+        "__type__": "update",
+    }, {"choices": sorted(audio_files), "__type__": "update"}
+def clean():
+    return {"value": "", "__type__": "update"}
+def export_onnx():
+    """Run the project's ONNX export with no arguments.
+
+    The exporter is imported inside the function, so its module is only
+    loaded when an export is actually requested.
+    """
+    from infer.modules.onnx.export import export_onnx as eo
+    eo()
+# Maps the sample-rate labels used in the UI ("32k", "40k", "48k") to the
+# numeric rate in Hz that is passed to the preprocessing script.
+sr_dict = {
+    "32k": 32000,
+    "40k": 40000,
+    "48k": 48000,
+}
+def if_done(done, p):
+    while 1:
+        if p.poll() is None:
+            sleep(0.5)
+        else:
+            break
+    done[0] = True
+def if_done_multi(done, ps):
+    while 1:
+        # poll==None代表进程未结束
+        # 只要有一个进程未结束都不停
+        flag = 1
+        for p in ps:
+            if p.poll() is None:
+                flag = 0
+                sleep(0.5)
+                break
+        if flag == 1:
+            break
+    done[0] = True
+def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
+    sr = sr_dict[sr]
+    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
+    f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
+    f.close()
+    per = 3.0 if config.is_half else 3.7
+    cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % (
+        config.python_cmd,
+        trainset_dir,
+        sr,
+        n_p,
+        now_dir,
+        exp_dir,
+        config.noparallel,
+        per,
+    )
+    logger.info(cmd)
+    p = Popen(cmd, shell=True)  # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
+    ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
+    done = [False]
+    threading.Thread(
+        target=if_done,
+        args=(
+            done,
+            p,
+        ),
+    ).start()
+    while 1:
+        with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
+            yield (f.read())
+        sleep(1)
+        if done[0]:
+            break
+    with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
+        log = f.read()
+    logger.info(log)
+    yield log
+# but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
+def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe):
+    gpus = gpus.split("-")
+    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
+    f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
+    f.close()
+    if if_f0:
+        if f0method != "rmvpe_gpu":
+            cmd = (
+                '"%s" infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s'
+                % (
+                    config.python_cmd,
+                    now_dir,
+                    exp_dir,
+                    n_p,
+                    f0method,
+                )
+            )
+            logger.info(cmd)
+            p = Popen(
+                cmd, shell=True, cwd=now_dir
+            )  # , stdin=PIPE, stdout=PIPE,stderr=PIPE
+            ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
+            done = [False]
+            threading.Thread(
+                target=if_done,
+                args=(
+                    done,
+                    p,
+                ),
+            ).start()
+        else:
+            if gpus_rmvpe != "-":
+                gpus_rmvpe = gpus_rmvpe.split("-")
+                leng = len(gpus_rmvpe)
+                ps = []
+                for idx, n_g in enumerate(gpus_rmvpe):
+                    cmd = (
+                        '"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s '
+                        % (
+                            config.python_cmd,
+                            leng,
+                            idx,
+                            n_g,
+                            now_dir,
+                            exp_dir,
+                            config.is_half,
+                        )
+                    )
+                    logger.info(cmd)
+                    p = Popen(
+                        cmd, shell=True, cwd=now_dir
+                    )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+                    ps.append(p)
+                ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
+                done = [False]
+                threading.Thread(
+                    target=if_done_multi,  #
+                    args=(
+                        done,
+                        ps,
+                    ),
+                ).start()
+            else:
+                cmd = (
+                    config.python_cmd
+                    + ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" '
+                    % (
+                        now_dir,
+                        exp_dir,
+                    )
+                )
+                logger.info(cmd)
+                p = Popen(
+                    cmd, shell=True, cwd=now_dir
+                )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+                p.wait()
+                done = [True]
+        while 1:
+            with open(
+                "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
+            ) as f:
+                yield (f.read())
+            sleep(1)
+            if done[0]:
+                break
+        with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+            log = f.read()
+        logger.info(log)
+        yield log
+    ####对不同part分别开多进程
+    """
+    n_part=int(sys.argv[1])
+    i_part=int(sys.argv[2])
+    i_gpu=sys.argv[3]
+    exp_dir=sys.argv[4]
+    os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
+    """
+    leng = len(gpus)
+    ps = []
+    for idx, n_g in enumerate(gpus):
+        cmd = (
+            '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s'
+            % (
+                config.python_cmd,
+                config.device,
+                leng,
+                idx,
+                n_g,
+                now_dir,
+                exp_dir,
+                version19,
+            )
+        )
+        logger.info(cmd)
+        p = Popen(
+            cmd, shell=True, cwd=now_dir
+        )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+        ps.append(p)
+    ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
+    done = [False]
+    threading.Thread(
+        target=if_done_multi,
+        args=(
+            done,
+            ps,
+        ),
+    ).start()
+    while 1:
+        with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+            yield (f.read())
+        sleep(1)
+        if done[0]:
+            break
+    with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+        log = f.read()
+    logger.info(log)
+    yield log
+def get_pretrained_models(path_str, f0_str, sr2):
+    if_pretrained_generator_exist = os.access(
+        "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
+    )
+    if_pretrained_discriminator_exist = os.access(
+        "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
+    )
+    if not if_pretrained_generator_exist:
+        logger.warn(
+            "assets/pretrained%s/%sG%s.pth not exist, will not use pretrained model",
+            path_str,
+            f0_str,
+            sr2,
+        )
+    if not if_pretrained_discriminator_exist:
+        logger.warn(
+            "assets/pretrained%s/%sD%s.pth not exist, will not use pretrained model",
+            path_str,
+            f0_str,
+            sr2,
+        )
+    return (
+        "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
+        if if_pretrained_generator_exist
+        else "",
+        "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
+        if if_pretrained_discriminator_exist
+        else "",
+    )
+def change_sr2(sr2, if_f0_3, version19):
+    path_str = "" if version19 == "v1" else "_v2"
+    f0_str = "f0" if if_f0_3 else ""
+    return get_pretrained_models(path_str, f0_str, sr2)
+def change_version19(sr2, if_f0_3, version19):
+    path_str = "" if version19 == "v1" else "_v2"
+    if sr2 == "32k" and version19 == "v1":
+        sr2 = "40k"
+    to_return_sr2 = (
+        {"choices": ["40k", "48k"], "__type__": "update", "value": sr2}
+        if version19 == "v1"
+        else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2}
+    )
+    f0_str = "f0" if if_f0_3 else ""
+    return (
+        *get_pretrained_models(path_str, f0_str, sr2),
+        to_return_sr2,
+    )
+def change_f0(if_f0_3, sr2, version19):  # f0method8,pretrained_G14,pretrained_D15
+    path_str = "" if version19 == "v1" else "_v2"
+    return (
+        {"visible": if_f0_3, "__type__": "update"},
+        *get_pretrained_models(path_str, "f0", sr2),
+    )
+# but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
+def click_train(
+    exp_dir1,
+    sr2,
+    if_f0_3,
+    spk_id5,
+    save_epoch10,
+    total_epoch11,
+    batch_size12,
+    if_save_latest13,
+    pretrained_G14,
+    pretrained_D15,
+    gpus16,
+    if_cache_gpu17,
+    if_save_every_weights18,
+    version19,
+):
+    # 生成filelist
+    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+    os.makedirs(exp_dir, exist_ok=True)
+    gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
+    feature_dir = (
+        "%s/3_feature256" % (exp_dir)
+        if version19 == "v1"
+        else "%s/3_feature768" % (exp_dir)
+    )
+    if if_f0_3:
+        f0_dir = "%s/2a_f0" % (exp_dir)
+        f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
+        names = (
+            set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
+            & set([name.split(".")[0] for name in os.listdir(feature_dir)])
+            & set([name.split(".")[0] for name in os.listdir(f0_dir)])
+            & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
+        )
+    else:
+        names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
+            [name.split(".")[0] for name in os.listdir(feature_dir)]
+        )
+    opt = []
+    for name in names:
+        if if_f0_3:
+            opt.append(
+                "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
+                % (
+                    gt_wavs_dir.replace("\\", "\\\\"),
+                    name,
+                    feature_dir.replace("\\", "\\\\"),
+                    name,
+                    f0_dir.replace("\\", "\\\\"),
+                    name,
+                    f0nsf_dir.replace("\\", "\\\\"),
+                    name,
+                    spk_id5,
+                )
+            )
+        else:
+            opt.append(
+                "%s/%s.wav|%s/%s.npy|%s"
+                % (
+                    gt_wavs_dir.replace("\\", "\\\\"),
+                    name,
+                    feature_dir.replace("\\", "\\\\"),
+                    name,
+                    spk_id5,
+                )
+            )
+    fea_dim = 256 if version19 == "v1" else 768
+    if if_f0_3:
+        for _ in range(2):
+            opt.append(
+                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
+                % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
+            )
+    else:
+        for _ in range(2):
+            opt.append(
+                "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
+                % (now_dir, sr2, now_dir, fea_dim, spk_id5)
+            )
+    shuffle(opt)
+    with open("%s/filelist.txt" % exp_dir, "w") as f:
+        f.write("\n".join(opt))
+    logger.debug("Write filelist done")
+    # 生成config#无需生成config
+    # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
+    logger.info("Use gpus: %s", str(gpus16))
+    if pretrained_G14 == "":
+        logger.info("No pretrained Generator")
+    if pretrained_D15 == "":
+        logger.info("No pretrained Discriminator")
+    if version19 == "v1" or sr2 == "40k":
+        config_path = "v1/%s.json" % sr2
+    else:
+        config_path = "v2/%s.json" % sr2
+    config_save_path = os.path.join(exp_dir, "config.json")
+    if not pathlib.Path(config_save_path).exists():
+        with open(config_save_path, "w", encoding="utf-8") as f:
+            json.dump(
+                config.json_config[config_path],
+                f,
+                ensure_ascii=False,
+                indent=4,
+                sort_keys=True,
+            )
+            f.write("\n")
+    if gpus16:
+        cmd = (
+            '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
+            % (
+                config.python_cmd,
+                exp_dir1,
+                sr2,
+                1 if if_f0_3 else 0,
+                batch_size12,
+                gpus16,
+                total_epoch11,
+                save_epoch10,
+                "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
+                "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
+                1 if if_save_latest13 == i18n("是") else 0,
+                1 if if_cache_gpu17 == i18n("是") else 0,
+                1 if if_save_every_weights18 == i18n("是") else 0,
+                version19,
+            )
+        )
+    else:
+        cmd = (
+            '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
+            % (
+                config.python_cmd,
+                exp_dir1,
+                sr2,
+                1 if if_f0_3 else 0,
+                batch_size12,
+                total_epoch11,
+                save_epoch10,
+                "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
+                "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
+                1 if if_save_latest13 == i18n("是") else 0,
+                1 if if_cache_gpu17 == i18n("是") else 0,
+                1 if if_save_every_weights18 == i18n("是") else 0,
+                version19,
+            )
+        )
+    logger.info(cmd)
+    p = Popen(cmd, shell=True, cwd=now_dir)
+    p.wait()
+    return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"
+# but4.click(train_index, [exp_dir1], info3)
+def train_index(exp_dir1, version19):
+    # exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+    exp_dir = "logs/%s" % (exp_dir1)
+    os.makedirs(exp_dir, exist_ok=True)
+    feature_dir = (
+        "%s/3_feature256" % (exp_dir)
+        if version19 == "v1"
+        else "%s/3_feature768" % (exp_dir)
+    )
+    if not os.path.exists(feature_dir):
+        return "请先进行特征提取!"
+    listdir_res = list(os.listdir(feature_dir))
+    if len(listdir_res) == 0:
+        return "请先进行特征提取！"
+    infos = []
+    npys = []
+    for name in sorted(listdir_res):
+        phone = np.load("%s/%s" % (feature_dir, name))
+        npys.append(phone)
+    big_npy = np.concatenate(npys, 0)
+    big_npy_idx = np.arange(big_npy.shape[0])
+    np.random.shuffle(big_npy_idx)
+    big_npy = big_npy[big_npy_idx]
+    if big_npy.shape[0] > 2e5:
+        infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
+        yield "\n".join(infos)
+        try:
+            big_npy = (
+                MiniBatchKMeans(
+                    n_clusters=10000,
+                    verbose=True,
+                    batch_size=256 * config.n_cpu,
+                    compute_labels=False,
+                    init="random",
+                )
+                .fit(big_npy)
+                .cluster_centers_
+            )
+        except:
+            info = traceback.format_exc()
+            logger.info(info)
+            infos.append(info)
+            yield "\n".join(infos)
+    np.save("%s/total_fea.npy" % exp_dir, big_npy)
+    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+    infos.append("%s,%s" % (big_npy.shape, n_ivf))
+    yield "\n".join(infos)
+    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
+    # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
+    infos.append("training")
+    yield "\n".join(infos)
+    index_ivf = faiss.extract_index_ivf(index)  #
+    index_ivf.nprobe = 1
+    index.train(big_npy)
+    faiss.write_index(
+        index,
+        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
+        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+    )
+    infos.append("adding")
+    yield "\n".join(infos)
+    batch_size_add = 8192
+    for i in range(0, big_npy.shape[0], batch_size_add):
+        index.add(big_npy[i : i + batch_size_add])
+    faiss.write_index(
+        index,
+        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+    )
+    infos.append(
+        "成功构建索引，added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
+    )
+    # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
+    # infos.append("成功构建索引，added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19))
+    yield "\n".join(infos)
+# but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
+def train1key(
+    exp_dir1,
+    sr2,
+    if_f0_3,
+    trainset_dir4,
+    spk_id5,
+    np7,
+    f0method8,
+    save_epoch10,
+    total_epoch11,
+    batch_size12,
+    if_save_latest13,
+    pretrained_G14,
+    pretrained_D15,
+    gpus16,
+    if_cache_gpu17,
+    if_save_every_weights18,
+    version19,
+    gpus_rmvpe,
+):
+    infos = []
+    def get_info_str(strr):
+        infos.append(strr)
+        return "\n".join(infos)
+    ####### step1:处理数据
+    yield get_info_str(i18n("step1:正在处理数据"))
+    [get_info_str(_) for _ in preprocess_dataset(trainset_dir4, exp_dir1, sr2, np7)]
+    ####### step2a:提取音高
+    yield get_info_str(i18n("step2:正在提取音高&正在提取特征"))
+    [
+        get_info_str(_)
+        for _ in extract_f0_feature(
+            gpus16, np7, f0method8, if_f0_3, exp_dir1, version19, gpus_rmvpe
+        )
+    ]
+    ####### step3a:训练模型
+    yield get_info_str(i18n("step3a:正在训练模型"))
+    click_train(
+        exp_dir1,
+        sr2,
+        if_f0_3,
+        spk_id5,
+        save_epoch10,
+        total_epoch11,
+        batch_size12,
+        if_save_latest13,
+        pretrained_G14,
+        pretrained_D15,
+        gpus16,
+        if_cache_gpu17,
+        if_save_every_weights18,
+        version19,
+    )
+    yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
+    ####### step3b:训练索引
+    [get_info_str(_) for _ in train_index(exp_dir1, version19)]
+    yield get_info_str(i18n("全流程结束！"))
+#                    ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
+def change_info_(ckpt_path):
+    if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")):
+        return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
+    try:
+        with open(
+            ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r"
+        ) as f:
+            info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
+            sr, f0 = info["sample_rate"], info["if_f0"]
+            version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
+            return sr, str(f0), version
+    except:
+        traceback.print_exc()
+        return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
+F0GPUVisible = config.dml == False
+def change_f0_method(f0method8):
+    if f0method8 == "rmvpe_gpu":
+        visible = F0GPUVisible
+    else:
+        visible = False
+    return {"visible": visible, "__type__": "update"}
+def find_model():
+    if len(names) > 0:
+        vc.get_vc(sorted(names)[0],None,None)
+        return sorted(names)[0]
+    else:
+        try:
+            gr.Info("Do not forget to choose a model.")
+        except:
+            pass
+        return ''
+def find_audios(index=False):
+    audio_files=[]
+    if not os.path.exists('./audios'): os.mkdir("./audios")
+    for filename in os.listdir("./audios"):
+        if filename.endswith(('.wav','.mp3','.ogg')):
+            audio_files.append("./audios/"+filename)
+    if index:
+        if len(audio_files) > 0: return sorted(audio_files)[0]
+        else: return ""
+    elif len(audio_files) > 0: return sorted(audio_files)
+    else: return []
+def get_index():
+    if find_model() != '':
+        chosen_model=sorted(names)[0].split(".")[0]
+        logs_path="./logs/"+chosen_model
+        if os.path.exists(logs_path):
+            for file in os.listdir(logs_path):
+                if file.endswith(".index"):
+                    return os.path.join(logs_path, file)
+            return ''
+        else:
+            return ''
+def get_indexes():
+    indexes_list=[]
+    for dirpath, dirnames, filenames in os.walk("./logs/"):
+        for filename in filenames:
+            if filename.endswith(".index"):
+                indexes_list.append(os.path.join(dirpath,filename))
+    if len(indexes_list) > 0:
+        return indexes_list
+    else:
+        return ''
+def save_wav(file):
+    try:
+        file_path=file.name
+        shutil.move(file_path,'./audios')
+        return './audios/'+os.path.basename(file_path)
+    except AttributeError:
+        try:
+            new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")+'.wav'
+            new_path='./audios/'+new_name
+            shutil.move(file,new_path)
+            return new_path
+        except TypeError:
+            return None
+def download_from_url(url, model):
+    if url == '':
+        return "URL cannot be left empty."
+    if model =='':
+        return "You need to name your model. For example: My-Model"
+    url = url.strip()
+    zip_dirs = ["zips", "unzips"]
+    for directory in zip_dirs:
+        if os.path.exists(directory):
+            shutil.rmtree(directory)
+    os.makedirs("zips", exist_ok=True)
+    os.makedirs("unzips", exist_ok=True)
+    zipfile = model + '.zip'
+    zipfile_path = './zips/' + zipfile
+    try:
+        if "drive.google.com" in url:
+            subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path])
+        elif "mega.nz" in url:
+            m = Mega()
+            m.download_url(url, './zips')
+        else:
+            subprocess.run(["wget", url, "-O", zipfile_path])
+        for filename in os.listdir("./zips"):
+            if filename.endswith(".zip"):
+                zipfile_path = os.path.join("./zips/",filename)
+                shutil.unpack_archive(zipfile_path, "./unzips", 'zip')
+            else:
+                return "No zipfile found."
+        for root, dirs, files in os.walk('./unzips'):
+            for file in files:
+                file_path = os.path.join(root, file)
+                if file.endswith(".index"):
+                    os.mkdir(f'./logs/{model}')
+                    shutil.copy2(file_path,f'./logs/{model}')
+                elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
+                    shutil.copy(file_path,f'./assets/weights/{model}.pth')
+        shutil.rmtree("zips")
+        shutil.rmtree("unzips")
+        return "Success."
+    except:
+        return "There's been an error."
+def upload_to_dataset(files, dir):
+    if dir == '':
+        dir = './dataset/'+datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    if not os.path.exists(dir):
+        os.makedirs(dir)
+    for file in files:
+        path=file.name
+        shutil.copy2(path,dir)
+    try:
+        gr.Info(i18n("处理数据"))
+    except:
+        pass
+    return i18n("处理数据"), {"value":dir,"__type__":"update"}
+with gr.Blocks(title="EasyGUI v2.9",theme=gr.themes.Base()) as app:
+    gr.HTML("<h1> EasyGUI v2.9 </h1>")
+    with gr.Tabs():
+        with gr.TabItem(i18n("模型推理")):
+            with gr.Row():
+                sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value=find_model())
+                refresh_button = gr.Button(i18n("刷新音色列表和索引路径"), variant="primary")
+                #clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
+                spk_item = gr.Slider(
+                    minimum=0,
+                    maximum=2333,
+                    step=1,
+                    label=i18n("请选择说话人id"),
+                    value=0,
+                    visible=False,
+                    interactive=True,
+                )
+                #clean_button.click(
+                #    fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean"
+                #)
+                vc_transform0 = gr.Number(
+                    label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
+                )
+                but0 = gr.Button(i18n("转换"), variant="primary")
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
+                    with gr.Row():
+                        record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
+                    with gr.Row():
+                        input_audio0 = gr.Dropdown(
+                            label=i18n("输入待处理音频文件路径(默认是正确格式示例)"),
+                            value=find_audios(True),
+                            choices=find_audios()
+                        )
+                        record_button.change(fn=save_wav, inputs=[record_button], outputs=[input_audio0])
+                        dropbox.upload(fn=save_wav, inputs=[dropbox], outputs=[input_audio0])
+                with gr.Column():
+                    with gr.Accordion(label=i18n("自动检测index路径,下拉式选择(dropdown)"), open=False):
+                        file_index2 = gr.Dropdown(
+                            label=i18n("自动检测index路径,下拉式选择(dropdown)"),
+                            choices=get_indexes(),
+                            interactive=True,
+                            value=get_index()
+                        )
+                        index_rate1 = gr.Slider(
+                            minimum=0,
+                            maximum=1,
+                            label=i18n("检索特征占比"),
+                            value=0.66,
+                            interactive=True,
+                        )
+                    vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
+                    with gr.Accordion(label=i18n("常规设置"), open=False):
+                        f0method0 = gr.Radio(
+                            label=i18n(
+                                "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
+                            ),
+                            choices=["pm", "harvest", "crepe", "rmvpe"]
+                            if config.dml == False
+                            else ["pm", "harvest", "rmvpe"],
+                            value="rmvpe",
+                            interactive=True,
+                        )
+                        filter_radius0 = gr.Slider(
+                            minimum=0,
+                            maximum=7,
+                            label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波，数值为滤波半径，使用可以削弱哑音"),
+                            value=3,
+                            step=1,
+                            interactive=True,
+                        )
+                        resample_sr0 = gr.Slider(
+                            minimum=0,
+                            maximum=48000,
+                            label=i18n("后处理重采样至最终采样率，0为不进行重采样"),
+                            value=0,
+                            step=1,
+                            interactive=True,
+                        )
+                        rms_mix_rate0 = gr.Slider(
+                            minimum=0,
+                            maximum=1,
+                            label=i18n("输入源音量包络替换输出音量包络融合比例，越靠近1越使用输出包络"),
+                            value=0.21,
+                            interactive=True,
+                        )
+                        protect0 = gr.Slider(
+                            minimum=0,
+                            maximum=0.5,
+                            label=i18n(
+                                "保护清辅音和呼吸声，防止电音撕裂等artifact，拉满0.5不开启，调低加大保护力度但可能降低索引效果"
+                            ),
+                            value=0.33,
+                            step=0.01,
+                            interactive=True,
+                        )
+                    file_index1 = gr.Textbox(
+                        label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
+                        value="",
+                        interactive=True,
+                        visible=False
+                    )
+                    refresh_button.click(
+                        fn=change_choices,
+                        inputs=[],
+                        outputs=[sid0, file_index2, input_audio0],
+                        api_name="infer_refresh",
+                    )
+                    # file_big_npy1 = gr.Textbox(
+                    #     label=i18n("特征文件路径"),
+                    #     value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
+                    #     interactive=True,
+                    # )
+            with gr.Row():
+                f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False)
+            with gr.Row():
+                vc_output1 = gr.Textbox(label=i18n("输出信息"))
+                but0.click(
+                    vc.vc_single,
+                    [
+                        spk_item,
+                        input_audio0,
+                        vc_transform0,
+                        f0_file,
+                        f0method0,
+                        file_index1,
+                        file_index2,
+                        # file_big_npy1,
+                        index_rate1,
+                        filter_radius0,
+                        resample_sr0,
+                        rms_mix_rate0,
+                        protect0,
+                    ],
+                    [vc_output1, vc_output2],
+                    api_name="infer_convert",
+                )
+            with gr.Row():
+                with gr.Accordion(open=False, label=i18n("批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ")):
+                    with gr.Column():
+                        vc_transform1 = gr.Number(
+                            label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
+                        )
+                        opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
+                        f0method1 = gr.Radio(
+                            label=i18n(
+                                "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
+                            ),
+                            choices=["pm", "harvest", "crepe", "rmvpe"]
+                            if config.dml == False
+                            else ["pm", "harvest", "rmvpe"],
+                            value="pm",
+                            interactive=True,
+                        )
+                        filter_radius1 = gr.Slider(
+                            minimum=0,
+                            maximum=7,
+                            label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波，数值为滤波半径，使用可以削弱哑音"),
+                            value=3,
+                            step=1,
+                            interactive=True,
+                        )
+                    with gr.Column():
+                        file_index3 = gr.Textbox(
+                            label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
+                            value="",
+                            interactive=True,
+                            visible=False
+                        )
+                        file_index4 = gr.Dropdown(
+                            label=i18n("自动检测index路径,下拉式选择(dropdown)"),
+                            choices=sorted(index_paths),
+                            interactive=True,
+                        )
+                        refresh_button.click(
+                            fn=lambda: change_choices()[1],
+                            inputs=[],
+                            outputs=file_index4,
+                            api_name="infer_refresh_batch",
+                        )
+                        # file_big_npy2 = gr.Textbox(
+                        #     label=i18n("特征文件路径"),
+                        #     value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
+                        #     interactive=True,
+                        # )
+                        index_rate2 = gr.Slider(
+                            minimum=0,
+                            maximum=1,
+                            label=i18n("检索特征占比"),
+                            value=1,
+                            interactive=True,
+                        )
+                    with gr.Column():
+                        resample_sr1 = gr.Slider(
+                            minimum=0,
+                            maximum=48000,
+                            label=i18n("后处理重采样至最终采样率，0为不进行重采样"),
+                            value=0,
+                            step=1,
+                            interactive=True,
+                        )
+                        rms_mix_rate1 = gr.Slider(
+                            minimum=0,
+                            maximum=1,
+                            label=i18n("输入源音量包络替换输出音量包络融合比例，越靠近1越使用输出包络"),
+                            value=1,
+                            interactive=True,
+                        )
+                        protect1 = gr.Slider(
+                            minimum=0,
+                            maximum=0.5,
+                            label=i18n(
+                                "保护清辅音和呼吸声，防止电音撕裂等artifact，拉满0.5不开启，调低加大保护力度但可能降低索引效果"
+                            ),
+                            value=0.33,
+                            step=0.01,
+                            interactive=True,
+                        )
+                    with gr.Column():
+                        dir_input = gr.Textbox(
+                            label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
+                            value="E:\codes\py39\\test-20230416b\\todo-songs",
+                        )
+                        inputs = gr.File(
+                            file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
+                        )
+                    with gr.Row():
+                        format1 = gr.Radio(
+                            label=i18n("导出文件格式"),
+                            choices=["wav", "flac", "mp3", "m4a"],
+                            value="flac",
+                            interactive=True,
+                        )
+                        but1 = gr.Button(i18n("转换"), variant="primary")
+                        vc_output3 = gr.Textbox(label=i18n("输出信息"))
+                        but1.click(
+                            vc.vc_multi,
+                            [
+                                spk_item,
+                                dir_input,
+                                opt_input,
+                                inputs,
+                                vc_transform1,
+                                f0method1,
+                                file_index3,
+                                file_index4,
+                                # file_big_npy2,
+                                index_rate2,
+                                filter_radius1,
+                                resample_sr1,
+                                rms_mix_rate1,
+                                protect1,
+                                format1,
+                            ],
+                            [vc_output3],
+                            api_name="infer_convert_batch",
+                        )
+            sid0.change(
+                fn=vc.get_vc,
+                inputs=[sid0, protect0, protect1],
+                outputs=[spk_item, protect0, protect1, file_index2, file_index4],
+            )
+        with gr.TabItem("Download Model"):
+            with gr.Row():
+                url=gr.Textbox(label="Enter the URL to the Model:")
+            with gr.Row():
+                model = gr.Textbox(label="Name your model:")
+                download_button=gr.Button("Download")
+            with gr.Row():
+                status_bar=gr.Textbox(label="")
+                download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])
+            with gr.Row():
+                gr.Markdown(
+                """
+                ❤️ If you like the EasyGUI, help me keep it.❤️
+                https://paypal.me/lesantillan
+                """
+                )
+        with gr.TabItem(i18n("训练")):
+            with gr.Row():
+                with gr.Column():
+                    exp_dir1 = gr.Textbox(label=i18n("输入实验名"), value="My-Voice")
+                    np7 = gr.Slider(
+                        minimum=0,
+                        maximum=config.n_cpu,
+                        step=1,
+                        label=i18n("提取音高和处理数据使用的CPU进程数"),
+                        value=int(np.ceil(config.n_cpu / 1.5)),
+                        interactive=True,
+                    )
+                    sr2 = gr.Radio(
+                        label=i18n("目标采样率"),
+                        choices=["40k", "48k"],
+                        value="40k",
+                        interactive=True,
+                        visible=False
+                    )
+                    if_f0_3 = gr.Radio(
+                        label=i18n("模型是否带音高指导(唱歌一定要, 语音可以不要)"),
+                        choices=[True, False],
+                        value=True,
+                        interactive=True,
+                        visible=False
+                    )
+                    version19 = gr.Radio(
+                        label=i18n("版本"),
+                        choices=["v1", "v2"],
+                        value="v2",
+                        interactive=True,
+                        visible=False,
+                    )
+                    trainset_dir4 = gr.Textbox(
+                        label=i18n("输入训练文件夹路径"), value='./dataset/'+datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+                    )
+                    easy_uploader = gr.Files(label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),file_types=['audio'])
+                    but1 = gr.Button(label=i18n("处理数据"), variant="primary")
+                    info1 = gr.Textbox(label=i18n("输出信息"), value="")
+                    easy_uploader.upload(fn=upload_to_dataset, inputs=[easy_uploader, trainset_dir4], outputs=[info1, trainset_dir4])
+                    gpus6 = gr.Textbox(
+                        label=i18n("以-分隔输入使用的卡号, 例如   0-1-2   使用卡0和卡1和卡2"),
+                        value=gpus,
+                        interactive=True,
+                        visible=F0GPUVisible,
+                    )
+                    gpu_info9 = gr.Textbox(
+                        label=i18n("显卡信息"), value=gpu_info, visible=F0GPUVisible
+                    )
+                    spk_id5 = gr.Slider(
+                        minimum=0,
+                        maximum=4,
+                        step=1,
+                        label=i18n("请指定说话人id"),
+                        value=0,
+                        interactive=True,
+                        visible=False
+                    )
+                    but1.click(
+                        preprocess_dataset,
+                        [trainset_dir4, exp_dir1, sr2, np7],
+                        [info1],
+                        api_name="train_preprocess",
+                    )
+                with gr.Column():
+                    f0method8 = gr.Radio(
+                        label=i18n(
+                            "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢,rmvpe效果最好且微吃CPU/GPU"
+                        ),
+                        choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"],
+                        value="rmvpe_gpu",
+                        interactive=True,
+                    )
+                    gpus_rmvpe = gr.Textbox(
+                        label=i18n(
+                            "rmvpe卡号配置：以-分隔输入使用的不同进程卡号,例如0-0-1使用在卡0上跑2个进程并在卡1上跑1个进程"
+                        ),
+                        value="%s-%s" % (gpus, gpus),
+                        interactive=True,
+                        visible=F0GPUVisible,
+                    )
+                    but2 = gr.Button(i18n("特征提取"), variant="primary")
+                    info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
+                    f0method8.change(
+                        fn=change_f0_method,
+                        inputs=[f0method8],
+                        outputs=[gpus_rmvpe],
+                    )
+                    but2.click(
+                        extract_f0_feature,
+                        [
+                            gpus6,
+                            np7,
+                            f0method8,
+                            if_f0_3,
+                            exp_dir1,
+                            version19,
+                            gpus_rmvpe,
+                        ],
+                        [info2],
+                        api_name="train_extract_f0_feature",
+                    )
+                with gr.Column():
+                    total_epoch11 = gr.Slider(
+                        minimum=2,
+                        maximum=1000,
+                        step=1,
+                        label=i18n("总训练轮数total_epoch"),
+                        value=150,
+                        interactive=True,
+                    )
+                    gpus16 = gr.Textbox(
+                            label=i18n("以-分隔输入使用的卡号, 例如   0-1-2   使用卡0和卡1和卡2"),
+                            value="0",
+                            interactive=True,
+                            visible=True
+                        )
+                    but3 = gr.Button(i18n("训练模型"), variant="primary")
+                    but4 = gr.Button(i18n("训练特征索引"), variant="primary")
+                    info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10)
+                    with gr.Accordion(label=i18n("常规设置"), open=False):
+                        save_epoch10 = gr.Slider(
+                            minimum=1,
+                            maximum=50,
+                            step=1,
+                            label=i18n("保存频率save_every_epoch"),
+                            value=25,
+                            interactive=True,
+                        )
+                        batch_size12 = gr.Slider(
+                            minimum=1,
+                            maximum=40,
+                            step=1,
+                            label=i18n("每张显卡的batch_size"),
+                            value=default_batch_size,
+                            interactive=True,
+                        )
+                        if_save_latest13 = gr.Radio(
+                            label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"),
+                            choices=[i18n("是"), i18n("否")],
+                            value=i18n("是"),
+                            interactive=True,
+                        )
+                        if_cache_gpu17 = gr.Radio(
+                            label=i18n(
+                                "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速"
+                            ),
+                            choices=[i18n("是"), i18n("否")],
+                            value=i18n("否"),
+                            interactive=True,
+                        )
+                        if_save_every_weights18 = gr.Radio(
+                            label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"),
+                            choices=[i18n("是"), i18n("否")],
+                            value=i18n("是"),
+                            interactive=True,
+                        )
+                    with gr.Row():
+                        pretrained_G14 = gr.Textbox(
+                            label=i18n("加载预训练底模G路径"),
+                            value="assets/pretrained_v2/f0G40k.pth",
+                            interactive=True,
+                            visible=False
+                        )
+                        pretrained_D15 = gr.Textbox(
+                            label=i18n("加载预训练底模D路径"),
+                            value="assets/pretrained_v2/f0D40k.pth",
+                            interactive=True,
+                            visible=False
+                        )
+                        sr2.change(
+                            change_sr2,
+                            [sr2, if_f0_3, version19],
+                            [pretrained_G14, pretrained_D15],
+                        )
+                        version19.change(
+                            change_version19,
+                            [sr2, if_f0_3, version19],
+                            [pretrained_G14, pretrained_D15, sr2],
+                        )
+                        if_f0_3.change(
+                            change_f0,
+                            [if_f0_3, sr2, version19],
+                            [f0method8, pretrained_G14, pretrained_D15],
+                        )
+                    with gr.Row():
+                        but5 = gr.Button(i18n("一键训练"), variant="primary", visible=False)
+                        but3.click(
+                            click_train,
+                            [
+                                exp_dir1,
+                                sr2,
+                                if_f0_3,
+                                spk_id5,
+                                save_epoch10,
+                                total_epoch11,
+                                batch_size12,
+                                if_save_latest13,
+                                pretrained_G14,
+                                pretrained_D15,
+                                gpus16,
+                                if_cache_gpu17,
+                                if_save_every_weights18,
+                                version19,
+                            ],
+                            info3,
+                            api_name="train_start",
+                        )
+                        but4.click(train_index, [exp_dir1, version19], info3)
+                        but5.click(
+                            train1key,
+                            [
+                                exp_dir1,
+                                sr2,
+                                if_f0_3,
+                                trainset_dir4,
+                                spk_id5,
+                                np7,
+                                f0method8,
+                                save_epoch10,
+                                total_epoch11,
+                                batch_size12,
+                                if_save_latest13,
+                                pretrained_G14,
+                                pretrained_D15,
+                                gpus16,
+                                if_cache_gpu17,
+                                if_save_every_weights18,
+                                version19,
+                                gpus_rmvpe,
+                            ],
+                            info3,
+                            api_name="train_start_all",
+                        )
+    if config.iscolab:
+        app.queue(concurrency_count=511, max_size=1022).launch(share=True)
+    else:
+        app.queue(concurrency_count=511, max_size=1022).launch(
+            server_name="0.0.0.0",
+            inbrowser=not config.noautoopen,
+            server_port=config.listen_port,
+            quiet=True,
+        )

a.png ADDED Viewed

app.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

audios/somegirl.mp3 ADDED Viewed

Binary file (32.2 kB). View file

audios/someguy.mp3 ADDED Viewed

Binary file (24.9 kB). View file

audios/unachica.mp3 ADDED Viewed

Binary file (36.4 kB). View file

audios/unchico.mp3 ADDED Viewed

Binary file (35.9 kB). View file

configs/config.py CHANGED Viewed

@@ -5,13 +5,10 @@ import json
 from multiprocessing import cpu_count
 import torch
 try:
-    import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import
     if torch.xpu.is_available():
         from infer.modules.ipex import ipex_init
         ipex_init()
 except Exception:
     pass

 from multiprocessing import cpu_count
 import torch
 try:
+    import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
     if torch.xpu.is_available():
         from infer.modules.ipex import ipex_init
         ipex_init()
 except Exception:
     pass

docker-compose.yml CHANGED Viewed

@@ -10,11 +10,4 @@ services:
       - ./opt:/app/opt
       # - ./dataset:/app/dataset # you can use this folder in order to provide your dataset for model training
     ports:
-      - 7865:7865
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]

       - ./opt:/app/opt
       # - ./dataset:/app/dataset # you can use this folder in order to provide your dataset for model training
     ports:
+      - 7865:7865

docs/en/README.en.md CHANGED Viewed

@@ -57,9 +57,6 @@ pip install torch torchvision torchaudio
 #For Windows + Nvidia Ampere Architecture(RTX30xx), you need to specify the cuda version corresponding to pytorch according to the experience of https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/issues/21
 #pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
-#For Linux + AMD Cards, you need to use the following pytorch versions:
-#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2
 ```
 Then you can use poetry to install the other dependencies:
@@ -78,14 +75,12 @@ You can also use pip to install them:
 for Nvidia graphics cards
   pip install -r requirements.txt
-for AMD/Intel graphics cards on Windows (DirectML)：
   pip install -r requirements-dml.txt
 for Intel ARC graphics cards on Linux / WSL using Python 3.10:
   pip install -r requirements-ipex.txt
-for AMD graphics cards on Linux (ROCm):
-  pip install -r requirements-amd.txt
 ```
 ------
@@ -140,32 +135,8 @@ Then use this command to start Webui:
 ```bash
 python infer-web.py
 ```
 If you are using Windows or macOS, you can download and extract `RVC-beta.7z` to use RVC directly by using `go-web.bat` on windows or `sh ./run.sh` on macOS to start Webui.
-## ROCm Support for AMD graphic cards (Linux only)
-To use ROCm on Linux install all required drivers as described [here](https://rocm.docs.amd.com/en/latest/deploy/linux/os-native/install.html).
-On Arch use pacman to install the driver:
-````
-pacman -S rocm-hip-sdk rocm-opencl-sdk
-````
-You might also need to set these environment variables (e.g. on a RX6700XT):
-````
-export ROCM_PATH=/opt/rocm
-export HSA_OVERRIDE_GFX_VERSION=10.3.0
-````
-Also make sure your user is part of the `render` and `video` group:
-````
-sudo usermod -aG render $USERNAME
-sudo usermod -aG video $USERNAME
-````
-After that you can run the WebUI:
-```bash
-python infer-web.py
-```
 ## Credits
 + [ContentVec](https://github.com/auspicious3000/contentvec/)
 + [VITS](https://github.com/jaywalnut310/vits)

 #For Windows + Nvidia Ampere Architecture(RTX30xx), you need to specify the cuda version corresponding to pytorch according to the experience of https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/issues/21
 #pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
 ```
 Then you can use poetry to install the other dependencies:
 for Nvidia graphics cards
   pip install -r requirements.txt
+for AMD/Intel graphics cards：
   pip install -r requirements-dml.txt
 for Intel ARC graphics cards on Linux / WSL using Python 3.10:
   pip install -r requirements-ipex.txt
 ```
 ------
 ```bash
 python infer-web.py
 ```
 If you are using Windows or macOS, you can download and extract `RVC-beta.7z` to use RVC directly by using `go-web.bat` on windows or `sh ./run.sh` on macOS to start Webui.
 ## Credits
 + [ContentVec](https://github.com/auspicious3000/contentvec/)
 + [VITS](https://github.com/jaywalnut310/vits)

download_files.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import subprocess, os
+assets_folder = "./assets/"
+if not os.path.exists(assets_folder):
+    os.makedirs(assets_folder)
+files = {
+    "rmvpe/rmvpe.pt":"https://huggingface.co/Rejekts/project/resolve/main/rmvpe.pt",
+    "hubert/hubert_base.pt":"https://huggingface.co/Rejekts/project/resolve/main/hubert_base.pt",
+    "pretrained_v2/D40k.pth":"https://huggingface.co/Rejekts/project/resolve/main/D40k.pth",
+    "pretrained_v2/G40k.pth":"https://huggingface.co/Rejekts/project/resolve/main/G40k.pth",
+    "pretrained_v2/f0D40k.pth":"https://huggingface.co/Rejekts/project/resolve/main/f0D40k.pth",
+    "pretrained_v2/f0G40k.pth":"https://huggingface.co/Rejekts/project/resolve/main/f0G40k.pth"
+}
+for file, link in files.items():
+    file_path = os.path.join(assets_folder, file)
+    if not os.path.exists(file_path):
+        try:
+            subprocess.run(['wget', link, '-O', file_path], check=True)
+        except subprocess.CalledProcessError as e:
+            print(f"Error downloading {file}: {e}")

gui_v1.py CHANGED Viewed

@@ -377,7 +377,7 @@ if __name__ == "__main__":
                     )
                 if event == "start_vc" and self.flag_vc == False:
                     if self.set_values(values) == True:
-                        logger.info("cuda_is_available: %s", torch.cuda.is_available())
                         self.start_vc()
                         settings = {
                             "pth_path": values["pth_path"],
@@ -478,28 +478,15 @@ if __name__ == "__main__":
                 inp_q,
                 opt_q,
                 device,
-                self.rvc if hasattr(self, "rvc") else None,
             )
             self.config.samplerate = self.rvc.tgt_sr
             self.zc = self.rvc.tgt_sr // 100
-            self.block_frame = (
-                int(np.round(self.config.block_time * self.config.samplerate / self.zc))
-                * self.zc
-            )
             self.block_frame_16k = 160 * self.block_frame // self.zc
-            self.crossfade_frame = (
-                int(
-                    np.round(
-                        self.config.crossfade_time * self.config.samplerate / self.zc
-                    )
-                )
-                * self.zc
-            )
             self.sola_search_frame = self.zc
-            self.extra_frame = (
-                int(np.round(self.config.extra_time * self.config.samplerate / self.zc))
-                * self.zc
-            )
             self.input_wav: torch.Tensor = torch.zeros(
                 self.extra_frame
                 + self.crossfade_frame
@@ -508,11 +495,7 @@ if __name__ == "__main__":
                 device=device,
                 dtype=torch.float32,
             )
-            self.input_wav_res: torch.Tensor = torch.zeros(
-                160 * self.input_wav.shape[0] // self.zc,
-                device=device,
-                dtype=torch.float32,
-            )
             self.pitch: np.ndarray = np.zeros(
                 self.input_wav.shape[0] // self.zc,
                 dtype="int32",
@@ -526,9 +509,7 @@ if __name__ == "__main__":
             )
             self.nr_buffer: torch.Tensor = self.sola_buffer.clone()
             self.output_buffer: torch.Tensor = self.input_wav.clone()
-            self.res_buffer: torch.Tensor = torch.zeros(
-                2 * self.zc, device=device, dtype=torch.float32
-            )
             self.valid_rate = 1 - (self.extra_frame - 1) / self.input_wav.shape[0]
             self.fade_in_window: torch.Tensor = (
                 torch.sin(
@@ -548,9 +529,7 @@ if __name__ == "__main__":
             self.resampler = tat.Resample(
                 orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32
             ).to(device)
-            self.tg = TorchGate(
-                sr=self.config.samplerate, n_fft=4 * self.zc, prop_decrease=0.9
-            ).to(device)
             thread_vc = threading.Thread(target=self.soundinput)
             thread_vc.start()
@@ -581,7 +560,7 @@ if __name__ == "__main__":
             indata = librosa.to_mono(indata.T)
             if self.config.threhold > -60:
                 rms = librosa.feature.rms(
-                    y=indata, frame_length=4 * self.zc, hop_length=self.zc
                 )
                 db_threhold = (
                     librosa.amplitude_to_db(rms, ref=1.0)[0] < self.config.threhold
@@ -589,44 +568,28 @@ if __name__ == "__main__":
                 for i in range(db_threhold.shape[0]):
                     if db_threhold[i]:
                         indata[i * self.zc : (i + 1) * self.zc] = 0
-            self.input_wav[: -self.block_frame] = self.input_wav[
-                self.block_frame :
-            ].clone()
-            self.input_wav[-self.block_frame :] = torch.from_numpy(indata).to(device)
-            self.input_wav_res[: -self.block_frame_16k] = self.input_wav_res[
-                self.block_frame_16k :
-            ].clone()
             # input noise reduction and resampling
             if self.config.I_noise_reduce:
-                input_wav = self.input_wav[
-                    -self.crossfade_frame - self.block_frame - 2 * self.zc :
-                ]
-                input_wav = self.tg(
-                    input_wav.unsqueeze(0), self.input_wav.unsqueeze(0)
-                )[0, 2 * self.zc :]
                 input_wav[: self.crossfade_frame] *= self.fade_in_window
-                input_wav[: self.crossfade_frame] += (
-                    self.nr_buffer * self.fade_out_window
-                )
-                self.nr_buffer[:] = input_wav[-self.crossfade_frame :]
-                input_wav = torch.cat(
-                    (self.res_buffer[:], input_wav[: self.block_frame])
-                )
-                self.res_buffer[:] = input_wav[-2 * self.zc :]
-                self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(
-                    input_wav
-                )[160:]
             else:
-                self.input_wav_res[-self.block_frame_16k - 160 :] = self.resampler(
-                    self.input_wav[-self.block_frame - 2 * self.zc :]
-                )[160:]
             # infer
             f0_extractor_frame = self.block_frame_16k + 800
-            if self.config.f0method == "rmvpe":
                 f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1)
             infer_wav = self.rvc.infer(
                 self.input_wav_res,
-                self.input_wav_res[-f0_extractor_frame:].cpu().numpy(),
                 self.block_frame_16k,
                 self.valid_rate,
                 self.pitch,
@@ -638,77 +601,48 @@ if __name__ == "__main__":
             ]
             # output noise reduction
             if self.config.O_noise_reduce:
-                self.output_buffer[: -self.block_frame] = self.output_buffer[
-                    self.block_frame :
-                ].clone()
-                self.output_buffer[-self.block_frame :] = infer_wav[-self.block_frame :]
-                infer_wav = self.tg(
-                    infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0)
-                ).squeeze(0)
             # volume envelop mixing
             if self.config.rms_mix_rate < 1:
                 rms1 = librosa.feature.rms(
-                    y=self.input_wav_res[-160 * infer_wav.shape[0] // self.zc :]
-                    .cpu()
-                    .numpy(),
-                    frame_length=640,
-                    hop_length=160,
                 )
                 rms1 = torch.from_numpy(rms1).to(device)
                 rms1 = F.interpolate(
-                    rms1.unsqueeze(0),
-                    size=infer_wav.shape[0] + 1,
-                    mode="linear",
-                    align_corners=True,
-                )[0, 0, :-1]
                 rms2 = librosa.feature.rms(
-                    y=infer_wav[:].cpu().numpy(),
-                    frame_length=4 * self.zc,
-                    hop_length=self.zc,
                 )
                 rms2 = torch.from_numpy(rms2).to(device)
                 rms2 = F.interpolate(
-                    rms2.unsqueeze(0),
-                    size=infer_wav.shape[0] + 1,
-                    mode="linear",
-                    align_corners=True,
-                )[0, 0, :-1]
                 rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-3)
-                infer_wav *= torch.pow(
-                    rms1 / rms2, torch.tensor(1 - self.config.rms_mix_rate)
-                )
             # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC
-            conv_input = infer_wav[
-                None, None, : self.crossfade_frame + self.sola_search_frame
-            ]
             cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
             cor_den = torch.sqrt(
-                F.conv1d(
-                    conv_input**2,
-                    torch.ones(1, 1, self.crossfade_frame, device=device),
-                )
-                + 1e-8
-            )
             if sys.platform == "darwin":
                 _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0])
                 sola_offset = sola_offset.item()
             else:
                 sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
             logger.debug("sola_offset = %d", int(sola_offset))
-            infer_wav = infer_wav[
-                sola_offset : sola_offset + self.block_frame + self.crossfade_frame
-            ]
             infer_wav[: self.crossfade_frame] *= self.fade_in_window
-            infer_wav[: self.crossfade_frame] += self.sola_buffer * self.fade_out_window
-            self.sola_buffer[:] = infer_wav[-self.crossfade_frame :]
             if sys.platform == "darwin":
-                outdata[:] = (
-                    infer_wav[: -self.crossfade_frame].cpu().numpy()[:, np.newaxis]
-                )
             else:
-                outdata[:] = (
-                    infer_wav[: -self.crossfade_frame].repeat(2, 1).t().cpu().numpy()
-                )
             total_time = time.perf_counter() - start_time
             self.window["infer_time"].update(int(total_time * 1000))
             logger.info("Infer time: %.2f", total_time)
@@ -764,7 +698,9 @@ if __name__ == "__main__":
             sd.default.device[1] = output_device_indices[
                 output_devices.index(output_device)
             ]
-            logger.info("Input device: %s:%s", str(sd.default.device[0]), input_device)
             logger.info(
                 "Output device: %s:%s", str(sd.default.device[1]), output_device
             )

                     )
                 if event == "start_vc" and self.flag_vc == False:
                     if self.set_values(values) == True:
+                        logger.info("Use CUDA: %s", torch.cuda.is_available())
                         self.start_vc()
                         settings = {
                             "pth_path": values["pth_path"],
                 inp_q,
                 opt_q,
                 device,
+                self.rvc if hasattr(self, "rvc") else None
             )
             self.config.samplerate = self.rvc.tgt_sr
             self.zc = self.rvc.tgt_sr // 100
+            self.block_frame = int(np.round(self.config.block_time * self.config.samplerate / self.zc)) * self.zc
             self.block_frame_16k = 160 * self.block_frame // self.zc
+            self.crossfade_frame = int(np.round(self.config.crossfade_time * self.config.samplerate / self.zc)) * self.zc
             self.sola_search_frame = self.zc
+            self.extra_frame = int(np.round(self.config.extra_time * self.config.samplerate / self.zc)) * self.zc
             self.input_wav: torch.Tensor = torch.zeros(
                 self.extra_frame
                 + self.crossfade_frame
                 device=device,
                 dtype=torch.float32,
             )
+            self.input_wav_res: torch.Tensor= torch.zeros(160 * self.input_wav.shape[0] // self.zc, device=device,dtype=torch.float32)
             self.pitch: np.ndarray = np.zeros(
                 self.input_wav.shape[0] // self.zc,
                 dtype="int32",
             )
             self.nr_buffer: torch.Tensor = self.sola_buffer.clone()
             self.output_buffer: torch.Tensor = self.input_wav.clone()
+            self.res_buffer: torch.Tensor = torch.zeros(2 * self.zc, device=device,dtype=torch.float32)
             self.valid_rate = 1 - (self.extra_frame - 1) / self.input_wav.shape[0]
             self.fade_in_window: torch.Tensor = (
                 torch.sin(
             self.resampler = tat.Resample(
                 orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32
             ).to(device)
+            self.tg = TorchGate(sr=self.config.samplerate, n_fft=4*self.zc, prop_decrease=0.9).to(device)
             thread_vc = threading.Thread(target=self.soundinput)
             thread_vc.start()
             indata = librosa.to_mono(indata.T)
             if self.config.threhold > -60:
                 rms = librosa.feature.rms(
+                y=indata, frame_length=4*self.zc, hop_length=self.zc
                 )
                 db_threhold = (
                     librosa.amplitude_to_db(rms, ref=1.0)[0] < self.config.threhold
                 for i in range(db_threhold.shape[0]):
                     if db_threhold[i]:
                         indata[i * self.zc : (i + 1) * self.zc] = 0
+            self.input_wav[: -self.block_frame] = self.input_wav[self.block_frame :].clone()
+            self.input_wav[-self.block_frame: ] = torch.from_numpy(indata).to(device)
+            self.input_wav_res[ : -self.block_frame_16k] = self.input_wav_res[self.block_frame_16k :].clone()
             # input noise reduction and resampling
             if self.config.I_noise_reduce:
+                input_wav = self.input_wav[-self.crossfade_frame -self.block_frame-2*self.zc: ]
+                input_wav = self.tg(input_wav.unsqueeze(0), self.input_wav.unsqueeze(0))[0, 2*self.zc:]
                 input_wav[: self.crossfade_frame] *= self.fade_in_window
+                input_wav[: self.crossfade_frame] += self.nr_buffer * self.fade_out_window
+                self.nr_buffer[:] = input_wav[-self.crossfade_frame: ]
+                input_wav = torch.cat((self.res_buffer[:], input_wav[: self.block_frame]))
+                self.res_buffer[:] = input_wav[-2*self.zc: ]
+                self.input_wav_res[-self.block_frame_16k-160: ] = self.resampler(input_wav)[160: ]
             else:
+                self.input_wav_res[-self.block_frame_16k-160: ] = self.resampler(self.input_wav[-self.block_frame-2*self.zc: ])[160: ]
             # infer
             f0_extractor_frame = self.block_frame_16k + 800
+            if self.config.f0method == 'rmvpe':
                 f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1)
             infer_wav = self.rvc.infer(
                 self.input_wav_res,
+                self.input_wav_res[-f0_extractor_frame :].cpu().numpy(),
                 self.block_frame_16k,
                 self.valid_rate,
                 self.pitch,
             ]
             # output noise reduction
             if self.config.O_noise_reduce:
+                self.output_buffer[: -self.block_frame] = self.output_buffer[self.block_frame :].clone()
+                self.output_buffer[-self.block_frame: ] = infer_wav[-self.block_frame:]
+                infer_wav = self.tg(infer_wav.unsqueeze(0), self.output_buffer.unsqueeze(0)).squeeze(0)
             # volume envelop mixing
             if self.config.rms_mix_rate < 1:
                 rms1 = librosa.feature.rms(
+                y=self.input_wav_res[-160*infer_wav.shape[0]//self.zc :].cpu().numpy(),
+                frame_length=640,
+                hop_length=160,
                 )
                 rms1 = torch.from_numpy(rms1).to(device)
                 rms1 = F.interpolate(
+                    rms1.unsqueeze(0), size=infer_wav.shape[0] + 1, mode="linear",align_corners=True,
+                )[0,0,:-1]
                 rms2 = librosa.feature.rms(
+                y=infer_wav[:].cpu().numpy(), frame_length=4*self.zc, hop_length=self.zc
                 )
                 rms2 = torch.from_numpy(rms2).to(device)
                 rms2 = F.interpolate(
+                    rms2.unsqueeze(0), size=infer_wav.shape[0] + 1, mode="linear",align_corners=True,
+                )[0,0,:-1]
                 rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-3)
+                infer_wav *= torch.pow(rms1 / rms2, torch.tensor(1 - self.config.rms_mix_rate))
             # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC
+            conv_input = infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame]
             cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
             cor_den = torch.sqrt(
+                F.conv1d(conv_input ** 2, torch.ones(1, 1, self.crossfade_frame, device=device)) + 1e-8)
             if sys.platform == "darwin":
                 _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0])
                 sola_offset = sola_offset.item()
             else:
                 sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
             logger.debug("sola_offset = %d", int(sola_offset))
+            infer_wav = infer_wav[sola_offset: sola_offset + self.block_frame + self.crossfade_frame]
             infer_wav[: self.crossfade_frame] *= self.fade_in_window
+            infer_wav[: self.crossfade_frame] += self.sola_buffer *self.fade_out_window
+            self.sola_buffer[:] = infer_wav[-self.crossfade_frame:]
             if sys.platform == "darwin":
+                outdata[:] = infer_wav[:-self.crossfade_frame].cpu().numpy()[:, np.newaxis]
             else:
+                outdata[:] = infer_wav[:-self.crossfade_frame].repeat(2, 1).t().cpu().numpy()
             total_time = time.perf_counter() - start_time
             self.window["infer_time"].update(int(total_time * 1000))
             logger.info("Infer time: %.2f", total_time)
             sd.default.device[1] = output_device_indices[
                 output_devices.index(output_device)
             ]
+            logger.info(
+                "Input device: %s:%s", str(sd.default.device[0]), input_device
+            )
             logger.info(
                 "Output device: %s:%s", str(sd.default.device[1]), output_device
             )

infer-web.py CHANGED Viewed

@@ -1028,7 +1028,6 @@ with gr.Blocks(title="RVC WebUI") as app:
                 fn=vc.get_vc,
                 inputs=[sid0, protect0, protect1],
                 outputs=[spk_item, protect0, protect1, file_index2, file_index4],
-                api_name="infer_change_voice",
             )
         with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
             with gr.Group():

                 fn=vc.get_vc,
                 inputs=[sid0, protect0, protect1],
                 outputs=[spk_item, protect0, protect1, file_index2, file_index4],
             )
         with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
             with gr.Group():

infer/lib/audio.py CHANGED Viewed

@@ -3,49 +3,38 @@ import numpy as np
 import av
 from io import BytesIO
 def wav2(i, o, format):
-    inp = av.open(i, "rb")
-    if format == "m4a":
-        format = "mp4"
-    out = av.open(o, "wb", format=format)
-    if format == "ogg":
-        format = "libvorbis"
-    if format == "mp4":
-        format = "aac"
     ostream = out.add_stream(format)
     for frame in inp.decode(audio=0):
-        for p in ostream.encode(frame):
-            out.mux(p)
-    for p in ostream.encode(None):
-        out.mux(p)
     out.close()
     inp.close()
 def audio2(i, o, format, sr):
-    inp = av.open(i, "rb")
-    out = av.open(o, "wb", format=format)
-    if format == "ogg":
-        format = "libvorbis"
-    if format == "f32le":
-        format = "pcm_f32le"
     ostream = out.add_stream(format, channels=1)
     ostream.sample_rate = sr
     for frame in inp.decode(audio=0):
-        for p in ostream.encode(frame):
-            out.mux(p)
     out.close()
     inp.close()
 def load_audio(file, sr):
     try:
         file = (

 import av
 from io import BytesIO
 def wav2(i, o, format):
+    inp = av.open(i, 'rb')
+    if format == "m4a": format = "mp4"
+    out = av.open(o, 'wb', format=format)
+    if format == "ogg": format = "libvorbis"
+    if format == "mp4": format = "aac"
     ostream = out.add_stream(format)
     for frame in inp.decode(audio=0):
+        for p in ostream.encode(frame): out.mux(p)
+    for p in ostream.encode(None): out.mux(p)
     out.close()
     inp.close()
 def audio2(i, o, format, sr):
+    inp = av.open(i, 'rb')
+    out = av.open(o, 'wb', format=format)
+    if format == "ogg": format = "libvorbis"
+    if format == "f32le": format = "pcm_f32le"
     ostream = out.add_stream(format, channels=1)
     ostream.sample_rate = sr
     for frame in inp.decode(audio=0):
+        for p in ostream.encode(frame): out.mux(p)
     out.close()
     inp.close()
 def load_audio(file, sr):
     try:
         file = (

infer/lib/infer_pack/models.py CHANGED Viewed

@@ -15,7 +15,6 @@ from infer.lib.infer_pack.commons import get_padding, init_weights
 has_xpu = bool(hasattr(torch, "xpu") and torch.xpu.is_available())
 class TextEncoder256(nn.Module):
     def __init__(
         self,
@@ -1159,9 +1158,7 @@ class DiscriminatorP(torch.nn.Module):
         if t % self.period != 0:  # pad first
             n_pad = self.period - (t % self.period)
             if has_xpu and x.dtype == torch.bfloat16:
-                x = F.pad(x.to(dtype=torch.float16), (0, n_pad), "reflect").to(
-                    dtype=torch.bfloat16
-                )
             else:
                 x = F.pad(x, (0, n_pad), "reflect")
             t = t + n_pad

 has_xpu = bool(hasattr(torch, "xpu") and torch.xpu.is_available())
 class TextEncoder256(nn.Module):
     def __init__(
         self,
         if t % self.period != 0:  # pad first
             n_pad = self.period - (t % self.period)
             if has_xpu and x.dtype == torch.bfloat16:
+                x = F.pad(x.to(dtype=torch.float16), (0, n_pad), "reflect").to(dtype=torch.bfloat16)
             else:
                 x = F.pad(x, (0, n_pad), "reflect")
             t = t + n_pad

infer/lib/rmvpe.py CHANGED Viewed

@@ -2,14 +2,11 @@ import pdb, os
 import numpy as np
 import torch
 try:
-    # Fix "Torch not compiled with CUDA enabled"
-    import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import
     if torch.xpu.is_available():
         from infer.modules.ipex import ipex_init
         ipex_init()
 except Exception:
     pass

 import numpy as np
 import torch
 try:
+    #Fix "Torch not compiled with CUDA enabled"
+    import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
     if torch.xpu.is_available():
         from infer.modules.ipex import ipex_init
         ipex_init()
 except Exception:
     pass

infer/modules/ipex/__init__.py CHANGED Viewed

@@ -2,16 +2,15 @@ import os
 import sys
 import contextlib
 import torch
-import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import
 from .hijacks import ipex_hijacks
 from .attention import attention_init
 # pylint: disable=protected-access, missing-function-docstring, line-too-long
-def ipex_init():  # pylint: disable=too-many-statements
     try:
-        # Replace cuda with xpu:
         torch.cuda.current_device = torch.xpu.current_device
         torch.cuda.current_stream = torch.xpu.current_stream
         torch.cuda.device = torch.xpu.device
@@ -92,11 +91,11 @@ def ipex_init():  # pylint: disable=too-many-statements
         torch.cuda.CharStorage = torch.xpu.CharStorage
         torch.cuda.__file__ = torch.xpu.__file__
         torch.cuda._is_in_bad_fork = torch.xpu.lazy_init._is_in_bad_fork
-        # torch.cuda.is_current_stream_capturing = torch.xpu.is_current_stream_capturing
-        # Memory:
         torch.cuda.memory = torch.xpu.memory
-        if "linux" in sys.platform and "WSL2" in os.popen("uname -a").read():
             torch.xpu.empty_cache = lambda: None
         torch.cuda.empty_cache = torch.xpu.empty_cache
         torch.cuda.memory_stats = torch.xpu.memory_stats
@@ -112,11 +111,9 @@ def ipex_init():  # pylint: disable=too-many-statements
         torch.cuda.reset_max_memory_cached = torch.xpu.reset_peak_memory_stats
         torch.cuda.reset_max_memory_allocated = torch.xpu.reset_peak_memory_stats
         torch.cuda.memory_stats_as_nested_dict = torch.xpu.memory_stats_as_nested_dict
-        torch.cuda.reset_accumulated_memory_stats = (
-            torch.xpu.reset_accumulated_memory_stats
-        )
-        # RNG:
         torch.cuda.get_rng_state = torch.xpu.get_rng_state
         torch.cuda.get_rng_state_all = torch.xpu.get_rng_state_all
         torch.cuda.set_rng_state = torch.xpu.set_rng_state
@@ -127,44 +124,35 @@ def ipex_init():  # pylint: disable=too-many-statements
         torch.cuda.seed_all = torch.xpu.seed_all
         torch.cuda.initial_seed = torch.xpu.initial_seed
-        # AMP:
         torch.cuda.amp = torch.xpu.amp
         if not hasattr(torch.cuda.amp, "common"):
             torch.cuda.amp.common = contextlib.nullcontext()
         torch.cuda.amp.common.amp_definitely_not_available = lambda: False
         try:
             torch.cuda.amp.GradScaler = torch.xpu.amp.GradScaler
-        except Exception:  # pylint: disable=broad-exception-caught
             try:
-                from .gradscaler import (
-                    gradscaler_init,
-                )  # pylint: disable=import-outside-toplevel, import-error
                 gradscaler_init()
                 torch.cuda.amp.GradScaler = torch.xpu.amp.GradScaler
-            except Exception:  # pylint: disable=broad-exception-caught
                 torch.cuda.amp.GradScaler = ipex.cpu.autocast._grad_scaler.GradScaler
-        # C
         torch._C._cuda_getCurrentRawStream = ipex._C._getCurrentStream
         ipex._C._DeviceProperties.major = 2023
         ipex._C._DeviceProperties.minor = 2
-        # Fix functions with ipex:
-        torch.cuda.mem_get_info = lambda device=None: [
-            (
-                torch.xpu.get_device_properties(device).total_memory
-                - torch.xpu.memory_allocated(device)
-            ),
-            torch.xpu.get_device_properties(device).total_memory,
-        ]
         torch._utils._get_available_device_type = lambda: "xpu"
         torch.has_cuda = True
         torch.cuda.has_half = True
         torch.cuda.is_bf16_supported = lambda *args, **kwargs: True
         torch.cuda.is_fp16_supported = lambda *args, **kwargs: True
         torch.version.cuda = "11.7"
-        torch.cuda.get_device_capability = lambda *args, **kwargs: [11, 7]
         torch.cuda.get_device_properties.major = 11
         torch.cuda.get_device_properties.minor = 7
         torch.cuda.ipc_collect = lambda *args, **kwargs: None

 import sys
 import contextlib
 import torch
+import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
 from .hijacks import ipex_hijacks
 from .attention import attention_init
 # pylint: disable=protected-access, missing-function-docstring, line-too-long
+def ipex_init(): # pylint: disable=too-many-statements
     try:
+        #Replace cuda with xpu:
         torch.cuda.current_device = torch.xpu.current_device
         torch.cuda.current_stream = torch.xpu.current_stream
         torch.cuda.device = torch.xpu.device
         torch.cuda.CharStorage = torch.xpu.CharStorage
         torch.cuda.__file__ = torch.xpu.__file__
         torch.cuda._is_in_bad_fork = torch.xpu.lazy_init._is_in_bad_fork
+        #torch.cuda.is_current_stream_capturing = torch.xpu.is_current_stream_capturing
+        #Memory:
         torch.cuda.memory = torch.xpu.memory
+        if 'linux' in sys.platform and "WSL2" in os.popen("uname -a").read():
             torch.xpu.empty_cache = lambda: None
         torch.cuda.empty_cache = torch.xpu.empty_cache
         torch.cuda.memory_stats = torch.xpu.memory_stats
         torch.cuda.reset_max_memory_cached = torch.xpu.reset_peak_memory_stats
         torch.cuda.reset_max_memory_allocated = torch.xpu.reset_peak_memory_stats
         torch.cuda.memory_stats_as_nested_dict = torch.xpu.memory_stats_as_nested_dict
+        torch.cuda.reset_accumulated_memory_stats = torch.xpu.reset_accumulated_memory_stats
+        #RNG:
         torch.cuda.get_rng_state = torch.xpu.get_rng_state
         torch.cuda.get_rng_state_all = torch.xpu.get_rng_state_all
         torch.cuda.set_rng_state = torch.xpu.set_rng_state
         torch.cuda.seed_all = torch.xpu.seed_all
         torch.cuda.initial_seed = torch.xpu.initial_seed
+        #AMP:
         torch.cuda.amp = torch.xpu.amp
         if not hasattr(torch.cuda.amp, "common"):
             torch.cuda.amp.common = contextlib.nullcontext()
         torch.cuda.amp.common.amp_definitely_not_available = lambda: False
         try:
             torch.cuda.amp.GradScaler = torch.xpu.amp.GradScaler
+        except Exception: # pylint: disable=broad-exception-caught
             try:
+                from .gradscaler import gradscaler_init # pylint: disable=import-outside-toplevel, import-error
                 gradscaler_init()
                 torch.cuda.amp.GradScaler = torch.xpu.amp.GradScaler
+            except Exception: # pylint: disable=broad-exception-caught
                 torch.cuda.amp.GradScaler = ipex.cpu.autocast._grad_scaler.GradScaler
+        #C
         torch._C._cuda_getCurrentRawStream = ipex._C._getCurrentStream
         ipex._C._DeviceProperties.major = 2023
         ipex._C._DeviceProperties.minor = 2
+        #Fix functions with ipex:
+        torch.cuda.mem_get_info = lambda device=None: [(torch.xpu.get_device_properties(device).total_memory - torch.xpu.memory_allocated(device)), torch.xpu.get_device_properties(device).total_memory]
         torch._utils._get_available_device_type = lambda: "xpu"
         torch.has_cuda = True
         torch.cuda.has_half = True
         torch.cuda.is_bf16_supported = lambda *args, **kwargs: True
         torch.cuda.is_fp16_supported = lambda *args, **kwargs: True
         torch.version.cuda = "11.7"
+        torch.cuda.get_device_capability = lambda *args, **kwargs: [11,7]
         torch.cuda.get_device_properties.major = 11
         torch.cuda.get_device_properties.minor = 7
         torch.cuda.ipc_collect = lambda *args, **kwargs: None

infer/modules/ipex/attention.py CHANGED Viewed

@@ -1,32 +1,22 @@
 import torch
-import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import
 # pylint: disable=protected-access, missing-function-docstring, line-too-long
 original_torch_bmm = torch.bmm
 def torch_bmm(input, mat2, *, out=None):
     if input.dtype != mat2.dtype:
         mat2 = mat2.to(input.dtype)
-    # ARC GPUs can't allocate more than 4GB to a single block, Slice it:
-    batch_size_attention, input_tokens, mat2_shape = (
-        input.shape[0],
-        input.shape[1],
-        mat2.shape[2],
-    )
     block_multiply = 2.4 if input.dtype == torch.float32 else 1.2
-    block_size = (
-        (batch_size_attention * input_tokens * mat2_shape) / 1024 * block_multiply
-    )  # MB
     split_slice_size = batch_size_attention
     if block_size >= 4000:
         do_split = True
-        # Find something divisible with the input_tokens
-        while (
-            (split_slice_size * input_tokens * mat2_shape) / 1024 * block_multiply
-        ) > 4000:
             split_slice_size = split_slice_size // 2
             if split_slice_size <= 1:
                 split_slice_size = 1
@@ -34,16 +24,12 @@ def torch_bmm(input, mat2, *, out=None):
     else:
         do_split = False
-    split_block_size = (
-        (split_slice_size * input_tokens * mat2_shape) / 1024 * block_multiply
-    )  # MB
     split_2_slice_size = input_tokens
     if split_block_size >= 4000:
         do_split_2 = True
-        # Find something divisible with the input_tokens
-        while (
-            (split_slice_size * split_2_slice_size * mat2_shape) / 1024 * block_multiply
-        ) > 4000:
             split_2_slice_size = split_2_slice_size // 2
             if split_2_slice_size <= 1:
                 split_2_slice_size = 1
@@ -52,61 +38,40 @@ def torch_bmm(input, mat2, *, out=None):
         do_split_2 = False
     if do_split:
-        hidden_states = torch.zeros(
-            input.shape[0],
-            input.shape[1],
-            mat2.shape[2],
-            device=input.device,
-            dtype=input.dtype,
-        )
         for i in range(batch_size_attention // split_slice_size):
             start_idx = i * split_slice_size
             end_idx = (i + 1) * split_slice_size
             if do_split_2:
-                for i2 in range(
-                    input_tokens // split_2_slice_size
-                ):  # pylint: disable=invalid-name
                     start_idx_2 = i2 * split_2_slice_size
                     end_idx_2 = (i2 + 1) * split_2_slice_size
-                    hidden_states[
-                        start_idx:end_idx, start_idx_2:end_idx_2
-                    ] = original_torch_bmm(
                         input[start_idx:end_idx, start_idx_2:end_idx_2],
                         mat2[start_idx:end_idx, start_idx_2:end_idx_2],
-                        out=out,
                     )
             else:
                 hidden_states[start_idx:end_idx] = original_torch_bmm(
-                    input[start_idx:end_idx], mat2[start_idx:end_idx], out=out
                 )
     else:
         return original_torch_bmm(input, mat2, out=out)
     return hidden_states
 original_scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention
-def scaled_dot_product_attention(
-    query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False
-):
-    # ARC GPUs can't allocate more than 4GB to a single block, Slice it:
     shape_one, batch_size_attention, query_tokens, shape_four = query.shape
     block_multiply = 2.4 if query.dtype == torch.float32 else 1.2
-    block_size = (
-        (shape_one * batch_size_attention * query_tokens * shape_four)
-        / 1024
-        * block_multiply
-    )  # MB
     split_slice_size = batch_size_attention
     if block_size >= 4000:
         do_split = True
-        # Find something divisible with the shape_one
-        while (
-            (shape_one * split_slice_size * query_tokens * shape_four)
-            / 1024
-            * block_multiply
-        ) > 4000:
             split_slice_size = split_slice_size // 2
             if split_slice_size <= 1:
                 split_slice_size = 1
@@ -114,20 +79,12 @@ def scaled_dot_product_attention(
     else:
         do_split = False
-    split_block_size = (
-        (shape_one * split_slice_size * query_tokens * shape_four)
-        / 1024
-        * block_multiply
-    )  # MB
     split_2_slice_size = query_tokens
     if split_block_size >= 4000:
         do_split_2 = True
-        # Find something divisible with the batch_size_attention
-        while (
-            (shape_one * split_slice_size * split_2_slice_size * shape_four)
-            / 1024
-            * block_multiply
-        ) > 4000:
             split_2_slice_size = split_2_slice_size // 2
             if split_2_slice_size <= 1:
                 split_2_slice_size = 1
@@ -141,49 +98,31 @@ def scaled_dot_product_attention(
             start_idx = i * split_slice_size
             end_idx = (i + 1) * split_slice_size
             if do_split_2:
-                for i2 in range(
-                    query_tokens // split_2_slice_size
-                ):  # pylint: disable=invalid-name
                     start_idx_2 = i2 * split_2_slice_size
                     end_idx_2 = (i2 + 1) * split_2_slice_size
-                    hidden_states[
-                        :, start_idx:end_idx, start_idx_2:end_idx_2
-                    ] = original_scaled_dot_product_attention(
                         query[:, start_idx:end_idx, start_idx_2:end_idx_2],
                         key[:, start_idx:end_idx, start_idx_2:end_idx_2],
                         value[:, start_idx:end_idx, start_idx_2:end_idx_2],
-                        attn_mask=attn_mask[:, start_idx:end_idx, start_idx_2:end_idx_2]
-                        if attn_mask is not None
-                        else attn_mask,
-                        dropout_p=dropout_p,
-                        is_causal=is_causal,
                     )
             else:
-                hidden_states[
-                    :, start_idx:end_idx
-                ] = original_scaled_dot_product_attention(
                     query[:, start_idx:end_idx],
                     key[:, start_idx:end_idx],
                     value[:, start_idx:end_idx],
-                    attn_mask=attn_mask[:, start_idx:end_idx]
-                    if attn_mask is not None
-                    else attn_mask,
-                    dropout_p=dropout_p,
-                    is_causal=is_causal,
                 )
     else:
         return original_scaled_dot_product_attention(
-            query,
-            key,
-            value,
-            attn_mask=attn_mask,
-            dropout_p=dropout_p,
-            is_causal=is_causal,
         )
     return hidden_states
 def attention_init():
-    # ARC GPUs can't allocate more than 4GB to a single block:
     torch.bmm = torch_bmm
     torch.nn.functional.scaled_dot_product_attention = scaled_dot_product_attention

 import torch
+import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
 # pylint: disable=protected-access, missing-function-docstring, line-too-long
 original_torch_bmm = torch.bmm
 def torch_bmm(input, mat2, *, out=None):
     if input.dtype != mat2.dtype:
         mat2 = mat2.to(input.dtype)
+    #ARC GPUs can't allocate more than 4GB to a single block, Slice it:
+    batch_size_attention, input_tokens, mat2_shape = input.shape[0], input.shape[1], mat2.shape[2]
     block_multiply = 2.4 if input.dtype == torch.float32 else 1.2
+    block_size = (batch_size_attention * input_tokens * mat2_shape) / 1024 * block_multiply #MB
     split_slice_size = batch_size_attention
     if block_size >= 4000:
         do_split = True
+        #Find something divisible with the input_tokens
+        while ((split_slice_size * input_tokens * mat2_shape) / 1024 * block_multiply) > 4000:
             split_slice_size = split_slice_size // 2
             if split_slice_size <= 1:
                 split_slice_size = 1
     else:
         do_split = False
+    split_block_size = (split_slice_size * input_tokens * mat2_shape) / 1024 * block_multiply #MB
     split_2_slice_size = input_tokens
     if split_block_size >= 4000:
         do_split_2 = True
+        #Find something divisible with the input_tokens
+        while ((split_slice_size * split_2_slice_size * mat2_shape) / 1024 * block_multiply) > 4000:
             split_2_slice_size = split_2_slice_size // 2
             if split_2_slice_size <= 1:
                 split_2_slice_size = 1
         do_split_2 = False
     if do_split:
+        hidden_states = torch.zeros(input.shape[0], input.shape[1], mat2.shape[2], device=input.device, dtype=input.dtype)
         for i in range(batch_size_attention // split_slice_size):
             start_idx = i * split_slice_size
             end_idx = (i + 1) * split_slice_size
             if do_split_2:
+                for i2 in range(input_tokens // split_2_slice_size): # pylint: disable=invalid-name
                     start_idx_2 = i2 * split_2_slice_size
                     end_idx_2 = (i2 + 1) * split_2_slice_size
+                    hidden_states[start_idx:end_idx, start_idx_2:end_idx_2] = original_torch_bmm(
                         input[start_idx:end_idx, start_idx_2:end_idx_2],
                         mat2[start_idx:end_idx, start_idx_2:end_idx_2],
+                        out=out
                     )
             else:
                 hidden_states[start_idx:end_idx] = original_torch_bmm(
+                    input[start_idx:end_idx],
+                    mat2[start_idx:end_idx],
+                    out=out
                 )
     else:
         return original_torch_bmm(input, mat2, out=out)
     return hidden_states
 original_scaled_dot_product_attention = torch.nn.functional.scaled_dot_product_attention
+def scaled_dot_product_attention(query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False):
+    #ARC GPUs can't allocate more than 4GB to a single block, Slice it:
     shape_one, batch_size_attention, query_tokens, shape_four = query.shape
     block_multiply = 2.4 if query.dtype == torch.float32 else 1.2
+    block_size = (shape_one * batch_size_attention * query_tokens * shape_four) / 1024 * block_multiply #MB
     split_slice_size = batch_size_attention
     if block_size >= 4000:
         do_split = True
+        #Find something divisible with the shape_one
+        while ((shape_one * split_slice_size * query_tokens * shape_four) / 1024 * block_multiply) > 4000:
             split_slice_size = split_slice_size // 2
             if split_slice_size <= 1:
                 split_slice_size = 1
     else:
         do_split = False
+    split_block_size = (shape_one * split_slice_size * query_tokens * shape_four) / 1024 * block_multiply #MB
     split_2_slice_size = query_tokens
     if split_block_size >= 4000:
         do_split_2 = True
+        #Find something divisible with the batch_size_attention
+        while ((shape_one * split_slice_size * split_2_slice_size * shape_four) / 1024 * block_multiply) > 4000:
             split_2_slice_size = split_2_slice_size // 2
             if split_2_slice_size <= 1:
                 split_2_slice_size = 1
             start_idx = i * split_slice_size
             end_idx = (i + 1) * split_slice_size
             if do_split_2:
+                for i2 in range(query_tokens // split_2_slice_size): # pylint: disable=invalid-name
                     start_idx_2 = i2 * split_2_slice_size
                     end_idx_2 = (i2 + 1) * split_2_slice_size
+                    hidden_states[:, start_idx:end_idx, start_idx_2:end_idx_2] = original_scaled_dot_product_attention(
                         query[:, start_idx:end_idx, start_idx_2:end_idx_2],
                         key[:, start_idx:end_idx, start_idx_2:end_idx_2],
                         value[:, start_idx:end_idx, start_idx_2:end_idx_2],
+                        attn_mask=attn_mask[:, start_idx:end_idx, start_idx_2:end_idx_2] if attn_mask is not None else attn_mask,
+                        dropout_p=dropout_p, is_causal=is_causal
                     )
             else:
+                hidden_states[:, start_idx:end_idx] = original_scaled_dot_product_attention(
                     query[:, start_idx:end_idx],
                     key[:, start_idx:end_idx],
                     value[:, start_idx:end_idx],
+                    attn_mask=attn_mask[:, start_idx:end_idx] if attn_mask is not None else attn_mask,
+                    dropout_p=dropout_p, is_causal=is_causal
                 )
     else:
         return original_scaled_dot_product_attention(
+            query, key, value, attn_mask=attn_mask, dropout_p=dropout_p, is_causal=is_causal
         )
     return hidden_states
 def attention_init():
+    #ARC GPUs can't allocate more than 4GB to a single block:
     torch.bmm = torch_bmm
     torch.nn.functional.scaled_dot_product_attention = scaled_dot_product_attention

infer/modules/ipex/gradscaler.py CHANGED Viewed

@@ -1,20 +1,15 @@
 from collections import defaultdict
 import torch
-import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import
-import intel_extension_for_pytorch._C as core  # pylint: disable=import-error, unused-import
 # pylint: disable=protected-access, missing-function-docstring, line-too-long
 OptState = ipex.cpu.autocast._grad_scaler.OptState
 _MultiDeviceReplicator = ipex.cpu.autocast._grad_scaler._MultiDeviceReplicator
-_refresh_per_optimizer_state = (
-    ipex.cpu.autocast._grad_scaler._refresh_per_optimizer_state
-)
-def _unscale_grads_(
-    self, optimizer, inv_scale, found_inf, allow_fp16
-):  # pylint: disable=unused-argument
     per_device_inv_scale = _MultiDeviceReplicator(inv_scale)
     per_device_found_inf = _MultiDeviceReplicator(found_inf)
@@ -48,9 +43,9 @@ def _unscale_grads_(
                 # -: is there a way to split by device and dtype without appending in the inner loop?
                 to_unscale = to_unscale.to("cpu")
-                per_device_and_dtype_grads[to_unscale.device][to_unscale.dtype].append(
-                    to_unscale
-                )
         for _, per_dtype_grads in per_device_and_dtype_grads.items():
             for grads in per_dtype_grads.values():
@@ -62,7 +57,6 @@ def _unscale_grads_(
     return per_device_found_inf._per_device_tensors
 def unscale_(self, optimizer):
     """
     Divides ("unscales") the optimizer's gradient tensors by the scale factor.
@@ -93,7 +87,7 @@ def unscale_(self, optimizer):
     optimizer_state = self._per_optimizer_states[id(optimizer)]
-    if optimizer_state["stage"] is OptState.UNSCALED:  # pylint: disable=no-else-raise
         raise RuntimeError(
             "unscale_() has already been called on this optimizer since the last update()."
         )
@@ -102,17 +96,16 @@ def unscale_(self, optimizer):
     # FP32 division can be imprecise for certain compile options, so we carry out the reciprocal in FP64.
     assert self._scale is not None
-    inv_scale = (
-        self._scale.to("cpu").double().reciprocal().float().to(self._scale.device)
     )
-    found_inf = torch.full((1,), 0.0, dtype=torch.float32, device=self._scale.device)
     optimizer_state["found_inf_per_device"] = self._unscale_grads_(
         optimizer, inv_scale, found_inf, False
     )
     optimizer_state["stage"] = OptState.UNSCALED
 def update(self, new_scale=None):
     """
     Updates the scale factor.
@@ -178,7 +171,6 @@ def update(self, new_scale=None):
     # To prepare for next iteration, clear the data collected from optimizers this iteration.
     self._per_optimizer_states = defaultdict(_refresh_per_optimizer_state)
 def gradscaler_init():
     torch.xpu.amp.GradScaler = ipex.cpu.autocast._grad_scaler.GradScaler
     torch.xpu.amp.GradScaler._unscale_grads_ = _unscale_grads_

 from collections import defaultdict
 import torch
+import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
+import intel_extension_for_pytorch._C as core # pylint: disable=import-error, unused-import
 # pylint: disable=protected-access, missing-function-docstring, line-too-long
 OptState = ipex.cpu.autocast._grad_scaler.OptState
 _MultiDeviceReplicator = ipex.cpu.autocast._grad_scaler._MultiDeviceReplicator
+_refresh_per_optimizer_state = ipex.cpu.autocast._grad_scaler._refresh_per_optimizer_state
+def _unscale_grads_(self, optimizer, inv_scale, found_inf, allow_fp16): # pylint: disable=unused-argument
     per_device_inv_scale = _MultiDeviceReplicator(inv_scale)
     per_device_found_inf = _MultiDeviceReplicator(found_inf)
                 # -: is there a way to split by device and dtype without appending in the inner loop?
                 to_unscale = to_unscale.to("cpu")
+                per_device_and_dtype_grads[to_unscale.device][
+                    to_unscale.dtype
+                ].append(to_unscale)
         for _, per_dtype_grads in per_device_and_dtype_grads.items():
             for grads in per_dtype_grads.values():
     return per_device_found_inf._per_device_tensors
 def unscale_(self, optimizer):
     """
     Divides ("unscales") the optimizer's gradient tensors by the scale factor.
     optimizer_state = self._per_optimizer_states[id(optimizer)]
+    if optimizer_state["stage"] is OptState.UNSCALED: # pylint: disable=no-else-raise
         raise RuntimeError(
             "unscale_() has already been called on this optimizer since the last update()."
         )
     # FP32 division can be imprecise for certain compile options, so we carry out the reciprocal in FP64.
     assert self._scale is not None
+    inv_scale = self._scale.to("cpu").double().reciprocal().float().to(self._scale.device)
+    found_inf = torch.full(
+        (1,), 0.0, dtype=torch.float32, device=self._scale.device
     )
     optimizer_state["found_inf_per_device"] = self._unscale_grads_(
         optimizer, inv_scale, found_inf, False
     )
     optimizer_state["stage"] = OptState.UNSCALED
 def update(self, new_scale=None):
     """
     Updates the scale factor.
     # To prepare for next iteration, clear the data collected from optimizers this iteration.
     self._per_optimizer_states = defaultdict(_refresh_per_optimizer_state)
 def gradscaler_init():
     torch.xpu.amp.GradScaler = ipex.cpu.autocast._grad_scaler.GradScaler
     torch.xpu.amp.GradScaler._unscale_grads_ = _unscale_grads_

infer/modules/ipex/hijacks.py CHANGED Viewed

@@ -1,59 +1,45 @@
 import contextlib
 import importlib
 import torch
-import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import
 # pylint: disable=protected-access, missing-function-docstring, line-too-long, unnecessary-lambda, no-else-return
-class CondFunc:  # pylint: disable=missing-class-docstring
     def __new__(cls, orig_func, sub_func, cond_func):
         self = super(CondFunc, cls).__new__(cls)
         if isinstance(orig_func, str):
-            func_path = orig_func.split(".")
-            for i in range(len(func_path) - 1, -1, -1):
                 try:
-                    resolved_obj = importlib.import_module(".".join(func_path[:i]))
                     break
                 except ImportError:
                     pass
             for attr_name in func_path[i:-1]:
                 resolved_obj = getattr(resolved_obj, attr_name)
             orig_func = getattr(resolved_obj, func_path[-1])
-            setattr(
-                resolved_obj,
-                func_path[-1],
-                lambda *args, **kwargs: self(*args, **kwargs),
-            )
         self.__init__(orig_func, sub_func, cond_func)
         return lambda *args, **kwargs: self(*args, **kwargs)
     def __init__(self, orig_func, sub_func, cond_func):
         self.__orig_func = orig_func
         self.__sub_func = sub_func
         self.__cond_func = cond_func
     def __call__(self, *args, **kwargs):
         if not self.__cond_func or self.__cond_func(self.__orig_func, *args, **kwargs):
             return self.__sub_func(self.__orig_func, *args, **kwargs)
         else:
             return self.__orig_func(*args, **kwargs)
 _utils = torch.utils.data._utils
 def _shutdown_workers(self):
-    if (
-        torch.utils.data._utils is None
-        or torch.utils.data._utils.python_exit_status is True
-        or torch.utils.data._utils.python_exit_status is None
-    ):
         return
     if hasattr(self, "_shutdown") and not self._shutdown:
         self._shutdown = True
         try:
-            if hasattr(self, "_pin_memory_thread"):
                 self._pin_memory_thread_done_event.set()
                 self._worker_result_queue.put((None, None))
                 self._pin_memory_thread.join()
@@ -63,292 +49,145 @@ def _shutdown_workers(self):
             for worker_id in range(len(self._workers)):
                 if self._persistent_workers or self._workers_status[worker_id]:
                     self._mark_worker_as_unavailable(worker_id, shutdown=True)
-            for w in self._workers:  # pylint: disable=invalid-name
                 w.join(timeout=torch.utils.data._utils.MP_STATUS_CHECK_INTERVAL)
-            for q in self._index_queues:  # pylint: disable=invalid-name
                 q.cancel_join_thread()
                 q.close()
         finally:
             if self._worker_pids_set:
                 torch.utils.data._utils.signal_handling._remove_worker_pids(id(self))
                 self._worker_pids_set = False
-            for w in self._workers:  # pylint: disable=invalid-name
                 if w.is_alive():
                     w.terminate()
-class DummyDataParallel(
-    torch.nn.Module
-):  # pylint: disable=missing-class-docstring, unused-argument, too-few-public-methods
-    def __new__(
-        cls, module, device_ids=None, output_device=None, dim=0
-    ):  # pylint: disable=unused-argument
         if isinstance(device_ids, list) and len(device_ids) > 1:
             print("IPEX backend doesn't support DataParallel on multiple XPU devices")
         return module.to("xpu")
-def return_null_context(*args, **kwargs):  # pylint: disable=unused-argument
     return contextlib.nullcontext()
 def check_device(device):
-    return bool(
-        (isinstance(device, torch.device) and device.type == "cuda")
-        or (isinstance(device, str) and "cuda" in device)
-        or isinstance(device, int)
-    )
 def return_xpu(device):
-    return (
-        f"xpu:{device[-1]}"
-        if isinstance(device, str) and ":" in device
-        else f"xpu:{device}"
-        if isinstance(device, int)
-        else torch.device("xpu")
-        if isinstance(device, torch.device)
-        else "xpu"
-    )
 def ipex_no_cuda(orig_func, *args, **kwargs):
     torch.cuda.is_available = lambda: False
     orig_func(*args, **kwargs)
     torch.cuda.is_available = torch.xpu.is_available
 original_autocast = torch.autocast
 def ipex_autocast(*args, **kwargs):
     if len(args) > 0 and args[0] == "cuda":
         return original_autocast("xpu", *args[1:], **kwargs)
     else:
         return original_autocast(*args, **kwargs)
 original_torch_cat = torch.cat
 def torch_cat(tensor, *args, **kwargs):
-    if len(tensor) == 3 and (
-        tensor[0].dtype != tensor[1].dtype or tensor[2].dtype != tensor[1].dtype
-    ):
-        return original_torch_cat(
-            [tensor[0].to(tensor[1].dtype), tensor[1], tensor[2].to(tensor[1].dtype)],
-            *args,
-            **kwargs,
-        )
     else:
         return original_torch_cat(tensor, *args, **kwargs)
 original_interpolate = torch.nn.functional.interpolate
-def interpolate(
-    tensor,
-    size=None,
-    scale_factor=None,
-    mode="nearest",
-    align_corners=None,
-    recompute_scale_factor=None,
-    antialias=False,
-):  # pylint: disable=too-many-arguments
     if antialias or align_corners is not None:
         return_device = tensor.device
         return_dtype = tensor.dtype
-        return original_interpolate(
-            tensor.to("cpu", dtype=torch.float32),
-            size=size,
-            scale_factor=scale_factor,
-            mode=mode,
-            align_corners=align_corners,
-            recompute_scale_factor=recompute_scale_factor,
-            antialias=antialias,
-        ).to(return_device, dtype=return_dtype)
     else:
-        return original_interpolate(
-            tensor,
-            size=size,
-            scale_factor=scale_factor,
-            mode=mode,
-            align_corners=align_corners,
-            recompute_scale_factor=recompute_scale_factor,
-            antialias=antialias,
-        )
 original_linalg_solve = torch.linalg.solve
-def linalg_solve(A, B, *args, **kwargs):  # pylint: disable=invalid-name
     if A.device != torch.device("cpu") or B.device != torch.device("cpu"):
         return_device = A.device
-        return original_linalg_solve(A.to("cpu"), B.to("cpu"), *args, **kwargs).to(
-            return_device
-        )
     else:
         return original_linalg_solve(A, B, *args, **kwargs)
 def ipex_hijacks():
-    CondFunc(
-        "torch.Tensor.to",
-        lambda orig_func, self, device=None, *args, **kwargs: orig_func(
-            self, return_xpu(device), *args, **kwargs
-        ),
-        lambda orig_func, self, device=None, *args, **kwargs: check_device(device),
-    )
-    CondFunc(
-        "torch.Tensor.cuda",
-        lambda orig_func, self, device=None, *args, **kwargs: orig_func(
-            self, return_xpu(device), *args, **kwargs
-        ),
-        lambda orig_func, self, device=None, *args, **kwargs: check_device(device),
-    )
-    CondFunc(
-        "torch.empty",
-        lambda orig_func, *args, device=None, **kwargs: orig_func(
-            *args, device=return_xpu(device), **kwargs
-        ),
-        lambda orig_func, *args, device=None, **kwargs: check_device(device),
-    )
-    CondFunc(
-        "torch.load",
-        lambda orig_func, *args, map_location=None, **kwargs: orig_func(
-            *args, return_xpu(map_location), **kwargs
-        ),
-        lambda orig_func, *args, map_location=None, **kwargs: map_location is None
-        or check_device(map_location),
-    )
-    CondFunc(
-        "torch.randn",
-        lambda orig_func, *args, device=None, **kwargs: orig_func(
-            *args, device=return_xpu(device), **kwargs
-        ),
-        lambda orig_func, *args, device=None, **kwargs: check_device(device),
-    )
-    CondFunc(
-        "torch.ones",
-        lambda orig_func, *args, device=None, **kwargs: orig_func(
-            *args, device=return_xpu(device), **kwargs
-        ),
-        lambda orig_func, *args, device=None, **kwargs: check_device(device),
-    )
-    CondFunc(
-        "torch.zeros",
-        lambda orig_func, *args, device=None, **kwargs: orig_func(
-            *args, device=return_xpu(device), **kwargs
-        ),
-        lambda orig_func, *args, device=None, **kwargs: check_device(device),
-    )
-    CondFunc(
-        "torch.tensor",
-        lambda orig_func, *args, device=None, **kwargs: orig_func(
-            *args, device=return_xpu(device), **kwargs
-        ),
-        lambda orig_func, *args, device=None, **kwargs: check_device(device),
-    )
-    CondFunc(
-        "torch.linspace",
-        lambda orig_func, *args, device=None, **kwargs: orig_func(
-            *args, device=return_xpu(device), **kwargs
-        ),
-        lambda orig_func, *args, device=None, **kwargs: check_device(device),
-    )
-    CondFunc(
-        "torch.Generator",
         lambda orig_func, device=None: torch.xpu.Generator(device),
-        lambda orig_func, device=None: device is not None
-        and device != torch.device("cpu")
-        and device != "cpu",
-    )
-    CondFunc(
-        "torch.batch_norm",
-        lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(
-            input,
-            weight
-            if weight is not None
-            else torch.ones(input.size()[1], device=input.device),
-            bias
-            if bias is not None
-            else torch.zeros(input.size()[1], device=input.device),
-            *args,
-            **kwargs,
-        ),
-        lambda orig_func, input, *args, **kwargs: input.device != torch.device("cpu"),
-    )
-    CondFunc(
-        "torch.instance_norm",
-        lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(
-            input,
-            weight
-            if weight is not None
-            else torch.ones(input.size()[1], device=input.device),
-            bias
-            if bias is not None
-            else torch.zeros(input.size()[1], device=input.device),
-            *args,
-            **kwargs,
-        ),
-        lambda orig_func, input, *args, **kwargs: input.device != torch.device("cpu"),
-    )
-    # Functions with dtype errors:
-    CondFunc(
-        "torch.nn.modules.GroupNorm.forward",
-        lambda orig_func, self, input: orig_func(
-            self, input.to(self.weight.data.dtype)
-        ),
-        lambda orig_func, self, input: input.dtype != self.weight.data.dtype,
-    )
-    CondFunc(
-        "torch.nn.modules.linear.Linear.forward",
-        lambda orig_func, self, input: orig_func(
-            self, input.to(self.weight.data.dtype)
-        ),
-        lambda orig_func, self, input: input.dtype != self.weight.data.dtype,
-    )
-    CondFunc(
-        "torch.nn.modules.conv.Conv2d.forward",
-        lambda orig_func, self, input: orig_func(
-            self, input.to(self.weight.data.dtype)
-        ),
-        lambda orig_func, self, input: input.dtype != self.weight.data.dtype,
-    )
-    CondFunc(
-        "torch.nn.functional.layer_norm",
-        lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs: orig_func(
-            input.to(weight.data.dtype), normalized_shape, weight, *args, **kwargs
-        ),
-        lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs: weight
-        is not None
-        and input.dtype != weight.data.dtype,
-    )
-    # Diffusers Float64 (ARC GPUs doesn't support double or Float64):
     if not torch.xpu.has_fp64_dtype():
-        CondFunc(
-            "torch.from_numpy",
-            lambda orig_func, ndarray: orig_func(ndarray.astype("float32")),
-            lambda orig_func, ndarray: ndarray.dtype == float,
-        )
-    # Broken functions when torch.cuda.is_available is True:
-    CondFunc(
-        "torch.utils.data.dataloader._BaseDataLoaderIter.__init__",
         lambda orig_func, *args, **kwargs: ipex_no_cuda(orig_func, *args, **kwargs),
-        lambda orig_func, *args, **kwargs: True,
-    )
-    # Functions that make compile mad with CondFunc:
-    torch.utils.data.dataloader._MultiProcessingDataLoaderIter._shutdown_workers = (
-        _shutdown_workers
-    )
     torch.nn.DataParallel = DummyDataParallel
     torch.autocast = ipex_autocast
     torch.cat = torch_cat

 import contextlib
 import importlib
 import torch
+import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
 # pylint: disable=protected-access, missing-function-docstring, line-too-long, unnecessary-lambda, no-else-return
+class CondFunc: # pylint: disable=missing-class-docstring
     def __new__(cls, orig_func, sub_func, cond_func):
         self = super(CondFunc, cls).__new__(cls)
         if isinstance(orig_func, str):
+            func_path = orig_func.split('.')
+            for i in range(len(func_path)-1, -1, -1):
                 try:
+                    resolved_obj = importlib.import_module('.'.join(func_path[:i]))
                     break
                 except ImportError:
                     pass
             for attr_name in func_path[i:-1]:
                 resolved_obj = getattr(resolved_obj, attr_name)
             orig_func = getattr(resolved_obj, func_path[-1])
+            setattr(resolved_obj, func_path[-1], lambda *args, **kwargs: self(*args, **kwargs))
         self.__init__(orig_func, sub_func, cond_func)
         return lambda *args, **kwargs: self(*args, **kwargs)
     def __init__(self, orig_func, sub_func, cond_func):
         self.__orig_func = orig_func
         self.__sub_func = sub_func
         self.__cond_func = cond_func
     def __call__(self, *args, **kwargs):
         if not self.__cond_func or self.__cond_func(self.__orig_func, *args, **kwargs):
             return self.__sub_func(self.__orig_func, *args, **kwargs)
         else:
             return self.__orig_func(*args, **kwargs)
 _utils = torch.utils.data._utils
 def _shutdown_workers(self):
+    if torch.utils.data._utils is None or torch.utils.data._utils.python_exit_status is True or torch.utils.data._utils.python_exit_status is None:
         return
     if hasattr(self, "_shutdown") and not self._shutdown:
         self._shutdown = True
         try:
+            if hasattr(self, '_pin_memory_thread'):
                 self._pin_memory_thread_done_event.set()
                 self._worker_result_queue.put((None, None))
                 self._pin_memory_thread.join()
             for worker_id in range(len(self._workers)):
                 if self._persistent_workers or self._workers_status[worker_id]:
                     self._mark_worker_as_unavailable(worker_id, shutdown=True)
+            for w in self._workers: # pylint: disable=invalid-name
                 w.join(timeout=torch.utils.data._utils.MP_STATUS_CHECK_INTERVAL)
+            for q in self._index_queues: # pylint: disable=invalid-name
                 q.cancel_join_thread()
                 q.close()
         finally:
             if self._worker_pids_set:
                 torch.utils.data._utils.signal_handling._remove_worker_pids(id(self))
                 self._worker_pids_set = False
+            for w in self._workers: # pylint: disable=invalid-name
                 if w.is_alive():
                     w.terminate()
+class DummyDataParallel(torch.nn.Module): # pylint: disable=missing-class-docstring, unused-argument, too-few-public-methods
+    def __new__(cls, module, device_ids=None, output_device=None, dim=0): # pylint: disable=unused-argument
         if isinstance(device_ids, list) and len(device_ids) > 1:
             print("IPEX backend doesn't support DataParallel on multiple XPU devices")
         return module.to("xpu")
+def return_null_context(*args, **kwargs): # pylint: disable=unused-argument
     return contextlib.nullcontext()
 def check_device(device):
+    return bool((isinstance(device, torch.device) and device.type == "cuda") or (isinstance(device, str) and "cuda" in device) or isinstance(device, int))
 def return_xpu(device):
+    return f"xpu:{device[-1]}" if isinstance(device, str) and ":" in device else f"xpu:{device}" if isinstance(device, int) else torch.device("xpu") if isinstance(device, torch.device) else "xpu"
 def ipex_no_cuda(orig_func, *args, **kwargs):
     torch.cuda.is_available = lambda: False
     orig_func(*args, **kwargs)
     torch.cuda.is_available = torch.xpu.is_available
 original_autocast = torch.autocast
 def ipex_autocast(*args, **kwargs):
     if len(args) > 0 and args[0] == "cuda":
         return original_autocast("xpu", *args[1:], **kwargs)
     else:
         return original_autocast(*args, **kwargs)
 original_torch_cat = torch.cat
 def torch_cat(tensor, *args, **kwargs):
+    if len(tensor) == 3 and (tensor[0].dtype != tensor[1].dtype or tensor[2].dtype != tensor[1].dtype):
+        return original_torch_cat([tensor[0].to(tensor[1].dtype), tensor[1], tensor[2].to(tensor[1].dtype)], *args, **kwargs)
     else:
         return original_torch_cat(tensor, *args, **kwargs)
 original_interpolate = torch.nn.functional.interpolate
 def interpolate(tensor, size=None, scale_factor=None, mode='nearest', align_corners=None, recompute_scale_factor=None, antialias=False): # pylint: disable=too-many-arguments
     """Interpolate ``tensor``, detouring through CPU float32 for some options.

     When ``antialias`` is truthy or ``align_corners`` is not None, the input is
     moved to CPU as float32, interpolated there, and the result is moved back
     to the input's original device and dtype. Otherwise the call is forwarded
     unchanged to the original ``torch.nn.functional.interpolate``.
     """
     options = {
         "size": size,
         "scale_factor": scale_factor,
         "mode": mode,
         "align_corners": align_corners,
         "recompute_scale_factor": recompute_scale_factor,
         "antialias": antialias,
     }
     needs_cpu_detour = antialias or align_corners is not None
     if not needs_cpu_detour:
         return original_interpolate(tensor, **options)
     # Remember where the caller's tensor lived so the result can go back there.
     source_device = tensor.device
     source_dtype = tensor.dtype
     cpu_result = original_interpolate(tensor.to("cpu", dtype=torch.float32), **options)
     return cpu_result.to(source_device, dtype=source_dtype)
 original_linalg_solve = torch.linalg.solve
 def linalg_solve(A, B, *args, **kwargs): # pylint: disable=invalid-name
     """Solve the linear system on CPU and return the result on ``A``'s device.

     If both operands already live on the CPU the original
     ``torch.linalg.solve`` is called directly. Otherwise both are copied to
     the CPU, solved there, and the solution is moved to ``A.device``.
     """
     cpu = torch.device("cpu")
     if A.device == cpu and B.device == cpu:
         return original_linalg_solve(A, B, *args, **kwargs)
     target_device = A.device
     cpu_solution = original_linalg_solve(A.to("cpu"), B.to("cpu"), *args, **kwargs)
     return cpu_solution.to(target_device)
 def ipex_hijacks():
     """Install the XPU redirections on torch by registering CondFunc hooks and reassigning attributes.

     Each CondFunc call below names a torch callable, then gives a replacement
     lambda and a condition lambda that share the same signature.
     NOTE(review): CondFunc is defined outside this view; presumably the
     replacement runs only when the condition returns true - confirm.
     NOTE(review): the `interpolate` and `linalg_solve` wrappers defined earlier
     in this file are not assigned to torch anywhere in this function - confirm
     whether that is intentional.
     """
     # Tensor movement and factory functions: when the requested device passes
     # check_device (CUDA-like), substitute the device produced by return_xpu.
+    CondFunc('torch.Tensor.to',
+        lambda orig_func, self, device=None, *args, **kwargs: orig_func(self, return_xpu(device), *args, **kwargs),
+        lambda orig_func, self, device=None, *args, **kwargs: check_device(device))
+    CondFunc('torch.Tensor.cuda',
+        lambda orig_func, self, device=None, *args, **kwargs: orig_func(self, return_xpu(device), *args, **kwargs),
+        lambda orig_func, self, device=None, *args, **kwargs: check_device(device))
+    CondFunc('torch.empty',
+        lambda orig_func, *args, device=None, **kwargs: orig_func(*args, device=return_xpu(device), **kwargs),
+        lambda orig_func, *args, device=None, **kwargs: check_device(device))
     # torch.load: the condition also holds when map_location is None, in which
     # case return_xpu yields "xpu". The new location is passed positionally
     # after *args.
+    CondFunc('torch.load',
+        lambda orig_func, *args, map_location=None, **kwargs: orig_func(*args, return_xpu(map_location), **kwargs),
+        lambda orig_func, *args, map_location=None, **kwargs: map_location is None or check_device(map_location))
+    CondFunc('torch.randn',
+        lambda orig_func, *args, device=None, **kwargs: orig_func(*args, device=return_xpu(device), **kwargs),
+        lambda orig_func, *args, device=None, **kwargs: check_device(device))
+    CondFunc('torch.ones',
+        lambda orig_func, *args, device=None, **kwargs: orig_func(*args, device=return_xpu(device), **kwargs),
+        lambda orig_func, *args, device=None, **kwargs: check_device(device))
+    CondFunc('torch.zeros',
+        lambda orig_func, *args, device=None, **kwargs: orig_func(*args, device=return_xpu(device), **kwargs),
+        lambda orig_func, *args, device=None, **kwargs: check_device(device))
+    CondFunc('torch.tensor',
+        lambda orig_func, *args, device=None, **kwargs: orig_func(*args, device=return_xpu(device), **kwargs),
+        lambda orig_func, *args, device=None, **kwargs: check_device(device))
+    CondFunc('torch.linspace',
+        lambda orig_func, *args, device=None, **kwargs: orig_func(*args, device=return_xpu(device), **kwargs),
+        lambda orig_func, *args, device=None, **kwargs: check_device(device))
     # Generator: any device that is neither None nor CPU is served by
     # torch.xpu.Generator, which receives the device value unchanged.
+    CondFunc('torch.Generator',
         lambda orig_func, device=None: torch.xpu.Generator(device),
+        lambda orig_func, device=None: device is not None and device != torch.device("cpu") and device != "cpu")
     # batch_norm / instance_norm on non-CPU inputs: a None weight is replaced by
     # ones and a None bias by zeros, each of length input.size()[1] and created
     # on the input's device.
+    CondFunc('torch.batch_norm',
+        lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(input,
+        weight if weight is not None else torch.ones(input.size()[1], device=input.device),
+        bias if bias is not None else torch.zeros(input.size()[1], device=input.device), *args, **kwargs),
+        lambda orig_func, input, *args, **kwargs: input.device != torch.device("cpu"))
+    CondFunc('torch.instance_norm',
+        lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(input,
+        weight if weight is not None else torch.ones(input.size()[1], device=input.device),
+        bias if bias is not None else torch.zeros(input.size()[1], device=input.device), *args, **kwargs),
+        lambda orig_func, input, *args, **kwargs: input.device != torch.device("cpu"))
+    #Functions with dtype errors:
     # For each layer below, an input whose dtype differs from the weight's
     # dtype is cast to the weight's dtype before the original forward runs.
+    CondFunc('torch.nn.modules.GroupNorm.forward',
+        lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
+        lambda orig_func, self, input: input.dtype != self.weight.data.dtype)
+    CondFunc('torch.nn.modules.linear.Linear.forward',
+        lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
+        lambda orig_func, self, input: input.dtype != self.weight.data.dtype)
+    CondFunc('torch.nn.modules.conv.Conv2d.forward',
+        lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
+        lambda orig_func, self, input: input.dtype != self.weight.data.dtype)
     # layer_norm is only touched when a weight is supplied and its dtype
     # differs from the input's dtype.
+    CondFunc('torch.nn.functional.layer_norm',
+        lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs:
+        orig_func(input.to(weight.data.dtype), normalized_shape, weight, *args, **kwargs),
+        lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs:
+        weight is not None and input.dtype != weight.data.dtype)
+    #Diffusers Float64 (ARC GPUs doesn't support double or Float64):
     # Only registered when torch.xpu.has_fp64_dtype() is false: arrays whose
     # dtype equals `float` are converted to 'float32' before torch.from_numpy.
     if not torch.xpu.has_fp64_dtype():
+        CondFunc('torch.from_numpy',
+        lambda orig_func, ndarray: orig_func(ndarray.astype('float32')),
+        lambda orig_func, ndarray: ndarray.dtype == float)
+    #Broken functions when torch.cuda.is_available is True:
     # The condition is always True, so every _BaseDataLoaderIter.__init__ call
     # goes through ipex_no_cuda.
+    CondFunc('torch.utils.data.dataloader._BaseDataLoaderIter.__init__',
         lambda orig_func, *args, **kwargs: ipex_no_cuda(orig_func, *args, **kwargs),
+        lambda orig_func, *args, **kwargs: True)
+    #Functions that make compile mad with CondFunc:
     # These are replaced by direct attribute assignment instead of CondFunc.
     # NOTE(review): _shutdown_workers is defined outside this view.
+    torch.utils.data.dataloader._MultiProcessingDataLoaderIter._shutdown_workers = _shutdown_workers
     torch.nn.DataParallel = DummyDataParallel
     torch.autocast = ipex_autocast
     torch.cat = torch_cat

infer/modules/train/train.py CHANGED Viewed

@@ -17,15 +17,12 @@ n_gpus = len(hps.gpus.split("-"))
 from random import randint, shuffle
 import torch
 try:
-    import intel_extension_for_pytorch as ipex  # pylint: disable=import-error, unused-import
     if torch.xpu.is_available():
         from infer.modules.ipex import ipex_init
         from infer.modules.ipex.gradscaler import gradscaler_init
         from torch.xpu.amp import autocast
         GradScaler = gradscaler_init()
         ipex_init()
     else:

 from random import randint, shuffle
 import torch
 try:
+    import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
     if torch.xpu.is_available():
         from infer.modules.ipex import ipex_init
         from infer.modules.ipex.gradscaler import gradscaler_init
         from torch.xpu.amp import autocast
         GradScaler = gradscaler_init()
         ipex_init()
     else:

infer/modules/vc/modules.py CHANGED Viewed

@@ -209,9 +209,7 @@ class VC:
                 f0_file,
             )
             if self.tgt_sr != resample_sr >= 16000:
-                tgt_sr = resample_sr
-            else:
-                tgt_sr = self.tgt_sr
             index_info = (
                 "Index:\n%s." % file_index
                 if os.path.exists(file_index)
@@ -220,7 +218,7 @@ class VC:
             return (
                 "Success.\n%s\nTime:\nnpy: %.2fs, f0: %.2fs, infer: %.2fs."
                 % (index_info, *times),
-                (tgt_sr, audio_opt),
             )
         except:
             info = traceback.format_exc()
@@ -288,13 +286,14 @@ class VC:
                                 tgt_sr,
                             )
                         else:
-                            path = "%s/%s.%s" % (
-                                opt_root,
-                                os.path.basename(path),
-                                format1,
-                            )
                             with BytesIO() as wavf:
-                                sf.write(wavf, audio_opt, tgt_sr, format="wav")
                                 wavf.seek(0, 0)
                                 with open(path, "wb") as outf:
                                     wav2(wavf, outf, format1)

                 f0_file,
             )
             if self.tgt_sr != resample_sr >= 16000:
+                self.tgt_sr = resample_sr
             index_info = (
                 "Index:\n%s." % file_index
                 if os.path.exists(file_index)
             return (
                 "Success.\n%s\nTime:\nnpy: %.2fs, f0: %.2fs, infer: %.2fs."
                 % (index_info, *times),
+                (self.tgt_sr, audio_opt),
             )
         except:
             info = traceback.format_exc()
                                 tgt_sr,
                             )
                         else:
+                            path = "%s/%s.%s" % (opt_root, os.path.basename(path), format1)
                             with BytesIO() as wavf:
+                                sf.write(
+                                    wavf,
+                                    audio_opt,
+                                    tgt_sr,
+                                    format="wav"
+                                )
                                 wavf.seek(0, 0)
                                 with open(path, "wb") as outf:
                                     wav2(wavf, outf, format1)

requirements-dml.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 joblib>=1.1.0
 numba==0.56.4
 numpy==1.23.5

+gdown
+mega.py
 joblib>=1.1.0
 numba==0.56.4
 numpy==1.23.5

requirements.txt CHANGED Viewed

@@ -1,12 +1,17 @@
 joblib>=1.1.0
 numba==0.56.4
-numpy==1.23.5
 scipy
 librosa==0.9.1
 llvmlite==0.39.0
 fairseq==0.12.2
 faiss-cpu==1.7.3
-gradio==3.34.0
 Cython
 pydub>=0.25.1
 soundfile>=0.12.1
@@ -45,3 +50,4 @@ fastapi==0.88
 ffmpy==0.3.1
 python-dotenv>=1.0.0
 av

+torch
+torchvision
+torchaudio
+gdown
+mega.py
 joblib>=1.1.0
 numba==0.56.4
+numpy==1.22.0
 scipy
 librosa==0.9.1
 llvmlite==0.39.0
 fairseq==0.12.2
 faiss-cpu==1.7.3
+gradio==3.43.2
 Cython
 pydub>=0.25.1
 soundfile>=0.12.1
 ffmpy==0.3.1
 python-dotenv>=1.0.0
 av
+pydantic==1.10.12

tools/rvc_for_realtime.py CHANGED Viewed

@@ -357,13 +357,19 @@ class RVC:
         with torch.no_grad():
             if self.if_f0 == 1:
                 # print(12222222222,feats.device,p_len.device,cache_pitch.device,cache_pitchf.device,sid.device,rate2)
-                infered_audio = self.net_g.infer(
-                    feats, p_len, cache_pitch, cache_pitchf, sid, rate
-                )[0][0, 0].data.float()
             else:
-                infered_audio = self.net_g.infer(feats, p_len, sid, rate)[0][
-                    0, 0
-                ].data.float()
         t5 = ttime()
         logger.info(
             "Spent time: fea = %.2fs, index = %.2fs, f0 = %.2fs, model = %.2fs",

         with torch.no_grad():
             if self.if_f0 == 1:
                 # print(12222222222,feats.device,p_len.device,cache_pitch.device,cache_pitchf.device,sid.device,rate2)
+                infered_audio = (
+                    self.net_g.infer(
+                        feats, p_len, cache_pitch, cache_pitchf, sid, rate
+                    )[0][0, 0]
+                    .data
+                    .float()
+                )
             else:
+                infered_audio = (
+                    self.net_g.infer(feats, p_len, sid, rate)[0][0, 0]
+                    .data
+                    .float()
+                )
         t5 = ttime()
         logger.info(
             "Spent time: fea = %.2fs, index = %.2fs, f0 = %.2fs, model = %.2fs",

tools/torchgate/utils.py CHANGED Viewed

@@ -3,9 +3,7 @@ from torch.types import Number
 @torch.no_grad()
-def amp_to_db(
-    x: torch.Tensor, eps=torch.finfo(torch.float64).eps, top_db=40
-) -> torch.Tensor:
     """
     Convert the input tensor from amplitude to decibel scale.
@@ -42,9 +40,7 @@ def temperature_sigmoid(x: torch.Tensor, x0: float, temp_coeff: float) -> torch.
 @torch.no_grad()
-def linspace(
-    start: Number, stop: Number, num: int = 50, endpoint: bool = True, **kwargs
-) -> torch.Tensor:
     """
     Generate a linearly spaced 1-D tensor.

 @torch.no_grad()
+def amp_to_db(x: torch.Tensor, eps=torch.finfo(torch.float64).eps, top_db=40) -> torch.Tensor:
     """
     Convert the input tensor from amplitude to decibel scale.
 @torch.no_grad()
+def linspace(start: Number, stop: Number, num: int = 50, endpoint: bool = True, **kwargs) -> torch.Tensor:
     """
     Generate a linearly spaced 1-D tensor.