{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "1cRKuRl7Z8Nj" }, "source": [ "# Requirment" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "background_save": true }, "id": "ouQGqsHfsDv6", "outputId": "8a464347-c2ba-489e-8f45-3707e9ba2e1d" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "G:\\AI\\VITS_WebUI\\monotonic_align\n", "running build_ext\n", "copying build\\lib.win-amd64-3.9\\monotonic_align\\core.cp39-win_amd64.pyd -> monotonic_align\n", "G:\\AI\\VITS_WebUI\n" ] } ], "source": [ "%cd G:\\AI\\VITS_WebUI\\monotonic_align\n", "!python setup.py build_ext --inplace\n", "%cd .." ] }, { "cell_type": "code", "execution_count": 2, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fri Apr 21 22:47:53 2023 \n", "+---------------------------------------------------------------------------------------+\n", "| NVIDIA-SMI 531.14 Driver Version: 531.14 CUDA Version: 12.1 |\n", "|-----------------------------------------+----------------------+----------------------+\n", "| GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. 
|\n", "|=========================================+======================+======================|\n", "| 0 NVIDIA GeForce RTX 3060 WDDM | 00000000:01:00.0 On | N/A |\n", "| 0% 33C P8 20W / 170W| 8114MiB / 12288MiB | 32% Default |\n", "| | | N/A |\n", "+-----------------------------------------+----------------------+----------------------+\n", " \n", "+---------------------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=======================================================================================|\n", "| 0 N/A N/A 5040 C+G ...\\cef\\cef.win7x64\\steamwebhelper.exe N/A |\n", "| 0 N/A N/A 5872 C+G ...on\\wallpaper_engine\\wallpaper32.exe N/A |\n", "| 0 N/A N/A 7144 C+G ....0_x64__kzh8wxbdkxb8p\\DCv2\\DCv2.exe N/A |\n", "| 0 N/A N/A 8724 C+G C:\\Windows\\explorer.exe N/A |\n", "| 0 N/A N/A 9632 C+G ....Search_cw5n1h2txyewy\\SearchApp.exe N/A |\n", "| 0 N/A N/A 9704 C+G D:\\CloudMusic\\cloudmusic.exe N/A |\n", "| 0 N/A N/A 9812 C+G ...2txyewy\\StartMenuExperienceHost.exe N/A |\n", "| 0 N/A N/A 10872 C+G ...0.0_x64__p7pnf6hceqser\\snipaste.exe N/A |\n", "| 0 N/A N/A 11900 C+G D:\\Typora\\Typora.exe N/A |\n", "| 0 N/A N/A 12268 C+G ...t.LockApp_cw5n1h2txyewy\\LockApp.exe N/A |\n", "| 0 N/A N/A 13320 C+G ...rPicker\\PowerToys.ColorPickerUI.exe N/A |\n", "| 0 N/A N/A 13600 C+G ...FancyZones\\PowerToys.FancyZones.exe N/A |\n", "| 0 N/A N/A 13660 C+G ...5n1h2txyewy\\ShellExperienceHost.exe N/A |\n", "| 0 N/A N/A 13904 C+G D:\\Eagle\\Eagle.exe N/A |\n", "| 0 N/A N/A 16220 C+G ...GeForce Experience\\NVIDIA Share.exe N/A |\n", "| 0 N/A N/A 16240 C+G ...GeForce Experience\\NVIDIA Share.exe N/A |\n", "| 0 N/A N/A 16332 C+G ...CBS_cw5n1h2txyewy\\TextInputHost.exe N/A |\n", "| 0 N/A N/A 17608 C+G ...B\\system_tray\\lghub_system_tray.exe N/A |\n", "| 0 N/A N/A 17696 C+G C:\\Program Files\\LGHUB\\lghub.exe N/A |\n", "| 0 N/A N/A 20848 C+G 
...oogle\\Chrome\\Application\\chrome.exe N/A |\n", "| 0 N/A N/A 23484 C+G ...auncher\\PowerToys.PowerLauncher.exe N/A |\n", "| 0 N/A N/A 26616 C+G D:\\motrix\\Motrix.exe N/A |\n", "| 0 N/A N/A 27388 C+G D:\\BaiduNetdisk\\baidunetdiskrender.exe N/A |\n", "| 0 N/A N/A 28064 C+G ...on\\112.0.1722.39\\msedgewebview2.exe N/A |\n", "| 0 N/A N/A 28988 C+G ...3\\extracted\\runtime\\WeChatAppEx.exe N/A |\n", "| 0 N/A N/A 32628 C+G D:\\RaiDrive\\RaiDrive.exe N/A |\n", "| 0 N/A N/A 34352 C+G ...1.0_x64__8wekyb3d8bbwe\\Video.UI.exe N/A |\n", "| 0 N/A N/A 64972 C+G ...ft Office\\root\\Office16\\WINWORD.EXE N/A |\n", "| 0 N/A N/A 86756 C+G ..._8wekyb3d8bbwe\\Microsoft.Photos.exe N/A |\n", "| 0 N/A N/A 159156 C+G ...siveControlPanel\\SystemSettings.exe N/A |\n", "| 0 N/A N/A 186184 C+G ...9.0.0_x64__gqbn7fs4pywxm\\Db.App.exe N/A |\n", "| 0 N/A N/A 326192 C+G ...les\\Microsoft OneDrive\\OneDrive.exe N/A |\n", "| 0 N/A N/A 366360 C+G ...ekyb3d8bbwe\\PhoneExperienceHost.exe N/A |\n", "| 0 N/A N/A 455764 C+G ...-ins\\Spaces\\Adobe Spaces Helper.exe N/A |\n", "| 0 N/A N/A 456280 C+G ...obe Photoshop CC 2019\\Photoshop.exe N/A |\n", "| 0 N/A N/A 456496 C+G ...CEP\\CEPHtmlEngine\\CEPHtmlEngine.exe N/A |\n", "| 0 N/A N/A 469488 C+G ...t Office\\root\\Office16\\POWERPNT.EXE N/A |\n", "| 0 N/A N/A 493720 C+G ...rm 2022.3.2\\jbr\\bin\\jcef_helper.exe N/A |\n", "| 0 N/A N/A 498508 C+G ...crosoft\\Edge\\Application\\msedge.exe N/A |\n", "+---------------------------------------------------------------------------------------+\n" ] } ], "source": [ "!nvidia-smi" ], "metadata": { "collapsed": false } }, { "cell_type": "markdown", "metadata": { "id": "SxpEIauJZ0s6" }, "source": [ "# Settings" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "cellView": "form", "id": "v10x1lO7Z5AK" }, "outputs": [], "source": [ "#@title Edit config\n", "import json\n", "batchsize = 16 #@param {type:\"number\"}\n", "training_files = \"filelists/yuuka_train.txt.cleaned\" #@param 
{type:\"string\"}\n", "validation_files = \"filelists/yuuka_val.txt.cleaned\" #@param {type:\"string\"}\n", "config = json.load(open(\"configs/config.json\"))\n", "config['train']['batch_size'] = batchsize\n", "config['data']['training_files'] = training_files\n", "config['data']['validation_files'] = validation_files\n", "with open(\"configs/config.json\", 'w+') as f:\n", " json.dump(config, f, indent=4)" ] }, { "cell_type": "markdown", "metadata": { "id": "XBNba8Qpa7XF" }, "source": [ "# GUI" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "zF5IUSAQa_EB" }, "outputs": [], "source": [ "import gradio as gr\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "gcO8hd1Jr2t6" }, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import IPython.display as ipd\n", "import os\n", "import json\n", "import math\n", "import torch\n", "import commons\n", "import utils\n", "from models import SynthesizerTrn\n", "from text.symbols import symbols\n", "from text import text_to_sequence\n", "from scipy.io.wavfile import write\n", "from gradio.processing_utils import download_tmp_copy_of_file\n", "from PIL import Image\n", "import numpy as np\n", "import os\n", "from pathlib import Path\n", "import openai\n", "\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "tp-8n_YBg5FN" }, "outputs": [], "source": [ "LANGUAGES = ['EN','CN','JP']\n", "SPEAKER_ID = 0\n", "COVER = \"models/Yuuka/cover.png\"\n", "speaker_choice = \"Yuuka\"\n", "MODEL_ZH_NAME = \"早濑优香\"\n", "EXAMPLE_TEXT = \"先生。今日も全力であなたをアシストしますね。\"\n", "USER_INPUT_TEXT = \"\"" ] }, { "cell_type": "code", "execution_count": 8, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:root:Loaded checkpoint 'models/Yuuka/Yuuka.pth' (iteration 445)\n" ] } ], "source": [ "CONFIG_PATH = \"configs/config.json\"\n", "MODEL_PATH = \"models/Yuuka/Yuuka.pth\"\n", "\n", "hps = 
# %% Build the synthesizer for the default checkpoint.
# NOTE(review): this construction code was duplicated verbatim inside
# load_model(); the top-level cell now simply calls load_model() once.
hps = None
net_g = None
model = None


def load_model():
    """(Re)build the VITS synthesizer from the CONFIG_PATH / MODEL_PATH globals.

    Side effects: rebinds the module-level ``hps``, ``net_g`` and ``model``
    globals and moves the network to the GPU.
    """
    global hps, net_g, model

    hps = utils.get_hparams_from_file(CONFIG_PATH)
    net_g = SynthesizerTrn(
        len(hps.symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model).cuda()
    model = net_g.eval()
    model = utils.load_checkpoint(MODEL_PATH, net_g, None)


load_model()

# %% Read the model registry.
# NOTE(review): the loop rebinds name_en every iteration, so only the LAST
# entry's English name survives — kept as-is to preserve behaviour.
with open("models/model_info.json", "r", encoding="utf-8") as f:
    models_info = json.load(f)
for i, model_info in models_info.items():
    name_en = model_info['name_en']


def get_text(text, hps):
    """Convert raw text to a LongTensor of symbol ids using hps.data.text_cleaners.

    When hps.data.add_blank is set, a 0 (blank) id is interspersed between
    every pair of symbols, matching VITS training preprocessing.
    """
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        text_norm = commons.intersperse(text_norm, 0)
    return torch.LongTensor(text_norm)


def tts_fn(text, noise_scale, noise_scale_w, length_scale):
    """Synthesize ``text`` and return ``(sample_rate, waveform)`` for Gradio.

    NOTE(review): this function was defined twice in the notebook (cells 8
    and 12) with identical bodies — the later definition silently shadowed
    the earlier one; it is now defined exactly once.
    """
    stn_tst = get_text(text, hps)
    with torch.no_grad():
        x_tst = stn_tst.cuda().unsqueeze(0)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()
        sid = torch.LongTensor([SPEAKER_ID]).cuda()
        audio = net_g.infer(x_tst, x_tst_lengths, sid=sid,
                            noise_scale=noise_scale,
                            noise_scale_w=noise_scale_w,
                            length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
    # 22050 matches the training config; consider hps.data.sampling_rate
    # if a model with a different rate is ever loaded.
    return (22050, audio)


def add_model_fn(example_text, cover, speakerID, name_en, name_cn, language):
    """Register a new model: save its cover image under models/<name_en>/ and
    append an entry to models/model_info.json.

    Raises gr.Error when a required form field is missing; returns "Success".
    """
    # Validate the *form inputs*. The original tested the module global
    # SPEAKER_ID (constant 0, falsy) instead of the speakerID parameter, so
    # the check always fired; it also had an unreachable `return` after raise.
    if speakerID is None or speakerID == "" or not name_en or not language:
        raise gr.Error("Please fill in all required fields!")

    # Build models/<name_en>/ and the target checkpoint path.
    model_dir = Path("models") / name_en
    model_dir.mkdir(parents=True, exist_ok=True)
    model_path = model_dir / (name_en + ".pth")

    # Save the uploaded cover image.
    # NOTE(review): saved as 'cover_white_background.png' while the registry
    # records 'cover.png' — confirm which filename the UI actually reads.
    if cover is not None:
        img = Image.fromarray(np.array(cover))
        img.save(os.path.join(model_dir, 'cover_white_background.png'))

    # Registry entry; paths stored as str so json.dump round-trips even
    # without the custom encoder (Path objects are not JSON-serializable).
    new_model = {
        "name_en": name_en,
        "name_zh": name_cn,
        "cover": str(model_dir / "cover.png"),
        "sid": speakerID,
        "example": example_text,
        "language": language,
        "type": "single",
        "model_path": str(model_path),
    }

    with open("models/model_info.json", "r", encoding="utf-8") as f:
        registry = json.load(f)
    registry[name_en] = new_model
    with open("models/model_info.json", "w") as f:
        json.dump(registry, f, cls=CustomEncoder)

    return "Success"
new_model\n", " with open(\"models/model_info.json\", \"w\") as f:\n", " json.dump(models_info, f, cls=CustomEncoder)\n", "\n", "\n", " return \"Success\"" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 14, "outputs": [], "source": [ "def clear_input_text():\n", " return \"\"" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 15, "outputs": [], "source": [ "def clear_add_model_info():\n", " return \"\",None,\"\",\"\",\"\",\"\"" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 16, "outputs": [], "source": [ "def get_options():\n", " with open(\"models/model_info.json\", \"r\", encoding=\"utf-8\") as f:\n", " global models_info\n", " models_info = json.load(f)\n", "\n", " for i,model_info in models_info.items():\n", " global name_en\n", " name_en = model_info['name_en']" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 17, "outputs": [], "source": [ "def reset_options():\n", " value_model_choice = models_info['Yuuka']['name_en']\n", " value_speaker_id = models_info['Yuuka']['sid']\n", " return value_model_choice,value_speaker_id" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 18, "outputs": [], "source": [ "def refresh_options():\n", " get_options()\n", " value_model_choice = models_info[speaker_choice]['name_en']\n", " value_speaker_id = models_info[speaker_choice]['sid']\n", " return value_model_choice,value_speaker_id" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 19, "outputs": [], "source": [ "def change_dropdown(choice):\n", " global speaker_choice\n", " speaker_choice = choice\n", " global COVER\n", " COVER = str(models_info[speaker_choice]['cover'])\n", " global MODEL_PATH\n", " MODEL_PATH = str(models_info[speaker_choice]['model_path'])\n", " global MODEL_ZH_NAME\n", " MODEL_ZH_NAME = str(models_info[speaker_choice]['name_zh'])\n", " global 
EXAMPLE_TEXT\n", " EXAMPLE_TEXT = str(models_info[speaker_choice]['example'])\n", "\n", " speaker_id_change = gr.update(value=str(models_info[speaker_choice]['sid']))\n", " cover_change = gr.update(value='