{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "1cRKuRl7Z8Nj" }, "source": [ "# Requirment" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "background_save": true }, "id": "ouQGqsHfsDv6", "outputId": "8a464347-c2ba-489e-8f45-3707e9ba2e1d" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "G:\\AI\\VITS_WebUI\\monotonic_align\n", "running build_ext\n", "copying build\\lib.win-amd64-3.9\\monotonic_align\\core.cp39-win_amd64.pyd -> monotonic_align\n", "G:\\AI\\VITS_WebUI\n" ] } ], "source": [ "%cd G:\\AI\\VITS_WebUI\\monotonic_align\n", "!python setup.py build_ext --inplace\n", "%cd .." ] }, { "cell_type": "code", "execution_count": 2, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fri Apr 21 22:47:53 2023 \n", "+---------------------------------------------------------------------------------------+\n", "| NVIDIA-SMI 531.14 Driver Version: 531.14 CUDA Version: 12.1 |\n", "|-----------------------------------------+----------------------+----------------------+\n", "| GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. 
|\n", "|=========================================+======================+======================|\n", "| 0 NVIDIA GeForce RTX 3060 WDDM | 00000000:01:00.0 On | N/A |\n", "| 0% 33C P8 20W / 170W| 8114MiB / 12288MiB | 32% Default |\n", "| | | N/A |\n", "+-----------------------------------------+----------------------+----------------------+\n", " \n", "+---------------------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=======================================================================================|\n", "| 0 N/A N/A 5040 C+G ...\\cef\\cef.win7x64\\steamwebhelper.exe N/A |\n", "| 0 N/A N/A 5872 C+G ...on\\wallpaper_engine\\wallpaper32.exe N/A |\n", "| 0 N/A N/A 7144 C+G ....0_x64__kzh8wxbdkxb8p\\DCv2\\DCv2.exe N/A |\n", "| 0 N/A N/A 8724 C+G C:\\Windows\\explorer.exe N/A |\n", "| 0 N/A N/A 9632 C+G ....Search_cw5n1h2txyewy\\SearchApp.exe N/A |\n", "| 0 N/A N/A 9704 C+G D:\\CloudMusic\\cloudmusic.exe N/A |\n", "| 0 N/A N/A 9812 C+G ...2txyewy\\StartMenuExperienceHost.exe N/A |\n", "| 0 N/A N/A 10872 C+G ...0.0_x64__p7pnf6hceqser\\snipaste.exe N/A |\n", "| 0 N/A N/A 11900 C+G D:\\Typora\\Typora.exe N/A |\n", "| 0 N/A N/A 12268 C+G ...t.LockApp_cw5n1h2txyewy\\LockApp.exe N/A |\n", "| 0 N/A N/A 13320 C+G ...rPicker\\PowerToys.ColorPickerUI.exe N/A |\n", "| 0 N/A N/A 13600 C+G ...FancyZones\\PowerToys.FancyZones.exe N/A |\n", "| 0 N/A N/A 13660 C+G ...5n1h2txyewy\\ShellExperienceHost.exe N/A |\n", "| 0 N/A N/A 13904 C+G D:\\Eagle\\Eagle.exe N/A |\n", "| 0 N/A N/A 16220 C+G ...GeForce Experience\\NVIDIA Share.exe N/A |\n", "| 0 N/A N/A 16240 C+G ...GeForce Experience\\NVIDIA Share.exe N/A |\n", "| 0 N/A N/A 16332 C+G ...CBS_cw5n1h2txyewy\\TextInputHost.exe N/A |\n", "| 0 N/A N/A 17608 C+G ...B\\system_tray\\lghub_system_tray.exe N/A |\n", "| 0 N/A N/A 17696 C+G C:\\Program Files\\LGHUB\\lghub.exe N/A |\n", "| 0 N/A N/A 20848 C+G 
...oogle\\Chrome\\Application\\chrome.exe N/A |\n", "| 0 N/A N/A 23484 C+G ...auncher\\PowerToys.PowerLauncher.exe N/A |\n", "| 0 N/A N/A 26616 C+G D:\\motrix\\Motrix.exe N/A |\n", "| 0 N/A N/A 27388 C+G D:\\BaiduNetdisk\\baidunetdiskrender.exe N/A |\n", "| 0 N/A N/A 28064 C+G ...on\\112.0.1722.39\\msedgewebview2.exe N/A |\n", "| 0 N/A N/A 28988 C+G ...3\\extracted\\runtime\\WeChatAppEx.exe N/A |\n", "| 0 N/A N/A 32628 C+G D:\\RaiDrive\\RaiDrive.exe N/A |\n", "| 0 N/A N/A 34352 C+G ...1.0_x64__8wekyb3d8bbwe\\Video.UI.exe N/A |\n", "| 0 N/A N/A 64972 C+G ...ft Office\\root\\Office16\\WINWORD.EXE N/A |\n", "| 0 N/A N/A 86756 C+G ..._8wekyb3d8bbwe\\Microsoft.Photos.exe N/A |\n", "| 0 N/A N/A 159156 C+G ...siveControlPanel\\SystemSettings.exe N/A |\n", "| 0 N/A N/A 186184 C+G ...9.0.0_x64__gqbn7fs4pywxm\\Db.App.exe N/A |\n", "| 0 N/A N/A 326192 C+G ...les\\Microsoft OneDrive\\OneDrive.exe N/A |\n", "| 0 N/A N/A 366360 C+G ...ekyb3d8bbwe\\PhoneExperienceHost.exe N/A |\n", "| 0 N/A N/A 455764 C+G ...-ins\\Spaces\\Adobe Spaces Helper.exe N/A |\n", "| 0 N/A N/A 456280 C+G ...obe Photoshop CC 2019\\Photoshop.exe N/A |\n", "| 0 N/A N/A 456496 C+G ...CEP\\CEPHtmlEngine\\CEPHtmlEngine.exe N/A |\n", "| 0 N/A N/A 469488 C+G ...t Office\\root\\Office16\\POWERPNT.EXE N/A |\n", "| 0 N/A N/A 493720 C+G ...rm 2022.3.2\\jbr\\bin\\jcef_helper.exe N/A |\n", "| 0 N/A N/A 498508 C+G ...crosoft\\Edge\\Application\\msedge.exe N/A |\n", "+---------------------------------------------------------------------------------------+\n" ] } ], "source": [ "!nvidia-smi" ], "metadata": { "collapsed": false } }, { "cell_type": "markdown", "metadata": { "id": "SxpEIauJZ0s6" }, "source": [ "# Settings" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "cellView": "form", "id": "v10x1lO7Z5AK" }, "outputs": [], "source": [ "#@title Edit config\n", "import json\n", "batchsize = 16 #@param {type:\"number\"}\n", "training_files = \"filelists/yuuka_train.txt.cleaned\" #@param 
{type:\"string\"}\n", "validation_files = \"filelists/yuuka_val.txt.cleaned\" #@param {type:\"string\"}\n", "config = json.load(open(\"configs/config.json\"))\n", "config['train']['batch_size'] = batchsize\n", "config['data']['training_files'] = training_files\n", "config['data']['validation_files'] = validation_files\n", "with open(\"configs/config.json\", 'w+') as f:\n", " json.dump(config, f, indent=4)" ] }, { "cell_type": "markdown", "metadata": { "id": "XBNba8Qpa7XF" }, "source": [ "# GUI" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "zF5IUSAQa_EB" }, "outputs": [], "source": [ "import gradio as gr\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "gcO8hd1Jr2t6" }, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import IPython.display as ipd\n", "import os\n", "import json\n", "import math\n", "import torch\n", "import commons\n", "import utils\n", "from models import SynthesizerTrn\n", "from text.symbols import symbols\n", "from text import text_to_sequence\n", "from scipy.io.wavfile import write\n", "from gradio.processing_utils import download_tmp_copy_of_file\n", "from PIL import Image\n", "import numpy as np\n", "import os\n", "from pathlib import Path\n", "import openai\n", "\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "tp-8n_YBg5FN" }, "outputs": [], "source": [ "LANGUAGES = ['EN','CN','JP']\n", "SPEAKER_ID = 0\n", "COVER = \"models/Yuuka/cover.png\"\n", "speaker_choice = \"Yuuka\"\n", "MODEL_ZH_NAME = \"早濑优香\"\n", "EXAMPLE_TEXT = \"先生。今日も全力であなたをアシストしますね。\"\n", "USER_INPUT_TEXT = \"\"" ] }, { "cell_type": "code", "execution_count": 8, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:root:Loaded checkpoint 'models/Yuuka/Yuuka.pth' (iteration 445)\n" ] } ], "source": [ "CONFIG_PATH = \"configs/config.json\"\n", "MODEL_PATH = \"models/Yuuka/Yuuka.pth\"\n", "\n", "hps = 
# %% Build the synthesizer for the default checkpoint.
# NOTE(review): this construction code was duplicated verbatim inside
# load_model(); the top-level cell now simply calls load_model() once.
hps = None
net_g = None
model = None


def load_model():
    """(Re)build the VITS synthesizer from the CONFIG_PATH / MODEL_PATH globals.

    Side effects: rebinds the module-level ``hps``, ``net_g`` and ``model``
    globals and moves the network to the GPU.
    """
    global hps, net_g, model

    hps = utils.get_hparams_from_file(CONFIG_PATH)
    net_g = SynthesizerTrn(
        len(hps.symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model).cuda()
    model = net_g.eval()
    model = utils.load_checkpoint(MODEL_PATH, net_g, None)


load_model()

# %% Read the model registry.
# NOTE(review): the loop rebinds name_en every iteration, so only the LAST
# entry's English name survives — kept as-is to preserve behaviour.
with open("models/model_info.json", "r", encoding="utf-8") as f:
    models_info = json.load(f)
for i, model_info in models_info.items():
    name_en = model_info['name_en']


def get_text(text, hps):
    """Convert raw text to a LongTensor of symbol ids using hps.data.text_cleaners.

    When hps.data.add_blank is set, a 0 (blank) id is interspersed between
    every pair of symbols, matching VITS training preprocessing.
    """
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        text_norm = commons.intersperse(text_norm, 0)
    return torch.LongTensor(text_norm)


def tts_fn(text, noise_scale, noise_scale_w, length_scale):
    """Synthesize ``text`` and return ``(sample_rate, waveform)`` for Gradio.

    NOTE(review): this function was defined twice in the notebook (cells 8
    and 12) with identical bodies — the later definition silently shadowed
    the earlier one; it is now defined exactly once.
    """
    stn_tst = get_text(text, hps)
    with torch.no_grad():
        x_tst = stn_tst.cuda().unsqueeze(0)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).cuda()
        sid = torch.LongTensor([SPEAKER_ID]).cuda()
        audio = net_g.infer(x_tst, x_tst_lengths, sid=sid,
                            noise_scale=noise_scale,
                            noise_scale_w=noise_scale_w,
                            length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
    # 22050 matches the training config; consider hps.data.sampling_rate
    # if a model with a different rate is ever loaded.
    return (22050, audio)


def add_model_fn(example_text, cover, speakerID, name_en, name_cn, language):
    """Register a new model: save its cover image under models/<name_en>/ and
    append an entry to models/model_info.json.

    Raises gr.Error when a required form field is missing; returns "Success".
    """
    # Validate the *form inputs*. The original tested the module global
    # SPEAKER_ID (constant 0, falsy) instead of the speakerID parameter, so
    # the check always fired; it also had an unreachable `return` after raise.
    if speakerID is None or speakerID == "" or not name_en or not language:
        raise gr.Error("Please fill in all required fields!")

    # Build models/<name_en>/ and the target checkpoint path.
    model_dir = Path("models") / name_en
    model_dir.mkdir(parents=True, exist_ok=True)
    model_path = model_dir / (name_en + ".pth")

    # Save the uploaded cover image.
    # NOTE(review): saved as 'cover_white_background.png' while the registry
    # records 'cover.png' — confirm which filename the UI actually reads.
    if cover is not None:
        img = Image.fromarray(np.array(cover))
        img.save(os.path.join(model_dir, 'cover_white_background.png'))

    # Registry entry; paths stored as str so json.dump round-trips even
    # without the custom encoder (Path objects are not JSON-serializable).
    new_model = {
        "name_en": name_en,
        "name_zh": name_cn,
        "cover": str(model_dir / "cover.png"),
        "sid": speakerID,
        "example": example_text,
        "language": language,
        "type": "single",
        "model_path": str(model_path),
    }

    with open("models/model_info.json", "r", encoding="utf-8") as f:
        registry = json.load(f)
    registry[name_en] = new_model
    with open("models/model_info.json", "w") as f:
        json.dump(registry, f, cls=CustomEncoder)

    return "Success"
new_model\n", " with open(\"models/model_info.json\", \"w\") as f:\n", " json.dump(models_info, f, cls=CustomEncoder)\n", "\n", "\n", " return \"Success\"" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 14, "outputs": [], "source": [ "def clear_input_text():\n", " return \"\"" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 15, "outputs": [], "source": [ "def clear_add_model_info():\n", " return \"\",None,\"\",\"\",\"\",\"\"" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 16, "outputs": [], "source": [ "def get_options():\n", " with open(\"models/model_info.json\", \"r\", encoding=\"utf-8\") as f:\n", " global models_info\n", " models_info = json.load(f)\n", "\n", " for i,model_info in models_info.items():\n", " global name_en\n", " name_en = model_info['name_en']" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 17, "outputs": [], "source": [ "def reset_options():\n", " value_model_choice = models_info['Yuuka']['name_en']\n", " value_speaker_id = models_info['Yuuka']['sid']\n", " return value_model_choice,value_speaker_id" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 18, "outputs": [], "source": [ "def refresh_options():\n", " get_options()\n", " value_model_choice = models_info[speaker_choice]['name_en']\n", " value_speaker_id = models_info[speaker_choice]['sid']\n", " return value_model_choice,value_speaker_id" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 19, "outputs": [], "source": [ "def change_dropdown(choice):\n", " global speaker_choice\n", " speaker_choice = choice\n", " global COVER\n", " COVER = str(models_info[speaker_choice]['cover'])\n", " global MODEL_PATH\n", " MODEL_PATH = str(models_info[speaker_choice]['model_path'])\n", " global MODEL_ZH_NAME\n", " MODEL_ZH_NAME = str(models_info[speaker_choice]['name_zh'])\n", " global 
EXAMPLE_TEXT\n", " EXAMPLE_TEXT = str(models_info[speaker_choice]['example'])\n", "\n", " speaker_id_change = gr.update(value=str(models_info[speaker_choice]['sid']))\n", " cover_change = gr.update(value='